llama-index
65 строк · 1.8 Кб
1"""Video audio parser.
2
Contains parsers for mp3 and mp4 files.
4
5"""
6
7from pathlib import Path8from typing import Any, Dict, List, Optional, cast9
10from llama_index.legacy.readers.base import BaseReader11from llama_index.legacy.schema import Document12
13
class VideoAudioReader(BaseReader):
    """Video audio parser.

    Extract text from the transcript of video/audio files by running them
    through an OpenAI Whisper model.

    """

    def __init__(self, *args: Any, model_version: str = "base", **kwargs: Any) -> None:
        """Init parser.

        Args:
            *args: Positional arguments forwarded to the base reader.
            model_version: Model name handed to ``whisper.load_model``.
            **kwargs: Keyword arguments forwarded to the base reader.

        Raises:
            ImportError: If the ``whisper`` package cannot be imported.
        """
        super().__init__(*args, **kwargs)
        self._model_version = model_version

        try:
            import whisper
        except ImportError as err:
            # Chain the cause so the underlying import failure stays visible.
            raise ImportError(
                "Please install OpenAI whisper model "
                "'pip install git+https://github.com/openai/whisper.git' "
                "to use the model"
            ) from err

        # Load the model once here; load_data reuses it via parser_config.
        model = whisper.load_model(self._model_version)

        self.parser_config = {"model": model}

    def load_data(
        self, file: Path, extra_info: Optional[Dict] = None
    ) -> List[Document]:
        """Parse file.

        For files whose name ends in ``mp4`` the first mono channel of the
        audio track is exported to an ``.mp3`` file next to the input, and
        that exported file is what gets transcribed. Any other file is passed
        to the model as-is.

        Args:
            file: Path of the audio/video file to transcribe.
            extra_info: Optional metadata attached to the returned document.

        Returns:
            A single-element list holding a ``Document`` whose text is the
            transcript.

        Raises:
            ImportError: If an mp4 file is given and ``pydub`` is missing.
        """
        import whisper

        # Path handed to the model; replaced below when audio is extracted.
        audio_path = str(file)

        if file.name.endswith("mp4"):
            try:
                from pydub import AudioSegment
            except ImportError as err:
                raise ImportError("Please install pydub 'pip install pydub' ") from err

            # Open the video container and keep only its first mono channel.
            video = AudioSegment.from_file(file, format="mp4")
            audio = video.split_to_mono()[0]

            # with_suffix swaps the extension without assuming its length.
            audio_path = str(file.with_suffix(".mp3"))
            audio.export(audio_path, format="mp3")

        model = cast(whisper.Whisper, self.parser_config["model"])
        # Transcribe the extracted audio (for mp4) rather than the original
        # container, so the export above is actually used.
        result = model.transcribe(audio_path)

        transcript = result["text"]

        return [Document(text=transcript, metadata=extra_info or {})]