# Source: DataProcessingFramework
# Original listing: 66 lines, 2.2 KB
from typing import Optional, Union

from DPF.datatypes import ColumnDataType, ShardedDataType
from DPF.modalities import MODALITIES

from .sharded_config import ShardedDatasetConfig


class ShardedFilesDatasetConfig(ShardedDatasetConfig):
    """Config for ShardedFiles dataset type"""

    def __init__(
        self,
        path: str,
        datatypes: list[Union[ShardedDataType, ColumnDataType]],
        datafiles_ext: str = "csv",
    ) -> None:
        """
        Parameters
        ----------
        path: str
            Path to directory with shards
        datatypes: list[Union[ShardedDataType, ColumnDataType]]
            List of datatypes in dataset
        datafiles_ext: str = "csv"
            Extension of files with metadata in shards
        """
        # All arguments are forwarded unchanged to the parent config.
        super().__init__(path, datatypes, datafiles_ext)

    @classmethod
    def from_path_and_columns(
        cls,
        path: str,
        image_name_col: Optional[str] = None,
        video_name_col: Optional[str] = None,
        text_col: Optional[str] = None,
        datafiles_ext: str = "csv",
    ) -> "ShardedFilesDatasetConfig":
        """
        Build a config from column names instead of ready-made datatypes.

        A datatype is created for every column name that is truthy, in the
        order image, video, text. ``None`` and the empty string both count
        as "not provided".

        Parameters
        ----------
        path: str
            Path to directory with shards
        image_name_col: Optional[str] = None
            Name of column with image filenames in shard
        video_name_col: Optional[str] = None
            Name of column with video filenames in shard
        text_col: Optional[str] = None
            Name of column with text
        datafiles_ext: str = "csv"
            Extension of files with metadata in shards

        Returns
        -------
        ShardedFilesDatasetConfig
            Instance of itself

        Raises
        ------
        AssertionError
            If none of the column names is provided
        """
        datatypes: list[Union[ShardedDataType, ColumnDataType]] = []
        if image_name_col:
            datatypes.append(ShardedDataType(MODALITIES['image'], image_name_col))
        if video_name_col:
            datatypes.append(ShardedDataType(MODALITIES['video'], video_name_col))
        if text_col:
            datatypes.append(ColumnDataType(MODALITIES['text'], text_col))

        # Raised explicitly rather than via `assert` so the check still runs
        # under `python -O`; the exception type and message are unchanged.
        if not datatypes:
            raise AssertionError("At least one modality should be provided")

        return cls(path, datatypes, datafiles_ext=datafiles_ext)