# DataProcessingFramework
# 54 lines · 1.2 KB
from dataclasses import dataclass
from typing import Literal, Optional
3
ModalityName = Literal["image", "video", "text"]


@dataclass
class DataModality:
    """Represents the modality of data.

    Parameters
    ----------
    name: ModalityName
        Name of modality. Should be unique
    path_column: str
        Default name of the column with paths to files of this modality
    sharded_file_name_column: str
        Default column name of filenames in shard with this modality
    column: Optional[str] = None
        Default column name. Use None if this modality cannot be stored
        in a column (``can_be_column`` is then False).

    Notes
    -----
    The hash is computed from ``name`` only, while the ``__eq__`` generated
    by ``@dataclass`` compares all fields. Instances are mutable, so do not
    change ``name`` while an instance is used as a dict key or set member.
    """

    name: ModalityName
    path_column: str
    sharded_file_name_column: str
    column: Optional[str] = None

    @property
    def can_be_column(self) -> bool:
        """Return True when a default column name is set for this modality."""
        return self.column is not None

    def __hash__(self) -> int:
        # Defined explicitly: with the default eq=True and frozen=False,
        # @dataclass would otherwise set __hash__ to None (unhashable).
        return hash(self.name)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        # Intentionally overrides the dataclass-generated repr with the
        # bare modality name.
        return self.name
39
40
# Registry of the predefined modalities, keyed by modality name.
MODALITIES = {
    'image': DataModality(
        name='image',
        path_column='image_path',
        sharded_file_name_column='image_name',
        column=None,
    ),
    'video': DataModality(
        name='video',
        path_column='video_path',
        sharded_file_name_column='video_name',
        column=None,
    ),
    # Only text has a default column name of its own.
    'text': DataModality(
        name='text',
        path_column='text_path',
        sharded_file_name_column='text_name',
        column='text',
    ),
}
55