DataProcessingFramework
101 строка · 2.7 Кб
1from abc import ABC, abstractmethod2
3from DPF.modalities import DataModality4
5
class DataType(ABC):
    """Represents a modality in a specific storage format.

    Concrete subclasses state through ``is_file`` whether the data is kept
    in files or directly in a table column.
    """

    def __init__(self, modality: DataModality):
        """
        Parameters
        ----------
        modality: DataModality
            Modality of the data described by this type

        Raises
        ------
        AssertionError
            If this type is not file-based and ``modality.can_be_column``
            is falsy
        """
        # Raised explicitly rather than through an ``assert`` statement so
        # the check is not stripped under ``python -O``. AssertionError is
        # kept as the exception type for callers that already catch it.
        if not (self.is_file or modality.can_be_column):
            raise AssertionError(
                f"{type(self).__name__} is not file-based, but modality "
                f"{modality!r} cannot be stored in a column"
            )
        self.modality = modality

    @property
    @abstractmethod
    def is_file(self) -> bool:
        """Whether data of this type is stored in files."""
18
class ColumnDataType(DataType):
    """Describes data of a modality stored directly in a table column."""

    def __init__(self, modality: DataModality, user_column_name: str):
        """
        Parameters
        ----------
        modality: DataModality
            Modality of the data held in the column
        user_column_name: str
            Name of the column that contains data of this modality
        """
        super().__init__(modality)
        self.user_column_name = user_column_name

    def __repr__(self) -> str:
        return f'ColumnDataType(modality={self.modality}, user_column_name="{self.user_column_name}")'

    @property
    def column_name(self) -> str:
        """Column name read from the modality's ``column`` attribute."""
        return self.modality.column  # type: ignore

    @property
    def is_file(self) -> bool:
        """Always ``False`` for column-based data."""
        return False
49
class FileDataType(DataType):
    """Describes data of a modality stored in files."""

    def __init__(self, modality: DataModality, user_path_column_name: str):
        """
        Parameters
        ----------
        modality: DataModality
            Modality of the data stored in the files
        user_path_column_name: str
            Name of the column holding paths to files of this modality
        """
        super().__init__(modality)
        self.user_path_column_name = user_path_column_name

    def __repr__(self) -> str:
        return f'FileDataType(modality={self.modality}, user_path_column_name="{self.user_path_column_name}")'

    @property
    def is_file(self) -> bool:
        """Always ``True`` for file-based data."""
        return True
76
class ShardedDataType(DataType):
    """Describes data of a modality stored in files of a sharded dataset."""

    def __init__(self, modality: DataModality, user_basename_column_name: str):
        """
        Parameters
        ----------
        modality: DataModality
            Modality of the data stored in the files
        user_basename_column_name: str
            Name of the column holding file names of this modality
        """
        super().__init__(modality)
        self.user_basename_column_name = user_basename_column_name

    def __repr__(self) -> str:
        return f'ShardedDataType(modality={self.modality}, user_basename_column_name="{self.user_basename_column_name}")'

    @property
    def is_file(self) -> bool:
        """Always ``True`` for sharded, file-based data."""
        return True