DataProcessingFramework

Форк
0
101 строка · 2.7 Кб
1
from abc import ABC, abstractmethod
2

3
from DPF.modalities import DataModality
4

5

6
class DataType(ABC):
    """Represents modality in a specific storage format.

    Concrete subclasses implement ``is_file`` to state whether the data of
    the modality is stored in files.

    Attributes
    ----------
    modality: DataModality
        The modality passed to the constructor.
    """

    def __init__(self, modality: DataModality):
        """
        Parameters
        ----------
        modality: DataModality
            Modality of the data. When the concrete data type is not
            file-based, ``modality.can_be_column`` must be truthy.

        Raises
        ------
        AssertionError
            If ``is_file`` is falsy and ``modality.can_be_column`` is falsy.
        """
        # Raised explicitly rather than via `assert` so the check is not
        # stripped when Python runs with -O; AssertionError is kept so the
        # exception type seen by existing callers does not change.
        if not (self.is_file or modality.can_be_column):
            raise AssertionError(
                f"{type(self).__name__} is not file-based, but modality "
                f"{modality} cannot be stored in a column"
            )
        self.modality = modality

    @property
    @abstractmethod
    def is_file(self) -> bool:
        """Whether data of this type is stored in files."""
17

18

19
class ColumnDataType(DataType):
    """Data type for a modality whose data is kept in a table column."""

    def __init__(self, modality: DataModality, user_column_name: str) -> None:
        """Bind a modality to the user's column that holds its data.

        Parameters
        ----------
        modality: DataModality
            Modality of the data in the column
        user_column_name: str
            Name of column with data of this modality
        """
        super().__init__(modality)
        self.user_column_name = user_column_name

    def __repr__(self) -> str:
        template = 'ColumnDataType(modality={}, user_column_name="{}")'
        return template.format(self.modality, self.user_column_name)

    @property
    def column_name(self) -> str:
        """The ``column`` attribute of the wrapped modality."""
        return self.modality.column  # type: ignore

    @property
    def is_file(self) -> bool:
        """Always ``False`` for column-based data."""
        return False
48

49

50
class FileDataType(DataType):
    """Data type for a modality whose data is kept in files."""

    def __init__(self, modality: DataModality, user_path_column_name: str) -> None:
        """Bind a modality to the user's column that lists its file paths.

        Parameters
        ----------
        modality: DataModality
            Modality of the data in the files
        user_path_column_name: str
            Name of column with paths to files of this modality
        """
        super().__init__(modality)
        self.user_path_column_name = user_path_column_name

    def __repr__(self) -> str:
        template = 'FileDataType(modality={}, user_path_column_name="{}")'
        return template.format(self.modality, self.user_path_column_name)

    @property
    def is_file(self) -> bool:
        """Always ``True`` for file-based data."""
        return True
75

76

77
class ShardedDataType(DataType):
    """Data type for a modality kept in files of a sharded-format dataset."""

    def __init__(self, modality: DataModality, user_basename_column_name: str) -> None:
        """Bind a modality to the user's column that lists its file names.

        Parameters
        ----------
        modality: DataModality
            Modality of the data in the files
        user_basename_column_name: str
            Column name with file names of this modality
        """
        super().__init__(modality)
        self.user_basename_column_name = user_basename_column_name

    def __repr__(self) -> str:
        template = 'ShardedDataType(modality={}, user_basename_column_name="{}")'
        return template.format(self.modality, self.user_basename_column_name)

    @property
    def is_file(self) -> bool:
        """Always ``True`` for sharded file-based data."""
        return True
102

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.