DataProcessingFramework
/
simple_example.py
27 строк · 819.0 Байт
1from DPF import ShardsDatasetConfig, DatasetReader
2from DPF.filters.images.info_filter import ImageInfoFilter
3from DPF.filters.images.hash_filters import PHashFilter
4
5
def main():
    """Run the example pipeline on the bundled example dataset.

    Builds a ``ShardsDatasetConfig`` for ``examples/example_dataset``, reads it
    into a processor via ``DatasetReader``, prints the dataset summary, applies
    ``ImageInfoFilter`` and then ``PHashFilter``, and finally prints the
    processor's resulting metadata (``processor.df``).
    """
    # Number of workers handed to each filter; the same value is used for both.
    workers = 16

    config = ShardsDatasetConfig.from_path_and_columns(
        'examples/example_dataset',
        image_name_col='image_name',
        text_col="caption",
    )

    reader = DatasetReader()
    processor = reader.read_from_config(config)

    # NOTE(review): print_summary() presumably writes the summary to stdout
    # itself and returns None -- confirm against the DPF API. Passing its
    # result straight to print() would then emit the summary first, followed
    # by a misleading "Dataset summary: None" line. Print the header first and
    # only echo the return value if there actually is one.
    print("Dataset summary:")
    summary = processor.print_summary()
    if summary is not None:
        print(summary)

    datafilter = ImageInfoFilter(workers=workers)
    print('Applying ImageInfoFilter')
    processor.apply_data_filter(datafilter)

    datafilter = PHashFilter(sim_hash_size=8, workers=workers)
    print('Applying PHashFilter')
    processor.apply_data_filter(datafilter)

    print('Result dataset metadata')
    print(processor.df)


# Keep the entry-point guard: the pipeline must not run when this module is
# merely imported.
if __name__ == "__main__":
    main()