otter
26 строк · 877.0 Байт
import unittest
from unittest.mock import Mock

from pipeline.mimicit_utils.data import get_mmc4_dataset


class TestGetMMC4Dataset(unittest.TestCase):
    """Unit test for the metadata exposed by ``get_mmc4_dataset``'s dataloader."""

    def test_get_mmc4_dataset(self):
        """Call ``get_mmc4_dataset`` with stubbed inputs and check its counts.

        The returned object's ``dataloader`` is expected to report
        ``num_batches == 100`` and ``num_samples == 1000``.
        """
        # Explicitly configured attributes of the stand-in ``args`` object.
        # Any attribute not listed here resolves to an auto-created child Mock.
        mmc4_settings = {
            "mmc4_shards": "/home/luodian/projects/Otter/archived/000000000.tar",
            "train_num_samples_mmc4": 1000,
            "mmc4_textsim_threshold": 0.32,
            "batch_size_mmc4": 10,
            "seed": 0,
            "workers": 2,
            "world_size": 1,
        }
        args = Mock(**mmc4_settings)

        # The image processor and tokenizer are plain placeholder mocks,
        # passed positionally in that order.
        data_info = get_mmc4_dataset(args, Mock(), Mock())

        # Verify the bookkeeping attributes on the resulting dataloader.
        loader = data_info.dataloader
        self.assertEqual(loader.num_batches, 100)
        self.assertEqual(loader.num_samples, 1000)