lmops
1import random2import torch3
def load_train_dataset(dataset, size=None, listify=True):
    """Return the ``'train'`` split of ``dataset``, optionally subsampled.

    When ``size`` is given and is smaller than the number of training
    examples, the example indices are shuffled with a fixed seed (42) and
    the first ``size`` of them are passed to the split's ``select`` method,
    so the same subset is drawn on every call. Otherwise the whole split is
    used unchanged.

    Args:
        dataset: Mapping with a ``'train'`` entry. The entry must support
            ``len()`` and, when subsampling happens, ``select(indices)``.
        size: Number of examples to keep, or ``None`` to keep all of them.
        listify: If true, return ``list(split)``; otherwise return the split
            (or the object returned by ``select``) as is.

    Returns:
        The (possibly subsampled) training split, as a list when
        ``listify`` is true.
    """
    train_split = dataset['train']
    if size is not None and size < len(train_split):
        # A privately seeded generator keeps the subset reproducible and
        # leaves the global ``random`` state untouched.
        shuffled = list(range(len(train_split)))
        random.Random(42).shuffle(shuffled)
        train_split = train_split.select(shuffled[:size])
    return list(train_split) if listify else train_split
def pad2sameLen(
    values,
    pad_idx=0,
    left_pad=False
):
    """Pad tensors to a common last-dimension length and stack them.

    Each tensor is padded along its last dimension with ``pad_idx`` until it
    is as long as the longest tensor in ``values``; the padded tensors are
    then combined with ``torch.stack`` (new leading dimension).

    Args:
        values: Iterable of tensors. Any iterable is accepted, including a
            generator. ``torch.stack`` requires the padded tensors to have
            identical shapes, so all dimensions other than the last must
            already agree.
        pad_idx: Fill value used for the padding positions.
        left_pad: If true, padding is inserted before the existing values;
            otherwise it is appended after them.

    Returns:
        A tensor stacking the padded inputs in their original order.

    Raises:
        ValueError: If ``values`` is empty.
    """
    # The input is traversed twice (longest length, then padding), so a
    # one-shot iterable such as a generator must be materialized first.
    tensors = list(values)
    if not tensors:
        raise ValueError("pad2sameLen() requires at least one tensor")

    target_len = max(t.shape[-1] for t in tensors)
    padded = []
    for t in tensors:
        missing = target_len - t.shape[-1]
        # F.pad takes (left, right) amounts for the last dimension.
        widths = (missing, 0) if left_pad else (0, missing)
        padded.append(torch.nn.functional.pad(t, widths, value=pad_idx))
    return torch.stack(padded)