I'm trying to get the torch DataLoader to load the data in a specific order determined by the random seed 1. Here's my code:
import random
import torch.utils.data.dataset as Dataset
import torch.utils.data.dataloader as DataLoader
from torch.utils.data.sampler import Sampler
class MyDataset(Dataset.Dataset):
    """Toy map-style dataset holding the integers 0..9 as both data and label.

    Item ``i`` is the pair ``(i, i)``, which makes the order in which a
    loader visits the items directly readable from its output.
    """

    def __init__(self):
        # Data and labels are identical on purpose: the printed values then
        # reveal which index was sampled.
        self.Data = list(range(10))
        self.Label = list(range(10))

    def __getitem__(self, index):
        """Return the ``(data, label)`` pair stored at position ``index``."""
        data = self.Data[index]
        label = self.Label[index]
        return data, label

    def __len__(self):
        """Return the number of items in the dataset."""
        return len(self.Data)
class RandSeqSampler(Sampler):
    """Sampler that yields the indices of ``data_source`` in a random order.

    The order is drawn from Python's global ``random`` module, so it is
    reproducible after ``random.seed(...)``.

    Args:
        data_source: Any sized object; only ``len(data_source)`` is used.
        cumulative: Controls what is shuffled on each pass.
            ``False`` (default): every pass shuffles a fresh ``0..n-1``
            list, so each epoch is a new permutation of the *sorted*
            indices.
            ``True``: one index list is kept and reshuffled in place on
            every pass, so successive epochs match calling
            ``random.shuffle`` repeatedly on the same list.
    """

    def __init__(self, data_source, *, cumulative=False):
        # The base ``Sampler`` keeps no state of its own, and its
        # ``data_source`` constructor argument is (to my knowledge) required
        # in older torch releases but deprecated in newer ones, so the base
        # initializer is intentionally not invoked here.
        self.data_source = data_source
        self.cumulative = cumulative
        self._indices = None  # persistent order, used only when cumulative

    def __iter__(self):
        size = len(self.data_source)
        if not self.cumulative:
            # Fresh sorted list each pass: the result depends only on the
            # RNG state, not on the order produced by the previous pass.
            indices = list(range(size))
            random.shuffle(indices)
            return iter(indices)

        # (Re)build the persistent list on first use or if the size changed.
        if self._indices is None or len(self._indices) != size:
            self._indices = list(range(size))
        random.shuffle(self._indices)
        # Iterate over a copy so that a later reshuffle cannot disturb an
        # iterator that is still being consumed.
        return iter(list(self._indices))

    def __len__(self):
        """Return the number of indices produced per pass."""
        return len(self.data_source)
# Seed Python's global RNG so that the shuffles below are reproducible.
random.seed(1)
dataset = MyDataset()
dataloader = DataLoader.DataLoader(
    dataset=dataset, batch_size=1, sampler=RandSeqSampler(dataset)
)

# First pass over the loader: the sampler draws its first permutation.
for data, label in dataloader:
    print(data, label)
print("\n\n\n\n\n")
# Second pass: iterating the loader again asks the sampler for a new order.
for data, label in dataloader:
    print(data, label)

# Reference sequence: re-seed, then shuffle ONE list twice in place.
# NOTE: the second shuffle here permutes the already-shuffled list, whereas a
# sampler that rebuilds list(range(n)) on every pass permutes the sorted list,
# so the two second-round orders are not expected to match.
random.seed(1)
a = list(range(10))
random.shuffle(a)
print(a)
random.shuffle(a)
print(a)
The output is
tensor([6]) tensor([6])
tensor([8]) tensor([8])
tensor([9]) tensor([9])
tensor([7]) tensor([7])
tensor([5]) tensor([5])
tensor([3]) tensor([3])
tensor([0]) tensor([0])
tensor([4]) tensor([4])
tensor([1]) tensor([1])
tensor([2]) tensor([2])
tensor([4]) tensor([4])
tensor([8]) tensor([8])
tensor([2]) tensor([2])
tensor([6]) tensor([6])
tensor([5]) tensor([5])
tensor([9]) tensor([9])
tensor([0]) tensor([0])
tensor([7]) tensor([7])
tensor([1]) tensor([1])
tensor([3]) tensor([3])
[6, 8, 9, 7, 5, 3, 0, 4, 1, 2]
[5, 1, 9, 0, 3, 2, 6, 4, 8, 7]
You can see that in the first iteration the dataloader loads the data in the same order as the first shuffle (both 6, 8, 9, 7, 5, 3, 0, 4, 1, 2), but in the second iteration the data is loaded in a different order from the second shuffle ([4,8,2,6,5,9,0,7,1,3] versus [5, 1, 9, 0, 3, 2, 6, 4, 8, 7]). I would like the loaded data to follow the same order as the shuffled list in every iteration, which means that instead of loading [4,8,2,6,5,9,0,7,1,3], I would like to load [5, 1, 9, 0, 3, 2, 6, 4, 8, 7]. Any ideas on how to achieve that? Any help is appreciated.