Skip to content
Snippets Groups Projects
Commit 94c299c2 authored by mohanty's avatar mohanty
Browse files

Add some documentation to dataset classes

parent f110b5b8
No related branches found
No related tags found
No related merge requests found
...@@ -262,42 +262,90 @@ if __name__ == "__main__": ...@@ -262,42 +262,90 @@ if __name__ == "__main__":
dataset = ZEWDPCBaseDataset( dataset = ZEWDPCBaseDataset(
images_dir="./data/prepared/v0.1/dataset_debug", images_dir="./data/prepared/v0.1/dataset_debug",
labels_path="./data/prepared/v0.1/dataset_debug/labels.csv", labels_path="./data/prepared/v0.1/dataset_debug/labels.csv",
drop_labels=True, drop_labels=False,
) )
print("Labels Dictionary :", dataset.labels_column_names)
print("Labels Dictionary :", dataset.labels_column_names)
""" """
You can treat the dataset as an iterator, where you can iterate over all the data samples. You can treat the dataset as an iterator,
where you can iterate over all the data samples.
""" """
for sample in dataset: for sample in tqdm.tqdm(dataset):
"""
Each of the samples will have the following structure :
{
'idx': 0,
'image': array([[[110, 128, 140],
[110, 128, 139],
[110, 128, 140],
...,
[132, 152, 166],
[134, 154, 168],
[137, 158, 173]]]),
'label': [0, 0, 1, 1]
}
where :
`idx` : contains the reference id for this image
`image` : contains the image as a NumPy array loaded by skimage.io.imread
`label` : contains the associated labels for this data point.
The values at each of the indices in the label represent the presence or absence
of the following features :
['scratch_small', 'scratch_large', 'dent_small', 'dent_large']
If `drop_labels` is passed as True during the instantiation of the class,
then the `label` key is not included in the sample.
"""
print(sample) print(sample)
break break
###########################################################################
###########################################################################
##
## Protected Dataset Access Examples
###########################################################################
###########################################################################
p_dataset = ZEWDPCProtectedDataset(
images_dir="./data/prepared/v0.1/dataset_debug",
labels_path="./data/prepared/v0.1/dataset_debug/labels.csv",
budget=150,
)
"""
The Protected Dataset is derived from the Base Dataset,
and is used to hold the unlabelled dataset.
This can be treated as an iterator as well, and can be used to
access the samples very much like the Base Dataset.
A Protected Dataset is always instantiated by providing a "budget", which
refers to the maximum number of labels that can be probed from the said
Protected Dataset instance.
# p_dataset = ZEWDPCProtectedDataset( The samples in a Protected Dataset will never contain the `label` key.
# images_dir="./data/prepared/v0.1/dataset_debug", """
# labels_path="./data/prepared/v0.1/dataset_debug/labels.csv", for sample in tqdm.tqdm(p_dataset):
# budget=150, idx = sample["idx"]
# )
# Labels instead have to be "purchased"
label = p_dataset.purchase_label(idx)
# for sample in tqdm.tqdm(p_dataset): print(label, p_dataset.check_available_budget())
# idx = sample["idx"] # When the budget for accessing the labels has been exhausted, the
# Protected Dataset will throw an OutOfBudetException.
# label = p_dataset.purchase_label(idx)
# print(label, p_dataset.check_available_budget()) if idx == 50:
# Example of transform applied to the images in the dataset
# if idx == 50: # midway.
# # Example of transform application midway preprocess = transforms.Compose(
# preprocess = transforms.Compose( [
# [ transforms.ToTensor(),
# transforms.ToTensor(), transforms.Grayscale(num_output_channels=1),
# transforms.Grayscale(num_output_channels=1), transforms.Normalize(mean=[0.5], std=[0.5]),
# transforms.Normalize(mean=[0.5], std=[0.5]), ]
# ] )
# ) p_dataset.set_transform(preprocess)
# p_dataset.set_transform(preprocess) print(sample.keys())
# print(sample.keys()) input("Transofrm applied. Press any key....")
# input("Transofrm applied. Press any key....")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment