Add some documentation to dataset classes

94c299c2 · mohanty · f110b5b8 · 94c299c2
Commit 94c299c2 authored 3 years ago by mohanty
--- a/dataset.py
+++ b/dataset.py
@@ -262,42 +262,90 @@ if __name__ == "__main__":
    dataset = ZEWDPCBaseDataset(
        images_dir="./data/prepared/v0.1/dataset_debug",
        labels_path="./data/prepared/v0.1/dataset_debug/labels.csv",
-        drop_labels=True,
+        drop_labels=False,
    )
-    print("Labels Dictionary :", dataset.labels_column_names)
+    print("Labels Dictionary :", dataset.labels_column_names)
    """
-    You can treat the dataset as an iterator, where you can iterate over all the data samples.
+    You can treat the dataset as an iterator, 
+    where you can iterate over all the data samples.
    """
-    for sample in dataset:
+    for sample in tqdm.tqdm(dataset):
+        """
+        Each of the samples will have the following structure : 
+            {
+                'idx': 0, 
+                'image': array([[[110, 128, 140],
+                    [110, 128, 139],
+                    [110, 128, 140],
+                    ...,
+                    [132, 152, 166],
+                    [134, 154, 168],
+                    [137, 158, 173]]]),
+                'label': [0, 0, 1, 1]
+            }    
+        where : 
+            `idx`   : contains the reference id for this image
+            `image` : contains the image as a numpy array loaded by skimage.io.imread
+            `label` : contains the associated labels for this data point. 
+                      The values at each of the indices in the label represent the presence or absence 
+                      of the following features : 
+                        ['scratch_small', 'scratch_large', 'dent_small', 'dent_large']
+                    If `drop_labels` is passed as True during the instantiation of the class, 
+                    then the `label` key is not included in the sample.
+        """
        print(sample)
        break
+    ###########################################################################
+    ###########################################################################
+    ## 
+    ## Protected Dataset Access Examples
+    ###########################################################################
+    ###########################################################################    
+    p_dataset = ZEWDPCProtectedDataset(
+        images_dir="./data/prepared/v0.1/dataset_debug",
+        labels_path="./data/prepared/v0.1/dataset_debug/labels.csv",
+        budget=150,
+    )
+    """
+    The Protected Dataset is derived from the Base Dataset,
+    and is used to hold the unlabelled dataset. 
+    This can be treated as an iterator as well, and can be used to 
+    access the samples very much like the Base Dataset. 
+    A Protected Dataset is always instantiated by providing a "budget", which 
+    refers to the maximum number of labels that can be probed from the said 
+    Protected Dataset instance. 
-    # p_dataset = ZEWDPCProtectedDataset(
+    The samples in a Protected Dataset will never contain the `label` key. 
-    #     images_dir="./data/prepared/v0.1/dataset_debug",
+    """
-    #     labels_path="./data/prepared/v0.1/dataset_debug/labels.csv",
+    for sample in tqdm.tqdm(p_dataset):
-    #     budget=150,
+        idx = sample["idx"]
-    # )
+        # Labels instead have to be "purchased".
+        label = p_dataset.purchase_label(idx)
-    # for sample in tqdm.tqdm(p_dataset):
+        print(label, p_dataset.check_available_budget())
-    #     idx = sample["idx"]
+        # When the budget for accessing the labels has been exhausted, the 
+        # Protected Dataset will throw an OutOfBudetException. 
-    #     label = p_dataset.purchase_label(idx)
-    #     print(label, p_dataset.check_available_budget())
+        if idx == 50:
+            # Example of transform applied to the images in the dataset 
-    #     if idx == 50:
+            # midway.
-    #         # Example of transform application midway
+            preprocess = transforms.Compose(
-    #         preprocess = transforms.Compose(
+                [
-    #             [
+                    transforms.ToTensor(),
-    #                 transforms.ToTensor(),
+                    transforms.Grayscale(num_output_channels=1),
-    #                 transforms.Grayscale(num_output_channels=1),
+                    transforms.Normalize(mean=[0.5], std=[0.5]),
-    #                 transforms.Normalize(mean=[0.5], std=[0.5]),
+                ]
-    #             ]
+            )
-    #         )
+            p_dataset.set_transform(preprocess)
-    #         p_dataset.set_transform(preprocess)
+            print(sample.keys())
-    #         print(sample.keys())
+            input("Transofrm applied. Press any key....")
-    #         input("Transofrm applied. Press any key....")