
CAGS

npfl138.datasets.cags.CAGS

Source code in npfl138/datasets/cags.py
class CAGS:
    C: int = 3
    """The number of image channels."""
    H: int = 224
    """The image height."""
    W: int = 224
    """The image width."""
    LABELS: int = 34
    """The number of labels."""
    LABEL_NAMES: list[str] = [
        # Cats
        "Abyssinian", "Bengal", "Bombay", "British_Shorthair", "Egyptian_Mau",
        "Maine_Coon", "Russian_Blue", "Siamese", "Sphynx",
        # Dogs
        "american_bulldog", "american_pit_bull_terrier", "basset_hound",
        "beagle", "boxer", "chihuahua", "english_cocker_spaniel",
        "english_setter", "german_shorthaired", "great_pyrenees", "havanese",
        "japanese_chin", "keeshond", "leonberger", "miniature_pinscher",
        "newfoundland", "pomeranian", "pug", "saint_bernard", "samoyed",
        "scottish_terrier", "shiba_inu", "staffordshire_bull_terrier",
        "wheaten_terrier", "yorkshire_terrier",
    ]
    """The list of label names in the dataset."""
    Element = TypedDict("Element", {"image": torch.Tensor, "mask": torch.Tensor, "label": torch.Tensor})
    """The type of a single dataset element."""

    _URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2425/datasets/"

    class Dataset(TFRecordDataset):
        def __init__(self, path: str, size: int, decode_on_demand: bool) -> None:
            super().__init__(path, size, decode_on_demand)

        def __len__(self) -> int:
            """Return the number of elements in the dataset."""
            return super().__len__()

        def __getitem__(self, index: int) -> "CAGS.Element":
            """Return the `index`-th element of the dataset."""
            return super().__getitem__(index)

        def _tfrecord_decode(self, data: dict, indices: dict, index: int) -> "CAGS.Element":
            return {
                "image": torchvision.io.decode_image(
                    data["image"][indices["image"][index]:indices["image"][index + 1]],
                    torchvision.io.ImageReadMode.RGB),
                "mask": torchvision.io.decode_image(
                    data["mask"][indices["mask"][index]:indices["mask"][index + 1]],
                    torchvision.io.ImageReadMode.GRAY).to(dtype=torch.float32).div(255),
                "label": data["label"][index],
            }

    def __init__(self, decode_on_demand: bool = False) -> None:
        "Load the CAGS dataset, downloading it if necessary."
        for dataset, size in [("train", 2_142), ("dev", 306), ("test", 612)]:
            path = "cags.{}.tfrecord".format(dataset)
            if not os.path.exists(path):
                print("Downloading file {}...".format(path), file=sys.stderr)
                urllib.request.urlretrieve("{}/{}".format(self._URL, path), filename="{}.tmp".format(path))
                os.rename("{}.tmp".format(path), path)

            setattr(self, dataset, self.Dataset(path, size, decode_on_demand))

    train: Dataset
    """The training dataset."""
    dev: Dataset
    """The development dataset."""
    test: Dataset
    """The test dataset."""

    # The MaskIoUMetric
    class MaskIoUMetric(metrics.MaskIoU):
        """The MaskIoUMetric is a metric for evaluating the segmentation task."""
        def __init__(self, from_logits: bool = False) -> None:
            super().__init__((CAGS.H, CAGS.W), from_logits=from_logits)

    # Evaluation infrastructure.
    @staticmethod
    def evaluate_classification(gold_dataset: Dataset, predictions: Sequence[int]) -> float:
        """Evaluate the `predictions` labels against the gold dataset.

        Returns:
          accuracy: The average accuracy of the predicted labels, as a percentage.
        """
        gold = [int(example["label"]) for example in gold_dataset]

        if len(predictions) != len(gold):
            raise RuntimeError("The predictions differ in size from the gold data: {} vs {}".format(
                len(predictions), len(gold)))

        correct = sum(gold[i] == predictions[i] for i in range(len(gold)))
        return 100 * correct / len(gold)

    @staticmethod
    def evaluate_classification_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
        """Evaluate the file with label predictions against the gold dataset.

        Returns:
          accuracy: The average accuracy of the predicted labels, as a percentage.
        """
        predictions = [int(line) for line in predictions_file]
        return CAGS.evaluate_classification(gold_dataset, predictions)

    @staticmethod
    def evaluate_segmentation(gold_dataset: Dataset, predictions: Sequence[torch.Tensor]) -> float:
        """Evaluate the `predictions` masks against the gold dataset.

        Returns:
          iou: The average IoU of the predicted masks, as a percentage.
        """
        gold = [example["mask"] for example in gold_dataset]

        if len(predictions) != len(gold):
            raise RuntimeError("The predictions differ in size from the gold data: {} vs {}".format(
                len(predictions), len(gold)))

        iou = CAGS.MaskIoUMetric()
        for i in range(len(gold)):
            iou.update(gold[i], predictions[i])

        return 100 * iou.compute()

    @staticmethod
    def load_segmentation_file(predictions_file: TextIO) -> list[torch.Tensor]:
        """Load flattened masks from a file with run-length encoded lines.

        Every line encodes one mask as space-separated run lengths, which
        alternate between runs of zeros and runs of ones, starting with zeros.
        """
        predictions = []
        for line in predictions_file:
            runs = [int(run) for run in line.split()]
            assert sum(runs) == CAGS.H * CAGS.W

            # Expand the runs into a flat float mask of H * W values.
            offset = 0
            predictions.append(torch.zeros(CAGS.H * CAGS.W, dtype=torch.float32))
            for i, run in enumerate(runs):
                predictions[-1][offset:offset + run] = i % 2
                offset += run
        return predictions

    @staticmethod
    def evaluate_segmentation_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
        """Evaluate the file with mask predictions against the gold dataset.

        Returns:
          iou: The average IoU of the predicted masks, as a percentage.
        """
        return CAGS.evaluate_segmentation(gold_dataset, CAGS.load_segmentation_file(predictions_file))

    @staticmethod
    def visualize(image: torch.Tensor, mask: torch.Tensor, show: bool):
        """Visualize the given image plus predicted mask.

        Parameters:
          image: A torch.Tensor of shape [C, H, W] with dtype torch.uint8
          mask: A torch.Tensor with H * W float values in [0, 1]
          show: controls whether to show the figure or return it:
            if `True`, the figure is shown using `plt.show()`;
            if `False`, the `plt.Figure` instance is returned; it can be saved
            to TensorBoard using the `add_figure` method of a `SummaryWriter`.
        """
        import matplotlib.pyplot as plt

        figure = plt.figure(figsize=(10, 4))
        plt.axis("off")
        byte_mask = mask.reshape([CAGS.H, CAGS.W]).to(dtype=torch.uint8)
        visualization = torch.zeros([3, CAGS.H, 3 * CAGS.W], dtype=torch.uint8)
        visualization[:, :, :CAGS.W] = image
        visualization[:, :, CAGS.W:2 * CAGS.W] = 255 * byte_mask
        visualization[:, :, 2 * CAGS.W:] = image * byte_mask + 255 * (1 - byte_mask)
        plt.imshow(visualization.movedim(0, -1).numpy(force=True))
        if show:
            plt.show()
        else:
            return figure

C class-attribute instance-attribute

C: int = 3

The number of image channels.

H class-attribute instance-attribute

H: int = 224

The image height.

W class-attribute instance-attribute

W: int = 224

The image width.

LABELS class-attribute instance-attribute

LABELS: int = 34

The number of labels.

LABEL_NAMES class-attribute instance-attribute

LABEL_NAMES: list[str] = [
    "Abyssinian",
    "Bengal",
    "Bombay",
    "British_Shorthair",
    "Egyptian_Mau",
    "Maine_Coon",
    "Russian_Blue",
    "Siamese",
    "Sphynx",
    "american_bulldog",
    "american_pit_bull_terrier",
    "basset_hound",
    "beagle",
    "boxer",
    "chihuahua",
    "english_cocker_spaniel",
    "english_setter",
    "german_shorthaired",
    "great_pyrenees",
    "havanese",
    "japanese_chin",
    "keeshond",
    "leonberger",
    "miniature_pinscher",
    "newfoundland",
    "pomeranian",
    "pug",
    "saint_bernard",
    "samoyed",
    "scottish_terrier",
    "shiba_inu",
    "staffordshire_bull_terrier",
    "wheaten_terrier",
    "yorkshire_terrier",
]

The list of label names in the dataset.

Element class-attribute instance-attribute

Element = TypedDict(
    "Element", {"image": Tensor, "mask": Tensor, "label": Tensor}
)

The type of a single dataset element.
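
A minimal sketch of accessing a single element (assuming the dataset files can
be downloaded; the variable names are illustrative):

from npfl138.datasets.cags import CAGS

cags = CAGS()
element = cags.train[0]

print(element["image"].shape)  # torch.Size([3, 224, 224]), dtype torch.uint8
print(element["mask"].shape)   # torch.Size([1, 224, 224]), float32 values in [0, 1]
print(element["label"])        # a scalar tensor with the class index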

Dataset

Bases: TFRecordDataset

Source code in npfl138/datasets/cags.py
class Dataset(TFRecordDataset):
    def __init__(self, path: str, size: int, decode_on_demand: bool) -> None:
        super().__init__(path, size, decode_on_demand)

    def __len__(self) -> int:
        """Return the number of elements in the dataset."""
        return super().__len__()

    def __getitem__(self, index: int) -> "CAGS.Element":
        """Return the `index`-th element of the dataset."""
        return super().__getitem__(index)

    def _tfrecord_decode(self, data: dict, indices: dict, index: int) -> "CAGS.Element":
        return {
            "image": torchvision.io.decode_image(
                data["image"][indices["image"][index]:indices["image"][index + 1]],
                torchvision.io.ImageReadMode.RGB),
            "mask": torchvision.io.decode_image(
                data["mask"][indices["mask"][index]:indices["mask"][index + 1]],
                torchvision.io.ImageReadMode.GRAY).to(dtype=torch.float32).div(255),
            "label": data["label"][index],
        }

__len__

__len__() -> int

Return the number of elements in the dataset.

Source code in npfl138/datasets/cags.py
def __len__(self) -> int:
    """Return the number of elements in the dataset."""
    return super().__len__()

__getitem__

__getitem__(index: int) -> Element

Return the index-th element of the dataset.

Source code in npfl138/datasets/cags.py
def __getitem__(self, index: int) -> "CAGS.Element":
    """Return the `index`-th element of the dataset."""
    return super().__getitem__(index)
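
Because the dataset implements the map-style __len__/__getitem__ protocol, it
can be wrapped in a torch.utils.data.DataLoader directly; a sketch, assuming
the TFRecordDataset base class is a standard map-style dataset:

import torch
from npfl138.datasets.cags import CAGS

cags = CAGS()
loader = torch.utils.data.DataLoader(cags.train, batch_size=32, shuffle=True)

for batch in loader:
    # The default collate function stacks the dict values into batched tensors.
    images = batch["image"]   # [batch, 3, 224, 224], torch.uint8
    masks = batch["mask"]     # [batch, 1, 224, 224], torch.float32
    labels = batch["label"]   # [batch]
    break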

__init__

__init__(decode_on_demand: bool = False) -> None

Load the CAGS dataset, downloading it if necessary.

Source code in npfl138/datasets/cags.py
def __init__(self, decode_on_demand: bool = False) -> None:
    "Load the CAGS dataset, downloading it if necessary."
    for dataset, size in [("train", 2_142), ("dev", 306), ("test", 612)]:
        path = "cags.{}.tfrecord".format(dataset)
        if not os.path.exists(path):
            print("Downloading file {}...".format(path), file=sys.stderr)
            urllib.request.urlretrieve("{}/{}".format(self._URL, path), filename="{}.tmp".format(path))
            os.rename("{}.tmp".format(path), path)

        setattr(self, dataset, self.Dataset(path, size, decode_on_demand))
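
A usage sketch; the decode_on_demand flag presumably postpones image decoding
to element access time (an assumption based on the parameter name, since the
TFRecordDataset internals are not shown here):

from npfl138.datasets.cags import CAGS

cags = CAGS()                       # downloads the .tfrecord files if missing
lazy = CAGS(decode_on_demand=True)  # presumably trades loading time for memory

print(len(cags.train), len(cags.dev), len(cags.test))  # 2142 306 612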

train instance-attribute

train: Dataset

The training dataset.

dev instance-attribute

dev: Dataset

The development dataset.

test instance-attribute

test: Dataset

The test dataset.

MaskIoUMetric

Bases: MaskIoU

A metric computing intersection over union (IoU) of masks for the segmentation task.

Source code in npfl138/datasets/cags.py
class MaskIoUMetric(metrics.MaskIoU):
    """The MaskIoUMetric is a metric for evaluating the segmentation task."""
    def __init__(self, from_logits: bool = False) -> None:
        super().__init__((CAGS.H, CAGS.W), from_logits=from_logits)
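
A sketch of the update/compute protocol, mirroring its use in
evaluate_segmentation below; as a smoke test, the gold masks are scored
against themselves, which should give 100%:

from npfl138.datasets.cags import CAGS

cags = CAGS()
metric = CAGS.MaskIoUMetric()
for example in cags.dev:
    metric.update(example["mask"], example["mask"])
print("{:.2f}%".format(100 * metric.compute()))  # 100.00%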

evaluate_classification staticmethod

evaluate_classification(
    gold_dataset: Dataset, predictions: Sequence[int]
) -> float

Evaluate the predictions labels against the gold dataset.

Returns:

  • accuracy ( float ) –

    The average accuracy of the predicted labels, as a percentage.

Source code in npfl138/datasets/cags.py
@staticmethod
def evaluate_classification(gold_dataset: Dataset, predictions: Sequence[int]) -> float:
    """Evaluate the `predictions` labels against the gold dataset.

    Returns:
      accuracy: The average accuracy of the predicted labels, as a percentage.
    """
    gold = [int(example["label"]) for example in gold_dataset]

    if len(predictions) != len(gold):
        raise RuntimeError("The predictions differ in size from the gold data: {} vs {}".format(
            len(predictions), len(gold)))

    correct = sum(gold[i] == predictions[i] for i in range(len(gold)))
    return 100 * correct / len(gold)
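
For instance, a uniformly random baseline should score close to
100 / 34 ≈ 2.9%:

import random
from npfl138.datasets.cags import CAGS

cags = CAGS()
predictions = [random.randrange(CAGS.LABELS) for _ in range(len(cags.dev))]
print(CAGS.evaluate_classification(cags.dev, predictions))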

evaluate_classification_file staticmethod

evaluate_classification_file(
    gold_dataset: Dataset, predictions_file: TextIO
) -> float

Evaluate the file with label predictions against the gold dataset.

Returns:

  • accuracy ( float ) –

    The average accuracy of the predicted labels, as a percentage.

Source code in npfl138/datasets/cags.py
@staticmethod
def evaluate_classification_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
    """Evaluate the file with label predictions against the gold dataset.

    Returns:
      accuracy: The average accuracy of the predicted labels, as a percentage.
    """
    predictions = [int(line) for line in predictions_file]
    return CAGS.evaluate_classification(gold_dataset, predictions)
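
The expected file format is one integer label per line; a round-trip sketch
(the file name is illustrative):

from npfl138.datasets.cags import CAGS

cags = CAGS()
with open("cags_dev_predictions.txt", "w") as predictions_file:
    for example in cags.dev:
        print(int(example["label"]), file=predictions_file)

with open("cags_dev_predictions.txt") as predictions_file:
    print(CAGS.evaluate_classification_file(cags.dev, predictions_file))  # 100.0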

evaluate_segmentation staticmethod

evaluate_segmentation(
    gold_dataset: Dataset, predictions: Sequence[Tensor]
) -> float

Evaluate the predictions masks against the gold dataset.

Returns:

  • iou ( float ) –

    The average IoU of the predicted masks, as a percentage.

Source code in npfl138/datasets/cags.py
@staticmethod
def evaluate_segmentation(gold_dataset: Dataset, predictions: Sequence[torch.Tensor]) -> float:
    """Evaluate the `predictions` masks against the gold dataset.

    Returns:
      iou: The average IoU of the predicted masks, as a percentage.
    """
    gold = [example["mask"] for example in gold_dataset]

    if len(predictions) != len(gold):
        raise RuntimeError("The predictions differ in size from the gold data: {} vs {}".format(
            len(predictions), len(gold)))

    iou = CAGS.MaskIoUMetric()
    for i in range(len(gold)):
        iou.update(gold[i], predictions[i])

    return 100 * iou.compute()

evaluate_segmentation_file staticmethod

evaluate_segmentation_file(
    gold_dataset: Dataset, predictions_file: TextIO
) -> float

Evaluate the file with mask predictions against the gold dataset.

Returns:

  • iou ( float ) –

    The average IoU of the predicted masks, as a percentage.

Source code in npfl138/datasets/cags.py
@staticmethod
def evaluate_segmentation_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
    """Evaluate the file with mask predictions against the gold dataset.

    Returns:
      iou: The average IoU of the predicted masks, as a percentage.
    """
    return CAGS.evaluate_segmentation(gold_dataset, CAGS.load_segmentation_file(predictions_file))
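
The file format can be inferred from load_segmentation_file in the class
source above: one mask per line, given as space-separated run lengths that
alternate between zeros and ones, start with a run of zeros, and sum to
H * W. A sketch of the inverse encoding (a hypothetical helper, not part of
this class):

import torch
from npfl138.datasets.cags import CAGS

def encode_rle(mask: torch.Tensor) -> str:
    """Run-length encode a flat binary mask, starting with a run of zeros."""
    flat = (mask.reshape(-1) >= 0.5).to(torch.int64)
    # Run boundaries are the positions where the mask value changes.
    changes = (torch.nonzero(flat[1:] != flat[:-1]).reshape(-1) + 1).tolist()
    boundaries = [0] + changes + [flat.numel()]
    runs = [0] if flat[0] == 1 else []  # the format always starts with a zero run
    runs += [end - start for start, end in zip(boundaries[:-1], boundaries[1:])]
    return " ".join(map(str, runs))

# Hypothetical usage: one encoded mask per line of the predictions file.
# with open("cags_segmentation_dev.txt", "w") as predictions_file:
#     for mask in predicted_masks:
#         print(encode_rle(mask), file=predictions_file)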

visualize staticmethod

visualize(image: Tensor, mask: Tensor, show: bool)

Visualize the given image plus predicted mask.

Parameters:

  • image (Tensor) –

    A torch.Tensor of shape [C, H, W] with dtype torch.uint8

  • mask (Tensor) –

    A torch.Tensor with H * W float values in [0, 1]

  • show (bool) –

    controls whether to show the figure or return it: if True, the figure is shown using plt.show(); if False, the plt.Figure instance is returned; it can be saved to TensorBoard using the add_figure method of a SummaryWriter.

Source code in npfl138/datasets/cags.py
@staticmethod
def visualize(image: torch.Tensor, mask: torch.Tensor, show: bool):
    """Visualize the given image plus predicted mask.

    Parameters:
      image: A torch.Tensor of shape [C, H, W] with dtype torch.uint8
      mask: A torch.Tensor with H * W float values in [0, 1]
      show: controls whether to show the figure or return it:
        if `True`, the figure is shown using `plt.show()`;
        if `False`, the `plt.Figure` instance is returned; it can be saved
        to TensorBoard using the `add_figure` method of a `SummaryWriter`.
    """
    import matplotlib.pyplot as plt

    figure = plt.figure(figsize=(10, 4))
    plt.axis("off")
    byte_mask = mask.reshape([CAGS.H, CAGS.W]).to(dtype=torch.uint8)
    visualization = torch.zeros([3, CAGS.H, 3 * CAGS.W], dtype=torch.uint8)
    visualization[:, :, :CAGS.W] = image
    visualization[:, :, CAGS.W:2 * CAGS.W] = 255 * byte_mask
    visualization[:, :, 2 * CAGS.W:] = image * byte_mask + 255 * (1 - byte_mask)
    plt.imshow(visualization.movedim(0, -1).numpy(force=True))
    if show:
        plt.show()
    else:
        return figure
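
A usage sketch for both modes (the log directory and tag are illustrative):

from npfl138.datasets.cags import CAGS
from torch.utils.tensorboard import SummaryWriter

cags = CAGS()
example = cags.dev[0]

# Either show the figure interactively...
CAGS.visualize(example["image"], example["mask"], show=True)

# ...or log it to TensorBoard.
writer = SummaryWriter("logs/cags")
figure = CAGS.visualize(example["image"], example["mask"], show=False)
writer.add_figure("dev/visualization", figure)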