Skip to content

CAGS

The CAGS dataset consists of images of cats and dogs of size \(224 \times 224\), each classified into one of 34 breeds and each accompanied by a mask indicating the presence of the animal.

You can see a demo of the dataset here.

The dataset is split into:

  • train: 2,142 images for training;
  • dev: 306 images for development (validation);
  • test: 612 images for testing.

npfl138.datasets.cags.CAGS

Source code in npfl138/datasets/cags.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
class CAGS:
    """The CAGS dataset of cat and dog images with breed labels and segmentation masks.

    The constructor populates the `train`, `dev`, and `test` splits; the static
    methods evaluate classification and segmentation predictions against a split.
    """
    C: int = 3
    """The number of image channels."""
    H: int = 224
    """The image height."""
    W: int = 224
    """The image width."""
    LABELS: int = 34
    """The number of labels."""
    LABEL_NAMES: list[str] = [
        # Cats
        "Abyssinian", "Bengal", "Bombay", "British_Shorthair", "Egyptian_Mau",
        "Maine_Coon", "Russian_Blue", "Siamese", "Sphynx",
        # Dogs
        "american_bulldog", "american_pit_bull_terrier", "basset_hound",
        "beagle", "boxer", "chihuahua", "english_cocker_spaniel",
        "english_setter", "german_shorthaired", "great_pyrenees", "havanese",
        "japanese_chin", "keeshond", "leonberger", "miniature_pinscher",
        "newfoundland", "pomeranian", "pug", "saint_bernard", "samoyed",
        "scottish_terrier", "shiba_inu", "staffordshire_bull_terrier",
        "wheaten_terrier", "yorkshire_terrier",
    ]
    """The list of label names in the dataset."""
    Element = TypedDict("Element", {"image": torch.Tensor, "mask": torch.Tensor, "label": torch.Tensor})
    """The type of a single dataset element."""

    # Base URL from which the `cags.{train,dev,test}.tfrecord` files are downloaded.
    URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2526/datasets"

    class Dataset(TFRecordDataset):
        """A single split of the CAGS dataset stored in a TFRecord file."""

        def __init__(self, path: str, size: int, decode_on_demand: bool) -> None:
            """Create the split; all arguments are forwarded unchanged to `TFRecordDataset`."""
            super().__init__(path, size, decode_on_demand)

        def __len__(self) -> int:
            """Return the number of elements in the dataset."""
            return super().__len__()

        def __getitem__(self, index: int) -> "CAGS.Element":
            """Return the `index`-th element of the dataset."""
            return super().__getitem__(index)

        def _tfrecord_decode(self, data: dict, indices: dict, index: int) -> "CAGS.Element":
            """Decode the `index`-th element from the raw TFRecord fields.

            The encoded bytes of the element are the slice of `data[key]` delimited by
            `indices[key][index]` and `indices[key][index + 1]`. The image is decoded
            as RGB; the mask is decoded as a single gray channel and divided by 255.
            """
            image_start, image_end = indices["image"][index], indices["image"][index + 1]
            mask_start, mask_end = indices["mask"][index], indices["mask"][index + 1]
            return {
                "image": torchvision.io.decode_image(
                    data["image"][image_start:image_end], torchvision.io.ImageReadMode.RGB),
                "mask": torchvision.io.decode_image(
                    data["mask"][mask_start:mask_end],
                    torchvision.io.ImageReadMode.GRAY).to(dtype=torch.float32).div(255),
                "label": data["label"][index],
            }

    def __init__(self, decode_on_demand: bool = False) -> None:
        """Load the CAGS dataset, downloading it if necessary."""
        for dataset, size in [("train", 2_142), ("dev", 306), ("test", 612)]:
            path = download_url_to_file(self.URL, f"cags.{dataset}.tfrecord")
            setattr(self, dataset, self.Dataset(path, size, decode_on_demand))

    train: Dataset
    """The training dataset."""
    dev: Dataset
    """The development dataset."""
    test: Dataset
    """The test dataset."""

    # The MaskIoUMetric
    class MaskIoUMetric(metrics.MaskIoU):
        """The MaskIoUMetric is a metric for evaluating the segmentation task."""
        def __init__(self, from_logits: bool = False) -> None:
            """Construct a new `MaskIoUMetric`.

            Parameters:
              from_logits: If `True`, the predictions are expected to be logits; otherwise, they
                are probabilities (the default). However, the target masks must always be probabilities.
            """
            super().__init__((CAGS.H, CAGS.W), from_logits=from_logits)

    # Evaluation infrastructure.
    @staticmethod
    def evaluate_classification(gold_dataset: Dataset, predictions: Sequence[int]) -> float:
        """Evaluate the `predictions` labels against the gold dataset.

        Returns:
          accuracy: The average accuracy of the predicted labels.

        Raises:
          RuntimeError: If the number of predictions differs from the number of gold examples.
        """
        gold = [int(example["label"]) for example in gold_dataset]

        if len(predictions) != len(gold):
            raise RuntimeError(
                f"The predictions are of different size than gold data: {len(predictions)} vs {len(gold)}")

        correct = sum(gold_label == prediction for gold_label, prediction in zip(gold, predictions))
        return correct / len(gold)

    @staticmethod
    def evaluate_classification_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
        """Evaluate the file with label predictions against the gold dataset.

        Every line of the file is parsed as a single integer label.

        Returns:
          accuracy: The average accuracy of the predicted labels.
        """
        predictions = [int(line) for line in predictions_file]
        return CAGS.evaluate_classification(gold_dataset, predictions)

    @staticmethod
    def evaluate_segmentation(gold_dataset: Dataset, predictions: Sequence[torch.Tensor]) -> float:
        """Evaluate the `predictions` masks against the gold dataset.

        Returns:
          iou: The average IoU of the predicted masks.

        Raises:
          RuntimeError: If the number of predictions differs from the number of gold examples.
        """
        gold = [example["mask"] for example in gold_dataset]

        if len(predictions) != len(gold):
            raise RuntimeError(
                f"The predictions are of different size than gold data: {len(predictions)} vs {len(gold)}")

        iou = CAGS.MaskIoUMetric()
        for prediction, gold_mask in zip(predictions, gold):
            iou.update(prediction, gold_mask)

        return iou.compute().item()

    @staticmethod
    def load_segmentation_file(predictions_file: TextIO) -> list[torch.Tensor]:
        """Load run-length encoded mask predictions from a file.

        Every line describes one mask as whitespace-separated run lengths. The runs
        alternate between zeros and ones, starting with a run of zeros, and together
        they must cover exactly `H * W` values.

        Returns:
          predictions: A list with one flat `float32` tensor of `H * W` values per line.

        Raises:
          RuntimeError: If a line contains a negative run or its runs do not sum to `H * W`.
        """
        mask_size = CAGS.H * CAGS.W
        predictions = []
        for line_number, line in enumerate(predictions_file, 1):
            runs = [int(run) for run in line.split()]
            # Validate explicitly instead of using `assert`, which is removed under `python -O`.
            if any(run < 0 for run in runs):
                raise RuntimeError(f"Line {line_number} of the predictions contains a negative run length")
            if sum(runs) != mask_size:
                raise RuntimeError("Line {} of the predictions covers {} values instead of {}".format(
                    line_number, sum(runs), mask_size))

            mask = torch.zeros(mask_size, dtype=torch.float32)
            offset = 0
            for i, run in enumerate(runs):
                # The mask starts as zeros, so only the odd-indexed runs (the ones) need writing.
                if i % 2:
                    mask[offset:offset + run] = 1
                offset += run
            predictions.append(mask)
        return predictions

    @staticmethod
    def evaluate_segmentation_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
        """Evaluate the file with mask predictions against the gold dataset.

        Returns:
          iou: The average IoU of the predicted masks.
        """
        return CAGS.evaluate_segmentation(gold_dataset, CAGS.load_segmentation_file(predictions_file))

    @staticmethod
    def visualize(image: torch.Tensor, mask: torch.Tensor, show: bool):
        """Visualize the given image plus predicted mask.

        The figure shows three panels side by side: the image, the mask in white
        on black, and the image with everything outside the mask painted white.

        Parameters:
          image: A torch.Tensor of shape [C, H, W] with dtype torch.uint8
          mask: A torch.Tensor with H * W float values in [0, 1]; values of at
            least 0.5 are displayed as part of the mask.
          show: controls whether to show the figure or return it:
            if `True`, the figure is shown using `plt.show()`;
            if `False`, the `plt.Figure` instance is returned; it can be saved
            to TensorBoard using the [npfl138.Logger.log_figure][] method of
            an [npfl138.TrainableModule.logger][].
        """
        import matplotlib.pyplot as plt

        # Binarize at 0.5: a direct float -> uint8 cast truncates toward zero, which
        # would discard every pixel whose value is below exactly 1.
        byte_mask = (mask.reshape([CAGS.H, CAGS.W]) >= 0.5).to(dtype=torch.uint8)

        figure = plt.figure(figsize=(10, 4))
        plt.axis("off")
        visualization = torch.zeros([3, CAGS.H, 3 * CAGS.W], dtype=torch.uint8)
        visualization[:, :, :CAGS.W] = image
        visualization[:, :, CAGS.W:2 * CAGS.W] = 255 * byte_mask
        visualization[:, :, 2 * CAGS.W:] = image * byte_mask + 255 * (1 - byte_mask)
        # Matplotlib expects channels last, hence [C, H, W] -> [H, W, C].
        plt.imshow(visualization.movedim(0, -1).numpy(force=True))
        if show:
            plt.show()
        else:
            return figure

C class-attribute instance-attribute

C: int = 3

The number of image channels.

H class-attribute instance-attribute

H: int = 224

The image height.

W class-attribute instance-attribute

W: int = 224

The image width.

LABELS class-attribute instance-attribute

LABELS: int = 34

The number of labels.

LABEL_NAMES class-attribute instance-attribute

LABEL_NAMES: list[str] = [
    "Abyssinian",
    "Bengal",
    "Bombay",
    "British_Shorthair",
    "Egyptian_Mau",
    "Maine_Coon",
    "Russian_Blue",
    "Siamese",
    "Sphynx",
    "american_bulldog",
    "american_pit_bull_terrier",
    "basset_hound",
    "beagle",
    "boxer",
    "chihuahua",
    "english_cocker_spaniel",
    "english_setter",
    "german_shorthaired",
    "great_pyrenees",
    "havanese",
    "japanese_chin",
    "keeshond",
    "leonberger",
    "miniature_pinscher",
    "newfoundland",
    "pomeranian",
    "pug",
    "saint_bernard",
    "samoyed",
    "scottish_terrier",
    "shiba_inu",
    "staffordshire_bull_terrier",
    "wheaten_terrier",
    "yorkshire_terrier",
]

The list of label names in the dataset.

Element class-attribute instance-attribute

Element = TypedDict(
    "Element", {"image": Tensor, "mask": Tensor, "label": Tensor}
)

The type of a single dataset element.

Dataset

Bases: TFRecordDataset

Source code in npfl138/datasets/cags.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
class Dataset(TFRecordDataset):
    """A single split of the CAGS dataset stored in a TFRecord file."""

    def __init__(self, path: str, size: int, decode_on_demand: bool) -> None:
        """Create the split; all arguments are forwarded unchanged to `TFRecordDataset`."""
        super().__init__(path, size, decode_on_demand)

    def __len__(self) -> int:
        """Return how many elements the dataset contains."""
        return super().__len__()

    def __getitem__(self, index: int) -> "CAGS.Element":
        """Return the dataset element at position `index`."""
        return super().__getitem__(index)

    def _tfrecord_decode(self, data: dict, indices: dict, index: int) -> "CAGS.Element":
        """Decode the `index`-th element from the raw TFRecord fields.

        The encoded bytes of each field are delimited by two consecutive entries
        of `indices`; the mask is decoded as gray and divided by 255.
        """
        image_offsets, mask_offsets = indices["image"], indices["mask"]

        encoded_image = data["image"][image_offsets[index]:image_offsets[index + 1]]
        image = torchvision.io.decode_image(encoded_image, torchvision.io.ImageReadMode.RGB)

        encoded_mask = data["mask"][mask_offsets[index]:mask_offsets[index + 1]]
        mask = torchvision.io.decode_image(encoded_mask, torchvision.io.ImageReadMode.GRAY)
        mask = mask.to(dtype=torch.float32).div(255)

        return {"image": image, "mask": mask, "label": data["label"][index]}

__len__

__len__() -> int

Return the number of elements in the dataset.

Source code in npfl138/datasets/cags.py
61
62
63
def __len__(self) -> int:
    """Return the number of elements in the dataset.

    The value is obtained by delegating to the parent class's `__len__`.
    """
    return super().__len__()

__getitem__

__getitem__(index: int) -> Element

Return the index-th element of the dataset.

Source code in npfl138/datasets/cags.py
65
66
67
def __getitem__(self, index: int) -> "CAGS.Element":
    """Return the `index`-th element of the dataset.

    The lookup is delegated to the parent class's `__getitem__`.
    """
    return super().__getitem__(index)

__init__

__init__(decode_on_demand: bool = False) -> None

Load the CAGS dataset, downloading it if necessary.

Source code in npfl138/datasets/cags.py
80
81
82
83
84
def __init__(self, decode_on_demand: bool = False) -> None:
    """Load the CAGS dataset, downloading it if necessary.

    Each of the `train`, `dev`, and `test` splits is stored as an attribute of the same name.
    """
    split_sizes = {"train": 2_142, "dev": 306, "test": 612}
    for split, example_count in split_sizes.items():
        tfrecord_path = download_url_to_file(self.URL, f"cags.{split}.tfrecord")
        split_dataset = self.Dataset(tfrecord_path, example_count, decode_on_demand)
        setattr(self, split, split_dataset)

train instance-attribute

train: Dataset

The training dataset.

dev instance-attribute

dev: Dataset

The development dataset.

test instance-attribute

test: Dataset

The test dataset.

MaskIoUMetric

Bases: MaskIoU

The MaskIoUMetric is a metric for evaluating the segmentation task.

Source code in npfl138/datasets/cags.py
 94
 95
 96
 97
 98
 99
100
101
102
103
class MaskIoUMetric(metrics.MaskIoU):
    """A `metrics.MaskIoU` specialization for evaluating the CAGS segmentation task."""
    def __init__(self, from_logits: bool = False) -> None:
        """Construct a new `MaskIoUMetric` for masks of size `CAGS.H` x `CAGS.W`.

        Parameters:
          from_logits: If `True`, the predictions are expected to be logits; otherwise, they
            are probabilities (the default). However, the target masks must always be probabilities.
        """
        mask_shape = (CAGS.H, CAGS.W)
        super().__init__(mask_shape, from_logits=from_logits)

__init__

__init__(from_logits: bool = False) -> None

Construct a new MaskIoUMetric.

Parameters:

  • from_logits (bool, default: False ) –

    If True, the predictions are expected to be logits; otherwise, they are probabilities (the default). However, the target masks must always be probabilities.

Source code in npfl138/datasets/cags.py
 96
 97
 98
 99
100
101
102
103
def __init__(self, from_logits: bool = False) -> None:
    """Construct a new `MaskIoUMetric` for masks of size `CAGS.H` x `CAGS.W`.

    Parameters:
      from_logits: If `True`, the predictions are expected to be logits; otherwise, they
        are probabilities (the default). However, the target masks must always be probabilities.
    """
    mask_shape = (CAGS.H, CAGS.W)
    super().__init__(mask_shape, from_logits=from_logits)

evaluate_classification staticmethod

evaluate_classification(
    gold_dataset: Dataset, predictions: Sequence[int]
) -> float

Evaluate the predictions labels against the gold dataset.

Returns:

  • accuracy ( float ) –

    The average accuracy of the predicted labels.

Source code in npfl138/datasets/cags.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
@staticmethod
def evaluate_classification(gold_dataset: Dataset, predictions: Sequence[int]) -> float:
    """Evaluate the `predictions` labels against the gold dataset.

    Returns:
      accuracy: The average accuracy of the predicted labels.
    """
    gold = [int(example["label"]) for example in gold_dataset]

    if len(predictions) != len(gold):
        raise RuntimeError("The predictions are of different size than gold data: {} vs {}".format(
            len(predictions), len(gold)))

    correct = sum(gold[i] == predictions[i] for i in range(len(gold)))
    return correct / len(gold)

evaluate_classification_file staticmethod

evaluate_classification_file(
    gold_dataset: Dataset, predictions_file: TextIO
) -> float

Evaluate the file with label predictions against the gold dataset.

Returns:

  • accuracy ( float ) –

    The average accuracy of the predicted labels.

Source code in npfl138/datasets/cags.py
122
123
124
125
126
127
128
129
130
@staticmethod
def evaluate_classification_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
    """Compute the accuracy of label predictions stored in a file, one integer label per line.

    Returns:
      accuracy: The average accuracy of the predicted labels.
    """
    predicted_labels = list(map(int, predictions_file))
    accuracy = CAGS.evaluate_classification(gold_dataset, predicted_labels)
    return accuracy

evaluate_segmentation staticmethod

evaluate_segmentation(
    gold_dataset: Dataset, predictions: Sequence[Tensor]
) -> float

Evaluate the predictions masks against the gold dataset.

Returns:

  • iou ( float ) –

    The average iou of the predicted masks.

Source code in npfl138/datasets/cags.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
@staticmethod
def evaluate_segmentation(gold_dataset: Dataset, predictions: Sequence[torch.Tensor]) -> float:
    """Compute the IoU of the `predictions` masks with respect to the gold dataset.

    Returns:
      iou: The average IoU of the predicted masks.

    Raises:
      RuntimeError: If the number of predictions differs from the number of gold examples.
    """
    gold_masks = []
    for example in gold_dataset:
        gold_masks.append(example["mask"])

    if len(predictions) != len(gold_masks):
        raise RuntimeError("The predictions are of different size than gold data: {} vs {}".format(
            len(predictions), len(gold_masks)))

    metric = CAGS.MaskIoUMetric()
    for predicted_mask, gold_mask in zip(predictions, gold_masks):
        metric.update(predicted_mask, gold_mask)

    result = metric.compute()
    return result.item()

evaluate_segmentation_file staticmethod

evaluate_segmentation_file(
    gold_dataset: Dataset, predictions_file: TextIO
) -> float

Evaluate the file with mask predictions against the gold dataset.

Returns:

  • iou ( float ) –

    The average iou of the predicted masks.

Source code in npfl138/datasets/cags.py
165
166
167
168
169
170
171
172
@staticmethod
def evaluate_segmentation_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
    """Compute the IoU of mask predictions stored in a file with respect to the gold dataset.

    The file is parsed by `CAGS.load_segmentation_file` and the resulting masks
    are scored by `CAGS.evaluate_segmentation`.

    Returns:
      iou: The average IoU of the predicted masks.
    """
    predicted_masks = CAGS.load_segmentation_file(predictions_file)
    return CAGS.evaluate_segmentation(gold_dataset, predicted_masks)

visualize staticmethod

visualize(image: Tensor, mask: Tensor, show: bool)

Visualize the given image plus predicted mask.

Parameters:

  • image (Tensor) –

    A torch.Tensor of shape [C, H, W] with dtype torch.uint8

  • mask (Tensor) –

    A torch.Tensor with H * W float values in [0, 1]

  • show (bool) –

    controls whether to show the figure or return it: if True, the figure is shown using plt.show(); if False, the plt.Figure instance is returned; it can be saved to TensorBoard using the npfl138.Logger.log_figure method of an npfl138.TrainableModule.logger.

Source code in npfl138/datasets/cags.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
@staticmethod
def visualize(image: torch.Tensor, mask: torch.Tensor, show: bool):
    """Visualize the given image plus predicted mask.

    The figure shows three panels side by side: the image, the mask in white
    on black, and the image with everything outside the mask painted white.

    Parameters:
      image: A torch.Tensor of shape [C, H, W] with dtype torch.uint8
      mask: A torch.Tensor with H * W float values in [0, 1]; values of at
        least 0.5 are displayed as part of the mask.
      show: controls whether to show the figure or return it:
        if `True`, the figure is shown using `plt.show()`;
        if `False`, the `plt.Figure` instance is returned; it can be saved
        to TensorBoard using the [npfl138.Logger.log_figure][] method of
        an [npfl138.TrainableModule.logger][].
    """
    import matplotlib.pyplot as plt

    # Binarize at 0.5: a direct float -> uint8 cast truncates toward zero, which
    # would discard every pixel whose value is below exactly 1.
    byte_mask = (mask.reshape([CAGS.H, CAGS.W]) >= 0.5).to(dtype=torch.uint8)

    figure = plt.figure(figsize=(10, 4))
    plt.axis("off")
    visualization = torch.zeros([3, CAGS.H, 3 * CAGS.W], dtype=torch.uint8)
    visualization[:, :, :CAGS.W] = image
    visualization[:, :, CAGS.W:2 * CAGS.W] = 255 * byte_mask
    visualization[:, :, 2 * CAGS.W:] = image * byte_mask + 255 * (1 - byte_mask)
    # Matplotlib expects channels last, hence [C, H, W] -> [H, W, C].
    plt.imshow(visualization.movedim(0, -1).numpy(force=True))
    if show:
        plt.show()
    else:
        return figure