CAGS

The CAGS dataset consists of images of cats and dogs of size \(224 \times 224\), each classified into one of 34 breeds and accompanied by a mask indicating the presence of the animal.

You can see a demo of the dataset here.

The dataset is split into:

train: 2,142 images for training;
dev: 306 images for development (validation);
test: 612 images for testing.

npfl138.datasets.cags.CAGS

Source code in npfl138/datasets/cags.py

class CAGS:
    """The CAGS dataset of cat and dog images with breed labels and segmentation masks.

    Constructing an instance loads the `train`, `dev`, and `test` splits (downloading
    them if necessary) and stores them as attributes of the same names. The class also
    offers static helpers for evaluating label and mask predictions and for visualizing
    a predicted mask.
    """
    C: int = 3
    """The number of image channels."""
    H: int = 224
    """The image height."""
    W: int = 224
    """The image width."""
    LABELS: int = 34
    """The number of labels."""
    LABEL_NAMES: list[str] = [
        # Cats
        "Abyssinian", "Bengal", "Bombay", "British_Shorthair", "Egyptian_Mau",
        "Maine_Coon", "Russian_Blue", "Siamese", "Sphynx",
        # Dogs
        "american_bulldog", "american_pit_bull_terrier", "basset_hound",
        "beagle", "boxer", "chihuahua", "english_cocker_spaniel",
        "english_setter", "german_shorthaired", "great_pyrenees", "havanese",
        "japanese_chin", "keeshond", "leonberger", "miniature_pinscher",
        "newfoundland", "pomeranian", "pug", "saint_bernard", "samoyed",
        "scottish_terrier", "shiba_inu", "staffordshire_bull_terrier",
        "wheaten_terrier", "yorkshire_terrier",
    ]
    """The list of label names in the dataset."""
    Element = TypedDict("Element", {"image": torch.Tensor, "mask": torch.Tensor, "label": torch.Tensor})
    """The type of a single dataset element."""

    URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2526/datasets"

    class Dataset(TFRecordDataset):
        """One split of the CAGS dataset, read through the `TFRecordDataset` base class."""

        def __init__(self, path: str, size: int, decode_on_demand: bool) -> None:
            """Pass `path`, `size`, and `decode_on_demand` on to the `TFRecordDataset` base class."""
            super().__init__(path, size, decode_on_demand)

        def __len__(self) -> int:
            """Return the number of elements in the dataset."""
            return super().__len__()

        def __getitem__(self, index: int) -> "CAGS.Element":
            """Return the `index`-th element of the dataset."""
            return super().__getitem__(index)

        def _tfrecord_decode(self, data: dict, indices: dict, index: int) -> "CAGS.Element":
            """Decode the `index`-th element from the raw `data` buffers.

            The encoded bytes of the image and of the mask are located in `data[key]`
            between `indices[key][index]` and `indices[key][index + 1]`. The image is
            decoded as RGB; the mask is decoded as grayscale and rescaled to
            `torch.float32` by dividing by 255. The label is taken from `data["label"]`.
            """
            image_bytes = data["image"][indices["image"][index]:indices["image"][index + 1]]
            image = torchvision.io.decode_image(image_bytes, torchvision.io.ImageReadMode.RGB)

            mask_bytes = data["mask"][indices["mask"][index]:indices["mask"][index + 1]]
            mask = torchvision.io.decode_image(mask_bytes, torchvision.io.ImageReadMode.GRAY)

            return {
                "image": image,
                "mask": mask.to(dtype=torch.float32).div(255),
                "label": data["label"][index],
            }

    def __init__(self, decode_on_demand: bool = False) -> None:
        """Load the CAGS dataset, downloading it if necessary.

        Parameters:
          decode_on_demand: Forwarded unchanged to every `CAGS.Dataset` split.
        """
        for dataset, size in [("train", 2_142), ("dev", 306), ("test", 612)]:
            path = download_url_to_file(self.URL, f"cags.{dataset}.tfrecord")
            setattr(self, dataset, self.Dataset(path, size, decode_on_demand))

    train: Dataset
    """The training dataset."""
    dev: Dataset
    """The development dataset."""
    test: Dataset
    """The test dataset."""

    # The MaskIoUMetric
    class MaskIoUMetric(metrics.MaskIoU):
        """The MaskIoUMetric is a metric for evaluating the segmentation task."""
        def __init__(self, from_logits: bool = False) -> None:
            """Construct a new `MaskIoUMetric`.

            Parameters:
              from_logits: If `True`, the predictions are expected to be logits; otherwise, they
                are probabilities (the default). However, the target masks must always be probabilities.
            """
            super().__init__((CAGS.H, CAGS.W), from_logits=from_logits)

    # Evaluation infrastructure.
    @staticmethod
    def evaluate_classification(gold_dataset: Dataset, predictions: Sequence[int]) -> float:
        """Evaluate the `predictions` labels against the gold dataset.

        Parameters:
          gold_dataset: The dataset whose `"label"` entries are the gold labels.
          predictions: The predicted labels, one per dataset element and in dataset order.

        Returns:
          accuracy: The average accuracy of the predicted labels.

        Raises:
          RuntimeError: If the number of predictions differs from the dataset size.
        """
        gold = [int(example["label"]) for example in gold_dataset]

        if len(predictions) != len(gold):
            raise RuntimeError(
                f"The predictions are of different size than gold data: {len(predictions)} vs {len(gold)}")

        correct = sum(gold_label == prediction for gold_label, prediction in zip(gold, predictions))
        return correct / len(gold)

    @staticmethod
    def evaluate_classification_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
        """Evaluate the file with label predictions against the gold dataset.

        Parameters:
          gold_dataset: The dataset providing the gold labels.
          predictions_file: A text file with one integer label per line.

        Returns:
          accuracy: The average accuracy of the predicted labels.
        """
        predictions = [int(line) for line in predictions_file]
        return CAGS.evaluate_classification(gold_dataset, predictions)

    @staticmethod
    def evaluate_segmentation(gold_dataset: Dataset, predictions: Sequence[torch.Tensor]) -> float:
        """Evaluate the `predictions` masks against the gold dataset.

        Parameters:
          gold_dataset: The dataset whose `"mask"` entries are the gold masks.
          predictions: The predicted masks, one per dataset element and in dataset order.

        Returns:
          iou: The average IoU of the predicted masks.

        Raises:
          RuntimeError: If the number of predictions differs from the dataset size.
        """
        gold = [example["mask"] for example in gold_dataset]

        if len(predictions) != len(gold):
            raise RuntimeError(
                f"The predictions are of different size than gold data: {len(predictions)} vs {len(gold)}")

        iou = CAGS.MaskIoUMetric()
        for prediction, gold_mask in zip(predictions, gold):
            iou.update(prediction, gold_mask)

        return iou.compute().item()

    @staticmethod
    def load_segmentation_file(predictions_file: TextIO) -> list[torch.Tensor]:
        """Load run-length encoded mask predictions from a text file.

        Every line encodes one mask as whitespace-separated run lengths. The runs
        alternate between zeros and ones, starting with a run of zeros, and together
        they must cover exactly `H * W` values.

        Parameters:
          predictions_file: A text file with one encoded mask per line.

        Returns:
          predictions: A list of flat `torch.float32` tensors, each with `H * W` values.

        Raises:
          AssertionError: If the run lengths on a line do not sum to `H * W`.
        """
        mask_size = CAGS.H * CAGS.W
        predictions = []
        for line in predictions_file:
            runs = [int(run) for run in line.split()]
            # Check explicitly instead of using `assert`: assertions are removed under
            # `python -O`, and slice assignment would then silently accept malformed runs.
            if sum(runs) != mask_size:
                raise AssertionError(
                    f"The run lengths of a mask must sum to {mask_size}, but they sum to {sum(runs)}")

            mask = torch.zeros(mask_size, dtype=torch.float32)
            offset = 0
            for i, run in enumerate(runs):
                # Even-indexed runs are zeros, odd-indexed runs are ones.
                mask[offset:offset + run] = i % 2
                offset += run
            predictions.append(mask)
        return predictions

    @staticmethod
    def evaluate_segmentation_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
        """Evaluate the file with mask predictions against the gold dataset.

        Parameters:
          gold_dataset: The dataset providing the gold masks.
          predictions_file: A text file in the format read by `load_segmentation_file`.

        Returns:
          iou: The average IoU of the predicted masks.
        """
        return CAGS.evaluate_segmentation(gold_dataset, CAGS.load_segmentation_file(predictions_file))

    @staticmethod
    def visualize(image: torch.Tensor, mask: torch.Tensor, show: bool):
        """Visualize the given image plus predicted mask.

        The figure contains three panels side by side: the image itself, the
        binarized mask drawn in white, and the image with every pixel outside
        the mask replaced by white.

        Parameters:
          image: A torch.Tensor of shape [C, H, W] with dtype torch.uint8
          mask: A torch.Tensor with H * W float values in [0, 1]; values of at
            least 0.5 are considered to be inside the mask
          show: controls whether to show the figure or return it:
            if `True`, the figure is shown using `plt.show()`;
            if `False`, the `plt.Figure` instance is returned; it can be saved
            to TensorBoard using the [npfl138.Logger.log_figure][] method of
            an [npfl138.TrainableModule.logger][].

        Returns:
          figure: The `plt.Figure` instance when `show` is `False`; `None` otherwise.
        """
        import matplotlib.pyplot as plt

        figure = plt.figure(figsize=(10, 4))
        plt.axis("off")
        # Threshold instead of casting directly: a float-to-uint8 cast truncates,
        # so every mask value below 1.0 would be drawn as lying outside the mask.
        byte_mask = (mask.reshape([CAGS.H, CAGS.W]) >= 0.5).to(dtype=torch.uint8)
        visualization = torch.zeros([3, CAGS.H, 3 * CAGS.W], dtype=torch.uint8)
        visualization[:, :, :CAGS.W] = image
        visualization[:, :, CAGS.W:2 * CAGS.W] = 255 * byte_mask
        visualization[:, :, 2 * CAGS.W:] = image * byte_mask + 255 * (1 - byte_mask)
        plt.imshow(visualization.movedim(0, -1).numpy(force=True))
        if show:
            plt.show()
        else:
            return figure

C `class-attribute` `instance-attribute`

C: int = 3

The number of image channels.

H `class-attribute` `instance-attribute`

H: int = 224

The image height.

W `class-attribute` `instance-attribute`

W: int = 224

The image width.

LABELS `class-attribute` `instance-attribute`

LABELS: int = 34

The number of labels.

LABEL_NAMES `class-attribute` `instance-attribute`

LABEL_NAMES: list[str] = [
    # Cats
    "Abyssinian",
    "Bengal",
    "Bombay",
    "British_Shorthair",
    "Egyptian_Mau",
    "Maine_Coon",
    "Russian_Blue",
    "Siamese",
    "Sphynx",
    # Dogs
    "american_bulldog",
    "american_pit_bull_terrier",
    "basset_hound",
    "beagle",
    "boxer",
    "chihuahua",
    "english_cocker_spaniel",
    "english_setter",
    "german_shorthaired",
    "great_pyrenees",
    "havanese",
    "japanese_chin",
    "keeshond",
    "leonberger",
    "miniature_pinscher",
    "newfoundland",
    "pomeranian",
    "pug",
    "saint_bernard",
    "samoyed",
    "scottish_terrier",
    "shiba_inu",
    "staffordshire_bull_terrier",
    "wheaten_terrier",
    "yorkshire_terrier",
]

The list of label names in the dataset.

Element `class-attribute` `instance-attribute`

Element = TypedDict(
    "Element", {"image": Tensor, "mask": Tensor, "label": Tensor}
)

The type of a single dataset element.

Dataset

Bases: TFRecordDataset

Source code in npfl138/datasets/cags.py

class Dataset(TFRecordDataset):
    """One split of the CAGS dataset, read through the `TFRecordDataset` base class."""

    def __init__(self, path: str, size: int, decode_on_demand: bool) -> None:
        """Pass `path`, `size`, and `decode_on_demand` on to the `TFRecordDataset` base class."""
        super().__init__(path, size, decode_on_demand)

    def __len__(self) -> int:
        """Return the number of elements in the dataset."""
        element_count = super().__len__()
        return element_count

    def __getitem__(self, index: int) -> "CAGS.Element":
        """Return the `index`-th element of the dataset."""
        element = super().__getitem__(index)
        return element

    def _tfrecord_decode(self, data: dict, indices: dict, index: int) -> "CAGS.Element":
        """Decode the `index`-th element from the raw `data` buffers.

        The encoded bytes of the image and of the mask are located in `data[key]`
        between `indices[key][index]` and `indices[key][index + 1]`. The image is
        decoded as RGB; the mask is decoded as grayscale and rescaled to
        `torch.float32` by dividing by 255. The label is taken from `data["label"]`.
        """
        image_bytes = data["image"][indices["image"][index]:indices["image"][index + 1]]
        image = torchvision.io.decode_image(image_bytes, torchvision.io.ImageReadMode.RGB)

        mask_bytes = data["mask"][indices["mask"][index]:indices["mask"][index + 1]]
        mask = torchvision.io.decode_image(mask_bytes, torchvision.io.ImageReadMode.GRAY)

        return {
            "image": image,
            "mask": mask.to(dtype=torch.float32).div(255),
            "label": data["label"][index],
        }

len

__len__() -> int

Return the number of elements in the dataset.

Source code in npfl138/datasets/cags.py

def __len__(self) -> int:
    """Give the element count as reported by the parent class."""
    element_count = super().__len__()
    return element_count

getitem

__getitem__(index: int) -> Element

Return the index-th element of the dataset.

Source code in npfl138/datasets/cags.py

def __getitem__(self, index: int) -> "CAGS.Element":
    """Fetch the element at position `index` through the parent class."""
    element = super().__getitem__(index)
    return element

init

__init__(decode_on_demand: bool = False) -> None

Load the CAGS dataset, downloading it if necessary.

Source code in npfl138/datasets/cags.py

def __init__(self, decode_on_demand: bool = False) -> None:
    """Load the CAGS dataset, downloading it if necessary.

    Each of the `train`, `dev`, and `test` splits is stored as an attribute
    of the same name; `decode_on_demand` is forwarded to every split.
    """
    split_sizes = {"train": 2_142, "dev": 306, "test": 612}
    for split, element_count in split_sizes.items():
        tfrecord_path = download_url_to_file(self.URL, f"cags.{split}.tfrecord")
        split_dataset = self.Dataset(tfrecord_path, element_count, decode_on_demand)
        setattr(self, split, split_dataset)

train `instance-attribute`

train: Dataset

The training dataset.

dev `instance-attribute`

dev: Dataset

The development dataset.

test `instance-attribute`

test: Dataset

The test dataset.

MaskIoUMetric

Bases: MaskIoU

The MaskIoUMetric is a metric for evaluating the segmentation task.

Source code in npfl138/datasets/cags.py

class MaskIoUMetric(metrics.MaskIoU):
    """A `metrics.MaskIoU` metric for evaluating the CAGS segmentation task."""
    def __init__(self, from_logits: bool = False) -> None:
        """Create the metric for masks of shape `(CAGS.H, CAGS.W)`.

        Parameters:
          from_logits: When `True`, predictions are interpreted as logits; when `False`
            (the default), as probabilities. Target masks must always be probabilities.
        """
        mask_shape = (CAGS.H, CAGS.W)
        super().__init__(mask_shape, from_logits=from_logits)

init

__init__(from_logits: bool = False) -> None

Construct a new MaskIoUMetric.

Parameters:

from_logits (bool, default: False ) –

If True, the predictions are expected to be logits; otherwise, they are probabilities (the default). However, the target masks must always be probabilities.

Source code in npfl138/datasets/cags.py

def __init__(self, from_logits: bool = False) -> None:
    """Create the metric for masks of shape `(CAGS.H, CAGS.W)`.

    Parameters:
      from_logits: When `True`, predictions are interpreted as logits; when `False`
        (the default), as probabilities. Target masks must always be probabilities.
    """
    mask_shape = (CAGS.H, CAGS.W)
    super().__init__(mask_shape, from_logits=from_logits)

evaluate_classification `staticmethod`

evaluate_classification(
    gold_dataset: Dataset, predictions: Sequence[int]
) -> float

Evaluate the `predictions` labels against the gold dataset.

Returns:

accuracy ( float ) –

The average accuracy of the predicted labels.

Source code in npfl138/datasets/cags.py

@staticmethod
def evaluate_classification(gold_dataset: Dataset, predictions: Sequence[int]) -> float:
    """Evaluate the `predictions` labels against the gold dataset.

    Returns:
      accuracy: The average accuracy of the predicted labels.
    """
    gold = [int(example["label"]) for example in gold_dataset]

    if len(predictions) != len(gold):
        raise RuntimeError("The predictions are of different size than gold data: {} vs {}".format(
            len(predictions), len(gold)))

    correct = sum(gold[i] == predictions[i] for i in range(len(gold)))
    return correct / len(gold)

evaluate_classification_file `staticmethod`

evaluate_classification_file(
    gold_dataset: Dataset, predictions_file: TextIO
) -> float

Evaluate the file with label predictions against the gold dataset.

Returns:

accuracy ( float ) –

The average accuracy of the predicted labels.

Source code in npfl138/datasets/cags.py

@staticmethod
def evaluate_classification_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
    """Compute the accuracy of label predictions stored in a file.

    Every line of `predictions_file` is parsed as one integer label, and the
    labels are then scored by `CAGS.evaluate_classification`.

    Returns:
      accuracy: The average accuracy of the predicted labels.
    """
    predicted_labels = list(map(int, predictions_file))
    accuracy = CAGS.evaluate_classification(gold_dataset, predicted_labels)
    return accuracy

evaluate_segmentation `staticmethod`

evaluate_segmentation(
    gold_dataset: Dataset, predictions: Sequence[Tensor]
) -> float

Evaluate the `predictions` masks against the gold dataset.

Returns:

iou ( float ) –

The average IoU of the predicted masks.

Source code in npfl138/datasets/cags.py

@staticmethod
def evaluate_segmentation(gold_dataset: Dataset, predictions: Sequence[torch.Tensor]) -> float:
    """Compute the IoU of the `predictions` masks on the gold dataset.

    Parameters:
      gold_dataset: The dataset whose `"mask"` entries are the gold masks.
      predictions: The predicted masks, one per dataset element and in dataset order.

    Returns:
      iou: The average IoU of the predicted masks.

    Raises:
      RuntimeError: If the number of predictions differs from the dataset size.
    """
    gold_masks = [element["mask"] for element in gold_dataset]

    if len(predictions) != len(gold_masks):
        raise RuntimeError(
            f"The predictions are of different size than gold data: {len(predictions)} vs {len(gold_masks)}")

    metric = CAGS.MaskIoUMetric()
    for predicted_mask, gold_mask in zip(predictions, gold_masks):
        metric.update(predicted_mask, gold_mask)

    return metric.compute().item()

evaluate_segmentation_file `staticmethod`

evaluate_segmentation_file(
    gold_dataset: Dataset, predictions_file: TextIO
) -> float

Evaluate the file with mask predictions against the gold dataset.

Returns:

iou ( float ) –

The average IoU of the predicted masks.

Source code in npfl138/datasets/cags.py

@staticmethod
def evaluate_segmentation_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
    """Compute the IoU of mask predictions stored in a file.

    The masks are read with `CAGS.load_segmentation_file` and then scored by
    `CAGS.evaluate_segmentation`.

    Returns:
      iou: The average IoU of the predicted masks.
    """
    predicted_masks = CAGS.load_segmentation_file(predictions_file)
    return CAGS.evaluate_segmentation(gold_dataset, predicted_masks)

visualize `staticmethod`

visualize(image: Tensor, mask: Tensor, show: bool)

Visualize the given image plus predicted mask.

Parameters:

image (Tensor) –

A torch.Tensor of shape [C, H, W] with dtype torch.uint8
mask (Tensor) –

A torch.Tensor with H * W float values in [0, 1]
show (bool) –

controls whether to show the figure or return it: if True, the figure is shown using plt.show(); if False, the plt.Figure instance is returned; it can be saved to TensorBoard using the npfl138.Logger.log_figure method of an npfl138.TrainableModule.logger.

Source code in npfl138/datasets/cags.py

@staticmethod
def visualize(image: torch.Tensor, mask: torch.Tensor, show: bool):
    """Visualize the given image plus predicted mask.

    The figure contains three panels side by side: the image itself, the
    binarized mask drawn in white, and the image with every pixel outside
    the mask replaced by white.

    Parameters:
      image: A torch.Tensor of shape [C, H, W] with dtype torch.uint8
      mask: A torch.Tensor with H * W float values in [0, 1]; values of at
        least 0.5 are considered to be inside the mask
      show: controls whether to show the figure or return it:
        if `True`, the figure is shown using `plt.show()`;
        if `False`, the `plt.Figure` instance is returned; it can be saved
        to TensorBoard using the [npfl138.Logger.log_figure][] method of
        an [npfl138.TrainableModule.logger][].

    Returns:
      figure: The `plt.Figure` instance when `show` is `False`; `None` otherwise.
    """
    import matplotlib.pyplot as plt

    figure = plt.figure(figsize=(10, 4))
    plt.axis("off")
    # Threshold instead of casting directly: a float-to-uint8 cast truncates,
    # so every mask value below 1.0 would be drawn as lying outside the mask.
    byte_mask = (mask.reshape([CAGS.H, CAGS.W]) >= 0.5).to(dtype=torch.uint8)
    visualization = torch.zeros([3, CAGS.H, 3 * CAGS.W], dtype=torch.uint8)
    visualization[:, :, :CAGS.W] = image
    visualization[:, :, CAGS.W:2 * CAGS.W] = 255 * byte_mask
    visualization[:, :, 2 * CAGS.W:] = image * byte_mask + 255 * (1 - byte_mask)
    plt.imshow(visualization.movedim(0, -1).numpy(force=True))
    if show:
        plt.show()
    else:
        return figure

CAGS

npfl138.datasets.cags.CAGS

C class-attribute instance-attribute

H class-attribute instance-attribute

W class-attribute instance-attribute

LABELS class-attribute instance-attribute

LABEL_NAMES class-attribute instance-attribute

Element class-attribute instance-attribute

Dataset

__len__

__getitem__

__init__

train instance-attribute

dev instance-attribute

test instance-attribute

MaskIoUMetric

__init__

evaluate_classification staticmethod

evaluate_classification_file staticmethod

evaluate_segmentation staticmethod

evaluate_segmentation_file staticmethod

visualize staticmethod

C `class-attribute` `instance-attribute`

H `class-attribute` `instance-attribute`

W `class-attribute` `instance-attribute`

LABELS `class-attribute` `instance-attribute`

LABEL_NAMES `class-attribute` `instance-attribute`

Element `class-attribute` `instance-attribute`

len

getitem

init

train `instance-attribute`

dev `instance-attribute`

test `instance-attribute`

init

evaluate_classification `staticmethod`

evaluate_classification_file `staticmethod`

evaluate_segmentation `staticmethod`

evaluate_segmentation_file `staticmethod`

visualize `staticmethod`