HOMRDataset

The HOMR dataset contains synthetically generated handwritten music together with the corresponding annotations.

The input images are grayscale, have arbitrary height and width, and each represents a single stave of music. The annotations are “semantic”: every note is annotated with its real pitch and duration, which depend on the current clef and key signature.

A visualization of the first 100 training examples is available here.
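
A minimal usage sketch (the import path follows the module name below; the image shape comment reflects torchvision's grayscale decoding):

from npfl138.datasets.homr_dataset import HOMRDataset

homr = HOMRDataset()  # downloads the .tfrecord files on first use

example = homr.train[0]        # a single Element (see below)
print(example["image"].shape)  # [1, H, W] grayscale tensor; H and W vary per stave
print(example["marks"])        # tensor of integer mark indices into HOMRDataset.MARK_NAMES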

npfl138.datasets.homr_dataset.HOMRDataset

Source code in npfl138/datasets/homr_dataset.py
class HOMRDataset:
    C: int = 1
    """Number of channels in the input images."""

    MARKS: int = 938
    """The number of different marks in the dataset."""
    MARK_NAMES: list[str]  # Set at the bottom of the script for readability
    """The list of mark names in the dataset."""

    Element = TypedDict("Element", {"image": torch.Tensor, "marks": torch.Tensor})
    """The type of a single dataset element."""

    URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2425/datasets/"

    class Dataset(TFRecordDataset):
        def __init__(self, path: str, size: int, decode_on_demand: bool) -> None:
            super().__init__(path, size, decode_on_demand)

        def __len__(self) -> int:
            """Return the number of elements in the dataset."""
            return super().__len__()

        def __getitem__(self, index: int) -> "HOMRDataset.Element":
            """Return the `index`-th element of the dataset."""
            return super().__getitem__(index)

        def _tfrecord_decode(self, data: dict, indices: dict, index: int) -> "HOMRDataset.Element":
            return {
                "image": torchvision.io.decode_image(
                    data["image"][indices["image"][index]:indices["image"][index + 1]],
                    torchvision.io.ImageReadMode.GRAY),
                "marks": data["marks"][indices["marks"][index]:indices["marks"][index + 1]],
            }

    def __init__(self, decode_on_demand: bool = False) -> None:
        "Load the HOMR dataset, downloading it if necessary."
        for dataset, size in [("train", 51_365), ("dev", 5_027), ("test", 5_023)]:
            path = "homr.{}.tfrecord".format(dataset)
            if not os.path.exists(path):
                print("Downloading file {}...".format(path), file=sys.stderr)
                urllib.request.urlretrieve("{}/{}".format(self.URL, path), filename="{}.tmp".format(path))
                os.rename("{}.tmp".format(path), path)

            setattr(self, dataset, self.Dataset(path, size, decode_on_demand))

    train: Dataset
    """The training dataset."""
    dev: Dataset
    """The development dataset."""
    test: Dataset
    """The test dataset."""

    # The EditDistanceMetric
    EditDistanceMetric = metrics.EditDistance
    """The edit distance metric used for evaluation."""

    # Evaluation infrastructure
    @staticmethod
    def evaluate(gold_dataset: Dataset, predictions: Sequence[Sequence[str]]) -> float:
        """Evaluate the `predictions` against the gold dataset.

        Returns:
          edit_distance: The average edit distance of the predictions in percentages.
        """
        gold = [[HOMRDataset.MARK_NAMES[mark] for mark in example["marks"]] for example in gold_dataset]

        if len(predictions) != len(gold):
            raise RuntimeError("The predictions are of different size than gold data: {} vs {}".format(
                len(predictions), len(gold)))

        return 100 * metrics.EditDistance().update(predictions, gold).compute()

    @staticmethod
    def evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
        """Evaluate the file with predictions against the gold dataset.

        Returns:
          edit_distance: The average edit distance of the predictions in percentages.
        """
        predictions = []
        for line in predictions_file:
            predictions.append(line.rstrip("\n").split())
        return HOMRDataset.evaluate(gold_dataset, predictions)

C class-attribute instance-attribute

C: int = 1

Number of channels in the input images.

MARKS class-attribute instance-attribute

MARKS: int = 938

The number of different marks in the dataset.

MARK_NAMES instance-attribute

MARK_NAMES: list[str]

The list of mark names in the dataset.

Element class-attribute instance-attribute

Element = TypedDict('Element', {'image': Tensor, 'marks': Tensor})

The type of a single dataset element.

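For illustration, a short sketch of consuming an element's fields; converting indices to names via MARK_NAMES mirrors what evaluate does below:

element = homr.train[0]
image = element["image"]    # uint8 grayscale tensor of shape [C, H, W] with C = 1
marks = element["marks"]    # integer mark indices
mark_names = [HOMRDataset.MARK_NAMES[mark] for mark in marks]
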
Dataset

Bases: TFRecordDataset

Source code in npfl138/datasets/homr_dataset.py
class Dataset(TFRecordDataset):
    def __init__(self, path: str, size: int, decode_on_demand: bool) -> None:
        super().__init__(path, size, decode_on_demand)

    def __len__(self) -> int:
        """Return the number of elements in the dataset."""
        return super().__len__()

    def __getitem__(self, index: int) -> "HOMRDataset.Element":
        """Return the `index`-th element of the dataset."""
        return super().__getitem__(index)

    def _tfrecord_decode(self, data: dict, indices: dict, index: int) -> "HOMRDataset.Element":
        return {
            "image": torchvision.io.decode_image(
                data["image"][indices["image"][index]:indices["image"][index + 1]],
                torchvision.io.ImageReadMode.GRAY),
            "marks": data["marks"][indices["marks"][index]:indices["marks"][index + 1]],
        }

__len__

__len__() -> int

Return the number of elements in the dataset.

Source code in npfl138/datasets/homr_dataset.py
def __len__(self) -> int:
    """Return the number of elements in the dataset."""
    return super().__len__()

__getitem__

__getitem__(index: int) -> Element

Return the index-th element of the dataset.

Source code in npfl138/datasets/homr_dataset.py
def __getitem__(self, index: int) -> "HOMRDataset.Element":
    """Return the `index`-th element of the dataset."""
    return super().__getitem__(index)

__init__

__init__(decode_on_demand: bool = False) -> None

Load the HOMR dataset, downloading it if necessary.

Source code in npfl138/datasets/homr_dataset.py
def __init__(self, decode_on_demand: bool = False) -> None:
    "Load the HOMR dataset, downloading it if necessary."
    for dataset, size in [("train", 51_365), ("dev", 5_027), ("test", 5_023)]:
        path = "homr.{}.tfrecord".format(dataset)
        if not os.path.exists(path):
            print("Downloading file {}...".format(path), file=sys.stderr)
            urllib.request.urlretrieve("{}/{}".format(self.URL, path), filename="{}.tmp".format(path))
            os.rename("{}.tmp".format(path), path)

        setattr(self, dataset, self.Dataset(path, size, decode_on_demand))

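A sketch of both construction modes; the exact memory/latency trade-off of decode_on_demand is an assumption inferred from the TFRecordDataset base class, not documented here:

homr = HOMRDataset()                            # decode all elements eagerly
homr_lazy = HOMRDataset(decode_on_demand=True)  # presumably decode each element on access, saving memory

assert len(homr.train) == 51_365
assert len(homr.dev) == 5_027
assert len(homr.test) == 5_023
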
train instance-attribute

train: Dataset

The training dataset.

dev instance-attribute

dev: Dataset

The development dataset.

test instance-attribute

test: Dataset

The test dataset.

EditDistanceMetric class-attribute instance-attribute

EditDistanceMetric = EditDistance

The edit distance metric used for evaluation.

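A sketch of incremental use, relying only on the update/compute protocol visible in evaluate below; the mark strings are hypothetical placeholders, not guaranteed entries of MARK_NAMES:

metric = HOMRDataset.EditDistanceMetric()
metric.update([["clef-G2", "note-C4-quarter"]],  # predictions: one sequence of mark names per example
              [["clef-G2", "note-C4-half"]])     # gold sequences, aligned one-to-one with predictions
print(metric.compute())  # average edit distance over everything passed to update() so far
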
evaluate staticmethod

evaluate(gold_dataset: Dataset, predictions: Sequence[Sequence[str]]) -> float

Evaluate the predictions against the gold dataset.

Returns:

  • edit_distance ( float ) –

    The average edit distance of the predictions in percentages.

Source code in npfl138/datasets/homr_dataset.py
@staticmethod
def evaluate(gold_dataset: Dataset, predictions: Sequence[Sequence[str]]) -> float:
    """Evaluate the `predictions` against the gold dataset.

    Returns:
      edit_distance: The average edit distance of the predictions in percentages.
    """
    gold = [[HOMRDataset.MARK_NAMES[mark] for mark in example["marks"]] for example in gold_dataset]

    if len(predictions) != len(gold):
        raise RuntimeError("The predictions are of different size than gold data: {} vs {}".format(
            len(predictions), len(gold)))

    return 100 * metrics.EditDistance().update(predictions, gold).compute()

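For example, with a hypothetical predict_marks function returning one list of mark-name strings per image:

predictions = [predict_marks(example["image"]) for example in homr.dev]
score = HOMRDataset.evaluate(homr.dev, predictions)
print("Dev edit distance: {:.2f}%".format(score))
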
evaluate_file staticmethod

evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float

Evaluate the file with predictions against the gold dataset.

Returns:

  • edit_distance ( float ) –

    The average edit distance of the predictions in percentages.

Source code in npfl138/datasets/homr_dataset.py
@staticmethod
def evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
    """Evaluate the file with predictions against the gold dataset.

    Returns:
      edit_distance: The average edit distance of the predictions in percentages.
    """
    predictions = []
    for line in predictions_file:
        predictions.append(line.rstrip("\n").split())
    return HOMRDataset.evaluate(gold_dataset, predictions)
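
The expected file format follows from the parsing code above: one whitespace-separated sequence of mark names per line, in dataset order. A minimal sketch (the file name is hypothetical):

with open("homr_dev_predictions.txt", "r", encoding="utf-8") as predictions_file:
    print("Dev edit distance: {:.2f}%".format(
        HOMRDataset.evaluate_file(homr.dev, predictions_file)))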