HOMRDataset

The HOMR dataset contains synthetically generated handwritten music together with the corresponding annotations.

The input images are grayscale, have arbitrary height and width, and each represents a single stave of music. The annotations are “semantic”: every note is annotated with its real pitch and duration, which depend on the current clef and key signature.

A visualization of the first 100 training examples is available here.
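
A minimal usage sketch (the import path follows the module name below; the image shape comment reflects torchvision's grayscale decoding):

from npfl138.datasets.homr_dataset import HOMRDataset

homr = HOMRDataset()  # downloads the .tfrecord files on first use

example = homr.train[0]        # a single Element (see below)
print(example["image"].shape)  # [1, H, W] grayscale tensor; H and W vary per stave
print(example["marks"])        # tensor of integer mark indices into HOMRDataset.MARK_NAMES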

npfl138.datasets.homr_dataset.HOMRDataset

Source code in npfl138/datasets/homr_dataset.py
class HOMRDataset:
    C: int = 1
    """Number of channels in the input images."""

    MARKS: int = 938
    """The number of different marks in the dataset."""
    MARK_NAMES: list[str]  # Set at the bottom of the script for readability
    """The list of mark names in the dataset."""

    Element = TypedDict("Element", {"image": torch.Tensor, "marks": torch.Tensor})
    """The type of a single dataset element."""

    URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2425/datasets/"

    class Dataset(TFRecordDataset):
        def __init__(self, path: str, size: int, decode_on_demand: bool) -> None:
            super().__init__(path, size, decode_on_demand)

        def __len__(self) -> int:
            """Return the number of elements in the dataset."""
            return super().__len__()

        def __getitem__(self, index: int) -> "HOMRDataset.Element":
            """Return the `index`-th element of the dataset."""
            return super().__getitem__(index)

        def _tfrecord_decode(self, data: dict, indices: dict, index: int) -> "HOMRDataset.Element":
            return {
                "image": torchvision.io.decode_image(
                    data["image"][indices["image"][index]:indices["image"][index + 1]],
                    torchvision.io.ImageReadMode.GRAY),
                "marks": data["marks"][indices["marks"][index]:indices["marks"][index + 1]],
            }

    def __init__(self, decode_on_demand: bool = False) -> None:
        "Load the HOMR dataset, downloading it if necessary."
        for dataset, size in [("train", 51_365), ("dev", 5_027), ("test", 5_023)]:
            path = "homr.{}.tfrecord".format(dataset)
            if not os.path.exists(path):
                print("Downloading file {}...".format(path), file=sys.stderr)
                urllib.request.urlretrieve("{}/{}".format(self.URL, path), filename="{}.tmp".format(path))
                os.rename("{}.tmp".format(path), path)

            setattr(self, dataset, self.Dataset(path, size, decode_on_demand))

    train: Dataset
    """The training dataset."""
    dev: Dataset
    """The development dataset."""
    test: Dataset
    """The test dataset."""

    # The EditDistanceMetric
    EditDistanceMetric = metrics.EditDistance
    """The edit distance metric used for evaluation."""

    # Evaluation infrastructure
    @staticmethod
    def evaluate(gold_dataset: Dataset, predictions: Sequence[Sequence[str]]) -> float:
        """Evaluate the `predictions` against the gold dataset.

        Returns:
          edit_distance: The average edit distance of the predictions in percentages.
        """
        gold = [[HOMRDataset.MARK_NAMES[mark] for mark in example["marks"]] for example in gold_dataset]

        if len(predictions) != len(gold):
            raise RuntimeError("The predictions are of different size than gold data: {} vs {}".format(
                len(predictions), len(gold)))

        return 100 * metrics.EditDistance().update(predictions, gold).compute()

    @staticmethod
    def evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
        """Evaluate the file with predictions against the gold dataset.

        Returns:
          edit_distance: The average edit distance of the predictions in percentages.
        """
        predictions = []
        for line in predictions_file:
            predictions.append(line.rstrip("\n").split())
        return HOMRDataset.evaluate(gold_dataset, predictions)

C class-attribute instance-attribute

C: int = 1

Number of channels in the input images.

MARKS class-attribute instance-attribute

MARKS: int = 938

The number of different marks in the dataset.

MARK_NAMES instance-attribute

MARK_NAMES: list[str]

The list of mark names in the dataset.

Element class-attribute instance-attribute

Element = TypedDict('Element', {'image': Tensor, 'marks': Tensor})

The type of a single dataset element.

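For illustration, a short sketch of consuming an element's fields; converting indices to names via MARK_NAMES mirrors what evaluate does below:

element = homr.train[0]
image = element["image"]    # uint8 grayscale tensor of shape [C, H, W] with C = 1
marks = element["marks"]    # integer mark indices
mark_names = [HOMRDataset.MARK_NAMES[mark] for mark in marks]
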
Dataset

Bases: TFRecordDataset

Source code in npfl138/datasets/homr_dataset.py
class Dataset(TFRecordDataset):
    def __init__(self, path: str, size: int, decode_on_demand: bool) -> None:
        super().__init__(path, size, decode_on_demand)

    def __len__(self) -> int:
        """Return the number of elements in the dataset."""
        return super().__len__()

    def __getitem__(self, index: int) -> "HOMRDataset.Element":
        """Return the `index`-th element of the dataset."""
        return super().__getitem__(index)

    def _tfrecord_decode(self, data: dict, indices: dict, index: int) -> "HOMRDataset.Element":
        return {
            "image": torchvision.io.decode_image(
                data["image"][indices["image"][index]:indices["image"][index + 1]],
                torchvision.io.ImageReadMode.GRAY),
            "marks": data["marks"][indices["marks"][index]:indices["marks"][index + 1]],
        }

__len__

__len__() -> int

Return the number of elements in the dataset.

Source code in npfl138/datasets/homr_dataset.py
def __len__(self) -> int:
    """Return the number of elements in the dataset."""
    return super().__len__()

__getitem__

__getitem__(index: int) -> Element

Return the index-th element of the dataset.

Source code in npfl138/datasets/homr_dataset.py
def __getitem__(self, index: int) -> "HOMRDataset.Element":
    """Return the `index`-th element of the dataset."""
    return super().__getitem__(index)

__init__

__init__(decode_on_demand: bool = False) -> None

Load the HOMR dataset, downloading it if necessary.

Source code in npfl138/datasets/homr_dataset.py
def __init__(self, decode_on_demand: bool = False) -> None:
    "Load the HOMR dataset, downloading it if necessary."
    for dataset, size in [("train", 51_365), ("dev", 5_027), ("test", 5_023)]:
        path = "homr.{}.tfrecord".format(dataset)
        if not os.path.exists(path):
            print("Downloading file {}...".format(path), file=sys.stderr)
            urllib.request.urlretrieve("{}/{}".format(self.URL, path), filename="{}.tmp".format(path))
            os.rename("{}.tmp".format(path), path)

        setattr(self, dataset, self.Dataset(path, size, decode_on_demand))

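A sketch of both construction modes; the exact memory/latency trade-off of decode_on_demand is an assumption inferred from the TFRecordDataset base class, not documented here:

homr = HOMRDataset()                            # decode all elements eagerly
homr_lazy = HOMRDataset(decode_on_demand=True)  # presumably decode each element on access, saving memory

assert len(homr.train) == 51_365
assert len(homr.dev) == 5_027
assert len(homr.test) == 5_023
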
train instance-attribute

train: Dataset

The training dataset.

dev instance-attribute

dev: Dataset

The development dataset.

test instance-attribute

test: Dataset

The test dataset.

EditDistanceMetric class-attribute instance-attribute

EditDistanceMetric = EditDistance

The edit distance metric used for evaluation.

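A sketch of incremental use, relying only on the update/compute protocol visible in evaluate below; the mark strings are hypothetical placeholders, not guaranteed entries of MARK_NAMES:

metric = HOMRDataset.EditDistanceMetric()
metric.update([["clef-G2", "note-C4-quarter"]],  # predictions: one sequence of mark names per example
              [["clef-G2", "note-C4-half"]])     # gold sequences, aligned one-to-one with predictions
print(metric.compute())  # average edit distance over everything passed to update() so far
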
evaluate staticmethod

evaluate(gold_dataset: Dataset, predictions: Sequence[Sequence[str]]) -> float

Evaluate the predictions against the gold dataset.

Returns:

  • edit_distance ( float ) –

    The average edit distance of the predictions in percentages.

Source code in npfl138/datasets/homr_dataset.py
@staticmethod
def evaluate(gold_dataset: Dataset, predictions: Sequence[Sequence[str]]) -> float:
    """Evaluate the `predictions` against the gold dataset.

    Returns:
      edit_distance: The average edit distance of the predictions in percentages.
    """
    gold = [[HOMRDataset.MARK_NAMES[mark] for mark in example["marks"]] for example in gold_dataset]

    if len(predictions) != len(gold):
        raise RuntimeError("The predictions are of different size than gold data: {} vs {}".format(
            len(predictions), len(gold)))

    return 100 * metrics.EditDistance().update(predictions, gold).compute()

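For example, with a hypothetical predict_marks function returning one list of mark-name strings per image:

predictions = [predict_marks(example["image"]) for example in homr.dev]
score = HOMRDataset.evaluate(homr.dev, predictions)
print("Dev edit distance: {:.2f}%".format(score))
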
evaluate_file staticmethod

evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float

Evaluate the file with predictions against the gold dataset.

Returns:

  • edit_distance ( float ) –

    The average edit distance of the predictions in percentages.

Source code in npfl138/datasets/homr_dataset.py
@staticmethod
def evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
    """Evaluate the file with predictions against the gold dataset.

    Returns:
      edit_distance: The average edit distance of the predictions in percentages.
    """
    predictions = []
    for line in predictions_file:
        predictions.append(line.rstrip("\n").split())
    return HOMRDataset.evaluate(gold_dataset, predictions)
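
The expected file format follows from the parsing code above: one whitespace-separated sequence of mark names per line, in dataset order. A minimal sketch (the file name is hypothetical):

with open("homr_dev_predictions.txt", "r", encoding="utf-8") as predictions_file:
    print("Dev edit distance: {:.2f}%".format(
        HOMRDataset.evaluate_file(homr.dev, predictions_file)))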