Skip to content

ReadingComprehensionDataset

The ReadingComprehensionDataset class represents a reading comprehension dataset.

  • Loads the reading comprehension data.
  • The data consists of three datasets:
    • train
    • dev
    • test
  • Each dataset contains a list of paragraphs in the paragraphs field.
  • Each paragraph is a dictionary containing the following:
    • context: text
    • qas: list of questions and answers, each a dictionary with:
      • question: text of the question
      • answers: a list of answers, each answer a dictionary containing:
        • text: answer text as a string, exactly as it appears in the context
        • start: character offset of the answer text in the context

npfl138.datasets.reading_comprehension_dataset.ReadingComprehensionDataset

Source code in npfl138/datasets/reading_comprehension_dataset.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
class ReadingComprehensionDataset:
    """A reading comprehension dataset with `train`, `dev`, and `test` splits.

    Each split exposes a list of paragraphs; every paragraph has a `context`
    string and a list of question/answer dictionaries (see `Paragraph`).
    """
    Paragraph = TypedDict("Paragraph", {"context": str, "qas": list[TypedDict("QA", {
        "question": str, "answers": list[TypedDict("Answer", {"text": str, "start": int})]})]})
    """The type of a single Paragraph containing possibly several questions and corresponding answers."""
    _URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2425/datasets/"

    class Dataset:
        """A single dataset split parsed from a plain-text file."""

        def __init__(self, data_file: BinaryIO) -> None:
            """Parse paragraphs from an open binary file.

            Expected format: the first non-empty line of a paragraph is its
            context; every following non-empty line is a question followed by
            tab-separated (answer text, answer start offset) pairs; an empty
            line terminates the paragraph.

            Raises:
              ValueError: If a question line does not contain complete
                (text, start) answer pairs.
            """
            self._paragraphs = []
            in_paragraph = False
            for line in data_file:
                line = line.decode("utf-8").rstrip("\r\n")
                if not line:
                    # An empty line separates paragraphs.
                    in_paragraph = False
                    continue
                if not in_paragraph:
                    # The first line of a paragraph is its context.
                    self._paragraphs.append({"context": line, "qas": []})
                    in_paragraph = True
                else:
                    question, *qas = line.split("\t")
                    # Answers come as (text, start) pairs, so an even count is
                    # required. Raise instead of `assert` so the check is not
                    # silently removed when running under `python -O`.
                    if len(qas) % 2 != 0:
                        raise ValueError("Malformed question line: {!r}".format(line))

                    self._paragraphs[-1]["qas"].append({
                        "question": question,
                        "answers": [
                            {"text": qas[i], "start": int(qas[i + 1])} for i in range(0, len(qas), 2)]})

        @property
        def paragraphs(self) -> list["ReadingComprehensionDataset.Paragraph"]:
            """The paragraphs in this dataset."""
            return self._paragraphs

    def __init__(self, name: str = "reading_comprehension") -> None:
        """Load the dataset, downloading it if necessary."""
        path = "{}.zip".format(name)
        if not os.path.exists(path):
            print("Downloading dataset {}...".format(name), file=sys.stderr)
            # `_URL` already ends with a slash, so concatenate directly to
            # avoid producing a double "//" in the download URL. Download to a
            # temporary name first so an interrupted transfer does not leave a
            # partial file behind under the final name.
            urllib.request.urlretrieve("{}{}".format(self._URL, path), filename="{}.tmp".format(path))
            os.rename("{}.tmp".format(path), path)

        with zipfile.ZipFile(path, "r") as zip_file:
            for dataset in ["train", "dev", "test"]:
                with zip_file.open("{}_{}.txt".format(os.path.splitext(path)[0], dataset), "r") as dataset_file:
                    setattr(self, dataset, self.Dataset(dataset_file))

    train: Dataset
    """The training dataset."""
    dev: Dataset
    """The development dataset."""
    test: Dataset
    """The test dataset."""

    # Evaluation infrastructure.
    @staticmethod
    def evaluate(gold_dataset: Dataset, predictions: Sequence[str]) -> float:
        """Evaluate the `predictions` against the gold dataset.

        A prediction is considered correct when it exactly matches the text
        of any gold answer of the corresponding question.

        Raises:
          RuntimeError: If the number of predictions does not equal the
            number of questions in the gold dataset.

        Returns:
          accuracy: The accuracy of the predictions in percentages.
        """
        gold = [qa["answers"] for paragraph in gold_dataset.paragraphs for qa in paragraph["qas"]]
        if len(predictions) != len(gold):
            raise RuntimeError("The predictions contain different number of answers than gold data: {} vs {}".format(
                len(predictions), len(gold)))

        correct = sum(
            any(prediction == gold_answer["text"] for gold_answer in gold_answers)
            for prediction, gold_answers in zip(predictions, gold))
        return 100 * correct / len(gold)

    @staticmethod
    def evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
        """Evaluate the file with predictions against the gold dataset.

        Each line of `predictions_file` is one predicted answer, in the
        order of the gold dataset's questions.

        Returns:
          accuracy: The accuracy of the predictions in percentages.
        """
        predictions = [answer.strip() for answer in predictions_file]
        return ReadingComprehensionDataset.evaluate(gold_dataset, predictions)

Paragraph class-attribute instance-attribute

Paragraph = TypedDict(
    "Paragraph",
    {
        "context": str,
        "qas": list[
            TypedDict(
                "QA",
                {
                    "question": str,
                    "answers": list[
                        TypedDict("Answer", {"text": str, "start": int})
                    ],
                },
            )
        ],
    },
)

The type of a single Paragraph containing possibly several questions and corresponding answers.

Dataset

Source code in npfl138/datasets/reading_comprehension_dataset.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
class Dataset:
    """A single dataset split parsed from a plain-text file."""

    def __init__(self, data_file: BinaryIO) -> None:
        """Parse paragraphs from an open binary file.

        Expected format: the first non-empty line of a paragraph is its
        context; every following non-empty line is a question followed by
        tab-separated (answer text, answer start offset) pairs; an empty
        line terminates the paragraph.

        Raises:
          ValueError: If a question line does not contain complete
            (text, start) answer pairs.
        """
        self._paragraphs = []
        in_paragraph = False
        for line in data_file:
            line = line.decode("utf-8").rstrip("\r\n")
            if not line:
                # An empty line separates paragraphs.
                in_paragraph = False
                continue
            if not in_paragraph:
                # The first line of a paragraph is its context.
                self._paragraphs.append({"context": line, "qas": []})
                in_paragraph = True
            else:
                question, *qas = line.split("\t")
                # Answers come as (text, start) pairs, so an even count is
                # required. Raise instead of `assert` so the check is not
                # silently removed when running under `python -O`.
                if len(qas) % 2 != 0:
                    raise ValueError("Malformed question line: {!r}".format(line))

                self._paragraphs[-1]["qas"].append({
                    "question": question,
                    "answers": [
                        {"text": qas[i], "start": int(qas[i + 1])} for i in range(0, len(qas), 2)]})

    @property
    def paragraphs(self) -> list["ReadingComprehensionDataset.Paragraph"]:
        """The paragraphs in this dataset."""
        return self._paragraphs

paragraphs property

paragraphs: list[Paragraph]

The paragraphs in this dataset.

__init__

__init__(name: str = 'reading_comprehension') -> None

Load the dataset, downloading it if necessary.

Source code in npfl138/datasets/reading_comprehension_dataset.py
62
63
64
65
66
67
68
69
70
71
72
73
def __init__(self, name: str = "reading_comprehension") -> None:
    """Load the dataset, downloading it if necessary.

    Downloads `{name}.zip` from the course server when it is not already
    present, then populates the `train`, `dev`, and `test` attributes from
    the corresponding text files inside the archive.
    """
    path = "{}.zip".format(name)
    if not os.path.exists(path):
        print("Downloading dataset {}...".format(name), file=sys.stderr)
        # `_URL` already ends with a slash, so concatenate directly to avoid
        # producing a double "//" in the download URL. Download to a temporary
        # name first so an interrupted transfer does not leave a partial file
        # behind under the final name.
        urllib.request.urlretrieve("{}{}".format(self._URL, path), filename="{}.tmp".format(path))
        os.rename("{}.tmp".format(path), path)

    with zipfile.ZipFile(path, "r") as zip_file:
        for dataset in ["train", "dev", "test"]:
            with zip_file.open("{}_{}.txt".format(os.path.splitext(path)[0], dataset), "r") as dataset_file:
                setattr(self, dataset, self.Dataset(dataset_file))

train instance-attribute

train: Dataset

The training dataset.

dev instance-attribute

dev: Dataset

The development dataset.

test instance-attribute

test: Dataset

The test dataset.

evaluate staticmethod

evaluate(gold_dataset: Dataset, predictions: Sequence[str]) -> float

Evaluate the predictions against the gold dataset.

Returns:

  • accuracy ( float ) –

    The accuracy of the predictions in percentages.

Source code in npfl138/datasets/reading_comprehension_dataset.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
@staticmethod
def evaluate(gold_dataset: Dataset, predictions: Sequence[str]) -> float:
    """Evaluate the `predictions` against the gold dataset.

    A prediction counts as correct when it exactly matches the text of any
    gold answer of the corresponding question.

    Returns:
      accuracy: The accuracy of the predictions in percentages.
    """
    # Flatten the gold answers into one list entry per question, in order.
    gold = [qa["answers"]
            for paragraph in gold_dataset.paragraphs
            for qa in paragraph["qas"]]
    if len(gold) != len(predictions):
        raise RuntimeError("The predictions contain different number of answers than gold data: {} vs {}".format(
            len(predictions), len(gold)))

    matches = sum(
        any(predicted == answer["text"] for answer in answers)
        for predicted, answers in zip(predictions, gold))
    return 100 * matches / len(gold)

evaluate_file staticmethod

evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float

Evaluate the file with predictions against the gold dataset.

Returns:

  • accuracy ( float ) –

    The accuracy of the predictions in percentages.

Source code in npfl138/datasets/reading_comprehension_dataset.py
102
103
104
105
106
107
108
109
110
@staticmethod
def evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
    """Evaluate the file with predictions against the gold dataset.

    Each line of `predictions_file` is one predicted answer, in the order
    of the gold dataset's questions.

    Returns:
      accuracy: The accuracy of the predictions in percentages.
    """
    # Strip surrounding whitespace (including the newline) from every line
    # and delegate to `evaluate`.
    return ReadingComprehensionDataset.evaluate(
        gold_dataset, [line.strip() for line in predictions_file])