Skip to content

MorphoAnalyzer

npfl138.datasets.morpho_analyzer.MorphoAnalyzer

Loads morphological analyses stored in a vertical format.

The analyzer provides a single method, get(word: str), which returns a list of analyses, each containing two fields: lemma and tag. If no analysis of the word is found, an empty list is returned.

Source code in npfl138/datasets/morpho_analyzer.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
class MorphoAnalyzer:
    """Loads morphological analyses stored in a vertical format.

    The analyzer exposes a single lookup method, `get(word: str)`, which
    returns the list of analyses stored for the word. Every analysis has
    two fields, `lemma` and `tag`. A word with no stored analysis yields
    an empty list.
    """

    URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2526/datasets"

    class LemmaTag:
        """One morphological analysis of a word: a lemma paired with a tag."""
        def __init__(self, lemma: str, tag: str) -> None:
            self.lemma = lemma
            """A lemma of the word."""
            self.tag = tag
            """A tag of the word."""

        def __repr__(self) -> str:
            return "(lemma: {}, tag: {})".format(self.lemma, self.tag)

    def __init__(self, dataset: str) -> None:
        """Reads the analyses of the given dataset into the `analyses` dictionary.

        The archive `{dataset}.zip` is obtained through `download_url_to_file`
        and its member `{dataset}.txt` is parsed line by line. Each line is
        tab-separated: the first column is the word, the following columns
        alternate between a lemma and its tag.
        """
        archive_path = download_url_to_file(self.URL, f"{dataset}.zip")

        self.analyses = {}
        with zipfile.ZipFile(archive_path, "r") as archive:
            with archive.open(f"{dataset}.txt", "r") as vertical_file:
                for raw_line in vertical_file:
                    word, *fields = raw_line.decode("utf-8").rstrip("\n").split("\t")

                    # Pair up (lemma, tag) columns; zip drops a trailing unpaired column.
                    # A word repeated on a later line replaces the earlier entry.
                    self.analyses[word] = [
                        self.LemmaTag(lemma, tag)
                        for lemma, tag in zip(fields[0::2], fields[1::2])
                    ]

    def get(self, word: str) -> list[LemmaTag]:
        """Looks up the analyses of `word`; the result is an empty list for unknown words."""
        try:
            return self.analyses[word]
        except KeyError:
            return []

LemmaTag

A class representing a morphological analysis of a word.

Source code in npfl138/datasets/morpho_analyzer.py
21
22
23
24
25
26
27
28
29
30
class LemmaTag:
    """One morphological analysis of a word: a lemma paired with a tag."""
    def __init__(self, lemma: str, tag: str) -> None:
        self.lemma = lemma
        """A lemma of the word."""
        self.tag = tag
        """A tag of the word."""

    def __repr__(self) -> str:
        return "(lemma: {}, tag: {})".format(self.lemma, self.tag)

lemma instance-attribute

lemma = lemma

A lemma of the word.

tag instance-attribute

tag = tag

A tag of the word.

__init__

__init__(dataset: str) -> None

Loads the morphological analyses from the specified dataset.

Source code in npfl138/datasets/morpho_analyzer.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(self, dataset: str) -> None:
    """Reads the analyses of the given dataset into the `analyses` dictionary.

    The archive `{dataset}.zip` is obtained through `download_url_to_file`
    and its member `{dataset}.txt` is parsed line by line. Each line is
    tab-separated: the first column is the word, the following columns
    alternate between a lemma and its tag.
    """
    archive_path = download_url_to_file(self.URL, f"{dataset}.zip")

    self.analyses = {}
    with zipfile.ZipFile(archive_path, "r") as archive:
        with archive.open(f"{dataset}.txt", "r") as vertical_file:
            for raw_line in vertical_file:
                word, *fields = raw_line.decode("utf-8").rstrip("\n").split("\t")

                # Pair up (lemma, tag) columns; zip drops a trailing unpaired column.
                # A word repeated on a later line replaces the earlier entry.
                self.analyses[word] = [
                    self.LemmaTag(lemma, tag)
                    for lemma, tag in zip(fields[0::2], fields[1::2])
                ]

get

get(word: str) -> list[LemmaTag]

Returns a (possibly empty) list of morphological analyses for the given word.

Source code in npfl138/datasets/morpho_analyzer.py
48
49
50
def get(self, word: str) -> list[LemmaTag]:
    """Looks up the analyses of `word`; the result is an empty list for unknown words."""
    try:
        return self.analyses[word]
    except KeyError:
        return []