Loads morphological analyses in a vertical format.
The analyzer provides only a method get(word: str) returning a list
of analyses, each containing two fields lemma and tag.
If an analysis of the word is not found, an empty list is returned.
Source code in npfl138/datasets/morpho_analyzer.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
class MorphoAnalyzer:
    """Loads morphological analyses in a vertical format.

    The analyzer provides only a method `get(word: str)` returning a list
    of analyses, each containing two fields `lemma` and `tag`.

    If an analysis of the word is not found, an empty list is returned.
    """
    URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2526/datasets"

    class LemmaTag:
        """A class representing a morphological analysis of a word."""

        def __init__(self, lemma: str, tag: str) -> None:
            self.lemma = lemma
            """A lemma of the word."""
            self.tag = tag
            """A tag of the word."""

        def __repr__(self) -> str:
            return f"(lemma: {self.lemma}, tag: {self.tag})"

    def __init__(self, dataset: str) -> None:
        """Loads the morphological analyses from the specified dataset.

        The archive `{dataset}.zip` is obtained through `download_url_to_file`
        and its member `{dataset}.txt` is read line by line. When the same
        word appears on several lines, the last line wins.

        Args:
            dataset: Base name of the dataset; used both for the archive name
                and for the text file stored inside it.
        """
        path = download_url_to_file(self.URL, f"{dataset}.zip")

        self.analyses = {}
        with zipfile.ZipFile(path, "r") as zip_file:
            with zip_file.open(f"{dataset}.txt", "r") as analyses_file:
                for raw_line in analyses_file:
                    word, analyses = self._parse_line(raw_line)
                    self.analyses[word] = analyses

    @staticmethod
    def _parse_line(raw_line: bytes) -> tuple[str, list[LemmaTag]]:
        """Parses one UTF-8 encoded, tab-separated line into (word, analyses).

        The first column is the word; the following columns are consumed as
        consecutive (lemma, tag) pairs. A trailing column without a partner
        is ignored.
        """
        # Strip "\r" as well as "\n": the archive member is read in binary
        # mode, so a CRLF line ending would otherwise leak into the last column.
        columns = raw_line.decode("utf-8").rstrip("\r\n").split("\t")
        analyses = [
            MorphoAnalyzer.LemmaTag(lemma, tag)
            for lemma, tag in zip(columns[1::2], columns[2::2])
        ]
        return columns[0], analyses

    def get(self, word: str) -> list[LemmaTag]:
        """Returns a (possibly empty) list of morphological analyses for the given word."""
        return self.analyses.get(word, [])
|
LemmaTag
A class representing a morphological analysis of a word.
Source code in npfl138/datasets/morpho_analyzer.py
21
22
23
24
25
26
27
28
29
class LemmaTag:
    """A single morphological analysis of a word: a lemma paired with a tag."""

    def __init__(self, lemma: str, tag: str) -> None:
        self.lemma = lemma
        """The lemma assigned to the word by this analysis."""
        self.tag = tag
        """The tag assigned to the word by this analysis."""

    def __repr__(self) -> str:
        # Rendered as "(lemma: <lemma>, tag: <tag>)".
        return "(lemma: {}, tag: {})".format(self.lemma, self.tag)
|
__init__
__init__(dataset: str) -> None
Loads the morphological analyses from the specified dataset.
Source code in npfl138/datasets/morpho_analyzer.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def __init__(self, dataset: str) -> None:
    """Loads the morphological analyses from the specified dataset.

    The archive `{dataset}.zip` is obtained through `download_url_to_file`
    and its member `{dataset}.txt` is read line by line. Each line is
    tab-separated: the first column is the word and the following columns
    are consumed as consecutive (lemma, tag) pairs; a trailing column
    without a partner is ignored. When the same word appears on several
    lines, the last line wins.

    Args:
        dataset: Base name of the dataset; used both for the archive name
            and for the text file stored inside it.
    """
    path = download_url_to_file(self.URL, f"{dataset}.zip")

    self.analyses = {}
    with zipfile.ZipFile(path, "r") as zip_file:
        with zip_file.open(f"{dataset}.txt", "r") as analyses_file:
            for raw_line in analyses_file:
                # Strip "\r" as well as "\n": the member is read in binary
                # mode, so a CRLF ending would otherwise leak into the last column.
                columns = raw_line.decode("utf-8").rstrip("\r\n").split("\t")
                self.analyses[columns[0]] = [
                    self.LemmaTag(lemma, tag)
                    for lemma, tag in zip(columns[1::2], columns[2::2])
                ]
|
get
Returns a (possibly empty) list of morphological analyses for the given word.
Source code in npfl138/datasets/morpho_analyzer.py
| def get(self, word: str) -> list[LemmaTag]:
"""Returns a (possibly empty) list of morphological analyses for the given word."""
return self.analyses.get(word, [])
|