Loads morphological analyses in a vertical format.
The analyzer provides only a method `get(word: str)` returning a list
of analyses, each containing two fields `lemma` and `tag`.
If an analysis of the word is not found, an empty list is returned.
Source code in npfl138/datasets/morpho_analyzer.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
class MorphoAnalyzer:
    """Loads morphological analyses in a vertical format.

    The analyzer provides only a method `get(word: str)` returning a list
    of analyses, each containing two fields `lemma` and `tag`.
    If an analysis of the word is not found, an empty list is returned.
    """
    _URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2425/datasets/"

    class LemmaTag:
        """A class representing a morphological analysis of a word."""
        def __init__(self, lemma: str, tag: str) -> None:
            self.lemma = lemma
            """A lemma of the word."""
            self.tag = tag
            """A tag of the word."""

        def __repr__(self) -> str:
            return "(lemma: {}, tag: {})".format(self.lemma, self.tag)

    def __init__(self, dataset: str) -> None:
        """Loads the morphological analyses from the specified dataset.

        The archive `<dataset>.zip` is read from the current path; when it does
        not exist, it is first downloaded from `_URL`. The archive must contain
        the file `<dataset>.txt` with one tab-separated line per word form:
        the form itself followed by alternating lemma and tag columns.

        Parameters:
          dataset: The name of the dataset (without the `.zip` extension).
        """
        path = "{}.zip".format(dataset)
        if not os.path.exists(path):
            print("Downloading dataset {}...".format(dataset), file=sys.stderr)
            # `_URL` already ends with a slash, so strip it before joining to
            # avoid requesting a path containing `//`.
            url = "{}/{}".format(self._URL.rstrip("/"), path)
            # Download under a temporary name and rename afterwards, so that a
            # partially downloaded file never appears under the final name.
            urllib.request.urlretrieve(url, filename="{}.tmp".format(path))
            os.rename("{}.tmp".format(path), path)

        self.analyses = {}
        with zipfile.ZipFile(path, "r") as zip_file:
            with zip_file.open("{}.txt".format(dataset), "r") as analyses_file:
                for line in analyses_file:
                    # Strip both LF and CRLF line endings; otherwise a stray "\r"
                    # would become part of the last tag on the line.
                    columns = line.decode("utf-8").rstrip("\r\n").split("\t")
                    # Columns after the word form are (lemma, tag) pairs; `zip`
                    # ignores a trailing lemma that has no tag.
                    self.analyses[columns[0]] = [
                        self.LemmaTag(lemma, tag)
                        for lemma, tag in zip(columns[1::2], columns[2::2])
                    ]

    def get(self, word: str) -> list[LemmaTag]:
        """Returns a (possibly empty) list of morphological analyses for the given word."""
        return self.analyses.get(word, [])
|
LemmaTag
A class representing a morphological analysis of a word.
Source code in npfl138/datasets/morpho_analyzer.py
22
23
24
25
26
27
28
29
30
class LemmaTag:
    """A single morphological reading of a word: its lemma together with its tag."""

    def __init__(self, lemma: str, tag: str) -> None:
        self.lemma = lemma
        """A lemma of the word."""
        self.tag = tag
        """A tag of the word."""

    def __repr__(self) -> str:
        # Render both fields in their declaration order.
        fields = (self.lemma, self.tag)
        return "(lemma: {}, tag: {})".format(*fields)
|
__init__
__init__(dataset: str) -> None
Loads the morphological analyses from the specified dataset.
Source code in npfl138/datasets/morpho_analyzer.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 | def __init__(self, dataset: str) -> None:
"""Loads the morphological analyses from the specified dataset."""
path = "{}.zip".format(dataset)
if not os.path.exists(path):
print("Downloading dataset {}...".format(dataset), file=sys.stderr)
urllib.request.urlretrieve("{}/{}".format(self._URL, path), filename="{}.tmp".format(path))
os.rename("{}.tmp".format(path), path)
self.analyses = {}
with zipfile.ZipFile(path, "r") as zip_file:
with zip_file.open("{}.txt".format(dataset), "r") as analyses_file:
for line in analyses_file:
line = line.decode("utf-8").rstrip("\n")
columns = line.split("\t")
analyses = []
for i in range(1, len(columns) - 1, 2):
analyses.append(self.LemmaTag(columns[i], columns[i + 1]))
self.analyses[columns[0]] = analyses
|
get
Returns a (possibly empty) list of morphological analyses for the given word.
Source code in npfl138/datasets/morpho_analyzer.py
| def get(self, word: str) -> list[LemmaTag]:
"""Returns a (possibly empty) list of morphological analyses for the given word."""
return self.analyses.get(word, [])
|