(Archive: Languages and Treebanks in HamleDT 2.0)
Code | Data | Train size | Dev size | Test size | Total size | Tr/D/TsSnt | NonProjHarm | Morphological richness | ||||||||||
split | Sentences | Tokens | Sentences | Tokens | Sentences | Tokens | Sentences | Tokens | Average sentence length | % | % | % | Deps | Percent | Forms | Lemmas | Ratio | |
ar | ours | 6174 | 225853 | 786 | 28263 | 704 | 28268 | 7664 | 282384 | 36,85 | 81 | 10 | 9 | 1992 | 0,71 | 6970 | 6970 | 1,00 |
bg-ud11 | official | 7524 | 100486 | 940 | 12711 | 941 | 12395 | 9405 | 125592 | 13,35 | 80 | 10 | 10 | 229 | 0,18 | 25250 | 12984 | 1,94 |
bn | official | 979 | 6440 | 150 | 812 | 150 | 961 | 1279 | 8213 | 6,42 | 77 | 12 | 12 | 63 | 0,77 | 3582 | 2727 | 1,31 |
ca | official | 13200 | 390302 | 1724 | 53015 | 1862 | 53355 | 16786 | 496672 | 29,59 | 79 | 10 | 11 | 107 | 0,02 | 37248 | 25168 | 1,48 |
cs | official | 68495 | 1171190 | 9270 | 158962 | 10148 | 173586 | 87913 | 1503738 | 17,10 | 78 | 11 | 12 | 28488 | 1,89 | 119769 | 45445 | 2,64 |
da-ud11 | official | 4868 | 88540 | 322 | 5846 | 322 | 5852 | 5512 | 100238 | 18,19 | 88 | 6 | 6 | 2072 | 2,07 | 18893 | 12854 | 1,47 |
de | official | 36020 | 648677 | 2000 | 32033 | 2000 | 31622 | 40020 | 712332 | 17,80 | 90 | 5 | 5 | 15875 | 2,23 | 77368 | 53876 | 1,44 |
de-ud11 | official | 14118 | 269626 | 800 | 12379 | 1000 | 16609 | 15918 | 298614 | 18,76 | 89 | 5 | 6 | 2702 | 0,90 | 50662 | 1 | 50662,00 |
el-ud11 | official | 1929 | 47449 | 241 | 6039 | 241 | 5668 | 2411 | 59156 | 24,54 | 80 | 10 | 10 | 1146 | 1,94 | 11488 | 5784 | 1,99 |
en | official train split to our train+dev; official test is test | 18200 | 437044 | 377 | 9529 | 214 | 5003 | 18791 | 451576 | 24,03 | 97 | 2 | 1 | 1493 | 0,33 | 28075 | 20709 | 1,36 |
en-ud11 | official | 12543 | 204586 | 2002 | 25148 | 2077 | 25096 | 16622 | 254830 | 15,33 | 75 | 12 | 12 | 1215 | 0,48 | 22607 | 16270 | 1,39 |
es | official | 14329 | 427442 | 1655 | 50368 | 1725 | 50630 | 17709 | 528440 | 29,84 | 81 | 9 | 10 | 0 | 0,00 | 45118 | 27930 | 1,62 |
es-ud11 | official | 14137 | 382426 | 1569 | 41749 | 300 | 8476 | 16006 | 432651 | 27,03 | 88 | 10 | 2 | 1227 | 0,28 | 47210 | 1 | 47210,00 |
et | ours | 1066 | 7687 | 118 | 848 | 131 | 956 | 1315 | 9491 | 7,22 | 81 | 9 | 10 | 8 | 0,08 | 2080 | 1980 | 1,05 |
eu-ud11 | official | 3164 | 36455 | 1054 | 11924 | 1055 | 12184 | 5273 | 60563 | 11,49 | 60 | 20 | 20 | 2307 | 3,81 | 16222 | 7049 | 2,30 |
fa | ours | 11800 | 177281 | 326 | 5597 | 329 | 6694 | 12455 | 189572 | 15,22 | 95 | 3 | 3 | 3357 | 1,77 | 24219 | 13348 | 1,81 |
fa-ud11 | official | 4800 | 121046 | 600 | 15850 | 600 | 16022 | 6000 | 152918 | 25,49 | 80 | 10 | 10 | 580 | 0,38 | 15222 | 1 | 15222,00 |
fi-ud11 | official | 12217 | 162721 | 716 | 9161 | 648 | 9140 | 13581 | 181022 | 13,33 | 90 | 5 | 5 | 1350 | 0,75 | 52510 | 22829 | 2,30 |
fi-ud11ftb | official | 11459 | 97165 | 3819 | 32439 | 3819 | 32380 | 19097 | 161984 | 8,48 | 60 | 20 | 20 | 1746 | 1,08 | 46702 | 21033 | 2,22 |
fr-ud11 | official | 14551 | 354690 | 1617 | 38858 | 300 | 7079 | 16468 | 400627 | 24,33 | 88 | 10 | 2 | 3192 | 0,80 | 43137 | 1 | 43137,00 |
ga-ud11 | official | 720 | 16701 | 150 | 3164 | 150 | 3821 | 1020 | 23686 | 23,22 | 71 | 15 | 15 | 191 | 0,81 | 6162 | 3823 | 1,61 |
grc | ours | 20126 | 296927 | 518 | 6206 | 529 | 5949 | 21173 | 309082 | 14,60 | 95 | 2 | 2 | 29723 | 9,62 | 19847 | 7648 | 2,60 |
he-ud11 | official | 5241 | 135496 | 484 | 11234 | 491 | 12125 | 6216 | 158855 | 25,56 | 84 | 8 | 8 | 0 | 0,00 | 17836 | 1 | 17836,00 |
hi | official test is blind; thus our test is official development, and official train is split to our train and our development | 11000 | 245493 | 1041 | 22600 | 1233 | 26416 | 13274 | 294509 | 22,19 | 83 | 8 | 9 | 3887 | 1,32 | 17477 | 13632 | 1,28 |
hr-ud11 | official | 3557 | 78817 | 200 | 4823 | 200 | 4125 | 3957 | 87765 | 22,18 | 90 | 5 | 5 | 405 | 0,46 | 18294 | 8475 | 2,16 |
hu-ud11 | official | 1032 | 20764 | 129 | 3049 | 138 | 2725 | 1299 | 26538 | 20,43 | 79 | 10 | 11 | 554 | 2,09 | 9607 | 6243 | 1,54 |
id-ud11 | official | 4477 | 97531 | 559 | 12612 | 557 | 11780 | 5593 | 121923 | 21,80 | 80 | 10 | 10 | 161 | 0,13 | 21334 | 1 | 21334,00 |
it-ud11 | official | 11330 | 254224 | 500 | 11541 | 500 | 11444 | 12330 | 277209 | 22,48 | 92 | 4 | 4 | 923 | 0,33 | 28550 | 17482 | 1,63 |
ja | official | 16400 | 145189 | 644 | 6272 | 709 | 5711 | 17753 | 157172 | 8,85 | 92 | 4 | 4 | 2838 | 1,81 | 3274 | 1 | 3274,00 |
la-it | ours | 14500 | 246607 | 400 | 6020 | 394 | 7056 | 15294 | 259683 | 16,98 | 95 | 3 | 3 | 8646 | 3,33 | 12211 | 3375 | 3,62 |
la | ours | 2979 | 45741 | 178 | 2613 | 316 | 4789 | 3473 | 53143 | 15,30 | 86 | 5 | 9 | 3760 | 7,08 | 410 | 355 | 1,15 |
nl | official train split to our train+dev; official test is test | 13000 | 188882 | 349 | 6187 | 386 | 5585 | 13735 | 200654 | 14,61 | 95 | 3 | 3 | 9137 | 4,55 | 29373 | 20302 | 1,45 |
pl | ours | 6800 | 69499 | 700 | 6887 | 727 | 7185 | 8227 | 83571 | 10,16 | 83 | 9 | 9 | 32 | 0,04 | 25712 | 12651 | 2,03 |
pt | official | 8800 | 201845 | 271 | 4833 | 288 | 5867 | 9359 | 212545 | 22,71 | 94 | 3 | 3 | 2704 | 1,27 | 28995 | 18401 | 1,58 |
ro | ours | 3521 | 31355 | 255 | 2155 | 266 | 2640 | 4042 | 36150 | 8,94 | 87 | 6 | 7 | 0 | 0,00 | 8745 | 8745 | 1,00 |
ru | ours | 34273 | 491102 | 534 | 6227 | 402 | 3458 | 35209 | 500787 | 14,22 | 97 | 2 | 1 | 3260 | 0,65 | 90190 | 31228 | 2,89 |
sk | ours | 51913 | 814561 | 5833 | 93404 | 5492 | 85903 | 63238 | 993868 | 15,72 | 82 | 9 | 9 | 23411 | 2,36 | 109281 | 42831 | 2,55 |
sl | official test kept; official training split to our training and dev | 1200 | 22336 | 334 | 6414 | 402 | 6390 | 1936 | 35140 | 18,15 | 62 | 17 | 21 | 481 | 1,37 | 8145 | 4382 | 1,86 |
sv-ud11 | official | 4303 | 66645 | 504 | 9797 | 1219 | 20377 | 6026 | 96819 | 16,07 | 71 | 8 | 20 | 185 | 0,19 | 15920 | 9769 | 1,63 |
ta | ours | 400 | 6329 | 80 | 1263 | 120 | 1989 | 600 | 9581 | 15,97 | 67 | 13 | 20 | 26 | 0,27 | 3464 | 1910 | 1,81 |
te | official | 1300 | 5125 | 150 | 597 | 150 | 599 | 1600 | 6321 | 3,95 | 81 | 9 | 9 | 14 | 0,22 | 2943 | 1606 | 1,83 |
tr | official test kept; official training split to our training and dev | 5300 | 58972 | 335 | 6210 | 300 | 4513 | 5935 | 69695 | 11,74 | 89 | 6 | 5 | 2430 | 3,49 | 22749 | 6427 | 3,54 |
@inproceedings{ar, author = {Otakar Smr{\v{z}} and Viktor Bielick{\'{y}} and Iveta Kou{\v{r}}ilov{\'{a}} and Jakub Kr{\'{a}}{\v{c}}mar and Jan Haji{\v{c}} and Petr Zem{\'{a}}nek}, year = {2008}, title = {{P}rague {A}rabic Dependency Treebank: A Word on the Million Words}, booktitle = {Proceedings of the Workshop on Arabic and Local Languages ({LREC} 2008)}, publisher = {European Language Resources Association}, address = {Marrakech, Morocco}, pages = {16--23}, isbn = {2-9517408-4-0}, } @inproceedings{bg, author = {Kiril Simov and Petya Osenova}, year = 2005, title = {Extending the Annotation of {B}ul{T}ree{B}ank: Phase 2}, booktitle = {The Fourth Workshop on Treebanks and Linguistic Theories (TLT 2005)}, month = {December}, address = {Barcelona}, pages = {173--184} } @misc{cs, author = {Jan Haji{\v{c}} and Jarmila Panevov{\'{a}} and Eva Haji{\v{c}}ov{\'{a}} and Petr Sgall and Petr Pajas and Jan {\v{S}}t{\v{e}}p{\'{a}}nek and Ji{\v{r}}{\'{i}} Havelka and Marie Mikulov{\'{a}} and Zden{\v{e}}k {\v{Z}}abokrtsk{\'{y}} and Magda {\v{S}}ev{\v{c}}{\'{i}}kov{\'{a}}-Raz{\'{i}}mov{\'{a}}}, year = {2006}, title = {{Prague Dependency Treebank 2.0}}, howpublished = {CD-ROM, Linguistic Data Consortium, LDC Catalog No.: LDC2006T01, Philadelphia}, publisher = {Linguistic Data Consortium}, address = {Philadelphia, {PA}, {USA}}, isbn = {1-58563-370-4}, } @misc{da, author = {Kromann, Matthias T. 
and Mikkelsen, Line and Stine Kern Lynge}, year = {2004}, title = {Danish Dependency Treebank}, url = {http://code.google.com/p/copenhagen-dependency-treebank/}, address = {K{\o}benhavn, Denmark}, } @inproceedings{de, author = {Sabine Brants and Stefanie Dipper and Silvia Hansen and Wolfgang Lezius and George Smith}, year = {2002}, title = {The {TIGER} Treebank}, booktitle = "Proceedings of the Workshop on Treebanks and Linguistic Theories", address = {Sozopol}, postscript = "http://www.ims.uni-stuttgart.de/projekte/TIGER/paper/treeling2002.ps.gz", pdf = "http://www.ims.uni-stuttgart.de/projekte/TIGER/paper/treeling2002.pdf", keywords = {Treebank,German,TIGER} } % Same reference for de, es, fr and id! @inproceedings{googleudt, author = {McDonald, Ryan and Nivre, Joakim and Quirmbach-Brundage, Yvonne and Goldberg, Yoav and Das, Dipanjan and Ganchev, Kuzman and Hall, Keith and Petrov, Slav and Zhang, Hao and Täckström, Oscar and Bedini, Claudia and Bertomeu Castelló, Núria and Lee, Jungmee}, year = {2013}, title = {Universal Dependency Annotation for Multilingual Parsing}, booktitle = {Proceedings of {ACL}}, url = {http://ryanmcd.com/papers/treebanksACL2013.pdf} } @inproceedings{el, author = {Prokopis Prokopidis and Elina Desipri and Maria Koutsombogera and Harris Papageorgiou and Stelios Piperidis}, year = {2005}, title = {Theoretical and practical issues in the construction of a {G}reek dependency treebank}, booktitle = {In Proc. of the 4th Workshop on Treebanks and Linguistic Theories (TLT)}, pages = {149--160} } @inproceedings{en, author = {Surdeanu, Mihai and Johansson, Richard and Meyers, Adam and Màrquez, Lluís and Nivre, Joakim}, year = {2008}, title = {The {CoNLL-2008} Shared Task on Joint Parsing of Syntactic and Semantic Dependencies}, booktitle = {Proceedings of {CoNLL}} } @article{penn, author = {Marcus, Mitchell P. 
and Santorini, Beatrice and Marcinkiewicz, Mary Ann}, year = {1993}, title = {Building a Large Annotated Corpus of English: The Penn Treebank}, journal = {Computational Linguistics}, volume = {19}, number = {2}, pages = {313--330} } @inproceedings{enwebtbstanford, author = {Silveira, Natalia and Dozat, Timothy and de Marneffe, Marie-Catherine and Bowman, Samuel R. and Connor, Miriam and Bauer, John and Manning, Christopher D.}, year = {2014}, title = {A Gold Standard Dependency Corpus for {English}}, booktitle = {Proceedings of {LREC} 2014}, url = {http://nlp.stanford.edu/pubs/Gold_LREC14.pdf} } % Same reference for ca and es! @inproceedings{es, author = {Mariona Taul{\'e} and Maria Ant{\`o}nia Mart\'{\i} and Marta Recasens}, title = {{AnCora}: Multilevel Annotated Corpora for {Catalan} and {Spanish}}, booktitle = {LREC}, year = {2008}, ee = {http://www.lrec-conf.org/proceedings/lrec2008/summaries/35.html}, crossref = {DBLP:conf/lrec/2008}, bibsource = {DBLP, http://dblp.uni-trier.de} } @inproceedings{et, author = {Bick, Eckhard and Uibo, Heli and Müürisep, Kaili}, year = {2004}, title = {Arborest -- a {VISL}-Style Treebank Derived from an {E}stonian Constraint Grammar Corpus}, booktitle = {Proceedings of Treebanks and Linguistic Theories}, url = {http://beta.visl.sdu.dk/pdf/Bick_Uibo_Muurisep_TLT04.pdf} } @inproceedings{eu, author = {Aduriz, Itziar and Aranzabe, María Jesús and Arriola, Jose Mari and Atutxa, Aitziber and Díaz de Ilarraza, Arantza and Garmendia, Aitzpea and Oronoz, Maite}, year = {2003}, title = {Construction of a {Basque} dependency treebank}, booktitle = "Proceedings of the 2nd Workshop on Treebanks and Linguistic Theories", } % Persian Dependency Treebank / Tehran @inproceedings{fa, author = {Rasooli, Mohammad Sadegh and Moloodi, Amirsaeid and Kouhestani, Manouchehr and Minaei-Bidgoli, Behrouz}, year = {2011}, title = {A Syntactic Valency Lexicon for {Persian} Verbs: The First Steps towards {Persian} Dependency Treebank}, booktitle = {5th 
Language \& Technology Conference ({LTC}): Human Language Technologies as a Challenge for Computer Science and Linguistics}, pages = {227--231}, address = {Poznań, Poland} } % Uppsala Persian Dependency Treebank @book{faupdt, author = {Seraji, Mojgan}, year = {2015}, title = {Morphosyntactic Corpora and Tools for {Persian}. {PhD} Thesis. Studia Linguistica Upsaliensia 16} } @inproceedings{fi, author = {Katri Haverinen and Timo Viljanen and Veronika Laippala and Samuel Kohonen and Filip Ginter and Tapio Salakoski}, title = {Treebanking {F}innish}, booktitle = {Proceedings of the Ninth International Workshop on Treebanks and Linguistic Theories (TLT9)}, year = {2010}, pages = {79--90}, url = {http://hdl.handle.net/10062/15936} } @inproceedings{finntb, author = {Atro Voutilainen and Tanja Purtonen and Kristiina Muhonen and Krister Lindén}, year = {2012}, title = {Specifying Treebanks, Outsourcing Parsebanks: {FinnTreeBank} 3}, booktitle = {Proceedings of LREC 2012} } @inproceedings{ga, author = {Lynn, Teresa and Foster, Jennifer and Dras, Mark and Tounsi, Lamia}, year = {2014}, title = {Cross-lingual Transfer Parsing for Low-Resourced Languages: An {Irish} Case Study}, booktitle = {{CLTW} 2014}, url = {http://www.nclt.dcu.ie/~tlynn/CLTW.pdf}, address = {Dublin, Ireland} } % Same reference for grc and la! 
@incollection {grc, author = {Bamman, David and Crane, Gregory}, year = {2011}, affiliation = {Perseus Project, Tufts University, Medford/Somerville, USA}, title = {The {A}ncient {G}reek and {L}atin Dependency Treebanks}, booktitle = {Language Technology for Cultural Heritage}, series = {Theory and Applications of Natural Language Processing}, publisher = {Springer Berlin Heidelberg}, isbn = {978-3-642-20227-8}, keyword = {Computer Science}, pages = {79--98}, } @book{he, author = {Goldberg, Yoav}, year = {2011}, title = {Automatic Syntactic Processing of Modern {Hebrew} ({PhD} thesis)}, url = {http://www.cs.bgu.ac.il/~nlpproj/yoav-phd.pdf} } @inproceedings{hr, author = {Agić, Željko and Ljubešić, Nikola}, year = {2014}, title = {The {SETimes.HR} Linguistically Annotated Corpus of {Croatian}}, booktitle = {Proceedings of LREC 2014}, pages = {1724--1727}, address = {Reykjavík, Iceland} } @inproceedings{it, author = {Simonetta Montemagni and Francesco Barsotti and Marco Battista and Nicoletta Calzolari and Ornella Corazzari and Alessandro Lenci and Antonio Zampolli and Francesca Fanciulli and Maria Massetani and Remo Raffaelli and Roberto Basili and Maria Teresa Pazienza and Dario Saracino and Fabio Zanzotto and Nadia Mana and Fabio Pianesi and Rodolfo Delmonte}, year = 2003, title = {Building the {I}talian Syntactic-Semantic Treebank}, booktitle = {Building and using Parsed Corpora}, pages = {189--210}, editor = {Anne Abeillé}, series = {Language and Speech series}, address = {Dordrecht}, publisher = {Kluwer} } @inproceedings{isdt, author = {Bosco, Cristina and Montemagni, Simonetta and Simi, Maria}, year = {2013}, title = {Converting {Italian} Treebanks: Towards an {Italian} {Stanford} Dependency Treebank}, booktitle = {Proceedings of the 7th Linguistic Annotation Workshop \& Interoperability with Discourse (LAW VII \& ID at ACL-2013)}, pages = {61--69}, address = {Sofia, Bulgaria} } % Same reference for hi, bn and te! 
@inproceedings{hi, author = {Husain, Samar and Mannem, Prashanth and Ambati, Bharat and Gadde, Phani}, year = {2010}, title = {The {ICON-2010} tools contest on {Indian} language dependency parsing}, booktitle = {Proceedings of {ICON-2010} Tools Contest on {Indian} Language Dependency Parsing}, address = {Kharagpur, India} } @inproceedings{hu, author = {D{\'o}ra Csendes and J{\'a}nos Csirik and Tibor Gyim{\'o}thy and Andr{\'a}s Kocsor}, year = {2005}, title = {The {S}zeged Treebank}, booktitle = {TSD}, pages = {123-131}, ee = {http://dx.doi.org/10.1007/11551874_16}, crossref = {DBLP:conf/tsd/2005}, bibsource = {DBLP, http://dblp.uni-trier.de} } @inproceedings{ja, author = {Kawata, Yasuhiro and Bartels, Julia}, year = {2000}, title = {Stylebook for the {Japanese} Treebank in {Verbmobil}}, booktitle = {Report 240}, month = {September 29}, address = {Tübingen, Germany} } @article{lait, author = {Passarotti, Marco and Dell’Orletta, Felice}, title = {Improvements in parsing the index thomisticus treebank. Revision, combination and a feature model for medieval Latin}, journal = {Training}, volume = {2}, pages = {61--024}, year = {2010} } @inproceedings{nl, author = {Leonoor van der Beek and Bouma, Gosse and Daciuk, Jan and Gaustad, Tanja and Malouf, Robert and Gertjan van Noord and Prins, Robbert and Villada, Begoña}, year = {2002}, title = {Chapter 5. 
The {Alpino} Dependency Treebank}, booktitle = {Algorithms for Linguistic Processing {NWO PIONIER} Progress Report}, address = {Groningen, The Netherlands}, url = {http://odur.let.rug.nl/~vannoord/trees/Papers/report_ch5.pdf} } @inproceedings{pl, author = {Wr{\'o}blewska, Alina and Przepi{\'o}rkowski, Adam}, title = {Projection-based Annotation of a {Polish} Dependency Treebank}, booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation, LREC}, pages = {2306--2312}, address = {Reykjavík, Iceland}, year = {2014} } @inproceedings{pt, author = {Susana Afonso and Eckhard Bick and Renato Haber and Diana Santos}, year = {2002}, title = {{{``}Floresta sint{\'a}(c)tica{''}:} a treebank for {P}ortuguese}, booktitle = {Proceedings of the 3rd International Conference on Language Resources and Evaluation (LREC)}, address = {Las Palmas, Spain}, pages = {1698--1703}, url = {http://www.lrec-conf.org/proceedings/lrec2002/sumarios/1.htm} } @mastersthesis{ro, author = {Călăcean, Mihaela}, year = {2008}, title = {Data-driven Dependency Parsing for {R}omanian}, month = {August}, school = {Uppsala University}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.153.6068&rep=rep1&type=pdf} } @inproceedings{ru, author = {Boguslavsky, Igor and Grigorieva, Svetlana and Grigoriev, Nikolai and Kreidlin, Leonid and Frid, Nadezhda}, year = {2000}, title = {{Dependency treebank for Russian: Concept, tools, types of information}}, booktitle = {Proceedings of the 18th conference on Computational linguistics-Volume 2}, pages = {987--991}, organization={Association for Computational Linguistics Morristown, NJ, USA} } @incollection{ru-alt1, author = {Boguslavsky, Igor and Iomdin, Leonid and Petrochenkov, Vadim and Sizov, Victor and Tsinman, Leonid}, year = {2013}, title = {A Case of Hybrid Parsing: Rules Refined by Empirical and Corpus Statistics}, booktitle = {Computational Dependency Theory}, editor = {Kim Gerdes and Eva Hajičová and Leo 
Wanner}, publisher = {{IOS} Press}, isbn = {978-1-61499-351-3}, doi = {10.3233/978-1-61499-352-0-226}, volume = {258}, journal = {Frontiers in Artificial Intelligence and Applications}, pages = {226--240}, address = {Amsterdam, Netherlands}, } @incollection{ru-alt2, author = {Iomdin, Leonid}, year = {2014}, title = {Automatic Text Processing and Deeply Annotated Text Corpora of Russian: Interaction and Mutual Impact}, booktitle = {Jazykovedné štúdie XXXI.}, publisher = {Jazykovedný ústav Ľudovíta Štúra Slovenskej akadémie vied}, pages = {136--146}, isbn = {978-80-224-1391}, address = {Bratislava, Slovakia}, } @inproceedings{sk, author = {Mária {\noopsort{Szimkova}}Šimková and Radovan Garabík}, year = {2006}, title = {Sintaksičeskaja razmetka v Slovackom nacional'nom korpuse ({\selectlanguage{russian}Синтаксическая разметка в Словацком национальном корпусе})}, booktitle = {Trudy meždunarodnoj konferencii Korpusnaja lingvistika ({\selectlanguage{russian}Tруды международной конференции Корпусная лингвистика}) – 2006}, publisher = {St. 
Petersburg University Press}, address = {Sankt-Peterburg, Russia}, pages = {389--394}, isbn = {5-288-04181-4} } @inproceedings{sl, author = {D\v{z}eroski, Sa\v{s}o and Erjavec, Toma\v{z} and Ledinek, Nina and Pajas, Petr and \v{Z}abokrtsk\'{y}, Zden\v{e}k and \v{Z}ele, Andreja}, year = {2006}, title = {Towards a {S}lovene Dependency Treebank}, booktitle = {Proceedings of the Fifth International Language Resources and Evaluation Conference, {LREC} 2006}, address = {Genova, Italy}, publisher = {European Language Resources Association ({ELRA})}, pages = {1388--1391}, url = {http://hnk.ffzg.hr/bibl/lrec2006/summaries/133.html} } @inproceedings{sv, author = {Nivre, Joakim and Nilsson, Jens and Hall, Johan}, year = {2006}, title = {Talbanken05: A {Swedish} Treebank with Phrase Structure and Dependency Annotation}, booktitle = {Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC} 2006)}, publisher = {European Language Resources Association ({ELRA})}, address = {Genova, Italy}, url = {http://www.msi.vxu.se/users/nivre/research/Talbanken05.html} } @inproceedings{ta, author = {Ramasamy, Loganathan and \v{Z}abokrtsk\'{y}, Zden\v{e}k}, year = {2012}, title = {Prague Dependency Style Treebank for {Tamil}}, booktitle = {Proceedings of Eighth International Conference on Language Resources and Evaluation ({LREC} 2012)}, address = {\.{I}stanbul, Turkey}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Mehmet Uğur Doğan and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, isbn = {978-2-9517408-7-7}, pages = {1888--1894}, url = {http://www.lrec-conf.org/proceedings/lrec2012/summaries/456.html} } @inproceedings{tr, author = {Nart B. 
Atalay and Kemal Oflazer and Bilge Say}, year = {2003}, title = {The Annotation Process in the {T}urkish Treebank}, booktitle = {In Proceedings of the 4th International Workshop on Linguistically Interpreted Corpora (LINC)}, publisher = {Association for Computational Linguistics}, address = {Budapest, Hungary}, }