% Conference paper (Findings of ACL 2023) on how tokenization affects multilingual language modeling.
@inproceedings{biblio-3703067950784700140,
  author    = {Limisiewicz, Tomasz and Balhar, Jiří and Mareček, David},
  title     = {Tokenization Impacts Multilingual Language Modeling: Assessing Vocabulary Allocation and Overlap Across Languages},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2023},
  year      = {2023},
  pages     = {5661--5681},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  isbn      = {978-1-959429-62-3},
}