% Conference paper in the Findings of ACL 2023 volume (pp. 5661--5681) on how tokenization affects multilingual language modeling.
@inproceedings{biblio-3703067950784700140,
  author    = {Limisiewicz, Tomasz and Balhar, Ji{\v{r}}{\'\i} and Mare{\v{c}}ek, David},
  title     = {Tokenization Impacts Multilingual Language Modeling: Assessing Vocabulary Allocation and Overlap Across Languages},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2023},
  year      = 2023,
  pages     = {5661--5681},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  isbn      = {978-1-959429-62-3},
}