@inproceedings{barriga-martinez-etal-2021-automatic,
title = "Automatic Interlinear Glossing for {O}tomi language",
author = "Barriga Mart{\'\i}nez, Diego and
Mijangos, Victor and
Gutierrez-Vasques, Ximena",
editor = "Mager, Manuel and
Oncevay, Arturo and
Rios, Annette and
Meza Ruiz, Ivan Vladimir and
Palmer, Alexis and
Neubig, Graham and
Kann, Katharina",
booktitle = "Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.americasnlp-1.5",
doi = "10.18653/v1/2021.americasnlp-1.5",
pages = "34--43",
abstract = "In linguistics, interlinear glossing is an essential procedure for analyzing the morphology of languages. This type of annotation is useful for language documentation, and it can also provide valuable data for NLP applications. We perform automatic glossing for Otomi, an under-resourced language. Our work also comprises the pre-processing and annotation of the corpus. We implement different sequential labelers. CRF models represented an efficient and good solution for our task. Two main observations emerged from our work: 1) models with a higher number of parameters (RNNs) performed worse in our low-resource scenario; and 2) the information encoded in the CRF feature function plays an important role in the prediction of labels; however, even in cases where POS tags are not available it is still possible to achieve competitive results.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barriga-martinez-etal-2021-automatic">
<titleInfo>
<title>Automatic Interlinear Glossing for Otomi language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Barriga Martínez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victor</namePart>
<namePart type="family">Mijangos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ximena</namePart>
<namePart type="family">Gutierrez-Vasques</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Mager</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Oncevay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annette</namePart>
<namePart type="family">Rios</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In linguistics, interlinear glossing is an essential procedure for analyzing the morphology of languages. This type of annotation is useful for language documentation, and it can also provide valuable data for NLP applications. We perform automatic glossing for Otomi, an under-resourced language. Our work also comprises the pre-processing and annotation of the corpus. We implement different sequential labelers. CRF models represented an efficient and good solution for our task. Two main observations emerged from our work: 1) models with a higher number of parameters (RNNs) performed worse in our low-resource scenario; and 2) the information encoded in the CRF feature function plays an important role in the prediction of labels; however, even in cases where POS tags are not available it is still possible to achieve competitive results.</abstract>
<identifier type="citekey">barriga-martinez-etal-2021-automatic</identifier>
<identifier type="doi">10.18653/v1/2021.americasnlp-1.5</identifier>
<location>
<url>https://aclanthology.org/2021.americasnlp-1.5</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>34</start>
<end>43</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Interlinear Glossing for Otomi language
%A Barriga Martínez, Diego
%A Mijangos, Victor
%A Gutierrez-Vasques, Ximena
%Y Mager, Manuel
%Y Oncevay, Arturo
%Y Rios, Annette
%Y Meza Ruiz, Ivan Vladimir
%Y Palmer, Alexis
%Y Neubig, Graham
%Y Kann, Katharina
%S Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F barriga-martinez-etal-2021-automatic
%X In linguistics, interlinear glossing is an essential procedure for analyzing the morphology of languages. This type of annotation is useful for language documentation, and it can also provide valuable data for NLP applications. We perform automatic glossing for Otomi, an under-resourced language. Our work also comprises the pre-processing and annotation of the corpus. We implement different sequential labelers. CRF models represented an efficient and good solution for our task. Two main observations emerged from our work: 1) models with a higher number of parameters (RNNs) performed worse in our low-resource scenario; and 2) the information encoded in the CRF feature function plays an important role in the prediction of labels; however, even in cases where POS tags are not available it is still possible to achieve competitive results.
%R 10.18653/v1/2021.americasnlp-1.5
%U https://aclanthology.org/2021.americasnlp-1.5
%U https://doi.org/10.18653/v1/2021.americasnlp-1.5
%P 34-43
Markdown (Informal)
[Automatic Interlinear Glossing for Otomi language](https://aclanthology.org/2021.americasnlp-1.5) (Barriga Martínez et al., AmericasNLP 2021)
ACL
- Diego Barriga Martínez, Victor Mijangos, and Ximena Gutierrez-Vasques. 2021. Automatic Interlinear Glossing for Otomi language. In Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas, pages 34–43, Online. Association for Computational Linguistics.