@inproceedings{balouchzahi-etal-2022-overview,
title = "Overview of {C}o{LI}-Kanglish: Word Level Language Identification in Code-mixed {K}annada-{E}nglish Texts at {ICON} 2022",
author = "Balouchzahi, F. and
Butt, S. and
Hegde, A. and
Ashraf, N. and
Shashirekha, H.l. and
Sidorov, Grigori and
Gelbukh, Alexander",
editor = "Chakravarthi, Bharathi Raja and
Murugappan, Abirami and
Chinnappa, Dhivya and
Hane, Adeep and
Kumeresan, Prasanna Kumar and
Ponnusamy, Rahul",
booktitle = "Proceedings of the 19th International Conference on Natural Language Processing (ICON): Shared Task on Word Level Language Identification in Code-mixed Kannada-English Texts",
month = dec,
year = "2022",
address = "IIIT Delhi, New Delhi, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.icon-wlli.8",
pages = "38--45",
abstract = "The task of Language Identification (LI) in text processing refers to automatically identifying the languages used in a text document. LI task is usually been studied at the document level and often in high-resource languages while giving less importance to low-resource languages. However, with the recent advance- ment in technologies, in a multilingual country like India, many low-resource language users post their comments using English and one or more language(s) in the form of code-mixed texts. Combination of Kannada and English is one such code-mixed text of mixing Kannada and English languages at various levels. To address the word level LI in code-mixed text, in CoLI-Kanglish shared task, we have focused on open-sourcing a Kannada-English code-mixed dataset for word level LI of Kannada, English and mixed-language words written in Roman script. The task includes classifying each word in the given text into one of six predefined categories, namely: Kannada (kn), English (en), Kannada-English (kn-en), Name (name), Lo-cation (location), and Other (other). Among the models submitted by all the participants, the best performing model obtained averaged-weighted and averaged-macro F1 scores of 0.86 and 0.62 respectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="balouchzahi-etal-2022-overview">
<titleInfo>
<title>Overview of CoLI-Kanglish: Word Level Language Identification in Code-mixed Kannada-English Texts at ICON 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">F</namePart>
<namePart type="family">Balouchzahi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">S</namePart>
<namePart type="family">Butt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="family">Hegde</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">N</namePart>
<namePart type="family">Ashraf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">H.l.</namePart>
<namePart type="family">Shashirekha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Grigori</namePart>
<namePart type="family">Sidorov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Gelbukh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Conference on Natural Language Processing (ICON): Shared Task on Word Level Language Identification in Code-mixed Kannada-English Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abirami</namePart>
<namePart type="family">Murugappan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhivya</namePart>
<namePart type="family">Chinnappa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adeep</namePart>
<namePart type="family">Hane</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prasanna</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Kumeresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rahul</namePart>
<namePart type="family">Ponnusamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">IIIT Delhi, New Delhi, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The task of Language Identification (LI) in text processing refers to automatically identifying the languages used in a text document. LI task is usually been studied at the document level and often in high-resource languages while giving less importance to low-resource languages. However, with the recent advance- ment in technologies, in a multilingual country like India, many low-resource language users post their comments using English and one or more language(s) in the form of code-mixed texts. Combination of Kannada and English is one such code-mixed text of mixing Kannada and English languages at various levels. To address the word level LI in code-mixed text, in CoLI-Kanglish shared task, we have focused on open-sourcing a Kannada-English code-mixed dataset for word level LI of Kannada, English and mixed-language words written in Roman script. The task includes classifying each word in the given text into one of six predefined categories, namely: Kannada (kn), English (en), Kannada-English (kn-en), Name (name), Lo-cation (location), and Other (other). Among the models submitted by all the participants, the best performing model obtained averaged-weighted and averaged-macro F1 scores of 0.86 and 0.62 respectively.</abstract>
<identifier type="citekey">balouchzahi-etal-2022-overview</identifier>
<location>
<url>https://aclanthology.org/2022.icon-wlli.8</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>38</start>
<end>45</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Overview of CoLI-Kanglish: Word Level Language Identification in Code-mixed Kannada-English Texts at ICON 2022
%A Balouchzahi, F.
%A Butt, S.
%A Hegde, A.
%A Ashraf, N.
%A Shashirekha, H.l.
%A Sidorov, Grigori
%A Gelbukh, Alexander
%Y Chakravarthi, Bharathi Raja
%Y Murugappan, Abirami
%Y Chinnappa, Dhivya
%Y Hane, Adeep
%Y Kumeresan, Prasanna Kumar
%Y Ponnusamy, Rahul
%S Proceedings of the 19th International Conference on Natural Language Processing (ICON): Shared Task on Word Level Language Identification in Code-mixed Kannada-English Texts
%D 2022
%8 December
%I Association for Computational Linguistics
%C IIIT Delhi, New Delhi, India
%F balouchzahi-etal-2022-overview
%X The task of Language Identification (LI) in text processing refers to automatically identifying the languages used in a text document. LI task is usually been studied at the document level and often in high-resource languages while giving less importance to low-resource languages. However, with the recent advance- ment in technologies, in a multilingual country like India, many low-resource language users post their comments using English and one or more language(s) in the form of code-mixed texts. Combination of Kannada and English is one such code-mixed text of mixing Kannada and English languages at various levels. To address the word level LI in code-mixed text, in CoLI-Kanglish shared task, we have focused on open-sourcing a Kannada-English code-mixed dataset for word level LI of Kannada, English and mixed-language words written in Roman script. The task includes classifying each word in the given text into one of six predefined categories, namely: Kannada (kn), English (en), Kannada-English (kn-en), Name (name), Lo-cation (location), and Other (other). Among the models submitted by all the participants, the best performing model obtained averaged-weighted and averaged-macro F1 scores of 0.86 and 0.62 respectively.
%U https://aclanthology.org/2022.icon-wlli.8
%P 38-45
Markdown (Informal)
[Overview of CoLI-Kanglish: Word Level Language Identification in Code-mixed Kannada-English Texts at ICON 2022](https://aclanthology.org/2022.icon-wlli.8) (Balouchzahi et al., ICON 2022)
ACL
- F. Balouchzahi, S. Butt, A. Hegde, N. Ashraf, H.l. Shashirekha, Grigori Sidorov, and Alexander Gelbukh. 2022. Overview of CoLI-Kanglish: Word Level Language Identification in Code-mixed Kannada-English Texts at ICON 2022. In Proceedings of the 19th International Conference on Natural Language Processing (ICON): Shared Task on Word Level Language Identification in Code-mixed Kannada-English Texts, pages 38–45, IIIT Delhi, New Delhi, India. Association for Computational Linguistics.