@inproceedings{dasari-etal-2023-transformer,
title = "Transformer-based Context Aware Morphological Analyzer for {T}elugu",
author = "Dasari, Priyanka and
Chelpuri, Abhijith and
Vuppala, Nagaraju and
Marreddy, Mounika and
Krishnamurthy, Parameshwari and
Mamidi, Radhika",
editor = "Chakravarthi, Bharathi R. and
Priyadharshini, Ruba and
M, Anand Kumar and
Thavareesan, Sajeetha and
Sherly, Elizabeth",
booktitle = "Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.dravidianlangtech-1.4",
pages = "25--32",
abstract = "This paper addresses the challenges faced by Indian languages in leveraging deep learning for natural language processing (NLP) due to limited resources, annotated datasets, and Transformer-based architectures. We specifically focus on Telugu and aim to construct a Telugu morph analyzer dataset comprising 10,000 sentences. Furthermore, we assess the performance of established multi-lingual Transformer models (m-Bert, XLM-R, IndicBERT) and mono-lingual Transformer models trained from scratch on an extensive Telugu corpus comprising 80,15,588 sentences (BERT-Te). Our findings demonstrate the efficacy of Transformer-based representations pretrained on Telugu data in improving the performance of the Telugu morph analyzer, surpassing existing multi-lingual approaches. This highlights the necessity of developing dedicated corpora, annotated datasets, and machine learning models in a mono-lingual setting. We present benchmark results for the Telugu morph analyzer achieved through simple fine-tuning on our dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dasari-etal-2023-transformer">
<titleInfo>
<title>Transformer-based Context Aware Morphological Analyzer for Telugu</title>
</titleInfo>
<name type="personal">
<namePart type="given">Priyanka</namePart>
<namePart type="family">Dasari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhijith</namePart>
<namePart type="family">Chelpuri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nagaraju</namePart>
<namePart type="family">Vuppala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mounika</namePart>
<namePart type="family">Marreddy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parameshwari</namePart>
<namePart type="family">Krishnamurthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Radhika</namePart>
<namePart type="family">Mamidi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">M</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajeetha</namePart>
<namePart type="family">Thavareesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper addresses the challenges faced by Indian languages in leveraging deep learning for natural language processing (NLP) due to limited resources, annotated datasets, and Transformer-based architectures. We specifically focus on Telugu and aim to construct a Telugu morph analyzer dataset comprising 10,000 sentences. Furthermore, we assess the performance of established multi-lingual Transformer models (m-Bert, XLM-R, IndicBERT) and mono-lingual Transformer models trained from scratch on an extensive Telugu corpus comprising 80,15,588 sentences (BERT-Te). Our findings demonstrate the efficacy of Transformer-based representations pretrained on Telugu data in improving the performance of the Telugu morph analyzer, surpassing existing multi-lingual approaches. This highlights the necessity of developing dedicated corpora, annotated datasets, and machine learning models in a mono-lingual setting. We present benchmark results for the Telugu morph analyzer achieved through simple fine-tuning on our dataset.</abstract>
<identifier type="citekey">dasari-etal-2023-transformer</identifier>
<location>
<url>https://aclanthology.org/2023.dravidianlangtech-1.4</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>25</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Transformer-based Context Aware Morphological Analyzer for Telugu
%A Dasari, Priyanka
%A Chelpuri, Abhijith
%A Vuppala, Nagaraju
%A Marreddy, Mounika
%A Krishnamurthy, Parameshwari
%A Mamidi, Radhika
%Y Chakravarthi, Bharathi R.
%Y Priyadharshini, Ruba
%Y M, Anand Kumar
%Y Thavareesan, Sajeetha
%Y Sherly, Elizabeth
%S Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F dasari-etal-2023-transformer
%X This paper addresses the challenges faced by Indian languages in leveraging deep learning for natural language processing (NLP) due to limited resources, annotated datasets, and Transformer-based architectures. We specifically focus on Telugu and aim to construct a Telugu morph analyzer dataset comprising 10,000 sentences. Furthermore, we assess the performance of established multi-lingual Transformer models (m-Bert, XLM-R, IndicBERT) and mono-lingual Transformer models trained from scratch on an extensive Telugu corpus comprising 80,15,588 sentences (BERT-Te). Our findings demonstrate the efficacy of Transformer-based representations pretrained on Telugu data in improving the performance of the Telugu morph analyzer, surpassing existing multi-lingual approaches. This highlights the necessity of developing dedicated corpora, annotated datasets, and machine learning models in a mono-lingual setting. We present benchmark results for the Telugu morph analyzer achieved through simple fine-tuning on our dataset.
%U https://aclanthology.org/2023.dravidianlangtech-1.4
%P 25-32
Markdown (Informal)
[Transformer-based Context Aware Morphological Analyzer for Telugu](https://aclanthology.org/2023.dravidianlangtech-1.4) (Dasari et al., DravidianLangTech-WS 2023)
ACL
- Priyanka Dasari, Abhijith Chelpuri, Nagaraju Vuppala, Mounika Marreddy, Parameshwari Krishnamurthy, and Radhika Mamidi. 2023. Transformer-based Context Aware Morphological Analyzer for Telugu. In Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages, pages 25–32, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.