@inproceedings{che-xiao-2022-data,
title = "Data Analytics Meet Machine Translation",
author = "Che, Allen and
Xiao, Martin",
editor = "Campbell, Janice and
Larocca, Stephen and
Marciano, Jay and
Savenkov, Konstantin and
Yanishevsky, Alex",
booktitle = "Proceedings of the 15th Biennial Conference of the Association for Machine Translation in the Americas (Volume 2: Users and Providers Track and Government Track)",
month = sep,
year = "2022",
address = "Orlando, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2022.amta-upg.11",
pages = "144--158",
abstract = "Machine translation has become a critical piece of the localization industry. With all kinds of different data, how do you monitor the machine translation quality in your localized content? How do you build the quality analytics framework? This paper describes a process that starts with collecting the daily operation data, then cleaning the data and building the analytics framework to gain insight into the data. Finally, we{'}re going to share how to build the data collecting matrix and the script to clean up the data, and then run the analytics with an automation script. Lastly, we will share the different visualized reports, such as Box Plot, Standard Deviation, Mean, MT touchpoint and golden ratio reports.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="che-xiao-2022-data">
<titleInfo>
<title>Data Analytics Meet Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Allen</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Biennial Conference of the Association for Machine Translation in the Americas (Volume 2: Users and Providers Track and Government Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Janice</namePart>
<namePart type="family">Campbell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephen</namePart>
<namePart type="family">Larocca</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jay</namePart>
<namePart type="family">Marciano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantin</namePart>
<namePart type="family">Savenkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Yanishevsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Orlando, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine translation has become a critical piece of the localization industry. With all kinds of different data, how do you monitor the machine translation quality in your localized content? How do you build the quality analytics framework? This paper describes a process that starts with collecting the daily operation data, then cleaning the data and building the analytics framework to gain insight into the data. Finally, we’re going to share how to build the data collecting matrix and the script to clean up the data, and then run the analytics with an automation script. Lastly, we will share the different visualized reports, such as Box Plot, Standard Deviation, Mean, MT touchpoint and golden ratio reports.</abstract>
<identifier type="citekey">che-xiao-2022-data</identifier>
<location>
<url>https://aclanthology.org/2022.amta-upg.11</url>
</location>
<part>
<date>2022-09</date>
<extent unit="page">
<start>144</start>
<end>158</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data Analytics Meet Machine Translation
%A Che, Allen
%A Xiao, Martin
%Y Campbell, Janice
%Y Larocca, Stephen
%Y Marciano, Jay
%Y Savenkov, Konstantin
%Y Yanishevsky, Alex
%S Proceedings of the 15th Biennial Conference of the Association for Machine Translation in the Americas (Volume 2: Users and Providers Track and Government Track)
%D 2022
%8 September
%I Association for Machine Translation in the Americas
%C Orlando, USA
%F che-xiao-2022-data
%X Machine translation has become a critical piece of the localization industry. With all kinds of different data, how do you monitor the machine translation quality in your localized content? How do you build the quality analytics framework? This paper describes a process that starts with collecting the daily operation data, then cleaning the data and building the analytics framework to gain insight into the data. Finally, we’re going to share how to build the data collecting matrix and the script to clean up the data, and then run the analytics with an automation script. Lastly, we will share the different visualized reports, such as Box Plot, Standard Deviation, Mean, MT touchpoint and golden ratio reports.
%U https://aclanthology.org/2022.amta-upg.11
%P 144-158
Markdown (Informal)
[Data Analytics Meet Machine Translation](https://aclanthology.org/2022.amta-upg.11) (Che & Xiao, AMTA 2022)
ACL
- Allen Che and Martin Xiao. 2022. Data Analytics Meet Machine Translation. In Proceedings of the 15th Biennial Conference of the Association for Machine Translation in the Americas (Volume 2: Users and Providers Track and Government Track), pages 144–158, Orlando, USA. Association for Machine Translation in the Americas.