@inproceedings{ryan-hulden-2020-data,
title = "Data Augmentation for Transformer-based {G}2{P}",
author = "Ryan, Zach and
Hulden, Mans",
editor = "Nicolai, Garrett and
Gorman, Kyle and
Cotterell, Ryan",
booktitle = "Proceedings of the 17th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.sigmorphon-1.21",
doi = "10.18653/v1/2020.sigmorphon-1.21",
pages = "184--188",
abstract = "The Transformer model has been shown to outperform other neural seq2seq models in several character-level tasks. It is unclear, however, if the Transformer would benefit as much as other seq2seq models from data augmentation strategies in the low-resource setting. In this paper we explore strategies for data augmentation in the g2p task together with the Transformer model. Our results show that a relatively simple alignment-based strategy of identifying consistent input-output subsequences in grapheme-phoneme data coupled together with a subsequent splicing together of such pieces to generate hallucinated data works well in the low-resource setting, often delivering substantial performance improvement over a standard Transformer model.",
}
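The abstract above describes the augmentation recipe only at a high level: align grapheme and phoneme sequences, harvest consistent input-output subsequences, and splice such pieces together into new "hallucinated" training pairs. The Python sketch below is a minimal, hypothetical illustration of that idea, not the authors' implementation: it assumes a naive one-to-one character alignment (the paper relies on a proper alignment step to identify consistent chunks), and names such as hallucinate_pairs are invented for the example.

import random

# Hypothetical sketch of alignment-based data hallucination for g2p;
# not the authors' code. A naive 1:1 character alignment stands in for
# the learned alignment used to find consistent grapheme-phoneme chunks.

def aligned_chunks(graphemes, phonemes):
    """Pair grapheme/phoneme units under a toy one-to-one alignment."""
    n = min(len(graphemes), len(phonemes))
    return [(graphemes[i], phonemes[i]) for i in range(n)]

def hallucinate_pairs(lexicon, n_new, seed=0):
    """Splice aligned chunks from real entries into synthetic pairs."""
    rng = random.Random(seed)
    chunks = [c for g, p in lexicon for c in aligned_chunks(g, p)]
    synthetic = []
    for _ in range(n_new):
        k = rng.randint(2, 5)  # length of the spliced word, in chunks
        picked = [rng.choice(chunks) for _ in range(k)]
        synthetic.append(("".join(g for g, _ in picked),
                          " ".join(p for _, p in picked)))
    return synthetic

if __name__ == "__main__":
    # Toy lexicon: grapheme string paired with its phoneme sequence.
    lexicon = [("cat", ["k", "ae", "t"]), ("dog", ["d", "ao", "g"])]
    for g, p in hallucinate_pairs(lexicon, 3):
        print(g, "->", p)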