@inproceedings{hegel-etal-2020-substance,
title = "Substance over {S}tyle: {D}ocument-{L}evel {T}argeted {C}ontent {T}ransfer",
author = "Hegel, Allison and
Rao, Sudha and
Celikyilmaz, Asli and
Dolan, Bill",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.526",
doi = "10.18653/v1/2020.emnlp-main.526",
pages = "6485--6504",
abstract = "Existing language models excel at writing from scratch, but many real-world scenarios require rewriting an existing document to fit a set of constraints. Although sentence-level rewriting has been fairly well-studied, little work has addressed the challenge of rewriting an entire document coherently. In this work, we introduce the task of document-level targeted content transfer and address it in the recipe domain, with a recipe as the document and a dietary restriction (such as vegan or dairy-free) as the targeted constraint. We propose a novel model for this task based on the generative pre-trained language model (GPT-2) and train on a large number of roughly-aligned recipe pairs. Both automatic and human evaluations show that our model out-performs existing methods by generating coherent and diverse rewrites that obey the constraint while remaining close to the original document. Finally, we analyze our model{'}s rewrites to assess progress toward the goal of making language generation more attuned to constraints that are substantive rather than stylistic.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hegel-etal-2020-substance">
<titleInfo>
<title>Substance over Style: Document-Level Targeted Content Transfer</title>
</titleInfo>
<name type="personal">
<namePart type="given">Allison</namePart>
<namePart type="family">Hegel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudha</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asli</namePart>
<namePart type="family">Celikyilmaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bill</namePart>
<namePart type="family">Dolan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bonnie</namePart>
<namePart type="family">Webber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Existing language models excel at writing from scratch, but many real-world scenarios require rewriting an existing document to fit a set of constraints. Although sentence-level rewriting has been fairly well-studied, little work has addressed the challenge of rewriting an entire document coherently. In this work, we introduce the task of document-level targeted content transfer and address it in the recipe domain, with a recipe as the document and a dietary restriction (such as vegan or dairy-free) as the targeted constraint. We propose a novel model for this task based on the generative pre-trained language model (GPT-2) and train on a large number of roughly-aligned recipe pairs. Both automatic and human evaluations show that our model out-performs existing methods by generating coherent and diverse rewrites that obey the constraint while remaining close to the original document. Finally, we analyze our model’s rewrites to assess progress toward the goal of making language generation more attuned to constraints that are substantive rather than stylistic.</abstract>
<identifier type="citekey">hegel-etal-2020-substance</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-main.526</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-main.526</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>6485</start>
<end>6504</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Substance over Style: Document-Level Targeted Content Transfer
%A Hegel, Allison
%A Rao, Sudha
%A Celikyilmaz, Asli
%A Dolan, Bill
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F hegel-etal-2020-substance
%X Existing language models excel at writing from scratch, but many real-world scenarios require rewriting an existing document to fit a set of constraints. Although sentence-level rewriting has been fairly well-studied, little work has addressed the challenge of rewriting an entire document coherently. In this work, we introduce the task of document-level targeted content transfer and address it in the recipe domain, with a recipe as the document and a dietary restriction (such as vegan or dairy-free) as the targeted constraint. We propose a novel model for this task based on the generative pre-trained language model (GPT-2) and train on a large number of roughly-aligned recipe pairs. Both automatic and human evaluations show that our model out-performs existing methods by generating coherent and diverse rewrites that obey the constraint while remaining close to the original document. Finally, we analyze our model’s rewrites to assess progress toward the goal of making language generation more attuned to constraints that are substantive rather than stylistic.
%R 10.18653/v1/2020.emnlp-main.526
%U https://aclanthology.org/2020.emnlp-main.526
%U https://doi.org/10.18653/v1/2020.emnlp-main.526
%P 6485-6504
Markdown (Informal)
[Substance over Style: Document-Level Targeted Content Transfer](https://aclanthology.org/2020.emnlp-main.526) (Hegel et al., EMNLP 2020)
ACL