@inproceedings{eikema-2024-effect,
title = "The Effect of Generalisation on the Inadequacy of the Mode",
author = "Eikema, Bryan",
editor = {V{\'a}zquez, Ra{\'u}l and
Celikkanat, Hande and
Ulmer, Dennis and
Tiedemann, J{\"o}rg and
Swayamdipta, Swabha and
Aziz, Wilker and
Plank, Barbara and
Baan, Joris and
de Marneffe, Marie-Catherine},
booktitle = "Proceedings of the 1st Workshop on Uncertainty-Aware NLP (UncertaiNLP 2024)",
month = mar,
year = "2024",
address = "St Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.uncertainlp-1.9",
pages = "87--92",
abstract = "The highest probability sequences of most neural language generation models tend to be degenerate in some way, a problem known as the inadequacy of the mode. While many approaches to tackling particular aspects of the problem exist, such as dealing with too short sequences or excessive repetitions, explanations of why it occurs in the first place are rarer and do not agree with each other. We believe none of the existing explanations paint a complete picture. In this position paper, we want to bring light to the incredible complexity of the modelling task and the problems that generalising to previously unseen contexts bring. We argue that our desire for models to generalise to contexts it has never observed before is exactly what leads to spread of probability mass and inadequate modes. While we do not claim that adequate modes are impossible, we argue that they are not to be expected either.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="eikema-2024-effect">
<titleInfo>
<title>The Effect of Generalisation on the Inadequacy of the Mode</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bryan</namePart>
<namePart type="family">Eikema</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Uncertainty-Aware NLP (UncertaiNLP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raúl</namePart>
<namePart type="family">Vázquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hande</namePart>
<namePart type="family">Celikkanat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dennis</namePart>
<namePart type="family">Ulmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swabha</namePart>
<namePart type="family">Swayamdipta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wilker</namePart>
<namePart type="family">Aziz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joris</namePart>
<namePart type="family">Baan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The highest probability sequences of most neural language generation models tend to be degenerate in some way, a problem known as the inadequacy of the mode. While many approaches to tackling particular aspects of the problem exist, such as dealing with too short sequences or excessive repetitions, explanations of why it occurs in the first place are rarer and do not agree with each other. We believe none of the existing explanations paint a complete picture. In this position paper, we want to bring light to the incredible complexity of the modelling task and the problems that generalising to previously unseen contexts bring. We argue that our desire for models to generalise to contexts it has never observed before is exactly what leads to spread of probability mass and inadequate modes. While we do not claim that adequate modes are impossible, we argue that they are not to be expected either.</abstract>
<identifier type="citekey">eikema-2024-effect</identifier>
<location>
<url>https://aclanthology.org/2024.uncertainlp-1.9</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>87</start>
<end>92</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Effect of Generalisation on the Inadequacy of the Mode
%A Eikema, Bryan
%Y Vázquez, Raúl
%Y Celikkanat, Hande
%Y Ulmer, Dennis
%Y Tiedemann, Jörg
%Y Swayamdipta, Swabha
%Y Aziz, Wilker
%Y Plank, Barbara
%Y Baan, Joris
%Y de Marneffe, Marie-Catherine
%S Proceedings of the 1st Workshop on Uncertainty-Aware NLP (UncertaiNLP 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St Julians, Malta
%F eikema-2024-effect
%X The highest probability sequences of most neural language generation models tend to be degenerate in some way, a problem known as the inadequacy of the mode. While many approaches to tackling particular aspects of the problem exist, such as dealing with too short sequences or excessive repetitions, explanations of why it occurs in the first place are rarer and do not agree with each other. We believe none of the existing explanations paint a complete picture. In this position paper, we want to bring light to the incredible complexity of the modelling task and the problems that generalising to previously unseen contexts bring. We argue that our desire for models to generalise to contexts it has never observed before is exactly what leads to spread of probability mass and inadequate modes. While we do not claim that adequate modes are impossible, we argue that they are not to be expected either.
%U https://aclanthology.org/2024.uncertainlp-1.9
%P 87-92
Markdown (Informal)
[The Effect of Generalisation on the Inadequacy of the Mode](https://aclanthology.org/2024.uncertainlp-1.9) (Eikema, UncertaiNLP-WS 2024)
ACL