@inproceedings{barrett-etal-2022-lightweight,
title = "A Lightweight Yet Robust Approach to Textual Anomaly Detection",
author = "Barrett, Leslie and
Kingan, Robert and
Ortan, Alexandra and
Seshadri, Madhavan",
editor = "Kumar, Ritesh and
Ojha, Atul Kr. and
Zampieri, Marcos and
Malmasi, Shervin and
Kadar, Daniel",
booktitle = "Proceedings of the Third Workshop on Threat, Aggression and Cyberbullying (TRAC 2022)",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.trac-1.8",
pages = "62--67",
abstract = "Highly imbalanced textual datasets continue to pose a challenge for supervised learning models. However, viewing such imbalanced text data as an anomaly detection (AD) problem has advantages for certain tasks such as detecting hate speech, or inappropriate and/or offensive language in large social media feeds. There the unwanted content tends to be both rare and non-uniform with respect to its thematic character, and better fits the definition of an anomaly than a class. Several recent approaches to textual AD use transformer models, achieving good results but with trade-offs in pre-training and inflexibility with respect to new domains. In this paper we compare two linear models within the NMF family, which also have a recent history in textual AD. We introduce a new approach based on an alternative regularization of the NMF objective. Our results surpass other linear AD models and are on par with deep models, performing comparably well even in very small outlier concentrations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barrett-etal-2022-lightweight">
<titleInfo>
<title>A Lightweight Yet Robust Approach to Textual Anomaly Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Kingan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Ortan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Madhavan</namePart>
<namePart type="family">Seshadri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Threat, Aggression and Cyberbullying (TRAC 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Kadar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Highly imbalanced textual datasets continue to pose a challenge for supervised learning models. However, viewing such imbalanced text data as an anomaly detection (AD) problem has advantages for certain tasks such as detecting hate speech, or inappropriate and/or offensive language in large social media feeds. There the unwanted content tends to be both rare and non-uniform with respect to its thematic character, and better fits the definition of an anomaly than a class. Several recent approaches to textual AD use transformer models, achieving good results but with trade-offs in pre-training and inflexibility with respect to new domains. In this paper we compare two linear models within the NMF family, which also have a recent history in textual AD. We introduce a new approach based on an alternative regularization of the NMF objective. Our results surpass other linear AD models and are on par with deep models, performing comparably well even in very small outlier concentrations.</abstract>
<identifier type="citekey">barrett-etal-2022-lightweight</identifier>
<location>
<url>https://aclanthology.org/2022.trac-1.8</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>62</start>
<end>67</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Lightweight Yet Robust Approach to Textual Anomaly Detection
%A Barrett, Leslie
%A Kingan, Robert
%A Ortan, Alexandra
%A Seshadri, Madhavan
%Y Kumar, Ritesh
%Y Ojha, Atul Kr.
%Y Zampieri, Marcos
%Y Malmasi, Shervin
%Y Kadar, Daniel
%S Proceedings of the Third Workshop on Threat, Aggression and Cyberbullying (TRAC 2022)
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F barrett-etal-2022-lightweight
%X Highly imbalanced textual datasets continue to pose a challenge for supervised learning models. However, viewing such imbalanced text data as an anomaly detection (AD) problem has advantages for certain tasks such as detecting hate speech, or inappropriate and/or offensive language in large social media feeds. There the unwanted content tends to be both rare and non-uniform with respect to its thematic character, and better fits the definition of an anomaly than a class. Several recent approaches to textual AD use transformer models, achieving good results but with trade-offs in pre-training and inflexibility with respect to new domains. In this paper we compare two linear models within the NMF family, which also have a recent history in textual AD. We introduce a new approach based on an alternative regularization of the NMF objective. Our results surpass other linear AD models and are on par with deep models, performing comparably well even in very small outlier concentrations.
%U https://aclanthology.org/2022.trac-1.8
%P 62-67
Markdown (Informal)
[A Lightweight Yet Robust Approach to Textual Anomaly Detection](https://aclanthology.org/2022.trac-1.8) (Barrett et al., TRAC 2022)
ACL
- Leslie Barrett, Robert Kingan, Alexandra Ortan, and Madhavan Seshadri. 2022. A Lightweight Yet Robust Approach to Textual Anomaly Detection. In Proceedings of the Third Workshop on Threat, Aggression and Cyberbullying (TRAC 2022), pages 62–67, Gyeongju, Republic of Korea. Association for Computational Linguistics.