@inproceedings{fairstein-etal-2024-class,
title = "Class Balancing for Efficient Active Learning in Imbalanced Datasets",
author = "Fairstein, Yaron and
Kalinsky, Oren and
Karnin, Zohar and
Kushilevitz, Guy and
Libov, Alexander and
Tolmach, Sofia",
editor = "Henning, Sophie and
Stede, Manfred",
booktitle = "Proceedings of The 18th Linguistic Annotation Workshop (LAW-XVIII)",
month = mar,
year = "2024",
address = "St. Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.law-1.8",
pages = "77--86",
abstract = "Recent developments in active learning algorithms for NLP tasks show promising results in terms of reducing labelling complexity. In this paper we extend this effort to imbalanced datasets; we bridge between the active learning approach of obtaining diverse and informative examples, and the heuristic of class balancing used in imbalanced datasets. We develop a novel tune-free weighting technique that can be applied to various existing active learning algorithms, adding a component of class balancing. We compare several active learning algorithms to their modified version on multiple public datasets and show that when the classes are imbalanced, with manual annotation effort remaining equal the modified version significantly outperforms the original both in terms of the test metric and the number of obtained minority examples. Moreover, when the imbalance is mild or non-existent (classes are completely balanced), our technique does not harm the base algorithms.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fairstein-etal-2024-class">
<titleInfo>
<title>Class Balancing for Efficient Active Learning in Imbalanced Datasets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaron</namePart>
<namePart type="family">Fairstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oren</namePart>
<namePart type="family">Kalinsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zohar</namePart>
<namePart type="family">Karnin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guy</namePart>
<namePart type="family">Kushilevitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Libov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sofia</namePart>
<namePart type="family">Tolmach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The 18th Linguistic Annotation Workshop (LAW-XVIII)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sophie</namePart>
<namePart type="family">Henning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manfred</namePart>
<namePart type="family">Stede</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent developments in active learning algorithms for NLP tasks show promising results in terms of reducing labelling complexity. In this paper we extend this effort to imbalanced datasets; we bridge between the active learning approach of obtaining diverse and informative examples, and the heuristic of class balancing used in imbalanced datasets. We develop a novel tune-free weighting technique that can be applied to various existing active learning algorithms, adding a component of class balancing. We compare several active learning algorithms to their modified version on multiple public datasets and show that when the classes are imbalanced, with manual annotation effort remaining equal the modified version significantly outperforms the original both in terms of the test metric and the number of obtained minority examples. Moreover, when the imbalance is mild or non-existent (classes are completely balanced), our technique does not harm the base algorithms.</abstract>
<identifier type="citekey">fairstein-etal-2024-class</identifier>
<location>
<url>https://aclanthology.org/2024.law-1.8</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>77</start>
<end>86</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Class Balancing for Efficient Active Learning in Imbalanced Datasets
%A Fairstein, Yaron
%A Kalinsky, Oren
%A Karnin, Zohar
%A Kushilevitz, Guy
%A Libov, Alexander
%A Tolmach, Sofia
%Y Henning, Sophie
%Y Stede, Manfred
%S Proceedings of The 18th Linguistic Annotation Workshop (LAW-XVIII)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F fairstein-etal-2024-class
%X Recent developments in active learning algorithms for NLP tasks show promising results in terms of reducing labelling complexity. In this paper we extend this effort to imbalanced datasets; we bridge between the active learning approach of obtaining diverse and informative examples, and the heuristic of class balancing used in imbalanced datasets. We develop a novel tune-free weighting technique that can be applied to various existing active learning algorithms, adding a component of class balancing. We compare several active learning algorithms to their modified version on multiple public datasets and show that when the classes are imbalanced, with manual annotation effort remaining equal the modified version significantly outperforms the original both in terms of the test metric and the number of obtained minority examples. Moreover, when the imbalance is mild or non-existent (classes are completely balanced), our technique does not harm the base algorithms.
%U https://aclanthology.org/2024.law-1.8
%P 77-86
Markdown (Informal)
[Class Balancing for Efficient Active Learning in Imbalanced Datasets](https://aclanthology.org/2024.law-1.8) (Fairstein et al., LAW-WS 2024)
ACL