@inproceedings{laouar-etal-2023-large,
title = "Large-scale similarity search with Optimal Transport",
author = "Laouar, Cl{\'e}a and
Takezawa, Yuki and
Yamada, Makoto",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.730",
doi = "10.18653/v1/2023.emnlp-main.730",
pages = "11920--11930",
abstract = "Wasserstein distance is a powerful tool for comparing probability distributions and is widely used for document classification and retrieval tasks in NLP. In particular, it is known as the word mover{'}s distance (WMD) in the NLP community. WMD exhibits excellent performance for various NLP tasks; however, one of its limitations is its computational cost and thus is not useful for large-scale distribution comparisons. In this study, we propose a simple and effective nearest neighbor search based on the Wasserstein distance. Specifically, we employ the L1 embedding method based on the tree-based Wasserstein approximation and subsequently used the nearest neighbor search to efficiently find the $k$-nearest neighbors. Through benchmark experiments, we demonstrate that the proposed approximation has comparable performance to the vanilla Wasserstein distance and can be computed three orders of magnitude faster than the vanilla Wasserstein distance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="laouar-etal-2023-large">
<titleInfo>
<title>Large-scale similarity search with Optimal Transport</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cléa</namePart>
<namePart type="family">Laouar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuki</namePart>
<namePart type="family">Takezawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Yamada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Wasserstein distance is a powerful tool for comparing probability distributions and is widely used for document classification and retrieval tasks in NLP. In particular, it is known as the word mover’s distance (WMD) in the NLP community. WMD exhibits excellent performance for various NLP tasks; however, one of its limitations is its computational cost and thus is not useful for large-scale distribution comparisons. In this study, we propose a simple and effective nearest neighbor search based on the Wasserstein distance. Specifically, we employ the L1 embedding method based on the tree-based Wasserstein approximation and subsequently used the nearest neighbor search to efficiently find the k-nearest neighbors. Through benchmark experiments, we demonstrate that the proposed approximation has comparable performance to the vanilla Wasserstein distance and can be computed three orders of magnitude faster than the vanilla Wasserstein distance.</abstract>
<identifier type="citekey">laouar-etal-2023-large</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-main.730</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-main.730</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>11920</start>
<end>11930</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Large-scale similarity search with Optimal Transport
%A Laouar, Cléa
%A Takezawa, Yuki
%A Yamada, Makoto
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F laouar-etal-2023-large
%X Wasserstein distance is a powerful tool for comparing probability distributions and is widely used for document classification and retrieval tasks in NLP. In particular, it is known as the word mover’s distance (WMD) in the NLP community. WMD exhibits excellent performance for various NLP tasks; however, one of its limitations is its computational cost and thus is not useful for large-scale distribution comparisons. In this study, we propose a simple and effective nearest neighbor search based on the Wasserstein distance. Specifically, we employ the L1 embedding method based on the tree-based Wasserstein approximation and subsequently used the nearest neighbor search to efficiently find the k-nearest neighbors. Through benchmark experiments, we demonstrate that the proposed approximation has comparable performance to the vanilla Wasserstein distance and can be computed three orders of magnitude faster than the vanilla Wasserstein distance.
%R 10.18653/v1/2023.emnlp-main.730
%U https://aclanthology.org/2023.emnlp-main.730
%U https://doi.org/10.18653/v1/2023.emnlp-main.730
%P 11920-11930
Markdown (Informal)
[Large-scale similarity search with Optimal Transport](https://aclanthology.org/2023.emnlp-main.730) (Laouar et al., EMNLP 2023)
ACL
- Cléa Laouar, Yuki Takezawa, and Makoto Yamada. 2023. Large-scale similarity search with Optimal Transport. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 11920–11930, Singapore. Association for Computational Linguistics.