@inproceedings{feyisetan-kasiviswanathan-2021-private,
title = "Private Release of Text Embedding Vectors",
author = "Feyisetan, Oluwaseyi and
Kasiviswanathan, Shiva",
editor = "Pruksachatkun, Yada and
Ramakrishna, Anil and
Chang, Kai-Wei and
Krishna, Satyapriya and
Dhamala, Jwala and
Guha, Tanaya and
Ren, Xiang",
booktitle = "Proceedings of the First Workshop on Trustworthy Natural Language Processing",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.trustnlp-1.3",
doi = "10.18653/v1/2021.trustnlp-1.3",
pages = "15--27",
abstract = "Ensuring strong theoretical privacy guarantees on text data is a challenging problem which is usually attained at the expense of utility. However, to improve the practicality of privacy preserving text analyses, it is essential to design algorithms that better optimize this tradeoff. To address this challenge, we propose a release mechanism that takes any (text) embedding vector as input and releases a corresponding private vector. The mechanism satisfies an extension of differential privacy to metric spaces. Our idea based on first randomly projecting the vectors to a lower-dimensional space and then adding noise in this projected space generates private vectors that achieve strong theoretical guarantees on its utility. We support our theoretical proofs with empirical experiments on multiple word embedding models and NLP datasets, achieving in some cases more than 10{\%} gains over the existing state-of-the-art privatization techniques.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="feyisetan-kasiviswanathan-2021-private">
<titleInfo>
<title>Private Release of Text Embedding Vectors</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oluwaseyi</namePart>
<namePart type="family">Feyisetan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiva</namePart>
<namePart type="family">Kasiviswanathan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Trustworthy Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yada</namePart>
<namePart type="family">Pruksachatkun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anil</namePart>
<namePart type="family">Ramakrishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai-Wei</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satyapriya</namePart>
<namePart type="family">Krishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jwala</namePart>
<namePart type="family">Dhamala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanaya</namePart>
<namePart type="family">Guha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Ensuring strong theoretical privacy guarantees on text data is a challenging problem which is usually attained at the expense of utility. However, to improve the practicality of privacy preserving text analyses, it is essential to design algorithms that better optimize this tradeoff. To address this challenge, we propose a release mechanism that takes any (text) embedding vector as input and releases a corresponding private vector. The mechanism satisfies an extension of differential privacy to metric spaces. Our idea based on first randomly projecting the vectors to a lower-dimensional space and then adding noise in this projected space generates private vectors that achieve strong theoretical guarantees on its utility. We support our theoretical proofs with empirical experiments on multiple word embedding models and NLP datasets, achieving in some cases more than 10% gains over the existing state-of-the-art privatization techniques.</abstract>
<identifier type="citekey">feyisetan-kasiviswanathan-2021-private</identifier>
<identifier type="doi">10.18653/v1/2021.trustnlp-1.3</identifier>
<location>
<url>https://aclanthology.org/2021.trustnlp-1.3</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>15</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Private Release of Text Embedding Vectors
%A Feyisetan, Oluwaseyi
%A Kasiviswanathan, Shiva
%Y Pruksachatkun, Yada
%Y Ramakrishna, Anil
%Y Chang, Kai-Wei
%Y Krishna, Satyapriya
%Y Dhamala, Jwala
%Y Guha, Tanaya
%Y Ren, Xiang
%S Proceedings of the First Workshop on Trustworthy Natural Language Processing
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F feyisetan-kasiviswanathan-2021-private
%X Ensuring strong theoretical privacy guarantees on text data is a challenging problem which is usually attained at the expense of utility. However, to improve the practicality of privacy preserving text analyses, it is essential to design algorithms that better optimize this tradeoff. To address this challenge, we propose a release mechanism that takes any (text) embedding vector as input and releases a corresponding private vector. The mechanism satisfies an extension of differential privacy to metric spaces. Our idea based on first randomly projecting the vectors to a lower-dimensional space and then adding noise in this projected space generates private vectors that achieve strong theoretical guarantees on its utility. We support our theoretical proofs with empirical experiments on multiple word embedding models and NLP datasets, achieving in some cases more than 10% gains over the existing state-of-the-art privatization techniques.
%R 10.18653/v1/2021.trustnlp-1.3
%U https://aclanthology.org/2021.trustnlp-1.3
%U https://doi.org/10.18653/v1/2021.trustnlp-1.3
%P 15-27
Markdown (Informal)
[Private Release of Text Embedding Vectors](https://aclanthology.org/2021.trustnlp-1.3) (Feyisetan & Kasiviswanathan, TrustNLP 2021)
ACL
- Oluwaseyi Feyisetan and Shiva Kasiviswanathan. 2021. Private Release of Text Embedding Vectors. In Proceedings of the First Workshop on Trustworthy Natural Language Processing, pages 15–27, Online. Association for Computational Linguistics.