@inproceedings{patil-etal-2022-efficient,
title = "Efficient Dialog State Tracking Using Gated- Intent based Slot Operation Prediction for On-device Dialog Systems",
author = "Patil, Pranamya and
Choi, Hyungtak and
Samal, Ranjan and
Kaur, Gurpreet and
Jhawar, Manisha and
Tammewar, Aniruddha and
Mukherjee, Siddhartha",
editor = "Akhtar, Md. Shad and
Chakraborty, Tanmoy",
booktitle = "Proceedings of the 19th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2022",
address = "New Delhi, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.icon-main.9",
pages = "67--74",
abstract = "Conversational agents on smart devices need to be efficient concerning latency in responding, for enhanced user experience and real-time utility. This demands on-device processing (as on-device processing is quicker), which limits the availability of resources such as memory and processing. Most state-of-the-art Dialog State Tracking (DST) systems make use of large pre-trained language models that require high resource computation, typically available on high-end servers. Whereas, on-device systems are memory efficient, have reduced time/latency, preserve privacy, and don{'}t rely on network. A recent approach tries to reduce the latency by splitting the task of slot prediction into two subtasks of State Operation Prediction (SOP) to select an action for each slot, and Slot Value Generation (SVG) responsible for producing values for the identified slots. SVG being computationally expensive, is performed only for a small subset of actions predicted in the SOP. Motivated from this optimization technique, we build a similar system and work on multi-task learning to achieve significant improvements in DST performance, while optimizing the resource consumption. We propose a quadruplet (Domain, Intent, Slot, and Slot Value) based DST, which significantly boosts the performance. We experiment with different techniques to fuse different layers of representations from intent and slot prediction tasks. We obtain the best joint accuracy of 53.3{\%} on the publicly available MultiWOZ 2.2 dataset, using BERT-medium along with a gating mechanism. We also compare the cost efficiency of our system with other large models and find that our system is best suited for an on-device based production environment.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="patil-etal-2022-efficient">
<titleInfo>
<title>Efficient Dialog State Tracking Using Gated- Intent based Slot Operation Prediction for On-device Dialog Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pranamya</namePart>
<namePart type="family">Patil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyungtak</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ranjan</namePart>
<namePart type="family">Samal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gurpreet</namePart>
<namePart type="family">Kaur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manisha</namePart>
<namePart type="family">Jhawar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aniruddha</namePart>
<namePart type="family">Tammewar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siddhartha</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Shad</namePart>
<namePart type="family">Akhtar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Delhi, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Conversational agents on smart devices need to be efficient concerning latency in responding, for enhanced user experience and real-time utility. This demands on-device processing (as on-device processing is quicker), which limits the availability of resources such as memory and processing. Most state-of-the-art Dialog State Tracking (DST) systems make use of large pre-trained language models that require high resource computation, typically available on high-end servers. Whereas, on-device systems are memory efficient, have reduced time/latency, preserve privacy, and don’t rely on network. A recent approach tries to reduce the latency by splitting the task of slot prediction into two subtasks of State Operation Prediction (SOP) to select an action for each slot, and Slot Value Generation (SVG) responsible for producing values for the identified slots. SVG being computationally expensive, is performed only for a small subset of actions predicted in the SOP. Motivated from this optimization technique, we build a similar system and work on multi-task learning to achieve significant improvements in DST performance, while optimizing the resource consumption. We propose a quadruplet (Domain, Intent, Slot, and Slot Value) based DST, which significantly boosts the performance. We experiment with different techniques to fuse different layers of representations from intent and slot prediction tasks. We obtain the best joint accuracy of 53.3% on the publicly available MultiWOZ 2.2 dataset, using BERT-medium along with a gating mechanism. We also compare the cost efficiency of our system with other large models and find that our system is best suited for an on-device based production environment.</abstract>
<identifier type="citekey">patil-etal-2022-efficient</identifier>
<location>
<url>https://aclanthology.org/2022.icon-main.9</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>67</start>
<end>74</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Efficient Dialog State Tracking Using Gated- Intent based Slot Operation Prediction for On-device Dialog Systems
%A Patil, Pranamya
%A Choi, Hyungtak
%A Samal, Ranjan
%A Kaur, Gurpreet
%A Jhawar, Manisha
%A Tammewar, Aniruddha
%A Mukherjee, Siddhartha
%Y Akhtar, Md. Shad
%Y Chakraborty, Tanmoy
%S Proceedings of the 19th International Conference on Natural Language Processing (ICON)
%D 2022
%8 December
%I Association for Computational Linguistics
%C New Delhi, India
%F patil-etal-2022-efficient
%X Conversational agents on smart devices need to be efficient concerning latency in responding, for enhanced user experience and real-time utility. This demands on-device processing (as on-device processing is quicker), which limits the availability of resources such as memory and processing. Most state-of-the-art Dialog State Tracking (DST) systems make use of large pre-trained language models that require high resource computation, typically available on high-end servers. Whereas, on-device systems are memory efficient, have reduced time/latency, preserve privacy, and don’t rely on network. A recent approach tries to reduce the latency by splitting the task of slot prediction into two subtasks of State Operation Prediction (SOP) to select an action for each slot, and Slot Value Generation (SVG) responsible for producing values for the identified slots. SVG being computationally expensive, is performed only for a small subset of actions predicted in the SOP. Motivated from this optimization technique, we build a similar system and work on multi-task learning to achieve significant improvements in DST performance, while optimizing the resource consumption. We propose a quadruplet (Domain, Intent, Slot, and Slot Value) based DST, which significantly boosts the performance. We experiment with different techniques to fuse different layers of representations from intent and slot prediction tasks. We obtain the best joint accuracy of 53.3% on the publicly available MultiWOZ 2.2 dataset, using BERT-medium along with a gating mechanism. We also compare the cost efficiency of our system with other large models and find that our system is best suited for an on-device based production environment.
%U https://aclanthology.org/2022.icon-main.9
%P 67-74
Markdown (Informal)
[Efficient Dialog State Tracking Using Gated- Intent based Slot Operation Prediction for On-device Dialog Systems](https://aclanthology.org/2022.icon-main.9) (Patil et al., ICON 2022)
ACL