% Conference paper from CCL 2021; the ACL Anthology record is given in the url field.
% Names are stored in "Family, Given" form. The citation key is deliberately left
% unchanged so existing citations keep resolving, even though the first author's
% family name is Yu.
@inproceedings{xiaoyan-etal-2021-multi,
  title     = {Multi-Strategy Knowledge Distillation Based Teacher-Student Framework for Machine Reading Comprehension},
  author    = {Yu, Xiaoyan and
               Liu, Qingbin and
               He, Shizhu and
               Liu, Kang and
               Liu, Shengping and
               Zhao, Jun and
               Zhou, Yongbin},
  editor    = {Li, Sheng and
               Sun, Maosong and
               Liu, Yang and
               Wu, Hua and
               Liu, Kang and
               Che, Wanxiang and
               He, Shizhu and
               Rao, Gaoqi},
  booktitle = {Proceedings of the 20th Chinese National Conference on Computational Linguistics},
  month     = aug,
  year      = {2021},
  address   = {Huhhot, China},
  publisher = {Chinese Information Processing Society of China},
  url       = {https://aclanthology.org/2021.ccl-1.91},
  pages     = {1024--1036},
  abstract  = {The irrelevant information in documents poses a great challenge for machine reading comprehension (MRC). To deal with such a challenge current MRC models generally fall into two separate parts: evidence extraction and answer prediction where the former extracts the key evidence corresponding to the question and the latter predicts the answer based on those sentences. However such pipeline paradigms tend to accumulate errors i.e. extracting the incorrect evidence results in predicting the wrong answer. In order to address this problem we propose a Multi-Strategy Knowledge Distillation based Teacher-Student framework (MSKDTS) for machine reading comprehension. In our approach we first take evidence and document respectively as the input reference information to build a teacher model and a student model. Then the multi-strategy knowledge distillation method transfers the knowledge from the teacher model to the student model at both feature and prediction level through knowledge distillation approach. Therefore in the testing phase the enhanced student model can predict answer similar to the teacher model without being aware of which sentence is the corresponding evidence in the document. Experimental results on the ReCO dataset demonstrate the effectiveness of our approach and further ablation studies prove the effectiveness of both knowledge distillation strategies.},
  language  = {English},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for one conference paper; the host proceedings volume is
     described in the relatedItem element. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="xiaoyan-etal-2021-multi">
    <titleInfo>
      <title>Multi-Strategy Knowledge Distillation Based Teacher-Student Framework for Machine Reading Comprehension</title>
    </titleInfo>
    <!-- Authors, in byline order. The ID and citekey above/below are kept as
         issued so existing references still resolve, even though the first
         author's family name is Yu. -->
    <name type="personal">
      <namePart type="given">Xiaoyan</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qingbin</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shizhu</namePart>
      <namePart type="family">He</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kang</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shengping</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jun</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yongbin</namePart>
      <namePart type="family">Zhou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <language>
      <languageTerm type="text">English</languageTerm>
      <languageTerm type="code" authority="iso639-2b">eng</languageTerm>
    </language>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th Chinese National Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sheng</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maosong</namePart>
        <namePart type="family">Sun</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yang</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hua</namePart>
        <namePart type="family">Wu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kang</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shizhu</namePart>
        <namePart type="family">He</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Gaoqi</namePart>
        <namePart type="family">Rao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Chinese Information Processing Society of China</publisher>
        <place>
          <placeTerm type="text">Huhhot, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The irrelevant information in documents poses a great challenge for machine reading comprehension (MRC). To deal with such a challenge current MRC models generally fall into two separate parts: evidence extraction and answer prediction where the former extracts the key evidence corresponding to the question and the latter predicts the answer based on those sentences. However such pipeline paradigms tend to accumulate errors i.e. extracting the incorrect evidence results in predicting the wrong answer. In order to address this problem we propose a Multi-Strategy Knowledge Distillation based Teacher-Student framework (MSKDTS) for machine reading comprehension. In our approach we first take evidence and document respectively as the input reference information to build a teacher model and a student model. Then the multi-strategy knowledge distillation method transfers the knowledge from the teacher model to the student model at both feature and prediction level through knowledge distillation approach. Therefore in the testing phase the enhanced student model can predict answer similar to the teacher model without being aware of which sentence is the corresponding evidence in the document. Experimental results on the ReCO dataset demonstrate the effectiveness of our approach and further ablation studies prove the effectiveness of both knowledge distillation strategies.</abstract>
    <identifier type="citekey">xiaoyan-etal-2021-multi</identifier>
    <location>
      <url>https://aclanthology.org/2021.ccl-1.91</url>
    </location>
    <part>
      <date>2021-08</date>
      <extent unit="page">
        <start>1024</start>
        <end>1036</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-Strategy Knowledge Distillation Based Teacher-Student Framework for Machine Reading Comprehension
%A Yu, Xiaoyan
%A Liu, Qingbin
%A He, Shizhu
%A Liu, Kang
%A Liu, Shengping
%A Zhao, Jun
%A Zhou, Yongbin
%Y Li, Sheng
%Y Sun, Maosong
%Y Liu, Yang
%Y Wu, Hua
%Y Liu, Kang
%Y Che, Wanxiang
%Y He, Shizhu
%Y Rao, Gaoqi
%S Proceedings of the 20th Chinese National Conference on Computational Linguistics
%D 2021
%8 August
%I Chinese Information Processing Society of China
%C Huhhot, China
%G English
%F xiaoyan-etal-2021-multi
%X The irrelevant information in documents poses a great challenge for machine reading comprehension (MRC). To deal with such a challenge current MRC models generally fall into two separate parts: evidence extraction and answer prediction where the former extracts the key evidence corresponding to the question and the latter predicts the answer based on those sentences. However such pipeline paradigms tend to accumulate errors i.e. extracting the incorrect evidence results in predicting the wrong answer. In order to address this problem we propose a Multi-Strategy Knowledge Distillation based Teacher-Student framework (MSKDTS) for machine reading comprehension. In our approach we first take evidence and document respectively as the input reference information to build a teacher model and a student model. Then the multi-strategy knowledge distillation method transfers the knowledge from the teacher model to the student model at both feature and prediction level through knowledge distillation approach. Therefore in the testing phase the enhanced student model can predict answer similar to the teacher model without being aware of which sentence is the corresponding evidence in the document. Experimental results on the ReCO dataset demonstrate the effectiveness of our approach and further ablation studies prove the effectiveness of both knowledge distillation strategies.
%U https://aclanthology.org/2021.ccl-1.91
%P 1024-1036
Markdown (Informal)
[Multi-Strategy Knowledge Distillation Based Teacher-Student Framework for Machine Reading Comprehension](https://aclanthology.org/2021.ccl-1.91) (Yu et al., CCL 2021)
ACL