@inproceedings{zhang-etal-2022-bert,
title = "{BERT} 4{EVER}@{E}va{H}an 2022: {A}ncient {C}hinese Word Segmentation and Part-of-Speech Tagging Based on Adversarial Learning and Continual Pre-training",
author = "Zhang, Hailin and
Yang, Ziyu and
Fu, Yingwen and
Ding, Ruoyao",
editor = "Sprugnoli, Rachele and
Passarotti, Marco",
booktitle = "Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lt4hala-1.22",
pages = "150--154",
abstract = "With the development of artificial intelligence (AI) and digital humanities, ancient Chinese resources and language technology have also developed and grown, which have become an increasingly important part to the study of historiography and traditional Chinese culture. In order to promote the research on automatic analysis technology of ancient Chinese, we conduct various experiments on ancient Chinese word segmentation and part-of-speech (POS) tagging tasks for the EvaHan 2022 shared task. We model the word segmentation and POS tagging tasks jointly as a sequence tagging problem. In addition, we perform a series of training strategies based on the provided ancient Chinese pre-trained model to enhance the model performance. Concretely, we employ several augmentation strategies, including continual pre-training, adversarial training, and ensemble learning to alleviate the limited amount of training data and the imbalance between POS labels. Extensive experiments demonstrate that our proposed models achieve considerable performance on ancient Chinese word segmentation and POS tagging tasks. Keywords: ancient Chinese, word segmentation, part-of-speech tagging, adversarial learning, continuing pre-training",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2022-bert">
<titleInfo>
<title>BERT 4EVER@EvaHan 2022: Ancient Chinese Word Segmentation and Part-of-Speech Tagging Based on Adversarial Learning and Continual Pre-training</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hailin</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ziyu</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yingwen</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruoyao</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With the development of artificial intelligence (AI) and digital humanities, ancient Chinese resources and language technology have also developed and grown, which have become an increasingly important part to the study of historiography and traditional Chinese culture. In order to promote the research on automatic analysis technology of ancient Chinese, we conduct various experiments on ancient Chinese word segmentation and part-of-speech (POS) tagging tasks for the EvaHan 2022 shared task. We model the word segmentation and POS tagging tasks jointly as a sequence tagging problem. In addition, we perform a series of training strategies based on the provided ancient Chinese pre-trained model to enhance the model performance. Concretely, we employ several augmentation strategies, including continual pre-training, adversarial training, and ensemble learning to alleviate the limited amount of training data and the imbalance between POS labels. Extensive experiments demonstrate that our proposed models achieve considerable performance on ancient Chinese word segmentation and POS tagging tasks. Keywords: ancient Chinese, word segmentation, part-of-speech tagging, adversarial learning, continuing pre-training</abstract>
<identifier type="citekey">zhang-etal-2022-bert</identifier>
<location>
<url>https://aclanthology.org/2022.lt4hala-1.22</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>150</start>
<end>154</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BERT 4EVER@EvaHan 2022: Ancient Chinese Word Segmentation and Part-of-Speech Tagging Based on Adversarial Learning and Continual Pre-training
%A Zhang, Hailin
%A Yang, Ziyu
%A Fu, Yingwen
%A Ding, Ruoyao
%Y Sprugnoli, Rachele
%Y Passarotti, Marco
%S Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F zhang-etal-2022-bert
%X With the development of artificial intelligence (AI) and digital humanities, ancient Chinese resources and language technology have also developed and grown, which have become an increasingly important part to the study of historiography and traditional Chinese culture. In order to promote the research on automatic analysis technology of ancient Chinese, we conduct various experiments on ancient Chinese word segmentation and part-of-speech (POS) tagging tasks for the EvaHan 2022 shared task. We model the word segmentation and POS tagging tasks jointly as a sequence tagging problem. In addition, we perform a series of training strategies based on the provided ancient Chinese pre-trained model to enhance the model performance. Concretely, we employ several augmentation strategies, including continual pre-training, adversarial training, and ensemble learning to alleviate the limited amount of training data and the imbalance between POS labels. Extensive experiments demonstrate that our proposed models achieve considerable performance on ancient Chinese word segmentation and POS tagging tasks. Keywords: ancient Chinese, word segmentation, part-of-speech tagging, adversarial learning, continuing pre-training
%U https://aclanthology.org/2022.lt4hala-1.22
%P 150-154
Markdown (Informal)
[BERT 4EVER@EvaHan 2022: Ancient Chinese Word Segmentation and Part-of-Speech Tagging Based on Adversarial Learning and Continual Pre-training](https://aclanthology.org/2022.lt4hala-1.22) (Zhang et al., LT4HALA 2022)
ACL