@inproceedings{coole-etal-2020-unfinished,
title = "Unfinished Business: Construction and Maintenance of a Semantically Tagged Historical Parliamentary Corpus, {UK} {H}ansard from 1803 to the present day",
author = "Coole, Matthew and
Rayson, Paul and
Mariani, John",
editor = "Fi{\v{s}}er, Darja and
Eskevich, Maria and
de Jong, Franciska",
booktitle = "Proceedings of the Second ParlaCLARIN Workshop",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.parlaclarin-1.5",
pages = "23--27",
abstract = "Creating, curating and maintaining modern political corpora is becoming an ever more involved task. As interest from various social bodies and the general public in political discourse grows so too does the need to enrich such datasets with metadata and linguistic annotations. Beyond this, such corpora must be easy to browse and search for linguists, social scientists, digital humanists and the general public. We present our efforts to compile a linguistically annotated and semantically tagged version of the Hansard corpus from 1803 right up to the present day. This involves combining multiple sources of documents and transcripts. We describe our toolchain for tagging; using several existing tools that provide tokenisation, part-of-speech tagging and semantic annotations. We also provide an overview of our bespoke web-based search interface built on LexiDB. In conclusion, we examine the completed corpus by looking at four case studies including semantic categories made available by our toolchain.",
language = "English",
ISBN = "979-10-95546-47-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="coole-etal-2020-unfinished">
<titleInfo>
<title>Unfinished Business: Construction and Maintenance of a Semantically Tagged Historical Parliamentary Corpus, UK Hansard from 1803 to the present day</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Coole</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second ParlaCLARIN Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Darja</namePart>
<namePart type="family">Fišer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Eskevich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Franciska</namePart>
<namePart type="family">de Jong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-47-4</identifier>
</relatedItem>
<abstract>Creating, curating and maintaining modern political corpora is becoming an ever more involved task. As interest from various social bodies and the general public in political discourse grows so too does the need to enrich such datasets with metadata and linguistic annotations. Beyond this, such corpora must be easy to browse and search for linguists, social scientists, digital humanists and the general public. We present our efforts to compile a linguistically annotated and semantically tagged version of the Hansard corpus from 1803 right up to the present day. This involves combining multiple sources of documents and transcripts. We describe our toolchain for tagging; using several existing tools that provide tokenisation, part-of-speech tagging and semantic annotations. We also provide an overview of our bespoke web-based search interface built on LexiDB. In conclusion, we examine the completed corpus by looking at four case studies including semantic categories made available by our toolchain.</abstract>
<identifier type="citekey">coole-etal-2020-unfinished</identifier>
<location>
<url>https://aclanthology.org/2020.parlaclarin-1.5</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>23</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unfinished Business: Construction and Maintenance of a Semantically Tagged Historical Parliamentary Corpus, UK Hansard from 1803 to the present day
%A Coole, Matthew
%A Rayson, Paul
%A Mariani, John
%Y Fišer, Darja
%Y Eskevich, Maria
%Y de Jong, Franciska
%S Proceedings of the Second ParlaCLARIN Workshop
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-47-4
%G English
%F coole-etal-2020-unfinished
%X Creating, curating and maintaining modern political corpora is becoming an ever more involved task. As interest from various social bodies and the general public in political discourse grows so too does the need to enrich such datasets with metadata and linguistic annotations. Beyond this, such corpora must be easy to browse and search for linguists, social scientists, digital humanists and the general public. We present our efforts to compile a linguistically annotated and semantically tagged version of the Hansard corpus from 1803 right up to the present day. This involves combining multiple sources of documents and transcripts. We describe our toolchain for tagging; using several existing tools that provide tokenisation, part-of-speech tagging and semantic annotations. We also provide an overview of our bespoke web-based search interface built on LexiDB. In conclusion, we examine the completed corpus by looking at four case studies including semantic categories made available by our toolchain.
%U https://aclanthology.org/2020.parlaclarin-1.5
%P 23-27
Markdown (Informal)
[Unfinished Business: Construction and Maintenance of a Semantically Tagged Historical Parliamentary Corpus, UK Hansard from 1803 to the present day](https://aclanthology.org/2020.parlaclarin-1.5) (Coole et al., ParlaCLARIN 2020)
ACL