@inproceedings{ff689ec98a164d179d6d1d45b5f1aff7,
title = "Validation methodology for expert-annotated datasets: Event annotation case study",
abstract = "Event detection remains a difficult task due to the complexity and ambiguity of such entities. On the one hand, we observe low inter-annotator agreement among experts when annotating events, despite the multitude of existing annotation guidelines and their numerous revisions. On the other hand, event extraction systems achieve lower measured performance in terms of F1-score than for other entity types such as people or locations. In this paper, we study the consistency and completeness of expert-annotated datasets for events and time expressions, and we propose a data-agnostic methodology for validating such datasets along these two dimensions. Furthermore, we combine the power of crowds and machines to correct and extend expert-annotated datasets of events. We show the benefit of using crowd-annotated events to train and evaluate a state-of-the-art event extraction system. Our results show that the crowd-annotated events increase the performance of the system by at least 5.3%.",
keywords = "Crowdsourcing, Event extraction, Human-in-the-loop, Time extraction",
author = "Oana Inel and Lora Aroyo",
year = "2019",
month = may,
day = "1",
doi = "10.4230/OASIcs.LDK.2019.12",
language = "English",
series = "OpenAccess Series in Informatics",
publisher = "Schloss Dagstuhl - Leibniz-Zentrum f{\"u}r Informatik GmbH, Dagstuhl Publishing",
pages = "1--15",
editor = "Eskevich, Maria and {de Melo}, Gerard and F{\"a}th, Christian and McCrae, {John P.} and Buitelaar, Paul and Chiarcos, Christian and Klimek, Bettina and Dojchinovski, Milan",
booktitle = "2nd Conference on Language, Data and Knowledge, LDK 2019",
note = "2nd Conference on Language, Data and Knowledge, LDK 2019; Conference date: 20-05-2019 through 23-05-2019",
}