NOTE(review): TechReport entries require an `institution` field and this one
has none, so BibTeX will warn about it. The issuing institution cannot be
determined from this entry alone -- confirm and add it. The note field says the
work was presented at SLOVKO 2003, so if proceedings exist an InProceedings
entry with a booktitle may be the better fit -- verify before changing the type.

@TechReport{rpe--slovko-03,
  author   = {Evans, Roger and
              Tiberius, Carole and
              Brown, Dunstan and
              Corbett, Greville},
  title    = {{Russian} Lemmatisation with {DATR}},
  year     = {2003},
  month    = oct,
  note     = {Presented at SLOVKO 2003, Bratislava},
  grant    = {Supported by ESRC grant no. RES-000-23-0082 to Surrey University},
  abstract = {
In this paper, we describe an approach to lemmatisation for Russian nouns,
which makes use of a large-scale inheritance lexicon implemented in the
lexical representation language DATR (Evans and Gazdar 1996). The lexicon
was compiled semi-automatically from Zaliznjak's morphological dictionary
(Zaliznjak 1977, Ilola and Mustajoki 1989) and automatically generates
fully inflected forms together with their associated morphosyntax for
around 40,000 Russian nouns. From this resource, we have automatically
extracted wordform recognition rules and compiled them into a lemmatiser
which hypothesises possible citation form and morphosyntactic features for
nominal wordforms. We describe the construction of the lemmatiser and the
results of our initial evaluation of its accuracy.
},
}