-- Module:Sandbox/AbstractWikipedia/GrammaticalFeatures
-- This module is part of the Abstract Wikipedia Scribunto-based NLG prototype.
-- It specifies the mapping from Wikidata grammatical features to an internal
-- representation of features (each associated with a category), and it also
-- provides an ordering of the features, which the sortForms function of the
-- lexemes module uses when sorting lexeme forms in lexicographic order.
local p = {}

-- features_map: Wikidata grammatical-feature Q-id -> internal features.
--
-- Every value is a table of `category = "feature"` pairs. One Wikidata
-- feature may expand to several internal features, or (rarely) to none at
-- all, in which case the value is an empty table.
-- The same feature value (e.g. "masculine") can appear under different
-- categories (e.g. gender vs. possessive_gender), which is why the category
-- is always spelled out here.
-- Names are in English for convenience (the Relations module uses the same
-- names); this choice can be revisited.
p.features_map = {
    -- Gender
    ["Q499327"] = { gender = "masculine" },
    ["Q1775415"] = { gender = "feminine" },
    ["Q1775461"] = { gender = "neuter" },
    ["Q1305037"] = { gender = "common" },

    -- Number
    ["Q110786"] = { number = "singular" },
    ["Q146786"] = { number = "plural" },

    -- Combined gender + number
    ["Q47088290"] = { gender = "masculine", number = "singular" },
    ["Q47088292"] = { gender = "masculine", number = "plural" },
    ["Q47088293"] = { gender = "feminine", number = "singular" },
    ["Q47088295"] = { gender = "feminine", number = "plural" },

    -- Case
    ["Q131105"] = { case = "nominative" },
    ["Q146078"] = { case = "accusative" },
    ["Q146233"] = { case = "genitive" },
    ["Q145599"] = { case = "dative" },

    -- Person (optionally combined with number)
    ["Q51929074"] = { person = "third" },
    ["Q51929049"] = { person = "second" },
    ["Q21714344"] = { person = "first" },
    ["Q51929447"] = { person = "third", number = "singular" }, -- 3rd person sing.
    ["Q51929218"] = { person = "first", number = "singular" }, -- 1st person sing.

    -- Possessive gender / person / number
    ["Q69761633"] = { possessive_gender = "masculine" },
    ["Q69761768"] = { possessive_gender = "feminine" },
    ["Q71469738"] = {}, -- poss. masc. or fem.: deliberately mapped to no feature
    ["Q71470909"] = { possessive_person = "third" },
    ["Q71470837"] = { possessive_person = "second" },
    ["Q71470598"] = { possessive_person = "first" },
    ["Q78191294"] = { possessive_number = "singular" },
    ["Q78191289"] = { possessive_number = "plural" },

    -- Mood, tense and aspect
    ["Q682111"] = { mood = "indicative" },
    ["Q179230"] = { mood = "infinitive" },
    ["Q56682909"] = { mood = "indicative", tense = "present" }, -- present indicative
    ["Q3910936"] = { mood = "indicative", tense = "present" }, -- simple present
    ["Q442485"] = { tense = "past", aspect = "perfective" }, -- preterite
    ["Q1392475"] = { tense = "past", aspect = "perfective" }, -- simple past
    ["Q1994301"] = { tense = "past" }, -- past tense
    ["Q12547192"] = { tense = "past", aspect = "imperfective" }, -- past imperfect
    ["Q1230649"] = { mood = "participle", tense = "past" }, -- English past participle
    ["Q12717679"] = { mood = "participle", tense = "past" }, -- past participle
    ["Q10345583"] = { mood = "participle", tense = "present" }, -- present participle
    ["Q473746"] = { mood = "subjunctive" },
    ["Q22716"] = { mood = "imperative" },

    -- Miscellaneous
    ["Q126473"] = { contraction = "contraction" }, -- marks contracted forms
    ["Q53997851"] = { definiteness = "definite" },

    -- Expand as needed
}
-- categories_map: part-of-speech Q-id -> English part-of-speech name.
-- English names are used for convenience (the Relations module uses the same
-- names); this choice can be revisited.
p.categories_map = {
    ["Q1084"] = "noun",
    ["Q24905"] = "verb",
    ["Q34698"] = "adjective",
    ["Q103184"] = "article",
    ["Q4833830"] = "preposition",
}
-- Canonical order of lexeme forms, to be used by the lexemes module's
-- sortForms function. Forms are sorted lexicographically: the categories are
-- visited in the order listed below, and within each category the features
-- are ranked by the numbers given (features not mentioned for a category are
-- considered greatest).
-- The field name keeps its original spelling ("cannonical") because it is
-- part of this module's interface and is referenced by name from elsewhere.
-- NOTE(review): "imperative" and "subjunctive" used to share rank 3, which
-- gave the two moods no relative order within the "mood" category. They now
-- have distinct ranks, following the order in which they were listed;
-- confirm this is the intended order.
p.cannonical_order = {
    { category = "person", third = 1, second = 2, first = 3 },
    { category = "mood", indicative = 1, infinitive = 2, imperative = 3, subjunctive = 4 },
    { category = "tense", present = 1, past = 2 },
    { category = "number", singular = 1, plural = 2 },
    { category = "gender", masculine = 1, feminine = 2, neuter = 3, common = 4 },
    { category = "case", nominative = 1, accusative = 2, genitive = 3, dative = 4 },
}

return p