Module:Sandbox/AbstractWikipedia/Functions
This is the functions module of the Abstract Wikipedia template-renderer prototype.
You can define here new functions to be used in the template language. A function invoked directly in a template's slot should return a lexeme
as defined in the lexemes module.
Some remarks:
- You can rely on the Wikidata module to conveniently fetch items and lexemes from Wikidata.
- The TemplateEvaluator module allows you to define sub-templates as functions (see the QuantifiedNoun function below as an example).
- You can define language-specific functions and implementations in language submodules, e.g. Module:Sandbox/AbstractWikipedia/Functions/en for English.
Current language-specific implementations:
-- Table of exported functions; every function below is attached to it.
local p = {}
-- Lexeme constructor module (used below via l.newLexeme)
local l = require("Module:Sandbox/AbstractWikipedia/Lexemes")
-- Template evaluator (used below via te.evaluateTemplate)
local te = require("Module:Sandbox/AbstractWikipedia/TemplateEvaluator")
-- Wikidata access helpers (labels, demonyms, lemmas, grammatical features, ...)
local wd = require("Module:Sandbox/AbstractWikipedia/Wikidata")
-- It is assumed that the global variable "language" holds the language code
-- used for rendering.
-- NOTE: this is deliberately a global (no "local"); within this module it is
-- read by p.Demonym for its error message. The value is hard-coded here.
language = "he"
-- The following is a list of language-agnostic functions to be supported by the
-- template language. Language-specific implementations can be found in the
-- Module:Sandbox/AbstractWikipedia/Functions/xx where xx is the language code.
-- Construct a lexeme from a cardinal number. Note that the argument number is
-- assumed to be a string
-- Parameters:
--   number: the cardinal value (converted with tonumber for the agreement
--           check and with tostring for the spelling)
-- Returns: a "numeral" lexeme carrying a "number" feature and a single form
function p.Cardinal ( number )
    local numeral = l.newLexeme(number, "numeral")
    -- Simplistic agreement rule (only correct for some languages):
    -- exactly 1 is singular, every other value is plural.
    local grammatical_number = "plural"
    if tonumber(number) == 1 then
        grammatical_number = "singular"
    end
    numeral.addFeature("number", grammatical_number)
    -- The only form is the number itself, with no form-level features
    numeral.addForm(tostring(number), {})
    return numeral
end
-- General facility to convert text to a lexeme
-- Parameters:
--   text:      the literal text to wrap
--   text_type: optional lexeme category; "text" is used when it is omitted
-- Returns: the lexeme built by l.newLexeme from the text and category
function p.TemplateText ( text, text_type )
    if not text_type then
        text_type = "text"
    end
    return l.newLexeme(text, text_type)
end
-- Helper functions to expand a list of Q-id features and add them to a form
-- or lexeme
-- Parameters:
--   form:       any object exposing addFeature(category, feature)
--   wdFeatures: sequence of feature identifiers understood by wd.expandFeature
local function addFeaturesToForm (form, wdFeatures)
    for _, feature_id in ipairs(wdFeatures) do
        -- One identifier may expand to several category/feature pairs
        for category, value in pairs(wd.expandFeature(feature_id)) do
            form.addFeature(category, value)
        end
    end
end
-- Fetch the demonym data from an item, and construct an appropriate lexeme
-- Parameters:
--   q_id: identifier of the item whose demonyms are requested
-- Returns: the lexeme linked from the first demonym if there is one, otherwise
--          a new "adjective" lexeme with one form per demonym entry
-- Raises an error when wd.getDemonyms returns an empty list.
function p.Demonym ( q_id )
    local candidates = wd.getDemonyms(q_id)
    if #candidates == 0 then
        -- Use some fallback here?
        error("No demonyms for "..q_id.." in language "..language)
    end

    -- Demonyms are given either as a link to a lexeme or as a list of
    -- explicit forms; a linked lexeme on the first entry takes precedence.
    local primary = candidates[1]
    if primary.lexeme then
        return p.Lexeme(primary.lexeme)
    end

    local adjective = l.newLexeme(primary.label, "adjective")
    for _, candidate in ipairs(candidates) do
        addFeaturesToForm(adjective.addForm(candidate.label), candidate.features)
    end
    adjective.log()
    return adjective
end
-- Function to transform lexemes from Wikidata to the internal representation.
-- The extra arguments are Q-ids of features which should act as extra
-- constraints.
-- Parameters:
--   lexeme_id: identifier of the Wikidata lexeme to load
--   ...:       optional feature identifiers added to the lexeme itself
-- Returns: the internal lexeme, with one form for every Wikidata form that
--          has a representation in the language chosen by wd.getLemma
function p.Lexeme (lexeme_id, ...)
    local wdLexeme = mw.wikibase.getEntity( lexeme_id )
    local lemma, used_language = wd.getLemma(wdLexeme, lexeme_id)
    local lexeme = l.newLexeme(lemma, wd.getPOS(wdLexeme))

    local grammatical_gender = wd.getGrammaticalGender(lexeme_id)
    if grammatical_gender then
        lexeme.addFeature("gender", grammatical_gender)
    end
    -- More statements may need to be fetched here

    -- Add any extra constraints passed as arguments. The varargs are packed
    -- explicitly instead of relying on the deprecated implicit "arg" table.
    addFeaturesToForm(lexeme, { ... })

    -- Ingest forms. All loop variables are local so that nothing leaks into
    -- the global environment.
    for _, wdForm in ipairs(wdLexeme:getForms()) do
        -- We only want forms of one language code
        local spelling = wdForm:getRepresentation(used_language)
        if spelling then
            local form = lexeme.addForm(spelling, {})
            addFeaturesToForm(form, wdForm:getGrammaticalFeatures())
        end
    end

    lexeme.log()
    return lexeme
end
-- Creates a compound lexeme out of two lexemes (typically nouns), which
-- should be the gendered versions of the same lemma (e.g. German Arzt/Ärztin)
-- Parameters:
--   masculine_lexeme_id: identifier of the masculine Wikidata lexeme
--   feminine_lexeme_id:  identifier of the feminine Wikidata lexeme
--   ...:                 optional feature identifiers added to the lexeme
-- Returns: one lexeme whose lemma is "masculine/feminine" and which holds the
--          forms of both source lexemes
-- Raises an error when the two lexemes differ in language code or in
-- part-of-speech.
function p.GenderedLexeme ( masculine_lexeme_id, feminine_lexeme_id, ...)
    local mascLexeme = mw.wikibase.getEntity( masculine_lexeme_id )
    local femLexeme = mw.wikibase.getEntity( feminine_lexeme_id )
    local mascLemma, used_language = wd.getLemma(mascLexeme, masculine_lexeme_id)
    local femLemma, used_language2 = wd.getLemma(femLexeme, feminine_lexeme_id)
    if used_language ~= used_language2 then
        error("Lexemes "..mascLemma.." and "..femLemma.." don't use the same language code")
    end

    local pos = wd.getPOS(mascLexeme)
    if pos ~= wd.getPOS(femLexeme) then
        error("Lexemes "..mascLemma.." and "..femLemma.." don't have the same part-of-speech")
    end

    -- Declared local: the original assigned a global named "lexeme"
    local lexeme = l.newLexeme(mascLemma.."/"..femLemma, pos)

    -- Add any extra constraints passed as arguments. The varargs are packed
    -- explicitly instead of relying on the deprecated implicit "arg" table.
    addFeaturesToForm(lexeme, { ... })

    -- An ordered list is used instead of pairs() over a keyed table, because
    -- pairs() gives no ordering guarantee; masculine forms are always added
    -- first, mirroring the order of the combined lemma.
    local gendered_sources = {
        { gender = "masculine", entity = mascLexeme },
        { gender = "feminine", entity = femLexeme },
    }
    for _, source in ipairs(gendered_sources) do
        for _, wdForm in ipairs(source.entity:getForms()) do -- ingest forms
            -- We only want forms of one language code
            local spelling = wdForm:getRepresentation(used_language)
            if spelling then
                local form = lexeme.addForm(spelling)
                addFeaturesToForm(form, wdForm:getGrammaticalFeatures())
                -- Forms without an explicit gender inherit the gender of the
                -- lexeme they came from
                if not form.getFeatureIndex("gender") then
                    form.addFeature("gender", source.gender)
                end
            end
        end
    end

    lexeme.log()
    return lexeme
end
-- Constructs a lexeme corresponding to a person
-- This populates the grammatical gender according to the social gender
-- and adds grammatical number "singular"
-- Parameters:
--   q_id: identifier of the item describing the person
-- Returns: a singular "noun" lexeme named after the item's label
function p.Person (q_id)
    local person = l.newLexeme(wd.getLabel(q_id), "noun")
    person.addFeature("number", "singular")

    -- Only binary genders are mapped to a grammatical gender here. Handling
    -- of non-binary gender is language dependent and would have to be done
    -- in a language-specific implementation.
    local social_gender = wd.getHumanGender(q_id)
    if social_gender == "masculine" or social_gender == "feminine" then
        person.addFeature("gender", social_gender)
    end

    -- Dead people are normally spoken about in the past tense, so their
    -- lexeme gets a past tense feature. This can be exposed to the verb by
    -- using the "tsubj relation".
    if wd.isDead(q_id) then
        person.addFeature("nominal_tense", "past")
    end

    person.log()
    return person
end
-- Fetches the label of an entity.
-- To allow reverse look-up of lexemes from items, I have used the
-- "literal translation" property (P2441) qualified by "lexeme sense" (P7018)
-- to point to language-specific lexemes. See discussion in:
-- https://phabricator.wikimedia.org/T320263#8341702
-- These lexemes are stored in the gendered.unspecified.lexeme field (if no
-- gendered pairs are given in "male form of label" (P3321) or
-- "female form of label" (P2521)).
-- Parameters:
--   q_id: identifier of the item whose label is requested
-- Returns: a lexeme built, in order of preference, from a masculine/feminine
--          lexeme pair, a single gendered lexeme, an unspecified lexeme, or
--          (as a fallback) the plain label plus any gendered label forms
function p.Label (q_id)
    -- We disable the following check, since it requires an expensive call:
    --   if wd.isHuman(q_id) then
    --       return p.Person(q_id)
    --   end

    local gendered = wd.getGenderedLabels(q_id)
    local male, female = gendered.male, gendered.female

    if male.lexeme and female.lexeme then
        return p.GenderedLexeme(male.lexeme, female.lexeme)
    elseif male.lexeme or female.lexeme then
        return p.Lexeme(male.lexeme or female.lexeme)
    elseif gendered.unspecified.lexeme then
        return p.Lexeme(gendered.unspecified.lexeme)
    end

    -- No lexeme is linked: fall back to the plain label. The variable is
    -- declared local; the original assigned a global named "lexeme" here.
    local lexeme = l.newLexeme(wd.getLabel(q_id), "noun")
    -- NOTE(review): the features are passed as an array ({"gender", ...})
    -- rather than a keyed table; confirm this is the shape addForm expects.
    if male.label then
        lexeme.addForm(male.label, {"gender", "masculine"})
    end
    if female.label then
        lexeme.addForm(female.label, {"gender", "feminine"})
    end
    return lexeme
end
-- Example of the use of a sub-template as a function
-- Parameters:
--   num:  value bound to the template variable "num" (fed to Cardinal)
--   noun: value bound to the template variable "noun" (the root slot)
-- Returns: whatever te.evaluateTemplate returns for the sub-template
function p.QuantifiedNoun(num, noun)
    local template = "{nummod:Cardinal(num)} {root:noun}"
    local bindings = { num = num, noun = noun }
    return te.evaluateTemplate(template, bindings)
end
-- Invokes either the Person function or the Pronoun function, depending on pronominalize
-- If neither a q_id nor pronominalize are set, it will return an empty dummy noun
-- Parameters:
--   q_id:          item identifier; only string values starting with "Q"
--                  followed by digits count as set (nil is accepted)
--   pronominalize: the string "true" requests a pronoun
-- Returns: the result of functions.Pronoun or functions.Person, or an empty
--          dummy noun when neither argument is set
-- NOTE(review): "functions" is not defined in this module; presumably it is a
-- global table supplied by the caller that also holds the language-specific
-- Pronoun implementation - confirm.
function p.PersonOrPronoun(q_id, pronominalize)
    -- Guard the pattern match: calling q_id:match on nil would raise, although
    -- the "no q_id" case is explicitly supported by this function.
    local has_item = type(q_id) == "string" and q_id:match("^Q%d+") ~= nil
    local use_pronoun = pronominalize == "true"

    if has_item then
        if use_pronoun then
            return functions.Pronoun(q_id)
        end
        return functions.Person(q_id)
    end

    if use_pronoun then
        return functions.Pronoun()
    end
    return l.newLexeme("", "noun")
end
-- Generates a generic date expression of the form day.month.year
-- TODO: This should really use CLDR to get a language-specific expression
-- Parameters:
--   date: table with optional fields day, month and year
-- Returns: a "noun" lexeme whose lemma joins the usable fields with "."
function p.Date ( date )
    local parts = {}

    -- Day: kept when present and positive
    local day = date.day
    if day and tonumber(day) > 0 then
        parts[#parts + 1] = tostring(day)
    end

    -- Month: kept when present and within 1..12
    local month = date.month
    if month and tonumber(month) > 0 and tonumber(month) <= 12 then
        parts[#parts + 1] = tostring(month)
    end

    -- Year: kept when present and not equal to 0
    local year = date.year
    if year and tonumber(year) ~= 0 then
        parts[#parts + 1] = tostring(year)
    end

    return l.newLexeme(table.concat(parts, '.'), "noun")
end
-- Generates a generic ordinal expression of the form "number."
-- TODO: This should really use CLDR to get a language-specific expression
-- Parameters:
--   number: the value to render; it is converted with tostring
-- Returns: an "adjective" lexeme whose lemma is the value followed by "."
function p.Ordinal ( number )
    local text = tostring(number) .. '.'
    return l.newLexeme(text, "adjective")
end
-- Export the table of functions defined above as the module's value
return p