Module:Sandbox/AbstractWikipedia/Lexemes
This is the lexemes module of the Abstract Wikipedia template-renderer prototype.
It defines the internal lexeme datatype and various methods to manipulate it. This type is based on the lexeme type discussed in the Abstract Wikipedia template proposal.
The handling of the lexemes' features is done through the UnifiableFeatures module.
local p = {}
local uf = require("Module:Sandbox/AbstractWikipedia/UnifiableFeatures")
local gf = require("Module:Sandbox/AbstractWikipedia/GrammaticalFeatures")
-- Builds a feature list from a table of category-feature pairs.
-- Each category is mapped to the value returned by uf.createNewFeature for
-- its feature (called an "index" throughout this module). The returned table
-- exposes only accessor functions; the category->index map itself stays
-- private to the closure.
-- @param new_features table of category = feature pairs (must not be nil)
-- @return table of functions operating on the private feature map
local function featureList ( new_features )
    local by_category = {} -- category -> feature index
    local api = {}

    -- Raises an error if the category already has an entry.
    -- Level 3 reports the error at the caller of the public function that
    -- invoked this helper.
    local function assertUnusedCategory ( category )
        if by_category[category] then
            error ("Category "..category.." already exists", 3)
        end
    end

    -- Registers a new feature under a not-yet-used category and returns the
    -- index obtained from uf.createNewFeature.
    function api.addFeature ( category, feature )
        assertUnusedCategory(category)
        local index = uf.createNewFeature(feature)
        by_category[category] = index
        return index
    end

    -- Returns whatever uf.getFeature yields for the index stored under the
    -- category.
    function api.getFeature ( category )
        return uf.getFeature(by_category[category])
    end

    -- Returns the raw index stored under the category (nil if absent).
    function api.getFeatureIndex ( category )
        return by_category[category]
    end

    -- Returns a pairs() iterator over (category, index) entries.
    function api.featureIterator ()
        return pairs(by_category)
    end

    -- Counts the stored categories by walking the map.
    function api.numFeatures ()
        local total = 0
        for _ in pairs(by_category) do
            total = total + 1
        end
        return total
    end

    -- Binds a not-yet-used category to an already existing index.
    -- Raises an error if uf.getFeature returns a falsy value for the index.
    function api.setFeatureIndex ( category, index )
        assertUnusedCategory(category)
        local existing = uf.getFeature(index)
        if not existing then
            error ("Index "..index.." points to inexistent feature.")
        end
        by_category[category] = index
    end

    -- Debugging aid: logs every category with its current feature.
    function api.listFeatures ()
        for category, index in pairs(by_category) do
            mw.log(category.." == "..tostring(uf.getFeature(index)))
        end
    end

    -- Populate the list with the initial category-feature pairs.
    for category, feature in pairs(new_features) do
        api.addFeature(category, feature)
    end

    return api
end
-- Builds a form: a spelling together with its own feature list.
-- The feature-list functions are reachable directly on the form through the
-- metatable's __index, and converting the form to a string yields its
-- spelling.
-- @param spelling the spelling stored in the form
-- @param new_features table of category = feature pairs passed to featureList
-- @return the new form table
local function newForm ( spelling, new_features )
    local form = {}
    form.spelling = spelling
    form.features = featureList(new_features)

    -- __tostring handler: a form prints as its spelling.
    local function formToString ( self )
        return self.spelling
    end

    -- for debugging purposes: logs the spelling (with an optional index
    -- label) followed by the form's features
    function form.log ( index )
        local label = index or ""
        mw.log("Form "..label..": '"..form.spelling.."'")
        form.features.listFeatures()
    end

    setmetatable(form, { __index = form.features, __tostring = formToString })
    return form
end
-- Creates a new lexeme: a lemma, a part of speech, a list of lexeme-level
-- features (constraints) and an initially empty sequence of forms.
-- Converting the lexeme to a string yields the spelling of its first form,
-- or the lemma when it has no forms.
-- @param lemma the lemma of the lexeme
-- @param part_of_speech stored in the `pos` field
-- @param new_features optional table of category = feature pairs
-- @return the new lexeme table
function p.newLexeme ( lemma, part_of_speech, new_features )
    local lexeme = {
        lemma = lemma,
        pos = part_of_speech,
        features = featureList(new_features or {}),
        forms = {},
    }

    -- __tostring handler. Deliberately not named `tostring`, so that the
    -- built-in stays usable inside this constructor.
    local function lexemeToString ( self )
        if #self.forms > 0 then
            return self.forms[1].spelling
        end
        return self.lemma
    end

    -- Feature functions are accessible at the lexeme level for convenience
    setmetatable(lexeme, { __index = lexeme.features, __tostring = lexemeToString })

    -- Adds a new form and returns it
    -- @param spelling the spelling of the form
    -- @param form_features optional table of category = feature pairs
    function lexeme.addForm ( spelling, form_features )
        form_features = form_features or {}
        local form = newForm(spelling, form_features)
        table.insert(lexeme.forms, form)
        return form
    end

    -- Clears all forms and optionally creates a new single form.
    -- This is handy when we want to overwrite the existing forms
    -- @param new_single_form optional spelling of the single replacement form
    function lexeme.replaceByForm ( new_single_form )
        lexeme.forms = {}
        if new_single_form then
            lexeme.addForm(new_single_form)
        end
    end

    -- Sorts the forms in place according to gf.cannonical_order
    function lexeme.sortForms ()
        -- Strict "comes before" comparator. Categories are examined in the
        -- order given by gf.cannonical_order; each entry names its category
        -- and maps a feature value to a rank.
        local function compare_forms ( form1, form2 )
            for _, category_order in ipairs(gf.cannonical_order) do
                local category = category_order.category
                local rank1 = category_order[form1.getFeature(category)]
                local rank2 = category_order[form2.getFeature(category)]
                if rank1 ~= rank2 then
                    if not rank2 then
                        -- only form1 has a rank: it goes first
                        return true
                    elseif not rank1 then
                        -- only form2 has a rank: it goes first
                        return false
                    else
                        return rank1 < rank2
                    end
                end
            end
            -- If all canonical features are equal, treat the form with
            -- fewer features overall as smaller
            return (form1.numFeatures() < form2.numFeatures())
        end
        table.sort(lexeme.forms, compare_forms)
    end

    -- This function removes the forms which don't match the general lexeme
    -- features/constraints.
    -- Returns the number of forms which are kept
    function lexeme.filterForms ()
        -- Both working variables are local: they must not leak into the
        -- global environment.
        local kept_forms = {}
        -- Iterate on forms
        for _, form in ipairs(lexeme.forms) do
            local keep_form = true
            -- Iterate on lexeme constraints
            for category, index in lexeme.featureIterator() do
                -- Note that if the form lacks the category, it can be kept
                local form_feature_index = form.getFeatureIndex(category)
                -- In a more strict mode, we should require the form features
                -- to strictly subsume the constraints.
                if (form_feature_index and not uf.unifiable(form_feature_index, index)) then
                    mw.log("Discard form "..form.spelling.." due to mismatch with feature '"..uf.getFeature(index).."' of category "..category)
                    keep_form = false
                    break
                end
            end
            if keep_form then
                mw.log("Keeping form "..form.spelling)
                table.insert(kept_forms, form)
            end
        end
        lexeme.forms = kept_forms
        return #kept_forms
    end

    -- for debugging purposes: logs the lemma, part of speech, lexeme
    -- features and every form in sequence order
    function lexeme.log ()
        -- Read the field rather than the constructor argument, so the log
        -- agrees with what __tostring reports.
        mw.log("Lemma: "..lexeme.lemma.." ("..lexeme.pos..")")
        lexeme.features.listFeatures()
        for index, form in ipairs(lexeme.forms) do
            form.log(index)
        end
    end

    return lexeme
end
-- Unifies the feature of category1 in lexeme1 with the feature of category2
-- in lexeme2. When category2 is omitted, category1 is used for both lexemes.
-- @param category1 category looked up in lexeme1
-- @param lexeme1 first lexeme
-- @param lexeme2 second lexeme
-- @param category2 optional category looked up in lexeme2
-- @return the result of the unification; raises an error (reported at the
--         caller) when uf.unify returns nil
function p.unifyFeatures ( category1, lexeme1, lexeme2, category2 )
    -- Unify the same category across both lexemes, if only one is provided
    category2 = category2 or category1
    local index1 = lexeme1.getFeatureIndex(category1)
    local index2 = lexeme2.getFeatureIndex(category2)

    -- Both lexemes already carry a feature: unify the two
    if index1 and index2 then
        local unified = uf.unify(index1, index2)
        if unified == nil then
            error ("Features "..uf.getFeature(index1).." and "..uf.getFeature(index2).." are not unifiable", 2)
        end
        return unified
    end

    -- Only lexeme1 has a feature: point lexeme2's category at it
    if index1 then
        lexeme2.setFeatureIndex(category2, index1)
        return uf.getFeature(index1)
    end

    -- Only lexeme2 has a feature: point lexeme1's category at it
    if index2 then
        lexeme1.setFeatureIndex(category1, index2)
        return uf.getFeature(index2)
    end

    -- Neither has a feature: create a new empty one and share it between
    -- the two lexemes
    local shared_index = lexeme1.addFeature(category1, '')
    lexeme2.setFeatureIndex(category2, shared_index)
    return ''
end
-- Unifies the feature a lexeme holds for a category with a given feature.
-- If the lexeme has no feature for the category yet, the given feature is
-- simply added and returned.
-- @param category category looked up in the lexeme
-- @param lexeme the lexeme whose feature is unified
-- @param feature the feature to unify with
-- @return the result of the unification; raises an error (reported at the
--         caller) when uf.unifyWithFeature returns nil
function p.unifyWithFeature ( category, lexeme, feature )
    local index = lexeme.getFeatureIndex(category)

    if index then
        local unified = uf.unifyWithFeature(index, feature)
        if unified == nil then
            error ("Features "..uf.getFeature(index).." and "..feature.." are not unifiable", 2)
        end
        return unified
    end

    -- No feature of this category yet: create it
    lexeme.addFeature(category, feature)
    return feature
end
return p