-- Module:Formatnum
-- Module documentation
-- [create]
-- This module is necessary for Module:Wikidata
-- Export table: the public functions are attached to `p`, which is returned at the end of the module.
local p = {}
-- frequently used functions
-- (library functions are cached in locals so each use avoids repeated global/table lookups)
-- Standard `math` and `string` library functions, applied to the ASCII form of the number.
local floor = math.floor
local strLen = string.len
local strRep = string.rep
local strFind = string.find
local strSub = string.sub
local strGsub = string.gsub
-- `mw.ustring` counterparts, applied to the localized text returned by mw.language:formatNum().
local textSub = mw.ustring.sub
local textRep = mw.ustring.rep
local textLen = mw.ustring.len
-- `mw.language` helpers used to validate language codes and to obtain a language object.
local isSupportedLanguage = mw.language.isSupportedLanguage
local isKnownLanguageTag = mw.language.isKnownLanguageTag
local getLanguage = mw.getLanguage
-- Digit substitutions for languages whose native digits are not produced by
-- mw.language:formatNum() in the core Lua libraries for MediaWiki.
-- Shape: digits[languageCode][asciiDigit] = localizedDigit, where asciiDigit is one of
-- the one-character strings '0'..'9' and localizedDigit is a (possibly multi-byte) string.
-- The per-language maps are derived from ordered glyph lists (glyph for 0 first, 9 last).
local digits = {}
do
	-- Turn an ordered list of ten glyphs into a map keyed by the ASCII digit strings.
	local function digitMap(glyphs)
		local map = {}
		for position, glyph in ipairs(glyphs) do
			map[tostring(position - 1)] = glyph
		end
		return map
	end

	digits['ml-Mlym'] = digitMap({ '൦', '൧', '൨', '൩', '൪', '൫', '൬', '൭', '൮', '൯' })
	digits['mn-Mong'] = digitMap({ '᠐', '᠑', '᠒', '᠓', '᠔', '᠕', '᠖', '᠗', '᠘', '᠙' })
	digits['ta'] = digitMap({ '௦', '௧', '௨', '௩', '௪', '௫', '௬', '௭', '௮', '௯' })
	digits['te'] = digitMap({ '౦', '౧', '౨', '౩', '౪', '౫', '౬', '౭', '౮', '౯' })
	digits['th'] = digitMap({ '๐', '๑', '๒', '๓', '๔', '๕', '๖', '๗', '๘', '๙' })
end
--- Format a number as text, optionally localized, with optional decimal padding/truncation.
--
-- @param number   Number or numeric string. When tonumber() cannot convert it, the
--                 argument is returned unchanged.
-- @param lang     Language code, or nil for plain ASCII output. When it is accepted by
--                 mw.language.isKnownLanguageTag(), the text is produced by
--                 mw.language:formatNum(); otherwise the ASCII form is used. Codes listed
--                 in `digits` additionally get their ASCII digits replaced afterwards.
-- @param prec     Requested number of decimals (number or numeric string). It is floored,
--                 ignored when negative or not numeric, and capped at 14. A value of 0
--                 leaves the decimals untouched. When greater than 0, missing decimals are
--                 padded with zeros and decimals in excess are truncated (not rounded).
--                 It is ignored for non-finite values (infinities, NaN).
-- @param compact  Truthy to request no grouping separators (`noCommafy`) from
--                 mw.language:formatNum(); has no effect on the non-localized output.
-- @return         The formatted string, or `number` itself when it is not numeric.
function p.formatNum(number, lang, prec, compact)
	-- Do not alter the specified value when it is not a valid number, return it as is
	local value = tonumber(number)
	if value == nil then
		return number
	end
	-- Basic ASCII-only formatting (without paddings)
	number = tostring(value)
	-- Split off the exponent, if any (incorrectly managed in mw.language:formatNum() and even
	-- forgotten due to an internal bug, e.g. in Hindi); it is re-appended after formatting.
	local exponent = ''
	local pos = strFind(number, '[Ee]')
	if pos ~= nil then
		exponent = strSub(number, pos + 1)
		number = strSub(number, 1, pos - 1)
	end
	-- Check the precision requested
	prec = tonumber(prec) -- nil if not specified as a true number
	if prec ~= nil then
		prec = floor(prec)
		if prec < 0 then
			prec = nil -- discard an incorrect precision (not a non-negative integer)
		elseif prec > 14 then
			prec = 14 -- maximum precision supported by tostring(number)
		end
	end
	-- Infinities and NaN have no decimals: padding them would yield text such as 'inf.00'
	if value ~= value or value == math.huge or value == -math.huge then
		prec = nil
	end
	-- Preprocess the precision in the ASCII string. After this step `prec` is the signed
	-- number of trailing characters to add (> 0) or to remove (< 0), and `dot` is the
	-- decimal separator still to be inserted ('' when none must be added).
	local dot = '.'
	if (prec or 0) > 0 then
		pos = strFind(number, dot, 1, true) -- plain search, no pattern matching
		if pos ~= nil then
			prec = pos + prec - strLen(number) -- effective number of trailing decimals to add or remove
			dot = '' -- already present
		--else dot and precision padding must be added
		end
	else
		dot = '' -- don't add dot
		prec = 0 -- don't alter the precision
	end
	if lang ~= nil and isKnownLanguageTag(lang) == true then
		-- Convert number to localized digits, decimal separator, and group separators
		local language = getLanguage(lang) -- caveat: can load localized resources for up to 20 languages
		if compact then
			number = language:formatNum(tonumber(number), { noCommafy = 'y' })
		else
			number = language:formatNum(tonumber(number))
		end
		-- Postprocessing the precision
		if prec > 0 then
			-- Format a non-integer constant to discover the localized separator and zero digit
			local zero = language:formatNum(1.04)
			if dot ~= '' then -- only if adding dot is required
				dot = textSub(zero, 2, 2) -- decimal separator of formatted constant
			end
			zero = textSub(zero, 3, 3) -- first decimal of formatted constant
			number = number .. dot .. textRep(zero, prec)
		elseif prec < 0 then
			-- TODO: rounding of last decimal; here only truncate decimals in excess
			number = textSub(number, 1, textLen(number) + prec)
		end
		-- Append the localized base-10 exponent without grouping separators
		-- (there's no reliable way to detect a localized leading symbol 'E')
		if exponent ~= '' then
			number = number .. 'E' .. language:formatNum(tonumber(exponent), { noCommafy = 'y' })
		end
	else -- not localized, ASCII only
		-- Postprocessing the precision
		if prec > 0 then
			number = number .. dot .. strRep('0', prec)
		elseif prec < 0 then
			-- TODO: rounding of last decimal; here only truncate decimals in excess
			number = strSub(number, 1, strLen(number) + prec)
		end
		-- Append the base-10 exponent without grouping separators
		if exponent ~= '' then
			number = number .. 'E' .. exponent
		end
	end
	-- Special cases for substitution of ASCII digits (missing support in Lua core libraries for some languages).
	-- A single pass with the per-language table as replacement: every ASCII digit is looked up
	-- in the table, and a digit without an entry is left unchanged.
	-- ('mw.ustring' not needed for pattern matching; faster with 'string')
	local substitutions = lang ~= nil and digits[lang]
	if substitutions then
		number = (strGsub(number, '%d', substitutions)) -- parentheses drop the match count
	end
	return number
end
-- Frame whose arguments are used by this module; resolved on the first call to getArgs()
-- and reused afterwards.
local _parentFrame = nil

--- Return the argument table to read parameters from.
-- The parent of `frame` is preferred; when `frame` has no parent, `frame` itself is used.
-- The selected frame is cached, so later calls return the same table whatever `frame` is.
-- @param frame  frame object providing `getParent()` and `args`
-- @return       the `args` table of the selected frame
local function getArgs(frame)
	if _parentFrame ~= nil then
		return _parentFrame.args
	end
	local source = frame:getParent() -- costly
	if source == nil then
		source = frame
	end
	_parentFrame = source
	return source.args
end
-- Result of expanding {{PAGELANGUAGE}}; computed on the first call to getPageLang()
-- and reused afterwards.
local _pageLang = nil

--- Return the expansion of the {{PAGELANGUAGE}} magic word, as given by `frame:preprocess()`.
-- A nil or false result is replaced by the empty string. The value is cached, so
-- `frame` is only used by the first call.
-- @param frame  frame object providing `preprocess()`
-- @return       the cached expansion result
local function getPageLang(frame)
	if _pageLang ~= nil then
		return _pageLang
	end
	local code = frame:preprocess('{{PAGELANGUAGE}}') -- costly
	if not code then
		code = ''
	end
	_pageLang = code
	return code
end
--- Entry point for wikitext: reads the parameters, resolves the language, and formats the number.
--
-- Parameters read from the argument table returned by getArgs(frame):
--   number (or 1)  the value to format; passed as is to p.formatNum()
--   lang (or 2)    language code, or one of the compatibility values 'arabic-indic',
--                  'ml-old' and 'R' (raw, not localized)
--   prec           requested number of decimals
--   sep            any non-empty value requests the compact form (no grouping separators)
-- @param frame  frame object of the invocation
-- @return       the result of p.formatNum()
function p.main(frame)
	local args = getArgs(frame)
	local prec = args.prec or ''
	local sep = args.sep or ''
	local number = args.number or args[1] or ''
	local lang = args.lang or args[2] or ''
	-- Unnamed (positional) arguments keep their surrounding whitespace, which would make
	-- every comparison and language check below fail for a value such as ' fr '.
	if type(lang) == 'string' then
		lang = mw.text.trim(lang)
	end
	-- validate the language parameter within MediaWiki's caller frame
	if lang == 'arabic-indic' then -- only for compatibility ('arabic-indic' is not a SupportedLanguage)
		lang = 'fa' -- better support than 'ur', 'ks' or 'ps'
	elseif lang == 'ml-old' then -- only for compatibility ('ml-old' is not a SupportedLanguage)
		lang = 'ml-Mlym'
	elseif lang == 'R' then -- only for compatibility ('R' is not a SupportedLanguage)
		lang = nil -- not localized (raw)
	elseif lang == '' or not isSupportedLanguage(lang) then
		-- Note that 'SupportedLanguages' are not necessarily 'BuiltinValidCodes', and so they are not necessarily
		-- 'KnownLanguages' (with a language name defined at least in the default localisation of the local wiki).
		-- But they are all ValidLanguageCodes (suitable as Wiki subpages or identifiers: no slash, colon, HTML tags, or entities).
		-- In addition, they do not contain any capital letter in order to be unique in page titles (a restriction that
		-- does not exist in BCP47), but they may violate the standard format of BCP47 language tags for specific needs
		-- in MediaWiki.
		-- Empty/unspecified and unsupported languages fall back to the value returned by getPageLang(),
		-- i.e. the expansion of {{PAGELANGUAGE}}.
		lang = getPageLang(frame)
	end
	return p.formatNum(number, lang, prec, sep ~= '')
end
return p