-- Module:AutosortTable
-- Module documentation: [create]
--[[
AutosortTable: Creates a table which is automatically sorted
Usage: (Remove the hidden comments before use)
{{#invoke: AutosortTable|create
|class = wikitable <!-- Class for the entire table -->
|style = width:50% <!-- CSS for the entire table -->
|sep = -- <!-- Separator used to prefix cells, such as '--' or '!!' (cannot use any '|' or '=') -->
|order = 2, 1 <!-- Order for sorting preference, takes a comma-separated list of column numbers -->
|nsort = 2 <!-- Columns which use numeric sorting. Takes a comma-separated list of column numbers -->
|grpsep = , <!-- Group separator in numeric values (defaults to ','), to disambiguate decimal separator -->
|valsep = ! <!-- Sortkey separator before displayed text in a cell, such as '!' (cannot use any '|' or '=') -->
|header = -- Name -- Age <!-- Table header (uses sep) -->
|colstyle = -- -- text-align:right <!-- CSS styles for cells in each column (uses sep) -->
| -- Bob -- 20 <!-- Row 1 (uses sep) -->
| -- Peter -- 35 <!-- Row 2 (uses sep) -->
| -- John -- 35.1!35 <!-- Row 3 (uses sep), sorted as 35.1, displayed as 35 -->
| -- James -- 50 <!-- Row 4 (uses sep) -->
| background-color: #FFDDDD -- Henry -- 45 <!-- Row 5 (uses sep), with CSS for the whole row before the two cells -->
}}
]]
-- The table returned at the end of the module; the entry points are attached to it.
local _module = {}
-- Local aliases for the library functions used throughout the module:
-- the byte-oriented `string` functions first, then the Unicode-aware `mw` ones.
local strGsub, strMatch = string.gsub, string.match
local textIndexOf, textSub = mw.ustring.find, mw.ustring.sub
local textGsub, textChar = mw.ustring.gsub, mw.ustring.char
local textSplit, textTrim = mw.text.split, mw.text.trim
local htmlCreate = mw.html.create
-- Data for building the numeric comparator function below: conversion to ASCII.
-- Every non-ASCII character is written with the decimal byte escapes of its UTF-8
-- encoding, so that invisible spaces and fullwidth forms cannot be silently folded
-- to ASCII look-alikes when the source is copied or normalized.
--   classPattern: list of fragments of a character class '[...]' (joined further down);
--                 its first fragment lists every key of charSubst, so that each
--                 mapping below can actually be matched.
--   charSubst:    maps a matched character to its ASCII equivalent.
--   zeroes:       code point of the DIGIT ZERO of each Unicode decimal digit range
--                 (each range holds the ten digits 0..9 in increasing order).
local classPattern, charSubst, zeroes = {
	-- U+00A0, U+202F, U+2212, U+3000, U+3001, U+3002
	'\194\160\226\128\175\226\136\146\227\128\128\227\128\129\227\128\130' ..
	-- U+FE50, U+FE51, U+FE52, U+FE62, U+FE63
	'\239\185\144\239\185\145\239\185\146\239\185\162\239\185\163' ..
	-- U+FF07, U+FF0B, U+FF0C, U+FF0D, U+FF0E, U+FF61, U+FF64
	'\239\188\135\239\188\139\239\188\140\239\188\141\239\188\142\239\189\161\239\189\164',
}, {
	-- ASCII characters need no conversion:
	-- [' '] = ' ', -- U+0020 (SPACE)
	-- [','] = ',', -- U+002C (COMMA) -- see grpsep
	-- ['-'] = '-', -- U+002D (HYPHEN-MINUS)
	-- ['.'] = '.', -- U+002E (FULL STOP) -- see grpsep
	['\194\160'] = ' ', -- U+00A0 (NO-BREAK SPACE)
	['\226\128\175'] = ' ', -- U+202F (NARROW NO-BREAK SPACE)
	['\226\136\146'] = '-', -- U+2212 (MINUS SIGN)
	['\227\128\128'] = ' ', -- U+3000 (IDEOGRAPHIC SPACE)
	['\227\128\129'] = ',', -- U+3001 (IDEOGRAPHIC COMMA)
	['\227\128\130'] = '.', -- U+3002 (IDEOGRAPHIC FULL STOP)
	['\239\185\144'] = ',', -- U+FE50 (SMALL COMMA)
	['\239\185\145'] = ',', -- U+FE51 (SMALL IDEOGRAPHIC COMMA)
	['\239\185\146'] = '.', -- U+FE52 (SMALL FULL STOP)
	['\239\185\162'] = '+', -- U+FE62 (SMALL PLUS SIGN)
	['\239\185\163'] = '-', -- U+FE63 (SMALL HYPHEN-MINUS)
	['\239\188\135'] = "'", -- U+FF07 (FULLWIDTH APOSTROPHE)
	['\239\188\139'] = '+', -- U+FF0B (FULLWIDTH PLUS SIGN)
	['\239\188\140'] = ',', -- U+FF0C (FULLWIDTH COMMA)
	['\239\188\141'] = '-', -- U+FF0D (FULLWIDTH HYPHEN-MINUS)
	['\239\188\142'] = '.', -- U+FF0E (FULLWIDTH FULL STOP)
	['\239\189\161'] = '.', -- U+FF61 (HALFWIDTH IDEOGRAPHIC FULL STOP)
	['\239\189\164'] = ',', -- U+FF64 (HALFWIDTH IDEOGRAPHIC COMMA)
}, { -- Source: https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt
	--0x0030, -- 0030..0039 ; Decimal # Nd [10] DIGIT ZERO..DIGIT NINE
	0x0660, -- 0660..0669 ; Decimal # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
	0x06F0, -- 06F0..06F9 ; Decimal # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
	0x07C0, -- 07C0..07C9 ; Decimal # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
	0x0966, -- 0966..096F ; Decimal # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
	0x09E6, -- 09E6..09EF ; Decimal # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
	0x0A66, -- 0A66..0A6F ; Decimal # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
	0x0AE6, -- 0AE6..0AEF ; Decimal # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
	0x0B66, -- 0B66..0B6F ; Decimal # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
	0x0BE6, -- 0BE6..0BEF ; Decimal # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
	0x0C66, -- 0C66..0C6F ; Decimal # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
	0x0CE6, -- 0CE6..0CEF ; Decimal # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
	0x0D66, -- 0D66..0D6F ; Decimal # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
	0x0DE6, -- 0DE6..0DEF ; Decimal # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE
	0x0E50, -- 0E50..0E59 ; Decimal # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
	0x0ED0, -- 0ED0..0ED9 ; Decimal # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
	0x0F20, -- 0F20..0F29 ; Decimal # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
	0x1040, -- 1040..1049 ; Decimal # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
	0x1090, -- 1090..1099 ; Decimal # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
	0x17E0, -- 17E0..17E9 ; Decimal # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
	0x1810, -- 1810..1819 ; Decimal # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
	0x1946, -- 1946..194F ; Decimal # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
	0x19D0, -- 19D0..19D9 ; Decimal # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
	0x1A80, -- 1A80..1A89 ; Decimal # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
	0x1A90, -- 1A90..1A99 ; Decimal # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
	0x1B50, -- 1B50..1B59 ; Decimal # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
	0x1BB0, -- 1BB0..1BB9 ; Decimal # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
	0x1C40, -- 1C40..1C49 ; Decimal # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
	0x1C50, -- 1C50..1C59 ; Decimal # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
	0xA620, -- A620..A629 ; Decimal # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
	0xA8D0, -- A8D0..A8D9 ; Decimal # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
	0xA900, -- A900..A909 ; Decimal # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
	0xA9D0, -- A9D0..A9D9 ; Decimal # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
	0xA9F0, -- A9F0..A9F9 ; Decimal # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE
	0xAA50, -- AA50..AA59 ; Decimal # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
	0xABF0, -- ABF0..ABF9 ; Decimal # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
	0xFF10, -- FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
	0x104A0, -- 104A0..104A9 ; Decimal # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
	0x10D30, -- 10D30..10D39 ; Decimal # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
	0x11066, -- 11066..1106F ; Decimal # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
	0x110F0, -- 110F0..110F9 ; Decimal # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE
	0x11136, -- 11136..1113F ; Decimal # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE
	0x111D0, -- 111D0..111D9 ; Decimal # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
	0x112F0, -- 112F0..112F9 ; Decimal # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE
	0x11450, -- 11450..11459 ; Decimal # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
	0x114D0, -- 114D0..114D9 ; Decimal # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
	0x11650, -- 11650..11659 ; Decimal # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
	0x116C0, -- 116C0..116C9 ; Decimal # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
	0x11730, -- 11730..11739 ; Decimal # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
	0x118E0, -- 118E0..118E9 ; Decimal # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
	0x11950, -- 11950..11959 ; Decimal # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
	0x11C50, -- 11C50..11C59 ; Decimal # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
	0x11D50, -- 11D50..11D59 ; Decimal # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
	0x11DA0, -- 11DA0..11DA9 ; Decimal # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
	0x16A60, -- 16A60..16A69 ; Decimal # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE
	0x16B50, -- 16B50..16B59 ; Decimal # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
	0x1D7CE, 0x1D7D8, 0x1D7E2, 0x1D7EC, 0x1D7F6, -- 1D7CE..1D7FF ; Decimal # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
	0x1E140, -- 1E140..1E149 ; Decimal # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
	0x1E2F0, -- 1E2F0..1E2F9 ; Decimal # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
	0x1E950, -- 1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
	0x1FBF0, -- 1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
	-- # Total code points: 650 (including the ASCII digits, which are commented out above)
}
-- Extend the conversion data with every decimal digit range listed in zeroes:
-- each range adds one 'zero-nine' span to the character class, and ten
-- entries mapping its digits to the ASCII digits '0' to '9'.
for _, base in ipairs(zeroes) do
	classPattern[#classPattern + 1] = textChar(base) .. '-' .. textChar(base + 9)
	for offset = 0, 9 do
		charSubst[textChar(base + offset)] = tostring(offset)
	end
end
-- Join all the fragments into a single character class usable in a pattern.
classPattern = '[' .. table.concat(classPattern) .. ']'
-- Comparator factory for data-dependent sorting.
-- Uses a locale-dependent numeric format for numeric columns.
-- TODO: locale- and Unicode-dependent string collation for non-numeric.
--
-- Parameters:
--   orderLookup: sequence of column numbers, by decreasing sorting priority.
--   nsortLookup: set (column number -> true) of the columns sorted numerically.
--   descLookup:  set (column number -> true) of the columns sorted in descending order.
--   grpsep:      group separator character(s) discarded from numeric cells (defaults to ',').
-- Returns a function(a, b) usable with table.sort, where a and b are rows with
-- a 'data' field (the cells) and a 'key' field (used as the final tie-breaker).
local function comparator(orderLookup, nsortLookup, descLookup, grpsep)
	-- Escape every non-alphanumeric byte of the separator, so that the characters which
	-- are special inside a character class ('^', ']', '%' or '-') are taken literally.
	local escapedSep = strGsub(grpsep or ',', '%W', '%%%0')
	local discard = '[ \'+' .. escapedSep .. '_]' -- pattern for group separators or plus, to discard

	-- Returns the value used to compare a cell of the given column: a number (or nil
	-- when there is none) for numeric columns, the text (or nil) for the other ones.
	local function sortKey(cell, index)
		local numeric = nsortLookup[index]
		if type(cell) == 'table' then
			-- Use the explicit sort key (it should be prefiltered).
			local key = cell[1]
			if numeric then
				key = tonumber(key)
			end
			return key
		end
		if not numeric then
			return cell -- Cell content only, compared as is (nil when the row has no such cell).
		end
		-- Cell content only, infer a filtered sort key:
		-- convert Unicode to ASCII, then filter blanks and group separators.
		local text = strGsub(textGsub(cell or '', classPattern, charSubst), discard, '')
		-- Locate where the first number starts, so that an integer is not skipped
		-- in favour of a decimal number found later in the text.
		local start = string.find(text, '%-?%.?%d')
		if not start then
			return nil -- no digit at all
		end
		-- Decimal points are allowed. Scientific notation is not supported.
		return tonumber(strMatch(text, '^%-?%d*%.%d+', start) or strMatch(text, '^%-?%d+', start))
	end

	return function(a, b)
		local aCells, bCells = a.data, b.data
		for _, index in ipairs(orderLookup) do
			local aKey, bKey = sortKey(aCells[index], index), sortKey(bCells[index], index)
			if aKey ~= bKey then
				-- A missing key (nil) sorts after any value in ascending order,
				-- and before any value in descending order.
				if descLookup[index] then
					return aKey == nil or bKey ~= nil and bKey < aKey
				else
					return bKey == nil or aKey ~= nil and aKey < bKey
				end
			end
		end
		-- All the sorted columns are equal: fall back to the unique row key.
		return a.key < b.key
	end
end
--[[
Entry point for {{#invoke:AutosortTable|create|...}}: builds an HTML table whose
rows are sorted before being written.
frame: the frame object; frame.args holds the named parameters (class, style, sep,
       valsep, grpsep, order, descending, nsort or numeric, hidden, header, footer,
       colstyle) and the rows as positional parameters.
Returns the HTML of the table as a string.
]]
_module.create = function(frame)
	local textLen = mw.ustring.len
	-- Named parameters
	local args = frame.args
	local class = args.class
	local style = args.style
	local sep = args.sep or '' -- required, must not be empty
	if sep == '' then sep = '!!' end -- an empty separator cannot delimit cells: use the default
	local valsep = args.valsep or '' -- optional, may be empty (no explicit sort keys then)
	local grpsep = args.grpsep or ',' -- optional, use ',' by default
	local order = args.order or '' -- optional: without it, rows are only ordered by their key
	local desc = args.descending or ''
	local nsort = args.nsort or args.numeric or '' -- 'nsort' as in the usage notes; 'numeric' is still accepted
	local hidden = args.hidden or ''
	local header = args.header
	local footer = args.footer
	local colstyle = args.colstyle
	-- Lengths in characters, not in bytes: they are used as offsets for textSub.
	local seplen = textLen(sep) or #sep
	local valseplen = textLen(valsep) or #valsep
	-- Parse the comma-separated lists of column numbers
	local listSep = '%s*,%s*'
	local orderLookup, nsortLookup, descLookup, hiddenLookup = {}, {}, {}, {}
	for _, v in ipairs(textSplit(order, listSep)) do
		local column = tonumber(v)
		-- Skip an invalid entry rather than leaving a hole, which would end the list early.
		if column then orderLookup[#orderLookup + 1] = column end
	end
	for _, v in ipairs(textSplit(nsort, listSep)) do nsortLookup[tonumber(v) or 0] = true end
	for _, v in ipairs(textSplit(desc, listSep)) do descLookup[tonumber(v) or 0] = true end
	for _, v in ipairs(textSplit(hidden, listSep)) do hiddenLookup[tonumber(v) or 0] = true end
	-- Create the table
	local html = htmlCreate()
	local htable = html:tag('table')
	if class then htable:attr('class', class) end
	if style then htable:attr('style', style) end
	--[[
	Parses a row string.
	s: the row, made of an optional CSS followed by cells, each one prefixed by sep.
	key: a unique key assigned to the result so that equal rows do not cause sort errors.
	plain: when true, valsep is not looked for and all the cells are kept as strings.
	Returns a table with the fields key, css (nil if there is none), sort and data
	(the list of cells: a string, or a pair { sort key, displayed text }).
	]]
	local parse = function(s, key, plain)
		local css
		local firstSep = textIndexOf(s, sep, 1, true) -- true for matching a literal, not a pattern
		if firstSep then -- CSS (possibly empty) before the first separator
			css = textTrim(textSub(s, 1, firstSep - 1))
			if css == '' then css = nil end
			s = textSub(s, firstSep + seplen, -1)
		end
		-- Without any separator there is no CSS, and the whole text is kept as a single cell.
		local data = textSplit(s, sep, true) -- true for matching a literal, not a pattern
		-- Detect sort values before the value separator. An empty valsep would be
		-- found at the start of every cell, so it must disable the detection.
		if valsep ~= '' and not plain then
			for i, v in ipairs(data) do
				local valPos = textIndexOf(v, valsep, 1, true) -- true for matching a literal, not a pattern
				if valPos then
					data[i] = { textTrim(textSub(v, 1, valPos - 1)), textSub(v, valPos + valseplen, -1) }
				end
			end
		end
		return { key = key, css = css, sort = s, data = data }
	end
	--[[
	Writes a row to the table.
	css: CSS to apply to the row.
	data: The data (cells) of the row
	_type: Can be 'header', 'footer' or nil.
	Returns the created row.
	]]
	local writeHtml = function(css, data, _type)
		local row = htable:tag('tr')
		if css then row:attr('style', textTrim(css)) end
		for i, v in ipairs(data) do
			if not hiddenLookup[i] then
				local cell
				if _type == 'header' then
					-- Header: use the 'th' tag with scope="col"
					cell = row:tag('th')
					cell:attr('scope', 'col')
				elseif _type == 'footer' then
					-- Footer: Mark as 'sortbottom' so that it does not sort when the table is made user-sortable
					-- with the 'wikitable sortable' class
					cell = row:tag('td')
					cell:addClass('sortbottom')
				else
					-- Ordinary cell (may have an optional sort-value, separate from the display value)
					cell = row:tag('td')
					local cellCss = colstyle and colstyle[i]
					if cellCss then cellCss = textTrim(cellCss) end
					if cellCss and cellCss ~= '' then -- Apply the column styling, if necessary
						cell:attr('style', cellCss)
					end
				end
				if type(v) == 'table' then
					cell:attr('data-sort-value', textTrim(v[1]))
					v = v[2]
				end
				cell:wikitext(textTrim(v))
			end
		end
		return row
	end
	-- Parse the column styles (plain cells: a style is never a sort key / text pair)
	if colstyle then colstyle = parse(colstyle, -1, true).data end
	-- Write the header first
	if header then
		local headerData = parse(header)
		writeHtml(headerData.css, headerData.data, 'header')
	end
	-- Parse the data
	local data = {}
	for i, v in ipairs(args) do data[i] = parse(v, i) end
	-- Sorting with a comparator function (numeric columns before descending columns)
	table.sort(data, comparator(orderLookup, nsortLookup, descLookup, grpsep))
	-- Write the sorted data to the HTML output
	for _, v in ipairs(data) do
		writeHtml(v.css, v.data, nil)
	end
	-- Write the footer
	if footer then
		local footerData = parse(footer)
		writeHtml(footerData.css, footerData.data, 'footer')
	end
	return tostring(html)
end
return _module