Module:Multilingual description/sort

From "PTTLink Wiki"
Jump to navigation Jump to search

Ordered by direction (all LTR scripts first, then RTL scripts), then by script, then alphabetically (by name) in each script.

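The script groups appear in this order: Latin; Greek or Coptic; Cyrillic; other European alphabets; Devanagari; other North Indian abugidas; South Indian abugidas; other South-East Asian abugidas (including Tibetan); North-Western, African and Asian syllabaries; Korean scripts; Japanese scripts; Sinographic scripts; then (RTL) Hebrew; (RTL) Arabic; other (RTL) abjads. Languages written in both Latin and Cyrillic are listed once per script variant, in the matching script group.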

A comprehensive test of the rendering (and other checks) of language codes and native names is available on Module talk:Multilingual description/sort/testcases. The table shown there is sorted according to this list; if you see green rows at the bottom, they are languages missing from this sort list and should be added to it.

Additionally you should map the writing directions (rtl or ltr) of all these languages in Module:Dir/RTL overrides to avoid fallbacks to a costly MediaWiki API (if it knows the language) or a last-chance fallback to "ltr" (which may be incorrect).


--[[
  Ordered list of language codes: the position of a code in this sequence is its sort rank.

  The documented sort order is by writing direction (all LTR scripts first, then RTL scripts), then by script,
  then alphabetically by displayed native name (as generated by {{#language: code}}), using the default DUCET order.
  This allows easier selection by users reading the lists of languages in order to find their own.
  Please test this order, and keep it as complete as possible, including legacy codes still used in MediaWiki.
  Any missing language will be sorted after all languages listed below, just using its internal language code.

  Each code must be a quoted lowercase string and must appear only once: the quickTests function attached to
  this table's metatable (further down in this module) rejects duplicates and malformed codes.
]]
-- In the alphabetic sections, the bracketed letter opening each row is the initial letter of the displayed
-- native names sorted on that row (it is not derived from the language codes themselves).
local p = {
-- LTR scripts
  -- Latin alphabets
    --[[A]] 'sma', 'ace', 'abs', 'ang', 'af', 'ak', 'gsw', 'als', 'ase', 'smn', 'an', 'rup', 'roa-rup', 'frp', 'ast', 'atj', 'gn', 'ay', 'az',
    --[[B]] 'ban', 'bm', 'nan', 'zh-min-nan', 'bjn', 'map-bms', 'btm', 'bbc', 'bbc-latn', 'sje', 'bcl', 'bi', 'bar', 'bs', 'brh', 'br', 'en-gb',
    --[[C]] 'en-ca', 'cps', 'ca', 'ceb', 'cs', 'ch', 'cbk-zam', 'ny', 'sn', 'tum', 'cho', 'sei', 'co', 'cy',
    --[[D]] 'dag', 'da', 'se', 'pdc', 'de', 'de-formal', 'nv', 'dsb', 'na', 'dtp',
    --[[E]] 'mh', 'et', 'egl', 'eml', 'en', 'es', 'es-formal', 'es-419', 'eo', 'ext', 'eu', 'ee',
    --[[F]] 'wls', 'hif', 'hif-latn', 'fil', 'fo', 'fr', 'frc', 'fy', 'ff', 'fur',
    --[[G]] 'ga', 'gv', 'sm', 'gag', 'gd', 'gl', 'aln', 'ki', 'gom-latn', 'guw',
    --[[H]] 'ha', 'ha-latn', 'haw', 'hil', 'ho', 'hmn', 'hsb', 'hr', 'hrx', 'gor',
    --[[I]] 'io', 'ig', 'rw', 'ilo', 'id', 'ia', 'ie', 'ike-latn', 'ik', 'bto', 'xh', 'zu', 'is', 'it',
    --[[J]] 'jv', 'smj', 'jut',
    --[[K]] 'quc', 'rmf', 'kbp', 'kea', 'kl', 'kr', 'pam', 'krl', 'csb', 'kw', 'krj', 'rn', 'sw', 'kiu', 'kg', 'avk', 'ses', 'ht', 'kri', 'gcf', 'gcr', 'ku', 'ku-latn', 'fkv', 'kj',
    --[[L]] 'jbo', 'lld', 'lad', 'ltg', 'la', 'lv', 'lzz', 'to', 'lb', 'nia', 'lt', 'lij', 'li', 'ln', 'lfn', 'liv', 'olo', 'lg', 'lmo', 'mad',
    --[[M]] 'hu', 'hu-formal', 'vmf', 'mg', 'mt', 'mi', 'mrh', 'arn', 'srq', 'fit', 'ms', 'min', 'cdo', 'mwl', 'lus', 'mus',
    --[[N]] 'fj', 'nah', 'nap', 'nl', 'nl-informal', 'nds-nl', 'cr', 'niu', 'frr', 'pih', 'no', 'non', 'nb', 'nn', 'nrf', 'nrm', 'nov', 'nys',
    --[[O]] 'uz-latn', 'uz', 'oc', 'ojb', 'om', 'ng', 'de-at', 'hz',
    --[[P]] 'pfl', 'pag', 'ami', 'pap', 'jam', 'pcd', 'pms', 'pwn', 'nds', 'pdt', 'pl', 'pt', 'pt-br', 'prg',
    --[[Q]] 'aa', 'kaa', 'kk-latn', 'kk-tr', 'crh', 'crh-latn',
    --[[R]] 'ty', 'ksh', 'ro', 'rmc', 'rmy', 'rgn', 'rm', 'qug', 'qu',
    --[[S]] 'sms', 'xsy', 'szy', 'sg', 'sc', 'sdc', 'sli', 'de-ch', 'sco', 'trv', 'stq', 'st', 'nso', 'tn', 'sq', 'scn', 'loz', 'simple', 'ss', 'sk', 'sl', 'szl', 'so', 'srn', 'sr-latn', 'sr-el', 'sh'--[[latn/cyrl]], 'sh-latn', 'sh-el', 'su', 'fi', 'sv',
    --[[T]] 'shy', 'shy-latn', 'tl', 'tzl', 'kab', 'roa-tara', 'rif', 'shi-latn', 'shi', 'tt-latn', 'tay', 'tet', 'din', 'tg-latn', 'tpi', 'tokipona', 'chy', 've', 'aeb-latn', 'tr', 'tk', 'tru', 'tw', 'kcg',
    --[[U]] 'sju', 'ug-latn',
    --[[V]] 'vot', 'za', 'vec', 'vep', 'vi', 'ruq', 'ruq-latn', 'vo', 'vro', 'fiu-vro',
    --[[W]] 'wa', 'bci', 'guc', 'vls', 'war', 'wo',
    --[[X]] 'ts',
    --[[Y]] 'yo',
    --[[Z]] 'diq', 'zea', 'sgs', 'bat-smg',
  -- Greek or Coptic alphabets
    'grc', 'el', 'pnt', 'cop',
  -- Cyrillic alphabets
    --[[А]] 'av', 'ady', 'ady-cyrl', 'kbd', 'kbd-cyrl', 'ab', 'alt',
    --[[Б]] 'ba', 'be', 'be-tarask', 'be-x-old', 'bxr', 'bg',
    --[[В]] 'ruq-cyrl',
    --[[Г]] 'inh',
    --[[И]] 'os',
    --[[К]] 'sjd', 'kv', 'krc', 'kum', 'crh-cyrl', 'ky', 'mrj', 'kk', 'kk-cyrl', 'kk-kz',
    --[[Л]] 'lbe', 'lez',
    --[[М]] 'mk', 'mdf', 'mo', 'mn',
    --[[Н]] 'ce',
    --[[О]] 'mhr',
    --[[П]] 'koi',
    --[[Р]] 'rue', 'ru',
    --[[С]] 'sah', 'sty', 'cu', 'sr-cyrl', 'sr-ec', 'sr'--[[cyrl/latn]], 'sh-cyrl', 'sh-ec',
    --[[Т]] 'tt-cyrl', 'tt', 'tly', 'tly-cyrl', 'tg-cyrl', 'tg', 'tyv',
    --[[У]] 'udm', 'uz-cyrl', 'uk',
    --[[Х]] 'xal',
    --[[Ч]] 'cv',
    --[[Э]] 'myv',
  -- Other European alphabets
    'xmf', 'ka',
    'hyw', 'hy',
  -- Devanagari abugida
    'anp', 'awa', 'ks-deva', 'ks', 'gom-deva', 'gom', 'dty', 'new', 'ne', 'pi', 'bho', 'bh', 'mr', 'rwr', 'mai', 'sa', 'hi',
  -- Other North Indian abugidas
    'as', 'bn', 'bpy', 'pa',
  -- South Indian abugidas
    'gu', 'or', 'ta', 'te', 'kn', 'tcy', 'ml', 'si', 'mni',
  -- Other South-East Asian abugidas, including Tibetan (one row per script)
    'th',
    'lo',
    'dz', 'bo',
    'bug',
    'kjp', 'shn', 'mnw', 'my',
    'km',
    'nod',
    'ban-bali',
    'sat',
  -- North-Western syllabaries
    'chr',
    'ike-cans', 'iu',
    'got',
  -- African syllabaries
    'tzm', 'zgh', 'shi-tfng', 'rif-tfng', 'sjs',
    'ti', 'am',
  -- Asian syllabaries
    'ii',
  -- Korean scripts (alphabet and sinograms)
    'ko-kp', 'ko', 'ko-kr',
  -- Japanese scripts (syllabaries and sinograms)
    'ja',
  -- Sinographic scripts (plus Bopomofo syllabary)
    'zh', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-mo', 'zh-hans', 'zh-hant', 'zh-hk', 'zh-my',
    'wuu', 'hak', 'lzh', 'zh-classical', 'yue', 'zh-yue', 'gan', 'gan-hant', 'gan-hans',
-- RTL scripts
  -- Hebrew abjads
    'yi', 'ydd', 'he',
  -- Arabic abjads
    --[[ئ]] 'ug-arab', 'ug',
    --[[ا]] 'ur', 'ary', 'ar',
    --[[ب]] 'bqi', 'ms-arab',
    --[[پ]] 'ps', 
    --[[ت]] 'pnb', 'aeb-arab', 'aeb', 'azb',
    --[[ج]] 'arq', 'bcc',
    --[[ر]] 'bgn', 
    --[[س]] 'sd', 'skr-arab', 'skr',
    --[[ف]] 'fa', 'fa-af', 'prd',
    --[[ق]] 'kk-arab', 'kk-cn',
    --[[ك]] 'ku-arab',
    --[[ک]] 'ks-arab', 'khw', 'ckb', 'sdh',
    --[[گ]] 'glk',
    --[[ل]] 'luz', 'ota', 'lki', 'lrc',
    --[[م]] 'mzn', 'arz',
    --[[ه]] 'ha-arab', 
  -- Other Semitic abjads (and remaining RTL scripts)
    'arc', 'dv', 'nqo',
-- Additional language codes that still need to be sorted by native name can be temporarily placed here
-- (they must still be quoted, lowercase and not already listed above)
}

-- The self-check is stored in the metatable rather than in `p` itself, so that `p` only holds language codes.
setmetatable(p, {
    -- Self-check of the sort list `p`; takes no argument.
    --
    -- Returns true when `p` is a proper sequence (keys 1..n without any gap) of distinct language codes,
    -- each one being 2 to 16 bytes long, starting with a lowercase ASCII letter, ending with a lowercase
    -- ASCII letter or a digit, and otherwise made only of lowercase ASCII letters, digits and hyphens.
    --
    -- Otherwise returns false, plus a message (starting with ': ', meant to be appended to a caller's
    -- prefix) locating an offending entry; when all keys are valid, this is the lowest faulty index.
    quickTests = function()
        -- Builds the (false, message) pair returned for the offending entry.
        local function failure(key, value)
            return false, ': invalid sequence of distinct lowercase language codes at p[' .. tostring(key) .. '] = "' .. tostring(value) .. '"'
        end

        -- Pass 1: every key must be a positive integer; remember the largest one.
        local last = 0
        for key, value in pairs(p) do
            if type(key) ~= 'number' or key < 1 or key ~= math.floor(key) then
                return failure(key, value)
            end
            if key > last then
                last = key
            end
        end

        -- Pass 2: walk the indexes in order, so that a gap in the sequence is detected too.
        -- A gap typically comes from an unquoted code in the table constructor (it evaluates to nil):
        -- pairs() silently skips it, but ipairs() or the # operator would stop there and drop the
        -- remaining codes. A nil entry fails the type check below, which also ends the walk early
        -- even when `last` is abnormally large.
        local seen = {}
        for index = 1, last do
            local lang = p[index]
            if type(lang) ~= 'string' or #lang < 2 or #lang > 16
            or lang:find('^[a-z][%-0-9a-z]*[0-9a-z]$') ~= 1
            or seen[lang] then
                return failure(index, lang)
            end
            seen[lang] = true
        end
        return true
    end
})
--[[ To test this module in the Lua console (the following line must return true):
=getmetatable(p).quickTests()
--]]

return p