Difference between revisions of "Module:Language/name/data"

From annadreambrush.com/wiki
Jump to navigation Jump to search
imported>Amalthea
(Removed redundancies and nil-entries)
m (1 revision imported)
 
(84 intermediate revisions by 7 users not shown)
Line 1: Line 1:
 +
-- put tables and their tables together
 +
local function __coalesce(...)
 +
    local coalesced = {}
 +
    for _, langslist in ipairs{...} do
 +
        for langcode, langnames in pairs(langslist) do
 +
            for _, langname in pairs(langnames) do
 +
                if not coalesced[langcode] then
 +
                    coalesced[langcode] = {}
 +
                end
 +
                table.insert(coalesced[langcode], langname)
 +
            end
 +
        end
 +
    end
 +
 +
    return coalesced
 +
end
 +
 +
-- make the keys lowercase
 +
local function __preprocess(t, first_of_array_in_array)
 +
    local preprocessed = {}
 +
    if first_of_array_in_array then
 +
    -- keep only the 1st language name for each code, excluding synonyms
 +
        for k, v in pairs(t) do
 +
            preprocessed[k:lower()] = {v[1]}
 +
        end
 +
    else
 +
        for k, v in pairs(t) do
 +
            preprocessed[k:lower()] = v
 +
        end
 +
    end
 +
 +
    return preprocessed
 +
end
 +
 +
-- all valid primary language subtags for BCP47 from IANA (most of them from ISO 639-1, -2 and -3 with some exclusions)
 +
local __iana_languages = __preprocess(require("Module:Language/data/iana languages"));
 +
-- ISO 639-3 contains additional 3-letter codes not inserted in the IANA database as they are aliased to 2-letter codes,
 +
-- but excludes some deleted codes still valid in BCP47 (some of them are aliased)
 +
local __iso_639_3      = __preprocess(require("Module:Language/data/ISO 639-3"));
 +
-- Wikimedia wikis uses some non-standard codes and a subset of IANA codes, plus composite codes
 +
local __wp_languages  = __preprocess(require("Module:Language/data/wp languages"), true);
 +
-- all valid script subtags for BCP47 from IANA (excluding special ISO 15924 codes)
 +
local iana_scripts    = __preprocess(require("Module:Language/data/iana scripts"));
 +
-- all valid region subtags for BCP47 from IANA (derived from ISO 3166-1 excluding special codes, and from 3-digit UN M.49 codes for groups of countries)
 +
local iana_regions    = __preprocess(require("Module:Language/data/iana regions"));
 +
 +
-- variant subtags from IANA; table format differs from the other IANA data tables
 +
local iana_variants = __preprocess(require("Module:Language/data/iana variants"));
 +
-- suppressed script subtags from IANA;
 +
local iana_suppressed_scripts = __preprocess (require("Module:Language/data/iana suppressed scripts"));
 +
 
return {
 
return {
   codes = {
+
    lang   = __coalesce(__wp_languages, __iana_languages, __iso_639_3),
  ["ab"]  = "Abkhaz"
+
    lang_iana = __iana_languages,
,["abk"] = "Abkhaz"
+
    script = iana_scripts,
,["ace"] = "Acehnese"
+
    region = iana_regions,
,["ady"] = "Adyghe"
+
    variant = iana_variants,
,["aa"]  = "Afar"
+
    suppressed = iana_suppressed_scripts,
,["aar"] = "Afar"
 
,["af"]  = "Afrikaans"
 
,["afr"] = "Afrikaans"
 
,["ain"] = "Ainu"
 
,["ak"]  = "Akan"
 
,["aka"] = "Akan"
 
,["akk"] = "Akkadian"
 
,["akl"] = "Aklan"
 
,["alb"] = "Albanian"
 
,["sq"]  = "Albanian"
 
,["sqi"] = "Albanian"
 
,["als"] = "Albanian (Tosk)"
 
,["gsw"] = "Alemannic"
 
,["arq"] = "Algerian Arabic"
 
,["am"]  = "Amharic"
 
,["amh"] = "Amharic"
 
,["grc"] = "Ancient Greek"
 
,["oj"]  = "Anishinaabe"
 
,["oji"] = "Anishinaabe"
 
,["ar"]  = "Arabic"
 
,["ara"] = "Arabic"
 
,["an"]  = "Aragonese"
 
,["arg"] = "Aragonese"
 
,["arc"] = "Aramaic"
 
,["arm"] = "Armenian"
 
,["hy"]  = "Armenian"
 
,["hye"] = "Armenian"
 
,["rup"] = "Aromanian"
 
,["frp"] = "Arpitan"
 
,["as"]  = "Assamese"
 
,["asm"] = "Assamese"
 
,["ast"] = "Asturian"
 
,["av"]  = "Avaric"
 
,["ava"] = "Avaric"
 
,["ae"]  = "Avestan"
 
,["ave"] = "Avestan"
 
,["ay"]  = "Aymara"
 
,["aym"] = "Aymara"
 
,["az"]  = "Azerbaijani"
 
,["aze"] = "Azerbaijani"
 
,["bal"] = "Balochi"
 
,["bam"] = "Bambara"
 
,["bm"]  = "Bambara"
 
,["bjn"] = "Banjar"
 
,["ba"]  = "Bashkir"
 
,["bak"] = "Bashkir"
 
,["baq"] = "Basque"
 
,["eu"]  = "Basque"
 
,["eus"] = "Basque"
 
,["bar"] = "Bavarian"
 
,["be"]  = "Belarusian"
 
,["bel"] = "Belarusian"
 
,["ben"] = "Bengali"
 
,["bn"]  = "Bengali"
 
,["ber"] = "Berber"
 
,["bho"] = "Bhojpuri"
 
,["bik"] = "Bicol"
 
,["bh"]  = "Bihari"
 
,["bih"] = "Bihari"
 
,["bpy"] = "Bishnupriya Manipuri"
 
,["bi"]  = "Bislama"
 
,["bis"] = "Bislama"
 
,["bos"] = "Bosnian"
 
,["bs"]  = "Bosnian"
 
,["por-BR"] = "Brazilian Portuguese"
 
,["pt-BR"] = "Brazilian Portuguese"
 
,["br"]  = "Breton"
 
,["bre"] = "Breton"
 
,["bug"] = "Buginese"
 
,["bg"]  = "Bulgarian"
 
,["bul"] = "Bulgarian"
 
,["bur"] = "Burmese"
 
,["my"]  = "Burmese"
 
,["mya"] = "Burmese"
 
,["bua"] = "Buryat"
 
,["bxr"] = "Buryat (Russia)"
 
,["cbv"] = "Cacua"
 
,["kex"] = "Canara Konkani"
 
,["yue"] = "Cantonese"
 
,["kea"] = "Cape Verdean Creole"
 
,["car"] = "Carib"
 
,["ca"]  = "Catalan"
 
,["cat"] = "Catalan"
 
,["ceb"] = "Cebuano"
 
,["esu"] = "Central Alaskan Yup'ik"
 
,["tzm"] = "Central Atlas Tamazight"
 
,["bcl"] = "Central Bicalono"
 
,["ckb"] = "Central Kurdish"
 
,["ch"]  = "Chamorro"
 
,["cha"] = "Chamorro"
 
,["cbk"] = "Chavacano"
 
,["ce"]  = "Chechen"
 
,["che"] = "Chechen"
 
,["chr"] = "Cherokee"
 
,["chy"] = "Cheyenne"
 
,["ny"]  = "Chichewa"
 
,["nya"] = "Chichewa"
 
,["chi"] = "Chinese"
 
,["zh"]  = "Chinese"
 
,["zho"] = "Chinese"
 
,["cho"] = "Choctaw"
 
,["ckt"] = "Chukchi"
 
,["chv"] = "Chuvash"
 
,["cv"]  = "Chuvash"
 
,["nci"] = "Classical Nahuatl"
 
,["ksh"] = "Colognian"
 
,["cop"] = "Coptic"
 
,["cor"] = "Cornish"
 
,["kw"]  = "Cornish"
 
,["co"]  = "Corsican"
 
,["cos"] = "Corsican"
 
,["cr"]  = "Cree"
 
,["cre"] = "Cree"
 
,["crh"] = "Crimean Tatar"
 
,["hr"]  = "Croatian"
 
,["hrv"] = "Croatian"
 
,["cro"] = "Crow"
 
,["ces"] = "Czech"
 
,["cs"]  = "Czech"
 
,["cze"] = "Czech"
 
,["dlm"] = "Dalmatian"
 
,["da"]  = "Danish"
 
,["dan"] = "Danish"
 
,["del"] = "Delaware"
 
,["div"] = "Dhivehi"
 
,["dv"]  = "Dhivehi"
 
,["dut"] = "Dutch"
 
,["nl"]  = "Dutch"
 
,["nld"] = "Dutch"
 
,["dz"]  = "Dzongkha"
 
,["dzo"] = "Dzongkha"
 
,["egy"] = "Egyptian"
 
,["arz"] = "Egyptian Spoken Arabic"
 
,["elx"] = "Elamite"
 
,["en"]  = "English"
 
,["eng"] = "English"
 
,["myv"] = "Erzya"
 
,["ags"] = "Esimbi"
 
,["eo"]  = "Esperanto"
 
,["epo"] = "Esperanto"
 
,["est"] = "Estonian"
 
,["et"]  = "Estonian"
 
,["evn"] = "Evenki"
 
,["tut"] = "Evenki"
 
,["ee"]  = "Ewe"
 
,["ewe"] = "Ewe"
 
,["ext"] = "Extremaduran"
 
,["fax"] = "Fala"
 
,["fan"] = "Fang"
 
,["fao"] = "Faroese"
 
,["fo"]  = "Faroese"
 
,["hif"] = "Fiji Hindi"
 
,["fij"] = "Fijian"
 
,["fj"]  = "Fijian"
 
,["fi"]  = "Finnish"
 
,["fin"] = "Finnish"
 
,["fr"]  = "French"
 
,["fra"] = "French"
 
,["fre"] = "French"
 
,["fur"] = "Friulian"
 
,["ff"]  = "Fula"
 
,["ful"] = "Fula"
 
,["gag"] = "Gagauz"
 
,["gl"]  = "Galician"
 
,["glg"] = "Galician"
 
,["sdn"] = "Gallurese"
 
,["gan"] = "Gan"
 
,["geo"] = "Georgian"
 
,["ka"]  = "Georgian"
 
,["kat"] = "Georgian"
 
,["de"]  = "German"
 
,["deu"] = "German"
 
,["ger"] = "German"
 
,["gem"] = "Germanic"
 
,["ki"]  = "Gikuyu"
 
,["kik"] = "Gikuyu"
 
,["glk"] = "Gilaki"
 
,["ank"] = "Goemai"
 
,["got"] = "Gothic"
 
,["el"]  = "Greek"
 
,["ell"] = "Greek"
 
,["gre"] = "Greek"
 
,["gn"]  = "Guaraní"
 
,["grn"] = "Guaraní"
 
,["gu"]  = "Gujarati"
 
,["guj"] = "Gujarati"
 
,["hat"] = "Haitian Creole"
 
,["ht"]  = "Haitian Creole"
 
,["hak"] = "Hakka"
 
,["hni"] = "Hani"
 
,["ha"]  = "Hausa"
 
,["hau"] = "Hausa"
 
,["yuf"] = "Havasupai-Hualapai-Yavapai"
 
,["haw"] = "Hawaiian"
 
,["haz"] = "Hazaragi"
 
,["he"]  = "Hebrew"
 
,["heb"] = "Hebrew"
 
,["her"] = "Herero"
 
,["hz"]  = "Herero"
 
,["hil"] = "Hiligaynon"
 
,["mrj"] = "Hill Mari"
 
,["hi"]  = "Hindi"
 
,["hin"] = "Hindi"
 
,["hmo"] = "Hiri Motu"
 
,["ho"]  = "Hiri Motu"
 
,["hit"] = "Hittite"
 
,["hop"] = "Hopi"
 
,["hu"]  = "Hungarian"
 
,["hun"] = "Hungarian"
 
,["ice"] = "Icelandic"
 
,["is"]  = "Icelandic"
 
,["isl"] = "Icelandic"
 
,["ido"] = "Ido"
 
,["io"]  = "Ido"
 
,["ibo"] = "Igbo"
 
,["ig"]  = "Igbo"
 
,["ilo"] = "Ilokano"
 
,["ine"] = "Indo-European"
 
,["iir"] = "Indo-Iranian"
 
,["id"]  = "Indonesian"
 
,["ind"] = "Indonesian"
 
,["inh"] = "Ingush"
 
,["ia"]  = "Interlingua"
 
,["ina"] = "Interlingua"
 
,["ie"]  = "Interlingue"
 
,["ile"] = "Interlingue"
 
,["iku"] = "Inuktitut"
 
,["iu"]  = "Inuktitut"
 
,["ik"]  = "Inupiaq"
 
,["ipk"] = "Inupiaq"
 
,["irk"] = "Iraqw"
 
,["ga"]  = "Irish"
 
,["gle"] = "Irish"
 
,["ruo"] = "Istro-Romanian"
 
,["it"]  = "Italian"
 
,["ita"] = "Italian"
 
,["jac"] = "Jakaltek"
 
,["ja"]  = "Japanese"
 
,["jpn"] = "Japanese"
 
,["jav"] = "Javanese"
 
,["jv"]  = "Javanese"
 
,["apj"] = "Jicarilla Apache"
 
,["lad"] = "Judaeo-Spanish"
 
,["kbd"] = "Kabardian"
 
,["kab"] = "Kabyle"
 
,["kal"] = "Kalaallisut"
 
,["kl"]  = "Kalaallisut"
 
,["kan"] = "Kannada"
 
,["kn"]  = "Kannada"
 
,["kau"] = "Kanuri"
 
,["kr"]  = "Kanuri"
 
,["pam"] = "Kapampangan"
 
,["krc"] = "Karachay-Balkar"
 
,["kaa"] = "Karakalpak"
 
,["kar"] = "Karen"
 
,["kas"] = "Kashmiri"
 
,["ks"]  = "Kashmiri"
 
,["csb"] = "Kashubian"
 
,["kaz"] = "Kazakh"
 
,["kk"]  = "Kazakh"
 
,["khm"] = "Khmer"
 
,["km"]  = "Khmer"
 
,["quc"] = "K'iche'"
 
,["sjd"] = "Kildin Sami"
 
,["kin"] = "Kinyarwanda"
 
,["rw"]  = "Kinyarwanda"
 
,["rn"]  = "Kirundi"
 
,["run"] = "Kirundi"
 
,["tlh"] = "Klingon"
 
,["kom"] = "Komi"
 
,["kv"]  = "Komi"
 
,["koi"] = "Komi-Permyak"
 
,["kg"]  = "Kongo"
 
,["kon"] = "Kongo"
 
,["knn"] = "Konkani"
 
,["kok"] = "Konkani"
 
,["ko"]  = "Korean"
 
,["kor"] = "Korean"
 
,["eko"] = "Koti"
 
,["kum"] = "Kumyk"
 
,["ku"]  = "Kurdish"
 
,["kur"] = "Kurdish"
 
,["kmr"] = "Kurmanji"
 
,["kj"]  = "Kwanyama"
 
,["kua"] = "Kwanyama"
 
,["kir"] = "Kyrgyz"
 
,["ky"]  = "Kyrgyz"
 
,["lbe"] = "Lak"
 
,["lkt"] = "Lakota"
 
,["lao"] = "Lao"
 
,["lo"]  = "Lao"
 
,["ltg"] = "Latgalian"
 
,["la"]  = "Latin"
 
,["lat"] = "Latin"
 
,["lav"] = "Latvian"
 
,["lv"]  = "Latvian"
 
,["lij"] = "Ligurian"
 
,["li"]  = "Limburgish"
 
,["lim"] = "Limburgish"
 
,["lin"] = "Lingala"
 
,["ln"]  = "Lingala"
 
,["lit"] = "Lithuanian"
 
,["lt"]  = "Lithuanian"
 
,["liv"] = "Livonian"
 
,["jbo"] = "Lojban"
 
,["lmo"] = "Lombard"
 
,["lou"] = "Louisiana Creole French"
 
,["nds"] = "Low Saxon"
 
,["dsb"] = "Lower Sorbian"
 
,["lg"]  = "Luganda"
 
,["lug"] = "Luganda"
 
,["luy"] = "Luhya"
 
,["lut"] = "Lushootseed"
 
,["lb"]  = "Luxembourgish"
 
,["ltz"] = "Luxembourgish"
 
,["mac"] = "Macedonian"
 
,["mk"]  = "Macedonian"
 
,["mkd"] = "Macedonian"
 
,["mg"]  = "Malagasy"
 
,["mlg"] = "Malagasy"
 
,["may"] = "Malay"
 
,["ms"]  = "Malay"
 
,["msa"] = "Malay"
 
,["mal"] = "Malayalam"
 
,["ml"]  = "Malayalam"
 
,["mlt"] = "Maltese"
 
,["mt"]  = "Maltese"
 
,["mam"] = "Mam"
 
,["mnc"] = "Manchu"
 
,["cmn"] = "Mandarin Chinese"
 
,["glv"] = "Manx"
 
,["gv"]  = "Manx"
 
,["mao"] = "Māori"
 
,["mi"]  = "Māori"
 
,["mri"] = "Māori"
 
,["arn"] = "Mapudungun"
 
,["mar"] = "Marathi"
 
,["mr"]  = "Marathi"
 
,["chm"] = "Mari"
 
,["mrc"] = "Maricopa"
 
,["mah"] = "Marshallese"
 
,["mh"]  = "Marshallese"
 
,["mwr"] = "Marwari"
 
,["mfe"] = "Mauritian creole"
 
,["myn"] = "Mayan"
 
,["mzn"] = "Mazandarani"
 
,["mhr"] = "Meadow Mari"
 
,["dum"] = "Middle Dutch"
 
,["enm"] = "Middle English"
 
,["gmh"] = "Middle High German"
 
,["mga"] = "Middle Irish"
 
,["gml"] = "Middle Low German"
 
,["pal"] = "Middle Persian"
 
,["wlm"] = "Middle Welsh"
 
,["cdo"] = "Min-dong"
 
,["xmf"] = "Mingrelian"
 
,["nan"] = "Min-nan"
 
,["mwl"] = "Mirandese"
 
,["mov"] = "Mohave"
 
,["moh"] = "Mohawk"
 
,["mdf"] = "Moksha"
 
,["mn"]  = "Mongolian"
 
,["mon"] = "Mongolian"
 
,["mus"] = "Muscogee"
 
,["gmy"] = "Mycenaean Greek"
 
,["nah"] = "Nahuatl"
 
,["na"]  = "Nauruan"
 
,["nau"] = "Nauruan"
 
,["nav"] = "Navajo"
 
,["nv"]  = "Navajo"
 
,["ndo"] = "Ndonga"
 
,["ng"]  = "Ndonga"
 
,["nap"] = "Neapolitan"
 
,["new"] = "Nepal Bhasa"
 
,["ne"]  = "Nepali"
 
,["nep"] = "Nepali"
 
,["pih"] = "Norfolk"
 
,["nrm"] = "Norman"
 
,["frr"] = "North Frisian"
 
,["apc"] = "North Levantine Arabic"
 
,["nd"]  = "Northern Ndebele"
 
,["nde"] = "Northern Ndebele"
 
,["nso"] = "Northern Sotho"
 
,["nod"] = "Northern Thai"
 
,["no"]  = "Norwegian"
 
,["nor"] = "Norwegian"
 
,["nb"]  = "Norwegian Bokmål"
 
,["nob"] = "Norwegian Bokmål"
 
,["nn"]  = "Norwegian Nynorsk"
 
,["nno"] = "Norwegian Nynorsk"
 
,["nov"] = "Novial"
 
,["oc"]  = "Occitan"
 
,["oci"] = "Occitan"
 
,["xal"] = "Oirat"
 
,["ryu"] = "Okinawan"
 
,["chu"] = "Old Church Slavonic"
 
,["cu"]  = "Old Church Slavonic"
 
,["sla"] = "Old East Slavic"
 
,["ang"] = "Old English"
 
,["fro"] = "Old French"
 
,["ofs"] = "Old Frisian"
 
,["goh"] = "Old High German"
 
,["sga"] = "Old Irish"
 
,["non"] = "Old Norse"
 
,["peo"] = "Old Persian"
 
,["osx"] = "Old Saxon"
 
,["owl"] = "Old Welsh"
 
,["one"] = "Oneida"
 
,["or"]  = "Oriya"
 
,["ori"] = "Oriya"
 
,["om"]  = "Oromo"
 
,["orm"] = "Oromo"
 
,["os"]  = "Ossetic"
 
,["oss"] = "Ossetic"
 
,["roa"] = "Other Romance"
 
,["oto"] = "Otomi"
 
,["ota"] = "Ottoman Turkish"
 
,["pfl"] = "Palatinate German"
 
,["pi"]  = "Pāli"
 
,["pli"] = "Pāli"
 
,["pag"] = "Pangasinan"
 
,["pap"] = "Papiamento"
 
,["ps"]  = "Pashto"
 
,["pus"] = "Pashto"
 
,["uun"] = "Pazeh"
 
,["pdc"] = "Pennsylvania German"
 
,["fa"]  = "Persian"
 
,["fas"] = "Persian"
 
,["per"] = "Persian"
 
,["pcd"] = "Picard"
 
,["cel"] = "Pictish"
 
,["pms"] = "Piemontese"
 
,["pny"] = "Pinyin"
 
,["crk"] = "Plains Cree"
 
,["pl"]  = "Polish"
 
,["pol"] = "Polish"
 
,["pnt"] = "Pontic Greek"
 
,["por"] = "Portuguese"
 
,["pt"]  = "Portuguese"
 
,["phr"] = "Potwari"
 
,["pa"]  = "Punjabi"
 
,["pan"] = "Punjabi"
 
,["qu"]  = "Quechua"
 
,["que"] = "Quechua"
 
,["qya"] = "Quenya"
 
,["rar"] = "Rarotongan"
 
,["rom"] = "Romani"
 
,["ro"]  = "Romanian"
 
,["ron"] = "Romanian"
 
,["rum"] = "Romanian"
 
,["rm"]  = "Romansh"
 
,["roh"] = "Romansh"
 
,["ru"]  = "Russian"
 
,["rus"] = "Russian"
 
,["rue"] = "Rusyn"
 
,["se"]  = "Sami"
 
,["sme"] = "Sami"
 
,["sm"]  = "Samoan"
 
,["smo"] = "Samoan"
 
,["sag"] = "Sango"
 
,["sg"]  = "Sango"
 
,["sa"]  = "Sanskrit"
 
,["san"] = "Sanskrit"
 
,["skr"] = "Saraiki"
 
,["sc"]  = "Sardinian"
 
,["srd"] = "Sardinian"
 
,["sdc"] = "Sassarese"
 
,["stq"] = "Saterland Frisian"
 
,["sco"] = "Scots"
 
,["gd"]  = "Scottish Gaelic"
 
,["gla"] = "Scottish Gaelic"
 
,["see"] = "Seneca"
 
,["sr"]  = "Serbian"
 
,["srp"] = "Serbian"
 
,["hbs"] = "Serbo-Croatian"
 
,["scl"] = "Shina"
 
,["sn"]  = "Shona"
 
,["sna"] = "Shona"
 
,["shs"] = "Shuswap"
 
,["scn"] = "Sicilian"
 
,["sgn"] = "Sign language"
 
,["bla"] = "Siksika"
 
,["szl"] = "Silesian"
 
,["zh-Hans"] = "Simplified Chinese"
 
,["sjn"] = "Sindarin"
 
,["sd"]  = "Sindhi"
 
,["snd"] = "Sindhi"
 
,["si"]  = "Sinhala"
 
,["sin"] = "Sinhala"
 
,["sk"]  = "Slovak"
 
,["slk"] = "Slovak"
 
,["slo"] = "Slovak"
 
,["sl"]  = "Slovene"
 
,["slv"] = "Slovene"
 
,["so"]  = "Somali"
 
,["som"] = "Somali"
 
,["wen"] = "Sorbian"
 
,["sot"] = "Sotho"
 
,["st"]  = "Sotho"
 
,["nbl"] = "Southern Ndebele"
 
,["nr"]  = "Southern Ndebele"
 
,["es"]  = "Spanish"
 
,["spa"] = "Spanish"
 
,["srn"] = "Sranan"
 
,["sux"] = "Sumerian"
 
,["su"]  = "Sundanese"
 
,["sun"] = "Sundanese"
 
,["sw"]  = "Swahili"
 
,["swa"] = "Swahili"
 
,["ss"]  = "Swati"
 
,["ssw"] = "Swati"
 
,["sv"]  = "Swedish"
 
,["swe"] = "Swedish"
 
,["syc"] = "Syriac"
 
,["syr"] = "Syriac"
 
,["fil"] = "Tagalog"
 
,["tgl"] = "Tagalog"
 
,["tl"]  = "Tagalog"
 
,["tah"] = "Tahitian"
 
,["ty"]  = "Tahitian"
 
,["tg"]  = "Tajik"
 
,["tgk"] = "Tajik"
 
,["ta"]  = "Tamil"
 
,["tam"] = "Tamil"
 
,["tat"] = "Tatar"
 
,["tt"]  = "Tatar"
 
,["te"]  = "Telugu"
 
,["tel"] = "Telugu"
 
,["tet"] = "Tetum"
 
,["th"]  = "Thai"
 
,["tha"] = "Thai"
 
,["bo"]  = "Tibetan"
 
,["bod"] = "Tibetan"
 
,["tib"] = "Tibetan"
 
,["ti"]  = "Tigrinya"
 
,["tir"] = "Tigrinya"
 
,["tpi"] = "Tok Pisin"
 
,["to"]  = "Tongan"
 
,["ton"] = "Tongan"
 
,["zh-Hant"] = "Traditional Chinese"
 
,["lu"]  = "Tshiluba"
 
,["lub"] = "Tshiluba"
 
,["ts"]  = "Tsonga"
 
,["tso"] = "Tsonga"
 
,["tn"]  = "Tswana"
 
,["tsn"] = "Tswana"
 
,["tcy"] = "Tulu"
 
,["tum"] = "Tumbuka"
 
,["aeb"] = "Tunisian Arabic"
 
,["tr"]  = "Turkish"
 
,["tur"] = "Turkish"
 
,["tk"]  = "Turkmen"
 
,["tuk"] = "Turkmen"
 
,["tus"] = "Tuscarora"
 
,["tyv"] = "Tuvan"
 
,["tw"]  = "Twi"
 
,["twi"] = "Twi"
 
,["udm"] = "Udmurt"
 
,["uk"]  = "Ukrainian"
 
,["ukr"] = "Ukrainian"
 
,["und"] = "undetermined"
 
,["hsb"] = "Upper Sorbian"
 
,["ur"]  = "Urdu"
 
,["urd"] = "Urdu"
 
,["ug"]  = "Uyghur"
 
,["uig"] = "Uyghur"
 
,["uz"]  = "Uzbek"
 
,["uzb"] = "Uzbek"
 
,["ve"]  = "Venda"
 
,["ven"] = "Venda"
 
,["vec"] = "Venetian"
 
,["vep"] = "Veps"
 
,["vi"]  = "Vietnamese"
 
,["vie"] = "Vietnamese"
 
,["rmy"] = "Vlax Romani"
 
,["vo"]  = "Volapük"
 
,["vol"] = "Volapük"
 
,["vro"] = "Võro"
 
,["wa"]  = "Walloon"
 
,["wln"] = "Walloon"
 
,["war"] = "Waray-Waray"
 
,["cy"]  = "Welsh"
 
,["cym"] = "Welsh"
 
,["wel"] = "Welsh"
 
,["vls"] = "West Flemish"
 
,["fry"] = "West Frisian"
 
,["fy"]  = "West Frisian"
 
,["gmw"] = "West Germanic"
 
,["pnb"] = "Western Panjabi"
 
,["wo"]  = "Wolof"
 
,["wol"] = "Wolof"
 
,["wuu"] = "Wuu"
 
,["xh"]  = "Xhosa"
 
,["xho"] = "Xhosa"
 
,["sah"] = "Yakut"
 
,["kdd"] = "Yankunytjatjara"
 
,["ii"]  = "Yi"
 
,["iii"] = "Yi"
 
,["yi"]  = "Yiddish"
 
,["yid"] = "Yiddish"
 
,["yo"]  = "Yoruba"
 
,["yor"] = "Yoruba"
 
,["yua"] = "Yukatek Maya"
 
,["diq"] = "Zazaki"
 
,["zza"] = "Zazaki"
 
,["zea"] = "Zeelandic"
 
,["zen"] = "Zenaga"
 
,["za"]  = "Zhuang"
 
,["zha"] = "Zhuang"
 
,["zu"]  = "Zulu"
 
,["zul"] = "Zulu"
 
,["zun"] = "Zuni"
 
}
 
 
}
 
}

Latest revision as of 14:34, 9 March 2020

Documentation for this module may be created at Module:Language/name/data/doc

-- Merge several "code -> list of names" tables into a single table.
--
-- Arguments are visited in the order given (iteration stops at the first nil
-- argument, since they are walked with ipairs). For every code, the names
-- found in each argument are appended to that code's list, so names from
-- earlier arguments come before names from later ones. Within one name list
-- the entries are walked with pairs, so their relative order is whatever
-- pairs yields. A code whose name list is empty in every argument gets no
-- entry in the result. The input tables are not modified.
--
-- @param ... tables mapping a code to a table of names
-- @return a new table mapping each code to a sequence of all collected names
local function __coalesce(...)
    local merged = {}
    for _, source in ipairs({...}) do
        for code, names in pairs(source) do
            for _, name in pairs(names) do
                -- create the per-code list lazily, only once a name exists
                local bucket = merged[code]
                if not bucket then
                    bucket = {}
                    merged[code] = bucket
                end
                bucket[#bucket + 1] = name
            end
        end
    end

    return merged
end

-- Return a copy of t whose keys have been converted to lowercase.
--
-- When first_of_array_in_array is truthy, each value is replaced by a new
-- one-element table holding only its first entry (the first name for the
-- code, dropping synonyms). Otherwise the original value is stored as-is,
-- i.e. the result shares the value with t. Keys are lowercased with the
-- string method `lower`. If two keys of t differ only in letter case, the one
-- visited last by pairs wins.
--
-- @param t table keyed by code
-- @param first_of_array_in_array optional flag; keep only the first entry of each value
-- @return a new table keyed by the lowercased codes
local function __preprocess(t, first_of_array_in_array)
    local result = {}
    for code, names in pairs(t) do
        local key = code:lower()
        if first_of_array_in_array then
            result[key] = { names[1] }
        else
            result[key] = names
        end
    end

    return result
end

-- Data tables are loaded in a fixed order; every table gets lowercase keys.

-- Primary language subtags valid in BCP47, from IANA (mostly ISO 639-1, -2
-- and -3, with some exclusions).
local iana_langs = __preprocess(require("Module:Language/data/iana languages"))

-- ISO 639-3 adds 3-letter codes that are absent from the IANA registry because
-- they are aliased to 2-letter codes, but lacks some deleted codes that are
-- still valid in BCP47 (some of them aliased).
local iso_639_3_langs = __preprocess(require("Module:Language/data/ISO 639-3"))

-- Wikimedia wikis use some non-standard codes, a subset of the IANA codes, and
-- composite codes. Only the first name of each code is kept here.
local wp_langs = __preprocess(require("Module:Language/data/wp languages"), true)

-- Script subtags valid in BCP47, from IANA (special ISO 15924 codes excluded).
local scripts = __preprocess(require("Module:Language/data/iana scripts"))

-- Region subtags valid in BCP47, from IANA (derived from ISO 3166-1 without
-- special codes, and from 3-digit UN M.49 codes for groups of countries).
local regions = __preprocess(require("Module:Language/data/iana regions"))

-- Variant subtags from IANA; this table's format differs from the other IANA
-- data tables.
local variants = __preprocess(require("Module:Language/data/iana variants"))

-- Suppressed script subtags from IANA.
local suppressed_scripts = __preprocess(require("Module:Language/data/iana suppressed scripts"))

-- Exported data. `lang` merges the three language sources, so for any code the
-- Wikimedia name comes first, followed by the IANA names, then ISO 639-3.
local exports = {}
exports.lang = __coalesce(wp_langs, iana_langs, iso_639_3_langs)
exports.lang_iana = iana_langs
exports.script = scripts
exports.region = regions
exports.variant = variants
exports.suppressed = suppressed_scripts

return exports