-- Module:Language/data

-- Shorthand for building a string from Unicode code points.
local U = mw.ustring.char

-- Combining diacritics, listed in code-point order. They are concatenated
-- into the character class that strips accent marks for Proto-Slavic (sla-pro).
local grave = U(0x0300)        -- U+0300 COMBINING GRAVE ACCENT
local acute = U(0x0301)        -- U+0301 COMBINING ACUTE ACCENT
local tilde = U(0x0303)        -- U+0303 COMBINING TILDE
local macron = U(0x0304)       -- U+0304 COMBINING MACRON
local double_acute = U(0x030B) -- U+030B COMBINING DOUBLE ACUTE ACCENT
local dgrave = U(0x030F)       -- U+030F COMBINING DOUBLE GRAVE ACCENT
local invbreve = U(0x0311)     -- U+0311 COMBINING INVERTED BREVE

--[[ "name" is the canonical name used on Wiktionary. "article" is the title of the Wikipedia article. "scripts" is a list of ISO 15924 script codes. ]]

-- Language table, keyed by language code. Fields used by the entries:
--   name           canonical language name
--   article        title of the Wikipedia article, as a plain string
--   scripts        list of ISO 15924 script codes
--   type           "reconstructed" for proto-languages (optional)
--   direction      "rtl" for right-to-left text (optional)
--   Wikipedia_name alternative name (optional)
--                  NOTE(review): how this is consumed is not visible here.
--   replacements   pattern -> replacement pairs that strip diacritics and
--                  variant letter forms (optional)
-- The pairs inside one "replacements" table are hash entries, so they must not
-- rely on being applied in any particular order.
local data = {
	["ang"] = {
		["name"] = "Auld Inglis",
		["article"] = "Auld Inglis",
		["scripts"] = { "Latn" },
		-- Remove macrons, acutes, and overdots.
		["replacements"] = {
			["[ĀÁ]"] = "A",
			["[āá]"] = "a",
			["[ǢǼ]"] = "Æ",
			["[ǣǽ]"] = "æ",
			["Ċ"]    = "C",
			["ċ"]    = "c",
			["[ĒÉ]"] = "E",
			["[ēé]"] = "e",
			["Ġ"]    = "G",
			["ġ"]    = "g",
			["[ĪÍ]"] = "I",
			["[īí]"] = "i",
			["[ŌÓ]"] = "O",
			["[ōó]"] = "o",
			["[ŪÚ]"] = "U",
			["[ūú]"] = "u",
			["[ȲÝ]"] = "Y",
			["[ȳý]"] = "y",
			},
		},
	["ar"] = {
		["name"] = "Arabic",
		["article"] = "Arabic leid",
		["scripts"] = { "Arab" },
		["direction"] = "rtl", -- Should be in the script data module.
		--[[ ālif with wasla is replaced by ālif;
		taṭwīl, fatḥatan, ḍammatan, kasratan,
		fatḥa, ḍamma, kasra,
		shadda, sukūn, and superscript (dagger) ālif are removed. ]]
		["replacements"] = {
			[U(0x0671)] = U(0x0627),
			["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
				..U(0x064E)..U(0x064F)..U(0x0650)
				..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
			},
		},
	["bn"] = {
		["name"] = "Bengali",
		["article"] = "Bengali leid",
		["scripts"] = { "Beng" },
		},
	["de"] = {
		["name"] = "German",
		["article"] = "German leid",
		["scripts"] = { "Latn" },
		-- Digraph-to-umlaut replacements, currently disabled.
		--[[
		["replacements"] = {
			["ae"]    = "ä",
			["oe"]    = "ö",
			["ue"]    = "ü",
			["A[Ee]"] = "Ä",
			["O[Ee]"] = "Ö",
			["U[Ee]"] = "Ü",
		},
		]]
		},
	["en"] = {
		["name"] = "Inglis",
		["article"] = "Inglis leid",
		["scripts"] = { "Latn" },
		},
	["es"] = {
		["name"] = "Spanish",
		["article"] = "Spanish leid",
		["scripts"] = { "Latn" },
		},
	["fr"] = {
		["name"] = "French",
		["article"] = "French leid",
		["scripts"] = { "Latn" },
		},
	["frm"] = {
		["name"] = "Middle French",
		["article"] = "Middle French",
		["scripts"] = { "Latn" },
		},
	["gem-pro"] = {
		["name"] = "Proto-Germanic",
		["article"] = "Proto-Germanic leid",
		["scripts"] = { "Latn" },
		["type"] = "reconstructed",
		["replacements"] = {},
		},
	["grc"] = {
		["name"] = "Ancient Greek",
		["article"] = "Ancient Greek",
		["scripts"] = { "Grek" },
		["replacements"] = {
			-- Vowels with macrons or breves are replaced with plain letters.
			["[ᾱᾰ]"] = "α",
			["[ᾹᾸ]"] = "Α",
			["[ῑῐ]"] = "ι",
			["[ῙῘ]"] = "Ι",
			["[ῡῠ]"] = "υ",
			["[ῩῨ]"] = "Υ",
			-- Variant letter forms are replaced with the ordinary letters.
			["ϐ"]    = "β",
			["ϵ"]    = "ε",
			["ϑ"]    = "θ",
			["ϰ"]    = "κ",
			["ϱ"]    = "ρ",
			["ϲ"]    = "σ",
			["ϕ"]    = "φ",
			},
		},
	["grk-pro"] = {
		["name"] = "Proto-Hellenic",
		["Wikipedia_name"] = "Proto-Greek",
		["article"] = "Proto-Greek leid",
		["scripts"] = { "Latn" },
		["type"] = "reconstructed",
		["replacements"] = {},
		},
	["hi"] = {
		["name"] = "Hindi",
		["article"] = "Hindi",
		["scripts"] = { "Deva" },
		},
	["ine-pro"] = {
		["name"] = "Proto-Indo-European",
		["article"] = "Proto-Indo-European leid",
		["scripts"] = { "Latn" },
		["type"] = "reconstructed",
		["replacements"] = {},
		},
	["ja"] = {
		["name"] = "Japanese",
		["article"] = "Japanese leid",
		["scripts"] = { "Jpan" },
		},
	["la"] = {
		["name"] = "Laitin",
		["article"] = "Laitin",
		["scripts"] = { "Latn" },
		["replacements"] = {
			-- Vowels with macrons, breves, or diaereses are replaced with plain letters.
			["[ĀĂ]"]  = "A",
			["[āă]"]  = "a",
			["[ĒĔ]"]  = "E",
			["[ēĕë]"] = "e",
			["[ĪĬÏ]"] = "I",
			["[īĭï]"] = "i",
			["[ŌŎ]"]  = "O",
			["[ōŏ]"]  = "o",
			["[ŪŬÜ]"] = "U",
			["[ūŭü]"] = "u",
			["Ȳ"]     = "Y",
			["ȳ"]     = "y",
			},
		},
	["mul"] = {
		["name"] = "Translingual",
		["article"] = "",
		["scripts"] = { "" },
		},
	["orv"] = {
		["name"] = "Auld East Slavic",
		["article"] = "Auld East Slavic",
		["scripts"] = { "Cyrs" },
		-- Combining Cyrillic palatalization mark (U+0484) is removed.
		["replacements"] = {
			[U(0x484)] = "",
			},
		},
	["pt"] = {
		["name"] = "Portuguese",
		["article"] = "Portuguese leid",
		["scripts"] = { "Latn" },
		},
	["pa"] = {
		["name"] = "Punjabi",
		["article"] = "Punjabi leid",
		["scripts"] = { "Guru", "Arab" },
		},
	["ru"] = {
		["name"] = "Russian",
		["article"] = "Russian leid",
		["scripts"] = { "Cyrl" },
		-- Combining acute accent is removed.
		["replacements"] = { [U(0x0301)] = "" },
		},

	["sla-pro"] = {
		["name"] = "Proto-Slavic", -- also Common Slavic
		-- NOTE(review): no "article" field here, unlike the other entries;
		-- confirm the intended article title and add it.
		["type"] = "reconstructed",
		["scripts"] = { "Latn" },
		["replacements"] = {
			-- Precomposed accented vowels are replaced with plain letters.
			["[ÀÁÃĀȀȂ]"] = "A",
			["[àáãāȁȃ]"] = "a",
			["[ÈÉẼĒȄȆ]"] = "E",
			["[èéẽēȅȇ]"] = "e",
			["[ÌÍĨĪȈȊ]"] = "I",
			["[ìíĩīȉȋ]"] = "i",
			["[ÒÓÕŌȌȎŐ]"] = "O",
			["[òóõōȍȏő]"] = "o",
			["[ÙÚŨŪȔȖŰ]"] = "U",
			["[ùúũūȕȗű]"] = "u",
			["[ỲÝỸȲ]"] = "Y",
			["[ỳýỹȳ]"] = "y",
			-- O with ogonek keeps its ogonek; only the macron is dropped.
			["Ǭ"] = "Ǫ",
			["ǭ"] = "ǫ",
			-- Any remaining combining accent marks are removed.
			["[" .. grave .. acute .. double_acute .. tilde .. macron .. dgrave .. invbreve .. "]"] = "",
			},
		},
	["ur"] = {
		["name"] = "Urdu",
		["article"] = "Urdu",
		["scripts"] = { "Arab" },
		},
	["zh"] = {
		["name"] = "Chinese",
		["article"] = "Chinese leid",
		["scripts"] = { "Hani" },
		},
	["xcl"] = {
		["name"] = "Old Armenian",
		["article"] = "Classical Armenian",
		["scripts"] = { "Armn" },
		-- Armenian question, exclamation, emphasis and abbreviation marks are
		-- removed; the ligature և is expanded to եւ.
		["replacements"] = {
			["[՞՜՛՟]"] = "",
			["և"] = "եւ",
			},
		},
	}

--[[ Templates for new entries (without and with replacements):

	[""] = {
		["name"] = "",
		["article"] = "",
		["scripts"] = { "" },
		},

	[""] = {
		["name"] = "",
		["article"] = "",
		["scripts"] = { "" },
		["replacements"] = {
			},
		},

]]

-- Module result: the language table, keyed by language code.
return data