Jump to content

Module:Ko-translit

Frae Wikipedia, the free beuk o knawledge
-- Module table: functions attached to it are what this module returns to callers.
local export = {}

--- Romanise Korean text written in Hangul.
--
-- Input made up solely of Hangul compatibility jamo (and hyphens) is mapped
-- letter by letter. Otherwise every precomposed Hangul syllable is split into
-- its initial consonant, vowel and final consonant, a series of contextual
-- adjustments is applied across each syllable boundary, and the pieces are
-- replaced by Latin letters. Characters that are not Hangul syllables are
-- copied through unchanged.
--
-- Markers recognised in the input:
--   * `-`, `^` and `'` directly after a syllable are skipped when looking for
--     the "next" syllable; hyphens skipped this way are preserved in the output.
--   * `^` in front of a letter capitalises that letter; the caret is removed.
--
-- @param word  string  text to romanise
-- @param sc    unused; kept so the signature stays compatible with callers
-- @param lang  unused; kept so the signature stays compatible with callers
-- @param nn    optional string; when 'yes', a final ㄴ followed by an initial
--              ㄹ is written "nn" instead of the default "ll"
-- @return string romanisation, or nil when `word` contains no precomposed
--         Hangul syllable (and is not made up purely of compatibility jamo)
function export.tr_revised(word, sc, lang, nn)
	nn = nn or 'no'

	local ustring = mw.ustring

	-- Drop parenthesised runs of CJK ideographs and normalise em dashes to hyphens.
	word = ustring.gsub(word,'%([一丁-龯㐀-䶵]+%)','')
	word = ustring.gsub(word,'—','-')

	-- Input consisting only of compatibility jamo (plus hyphens): map each letter directly.
	if ustring.gsub(word,'[ㅂㅈㄷㄱㅅㅁㄴㅇㄹㅎㅋㅌㅊㅍㄸㅃㅉㄲㅆㅛㅕㅑㅐㅔㅗㅓㅏㅣㅠㅜㅡㅖㅒ%-]','') == '' then
		local jamo_text = ustring.gsub(word,'[ㅂㅈㄷㄱㅅㅁㄴㅇㄹㅎㅋㅌㅊㅍㄸㅃㅉㄲㅆㅛㅕㅑㅐㅔㅗㅓㅏㅣㅠㅜㅡㅖㅒ]',{['ㅂ']='b',['ㅈ']='j',['ㄷ']='d',['ㄱ']='g',['ㅅ']='s',['ㅁ']='m',['ㄴ']='n',['ㅇ']='/',['ㄹ']='l/r',['ㅎ']='h',['ㅋ']='k',['ㅌ']='t',['ㅊ']='ch',['ㅍ']='p',['ㄸ']='tt',['ㅃ']='pp',['ㅉ']='jj',['ㄲ']='kk',['ㅆ']='ss',['ㅛ']='yo',['ㅕ']='yeo',['ㅑ']='ya',['ㅐ']='ae',['ㅔ']='e',['ㅗ']='o',['ㅓ']='eo',['ㅏ']='a',['ㅣ']='i',['ㅠ']='yu',['ㅜ']='u',['ㅡ']='eu',['ㅖ']='ye',['ㅒ']='yae'})
		return jamo_text
	end

	-- Nothing to romanise without at least one precomposed syllable.
	if not ustring.match(word,'[가-힣]') then
		return nil
	end

	-- Per-character arrays, indexed by character position in `word`.
	local initial = {}
	local vowel = {}
	local final = {}
	local syllable = {}
	local parts = {}

	-- Split every precomposed syllable (U+AC00..U+D7A3) into conjoining jamo:
	-- 588 syllables per initial consonant, 28 final slots per vowel.
	local wordlen = ustring.len(word)
	local pos = 0
	for codep in ustring.gcodepoint(word) do
		pos = pos + 1
		syllable[pos] = ustring.char(codep)
		if codep >= 0xAC00 and codep <= 0xD7A3 then
			local syllableindex = codep - 0xAC00
			initial[pos] = ustring.char(0x1100 + math.floor(syllableindex / 588))
			vowel[pos] = ustring.char(0x1161 + math.floor((syllableindex % 588) / 28))
			local finalindex = syllableindex % 28
			if finalindex == 0 then
				final[pos] = ''
			else
				final[pos] = ustring.char(0x11A7 + finalindex)
			end
		else
			initial[pos], vowel[pos], final[pos] = '', '', ''
		end
	end
	-- Empty sentinel entry so the look-ahead below can safely read one past the end.
	syllable[wordlen+1], initial[wordlen+1], vowel[wordlen+1], final[wordlen+1] = '', '', '', ''

	-- Lookup tables used inside the loop; built once per call instead of once per syllable.
	-- Final consonant when the next syllable starts with the silent initial ㅇ.
	local final_before_ieung = {['ᆨ']='g',['ᆩ']='kk',['ᆪ']='ks',['ᆬ']='nj',['ᆭ']='n',['ᆮ']='d',['ᆯ']='r',['ᆰ']='lg',['ᆱ']='lm',['ᆲ']='lb',['ᆳ']='ls',['ᆴ']='lt',['ᆵ']='lp',['ᆶ']='r',['ᆸ']='b',['ᆹ']='ps',['ᆺ']='s',['ᆻ']='ss',['ᆼ']='ng-',['ᆽ']='j',['ᆾ']='ch',['ᇂ']=''}
	-- Next initial after a final ㄶ, ㅀ and ㅎ respectively.
	local initial_after_nh = {['ᄀ']='k',['ᄁ']='k',['ᄃ']='t',['ᄄ']='t',['ᄅ']='n',['ᄇ']='p',['ᄈ']='p',['ᄌ']='ch',['ᄍ']='ch'}
	local initial_after_lh = {['ᄀ']='k',['ᄁ']='k',['ᄂ']='l',['ᄃ']='t',['ᄄ']='t',['ᄅ']='l',['ᄇ']='p',['ᄈ']='p',['ᄌ']='ch',['ᄍ']='ch'}
	local initial_after_h = {['ᄀ']='k',['ᄁ']='k',['ᄂ']='nn',['ᄃ']='t',['ᄄ']='t',['ᄅ']='nn',['ᄆ']='nm',['ᄇ']='p',['ᄈ']='p',['ᄌ']='ch',['ᄍ']='ch'}
	-- Final consonant when the next initial is ㄴ, ㄹ and ㅁ respectively.
	local final_before_n = {['ᆨ']='ng',['ᆩ']='ng',['ᆪ']='ng',['ᆫ']='n',['ᆬ']='n',['ᆭ']='n',['ᆮ']='n',['ᆰ']='ng',['ᆱ']='m',['ᆵ']='m',['ᆷ']='m',['ᆸ']='m',['ᆹ']='m',['ᆺ']='n',['ᆻ']='n',['ᆼ']='ng',['ᆽ']='n',['ᆾ']='n',['ᆿ']='ng',['ᇀ']='n',['ᇁ']='m',['ᇂ']='n'}
	local final_before_r = {['ᆨ']='ng',['ᆩ']='ng',['ᆪ']='ng',['ᆬ']='n',['ᆮ']='n',['ᆰ']='ng',['ᆱ']='m',['ᆵ']='m',['ᆷ']='m',['ᆸ']='m',['ᆹ']='m',['ᆺ']='n',['ᆻ']='n',['ᆼ']='ng',['ᆽ']='n',['ᆾ']='n',['ᆿ']='ng',['ᇀ']='n',['ᇁ']='m'}
	local final_before_m = {['ᆨ']='ng',['ᆩ']='ng',['ᆪ']='ng',['ᆮ']='n',['ᆰ']='ng',['ᆵ']='m',['ᆸ']='m',['ᆹ']='m',['ᆺ']='n',['ᆻ']='n',['ᆽ']='n',['ᆾ']='n',['ᆿ']='ng',['ᇀ']='n',['ᇁ']='m'}
	-- Default Latin spellings for whatever jamo are still untouched.
	local final_default = {['ᆨ']='k',['ᆩ']='k',['ᆪ']='k',['ᆫ']='n',['ᆬ']='n',['ᆭ']='n',['ᆮ']='t',['ᆯ']='l',['ᆰ']='k',['ᆱ']='m',['ᆲ']='l',['ᆳ']='l',['ᆴ']='l',['ᆵ']='p',['ᆶ']='l',['ᆷ']='m',['ᆸ']='p',['ᆹ']='p',['ᆺ']='t',['ᆻ']='t',['ᆼ']='ng',['ᆽ']='t',['ᆾ']='t',['ᆿ']='k',['ᇀ']='t',['ᇁ']='p',['ᇂ']=''}
	local initial_default = {['ᄀ']='g',['ᄁ']='kk',['ᄂ']='n',['ᄃ']='d',['ᄄ']='tt',['ᄅ']='r',['ᄆ']='m',['ᄇ']='b',['ᄈ']='pp',['ᄉ']='s',['ᄊ']='ss',['ᄋ']='',['ᄌ']='j',['ᄍ']='jj',['ᄎ']='ch',['ᄏ']='k',['ᄐ']='t',['ᄑ']='p',['ᄒ']='h'}
	local vowel_default = {['ᅡ']='a',['ᅢ']='ae',['ᅣ']='ya',['ᅤ']='yae',['ᅥ']='eo',['ᅦ']='e',['ᅧ']='yeo',['ᅨ']='ye',['ᅩ']='o',['ᅪ']='wa',['ᅫ']='wae',['ᅬ']='oe',['ᅭ']='yo',['ᅮ']='u',['ᅯ']='wo',['ᅰ']='we',['ᅱ']='wi',['ᅲ']='yu',['ᅳ']='eu',['ᅴ']='ui',['ᅵ']='i'}

	-- The rules below run in a fixed order: later ones rely on earlier ones
	-- having already replaced (or left alone) final[i] / initial[j].
	for i = 1, wordlen, 1 do
		-- `j` is the next position that is not a `-`, `^` or `'` marker.
		-- It must be local; the original assignment leaked a global.
		local j = i + 1
		while ustring.match(syllable[j],"[%-%^%']") do
			-- Protect skipped hyphens as em dashes; they are turned back at the end.
			syllable[j] = ustring.gsub(syllable[j],'%-','—')
			j = j + 1
		end
		if vowel[j] ~= '' then
			-- Final ㅌ/ㄾ directly before 이 or 히 is treated as ㅊ.
			if ustring.gsub((final[i] .. syllable[j]),'[ᇀᆴ][이히]','') == '' then
				final[i] = 'ᆾ'
			end
			-- Final ㄷ before 이 is treated as ㅈ, before 히 as ㅊ.
			if ustring.gsub((final[i] .. syllable[j]),'ᆮ[이히]','') == '' then
				final[i] = ustring.gsub(syllable[j],'[이히]',{['이']='ᆽ',['히']='ᆾ'})
			end
			-- Final ㅅ before a ㅇ-initial syllable is treated as ㄷ, unless that
			-- syllable is one of the listed ones.
			if ustring.gsub((final[i] .. initial[j]),'ᆺᄋ','') == '' then
				if ustring.gsub(syllable[j],'[이아어은으음읍을었았에]','') ~= '' then
					final[i] = 'ᆮ'
				end
			end
		end
		-- Before a silent initial ㅇ the final is written out in full.
		if initial[j] == 'ᄋ' then
			final[i] = final_before_ieung[final[i]] or final[i]
		end
		if syllable[i] == '밟' then
			final[i] = 'ᆸ'
		elseif final[i] == 'ᆭ' then
			initial[j] = initial_after_nh[initial[j]] or initial[j]
		elseif final[i] == 'ᆶ' then
			initial[j] = initial_after_lh[initial[j]] or initial[j]
		elseif final[i] == 'ᇂ' then
			initial[j] = initial_after_h[initial[j]] or initial[j]
		end
		if initial[j] == 'ᄂ' then
			if ustring.match(final[i],'[ᆯᆲᆴᆶ]') then
				final[i] = 'l'
				initial[j] = 'l'
			else
				final[i] = final_before_n[final[i]] or final[i]
			end
		end
		if initial[j] == 'ᄅ' then
			if final[i] == 'ᆫ' then
				-- ㄴ + ㄹ: "ll" by default, "nn" when the caller passed nn = 'yes'.
				if nn ~= 'yes' then
					final[i] = 'l'
					initial[j] = 'l'
				else
					initial[j] = 'n'
				end
			elseif ustring.match(final[i],'[ᆯᆲᆴ]') then
				final[i] = 'l'
				initial[j] = 'l'
			else
				final[i] = final_before_r[final[i]] or final[i]
				if final[i] ~= '' then
					initial[j] = 'n'
				end
			end
		end
		if initial[j] == 'ᄆ' then
			final[i] = final_before_m[final[i]] or final[i]
		end
		if final[i] == 'ᆰ' then
			if ustring.match(initial[j],'[ᄀᄁᄏ]') then
				final[i] = 'l'
			elseif initial[j] == 'ᄒ' then
				final[i] = 'l'
				initial[j] = 'k'
			end
		end
		-- ㄴ + ㄱ gets a hyphen so it cannot be read as the digraph "ng".
		if (final[i] .. initial[j]) == 'ᆫᄀ' then
			final[i] = 'n-'
		end
		-- ㅈ / ㄵ before ㅎ: emit "c" / "nc", which joins the following "h" into "ch".
		if (final[i] .. initial[j]) == 'ᆽᄒ' then
			final[i] = 'c'
		end
		if (final[i] .. initial[j]) == 'ᆬᄒ' then
			final[i] = 'nc'
		end
		-- No final and the next syllable starts with silent ㅇ: leave a "…"
		-- marker so ambiguous vowel sequences can be hyphenated afterwards.
		if vowel[i] ~= '' then
			if (final[i] .. initial[j]) == 'ᄋ' then
				final[i] = '…'
			end
		end
		final[i] = final_default[final[i]] or final[i]
		initial[i] = initial_default[initial[i]] or initial[i]
		vowel[i] = vowel_default[vowel[i]] or vowel[i]
		parts[i] = initial[i] .. vowel[i] .. final[i]
		-- Characters that are not Hangul syllables are copied through as-is.
		if parts[i] == '' then
			if syllable[i] ~= '' then
				parts[i] = syllable[i]
			end
		end
	end

	local text = table.concat(parts,"")
	-- Hyphenate the vowel sequences that would otherwise be misread, then
	-- discard the remaining boundary markers.
	text = ustring.gsub(text, 'o…e', 'o-e')
	text = ustring.gsub(text, 'e…([ou])', 'e-%1')
	text = ustring.gsub(text, 'a…e', 'a-e')
	text = ustring.gsub(text, 'u…i', 'u-i')
	text = ustring.gsub(text, '…', '')
	text = ustring.gsub(text, '—', '-')
	-- Text containing sentence punctuation: capitalise the first character and
	-- mark the start of every following sentence for capitalisation.
	if ustring.match(text,'[%.%?%!]') then
		text = ustring.upper(ustring.sub(text,1,1)) .. ustring.sub(text,2,-1)
		text = ustring.gsub(text,"([%.%?%!]) ([a-z%'])",'%1 ^%2')
	end
	-- Move a caret past wiki bold markup, apply the capitalisation it requests,
	-- and strip any carets that are left.
	text = ustring.gsub(text, "%^%'%'%'", "'''^")
	text = ustring.gsub(text, "%^%l", ustring.upper)
	text = ustring.gsub(text, '%^', '')

	return text
end

-- `tr` is an alias for `tr_revised`, so both names call the same function.
export.tr = export.tr_revised

-- Hand the module table back to whoever loads this module.
return export