-- Module:cdo-pron
-- Documentation for this module may be created at Module:cdo-pron/doc
-- Table handed back to whoever loads this module; the public functions are
-- attached to it further down.
local export = {}

-- Replacement table for stripping tone marks: every precomposed tone-marked
-- vowel maps to its bare vowel, and every free-standing combining tone mark
-- maps to the empty string. It is passed to gsub as the replacement argument.
local detone = {
	-- breve
	["ă"] = "a",
	["ĕ"] = "e",
	["ĭ"] = "i",
	["ŏ"] = "o",
	["ŭ"] = "u",
	["̆"] = "",
	-- macron
	["ā"] = "a",
	["ē"] = "e",
	["ī"] = "i",
	["ō"] = "o",
	["ū"] = "u",
	["̄"] = "",
	-- acute
	["á"] = "a",
	["é"] = "e",
	["í"] = "i",
	["ó"] = "o",
	["ú"] = "u",
	["́"] = "",
	-- grave
	["à"] = "a",
	["è"] = "e",
	["ì"] = "i",
	["ò"] = "o",
	["ù"] = "u",
	["̀"] = "",
	-- circumflex
	["â"] = "a",
	["ê"] = "e",
	["î"] = "i",
	["ô"] = "o",
	["û"] = "u",
	["̂"] = "",
}
-- IPA (as HTML fragments) for each romanised initial, including the empty
-- initial "". Every initial has three forms, selected per syllable by
-- export.ipa:
--   unchanged  the first syllable, a syllable after one spelled with final -k,
--              or a syllable whose lenition is blocked
--   nasal      a syllable following a final that ends in ŋ
--   lenited    every other non-initial syllable
-- In the changed forms the superscripted, parenthesised prefix repeats the
-- "unchanged" value of the same initial.
local initial_ipa = {
	b = { unchanged = "p", lenited = "<sup>(p-)</sup>β", nasal = "<sup>(p-)</sup>m" },
	p = { unchanged = "pʰ", lenited = "<sup>(pʰ-)</sup>β", nasal = "<sup>(pʰ-)</sup>m" },
	m = { unchanged = "m", lenited = "m", nasal = "m" },

	d = { unchanged = "t", lenited = "<sup>(t-)</sup>l", nasal = "<sup>(t-)</sup>n" },
	t = { unchanged = "tʰ", lenited = "<sup>(tʰ-)</sup>l", nasal = "<sup>(tʰ-)</sup>n" },
	n = { unchanged = "n", lenited = "n", nasal = "n" },
	l = { unchanged = "l", lenited = "l", nasal = "<sup>(l-)</sup>n" },

	g = { unchanged = "k", lenited = "<sup>(k-)</sup>", nasal = "<sup>(k-)</sup>ŋ" },
	k = { unchanged = "kʰ", lenited = "<sup>(kʰ-)</sup>", nasal = "<sup>(kʰ-)</sup>ŋ" },
	ng = { unchanged = "ŋ", lenited = "ŋ", nasal = "ŋ" },
	h = { unchanged = "h", lenited = "<sup>(h-)</sup>", nasal = "<sup>(h-)</sup>ŋ" },

	c = { unchanged = "t͡s", lenited = "<sup>(t͡s-)</sup>ʒ", nasal = "<sup>(t͡s-)</sup>ʒ" },
	ch = { unchanged = "t͡sʰ", lenited = "<sup>(t͡sʰ-)</sup>ʒ", nasal = "<sup>(t͡sʰ-)</sup>ʒ" },
	s = { unchanged = "s", lenited = "<sup>(s-)</sup>l", nasal = "<sup>(s-)</sup>n" },

	-- no written initial
	[""] = { unchanged = "", lenited = "", nasal = "<sup>(Ø-)</sup>ŋ" },
}
-- IPA values of the finals, keyed by the tone-stripped romanised final.
-- Each entry has two realisations: "open" is used for a word-final syllable in
-- tone 3, 4 or 6, "close" is used everywhere else.
--
-- Six spellings (eu, oi, eng, ong, ek, ok) are written for two different
-- finals: they are the basic spelling of one final and the tone-3/4/6 spelling
-- of another (compare the pairs ing/eng and eng/aing below). A table can hold
-- only one value per key, so the final that carries the spelling in tones
-- 1, 2, 5 and 7 is stored under the spelling plus "2"; export.ipa appends the
-- "2" when it sees one of those tones.
local final_ipa = {
	["a"] = { ["close"] = "a", ["open"] = "ɑ" },
	["ia"] = { ["close"] = "ia", ["open"] = "iɑ" },
	["ua"] = { ["close"] = "ua", ["open"] = "uɑ" },
	["a̤"] = { ["close"] = "ɛ", ["open"] = "ɑ" },
	["ie"] = { ["close"] = "ie", ["open"] = "iɛ" },
	["o̤"] = { ["close"] = "o", ["open"] = "ɔ" },
	["io"] = { ["close"] = "yo", ["open"] = "yɔ" },
	["uo"] = { ["close"] = "uo", ["open"] = "uɔ" },
	["e̤"] = { ["close"] = "œ", ["open"] = "ɔ" },
	["ae̤"] = { ["close"] = "œ", ["open"] = "ɔ" },
	["au"] = { ["close"] = "au", ["open"] = "ɑu" },
	["eu2"] = { ["close"] = "ɛu", ["open"] = "ɛu" },
	["aiu"] = { ["close"] = "ɛu", ["open"] = "ɑu" },
	["ieu"] = { ["close"] = "iu", ["open"] = "iɛu" },
	["iu"] = { ["close"] = "iu", ["open"] = "iu" },
	["eu"] = { ["close"] = "iu", ["open"] = "iɛu" },
	["oi2"] = { ["close"] = "øy", ["open"] = "øy" },
	["o̤i"] = { ["close"] = "øy", ["open"] = "ɔy" },
	["ai"] = { ["close"] = "ai", ["open"] = "ɑi" },
	["uai"] = { ["close"] = "uai", ["open"] = "uɑi" },
	["uoi"] = { ["close"] = "ui", ["open"] = "uoi" },
	["ui"] = { ["close"] = "ui", ["open"] = "ui" },
	["oi"] = { ["close"] = "ui", ["open"] = "uoi" },
	["i"] = { ["close"] = "i", ["open"] = "i" },
	["e"] = { ["close"] = "i", ["open"] = "ɛi" },
	["u"] = { ["close"] = "u", ["open"] = "u" },
	["o"] = { ["close"] = "u", ["open"] = "ou" },
	["ṳ"] = { ["close"] = "y", ["open"] = "øy" },
	["e̤ṳ"] = { ["close"] = "y", ["open"] = "øy" },
	["ah"] = { ["close"] = "aʔ", ["open"] = "ɑʔ" },
	["iah"] = { ["close"] = "iaʔ", ["open"] = "iɑʔ" },
	["uah"] = { ["close"] = "uaʔ", ["open"] = "uɑʔ" },
	["a̤h"] = { ["close"] = "eʔ", ["open"] = "ɛʔ" },
	["ieh"] = { ["close"] = "ieʔ", ["open"] = "iɛʔ" },
	["o̤h"] = { ["close"] = "oʔ", ["open"] = "ɔʔ" },
	["ioh"] = { ["close"] = "yoʔ", ["open"] = "yɔʔ" },
	["uoh"] = { ["close"] = "uoʔ", ["open"] = "uɔʔ" },
	["e̤h"] = { ["close"] = "øʔ", ["open"] = "œʔ" },
	["ang"] = { ["close"] = "aŋ", ["open"] = "ɑŋ" },
	["iang"] = { ["close"] = "iaŋ", ["open"] = "iɑŋ" },
	["uang"] = { ["close"] = "uaŋ", ["open"] = "uɑŋ" },
	["ieng"] = { ["close"] = "ieŋ", ["open"] = "iɛŋ" },
	["iong"] = { ["close"] = "yoŋ", ["open"] = "yɔŋ" },
	["uong"] = { ["close"] = "uoŋ", ["open"] = "uɔŋ" },
	["ing"] = { ["close"] = "iŋ", ["open"] = "iŋ" },
	["eng"] = { ["close"] = "iŋ", ["open"] = "ɛiŋ" },
	["ung"] = { ["close"] = "uŋ", ["open"] = "uŋ" },
	["ong"] = { ["close"] = "uŋ", ["open"] = "ouŋ" },
	["ṳng"] = { ["close"] = "yŋ", ["open"] = "yŋ" },
	["e̤ṳng"] = { ["close"] = "yŋ", ["open"] = "øyŋ" },
	["eng2"] = { ["close"] = "eiŋ", ["open"] = "eiŋ" },
	["aing"] = { ["close"] = "eiŋ", ["open"] = "aiŋ" },
	["ong2"] = { ["close"] = "ouŋ", ["open"] = "ouŋ" },
	["aung"] = { ["close"] = "ouŋ", ["open"] = "auŋ" },
	["e̤ng"] = { ["close"] = "øyŋ", ["open"] = "øyŋ" },
	["ae̤ng"] = { ["close"] = "øyŋ", ["open"] = "ɔyŋ" },
	["ak"] = { ["close"] = "aʔ", ["open"] = "ɑʔ" },
	["iak"] = { ["close"] = "iaʔ", ["open"] = "iɑʔ" },
	["uak"] = { ["close"] = "uaʔ", ["open"] = "uɑʔ" },
	["iek"] = { ["close"] = "ieʔ", ["open"] = "iɛʔ" },
	["iok"] = { ["close"] = "yoʔ", ["open"] = "yɔʔ" },
	["uok"] = { ["close"] = "uoʔ", ["open"] = "uɔʔ" },
	["ik"] = { ["close"] = "iʔ", ["open"] = "iʔ" },
	["ek"] = { ["close"] = "iʔ", ["open"] = "ɛiʔ" },
	["uk"] = { ["close"] = "uʔ", ["open"] = "uʔ" },
	["ok"] = { ["close"] = "uʔ", ["open"] = "ouʔ" },
	["ṳk"] = { ["close"] = "yʔ", ["open"] = "yʔ" },
	["e̤ṳk"] = { ["close"] = "yʔ", ["open"] = "øyʔ" },
	["ek2"] = { ["close"] = "eiʔ", ["open"] = "eiʔ" },
	["aik"] = { ["close"] = "eiʔ", ["open"] = "aiʔ" },
	["ok2"] = { ["close"] = "ouʔ", ["open"] = "ouʔ" },
	["auk"] = { ["close"] = "ouʔ", ["open"] = "auʔ" },
	["e̤k"] = { ["close"] = "øyʔ", ["open"] = "øyʔ" },
	["ae̤k"] = { ["close"] = "øyʔ", ["open"] = "ɔyʔ" },
	["ng"] = { ["close"] = "ŋ̍", ["open"] = "ŋ̍" },
}

-- Spellings that need the "2" suffix described above when the syllable is in
-- tone 1, 2, 5 or 7.
local close_tone_homographs = {
	eu = true, oi = true, eng = true, ong = true, ek = true, ok = true,
}

-- Tones whose word-final syllable takes the "open" realisation of its final.
local open_tones = { [3] = true, [4] = true, [6] = true }

-- Citation contour of each tone number, written with superscript digits.
local tone_ipa = {
	[1] = '⁵⁵',
	[2] = '³³',
	[3] = '²¹³',
	[4] = '²⁴',
	[5] = '⁵³',
	[6] = '²⁴²',
	[7] = '⁵',
}

-- Sandhi contour of a non-final syllable. The outer key is the class of the
-- syllable itself (see sandhi_class); the inner key is the tone number of the
-- syllable that follows. Every value starts with '⁻' so that it can be appended
-- straight after the citation contour.
local tone_sandhi = {
	['first'] = {
		[1] = '⁻⁵⁵', [5] = '⁻⁵⁵', [7] = '⁻⁵⁵',
		[2] = '⁻⁵³', [3] = '⁻⁵³', [4] = '⁻⁵³', [6] = '⁻⁵³',
	},
	['second'] = {
		[1] = '⁻⁵⁵',
		[2] = '⁻³³', [5] = '⁻³³', [7] = '⁻³³',
		[3] = '⁻²¹', [4] = '⁻²¹', [6] = '⁻²¹',
	},
	['third'] = {
		[1] = '⁻²¹', [5] = '⁻²¹', [7] = '⁻²¹',
		[2] = '⁻³⁵',
		[3] = '⁻⁵⁵', [4] = '⁻⁵⁵', [6] = '⁻⁵⁵',
	}
}

-- Replacement for a syllabic ŋ̍ final, chosen from the initial of the syllable
-- that follows: 'nasal' before b/p/m, 'dental' before d/t/n/l/s/c/ch, 'velar'
-- before anything else.
local neg_assim = {
	['nasal'] = "<sup>(ŋ̍-)</sup>m̩",
	['dental'] = "<sup>(ŋ̍-)</sup>n̩",
	['velar'] = "<sup>(ŋ̍-)</sup>ŋ̍",
}

-- Romanised initials that select the 'nasal' and 'dental' entries of neg_assim.
local labial_initials = { b = true, p = true, m = true }
local dental_initials = {
	d = true, t = true, n = true, l = true, s = true, c = true, ch = true,
}

-- Returns the tone number (1-7) encoded by the diacritic in a romanised final,
-- or nil when the final carries no recognised tone mark. An empty final counts
-- as tone 1, which is how a bare syllabic "ng" is treated.
local function detect_tone(rime)
	local stopped = mw.ustring.find(rime, '[hk]$')
	if mw.ustring.find(rime, '[ăĕĭŏŭ̆]') or rime == '' then
		return stopped and 7 or 1
	elseif mw.ustring.find(rime, '[āēīōū̄]') then
		return 2
	elseif mw.ustring.find(rime, '[áéíóú́]') then
		return stopped and 4 or 3
	elseif mw.ustring.find(rime, '[àèìòù̀]') then
		return 5
	elseif mw.ustring.find(rime, '[âêîôû̂]') then
		return 6
	end
	return nil
end

-- Returns the tone_sandhi class ('first', 'second' or 'third') of a non-final
-- syllable. Tone 4 is split by spelling: finals written with -h go with
-- 'first', the rest fall through to 'third'. The romanised final is needed for
-- that test because both -h and -k become ʔ in the IPA.
local function sandhi_class(syllable_tone, romanised_final)
	if syllable_tone == 1 or syllable_tone == 3 or syllable_tone == 6 then
		return 'first'
	end
	if syllable_tone == 4 and mw.ustring.find(romanised_final, 'h$') then
		return 'first'
	end
	if syllable_tone == 5 or syllable_tone == 7 then
		return 'second'
	end
	return 'third'
end

--- Converts a romanised word to an IPA transcription.
-- @param text  the word as a string with syllables separated by "-", or a
--              frame-like table whose args[1] holds that string. Every
--              syllable must carry a tone diacritic (a bare "ng" is the only
--              exception). Braces mark syllables whose initial must stay
--              unchanged: a syllable is protected when it contains "{" or "}"
--              or lies between an opening and a closing brace.
-- @return      the syllables' IPA joined by single spaces; initials and
--              assimilated ŋ̍ may contain <sup> markup.
-- Raises an error when the argument is missing or a syllable has no tone mark,
-- an unknown initial or an unknown final.
function export.ipa(text)
	if type(text) == 'table' then
		text = text.args[1]
	end
	if text == nil then
		error('cdo-pron: no romanisation was given')
	end
	text = mw.ustring.lower(text)

	local syllables = mw.text.split(text, "-")
	local last = #syllables
	-- Romanised pieces are kept next to their IPA because later steps (sandhi
	-- class, ŋ̍ assimilation, the -k check) depend on the spelling.
	local stripped, onset_rom, rime_rom, tone = {}, {}, {}, {}
	local initial, final, ipa = {}, {}, {}
	local inside_braces = false

	for i, raw in ipairs(syllables) do
		-- Local on purpose: a flag that outlives the braces would also switch
		-- off lenition for every later syllable and every later call.
		local lenition_blocked = inside_braces or raw:find('[{}]') ~= nil
		for brace in raw:gmatch('[{}]') do
			inside_braces = (brace == '{')
		end
		local syllable = raw:gsub('[{}]', '')
		stripped[i] = syllable

		local onset = mw.ustring.match(syllable, '^([bpmdtnlgkhcs]?[gh]?)')
		local rime = mw.ustring.sub(syllable, mw.ustring.len(onset) + 1, -1)

		local syllable_tone = detect_tone(rime)
		if not syllable_tone then
			error('cdo-pron: no tone mark found in syllable "' .. raw .. '"')
		end
		tone[i] = syllable_tone

		rime = mw.ustring.gsub(rime, '[ăĕĭŏŭāēīōūáéíóúàèìòùâêîôû̆̄́̀̂]', detone)
		-- After these initials a written "io" is looked up as "uo".
		if mw.ustring.find(onset .. rime, '[dtnlcs]h?io') then
			rime = mw.ustring.gsub(rime, 'io', 'uo')
		end
		-- A syllable that is nothing but "ng" is the syllabic nasal, not the
		-- initial ng with an empty final.
		if onset .. rime == 'ng' then
			onset, rime = '', 'ng'
		end
		onset_rom[i], rime_rom[i] = onset, rime

		local is_open_tone = open_tones[syllable_tone]
		local key = rime
		if close_tone_homographs[rime] and not is_open_tone then
			key = rime .. '2'
		end
		local realisations = final_ipa[key]
		if not realisations then
			error('cdo-pron: unrecognised final "' .. rime .. '" in syllable "' .. raw .. '"')
		end
		if is_open_tone and i == last then
			final[i] = realisations["open"]
		else
			final[i] = realisations["close"]
		end

		local onset_forms = initial_ipa[onset]
		if not onset_forms then
			error('cdo-pron: unrecognised initial "' .. onset .. '" in syllable "' .. raw .. '"')
		end
		local initial_state
		if i == 1 or lenition_blocked or mw.ustring.find(stripped[i - 1], 'k$') then
			initial_state = 'unchanged'
		elseif mw.ustring.find(final[i - 1], '[ŋ̍]$') then
			initial_state = 'nasal'
		else
			initial_state = 'lenited'
		end
		initial[i] = onset_forms[initial_state]
	end

	for i = 1, last do
		if final[i] == 'ŋ̍' then
			-- Assimilate to the initial of the next syllable only. Searching
			-- the whole syllable would let the "n" of a final -ng count as
			-- dental. With no following syllable the nasal is left as it is.
			local next_onset = onset_rom[i + 1]
			if next_onset then
				local neg_type
				if labial_initials[next_onset] then
					neg_type = 'nasal'
				elseif dental_initials[next_onset] then
					neg_type = 'dental'
				else
					neg_type = 'velar'
				end
				final[i] = neg_assim[neg_type]
			end
		end

		local contour = tone_ipa[tone[i]]
		if i ~= last then
			local sandhi = tone_sandhi[sandhi_class(tone[i], rime_rom[i])][tone[i + 1]]
			-- Show the sandhi contour only when it differs from the citation one.
			if sandhi ~= '⁻' .. contour then
				contour = contour .. sandhi
			end
		end
		ipa[i] = initial[i] .. final[i] .. contour
	end

	return table.concat(ipa, " ")
end
return export