-- Module:IPA

local export = {} local m_data = mw.loadData('Module:IPA/data')

--- Formats a list of pronunciations as a complete "IPA(key):" line.
-- @param lang  language object; getCode() and getCanonicalName() are called on it
-- @param items sequence of pronunciation items, passed unchanged to
--              export.format_IPA_multiple
-- @return the formatted text as a single string
function export.format_IPA_full(lang, items)
	-- Title of the page that explains this language's pronunciation.
	-- NOTE(review): this value never reaches the output, because `prefix` is
	-- overwritten by the next assignment. Presumably it was meant to be the
	-- link target of "key" -- confirm against the intended markup.
	local prefix
	if m_data.langs_with_infopages[lang:getCode()] then
		prefix = "Appendix:" .. lang:getCanonicalName() .. " pronunciation"
	else
		prefix = "wikipedia:" .. lang:getCanonicalName() .. " phonology"
	end

	prefix = "IPA(key):&#32;/" .. "" .. export.format_IPA_multiple(lang, items)

	-- Drop the "/" that directly precedes a category link. Assigning to a
	-- single variable keeps only gsub's first result (the string) and
	-- discards the replacement count.
	prefix = mw.ustring.gsub(prefix, "/%[%[Category", "[[Category")

	return prefix
end

--- Formats several pronunciations and joins them with ", ".
-- @param lang  language object (or nil), forwarded to export.format_IPA
-- @param items sequence of tables; each has a `pron` string and may have a
--              `note` string that is emitted as a <ref> extension tag.
--              When the sequence is empty on a page in the Template
--              namespace, a sample item is appended to this very table.
-- @return the joined pronunciations followed by any cleanup categories
function export.format_IPA_multiple(lang, items)
	-- The original assigned an unused global here (`notes = notes or {}`);
	-- it was never read in this function and has been removed.
	local categories = {}

	-- Format
	if #items == 0 then
		if mw.title.getCurrentTitle().nsText == "Template" then
			-- Show a sample pronunciation on template pages.
			table.insert(items, {pron = "/aɪ piː ˈeɪ/"})
		else
			-- NOTE(review): the inserted string is empty, so this adds nothing
			-- to the output; presumably a category link belongs here -- confirm.
			table.insert(categories, "")
		end
	end

	local bits = {}

	for _, item in ipairs(items) do
		local bit = export.format_IPA(lang, item.pron)

		if item.note then
			bit = bit .. mw.getCurrentFrame():extensionTag("ref", item.note)
		end

		table.insert(bits, bit)
	end

	return table.concat(bits, ", ") .. table.concat(categories)
end

-- TODO: Use data module for this
-- Characters accepted in a transcription. The string is spliced between
-- "[" and "]" to form a pattern character class, which is why the
-- punctuation at the start is %-escaped.
local valid_symbols = ' %(%)%%{%|%}%-~.!abcdefhijklmnopqrstuvwxyz¡àáâãäæçèéêëìíîïðòóôõöøùúûüýÿāăēĕěħĩīĭŋōŏőœũūŭűŷǀǁǂǃǎǐǒǔǖǘǚǜǟǣǽǿȁȅȉȍȕȫȭȳɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɪɫɬɭɮɯɰɱɲɳɴɵɶɸɹɺɻɽɾʀʁʂʃʄʈʉʊʋṽʌʍʎʏʐʑʒʔʕʘʙʛʜʝʟʡʢʬʭ⁻¹²³⁴⁵ᵝʰʱʲʳʴʵʶʷʸʼˀˁˈˌːˑ˞ˠˡˢˣ˥˦˧˨˩ˬ˭̘̙̜̝̞̟̠̣̤̥̩̪̬̯̰̹̺̻̼͇͈͉͍͎͔͕̀́̂̃̄̆̈̋̌̏̽͆͊͋͌̊̌̚͢͡β͜θχᴙᵊᵐᵑᶑᶣᶬᶮᶯᶰᶹ᷽᷄᷅᷆᷇᷈᷉ḁḛḭḯṍṏṳṵṹṻạẹẽịọụỳỵỹ‖․‥…‼‿ⁿ↑↓↗↘ⱱꜛꜜꟸꟹ𝆏𝆑'

--- Takes an IPA pronunciation and formats it and adds cleanup categories.
-- @param lang language object or nil; when given, getCode() is used to look
--             up the language's phoneme list in m_data.phonemes
-- @param pron transcription string, expected to be wrapped in /.../
--             (phonemic) or [...] (phonetic)
-- @return the formatted transcription followed by the concatenated
--         category strings
-- NOTE(review): every category string inserted below is empty, so none of
-- the checks currently changes the output; presumably category links belong
-- there -- confirm.
function export.format_IPA(lang, pron)
	local categories = {}

	-- Detect whether this is a phonemic or phonetic transcription.
	-- The pattern needs at least two characters; on shorter input both
	-- captures are nil and the transcription is treated as unmarked.
	local _, _, left, right = mw.ustring.find(pron, '^(.).-(.)$')
	local repr = nil

	-- If valid, strip the representation marks
	if left == '/' and right == '/' then
		repr = "phonemic"
		pron = mw.ustring.sub(pron, 2, -2)
	elseif left == '[' and right == ']' then
		repr = "phonetic"
		pron = mw.ustring.sub(pron, 2, -2)
	else
		table.insert(categories, "")
	end

	-- Check for obsolete and nonstandard symbols
	for _, symbol in ipairs(m_data.nonstandard) do
		if mw.ustring.find(pron, symbol) then
			table.insert(categories, "")
			break
		end
	end

	-- Check for invalid symbols: whatever survives deleting every valid
	-- character is invalid. Only gsub's first result (the string) is kept.
	local leftover = mw.ustring.gsub(pron, '[' .. valid_symbols .. ']', '')
	if leftover ~= '' then
		table.insert(categories, "")
	end

	-- Check for double character
	if mw.ustring.match(pron, '([^˥˦˧˨˩])%1') then
		table.insert(categories, "")
	end

	-- Reference inside IPA template usage
	-- FIXME: Doesn't work; you can't put HTML in module output.
	--if mw.ustring.find(pron, ' ') then
	--	table.insert(categories, "")
	--end

	if repr == "phonemic" then
		if lang and m_data.phonemes[lang:getCode()] then
			local valid_phonemes = m_data.phonemes[lang:getCode()]
			local rest = pron
			local phonemes = {}

			-- Greedy segmentation: repeatedly take the longest known phoneme
			-- that prefixes the remaining text.
			while mw.ustring.len(rest) > 0 do
				local first = mw.ustring.sub(rest, 1, 1)
				local longestmatch = ""

				if first == "(" or first == ")" then
					-- Parentheses are always accepted as-is.
					longestmatch = first
				else
					for _, phoneme in ipairs(valid_phonemes) do
						local len = mw.ustring.len(phoneme)
						if len > mw.ustring.len(longestmatch) and mw.ustring.sub(rest, 1, len) == phoneme then
							longestmatch = phoneme
						end
					end
				end

				if mw.ustring.len(longestmatch) > 0 then
					table.insert(phonemes, longestmatch)
					rest = mw.ustring.sub(rest, mw.ustring.len(longestmatch) + 1)
				else
					-- No known phoneme starts here: emit the single character,
					-- record it for tracking and move on by one character.
					table.insert(phonemes, "" .. first .. " ")
					rest = mw.ustring.sub(rest, 2)
					table.insert(categories, "")
					require("Module:debug").track("IPA/invalid phonemes/" .. first)
				end
			end

			pron = table.concat(phonemes)
		end

		pron = "/" .. pron .. "/"
	elseif repr == "phonetic" then
		pron = "[" .. pron .. "]"
	end

	return ' ' .. pron .. ' ' .. table.concat(categories)
end

-- IPA <-> XSAMPA lookup tables
-- i2x_lookup maps an IPA symbol to one X-SAMPA string; x2i_lookup maps every
-- known X-SAMPA string back to its IPA symbol.
local i2x_lookup, x2i_lookup = {}, {}

for ipa_sym, data in pairs(m_data.symbols[1]) do
	local xsampa = data.XSAMPA
	if type(xsampa) == "table" then
		-- Several X-SAMPA spellings: the first one is used for IPA -> X-SAMPA,
		-- all of them are recognised for X-SAMPA -> IPA.
		i2x_lookup[ipa_sym] = xsampa[1]
		for _, xsampa_sym in ipairs(xsampa) do
			x2i_lookup[xsampa_sym] = ipa_sym
		end
	elseif xsampa ~= nil then
		-- Guarded because using nil as a table key raises an error, which
		-- would abort loading the module for a symbol without an XSAMPA entry.
		i2x_lookup[ipa_sym] = xsampa
		x2i_lookup[xsampa] = ipa_sym
	end
end

-- exception cases where two IPA characters map to one XSAMPA character
x2i_lookup["_T"] = "˥"
x2i_lookup["_H"] = "˦"
x2i_lookup["_M"] = "˧"
x2i_lookup["_L"] = "˨"
x2i_lookup["_B"] = "˩"

--- Converts IPA text to X-SAMPA.
-- @param text either a string, or a table (treated as a frame) whose
--             args[1] holds the text. In the frame case the text is passed
--             through mw.text.decode first and the result through
--             mw.text.nowiki.
-- @return the converted string; characters with no entry in i2x_lookup are
--         left unchanged
function export.IPA_to_XSAMPA(text)
	local escape = false
	if type(text) == 'table' then -- a frame, extract args
		text = text.args[1]
		-- NOTE(review): both substitutions replace a character with itself, so
		-- they are no-ops as written; presumably the patterns were longer
		-- originally -- confirm.
		text = text:gsub('=','='):gsub('|','|')
		text = mw.text.decode(text) -- XXX
		escape = true
	end

	text = mw.ustring.gsub(text, 'ːː', ':') -- this basically sums up m_data.symbols[2].XSAMPA
	-- With a table as replacement, gsub looks each matched character up and
	-- keeps the original character when the lookup yields nil.
	text = mw.ustring.gsub(text, '.', i2x_lookup)

	if escape then
		text = mw.text.nowiki(text)
	end
	return text
end

--- Converts X-SAMPA text to IPA.
-- @param text either a string, or a table (treated as a frame) whose
--             args[1] holds the text; in the frame case the text is passed
--             through mw.text.decode first
-- @return the converted string; characters that start no known X-SAMPA
--         sequence are copied through unchanged
function export.XSAMPA_to_IPA(text)
	local escape = false
	if type(text) == 'table' then -- a frame, extract args
		text = text.args[1]
		text = mw.text.decode(text) -- XXX
		escape = true
	end

	-- XXX: may not be the most efficient, but at least correct.
	local output = {}
	while #text > 0 do
		-- Longest match first: try the leading 4, 3, 2, then 1 characters.
		local consumed = nil
		for len = 4, 1, -1 do
			local ipa = x2i_lookup[mw.ustring.sub(text, 1, len)]
			if ipa then
				table.insert(output, ipa)
				consumed = len
				break
			end
		end

		if not consumed then
			-- no match
			table.insert(output, (mw.ustring.sub(text, 1, 1)))
			consumed = 1
		end

		text = mw.ustring.sub(text, consumed + 1)
	end

	output = table.concat(output)

	if escape then
--		output = mw.text.nowiki(output)
	end

	return output
end

return export