-- Module:av-nominal
--
-- Source: Wiktionary, the free dictionary.


-- Table returned by this module at the end of the file.
local export = {}


-- Language object for code "av"; used below for entry-name normalization and link generation.
local lang = require("Module:languages").getByCode("av")
local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
local m_table = require("Module:table")
local m_string_utilities = require("Module:string utilities")
local m_para = require("Module:parameters")

-- Title of the page currently being rendered. NAMESPACE is compared against ''
-- further down before doing red-link checks.
local current_title = mw.title.getCurrentTitle()
local NAMESPACE = current_title.nsText
local PAGENAME = current_title.text

-- Avar-specific submodules.
-- NOTE(review): the local name `m_la_utilities` holds "Module:av-utilities"; the
-- "la" in the name looks like a leftover from another module -- confirm before renaming.
local m_noun_decl = require("Module:av-noun/data")
local m_noun_table = require("Module:av-noun/table")
local m_la_utilities = require("Module:av-utilities")

-- Short aliases for MediaWiki text/ustring library functions.
-- NOTE(review): several of these aliases are not referenced in the part of the
-- file visible here; they may be used elsewhere -- verify before removing.
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local uupper = mw.ustring.upper

-- Wrapper around rsubn() that yields only the substituted string; the
-- replacement count (second return value of rsubn()) is dropped.
local function rsub(term, foo, bar)
	-- The parentheses truncate the call to exactly one return value.
	return (rsubn(term, foo, bar))
end

-- Case abbreviations, in the order in which the slot iterators walk them.
-- NOTE(review): other code in this file tests for a case named "abs" and reads
-- "abs_*" slots, but "abs" is not listed here -- confirm which name is intended.
local cases = { "nom", "erg", "gen", "dat", "ins" }

-- Number abbreviations.
local nums = { "sg", "pl" }

-- Gender abbreviations (used for adjective slots).
local genders = { "m", "f", "n" }


-- Maps a declension number (as a string) to its English ordinal.
local declension_to_english = {
	["1"] = "first", ["2"] = "second", ["3"] = "third", ["4"] = "fourth",
}

-- English cardinals, indexed by number.
local number_to_english = { "one", "two", "three", "four" }

-- Slot-name prefixes: index 1 is the plain slot, index 2 the "linked_" variant.
local linked_prefixes = { "", "linked_" }

-- Adjective slots that get a "linked_" variant. Feminine and neuter slots are
-- listed as well because they are needed when the adjective is part of a
-- multiword feminine or neuter noun.
local potential_adj_lemma_slots = {
	"abs_sg_m", "abs_pl_m",
	"abs_sg_f", "abs_pl_f",
	"abs_sg_n", "abs_pl_n",
}

-- Reverse lookup: "linked_<slot>" -> "<slot>" for the adjective lemma slots.
local linked_to_non_linked_adj_slots = {}
for i = 1, #potential_adj_lemma_slots do
	local plain_slot = potential_adj_lemma_slots[i]
	linked_to_non_linked_adj_slots["linked_" .. plain_slot] = plain_slot
end

-- Noun slots that get a "linked_" variant.
local potential_noun_lemma_slots = { "abs_sg", "abs_pl" }

-- Reverse lookup: "linked_<slot>" -> "<slot>" for the noun lemma slots.
local linked_to_non_linked_noun_slots = {}
for i = 1, #potential_noun_lemma_slots do
	local plain_slot = potential_noun_lemma_slots[i]
	linked_to_non_linked_noun_slots["linked_" .. plain_slot] = plain_slot
end

-- Return an iterator over the noun slot names ("<case>_<num>"), walking every
-- number within each case. Unless OVERRIDABLE_ONLY is set, a case named "abs"
-- additionally yields the "linked_" variant of each of its slots.
-- NOTE(review): `cases` as defined in this file has no "abs" entry, so the
-- linked variants appear never to be produced -- confirm the intended case name.
local function iter_noun_slots(overridable_only)
	local case_idx, num_idx, variant_idx = 1, 1, 0
	return function()
		variant_idx = variant_idx + 1
		-- How many prefix variants (plain, linked_) the current slot has.
		local variant_limit = 1
		if not overridable_only and cases[case_idx] == "abs" then
			variant_limit = 2
		end
		if variant_idx > variant_limit then
			-- Variants exhausted: advance to the next number, then the next case.
			variant_idx = 1
			num_idx = num_idx + 1
			if num_idx > #nums then
				num_idx = 1
				case_idx = case_idx + 1
				if case_idx > #cases then
					return nil
				end
			end
		end
		return linked_prefixes[variant_idx] .. cases[case_idx] .. "_" .. nums[num_idx]
	end
end

-- Return an iterator over the adjective slot names ("<case>_<num>_<gender>"),
-- walking every gender within each number within each case. Unless
-- OVERRIDABLE_ONLY is set, masculine slots of a case named "abs" additionally
-- yield their "linked_" variant; linked variants are not overridable.
-- NOTE(review): `cases` as defined in this file has no "abs" entry, so the
-- linked variants appear never to be produced -- confirm the intended case name.
local function iter_adj_slots(overridable_only)
	local case_idx, num_idx, gender_idx, variant_idx = 1, 1, 1, 0
	return function()
		variant_idx = variant_idx + 1
		-- How many prefix variants (plain, linked_) the current slot has.
		local variant_limit = 1
		if not overridable_only and cases[case_idx] == "abs" and genders[gender_idx] == "m" then
			variant_limit = 2
		end
		if variant_idx > variant_limit then
			-- Variants exhausted: advance gender, then number, then case.
			variant_idx = 1
			gender_idx = gender_idx + 1
			if gender_idx > #genders then
				gender_idx = 1
				num_idx = num_idx + 1
				if num_idx > #nums then
					num_idx = 1
					case_idx = case_idx + 1
					if case_idx > #cases then
						return nil
					end
				end
			end
		end
		return linked_prefixes[variant_idx] .. cases[case_idx] .. "_" .. nums[num_idx] .. "_" .. genders[gender_idx]
	end
end

-- Return an iterator over the slots of a noun declension (case/number) or,
-- when IS_ADJ is truthy, of an adjective declension (case/number/gender).
-- OVERRIDABLE_ONLY is passed through to the underlying iterator, which then
-- leaves out the non-overridable "linked_" variants.
local function iter_slots(is_adj, overridable_only)
	local make_iterator = is_adj and iter_adj_slots or iter_noun_slots
	return make_iterator(overridable_only)
end

-- Join the forms of one slot into a comma-separated string, replacing every
-- "|" inside a form with "<!>". Returns nil when FORMS is missing, is one of
-- the empty placeholders ("" or "—"), or has length zero.
local function concat_forms_in_slot(forms)
	if not forms or forms == "" or forms == "—" or #forms == 0 then
		return nil
	end
	local escaped = {}
	for _, form in ipairs(forms) do
		table.insert(escaped, rsub(form, "|", "<!>"))
	end
	return table.concat(escaped, ",")
end

-- Build a wikilink to ANCHOR in Appendix:Glossary. The link is displayed as
-- TEXT, or as ANCHOR itself when TEXT is not given.
local function glossary_link(anchor, text)
	local target = "Appendix:Glossary#" .. anchor
	local label = text or anchor
	return "[[" .. target .. "|" .. label .. "]]"
end

-- Record a tracking entry named "av-nominal/<PAGE>" through Module:debug.
-- Always returns true.
local function track(page)
	local debug_track = require("Module:debug").track
	debug_track("av-nominal/" .. page)
	return true
end

-- Return a new set (table mapping each member to true) holding every key
-- found in any of the tables in the list SETS.
local function set_union(sets)
	local merged = {}
	for _, members in ipairs(sets) do
		for member in pairs(members) do
			merged[member] = true
		end
	end
	return merged
end

-- Return a new set (table mapping each member to true) holding the keys of
-- SET1 whose value in SET2 is nil or false.
local function set_difference(set1, set2)
	local remaining = {}
	for member in pairs(set1) do
		-- Yields true when the member is absent from SET2; otherwise nil,
		-- which leaves the key unset.
		remaining[member] = (not set2[member]) or nil
	end
	return remaining
end

-- A slot argument given as '*' means the form is unattested but should still
-- be generated. Record each such slot in data.unattested (replacing any
-- previous table there) and remove the '*' from ARGS so that it is not
-- treated as an override.
local function unattested_forms(data, args, is_adj)
	local flagged = {}
	data.unattested = flagged
	for slot in iter_slots(is_adj) do
		if args[slot] == "*" then
			flagged[slot] = true
			args[slot] = nil
		end
	end
end

-- Format FORM through Module:links. An attested form becomes a real link
-- carrying the accelerator data ACCEL; an unattested form is passed only as
-- display text, prefixed with '*', with no link target.
local function link_if_attested(form, accel, is_unattested)
	local link_data = { lang = lang }
	if is_unattested then
		link_data.alt = '*' .. form
	else
		link_data.term = form
		link_data.accel = accel
	end
	return m_links.full_link(link_data)
end

-- Canonicalize the noun forms in DATA, applying user overrides from ARGS, then
-- compute accelerator data for every slot and flag red links.
--
-- Reads:  data.forms, data.num, data.pos, args[<slot>].
-- Writes: data.unattested (via unattested_forms()), data.user_specified,
--         data.forms, data.accel, data.categories.
--
-- After this runs, each processed data.forms[slot] is one of:
--   * a list of forms (string values are split on "/");
--   * "" when the slot's number conflicts with data.num ("sg" or "pl");
--   * "—" when the first alternative is "", "-" or "—".
-- Slots with no override and no generated form are left untouched.
local function process_noun_forms_and_overrides(data, args)
	local redlink = false
	-- Nouns only; pass an explicit false instead of reading an undeclared global.
	unattested_forms(data, args, false)

	-- Process overrides and canonicalize forms.
	for slot in iter_noun_slots() do
		local val = nil
		if args[slot] then
			val = args[slot]
			data.user_specified[slot] = true
		else
			-- Overriding a plain lemma slot should also override its linked_
			-- counterpart so that the correct value gets displayed in the
			-- headword, which uses the linked_ slot.
			local non_linked_equiv_slot = linked_to_non_linked_noun_slots[slot]
			if non_linked_equiv_slot and args[non_linked_equiv_slot] then
				val = args[non_linked_equiv_slot]
				data.user_specified[slot] = true
			else
				val = data.forms[slot]
			end
		end
		if val then
			if type(val) == "string" then
				val = mw.text.split(val, "/")
			end
			if (data.num == "pl" and slot:find("sg")) or (data.num == "sg" and slot:find("pl")) then
				data.forms[slot] = ""
			elseif val[1] == "" or val[1] == "-" or val[1] == "—" then
				data.forms[slot] = "—"
			else
				data.forms[slot] = val
			end
		end
	end

	-- Compute the lemma for accelerators. Do this after processing
	-- overrides in case we overrode the lemma form(s).
	local accel_lemma
	if data.num and data.num ~= "" then
		accel_lemma = data.forms["nom_" .. data.num]
	else
		accel_lemma = data.forms["nom_sg"]
	end
	if type(accel_lemma) == "table" then
		accel_lemma = accel_lemma[1]
	end

	-- Set the accelerators, and determine if there are red links.
	for slot in iter_noun_slots() do
		local val = data.forms[slot]
		if val and val ~= "" and val ~= "—" and #val > 0 then
			-- The accelerator depends only on the slot, so build it once per
			-- slot: e.g. "gen_pl" -> "gen|p".
			local accel_form = slot:gsub("_([sp])[gl]$", "|%1")
			data.accel[slot] = {form = accel_form, lemma = accel_lemma}

			-- Only check page existence in the main namespace, and stop at
			-- the first red link: the category is added at most once.
			if not redlink and NAMESPACE == '' then
				for _, form in ipairs(val) do
					-- Parentheses keep only the first return value.
					local title = (lang:makeEntryName(form))
					local t = mw.title.new(title)
					if t and not t.exists then
						table.insert(data.categories, "Avar " .. data.pos .. " with red links in their inflection tables")
						redlink = true
						break
					end
				end
			end
		end
	end
end

-- Canonicalize the adjective forms in DATA, applying user overrides from ARGS,
-- then compute accelerator data for every slot, flag red links, and finally
-- blank out feminine/neuter forms that are identical to the masculine ones.
--
-- Reads:  data.forms, data.num, data.pos, data.noneut, data.nomf, args[<slot>].
-- Writes: data.unattested (via unattested_forms()), data.user_specified,
--         data.forms, data.accel, data.categories.
--
-- After the first pass, each processed data.forms[slot] is one of:
--   * a list of forms (string values are split on "/");
--   * "" when the slot's number conflicts with data.num ("sg" or "pl");
--   * "—" when the first alternative is "", "-" or "—".
local function process_adj_forms_and_overrides(data, args)
	local redlink = false
	unattested_forms(data, args, true)

	-- Process overrides and canonicalize forms.
	for slot in iter_adj_slots() do
		-- The gender is always the last component of an adjective slot name,
		-- so the gender tests are anchored at the end; an unanchored "_n"
		-- would also match the case part of a name such as "linked_nom_sg_m".

		-- If noneut=1 passed, clear out all neuter forms.
		if data.noneut and slot:find("_n$") then
			data.forms[slot] = nil
		end
		-- If nomf=1 passed, clear out all masculine and feminine forms.
		if data.nomf and (slot:find("_m$") or slot:find("_f$")) then
			data.forms[slot] = nil
		end
		local val = nil
		if args[slot] then
			val = args[slot]
			data.user_specified[slot] = true
		else
			-- Overriding a plain lemma slot should also override its linked_
			-- counterpart so that the correct value gets displayed in the
			-- headword, which uses the linked_ slot.
			local non_linked_equiv_slot = linked_to_non_linked_adj_slots[slot]
			if non_linked_equiv_slot and args[non_linked_equiv_slot] then
				val = args[non_linked_equiv_slot]
				data.user_specified[slot] = true
			else
				val = data.forms[slot]
			end
		end
		if val then
			if type(val) == "string" then
				val = mw.text.split(val, "/")
			end
			if (data.num == "pl" and slot:find("sg")) or (data.num == "sg" and slot:find("pl")) then
				data.forms[slot] = ""
			elseif val[1] == "" or val[1] == "-" or val[1] == "—" then
				data.forms[slot] = "—"
			else
				data.forms[slot] = val
			end
		end
	end

	-- Compute the lemma for accelerators. Do this after processing
	-- overrides in case we overrode the lemma form(s).
	-- NOTE(review): the lemma is looked up under "abs_*" while the neuter and
	-- feminine checks below read "nom_*" slots -- confirm which case name the
	-- form tables actually use.
	local accel_lemma, accel_lemma_f
	if data.num and data.num ~= "" then
		accel_lemma = data.forms["abs_" .. data.num .. "_m"]
		accel_lemma_f = data.forms["abs_" .. data.num .. "_f"]
	else
		accel_lemma = data.forms["abs_sg_m"]
		accel_lemma_f = data.forms["abs_sg_f"]
	end
	if type(accel_lemma) == "table" then
		accel_lemma = accel_lemma[1]
	end
	if type(accel_lemma_f) == "table" then
		accel_lemma_f = accel_lemma_f[1]
	end

	-- Set the accelerators, and determine if there are red links.
	for slot in iter_adj_slots() do
		local val = data.forms[slot]
		if val and val ~= "" and val ~= "—" and #val > 0 then
			-- The accelerator depends only on the slot, so build it once per
			-- slot: e.g. "gen_pl_f" -> "gen|p|f".
			local accel_form = slot:gsub("_([sp])[gl]_", "|%1|")

			if data.noneut then
				-- If noneut=1, we're being asked to do a noun that has
				-- masculine and feminine variants, not an adjective. In that
				-- case, make the accelerators correspond to nominal
				-- case/number forms without the gender, and use the feminine
				-- as the lemma for feminine forms.
				if slot:find("_f$") then
					data.accel[slot] = {form = accel_form:gsub("|f$", ""), lemma = accel_lemma_f}
				else
					data.accel[slot] = {form = accel_form:gsub("|m$", ""), lemma = accel_lemma}
				end
			else
				if not data.forms.nom_sg_n and not data.forms.nom_pl_n then
					-- use multipart tags if called for
					accel_form = accel_form:gsub("|m$", "|m//f//n")
				elseif not data.forms.nom_sg_f and not data.forms.nom_pl_f then
					accel_form = accel_form:gsub("|m$", "|m//f")
				end

				-- use the order case|gender|number, which is more standard
				-- than case|number|gender
				accel_form = accel_form:gsub("|(.-)|(.-)$", "|%2|%1")

				data.accel[slot] = {form = accel_form, lemma = accel_lemma}
			end

			-- Only check page existence in the main namespace, and stop at
			-- the first red link: the category is added at most once.
			if not redlink and NAMESPACE == '' then
				for _, form in ipairs(val) do
					-- Parentheses keep only the first return value.
					local title = (lang:makeEntryName(form))
					local t = mw.title.new(title)
					if t and not t.exists then
						table.insert(data.categories, "Avar " .. data.pos .. " with red links in their inflection tables")
						redlink = true
						break
					end
				end
			end
		end
	end

	-- See if the masculine and feminine/neuter are the same across all slots.
	-- If so, blank out the feminine/neuter so we use a table that combines
	-- masculine and feminine, or masculine/feminine/neuter.
	for _, gender in ipairs({"f", "n"}) do
		local other_is_masc = true
		for _, case in ipairs(cases) do
			for _, num in ipairs(nums) do
				if not m_table.deepEquals(data.forms[case .. "_" .. num .. "_" .. gender],
						data.forms[case .. "_" .. num .. "_m"]) then
					other_is_masc = false
					break
				end
			end
			if not other_is_masc then
				break
			end
		end

		if other_is_masc then
			for _, case in ipairs(cases) do
				for _, num in ipairs(nums) do
					data.forms[case .. "_" .. num .. "_" .. gender] = nil
				end
			end
		end
	end
end

-- Turn data.forms[slot] into displayable text for every slot. Each form is
-- replaced by its formatted link, followed by red superscript footnote
-- indices when notes exist for it in data.notes and the slot was not
-- user-specified; the forms of a slot are then joined into one string.
-- data.footnotes ends up as the "<br />"-joined list of the generated
-- footnotes followed by the entries already present in data.footnotes.
--
-- This is an older function, still currently used for nouns but not for
-- adjectives. The adjective table module combines adjacent slots itself and
-- needs the original forms plus the extra text per form; that path goes
-- through partial_show_forms() and finish_show_form().
local function show_forms(data, is_adj)
	local footnote_lines = {}
	local index_of_note = {}
	local next_index = 1
	-- FIXME, do we want this difference?
	local separator = is_adj and ", " or "<br />"

	for slot in iter_slots(is_adj) do
		local forms = data.forms[slot]
		if forms and forms ~= "" and forms ~= "—" then
			for i, form in ipairs(forms) do
				local rendered = link_if_attested(form, data.accel[slot], data.unattested[slot])
				local slot_notes = data.notes[slot .. i]
				if slot_notes and not data.user_specified[slot] then
					if type(slot_notes) == "string" then
						slot_notes = {slot_notes}
					end
					local markers = {}
					for _, note in ipairs(slot_notes) do
						local idx = index_of_note[note]
						if not idx then
							-- First time this note is seen: give it a footnote index.
							idx = next_index
							next_index = next_index + 1
							table.insert(footnote_lines, '<sup style="color: red">' .. idx .. '</sup>' .. note)
							index_of_note[note] = idx
						end
						m_table.insertIfNot(markers, idx)
					end
					rendered = rendered .. '<sup style="color: red">' .. table.concat(markers, ",") .. '</sup>'
				end
				forms[i] = rendered
			end
			data.forms[slot] = table.concat(forms, separator)
		end
	end

	-- Pre-existing footnotes come after the generated ones.
	for _, footnote in ipairs(data.footnotes) do
		footnote_lines[#footnote_lines + 1] = footnote
	end
	data.footnotes = table.concat(footnote_lines, "<br />")
end

-- Produce the display text for a group of slots that share identical content.
-- Every slot in SLOTS must have the same forms as the first one (otherwise an
-- error is raised) and the same footnote text (otherwise an assertion fails).
-- The result is built as in show_forms(), except that the accelerator forms
-- of all the slots are combined into one. Returns "—" when the slots have no
-- forms.
local function finish_show_form(data, slots, is_adj)
	assert(#slots > 0)
	local first_slot = slots[1]
	local ref_forms = data.forms[first_slot]
	local ref_notetext = data.notetext[first_slot]

	for _, slot in ipairs(slots) do
		local slot_forms = data.forms[slot]
		if not m_table.deepEquals(slot_forms, ref_forms) then
			error("data.forms[" .. first_slot .. "] = " .. (concat_forms_in_slot(ref_forms) or "nil") ..
				", but data.forms[" .. slot .. "] = " .. (concat_forms_in_slot(slot_forms) or "nil"))
		end
		assert(m_table.deepEquals(data.notetext[slot], ref_notetext))
	end

	if not ref_forms then
		return "—"
	end

	-- All slots must share one lemma; their accelerator tags are joined with "|;|".
	local lemma = data.accel[first_slot].lemma
	local tags = {}
	for _, slot in ipairs(slots) do
		local slot_accel = data.accel[slot]
		assert(slot_accel.lemma == lemma)
		tags[#tags + 1] = slot_accel.form
	end
	local accel = {form = table.concat(tags, "|;|"), lemma = lemma}

	local rendered = {}
	for i, form in ipairs(ref_forms) do
		rendered[#rendered + 1] = link_if_attested(form, accel, data.unattested[first_slot]) .. ref_notetext[i]
	end
	-- FIXME, do we want this difference?
	return table.concat(rendered, is_adj and ", " or "<br />")
end

-- Used by the adjective table module; does part of the work of show_forms().
-- Every empty slot value (nil, "", "—") is normalized to nil. For each
-- non-empty slot, data.notetext[slot] receives one string per form: the red
-- superscript footnote indices for that form, or "" when it has none.
-- data.footnotes ends up as the "<br />"-joined list of the generated
-- footnotes followed by the entries already present in data.footnotes.
local function partial_show_forms(data, is_adj)
	local footnote_lines = {}
	local index_of_note = {}
	local next_index = 1
	data.notetext = {}
	-- Expose finish_show_form through DATA so the adjective table module can
	-- call it without requiring this module, which will (or could) lead to
	-- recursive module requiring.
	data.finish_show_form = finish_show_form

	for slot in iter_slots(is_adj) do
		local forms = data.forms[slot]
		if forms and forms ~= "" and forms ~= "—" then
			local markers_per_form = {}
			for i in ipairs(forms) do
				local marker = ""
				local slot_notes = data.notes[slot .. i]
				if slot_notes and not data.user_specified[slot] then
					if type(slot_notes) == "string" then
						slot_notes = {slot_notes}
					end
					local indices = {}
					for _, note in ipairs(slot_notes) do
						local idx = index_of_note[note]
						if not idx then
							-- First time this note is seen: give it a footnote index.
							idx = next_index
							next_index = next_index + 1
							table.insert(footnote_lines, '<sup style="color: red">' .. idx .. '</sup>' .. note)
							index_of_note[note] = idx
						end
						m_table.insertIfNot(indices, idx)
					end
					marker = '<sup style="color: red">' .. table.concat(indices, ",") .. '</sup>'
				end
				markers_per_form[#markers_per_form + 1] = marker
			end
			data.notetext[slot] = markers_per_form
		else
			data.forms[slot] = nil
		end
	end

	-- Pre-existing footnotes come after the generated ones.
	for _, footnote in ipairs(data.footnotes) do
		footnote_lines[#footnote_lines + 1] = footnote
	end
	data.footnotes = table.concat(footnote_lines, "<br />")
end

-- Render the noun table through Module:av-noun/table, choosing the
-- singular-only or plural-only builder when data.num is "sg" or "pl" and the
-- full builder otherwise.
local function make_noun_table(data)
	local num = data.num
	local builder_name = "make_table"
	if num == "sg" or num == "pl" then
		builder_name = "make_table_" .. num
	end
	return m_noun_table[builder_name](data)
end

-- Serialize all non-empty slots of DATA as "slot=form,form" items joined by
-- "|". When INCLUDE_PROPS is set, also append "g=<lowercased gender>" (if
-- data.gender is set) and "num=<data.num>", where a missing or empty
-- data.num is written as "both".
local function concat_forms(data, is_adj, include_props)
	local parts = {}
	for slot in iter_slots(is_adj) do
		local joined = concat_forms_in_slot(data.forms[slot])
		if joined then
			parts[#parts + 1] = slot .. "=" .. joined
		end
	end

	if include_props then
		local gender = data.gender
		if gender then
			parts[#parts + 1] = "g=" .. mw.ustring.lower(gender)
		end
		local num = data.num
		parts[#parts + 1] = "num=" .. ((num and num ~= "" and num) or "both")
	end

	return table.concat(parts, "|")
end

-- Strip ENDING from LEMMA and return what precedes it, or nil when the ending
-- does not match. If ENDING contains an opening parenthesis it is taken to be
-- a complete pattern with its own capture group and is matched against the
-- lemma as is.
local function extract_base(lemma, ending)
	local pattern = ending
	-- Plain (non-pattern) search for a literal "(".
	if not ending:find("(", 1, true) then
		pattern = "^(.*)" .. ending .. "$"
	end
	return rmatch(lemma, pattern)
end

-- Hand the module table back to the caller.
-- NOTE(review): nothing is attached to `export` in the part of the file
-- visible here -- confirm whether entry points are defined elsewhere.
return export