-- Modul:0it-verb
-- (Wiki page chrome; "Megjelenés" = "Appearance".)
-- This module can be documented on the page Modul:0it-verb/doc.
local export = {}
--[=[
Authorship: Ben Wing <benwing2>
]=]
--[=[
TERMINOLOGY:
-- "slot" = A particular combination of tense/mood/person/number/etc.
Example slot names for verbs are "pres1p" (present indicative first-person plural), "impsub12s" (imperfect
subjunctive first/second-person singular form) and "pp" (past participle).
Each slot is filled with zero or more forms.
-- "form" = The conjugated Italian form representing the value of a given slot.
-- "lemma" = The dictionary form of a given Italian term. For Italian, always the infinitive.
]=]
--[=[
RULES FOR CONJUGATION:
1. Present indicative:
-- Individual form overrides always take precedence.
-- Otherwise, if presrow: is given, this specifies all six forms (1s, 2s, 3s, 1p, 2p, 3p).
-- Otherwise:
1. The 1s form is generated from the explicitly specified pres1s principal part.
2. The 3p form is generated from the explicitly specified pres1s principal part by replacing -o with -ano (for -are verbs)
or with -ono (for -ere/-ire verbs).
3. The 2s and 3s forms are generated as follows:
a. If an explicit pres3s principal part is given using PRES1S^PRES3S, the 2s/3s stem is generated by removing final -a/-e.
b. Otherwise, if an explicit stem was given using stem:, this serves as the 2s/3s stem.
c. Otherwise, if the infinitive is root-stressed, the 2s/3s stem comes from the infinitive. This is used e.g.
with [[togliere]] (tòlgo, tògli, tòglie, togliàmo, togliéte, tòlgono), where the stem of the 1s and 3p forms
is different from the stem of the 2s and 3s forms.
d. Otherwise, the pres1s stem is used.
From this stem, the 2s form is generated by adding -i (possibly with some adjustments; two unstressed i's
together are compressed to one, and -h- is inserted after the stem if the verb is an -are verb and the stem ends
with -c or -g), and the 3s form is generated by adding -a (for -are verbs) or -e (for -ere/-ire verbs).
4. The 1p and 2p forms are generated from the infinitive stem (or explicit stem given by stem:) by adding -iàmo
(1p form) and -àte/-éte/-ìte (2p form, -are/-ere/-ire verbs). We don't use the pres1s form because it might have
a different stem (e.g. +isc verbs).
2. Present subjunctive:
-- Individual form overrides always take precedence.
-- Otherwise, if subrow: is given, this specifies all four forms (123s, 1p, 2p, 3p).
-- Otherwise:
1. If sub: is given, this specifies the 123s form; otherwise the 123s form is generated from the pres1s form by
changing -o to -a (for -ere/-ire verbs), or to -i (for -are verbs). E.g. for [[venire]], vèngo -> vènga;
for [[potere]], pòsso -> pòssa. sub: needs to be given for e.g. [[essere]] (pres1s sóno, sub1s sìa),
[[sapere]] (pres1s sò* but sub1s sàppia), [[fare]] (pres1s fàccio:fò*[archaic or dialectal] but sub1s just fàccia),
[[andare]] (similar to [[fare]]), [[dovere]].
2. The 3p form is generated from the 123s form by adding -no.
3. The 1p form is copied from the pres1p form.
4. The 2p form is generated from the 1p form by replacing -mo with -te.
3. Imperative:
-- Individual form overrides always take precedence.
-- Otherwise, if improw: is given, this specifies 2s, 2p.
-- Otherwise:
1. If imp: given, this specifies the 2s form; otherwise the 2s form is generated by copying the pres2s form
(for -ere/-ire verbs) or by copying the pres3s form (for -are verbs). We use the present indicative 2s/3s
forms to preserve vowel and other alternations in the root-stressed forms (for all -are verbs, for
[[venire]] and [[tenere]], for +isc verbs, etc.).
2. The 2p form is generated by copying the pres2p form.
3. The 3s, 1p, 3p forms are copied from the corresponding present subjunctive forms.
]=]
--[=[
EXAMPLES OF CONJUGATION:
; Including archaic and literary forms:
{{it-conj|essere<e\è\-,-,stàto:essùto[archaic]:sùto[archaic].
presrow:sóno,sèi,è,siàmo:sémo[archaic],siète:sète[archaic],sóno:èmmo[archaic].
imperfrow:èro:èra[literary],èri,èra,eravàmo:èramo[archaic],eravàte,èrano.
phisrow:fùi,fósti:fùsti[archaic],fù*,fùmmo,fóste:fùste[archaic],fùrono:fùro[archaic]:fóre[archaic]:fòro[archaic].
futrow:sarò,sarài,sarà:fìa[archaic]:fìe[archaic],sarémo,saréte,sarànno:fìano[archaic]:fìeno[archaic].
condrow:sarèi:sarìa[archaic]:fòra[archaic],sarésti,sarèbbe:sarìa[archaic]:fòra[archaic],sarémmo,saréste,sarèbbero:sarìano[archaic]:fòro[archaic].
subrow:sìa,siàmo,siàte,sìano:sìeno[literary].
impsub:fóssi:fùssi[archaic].
improw:sìi:siàte.
ger:essèndo:sèndo[archaic].
presp:essènte[rare]:ènte[archaic]
>}}
{{it-conj|fare<a/-,féci,fàtto.
stem:fàce.
presrow:fàccio:fò*[archaic or dialectal],fài,fà*,facciàmo,fàte,fànno.
sub:fàccia.
imp:fà*:fài:fà'
>}}
{{it-conj|potere<a:e[as an auxiliary, with main verbs taking ''essere'']/-,potéi:potètti[less common].
presrow:pòsso,puòi,può*,possiàmo,potéte,pòssono.
fut:potrò.
imp:-
>}}
{{it-conj|dire<a/+,dìssi,détto.
stem:dìce.
pres2p:dìte.
imp:dì':dì*!
>}}
{{it-conj|dare<a/-,dièdi:diédi:détti.
phisstem:dé.
presrow:dò*:dò*![less common],dài,dà*!,diàmo,dàte,dànno.
fut:darò.
sub:dìa.
impsub:déssi.
imp:dài:dà':dà*!
>}}
; Including archaic and poetic forms:
{{it-conj|dovere<a:e[as an auxiliary, with main verbs taking ''essere'']/-.
presrow:
dèvo:dévo:dèbbo:débbo:dèggio[archaic or poetic]:déggio[archaic or poetic],
dèvi:dévi:dèi[archaic, poetic or popular Tuscan]:déi[archaic, poetic or popular Tuscan],
dève:déve:dèe[archaic, poetic or popular Tuscan]:dée[archaic, poetic or popular Tuscan]:dèbbe[archaic]:débbe[archaic],
dobbiàmo,
dovéte,
dèvono:dévono:dèbbono:débbono:dènno[archaic or poetic]:dénno[archaic or poetic].
fut:dovrò.
sub:dèbba:débba:dèva[rare]:déva[rare]:dèggia[archaic or poetic]:déggia[archaic or poetic]:dèbbia[obsolete]:débbia[obsolete].
imp:-
>}}
{{it-conj|andare<e/-.
presrow:vàdo:vò*[less common],vài,và*,andiàmo,andàte,vànno.
fut:andrò.
sub:vàda.
imp:vài:và':và*
>}}
{{it-conj|valere<e:a[transitive]/vàlgo^à,vàlsi,vàlso.fut:varrò>}}
{{it-conj|vedere<a/védo:véggo[literary]:véggio[poetic]^é,vìdi,vìsto:vedùto[less popular].
fut:vedrò
>}}
{{it-conj|venire<e/vèngo:vègno[archaic or poetic]^viène,vénni,venùto.
fut:verrò
>}}
{{it-conj|sapere<a:e[as an auxiliary, with main verbs taking ''essere'']/-,sèppi.
presrow:sò*,sài,sà*,sappiàmo,sapéte,sànno.
fut:saprò.
sub:sàppia.
improw:sàppi:sappiàte
>}}
{{it-conj|piacere<e/piàccio^piàce,piàcqui.pres1p:piacciàmo>}}
{{it-conj|comparire<e/compàio:+isc[less common]^à:+isc[less common],compàrvi:comparìi[less common]:compàrsi[in the legal sense],compàrso:comparìto[rare]>}}
{{it-conj|togliere<a\ò\tòlgo,tòlsi,tòlto.fut:+:torrò[literary]>}}
{{it-conj|condurre<a\+,condùssi,condótto.stem:condùce>}}
{{it-conj|proporre<a\ó+\propóngo,propósi,propósto:propòsto.stem:propóne>}}
{{it-conj|trarre<a\tràggo,tràssi,tràtto.stem:tràe>}}
; Third-person only verbs:
{{it-conj|bufare<e/ù.impers>}}
{{it-verb|accadere<e/à+,accàdde.fut:accadrà.thirdonly>}}
{{it-verb|volerci<e/vuòle,vòlle.fut:vorrà.sub:vòglia.impers>}}
; Defective verbs:
{{it-verb|redire<a/rièdo,-,redìto.imperf:-.fut:-.impsub:->}}
{{it-verb|serpere<-\è,+,->}}
; Multiword expressions:
{{it-verb|tenere<a/tèngo^tiène,ténni.fut:terrò> [[d']][[occhio]]}}
{{it-conj|trascinare<a/ì> e rilasciare<a/à>}}
]=]
--[=[
FIXME:
1. Fix inf_linked and lemma_linked to work like in [[Module:0es-verb]]. (DONE)
2. Finish support for reflexive and pronominal verbs. (DONE)
3. Finish support for reflexive and pronominal imperatives. (DONE)
4. Finish support for negative imperatives. (DONE)
5. Fix handling of third-only verbs; require that irregular forms be specified in the first person.
Remove existing half-implemented support for specifying principal parts in the third person. (DONE)
6. Support defective verbs specified using e.g. redire<a/rièdo,-,redìto.imperf:-.fut:-.impsub:->.
Include categorization; but if row overrides or single overrides of all forms given, don't categorize
as defective for that row. (DONE)
7. Fix handling of aux; snarf code from [[Module:0de-verb]] to handle aux with multiword expressions. (DONE)
8. Add automatic support for common irregular verbs: [[essere]], [[avere]], [[andare]], [[fare]], [[dare]],
[[dire]], [[venire]], [[vedere]], [[tenere]], [[bere]], etc. Should make combinations of these verbs
with clitics, and multiword expressions with these verbs, easier to handle. (DONE)
9. Add support for calling from {{it-verb}} in [[Module:0it-headword]]. (DONE)
10. Throw an error if forms missing accents are specified (perhaps except with some special signal, to make
it easier to port the old [[Module:0it-conj]]).
11. Consider adding combined clitic tables like in [[Module:0es-verb]].
12. Consider adding automatic support for prefixed -fare verbs. (DONE)
13. Consider displaying irregular forms in a different color, as with the old [[Module:0es-conj]], or adding
a triangle next to them, as with [[Module:0ru-verb]].
14. Consider supporting replace_reflexive_indicators().
15. Add late (post-clitic-addition) overrides. (DONE)
16. PRES/PAST/PP spec should be required to come first to avoid ambiguities. (DONE)
17. Add variant codes to avoid mismatching variants in the conditional -èbbe/-ébbe, -éttero vs. érono, etc.
18. If explicit fut: given, it should control the conditional as well. (DONE)
19. If present -, sub:- or imp:-, it should suppress the whole row in the absence of row or individual overrides.
20. 'ci ci vuole' should maybe -> 'a noi ci vuole' instead of 'ci vuole'.
21. Instead of * before, use ! after so that * before can be used for reconstructed terms. (DONE)
22. When handling built-in verbs, automatically add + after vowel in cases like comporre. (DONE; HANDLED AUTOMATICALLY)
23. When handling built-in verbs, make sure we correctly handle root-stressed infinitives. (DONE)
24. When handling built-in verbs, make sure explicit ! afterwards is handled as if automatically there when a
prefix is added, and make sure final accent is handled correctly when a prefix is added. In both cases, verify
both sfare and rifare, sdarsi and ridare. (DONE)
25. When handling built-in verbs, make sure we handle prefixes correctly w.r.t. negative imperatives. (DONE)
26. Support ref: in footnotes. (DONE)
27. Finish built-in -ire verbs.
28. Implement error("If past participle given as '-', auxiliary must be explicitly specified as '-'"). (DONE)
29. Make present participles default to enabled. (DONE)
30. Instead of a qualifier for syntactic gemination, use a superscripted symbol with a tooltip, as for {{it-IPA}}.
Do this automatically for multisyllabic terms ending in a stressed vowel, but don't do it if the verb ends up
non-final, as in [[andare a letto]].
31. [[darsi]]: imp2s is wrong; it generates [[datti|dàtti]] and also [[dàtti]] (which is wrong) (DONE)
32. [[liquefare]]: forms like liquefà missing the indication of following syntactic gemination; it should be automatic
in multisyllabic words.
33. [[ridare]] has ridò with a syntactic gemination footnote, but liquefà doesn't have it (see #32). It should be a
tooltip.
34. Add 'addnote[SLOT_SPEC]' to make [[tangere]] with disused 1s/2s/1p/2p easier to handle. (DONE)
35. Throw an error if comma seen in single form specs like 'imp:'. (DONE)
36. Overrides like phis:+[rare] of [[affarsi]] using built-in @ (from [[fare]]) don't properly pick up the irregular
built-in forms. (DONE)
37. Overrides like phis:afféci[rare] of [[affarsi]] using built-in @ (from [[fare]]) get the prefix duplicated. (DONE)
38. Support negated addnote like 'addnote[-pres3s][rare]'. (DONE)
39. Support comma-separated addnote spec for the equivalent of alternation. (DONE)
40. Issue an error if addnote spec has no effect. (DONE)
41. Support verbs in -gliela like [[fargliela]]. (DONE)
42. Support /\ notation for optional root-stressed infinitive with ending first, for [[suadere]] and derivatives. (DONE)
43. Support verbs in -gli like [[mancargli qualche rotella]]. (DONE)
44. Correctly incorporate links in multiword expressions from [[Module:0it-headword]] so the autolinking algorithm there
works (e.g. for [[portare il cervello all'ammasso]]) and head=~... specs are correctly propagated. (DONE)
45. Support head= with the verb as part of a larger bracketed expression, e.g. for [[stare a vedere]],
{{it-verb|e/@|head=[[stare a]] [[vedere]]}}. (DONE)
46. Expand addnote[] notation to support references (maybe needs no work)?
47. Support verbs in -glire ([[boglire]], [[inorgoglire]], [[saglire]], etc.) correctly. (DONE)
--]=]
local lang = require("Module:0languages").getByCode("it")
local m_string_utilities = require("Module:0string utilities")
local m_links = require("Module:0links")
local m_table = require("Module:0table")
local iut = require("Module:0inflection utilities")
local com_module = "Module:0it-common"
local m_builtin
local force_cat = false -- set to true for debugging
local check_for_red_links = false -- set to false for debugging
local u = require("Module:0string/char")
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
local ulen = mw.ustring.len
local unfd = mw.ustring.toNFD
local unfc = mw.ustring.toNFC
-- Single-result wrapper around rsubn(): performs the substitution and returns only the resulting string,
-- dropping the substitution count that rsubn() returns as its second value.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to exactly one return value.
	return (rsubn(term, foo, bar))
end
-- Code point U+0300 (combining grave accent); `u` is loaded from Module:0string/char and presumably converts
-- code points to a string -- TODO confirm.
local GR = u(0x0300)
-- Lua character-class patterns for single vowels. V/NV cover only unaccented vowels; AV covers only the
-- accented vowels written in this module's notation.
local V = "[aeiou]"
local NV = "[^aeiou]"
local AV = "[àèéìòóù]"
local MAV = "[aeiouàèéìòóù]" -- maybe-accented vowel
local NMAV = "[^aeiouàèéìòóù]" -- not maybe-accented vowel
-- Sentinel character prepended to a form (see process_specs) to request that its written accent be kept; it is
-- stripped again in add_suffix_to_form() and convert_accented_links_in_text().
local PRESERVE_ACCENT = u(0xFFF0)
-- Person/number codes for a full six-form row, and for the imperative (which has no 1s).
local full_person_number_list = {"1s", "2s", "3s", "1p", "2p", "3p"}
local imp_person_number_list = {"2s", "3s", "1p", "2p", "3p"}
-- Used only in error messages concerning the principal part for a given row, to generate the English description
-- (by concatenating the following person/number description to the overall description for the row, taken from
-- the `.desc` element of the rowspec).
-- Keys are person/number codes; the empty key maps to the empty string. Every non-empty value ends in a space so
-- that it can be concatenated directly with the text that follows.
local principal_part_person_number_desc = {
["1s"] = "first-singular ",
["12s"] = "first/second-singular ",
["123s"] = "first/second/third-singular ",
["2s"] = "second-singular ",
[""] = "",
}
-- List of slot specs. Each entry is a two-element list that erase_suppressed_slots() unpacks as (slot, accel).
-- Only the "aux" slot is listed here; presumably the remaining slots are appended elsewhere in the file -- TODO
-- confirm.
local all_verb_slots = {
{"aux", "-"},
}
-- Used to create the accelerator entries in all_verb_slots.
-- Maps a person/number code to a pipe-separated tag prefix (person, then number, each followed by "|").
local person_number_tag_prefix = {
["1s"] = "1|s|",
["2s"] = "2|s|",
["3s"] = "3|s|",
["1p"] = "1|p|",
["2p"] = "2|p|",
["3p"] = "3|p|",
[""] = "", -- used for non-finite forms such as the past participle
}
-- Replacement text for reflexive-pronoun indicators written in angle brackets (e.g. "<si>"); see
-- substitute_reflexive_pronoun(), which looks up substitutable_reflexive_pronoun[INDICATOR][PERSNUM].
-- The outer key is the indicator name; the inner key is a person/number code ("1s" ... "3p") or "nf"
-- (presumably "non-finite" -- TODO confirm). The "*_no_ci" and "*_no_vi" variants substitute the empty string in
-- the 1p and 2p cells respectively; the "space_*" variants substitute a single space everywhere else.
--
-- Declared `local` so that it does not leak into the global environment like an accidental global.
local substitutable_reflexive_pronoun = {
	["si"] = {
		["1s"] = "[[mi]]",
		["2s"] = "[[ti]]",
		["3s"] = "[[si]]",
		["1p"] = "[[ci]]",
		["2p"] = "[[vi]]",
		["3p"] = "[[si]]",
		["nf"] = "[[si]]",
	},
	["se"] = {
		["1s"] = "[[me]]",
		["2s"] = "[[te]]",
		["3s"] = "[[se]]",
		["1p"] = "[[ce]]",
		["2p"] = "[[ve]]",
		["3p"] = "[[se]]",
		["nf"] = "[[se]]",
	},
	["si_no_ci"] = {
		["1s"] = "[[mi]]",
		["2s"] = "[[ti]]",
		["3s"] = "[[si]]",
		["1p"] = "",
		["2p"] = "[[vi]]",
		["3p"] = "[[si]]",
		["nf"] = "[[si]]",
	},
	["si_no_vi"] = {
		["1s"] = "[[mi]]",
		["2s"] = "[[ti]]",
		["3s"] = "[[si]]",
		["1p"] = "[[ci]]",
		["2p"] = "",
		["3p"] = "[[si]]",
		["nf"] = "[[si]]",
	},
	["space_no_ci"] = {
		["1s"] = " ",
		["2s"] = " ",
		["3s"] = " ",
		["1p"] = "",
		["2p"] = " ",
		["3p"] = " ",
		["nf"] = " ",
	},
	["space_no_vi"] = {
		["1s"] = " ",
		["2s"] = " ",
		["3s"] = " ",
		["1p"] = " ",
		["2p"] = "",
		["3p"] = " ",
		["nf"] = " ",
	},
}
-- FIXME: the following is broken and not yet used.
-- Each list has six entries, in the same order as the six person/number codes 1s, 2s, 3s, 1p, 2p, 3p.
local reflexive_forms = {
["si"] = {"mi", "ti", "si", "ci", "vi", "si"},
["suo"] = {"mìo", "tùo", "sùo", "nòstro", "vòstro", "sùo"},
["sua"] = {"mìa", "tùa", "sùa", "nòstra", "vòstra", "sùa"},
["suoi"] = {"mièi", "tuòi", "suòi", "nòstri", "vòstri", "suòi"},
["sue"] = {"mìe", "tùe", "sùe", "nòstre", "vòstre", "sùe"},
}
-- Return `form` with every accented vowel (any character matching AV) replaced by the first character of its
-- NFD decomposition.
local function remove_accents(form)
	local function strip_accent(vowel)
		local decomposed = unfd(vowel)
		return usub(decomposed, 1, 1)
	end
	return rsub(form, AV, strip_accent)
end
-- Apply `fun` to every form object in `forms` (a list of objects of the form {form=FORM, footnotes=FOOTNOTES}),
-- overwriting the objects in place. Form objects whose form is "?" are left untouched.
--
-- If `include_footnotes` is given, `fun` is called as fun(FORM, FOOTNOTES) and must return the new form and the
-- new footnotes; otherwise it is called as fun(FORM) and must return just the new form (the footnotes are kept).
--
-- WARNING: This is dangerous and should only be done near the end.
local function map_side_effecting_forms(forms, fun, include_footnotes)
	for _, formobj in ipairs(forms) do
		local current = formobj.form
		if current ~= "?" then
			if not include_footnotes then
				formobj.form = fun(current)
			else
				formobj.form, formobj.footnotes = fun(current, formobj.footnotes)
			end
		end
	end
end
-- Wrap `form` in wikilinks. Empty and single-space forms, and forms that already contain "[[", are returned
-- unchanged. Multiword forms are linked word by word via [[Module:0headword]]; anything still unlinked after
-- that is wrapped as a whole in [[...]], unless `multiword_only` is set.
local function add_links(form, multiword_only)
	if form == "" or form == " " or form:find("%[%[") then
		return form
	end
	-- Optimization: only load [[Module:0headword]] when the form contains whitespace or punctuation, i.e. is
	-- not a plain single word.
	if rfind(form, "[%s%p]") then
		local m_headword = require("Module:0headword")
		if m_headword.head_is_multiword(form) then
			form = m_headword.add_multiword_links(form)
		end
	end
	if multiword_only or form:find("%[%[") then
		return form
	end
	return "[[" .. form .. "]]"
end
-- Append `suffix` to `form`. When the suffix is non-empty, any PRESERVE_ACCENT marker is first removed from the
-- form (as for [[datti]] of [[dare]]); with an empty suffix the form is returned untouched, marker included.
local function add_suffix_to_form(form, suffix)
	if suffix == "" then
		return form
	end
	return rsub(form, PRESERVE_ACCENT, "") .. suffix
end
-- Rewrite every one-part link [[X]] in `text` (links containing "|" are not matched). Returns two values: the
-- rewritten text, and a boolean that is true if any link contained the PRESERVE_ACCENT marker.
local function convert_accented_links_in_text(text)
	local saw_preserve_accent = false

	local function relink(linktext)
		if rfind(linktext, PRESERVE_ACCENT) then
			-- Explicit request to keep the written accent: drop only the marker.
			saw_preserve_accent = true
			return "[[" .. rsub(linktext, PRESERVE_ACCENT, "") .. "]]"
		end
		if rfind(linktext, "^.*" .. MAV .. ".*" .. AV .. "$") then
			-- Final accented vowel with preceding vowel; keep accent.
			return "[[" .. linktext .. "]]"
		end
		local unaccented = remove_accents(linktext)
		if unaccented ~= linktext then
			-- Link to the unaccented page but display the accented form.
			return "[[" .. unaccented .. "|" .. linktext .. "]]"
		end
		return "[[" .. linktext .. "]]"
	end

	local converted = rsub(text, "%[%[([^%[%]|]+)%]%]", relink)
	return converted, saw_preserve_accent
end
-- Convert links around accented words to two-part links without extra accents, for every slot in
-- `alternant_multiword_spec.forms`. PRESERVE_ACCENT characters are removed, and a form that carried one gets
-- the footnote "[with written accent]" appended to its footnotes.
local function convert_accented_links(alternant_multiword_spec)
	local function convert_one(form, footnotes)
		local converted, needs_note = convert_accented_links_in_text(form)
		if needs_note then
			footnotes = iut.combine_footnotes(footnotes, {"[with written accent]"})
		end
		return converted, footnotes
	end

	for _, forms in pairs(alternant_multiword_spec.forms) do
		-- The form objects are modified in place to save memory.
		map_side_effecting_forms(forms, convert_one, "include footnotes")
	end
end
-- Return `text` with leading and trailing whitespace removed; interior whitespace is kept.
local function strip_spaces(text)
	local stripped = rsub(text, "^%s*(.-)%s*$", "%1")
	return stripped
end
-- Like iut.split_alternating_runs() but strips spaces from both ends of the odd-numbered elements (only in
-- odd-numbered runs if preserve_splitchar is given). Effectively we leave alone the footnotes and splitchars
-- themselves, but otherwise strip extraneous spaces. Spaces in the middle of an element are also left alone.
--
-- Arguments are passed straight through to iut.split_alternating_runs(); the list of runs it returns is
-- modified in place and returned.
local function split_alternating_runs_and_strip_spaces(segment_runs, splitchar, preserve_splitchar)
	local split_runs = iut.split_alternating_runs(segment_runs, splitchar, preserve_splitchar)
	for i, run in ipairs(split_runs) do
		-- With preserve_splitchar, only the odd-numbered runs are stripped (see above).
		if not preserve_splitchar or i % 2 == 1 then
			for j, element in ipairs(run) do
				if j % 2 == 1 then
					run[j] = strip_spaces(element)
				end
			end
		end
	end
	return split_runs
end
-- Raise an error if `form` is nil or false, i.e. if the default principal part described by
-- `principal_part_desc` could not be derived for `base.lemma`. `spec` is the user spec that needed the default;
-- it is only used in the error message. Returns nothing when `form` is present.
local function check_not_null(base, form, spec, principal_part_desc)
	if form then
		return
	end
	local message = ("Spec '%s' cannot be used because default %s cannot be derived from '%s'"):format(
		spec, principal_part_desc, base.lemma)
	error(message)
end
-- Return true if `slot` should be skipped when conjugating `base`, based on the flags in `base.props`; false
-- otherwise. If `checking_defective` is given, we are checking for defective rows and only the `impers` and
-- `thirdonly` flags are respected (`nofinite`, `presonly` and `no_root_stressed` are ignored).
local function skip_slot(base, slot, checking_defective)
	local props = base.props
	-- Imperative and negative imperative slots. The pattern requires a digit right after "imp", so imperfect
	-- subjunctive slots ("impsub...") do not match.
	local is_imperative = slot:find("^imp[123]") or slot:find("^negimp")

	if not checking_defective then
		local is_finite = slot:find("[123]")
		if is_finite and props.nofinite then
			-- Skip all finite (1/2/3-person) slots.
			return true
		end
		if is_finite and props.presonly and not slot:find("^pres") then
			-- Skip all finite (1/2/3-person) slots except the present indicative.
			return true
		end
		local is_sing_or_3p = slot:find("[123]s") or slot:find("3p")
		local in_affected_row = slot:find("^pres") or slot:find("^sub") or is_imperative
		if props.no_root_stressed and is_sing_or_3p and in_affected_row then
			-- Skip all 1s/2s/3s/3p slots in the present indicative and subjunctive and the imperative.
			return true
		end
	end

	if props.impers or props.thirdonly then
		-- Impersonal or third-person-only verb.
		if slot:find("[12]") and not slot:find("3") then
			-- Skip slots for 1/2 person that don't also reference 3rd person (hence we don't skip sub123s).
			return true
		end
		if is_imperative then
			-- Skip all imperative slots, including third-person ones.
			return true
		end
	end

	if props.impers and slot:find("3p") then
		-- Skip third plural slots for impersonal verbs.
		return true
	end

	return false
end
-- Clear `base.forms[SLOT]` for every slot listed in all_verb_slots that skip_slot() says should be skipped.
local function erase_suppressed_slots(base)
	for _, slot_spec in ipairs(all_verb_slots) do
		-- Each entry is {SLOT, ACCEL}; only the slot name is needed here.
		local slot = slot_spec[1]
		if skip_slot(base, slot) then
			base.forms[slot] = nil
		end
	end
end
-- Replace each angle-bracketed indicator <NAME> in `text` with the entry for `persnum` in
-- substitutable_reflexive_pronoun[NAME]. NAME must be a key of that table; an unknown name raises an indexing
-- error.
local function substitute_reflexive_pronoun(text, persnum)
	local function lookup(indicator)
		local by_persnum = substitutable_reflexive_pronoun[indicator]
		return by_persnum[persnum]
	end
	return rsub(text, "<(.-)>", lookup)
end
-- FIXME, broken and not yet used
-- Replace each "<pron>" angle-bracket spec in `arg1` with the escaped form "⦃⦃pron⦄⦄". Text without "pron>" is
-- returned unchanged.
local function escape_reflexive_indicators(arg1)
	if arg1:find("pron>") then
		local segments = iut.parse_balanced_segment_run(arg1, "<", ">")
		-- Loop over every other segment. The even-numbered segments are angle-bracket specs while
		-- the odd-numbered segments are the text between them.
		local last_spec_index = #segments - 1
		for i = 2, last_spec_index, 2 do
			if segments[i] == "<pron>" then
				segments[i] = "⦃⦃pron⦄⦄"
			end
		end
		return table.concat(segments)
	end
	return arg1
end
-- FIXME, broken and not yet used
-- Turn the escape delimiters "⦃⦃" and "⦄⦄" in `form` back into "<" and ">".
local function undo_escape_form(form)
	local with_open_restored = rsub(form, "⦃⦃", "<")
	return rsub(with_open_restored, "⦄⦄", ">")
end
-- FIXME, broken and not yet used
-- Delete every escaped indicator ("⦃⦃" ... "⦄⦄", shortest match) from `form`.
local function remove_reflexive_indicators(form)
	local stripped = rsub(form, "⦃⦃.-⦄⦄", "")
	return stripped
end
-- FIXME, broken and not yet used
-- Placeholder: returns `form` unchanged when it contains no "⦃" escape character, and otherwise raises an
-- internal error because the replacement is not implemented. `slot` is currently unused.
local function replace_reflexive_indicators(slot, form)
	if form:find("⦃") then
		error("Internal error: replace_reflexive_indicators not implemented yet")
	end
	return form
end
-- Join `stem` and `ending` for the verb `base`, applying spelling adjustments that depend on `base.conj_vowel`
-- and on the shape of the ending. `slot` is accepted but not consulted here. The adjustments are applied in the
-- order below.
local function combine_stem_ending(base, slot, stem, ending)
	local adjusted = stem
	local conj_vowel = base.conj_vowel
	if conj_vowel == "à" then
		-- Add h after c/g in -are forms to preserve the sound.
		if adjusted:find("[cg]$") and rfind(ending, "^[eèéiì]") then
			adjusted = adjusted .. "h"
		end
	elseif conj_vowel == "ì" then
		-- Handle [[boglire]], [[inorgoglire]], [[saglire]], etc.
		if adjusted:find("gl$") and not rfind(ending, "^[iì]") then
			adjusted = adjusted .. "i"
		end
	end
	-- Two unstressed i's coming together compress to one.
	if ending:find("^i") then
		adjusted = rsub(adjusted, "i$", "")
	end
	-- Remove accents from stem if ending is accented.
	if rfind(ending, AV) then
		adjusted = remove_accents(adjusted)
	end
	return adjusted .. ending
end
-- Call iut.add_forms() on `base.forms` for `slot`, passing `stems` and `endings` together with a combiner that
-- joins each stem and ending through combine_stem_ending(). `allow_overrides` is accepted but not used.
local function add_forms(base, slot, stems, endings, allow_overrides)
	iut.add_forms(base.forms, slot, stems, endings, function(stem, ending)
		return combine_stem_ending(base, slot, stem, ending)
	end)
end
-- Thin wrapper: hand `forms` to iut.insert_forms() for `slot` of `base.forms`.
local function insert_forms(base, slot, forms)
	local destination = base.forms
	iut.insert_forms(destination, slot, forms)
end
-- Insert a copy of `forms` into `slot` of `base`. The form objects are cloned first because they are
-- side-effected in various later places, and shared objects would propagate those changes between slots.
local function copy_forms(base, slot, forms)
	local cloned = mw.clone(forms)
	insert_forms(base, slot, cloned)
end
-- Return true if some form object in `list` has a `.form` equal to `form`, else false. If `process_form` is
-- given, it is applied to each form in `list` before comparing (it is not applied to `form` itself).
--
-- This is used to determine whether the defaults in `list` contain a given form; if not, that row is irregular.
-- A nil `list` means there are no defaults, so any form is irregular and the result is false.
local function general_list_form_contains_form(list, form, process_form)
	if list then
		for _, formobj in ipairs(list) do
			local candidate = formobj.form
			if process_form then
				candidate = process_form(candidate)
			end
			if candidate == form then
				return true
			end
		end
	end
	return false
end
-- Split a raw form into its decorations. Returns three values: the prespec (leading run of "!" and "#"
-- characters), the bare form, and the postspec (trailing run of "*" and "!" characters). Either spec may be the
-- empty string.
local function parse_decorated_form(form)
	local prespec, bare_form, postspec = rmatch(form, "^([!#]*)(.-)([*!]*)$")
	return prespec, bare_form, postspec
end
-- Process the user-given specs in `specs` in order to generate verb forms, and insert the resulting forms into
-- `destforms`[`slot`]. If the destination slot has no forms yet (i.e. it is nil), it will be first set to {}.
-- Duplicate forms will not be inserted.
--
-- `specs` is a list of objects of the form {form = SPEC, footnotes = FOOTNOTES}, where FOOTNOTES is a list of
-- user-specified footnotes/qualifiers (given after the spec using brackets) or nil if no footnotes. `specs` can be
-- nil, which is equivalent to specifying "+" to request the default. A given SPEC may be "-" to indicate that the
-- corresponding forms are missing, or be decorated with preceding decorators like !! (= "elevated style"),
-- ! (= "careful style") or # (= "traditional"), or with following decorators like ! (= preserve monosyllabic accent),
-- * (= verb form triggers syntactic gemination of following consonant), ** (= verb form triggers optional syntactic
-- gemination).
--
-- Processing is done by calling `generate_forms` on each spec after stripping decorators. If spec is "+", that will be
-- passed directly to `generate_forms`, but if spec is "-", `generate_forms` will not be called. The return value of
-- `generate_forms` can be anything that is convertible to a list of {form = SPEC, footnotes = FOOTNOTES} objects using
-- iut.convert_to_general_list_form(). In other words, it can be a single string (a form), a single object of the form
-- {form = FORM, footnotes = FOOTNOTES, ...}, or a list of either of these.
--
-- The decorators of a spec apply to EVERY form generated from that spec, not just the first one.
--
-- Raises an error if the decorators following the form, once any "!" is removed, are anything other than "",
-- "*" or "**".
local function process_specs(base, destforms, slot, specs, generate_forms)
	specs = specs or {{form = "+"}}
	for _, spec in ipairs(specs) do
		local decorated_form = spec.form
		-- Skip "-"; effectively, no forms get inserted into destforms[slot].
		if decorated_form ~= "-" then
			local prespec, form, postspec = parse_decorated_form(decorated_form)
			local forms = form == "?" and "?" or generate_forms(form)
			-- If `generate_forms` return nil, no forms get inserted into destforms[slot]. This happens e.g. when
			-- fut:- is given and no explicit conditional principal part is given. In that case,
			-- generate_default_conditional_principal_part() fetches the future principal parts, which don't exist,
			-- so it returns nil, and the surrounding generate_principal_part_forms() also returns nil. The effect is
			-- that the conditional principal part ends up nil and no conditional parts get inserted.
			if forms then
				forms = iut.convert_to_general_list_form(forms, spec.footnotes)
				if base.all_footnotes then
					forms = iut.convert_to_general_list_form(forms, base.all_footnotes)
				end
				for _, formobj in ipairs(forms) do
					local qualifiers = formobj.footnotes
					local generated_form = formobj.form
					-- Work on per-form copies of the decorators. They are consumed (stripped) as they are
					-- recognized, and consuming the shared `prespec`/`postspec` would make every form after
					-- the first lose its decorations.
					local pre, post = prespec, postspec
					if pre:find("!!") then
						qualifiers = iut.combine_footnotes({"[elevated style]"}, qualifiers)
						-- Strip "!!" so that it is not also recognized as "!" below.
						pre = rsub(pre, "!!", "")
					end
					if pre:find("!") then
						qualifiers = iut.combine_footnotes({"[careful style]"}, qualifiers)
						pre = rsub(pre, "!", "")
					end
					if pre:find("#") then
						qualifiers = iut.combine_footnotes({"[traditional]"}, qualifiers)
						pre = rsub(pre, "#", "")
					end
					if post:find("!") then
						generated_form = PRESERVE_ACCENT .. generated_form
						-- Strip "!" so that only asterisks remain to be checked below.
						post = rsub(post, "!", "")
					end
					if post == "*" then
						qualifiers = iut.combine_footnotes(qualifiers, {"[with syntactic gemination after the verb]"})
					elseif post == "**" then
						qualifiers = iut.combine_footnotes(qualifiers, {"[with optional syntactic gemination after the verb]"})
					elseif post ~= "" then
						error("Decorated form '" .. decorated_form .. "' has too many asterisks after it, use '*' for syntactic gemination and '**' for optional syntactic gemination")
					end
					iut.insert_form(destforms, slot, {form = generated_form, footnotes = qualifiers})
				end
			end
		end
	end
end
-- Compute `base.verb.verb` from `base.verb.raw_verb`. A raw verb ending in -r gets "re" appended if it ends in
-- -or/-ur or if `base.props.rre` is set, and "e" otherwise; any other raw verb is copied as is.
local function set_up_base_verb(base)
	local verb_info = base.verb
	local raw_verb = verb_info.raw_verb
	if not rfind(raw_verb, "r$") then
		verb_info.verb = raw_verb
	elseif rfind(raw_verb, "[ou]r$") or base.props.rre then
		verb_info.verb = raw_verb .. "re"
	else
		verb_info.verb = raw_verb .. "e"
	end
end
-- Compute the stems and default principal parts of `base` and store them in `base.verb`. Sets:
--   * phisstem (only if 'phisstem:' was given);
--   * default_stem, default_ending_vowel (from the infinitive in `base.verb.verb`);
--   * stem and unstressed_stem (from 'stem:' / 'unstressed_stem:' if given, else from the defaults);
--   * pres, pres3s, phis, pp (default principal parts);
--   * isc_pres, isc_pres3s (only when the ending vowel is "i").
-- Also sets `base.props.syncopated` and `base.conj_vowel`, and may set `base.explicit_non_default_stem_spec` and
-- `base.explicit_non_default_unstressed_stem_spec`.
--
-- Raises an error if the infinitive is not recognized, if a syncopated verb lacks an explicit 'stem:', if "+" is
-- used for the stem of a syncopated verb, or if an explicit stem is malformed.
local function add_default_verb_forms(base)
	local ret = base.verb

	-- Need to call combine_stem_ending() to combine stem and ending rather than just pasting them together to handle
	-- cases like [[boglire]], where 'bógl' + 'o' becomes 'bóglio'.
	local function comb(slot, stem, ending)
		return combine_stem_ending(base, slot, stem, ending)
	end

	-- Process 'phisstem:...' spec.
	if base.principal_part_specs.explicit_phis_stem_spec then
		-- Put the explicit past historic stem in ret.phisstem (i.e. base.verb.phisstem).
		process_specs(base, ret, "phisstem", base.principal_part_specs.explicit_phis_stem_spec, iut.identity)
	end

	ret.default_stem, ret.default_ending_vowel = rmatch(base.verb.verb, "^(.-)([aeir])re$")
	if not ret.default_stem then
		error("Unrecognized verb '" .. base.verb.verb .. "', doesn't end in -are, -ere, -ire or -rre")
	end
	base.props.syncopated = base.props.rre or ret.default_ending_vowel == "r"

	-- Process 'stem:...' spec.
	local ending_vowel
	if base.principal_part_specs.explicit_stem_spec then
		local function generate_explicit_stem_forms(form)
			local stem, this_ending_vowel
			if form == "+" then
				stem = ret.default_stem
				this_ending_vowel = ret.default_ending_vowel
				if base.props.syncopated then
					error("Can't use + with 'stem:' in syncopated verbs; specify an explicit stem")
				end
			else
				base.explicit_non_default_stem_spec = true
				stem, this_ending_vowel = rmatch(form, "^(.*)([aeiàéì])$")
				if not stem then
					error("Unrecognized stem '" .. form .. "', should end in -a, -e, -i, -à, -é or -ì")
				end
			end
			-- All stems given in one 'stem:' spec must agree on the conjugation vowel.
			if ending_vowel and ending_vowel ~= this_ending_vowel then
				error("Can't currently specify explicit stems with two different conjugation vowels (" .. ending_vowel
					.. " and " .. this_ending_vowel .. ")")
			end
			ending_vowel = this_ending_vowel
			return stem
		end
		-- Put the explicit stem in ret.stem (i.e. base.verb.stem).
		process_specs(base, ret, "stem", base.principal_part_specs.explicit_stem_spec, generate_explicit_stem_forms)
	else
		if base.props.syncopated then
			error("With syncopated verb '" .. ret.verb .. "', must use 'stem:' to specify an explicit stem")
		end
		-- Convert to general list form so we can call iut.map_forms() over it.
		ret.stem = iut.convert_to_general_list_form(ret.default_stem)
		ending_vowel = ret.default_ending_vowel
	end

	-- The conjugation vowel is the stressed version of the ending vowel; any other value is kept as is.
	base.conj_vowel =
		ending_vowel == "a" and "à" or
		ending_vowel == "e" and "é" or
		ending_vowel == "i" and "ì" or
		ending_vowel

	-- Process 'unstressed_stem:...' spec.
	if base.principal_part_specs.explicit_unstressed_stem_spec then
		local function generate_explicit_unstressed_stem_forms(form)
			local stem
			if form == "+" then
				stem = ret.default_stem
				if base.props.syncopated then
					error("Can't use + with 'unstressed_stem:' in syncopated verbs; specify an explicit stem")
				end
			else
				base.explicit_non_default_unstressed_stem_spec = true
				stem = rmatch(form, "^(.*)" .. ret.default_ending_vowel .. "$")
				if not stem then
					-- The vowel reported here is the one matched against above, i.e. ret.default_ending_vowel.
					error("Stem vowel of stem '" .. form .."' specified with 'unstressed_stem:' must end in conjugation vowel -" .. ret.default_ending_vowel)
				end
			end
			return stem
		end
		-- Put the explicit stem in ret.unstressed_stem (i.e. base.verb.unstressed_stem).
		process_specs(base, ret, "unstressed_stem", base.principal_part_specs.explicit_unstressed_stem_spec,
			generate_explicit_unstressed_stem_forms)
	else
		ret.unstressed_stem = iut.map_forms(ret.stem, function(stem) return remove_accents(stem) end)
	end

	ret.pres = iut.map_forms(ret.stem, function(stem) return comb("pres1s", stem, "o") end)
	ret.pres3s = iut.map_forms(ret.stem, function(stem) return comb("pres3s", stem, ending_vowel == "a" and "a" or "e") end)
	if ending_vowel == "i" then
		ret.isc_pres = iut.map_forms(ret.unstressed_stem, function(stem) return comb("pres1s", stem, "ìsco") end)
		ret.isc_pres3s = iut.map_forms(ret.unstressed_stem, function(stem) return comb("pres3s", stem, "ìsce") end)
	end
	ret.phis = iut.map_forms(ret.unstressed_stem, function(stem)
		if ending_vowel == "a" then
			return comb("phis1s", stem, "ài")
		elseif ending_vowel == "e" then
			-- Per Anna M. Thornton, "Overabundance: Multiple Forms Realizing the Same Cell" in ''Morphological Autonomy'', p. 366
			-- [https://www.google.com/books/edition/Morphological_Autonomy/oh3UlV6xSQEC?hl=en&gbpv=1&dq=%22fottette%22&pg=PA366&printsec=frontcover],
			-- -éi tends to occur only with stems ending in -t, while -étti/-ètti occurs with stems not ending in -t.
			-- The opposite combinations are almost vanishingly rare, and should not be included.
			if stem:find("t$") then
				return comb("phis1s", stem, "éi")
			else
				return comb("phis1s", stem, "étti")
			end
		else
			return comb("phis1s", stem, "ìi")
		end
	end)
	ret.pp = iut.map_forms(ret.unstressed_stem, function(stem)
		if ending_vowel == "a" then
			return comb("pp", stem, "àto")
		elseif ending_vowel == "e" then
			return comb("pp", stem, rfind(stem, "[cg]$") and "iùto" or "ùto")
		else
			return comb("pp", stem, "ìto")
		end
	end)
end
-- Return a truthy value if `spec` is a "single-vowel spec": one accented vowel (matching AV) optionally followed
-- by a single "+" or "-", or followed by exactly "--" or "++". Returns nil otherwise.
local function is_single_vowel_spec(spec)
	local matched
	for _, suffix in ipairs({"[+-]?", "%-%-", "%+%+"}) do
		matched = rfind(spec, "^" .. AV .. suffix .. "$")
		if matched then
			break
		end
	end
	return matched
end
-- Split `unaccented_stem` around its last two vowels and return five values: before, v1, between, v2, after,
-- where v1 and v2 are the second-to-last and last vowel. If the stem has only one vowel, `before` and `v1` are
-- empty strings. Throws an error if the stem contains no vowel; `unaccented` (the full verb) and
-- `unaccented_desc` (a description of where the verb came from) are used only in that error message.
local function analyze_stem_for_last_two_vowels(unaccented_stem, unaccented, unaccented_desc)
	local vowel_and_tail = "(" .. V .. ")(" .. NV .. "*)"
	local before, v1, between, v2, after =
		rmatch(unaccented_stem, "^(.*)" .. vowel_and_tail .. vowel_and_tail .. "$")
	if before then
		return before, v1, between, v2, after
	end
	-- Fewer than two vowels; fall back to looking for a single one.
	between, v2, after = rmatch(unaccented_stem, "^(.*)" .. vowel_and_tail .. "$")
	if not between then
		error("No vowel in " .. unaccented_desc .. " '" .. unaccented .. "' to match")
	end
	return "", "", between, v2, after
end
-- Apply the single-vowel spec `vowel_spec` (e.g. é, é+, ò-, ò--, à++) to `unaccented_stem` and return the stem
-- with the selected vowel replaced by the accented vowel from the spec. `unaccented` is the full verb and
-- `unaccented_desc` a description of where the verb came from; both are used only in error messages.
--
-- Which vowel is replaced:
-- * A spec ending in "--" targets the third-from-last vowel; the stem must have at least three vowels.
-- * Otherwise the last two vowels are examined. If they are identical, the spec must carry a trailing + or -
--   marker: exactly "-" selects the first of the two, any other marker selects the second.
-- * If they differ, a trailing "++" is tolerated (and stripped), a single trailing + or - is an error, and the
--   vowel that matches the spec's base vowel is replaced.
-- An error is thrown whenever the spec's base vowel doesn't match the targeted vowel.
local function apply_vowel_spec(unaccented_stem, unaccented, unaccented_desc, vowel_spec)
-- Note: this closure reads `vowel_spec` at call time, so it reports the spec with "++" already stripped if
-- the stripping below has happened.
local function vowel_spec_doesnt_match()
error("Vowel spec '" .. vowel_spec .. "' doesn't match vowel of " .. unaccented_desc .. " '" .. unaccented .. "'")
end
-- First character of the unfd() form of the spec; presumably the base vowel with its accent split off
-- (assumes unfd is NFD decomposition -- TODO confirm against its definition).
local raw_spec_vowel = usub(unfd(vowel_spec), 1, 1)
local form
local spec_vowel = rmatch(vowel_spec, "^(.)%-%-$")
if spec_vowel then -- a spec like ò--
local before, v1, between1, v2, between2, v3, after = rmatch(unaccented_stem,
"^(.*)(" .. V .. ")(" .. NV .. "*)(" .. V .. ")(" .. NV .. "*)(" .. V .. ")(" .. NV .. "*)$")
if not before then
error(mw.getContentLanguage():ucfirst(unaccented_desc) .. " '" .. unaccented ..
"' must have at least three vowels to use the vowel spec '" .. vowel_spec .. "'")
end
if raw_spec_vowel ~= v1 then
vowel_spec_doesnt_match()
end
-- Replace the third-from-last vowel with the accented vowel.
form = before .. spec_vowel .. between1 .. v2 .. between2 .. v3 .. after
else
local before, v1, between, v2, after = analyze_stem_for_last_two_vowels(unaccented_stem, unaccented, unaccented_desc)
if v1 == v2 then
-- Ambiguous: both of the last two vowels are the same, so a +/- marker is required.
local first_second
spec_vowel, first_second = rmatch(vowel_spec, "^(.)([+-]+)$") -- include ++
if not spec_vowel then
error("Last two stem vowels of " .. unaccented_desc .. " '" .. unaccented ..
"' are the same; you must specify + (second vowel) or - (first vowel) after the vowel spec '" ..
vowel_spec .. "'")
end
if raw_spec_vowel ~= v1 then
vowel_spec_doesnt_match()
end
if first_second == "-" then
form = before .. spec_vowel .. between .. v2 .. after
else
-- Any marker other than a single "-" (i.e. "+" or "++") selects the second vowel.
form = before .. v1 .. between .. spec_vowel .. after
end
else
if rfind(vowel_spec, "%+%+$") then
-- "++" is accepted even when unnecessary; strip it so only the accented vowel remains.
vowel_spec = rsub(vowel_spec, "%+%+$", "")
elseif rfind(vowel_spec, "[+-]$") then
error("Last two stem vowels of " .. unaccented_desc .. " '" .. unaccented ..
"' are different; specify just an accented vowel, without a following + or -: '" .. vowel_spec .. "'")
end
if raw_spec_vowel == v1 then
form = before .. vowel_spec .. between .. v2 .. after
elseif raw_spec_vowel == v2 then
form = before .. v1 .. between .. vowel_spec .. after
elseif before == "" then
-- `before` is empty, which includes the case where the stem has only one vowel; use the shorter message.
vowel_spec_doesnt_match()
else
error("Vowel spec '" .. vowel_spec .. "' doesn't match either of the last two vowels of " .. unaccented_desc ..
" '" .. unaccented .. "'")
end
end
end
return form
end
-- For every person/number in `full_person_number_list`, rewrite the forms of slot `rowslot .. persnum` so that
-- each form is wrapped in [[...]] and preceded by the reflexive pronoun obtained by substituting `persnum` into
-- `base.verb.finite_pref`.
local function add_finite_reflexive_clitics(base, rowslot)
	for _, persnum in ipairs(full_person_number_list) do
		local slot = rowslot .. persnum
		local function prepend_clitic(form)
			return substitute_reflexive_pronoun(base.verb.finite_pref, persnum) .. "[[" .. form .. "]]"
		end
		base.forms[slot] = iut.map_forms(base.forms[slot], prepend_clitic)
	end
end
-- Insert the ending-stressed infinitive into the "inf" slot of `base.forms`. The accent goes on the vowel of the
-- ending: acute for -ere (-ére), grave for -are/-ire. Verbs in -rre are rejected with an error.
local function do_ending_stressed_inf(base)
	local infinitive = base.verb.verb
	if rfind(infinitive, "rre$") then
		error("Use a backslash (\\) with -rre verbs")
	end
	-- -ere verbs get the acute accent; the second substitution is then a no-op for them.
	local with_acute = rsub(infinitive, "ere$", "ére")
	-- -are/-ire verbs get a combining grave accent, which unfc() composes with the vowel.
	iut.insert_form(base.forms, "inf", {form = unfc(rsub(with_acute, "([ai])re$", "%1" .. GR .. "re"))})
end
-- Process the root-stressed infinitive specs in `specs` (the single-vowel spec(s) given after a backslash).
-- Two results are produced via process_specs():
-- * base.principal_part_forms.root_stressed_stem: the accented stem (infinitive minus -ere/-rre);
-- * base.forms.inf: the accented infinitive (stem plus the -ere/-rre suffix).
local function do_root_stressed_inf(base, specs)
-- Expand one spec into an accented stem (if `form_to_do` == "stem") or an accented infinitive (otherwise).
-- `from_defaulted_pres` is truthy when this is a nested call made while defaulting the vowel from the present
-- tense principal part; it selects the error wording and guards against infinite recursion on "+".
local function generate_root_stressed_inf_forms(base, spec, form_to_do, from_defaulted_pres)
if spec == "-" then
error("Spec '-' not allowed as root-stressed infinitive spec")
end
if spec == "+" then
if from_defaulted_pres then
error("Can't use + for present tense with root-stressed infinitive, would trigger infinite loop")
end
local rre_vowel = rmatch(base.verb.verb, "([aiu])rre$")
if rre_vowel then
-- do_root_stressed_inf is used for verbs in -ere and -rre. If the root-stressed vowel isn't explicitly
-- given and the verb ends in -arre, -irre or -urre, derive it from the infinitive since there's only
-- one possibility. If the verb ends in -erre or -orre, this won't work because we have both scérre
-- (= [[scegliere]]) and disvèrre (= [[disvellere]]), as well as pórre and tòrre (= [[togliere]]).
-- Only v1 and v2 are used below, to decide whether the spec needs a disambiguating "+".
local before, v1, between, v2, after = analyze_stem_for_last_two_vowels(
rsub(base.verb.verb, "re$", ""), base.verb.verb, "root-stressed infinitive")
local vowel_spec = unfc(rre_vowel .. GR)
if v1 == v2 then
-- Last two vowels are identical, so explicitly select the second one.
vowel_spec = vowel_spec .. "+"
end
spec = vowel_spec
else
-- Use the single-vowel spec(s) in the present tense principal part.
local temp = {}
-- The string "from defaulted pres" serves only as a truthy value for `from_defaulted_pres`.
process_specs(base, temp, "temp", base.principal_part_specs.pres, function(form)
return generate_root_stressed_inf_forms(base, form, form_to_do, "from defaulted pres") end)
if not temp.temp then
error("Unable to generate infinitive from present tense")
end
return temp.temp
end
end
-- Split the infinitive into stem and -ere/-rre suffix; the lazy (.-) keeps the stem as short as possible.
local verb_stem, verb_suffix = rmatch(base.verb.verb, "^(.-)([er]re)$")
if not verb_stem then
error("Verb '" .. base.verb.verb .. "' must end in -ere or -rre to use backslash (\\) notation")
end
if not is_single_vowel_spec(spec) then
if from_defaulted_pres then
error("When defaulting root-stressed infinitive vowel to present, present spec must be a single-vowel spec, but saw '"
.. spec .. "'")
else
error("Explicit root-stressed infinitive spec '" .. spec .. "' should be a single-vowel spec")
end
end
local expanded = apply_vowel_spec(verb_stem, base.verb.verb, "root-stressed infinitive", spec)
if form_to_do == "stem" then
return expanded
else
return expanded .. verb_suffix
end
end
-- Run the specs twice: once to produce the accented stem, once to produce the full accented infinitive.
process_specs(base, base.principal_part_forms, "root_stressed_stem", specs, function(form)
return generate_root_stressed_inf_forms(base, form, "stem") end)
process_specs(base, base.forms, "inf", specs, function(form)
return generate_root_stressed_inf_forms(base, form, "inf") end)
end
-- Generate the infinitive form(s). Without a root-stressed infinitive spec, only the ending-stressed infinitive
-- is produced. With one, the root-stressed infinitive is produced, and `base.props.opt_root_stressed_inf`
-- ("root-first" or "ending-first") additionally requests the ending-stressed variant, in the given order.
--
-- do_root_stressed_inf() also sets base.principal_part_forms.root_stressed_stem, which is needed by
-- add_present_indic(), so this has to run before the present indicative is conjugated.
local function add_infinitive(base, rowslot)
	local root_specs = base.principal_part_specs.root_stressed_inf
	if not root_specs then
		do_ending_stressed_inf(base)
		return
	end
	local order = base.props.opt_root_stressed_inf
	if order == "ending-first" then
		do_ending_stressed_inf(base)
		do_root_stressed_inf(base, root_specs)
		return
	end
	do_root_stressed_inf(base, root_specs)
	if order == "root-first" then
		do_ending_stressed_inf(base)
	end
end
-- Attach the reflexive clitic to each infinitive in slot `rowslot`. Each form becomes a piped link whose target
-- is the accent-stripped infinitive and whose display text is the infinitive with final -re/-rre reduced to -r,
-- followed by the pronoun obtained by substituting "nf" into `base.verb.linked_suf`.
local function add_infinitive_reflexive_clitics(base, rowslot)
	local function attach_clitic(form)
		local link_target = remove_accents(form)
		local truncated = rsub(form, "r?re$", "r")
		return "[[" .. link_target .. "|" .. truncated .. "]]"
			.. substitute_reflexive_pronoun(base.verb.linked_suf, "nf")
	end
	base.forms[rowslot] = iut.map_forms(base.forms[rowslot], attach_clitic)
end
-- Expand one user-specified spec `form` for the first-singular present indicative principal part.
--   "+"                  -> the default present forms (base.verb.pres)
--   "+isc"               -> the default -isc- present forms (base.verb.isc_pres)
--   a single-vowel spec  -> the default present with the spec applied to its stem
--   anything else        -> taken as a literal form, which must end in -o (or -ò)
-- For a literal form that matches neither the default present nor (when one exists) the default -isc- present,
-- the present is marked irregular in base.rowprops.irreg and base.is_irreg.
-- Returns the expanded form(s); throws an error on an invalid spec.
local function generate_pres_forms(base, form)
	local principal_part_desc = "first-singular present indicative"
	if form == "+" then
		check_not_null(base, base.verb.pres, form, principal_part_desc)
		return base.verb.pres
	elseif form == "+isc" then
		check_not_null(base, base.verb.isc_pres, form, principal_part_desc)
		return base.verb.isc_pres
	elseif is_single_vowel_spec(form) then
		check_not_null(base, base.verb.pres, form, principal_part_desc)
		return iut.map_forms(base.verb.pres, function(defform)
			defform = remove_accents(defform) -- in case we specified 'stem:...' with an accent
			local pres = rmatch(defform, "^(.*)o$")
			if not pres then
				error("Internal error: Default present '" .. defform .. "' doesn't end in -o")
			end
			return apply_vowel_spec(pres, defform, "default present", form) .. "o"
		end)
	elseif not rfind(form, "[oò]$") then
		error("Present first-person singular form '" .. form .. "' should end in -o")
	else
		local unaccented_form = remove_accents(form)
		-- The form is irregular if it matches neither default. base.verb.isc_pres only exists for -ire verbs;
		-- when it is absent there is nothing further to compare against, so the check must not be skipped
		-- (previously a missing isc_pres made the whole condition false and nothing was ever flagged).
		if not general_list_form_contains_form(base.verb.pres, unaccented_form, remove_accents)
			and (not base.verb.isc_pres
				or not general_list_form_contains_form(base.verb.isc_pres, unaccented_form, remove_accents)) then
			base.rowprops.irreg.pres = true
			-- FIXME! Here we are encoding knowledge of the algorithm in add_present_indic() to determine how to
			-- propagate irregular present 1s to other forms. This duplicates the logic of that algorithm, and if that
			-- code ever changes, this code needs to change too. In practice, it doesn't currently matter so much
			-- because the values in `is_irreg` are currently used only by the code in [[Module:0it-headword]] to
			-- determine whether to show irregular principal parts, and the present tense is always shown in any case.
			-- But in the future, the values in `is_irreg` could be used for other purposes.
			base.is_irreg.pres1s = true
			base.is_irreg.pres3p = true
			if not base.principal_part_forms.pres3s and not base.explicit_non_default_stem_spec
				and not base.principal_part_forms.root_stressed_stem then
				base.is_irreg.pres2s = true
				base.is_irreg.pres3s = true
			end
		end
		return form
	end
end
-- Expand one user-specified spec `form` for the third-singular present indicative principal part.
--   "+"                  -> the default third-singular forms (base.verb.pres3s)
--   "+isc"               -> the default -isc- third-singular forms (base.verb.isc_pres3s)
--   a single-vowel spec  -> the default third-singular with the spec applied to its stem
--   anything else        -> taken as a literal form, which must end in -a or -e (optionally grave-accented)
-- For a literal form that matches neither the default nor (when one exists) the default -isc- form, the present
-- is marked irregular in base.rowprops.irreg and base.is_irreg (2s as well as 3s).
-- Returns the expanded form(s); throws an error on an invalid spec.
local function generate_pres3s_forms(base, form)
	local principal_part_desc = "third-singular present indicative"
	if form == "+" then
		check_not_null(base, base.verb.pres3s, form, principal_part_desc)
		return base.verb.pres3s
	elseif form == "+isc" then
		check_not_null(base, base.verb.isc_pres3s, form, principal_part_desc)
		return base.verb.isc_pres3s
	elseif is_single_vowel_spec(form) then
		check_not_null(base, base.verb.pres3s, form, principal_part_desc)
		return iut.map_forms(base.verb.pres3s, function(defform)
			defform = remove_accents(defform) -- in case we specified 'stem:...' with an accent
			local pres3s, final_vowel = rmatch(defform, "^(.*)([ae])$")
			if not pres3s then
				error("Internal error: Default third-person singular present '" .. defform .. "' doesn't end in -a or -e")
			end
			return apply_vowel_spec(pres3s, defform, "default third-person singular present", form) .. final_vowel
		end)
	-- Anchor the check to the end of the form, as the error message states (and as generate_pres_forms() does
	-- for -o); without the anchor any form merely containing a/e was accepted.
	elseif not rfind(form, "[aàeè]$") then
		error("Present third-person singular form '" .. form .. "' should end in -a or -e")
	else
		local unaccented_form = remove_accents(form)
		-- The form is irregular if it matches neither default. base.verb.isc_pres3s only exists for -ire verbs;
		-- when it is absent there is nothing further to compare against, so the check must not be skipped
		-- (previously a missing isc_pres3s made the whole condition false and nothing was ever flagged).
		if not general_list_form_contains_form(base.verb.pres3s, unaccented_form, remove_accents)
			and (not base.verb.isc_pres3s
				or not general_list_form_contains_form(base.verb.isc_pres3s, unaccented_form, remove_accents)) then
			base.rowprops.irreg.pres = true
			base.is_irreg.pres3s = true
			-- pres3s is copied to pres2s.
			base.is_irreg.pres2s = true
		end
		return form
	end
end
-- Conjugate the present indicative; the detailed rules are under "RULES FOR CONJUGATION" near the top of the
-- file. The "pres" (1s) and, if specified, "pres3s" principal parts are expanded first; the six person/number
-- slots are then filled from them and from the verb's stems.
local function add_present_indic(base, rowslot)
	local specs = base.principal_part_specs
	local parts = base.principal_part_forms
	process_specs(base, parts, "pres", specs.pres, function(form)
		return generate_pres_forms(base, form)
	end)
	if specs.pres3s then
		process_specs(base, parts, "pres3s", specs.pres3s, function(form)
			return generate_pres3s_forms(base, form)
		end)
	end
	-- With no present indicative principal parts at all (user specified 'pres:-'), generate nothing; otherwise
	-- pres23s and pres12p forms would still be produced from the overall verb stem(s).
	if not (parts.pres or parts.pres3s) then
		return
	end

	local is_are_verb = base.conj_vowel == "à"

	-- 1s: the principal part itself.
	add_forms(base, rowslot .. "1s", parts.pres, "")

	-- 3p: 1s minus -o, plus -ano/-ono.
	local pres1s_stem = iut.map_forms(parts.pres, function(form)
		if form:find("o$") then
			return rsub(form, "o$", "")
		end
		error("presrow: must be given in order to generate the present indicative because explicit first-person "
			.. "singular present indicative '" .. form .. "' does not end in -o")
	end)
	add_forms(base, rowslot .. "3p", pres1s_stem, is_are_verb and "ano" or "ono")

	-- 2s/3s: stem from pres3s if given, else explicit stem, else root-stressed stem, else the 1s stem.
	local pres23s_stem
	if parts.pres3s then
		pres23s_stem = iut.map_forms(parts.pres3s, function(form)
			if form:find("[ae]$") then
				return rsub(form, "[ae]$", "")
			end
			error("presrow: must be given in order to generate the present indicative because explicit third-person "
				.. "singular present indicative '" .. form .. "' does not end in -a or -e")
		end)
	elseif base.explicit_non_default_stem_spec and base.verb.stem then
		pres23s_stem = base.verb.stem
	else
		pres23s_stem = parts.root_stressed_stem or pres1s_stem
	end
	add_forms(base, rowslot .. "2s", pres23s_stem, "i")
	add_forms(base, rowslot .. "3s", pres23s_stem, is_are_verb and "a" or "e")

	-- 1p/2p: built on the unstressed stem.
	add_forms(base, rowslot .. "1p", base.verb.unstressed_stem, "iàmo")
	add_forms(base, rowslot .. "2p", base.verb.unstressed_stem, base.conj_vowel .. "te")
end
-- Derive the default present subjunctive principal part (the 123s form) from the present indicative 1s forms
-- in base.forms.pres1s: final -o is replaced by -i for verbs whose conjugation vowel is à, otherwise by -a.
-- A pres1s form not ending in -o contributes nothing, or raises an error if `do_err` is set.
local function generate_default_present_subj_principal_part(base, do_err)
	local sub_ending = base.conj_vowel == "à" and "i" or "a"
	return iut.flatmap_forms(base.forms.pres1s, function(form)
		if form:find("o$") then
			-- Need to call combine_stem_ending() here to handle verbs in -care/-gare and -ciare/-giare.
			return {combine_stem_ending(base, "sub123s", rsub(form, "o$", ""), sub_ending)}
		end
		if do_err then
			error("sub: or subrow: must be given in order to generate the singular present subjunctive "
				.. "because first-person singular present indicative '" .. form .. "' does not end in -o")
		end
		return {}
	end)
end
-- Conjugate the present subjunctive; the detailed rules are under "RULES FOR CONJUGATION" near the top of the
-- file. Nothing is generated when there is no "sub" principal part (user specified 'sub:-'); otherwise sub12p
-- forms would still be produced from the present indicative.
local function add_present_subj(base, rowslot)
	local sub_principal_part = base.principal_part_forms.sub
	if not sub_principal_part then
		return
	end

	-- 123s is the principal part as-is; 3p adds -no.
	add_forms(base, rowslot .. "123s", sub_principal_part, "")
	add_forms(base, rowslot .. "3p", sub_principal_part, "no")

	-- 1p is identical to the present indicative 1p.
	local pres1p = base.forms.pres1p
	copy_forms(base, rowslot .. "1p", pres1p)

	-- 2p is the present indicative 1p with -mo replaced by -te.
	local sub2p = iut.map_forms(pres1p, function(form)
		if form:find("mo$") then
			return rsub(form, "mo$", "te")
		end
		error("subrow: must be given in order to generate the second-person plural present subjunctive "
			.. "because first-person plural present indicative '" .. form .. "' does not end in -mo")
	end)
	insert_forms(base, rowslot .. "2p", sub2p)
end
-- Return the default imperative 2s principal part: the present indicative 3s forms when the conjugation vowel
-- is à, otherwise the present indicative 2s forms. `do_err` is accepted for interface uniformity but unused.
local function generate_default_imperative_principal_part(base, do_err)
	local source_slot = base.conj_vowel == "à" and "pres3s" or "pres2s"
	return base.forms[source_slot]
end
-- Conjugate the imperative; the detailed rules are under "RULES FOR CONJUGATION" near the top of the file.
-- Every slot is a copy of forms computed elsewhere.
local function add_imperative(base, rowslot)
	local imp_principal_part = base.principal_part_forms.imp
	if not imp_principal_part then
		-- If imp:- given, suppress the whole imperative.
		return
	end
	-- 2s comes from the imperative principal part (user specified, or defaulted from the present indicative 3s
	-- for conj vowel à and from the present indicative 2s for other conj vowels).
	copy_forms(base, rowslot .. "2s", imp_principal_part)
	-- 2p comes from the present indicative 2p.
	copy_forms(base, rowslot .. "2p", base.forms.pres2p)
	-- 3s, 1p and 3p come from the corresponding present subjunctive forms.
	for _, persnum in ipairs({"3s", "1p", "3p"}) do
		copy_forms(base, rowslot .. persnum, base.forms["sub" .. persnum])
	end
end
-- Return the clitic suffix for person/number `persnum`: substitute_reflexive_pronoun() is applied to
-- `base.verb.linked_suf`, and the result is passed through m_links.remove_links().
local function get_unlinked_clitic_suffix(base, persnum)
return m_links.remove_links(substitute_reflexive_pronoun(base.verb.linked_suf, persnum))
end
-- Add reflexive clitics to the imperative row `rowslot`: 2s, 1p and 2p take the clitic as a suffix, while 3s
-- and 3p take the pronoun from `base.verb.finite_pref` as a prefix before the linked form.
local function add_imperative_reflexive_clitics(base, rowslot)
	local slot_2s = rowslot .. "2s"
	local s2suf = get_unlinked_clitic_suffix(base, "2s")
	local imp2s_forms = base.forms[slot_2s]

	-- Check if there is a 2s imperative ending in an apostrophe, e.g. dà', fà'. If so, there is probably also an
	-- imperative in -ài, but we don't want to generate a reflexive imperative from it (#dàiti). Otherwise, we want
	-- to generate a reflexive imperative as normal (e.g. ''distràiti'' from [[distrarsi]]).
	local saw_form_with_apostrophe = false
	for _, formobj in ipairs(imp2s_forms or {}) do
		if rfind(formobj.form, "'$") then
			saw_form_with_apostrophe = true
			break
		end
	end

	base.forms[slot_2s] = iut.flatmap_forms(imp2s_forms, function(form)
		form = rsub(form, "'$", "") -- dà', fà', etc.
		local suffix = s2suf
		if rfind(form, AV .. "$") then
			-- Final stressed vowel; implement syntactic gemination by doubling the clitic's first character,
			-- except when the clitic begins with gli.
			if not rfind(s2suf, "^gli") then
				suffix = usub(s2suf, 1, 1) .. s2suf
			end
		elseif rfind(form, "ài$") and saw_form_with_apostrophe then
			-- Skip this imperative; see above.
			return {}
		end
		return {add_suffix_to_form(form, suffix)}
	end)

	-- For the following, we shouldn't need to use add_suffix_to_form(), which handles PRESERVE_ACCENT, because
	-- PRESERVE_ACCENT occurs only with monosyllabic forms, and the 1p/2p forms are never monosyllabic.
	for _, persnum in ipairs({"1p", "2p"}) do
		local slot = rowslot .. persnum
		local suf = get_unlinked_clitic_suffix(base, persnum)
		base.forms[slot] = iut.map_forms(base.forms[slot], function(form)
			return form .. suf
		end)
	end

	for _, persnum in ipairs({"3s", "3p"}) do
		local slot = rowslot .. persnum
		base.forms[slot] = iut.map_forms(base.forms[slot], function(form)
			return substitute_reflexive_pronoun(base.verb.finite_pref, persnum) .. "[[" .. form .. "]]"
		end)
	end
end
-- Generate the negative imperative slots ("negimp" .. persnum) by prefixing "[[non]] " to a linked source form:
-- the infinitive for 2s, and the corresponding (positive) imperative form for all other persons.
local function add_negative_imperative(base)
	if not base.principal_part_forms.imp then
		-- If imp:- given, suppress the whole imperative.
		return
	end
	local function negate(form)
		return "[[non]] [[" .. form .. "]]"
	end
	for _, persnum in ipairs({"2s", "3s", "1p", "2p", "3p"}) do
		local source_slot
		if persnum == "2s" then
			source_slot = "inf"
		else
			source_slot = "imp" .. persnum
		end
		insert_forms(base, "negimp" .. persnum, iut.map_forms(base.forms[source_slot], negate))
	end
end
-- Add reflexive clitics to the negative imperative row `rowslot`. For 2s, 1p and 2p each form yields two
-- variants: one with the clitic suffixed inside the link (for 2s, after reducing the infinitive's final -re/-rre
-- to -r) and one with the pronoun inserted between "[[non]]" and the verb. For 3s and 3p only the variant with
-- the inserted pronoun is produced.
local function add_negative_imperative_reflexive_clitics(base, rowslot)
	-- Insert `pref` between the leading "[[non]]" and the rest of `form`.
	local function insert_proclitic(form, pref)
		return rsub(form, "^(%[%[non%]%]) (.*)$", "%1 " .. pref .. " %2")
	end

	for _, persnum in ipairs({"2s", "1p", "2p"}) do
		local slot = rowslot .. persnum
		local suf = get_unlinked_clitic_suffix(base, persnum)
		local pref = substitute_reflexive_pronoun(base.verb.finite_pref, persnum)
		base.forms[slot] = iut.flatmap_forms(base.forms[slot], function(form)
			local truncated
			if persnum == "2s" then
				truncated = rsub(form, "r?re%]%]$", "r")
			else
				truncated = rsub(form, "%]%]$", "")
			end
			local sufform = truncated .. suf .. "]]"
			local prefform = insert_proclitic(form, pref)
			return {sufform, prefform}
		end)
	end

	for _, persnum in ipairs({"3s", "3p"}) do
		local slot = rowslot .. persnum
		local pref = substitute_reflexive_pronoun(base.verb.finite_pref, persnum)
		base.forms[slot] = iut.map_forms(base.forms[slot], function(form)
			return insert_proclitic(form, pref)
		end)
	end
end
-- Return the default past historic principal part, base.verb.phis. When `do_err` is set, check_not_null() is
-- first called on it with the spec "+".
local function generate_default_past_historic_principal_part(base, do_err)
	local verb = base.verb
	if do_err then
		check_not_null(base, verb.phis, "+", "first-singular past historic")
	end
	return verb.phis
end
-- Conjugate the past historic from each form of the "phis" principal part (the 1s form). The ending of the 1s
-- form selects the paradigm: -ài, -éi, -étti/-ètti and -ìi select the regular sets of endings; any other form
-- in -i is treated as irregular, with 1s/3s/3p built on the given form and 2s/1p/2p built on
-- base.verb.phisstem if set, else on the unstressed stem plus conjugation vowel. "?" fills every slot with "?".
-- A form not ending in -i raises an error.
local function add_past_historic(base, rowslot)
	local principal_parts = base.principal_part_forms.phis
	if not principal_parts then
		-- specified as "-"
		return
	end

	local function conjugate_principal_part(form)
		-- Add the six endings (an empty table means "no form") to `pref`, carrying over any footnotes attached
		-- to the principal part.
		local function add_phis(pref, s1, s2, s3, p1, p2, p3)
			local newform = form.footnotes and iut.convert_to_general_list_form(pref, form.footnotes) or pref
			add_forms(base, rowslot .. "1s", newform, s1)
			add_forms(base, rowslot .. "2s", newform, s2)
			add_forms(base, rowslot .. "3s", newform, s3)
			add_forms(base, rowslot .. "1p", newform, p1)
			add_forms(base, rowslot .. "2p", newform, p2)
			add_forms(base, rowslot .. "3p", newform, p3)
		end

		local phis1s = form.form
		if phis1s == "?" then
			add_phis("?", "?", "?", "?", "?", "?", "?")
			return
		end

		local pref = rmatch(phis1s, "^(.*)ài$")
		if pref then
			add_phis(pref, "ài", "àsti", "ò", "àmmo", "àste", "àrono")
			return
		end

		pref = rmatch(phis1s, "^(.*)éi$")
		if pref then
			add_phis(pref, "éi", "ésti", "é", "émmo", "éste", "érono")
			return
		end

		pref = rmatch(phis1s, "^(.*)[èé]tti$")
		if pref then
			add_phis(pref, {{form = "étti"}, {form = "ètti", footnotes = {"[traditional]"}}},
				"ésti", {{form = "étte"}, {form = "ètte", footnotes = {"[traditional]"}}},
				"émmo", "éste", {{form = "éttero"}, {form = "èttero", footnotes = {"[traditional]"}}})
			return
		end

		pref = rmatch(phis1s, "^(.*)ìi$")
		if pref then
			add_phis(pref, "ìi", "ìsti", "ì", "ìmmo", "ìste", "ìrono")
			return
		end

		pref = rmatch(phis1s, "^(.*)i$")
		if pref then
			-- Irregular: 1s/3s/3p from the given form, the remaining persons from a separate stem.
			add_phis(pref, "i", {}, "e", {}, {}, "ero")
			if base.verb.phisstem then
				add_phis(base.verb.phisstem, {}, "sti", {}, "mmo", "ste", {})
			else
				add_phis(base.verb.unstressed_stem, {}, base.conj_vowel .. "sti", {}, base.conj_vowel .. "mmo",
					base.conj_vowel .. "ste", {})
			end
			return
		end

		error("phisrow: must be given in order to generate the past historic because explicit first-person "
			.. "singular past historic '" .. phis1s .. "' does not end in -i")
	end

	for _, form in ipairs(principal_parts) do
		conjugate_principal_part(form)
	end
end
-- Return the default future principal part (the 1s form). For -are verbs (conjugation vowel à) the stem may
-- need adjusting before -erò is added; for other verbs -rò is added to the unstressed stem plus default ending
-- vowel (if 'unstressed_stem:' was given) or the infinitive's final -e is replaced by -ò. `do_err` is unused.
local function generate_default_future_principal_part(base, do_err)
	-- For -are verbs, we may need to make some adjustments to form the future principal part.
	local function are_stem_to_future_principal_part(stem)
		local function with_ending(ending)
			return combine_stem_ending(base, "fut1s", stem, ending)
		end
		if stem:find("[cg]$") then
			return {with_ending("herò")}
		end
		if not stem:find("[cg]i$") then
			return {with_ending("erò")}
		end
		-- Stem in -ci/-gi from here on.
		local pres1s = base.forms.pres1s
		if not pres1s or pres1s[1].form == "?" then -- missing or unknown pres1s; future still can be generated
			return {rsub(stem, "i$", "erò")}
		end
		-- Verbs in -ciare/-giare with the accent on the final -ì in the present singular take future in
		-- -cier-/-gier- not -cer-/-ger-. Compare [[sciare]] "to ski", pres1s ''scìo'', fut1s ''scierò'' vs.
		-- [[lasciare]] "to let", pres1s ''làscio'', fut1s ''lascerò''. The only way to make this distinction
		-- is to check the present singular, e.g. pres1s.
		return iut.map_forms(pres1s, function(form)
			if rfind(form, "ìo$") then
				return with_ending("erò")
			end
			return rsub(stem, "i$", "erò")
		end)
	end

	local is_are_verb = base.conj_vowel == "à"
	if base.explicit_non_default_unstressed_stem_spec then
		-- If user gave 'unstressed_stem:', use it here.
		local stems = base.verb.unstressed_stem
		if is_are_verb then
			return iut.flatmap_forms(stems, are_stem_to_future_principal_part)
		end
		return iut.map_forms(stems, function(form)
			return form .. base.verb.default_ending_vowel .. "rò"
		end)
	end
	if is_are_verb then
		return are_stem_to_future_principal_part(rsub(base.verb.verb, "are$", ""))
	end
	return rsub(base.verb.verb, "e$", "ò")
end
-- Return the default conditional principal part, derived from each future principal part by replacing the final
-- -ò with -èi (via combine_stem_ending()). `do_err` is unused.
local function generate_default_conditional_principal_part(base, do_err)
	local function future_to_conditional(fut1s)
		local stem = rmatch(fut1s, "^(.*)ò$")
		if stem then
			return combine_stem_ending(base, "cond1s", stem, "èi")
		end
		-- This should have been caught earlier (when processing the future) and generated an error.
		error(("Internal error: When generating conditional, saw principal part for future '%s' that does not end in -ò")
			:format(fut1s))
	end
	-- If fut:- is given, base.principal_part_forms.fut will be nil, and the following will correctly return nil,
	-- so that there's no conditional.
	return iut.map_forms(base.principal_part_forms.fut, future_to_conditional)
end
-- Reflexive-clitic hook for participle rows. Intentionally a no-op: `base` and `rowslot` are ignored and no
-- forms are modified.
local function add_participle_reflexive_clitics(base, rowslot)
-- do nothing
end
-- Return the default past participle principal part, base.verb.pp. When `do_err` is set, check_not_null() is
-- first called on it with the spec "+".
local function generate_default_past_participle_principal_part(base, do_err)
	if do_err then
		-- The description names the past participle (it previously said "first-singular past historic", copied
		-- from the past historic generator).
		check_not_null(base, base.verb.pp, "+", "past participle")
	end
	return base.verb.pp
end
-- Return the default gerund: each unstressed stem combined with -àndo (conjugation vowel à) or -èndo (all other
-- verbs). `do_err` is unused.
local function generate_default_gerund_principal_part(base, do_err)
	local gerund_ending = base.conj_vowel == "à" and "àndo" or "èndo"
	local function stem_to_gerund(stem)
		-- Need to call combine_stem_ending() to handle cases like [[boglire]], where 'bógl' + 'èndo' becomes
		-- 'boglièndo'.
		return combine_stem_ending(base, "ger", stem, gerund_ending)
	end
	return iut.map_forms(base.verb.unstressed_stem, stem_to_gerund)
end
-- Append the unlinked "nf" clitic suffix to every gerund form in slot `rowslot`.
local function add_gerund_reflexive_clitics(base, rowslot)
	local function append_clitic(form)
		return form .. get_unlinked_clitic_suffix(base, "nf")
	end
	base.forms[rowslot] = iut.map_forms(base.forms[rowslot], append_clitic)
end
-- Return the default present participle: each unstressed stem combined with -ànte (conjugation vowel à) or
-- -ènte (all other verbs). `do_err` is unused.
local function generate_default_present_participle_principal_part(base, do_err)
	local participle_ending = base.conj_vowel == "à" and "ànte" or "ènte"
	local function stem_to_participle(stem)
		-- Need to call combine_stem_ending() to handle cases like [[boglire]], where 'bógl' + 'ènte' becomes
		-- 'bogliènte'.
		return combine_stem_ending(base, "presp", stem, participle_ending)
	end
	return iut.map_forms(base.verb.unstressed_stem, stem_to_participle)
end
--[=[
Data on how to conjugate individual rows (i.e. tense/aspect combinations, such as present indicative or
conditional).
The order listed here matters. It determines the order of generating row forms. The order must have
'inf' < 'pres' < 'sub' < 'imp' < 'negimp' because the present indicative uses the root_stressed_stem generated
by add_infinitive; the present subjunctive uses generated forms from the present indicative; the imperative uses
forms from the present subjunctive and present indicative; and the negative imperative uses forms from the infinitive
and the imperative. Similarly we must have 'fut' < 'cond' because the conditional uses the future principal part.
The following specs are allowed:
-- `desc` must be present and is an all-lowercase English description of the row. It is used in error messages and in
generating categories of the form 'Italian verbs with irregular ROW' and 'Italian verbs with missing ROW'.
-- `tag_suffix` must be present and is a string containing the {{inflection of}} tags that are appended onto the
person/number tags to form the accelerator spec. For example, the spec "pres|sub" means that the accelerator spec
for the third singular present subjunctive will be "3|s|pres|sub". This accelerator spec is passed to
[[Module:0inflection utilities]], which in turn passes it to [[Module:0links]] when generating the link(s) for the
corresponding verb form(s). The spec ultimately gets processed by [[Module:0accel]] to generate the definition line
for nonexistent verb forms. (FIXME: Accelerator support is currently disabled for forms with non-final accents.
We need to change the code in [[Module:0inflection utilities]] so it sets the correct target not containing the
non-final accent.)
-- `persnums` must be present and specifies the possible person/number suffixes to add onto the row-level slot
(e.g. "phis" for the past historic) to form the individual person/number-specific slot (e.g. "phis2s" for the
second-person singular past historic).
-- `row_override_persnums`, if present, specifies the person/number suffixes that are specified by a row override.
If omitted, `persnums` is used.
-- `row_override_persnums_to_full_persnums`, if present, specifies a mapping from the person/number suffixes
specified by a row override to the person/number/suffixes used for conjugating the row. This is used, for example,
with the subjunctive and imperfect subjunctive, where the first element of the row override specifies
(respectively) the 123s and 12s forms, which need to be copied (respectively) to the 1s/2s/3s and 1s/3s forms.
If omitted, no such copying happens. It's still possible for the row override persnums to disagree with the
overall persnums. This happens, for example, with the imperative, where the 'improw:' row override spec specifies
only the 2s and 2p forms; the remaining forms (3s, 1p, 3p) are generated during conjugation by copying from other
forms, and can't be overridden using a row override. (They can still be overridden using a single override such
as 'imp3s:...' or a late single override such as 'imp3s!:...'.)
-- `generate_default_principal_part`, if present, should be a function of two arguments, `base` and `do_err`, and
should return the principal part(s) for the row. The return value can be anything that is convertible to the
"general list form" of a slot's forms, i.e. it can return a string, an object
{form = FORM, footnotes = {FOOTNOTE, FOOTNOTE, ...}}, or a list of either. It must be present if `conjugate` is
a table, but may be missing if `conjugate` is a function, in which case the function needs to generate the
principal part itself or otherwise handle things differently. For example, the present indicative does not
specify a value for `generate_default_principal_part` because there are actually two principal parts for the
present tense (first and third singular), which are processed at the beginning of the present indicative
`conjugate` function. Similarly, the infinitive does not specify a value for `generate_default_principal_part`
because there is no principal part to speak of; the infinitive is generated directly from the lemma in combination
with the slash or backslash that follows the auxiliary and (in the case of a root-stressed infinitive) the
single-vowel spec following the backslash. If `do_err` is given to this function, the function may throw an error
if it can't generate the principal part; otherwise it should return nil.
-- `conjugate` is either a function to conjugate the row (of two arguments, `base` and `rowslot`), or a table
containing the endings to add onto the principal part to conjugate the row. In the latter case, there should be
the same number of elements in the table as there are elements in `row_override_persnums` (if given) or
`persnums` (otherwise).
-- `no_explicit_principal_part` (DOCUMENT ME)
-- `no_row_overrides` (DOCUMENT ME)
-- `no_single_overrides` (DOCUMENT ME)
-- `add_reflexive_clitics` (DOCUMENT ME)
-- `dont_check_defective_status` (DOCUMENT ME)
]=]
-- Ordered list of {ROWSLOT, ROWSPEC} pairs, one per row of the conjugation table. ROWSLOT is the row prefix (e.g.
-- "pres") and ROWSPEC is a table with the fields described above. The full slot names are formed by concatenating
-- ROWSLOT with each element of `persnums` (so a row with persnums = {""} has a single slot named after the row
-- itself). The order matters: conjugate_verb() iterates over this list with ipairs() and conjugates the rows in the
-- order given here. A lookup table keyed by ROWSLOT is built from this list into `row_conjugation_map`.
local row_conjugations = {
{"inf", {
desc = "infinitive",
tag_suffix = "inf",
persnums = {""},
-- No generate_default_principal_part; handled specially in add_infinitive.
conjugate = add_infinitive,
no_explicit_principal_part = true, -- because handled specially using / or \ notation
no_row_overrides = true, -- useless because there's only one form; use / or \ notation
no_single_overrides = true, -- useless because there's only one form; use / or \ notation
add_reflexive_clitics = add_infinitive_reflexive_clitics,
}},
{"pres", {
desc = "present indicative",
tag_suffix = "pres|ind",
persnums = full_person_number_list,
-- No generate_default_principal_part; handled specially in add_present_indic because we actually have
-- two principal parts for the present indicative ("pres" and "pres3s").
conjugate = add_present_indic,
-- No setting for no_explicit_principal_part here because it would never be checked; we special-case 'pres:'
-- overrides before checking no_explicit_principal_part. The reason for special-casing is because there are two
-- principal parts involved, "pres" and "pres3s", and we allow both to be specified using the syntax
-- 'pres:PRES^PRES3S'.
add_reflexive_clitics = add_finite_reflexive_clitics,
}},
{"sub", {
desc = "present subjunctive",
tag_suffix = "pres|sub",
persnums = full_person_number_list,
-- Row overrides use a combined "123s" slot, which is mapped onto the individual 1s, 2s and 3s slots.
row_override_persnums = {"123s", "1p", "2p", "3p"},
row_override_persnums_to_full_persnums = {["123s"] = {"1s", "2s", "3s"}},
generate_default_principal_part = generate_default_present_subj_principal_part,
conjugate = add_present_subj,
add_reflexive_clitics = add_finite_reflexive_clitics,
}},
{"imp", {
desc = "imperative",
tag_suffix = "imp",
persnums = imp_person_number_list,
row_override_persnums = {"2s", "2p"},
generate_default_principal_part = generate_default_imperative_principal_part,
conjugate = add_imperative,
add_reflexive_clitics = add_imperative_reflexive_clitics,
}},
{"negimp", {
desc = "negative imperative",
-- A tag suffix of "-" causes the slots of this row to be entered into all_verb_slots with the tag "-".
tag_suffix = "-",
persnums = imp_person_number_list,
-- No generate_default_principal_part because all parts are copied from other parts.
conjugate = add_negative_imperative,
add_reflexive_clitics = add_negative_imperative_reflexive_clitics,
no_explicit_principal_part = true, -- because all parts are copied from other parts
no_row_overrides = true, -- not useful; use single overrides if really needed
-- We don't want a category [[:Category:Italian verbs with missing negative imperative]]; doesn't make
-- sense as all parts are copied from elsewhere.
dont_check_defective_status = true,
}},
{"phis", {
desc = "past historic",
tag_suffix = "phis",
persnums = full_person_number_list,
generate_default_principal_part = generate_default_past_historic_principal_part,
conjugate = add_past_historic,
add_reflexive_clitics = add_finite_reflexive_clitics,
-- Set to "builtin" because normally handled specially in PRES^PRES3S,PHIS,PP spec, but when a built-in verb
-- is involved, we want a way of overriding the past historic (using 'phis:').
no_explicit_principal_part = "builtin",
}},
{"imperf", {
desc = "imperfect indicative",
tag_suffix = "impf|ind",
persnums = full_person_number_list,
-- The default principal part (1s) is the unstressed stem + conjugation vowel + "vo".
generate_default_principal_part = function(base) return iut.map_forms(base.verb.unstressed_stem,
function(stem) return combine_stem_ending(base, "imperf1s", stem, base.conj_vowel .. "vo") end) end,
-- Endings added after removing the first ending ("o") from the principal part; see conjugate_row().
conjugate = {"o", "i", "a", "àmo", "àte", "ano"},
add_reflexive_clitics = add_finite_reflexive_clitics,
}},
{"impsub", {
desc = "imperfect subjunctive",
tag_suffix = "impf|sub",
persnums = full_person_number_list,
-- Row overrides use a combined "12s" slot, which is mapped onto the individual 1s and 2s slots.
row_override_persnums = {"12s", "3s", "1p", "2p", "3p"},
row_override_persnums_to_full_persnums = {["12s"] = {"1s", "2s"}},
-- The default principal part (12s) is the unstressed stem + conjugation vowel + "ssi".
generate_default_principal_part = function(base) return iut.map_forms(base.verb.unstressed_stem,
function(stem) return combine_stem_ending(base, "impsub12s", stem, base.conj_vowel .. "ssi") end) end,
-- One ending per element of `row_override_persnums` (not `persnums`), as checked in conjugate_row().
conjugate = {"ssi", "sse", "ssimo", "ste", "ssero"},
add_reflexive_clitics = add_finite_reflexive_clitics,
}},
{"fut", {
desc = "future",
tag_suffix = "fut",
persnums = full_person_number_list,
generate_default_principal_part = generate_default_future_principal_part,
conjugate = {"ò", "ài", "à", "émo", "éte", "ànno"},
add_reflexive_clitics = add_finite_reflexive_clitics,
}},
{"cond", {
desc = "conditional",
tag_suffix = "cond",
persnums = full_person_number_list,
generate_default_principal_part = generate_default_conditional_principal_part,
-- The 3s and 3p entries are lists of two alternative endings each. The first entry must remain a single string
-- because conjugate_row() strips it from the principal part to obtain the stem.
conjugate = {"èi", "ésti", {"èbbe", "ébbe"}, "émmo", "éste", {"èbbero", "ébbero"}},
add_reflexive_clitics = add_finite_reflexive_clitics,
}},
{"pp", {
desc = "past participle",
tag_suffix = "past|part",
persnums = {""},
generate_default_principal_part = generate_default_past_participle_principal_part,
-- Single empty ending: the only form of the row is the principal part itself.
conjugate = {""},
add_reflexive_clitics = add_participle_reflexive_clitics,
-- Set to "builtin" because normally handled specially in PRES^PRES3S,PHIS,PP spec, but when a built-in verb
-- is involved, we want a way of overriding the past participle (using 'pp:').
no_explicit_principal_part = "builtin",
no_row_overrides = true, -- useless because there's only one form; use the PRES^PRES3S,PHIS,PP or pp: spec
no_single_overrides = true, -- useless because there's only one form; use the PRES^PRES3S,PHIS,PP or pp: spec
}},
{"ger", {
desc = "gerund",
tag_suffix = "ger",
persnums = {""},
generate_default_principal_part = generate_default_gerund_principal_part,
conjugate = {""},
add_reflexive_clitics = add_gerund_reflexive_clitics,
no_row_overrides = true, -- useless because there's only one form; use explicit principal part
no_single_overrides = true, -- useless because there's only one form; use explicit principal part
}},
{"presp", {
desc = "present participle",
tag_suffix = "pres|part",
persnums = {""},
generate_default_principal_part = generate_default_present_participle_principal_part,
conjugate = {""},
add_reflexive_clitics = add_participle_reflexive_clitics,
no_row_overrides = true, -- useless because there's only one form; use explicit principal part
no_single_overrides = true, -- useless because there's only one form; use explicit principal part
-- Disable this; seems most verbs do have present participles
-- not_defaulted = true, -- not defaulted, user has to request it explicitly
dont_check_defective_status = true, -- this is frequently missing and doesn't indicate a defective verb
}},
}
-- Index the row specs by row slot name (e.g. "pres", "sub") so that a row's spec can be looked up directly.
local row_conjugation_map = {}
for index = 1, #row_conjugations do
	local entry = row_conjugations[index]
	row_conjugation_map[entry[1]] = entry[2]
end
-- Set of slots that can be given a single override, and set of slots that can be given a late single override.
local overridable_slot_set = {}
local late_overridable_slot_set = {}
-- Walk over the person/number combinations of each row to populate all_verb_slots and the two sets above.
for _, rowconj in ipairs(row_conjugations) do
	local rowslot, rowspec = rowconj[1], rowconj[2]
	local suffix = rowspec.tag_suffix
	for _, persnum in ipairs(rowspec.persnums) do
		local slot = rowslot .. persnum
		local tag_prefix = person_number_tag_prefix[persnum]
		-- A tag suffix of "-" is passed through unchanged as the slot's tag; otherwise the tag is the
		-- person/number prefix followed by the row's tag suffix.
		local tag = suffix == "-" and "-" or tag_prefix .. suffix
		table.insert(all_verb_slots, {slot, tag})
		if not rowspec.no_single_overrides then
			overridable_slot_set[slot] = true
		end
		-- For now, we allow all slots to be late-overridable. Maybe we will rethink this later.
		late_overridable_slot_set[slot] = true
	end
end
-- Apply any row overrides (as given by the user using e.g. 'subrow:') for the row `rowslot`, replacing the forms
-- already generated in `base.forms` for each overridden slot. A form of '+' in the override stands for the
-- default-generated form(s) of that slot; an error is thrown if there aren't any. Any other form that isn't among
-- the default-generated forms causes the row and the affected slot(s) to be marked as irregular.
local function handle_row_overrides_for_row(base, rowslot)
	local overrides_by_persnum = base.row_override_specs[rowslot]
	if not overrides_by_persnum then
		return
	end
	for persnum, specs in pairs(overrides_by_persnum) do
		local slot = rowslot .. persnum
		-- Remember the default-generated forms, since the slot is erased below before the override is processed.
		local default_forms = base.forms[slot]

		-- Record that the override for `slot` differs from the default-generated forms.
		local function note_irregularity()
			-- Note that the row has an irregularity in it.
			base.rowprops.irreg[rowslot] = true
			-- Now note that the individual form is irregular. If the row override is for a combined form like
			-- 123s, we have to map that to the individual forms (1s, 2s, 3s).
			local rowspec = row_conjugation_map[rowslot]
			if not rowspec then
				error("Internal error: No row conjugation spec for " .. rowslot)
			end
			local persnum_map = rowspec.row_override_persnums_to_full_persnums
			local full_persnums = persnum_map and persnum_map[persnum]
			if not full_persnums then
				base.is_irreg[rowslot .. persnum] = true
				return
			end
			-- Propagate individual irregularities to actual person/number forms, as for the present and
			-- imperfect subjunctive.
			for _, full_persnum in ipairs(full_persnums) do
				base.is_irreg[rowslot .. full_persnum] = true
			end
		end

		local function generate_row_override_forms(form)
			if form ~= "+" then
				-- Check whether the row override form is the same as the default; if not, it's an irregularity.
				if not general_list_form_contains_form(default_forms, form) then
					note_irregularity()
				end
				return form
			end
			if not default_forms then
				error(("Default form '+' requested in row override '%srow:' for slot %s but no default-generated form available; "
					.. "typically this means the principal part was given as '-'")
					:format(rowslot, slot))
			end
			return default_forms
		end

		base.forms[slot] = nil -- erase existing form before generating override
		process_specs(base, base.forms, slot, specs, generate_row_override_forms)
	end
end
-- Apply any single-slot overrides for the slots of row `rowslot`, replacing the forms already present in
-- `base.forms`. `override_spec` is the name of the field in `base` holding the overrides, keyed by slot; the callers
-- in this module pass "single_override_specs" (from conjugate_row()) or "late_single_override_specs" (from
-- conjugate_verb()). A form of '+' in the override stands for the existing form(s) of the slot; an error is thrown
-- if there aren't any. Any other form that isn't among the existing forms causes the row and the slot to be marked
-- as irregular.
local function handle_single_overrides_for_row(base, override_spec, rowslot)
	local rowspec = row_conjugation_map[rowslot]
	if not rowspec then
		error("Internal error: No row conjugation spec for " .. rowslot)
	end

	-- FIXME: We may need to rethink the handling of irregularity markers. If the user e.g. sets an irregular
	-- override using 'pres1p:' and then sets it back to regular using 'pres1p!:', it ends up irregular.
	for _, persnum in ipairs(rowspec.persnums) do
		local slot = rowslot .. persnum
		if base[override_spec][slot] then
			-- Remember the existing forms, since the slot is erased below before the override is processed.
			local existing_generated_form = base.forms[slot]
			local function generate_override_forms(form)
				if form == "+" then
					if not existing_generated_form then
						-- The message has a single placeholder, for the full slot name (not the row slot).
						error(("Default form '+' requested in override for slot %s but no default-generated form available; "
							.. "typically this means the principal part was given as '-'")
							:format(slot))
					end
					return existing_generated_form
				end
				-- Check whether the single override form is the same as the default; if not, it's an irregularity.
				if not general_list_form_contains_form(existing_generated_form, form) then
					base.rowprops.irreg[rowslot] = true
					base.is_irreg[slot] = true
				end
				return form
			end
			base.forms[slot] = nil -- erase existing form before generating override
			process_specs(base, base.forms, slot, base[override_spec][slot], generate_override_forms)
		end
	end
end
-- Conjugate the row named `rowslot` (e.g. "pres", "sub"), storing the resulting forms into `base.forms`. The steps
-- are as follows:
-- 1. If the row spec has a `generate_default_principal_part` function, generate the principal part of the row into
--    `base.principal_part_forms[rowslot]`, using the user-specified principal part if given and otherwise the default.
-- 2. Conjugate the row, either by calling the row spec's `conjugate` function or, if `conjugate` is a table of
--    endings, by removing the first ending from the principal part and adding each ending in turn.
-- 3. Add any footnotes stored in `base.principal_part_footnotes[rowslot]` to all forms in the row.
-- 4. Apply row overrides, copy combined slots (e.g. 123s) to the individual slots, and apply single overrides.
local function conjugate_row(base, rowslot)
	local rowspec = row_conjugation_map[rowslot]
	if not rowspec then
		error("Internal error: Unrecognized row slot '" .. rowslot .. "'")
	end

	-- Generate the principal part for this row now if it has an entry for `generate_default_principal_part`.
	if rowspec.generate_default_principal_part then
		local function generate_principal_part_forms(form)
			-- If form == "+", either the user did not specify a principal part override (like 'sub:') or gave the value as '+'.
			-- In this circumstance, and provided the user did not specify a row override (like 'subrow:'), we need the default
			-- principal part in order to conjugate the row, so throw an error if we can't generate it. (If the user gave a row
			-- override, we may still need the default principal part if the row override contains '+', so we could check for
			-- this and set 'do_err', but it seems simpler to rely on the check in `handle_row_overrides_for_row` that makes
			-- sure that a default form is available when the user specifies '+' in a row override.)
			local do_err = form == "+" and not base.row_override_specs[rowslot]
			local default_principal_part = rowspec.generate_default_principal_part(base, do_err)
			if default_principal_part then
				-- There may be no default; e.g. if fut:- is given, the default conditional principal part is nil.
				-- process_specs() calls convert_to_general_list_form() on the output in any case and we need it in this form
				-- in order to call general_list_form_contains_form(), so we may as well convert it now.
				default_principal_part = iut.convert_to_general_list_form(default_principal_part)
			end
			if form == "+" then
				return default_principal_part
			end
			-- Check whether the principal part is the same as the default; if not, the entire row is irregular.
			if not general_list_form_contains_form(default_principal_part, form) then
				base.rowprops.irreg[rowslot] = true
				for _, persnum in ipairs(rowspec.persnums) do
					base.is_irreg[rowslot .. persnum] = true
				end
			end
			return form
		end

		-- If the user gave no principal part, use '+' (the default) unless the row is marked `not_defaulted`, in
		-- which case use '-'.
		local principal_part_specs = base.principal_part_specs[rowslot] or rowspec.not_defaulted and {{form = "-"}}
			or {{form = "+"}}
		process_specs(base, base.principal_part_forms, rowslot, principal_part_specs, generate_principal_part_forms)
	end

	if type(rowspec.conjugate) == "table" then
		-- The endings correspond to the row override person/number combinations if given (e.g. the combined 12s slot
		-- of the imperfect subjunctive), otherwise to the full person/number combinations.
		local persnums = rowspec.row_override_persnums or rowspec.persnums
		if #rowspec.conjugate ~= #persnums then
			error("Internal error: Expected " .. #persnums .. " elements for row slot '" .. rowslot
				.. "', but saw " .. #rowspec.conjugate)
		end
		-- Derive the stem by removing the ending of the principal part (the first ending) from the principal part.
		local stem = iut.map_forms(base.principal_part_forms[rowslot], function(form)
			local principal_part_ending = rowspec.conjugate[1]
			if type(principal_part_ending) ~= "string" then
				error(("Internal error: First element of the `.conjugate` table of the rowspec for row '%s' is not "
					.. "a single string; if this is needed, either use a conjugate function instead of a table, "
					.. "generalize the code following this error message, or introduce an additional rowspec element "
					.. "`principal_part_ending` containing a single string"):format(rowslot))
			end
			if not rfind(form, principal_part_ending .. "$") then
				-- Generate the principal part description from the first person/number of the row (which should
				-- always be the principal part) + the overall row description.
				local principal_part_desc = principal_part_person_number_desc[persnums[1]] .. rowspec.desc
				error(rowslot .. "row: must be given in order to generate the " .. rowspec.desc .. " because "
					.. "explicit " .. principal_part_desc .. " '" .. form .. "' does not end in -"
					.. principal_part_ending)
			end
			return rsub(form, principal_part_ending .. "$", "")
		end)
		for i, persnum in ipairs(persnums) do
			add_forms(base, rowslot .. persnum, stem, rowspec.conjugate[i])
		end
	else
		rowspec.conjugate(base, rowslot)
	end

	-- Now add any footnotes derived from principal part overrides of the form '+[footnote]' used in conjunction with
	-- built-in verbs.
	if base.principal_part_footnotes[rowslot] then
		for _, persnum in ipairs(rowspec.persnums) do
			local full_slot = rowslot .. persnum
			if base.forms[full_slot] then
				-- To save on memory, side-effect the existing forms.
				for _, formobj in ipairs(base.forms[full_slot]) do
					formobj.footnotes = iut.combine_footnotes(formobj.footnotes, base.principal_part_footnotes[rowslot])
				end
			end
		end
	end

	handle_row_overrides_for_row(base, rowslot)

	-- If there's a mapping from row override persnums to full persnums, copy the slots accordingly.
	if rowspec.row_override_persnums_to_full_persnums then
		for row_override_persnum, full_persnums in pairs(rowspec.row_override_persnums_to_full_persnums) do
			for _, full_persnum in ipairs(full_persnums) do
				copy_forms(base, rowslot .. full_persnum, base.forms[rowslot .. row_override_persnum])
			end
		end
	end

	handle_single_overrides_for_row(base, "single_override_specs", rowslot)
end
-- Process specs given by the user using 'addnote[SLOTSPEC][FOOTNOTE][FOOTNOTE][...]'. SLOTSPEC can be a Lua pattern,
-- a comma-separated list of Lua patterns (any of which need to match), or a hyphen followed by one or more
-- comma-separated patterns (which negates the sense of the matching). Each pattern must match the entire slot name.
-- The footnotes are added to all forms of all matching slots in `base.forms`; when the matching is negated, the
-- slots "inf" and "aux" are never affected. An error is thrown if a slot spec ends up not changing any form.
local function process_addnote_specs(base)
	for _, spec in ipairs(base.addnote_specs) do
		for _, slot_spec in ipairs(spec.slot_specs) do
			local negated = false
			local any_changed = false
			-- Keep the spec as written by the user (including any leading hyphen) for the error message.
			local orig_slot_spec = slot_spec
			if slot_spec:find("^%-") then
				negated = true
				slot_spec = usub(slot_spec, 2)
			end
			local single_specs = rsplit(slot_spec, ",")
			for slot, forms in pairs(base.forms) do
				-- The slot matches if any one of the comma-separated patterns matches it; each pattern has to be
				-- tried individually rather than the comma-joined spec as a whole.
				local matches = false
				for _, single_spec in ipairs(single_specs) do
					if rfind(slot, "^" .. single_spec .. "$") then
						matches = true
						break
					end
				end
				if not negated and matches or negated and not matches and slot ~= "inf" and slot ~= "aux" then
					-- To save on memory, side-effect the existing forms.
					for _, form in ipairs(forms) do
						form.footnotes = iut.combine_footnotes(form.footnotes, spec.footnotes)
						any_changed = true
					end
				end
			end
			if not any_changed then
				error(("addnote spec '%s' had no effect; correct it or remove it"):format(orig_slot_spec))
			end
		end
	end
end
-- Go through the generated forms and record in `base.rowprops` which rows are defective (have missing forms) or
-- unknown (have forms given as '?'), and which rows are entirely missing or entirely unknown. Rows whose spec sets
-- `dont_check_defective_status` are skipped. The auxiliary, which is not a row in `row_conjugations`, is checked
-- separately based on `base.principal_part_specs.aux`.
local function check_for_defective_and_unknown_rows(base)
	local rowprops = base.rowprops
	for _, rowconj in ipairs(row_conjugations) do
		local rowslot, rowspec = rowconj[1], rowconj[2]
		if not rowspec.dont_check_defective_status then
			local every_form_unknown = true
			local every_slot_missing = true
			for _, persnum in ipairs(rowspec.persnums) do
				local slot = rowslot .. persnum
				local forms = base.forms[slot]
				if forms then
					every_slot_missing = false
					for _, formobj in ipairs(forms) do
						if formobj.form == "?" then
							rowprops.unknown[rowslot] = true
						else
							every_form_unknown = false
						end
					end
				elseif not skip_slot(base, slot, "checking defective") then
					-- A missing slot only counts as defective when skip_slot() does not say to skip it.
					rowprops.defective[rowslot] = true
				end
			end
			rowprops.all_unknown[rowslot] = every_form_unknown
			rowprops.all_defective[rowslot] = every_slot_missing
		end
	end

	local aux_specs = base.principal_part_specs.aux
	if aux_specs then
		local every_aux_unknown = true
		for _, auxobj in ipairs(aux_specs) do
			if auxobj.form == "?" then
				rowprops.unknown.aux = true
			else
				every_aux_unknown = false
			end
		end
		rowprops.all_unknown.aux = every_aux_unknown
	elseif not base.verb.is_reflexive then
		-- No auxiliary was recorded and the verb isn't reflexive, so the auxiliary counts as (entirely) missing.
		rowprops.defective.aux = true
		rowprops.all_defective.aux = true
	end
end
-- Apply add_links() to the forms of every slot in `base.forms`, modifying the forms in place. This gives links to
-- any forms that don't have them yet; redundant ones will be stripped later.
local function add_missing_links_to_forms(base)
	local forms_by_slot = base.forms
	for slot in pairs(forms_by_slot) do
		map_side_effecting_forms(forms_by_slot[slot], add_links)
	end
end
-- Apply m_links.remove_links() to the forms of every slot in `base.forms`, modifying the forms in place. Used in
-- case of noautolinkverb.
local function remove_links_from_forms(base)
	local forms_by_slot = base.forms
	for slot in pairs(forms_by_slot) do
		map_side_effecting_forms(forms_by_slot[slot], m_links.remove_links)
	end
end
-- Conjugate the verb described by `base`: add the default verb forms, conjugate each row in the order given in
-- `row_conjugations`, add reflexive clitics if the verb has any, erase suppressed slots, apply late single
-- overrides, process 'addnote' specs, record defective and unknown rows, and finally either remove links from the
-- forms (if noautolinkverb was given) or add missing links to them.
local function conjugate_verb(base)
	-- Call `action` on the row slot and row spec of each row, in order.
	local function each_row(action)
		for index = 1, #row_conjugations do
			local entry = row_conjugations[index]
			action(entry[1], entry[2])
		end
	end

	add_default_verb_forms(base)
	each_row(function(rowslot)
		conjugate_row(base, rowslot)
	end)

	-- An empty `linked_suf` means there are no clitics to add.
	if base.verb.linked_suf ~= "" then
		each_row(function(rowslot, rowspec)
			rowspec.add_reflexive_clitics(base, rowslot)
		end)
	end

	erase_suppressed_slots(base)
	each_row(function(rowslot)
		handle_single_overrides_for_row(base, "late_single_override_specs", rowslot)
	end)

	process_addnote_specs(base)
	check_for_defective_and_unknown_rows(base)

	if not base.args.noautolinkverb then
		add_missing_links_to_forms(base)
	else
		remove_links_from_forms(base)
	end
end
-- Analyze the lemma `lemma` to see whether it ends in one or more clitic particles, and return a table with the
-- following fields:
-- * `raw_verb`: the lemma minus any clitics recognized at its end;
-- * `linked_suf`: the clitics in the form in which they are suffixed to the verb ("" if no clitics);
-- * `finite_pref`: the clitics in the form in which they are prefixed to a finite form ("" if no clitics);
-- * `finite_pref_elided_e`, `finite_pref_elided_ho`: variants of `finite_pref` in which some clitics appear in
--   elided form (e.g. [[l']], [[c']], [[n']]);
-- * `is_pronominal`: true if a clitic other than just 'si' was recognized;
-- * `is_reflexive`: true if the clitics include 'si' or 'se'.
-- The clitic combinations are tried in a fixed order, from the most specific to the least specific, and the first
-- one that matches wins.
local function analyze_verb(lemma)
	-- The particles that can go after a verb are:
	-- * la, le
	-- * ne
	-- * ci, vi (sometimes in the form ce, ve)
	-- * si (sometimes in the form se)
	-- * gli
	-- Observed combinations:
	-- * ce + la: [[avercela]] "to be angry (at someone)", [[farcela]] "to make it, to succeed",
	-- [[mettercela tutta]] "to put everything (into something)"
	-- * se + la: [[sbrigarsela]] "to deal with", [[bersela]] "to naively believe in",
	-- [[sentirsela]] "to have the courage to face (a difficult situation)",
	-- [[spassarsela]] "to live it up", [[svignarsela]] "to scurry away",
	-- [[squagliarsela]] "to vamoose, to clear off", [[cercarsela]] "to be looking for (trouble etc.)",
	-- [[contarsela]] "to have a distortedly positive self-image; to chat at length",
	-- [[dormirsela]] "to be fast asleep", [[filarsela]] "to slip away, to scram",
	-- [[giostrarsela]] "to get away with; to turn a situation to one's advantage",
	-- [[cavarsela]] "to get away with; to get out of (trouble); to make the best of; to manage (to do); to be good at",
	-- [[meritarsela]] "to get one's comeuppance", [[passarsela]] "to fare (well, badly)",
	-- [[rifarsela]] "to take revenge", [[sbirbarsela]] "to slide by (in life)",
	-- [[farsela]]/[[intendersela]] "to have a secret affair or relationship with",
	-- [[farsela addosso]] "to shit oneself", [[prendersela]] "to take offense at; to blame",
	-- [[prendersela comoda]] "to take one's time", [[sbrigarsela]] "to finish up; to get out of (a difficult situation)",
	-- [[tirarsela]] "to lord it over", [[godersela]] "to enjoy", [[vedersela]] "to see (something) through",
	-- [[vedersela brutta]] "to have a hard time with; to be in a bad situation",
	-- [[aversela]] "to pick on (someone)", [[battersela]] "to run away, to sneak away",
	-- [[darsela a gambe]] "to run away", [[fumarsela]] "to sneak away",
	-- [[giocarsela]] "to behave (a certain way); to strategize; to play"
	-- * se + ne: [[andarsene]] "to take leave", [[approfittarsene]] "to take advantage of",
	-- [[fottersene]]/[[strafottersene]] "to not give a fuck",
	-- [[fregarsene]]/[[strafregarsene]] "to not give a damn",
	-- [[guardarsene]] "to beware; to think twice", [[impiparsene]] "to not give a damn",
	-- [[morirsene]] "to fade away; to die a lingering death", [[ridersene]] "to laugh at; to not give a damn",
	-- [[ritornarsene]] "to return to", [[sbattersene]]/[[strabattersene]] "to not give a damn",
	-- [[infischiarsene]] "to not give a damn", [[stropicciarsene]] "to not give a damn",
	-- [[sbarazzarsene]] "to get rid of, to bump off", [[andarsene in acqua]] "to be diluted; to decay",
	-- [[nutrirsene]] "to feed oneself", [[curarsene]] "to take care of",
	-- [[intendersene]] "to be an expert (in)", [[tornarsene]] "to return, to go back",
	-- [[starsene]] "to stay", [[farsene]] "to matter; to (not) consider; to use",
	-- [[farsene una ragione]] "to resign; to give up; to come to terms with; to settle (a dispute)",
	-- [[riuscirsene]] "to repeat (something annoying)", [[venirsene]] "to arrive slowly; to leave"
	-- * ci + si: [[trovarcisi]] "to find oneself in a happy situation",
	-- [[vedercisi]] "to imagine oneself (in a situation)", [[sentircisi]] "to feel at ease"
	-- * vi + si: [[recarvisi]] "to go there"
	-- * glie + la: [[fargliela]] "to succeed"
	--
	local clitic_to_substitutable = {ce = "[[ce]]", ve = "[[ve]]", se = "<se>"}
	local clitic_to_elided = {
		ci = "[[c']]", vi = "[[vi]] ", si = "[[si]] ",
		lo = "[[l']]", la = "[[l']]", li = "[[li]] ", le = "[[le]] ",
		gli = "[[gli]] ",
	}

	-- Package up the results of the analysis into the table returned to the caller.
	local function analysis(raw_verb, pronominal, reflexive, suffixed, prefixed, prefixed_elided_e, prefixed_elided_ho)
		return {
			raw_verb = raw_verb,
			linked_suf = suffixed,
			finite_pref = prefixed,
			finite_pref_elided_e = prefixed_elided_e,
			finite_pref_elided_ho = prefixed_elided_ho,
			is_pronominal = pronominal,
			is_reflexive = reflexive,
		}
	end

	local verb, clitic, clitic2

	-- ce/ve/se + lo/la/li/le
	verb, clitic, clitic2 = rmatch(lemma, "^(.-)([cvs]e)(l[oaie])$")
	if verb then
		local first = clitic_to_substitutable[clitic]
		local elided = first .. " " .. clitic_to_elided[clitic2]
		return analysis(verb, true, clitic == "se", first .. "[[" .. clitic2 .. "]]",
			first .. " [[" .. clitic2 .. "]] ", elided, elided)
	end

	-- glie + lo/la/li/le
	verb, clitic = rmatch(lemma, "^(.-)glie(l[oaie])$")
	if verb then
		local suffixed = "[[glie" .. clitic .. "]]"
		-- Take only the first return value of gsub() (the substituted string).
		local elided = clitic_to_elided[clitic]:gsub("^%[%[", "[[glie")
		return analysis(verb, true, false, suffixed, suffixed .. " ", elided, elided)
	end

	-- ce/ve/se + ne
	verb, clitic = rmatch(lemma, "^(.-)([cvs]e)ne$")
	if verb then
		local first = clitic_to_substitutable[clitic]
		local unelided = first .. " [[ne]] "
		return analysis(verb, true, clitic == "se", first .. "[[ne]]", unelided, first .. " [[n']]", unelided)
	end

	-- ci/vi + si
	verb, clitic = rmatch(lemma, "^(.-)([cv]i)si$")
	if verb then
		local si_marker = clitic == "ci" and "<si_no_ci>" or "<si_no_vi>"
		local space_marker = clitic == "ci" and "<space_no_ci>" or "<space_no_vi>"
		local unelided = si_marker .. space_marker .. "[[" .. clitic .. "]] "
		return analysis(verb, true, true, "[[" .. clitic .. "]]" .. si_marker, unelided,
			si_marker .. space_marker .. clitic_to_elided[clitic], unelided)
	end

	-- ci/vi
	verb, clitic = rmatch(lemma, "^(.-)([cv]i)$")
	if verb then
		local unelided = "[[" .. clitic .. "]] "
		return analysis(verb, true, false, "[[" .. clitic .. "]]", unelided, clitic_to_elided[clitic], unelided)
	end

	-- si (reflexive but not pronominal)
	verb = rmatch(lemma, "^(.-)si$")
	if verb then
		return analysis(verb, false, true, "<si>", "<si> ", "<si> ", "<si> ")
	end

	-- ne
	verb = rmatch(lemma, "^(.-)ne$")
	if verb then
		return analysis(verb, true, false, "[[ne]]", "[[ne]] ", "[[n']]", "[[ne]] ")
	end

	-- gli, or else lo/la/li/le
	verb, clitic = rmatch(lemma, "^(.-)(gli)$")
	if not verb then
		verb, clitic = rmatch(lemma, "^(.-)(l[oaie])$")
	end
	if verb then
		local elided = clitic_to_elided[clitic]
		return analysis(verb, true, false, "[[" .. clitic .. "]]", "[[" .. clitic .. "]] ", elided, elided)
	end

	-- No clitics: neither pronominal nor reflexive.
	return analysis(lemma, false, false, "", "", "", "")
end
-- Subfunction of find_builtin_verb(). Match the single spec `spec` against `verb`. If `spec` begins with ^, it must
-- match the whole of `verb`; otherwise it only needs to match at the end of `verb`. On a match, return two values:
-- the prefix (whatever precedes the matched part; the empty string for a ^ spec) and the main verb. Return nothing
-- if there is no match.
local function match_spec_against_verb(spec, verb)
	if not spec:find("^%^") then
		-- Anchored at the end only; anything before the matched part is the prefix.
		local prefix, main_verb = rmatch(verb, "^(.*)(" .. spec .. ")$")
		if prefix then
			return prefix, main_verb
		end
	elseif rfind(verb, spec .. "$") then
		-- The spec carries its own ^ anchor, so the whole verb must match.
		return "", verb
	end
end
-- Subfunction of find_builtin_verb(). Match the prefix `specprefix` followed by the spec `spec` against `verb`.
-- If `specprefix` begins with ^, the prefix (minus the ^) must match at the beginning of `verb`; otherwise further
-- material may precede it, and that material is included in the returned prefix. On a match, return two values:
-- the prefix and the main verb. Return nothing if there is no match.
local function match_prefixed_spec_against_verb(specprefix, spec, verb)
	local pattern
	if specprefix:find("^%^") then
		-- must match exactly
		local bare_prefix = specprefix:sub(2)
		if bare_prefix == "" then
			-- We can't use a two-capture pattern here because an empty () returns the current position
			-- in rmatch().
			local whole_verb = rmatch(verb, "^(" .. spec .. ")$")
			if whole_verb then
				return "", whole_verb
			end
			return
		end
		pattern = "^(" .. bare_prefix .. ")(" .. spec .. ")$"
	else
		pattern = "^(.*" .. specprefix .. ")(" .. spec .. ")$"
	end
	local prefix, main_verb = rmatch(verb, pattern)
	if prefix then
		return prefix, main_verb
	end
end
-- Find the built-in verb matching the user-specified verb `verb`, and return three values: the prefix, the main
-- verb and the conjugation spec of the built-in verb. The entries in the list of built-in verbs are tried in order
-- and the first match wins. Return nothing if no built-in verb matches. The module holding the built-in verbs is
-- loaded on first use.
local function find_builtin_verb(verb)
	m_builtin = m_builtin or require("Module:0it-verb/builtin")
	for _, builtin_verb in ipairs(m_builtin.builtin_verbs) do
		local spec, conj = builtin_verb[1], builtin_verb[2]
		if type(spec) ~= "string" then
			-- Of the form {term = "ergere", prefixes = {"^", "ad", "ri"}}. Note that the prefixes not preceded by ^
			-- can have further prefixes before them.
			for _, spec_prefix in ipairs(spec.prefixes) do
				local prefix, main_verb = match_prefixed_spec_against_verb(spec_prefix, spec.term, verb)
				if prefix then
					return prefix, main_verb, conj
				end
			end
		else
			local prefix, main_verb = match_spec_against_verb(spec, verb)
			if prefix then
				return prefix, main_verb, conj
			end
		end
	end
end
-- Parse the "inside" of an angle bracket spec (e.g. "a/é"), storing the results into `base`. This is the actual
-- function that parses indicator specs. It is separated from, and called from, parse_indicator_spec() in order to
-- deal with built-in verbs, which have their own indicator specs that must be combined with the indicator spec given
-- by the user. `is_builtin_verb` is true if we're processing a built-in verb spec, as opposed to a user-specified one.
local function parse_inside(base, inside, is_builtin_verb)
local function parse_err(msg)
error((is_builtin_verb and "Internal error processing built-in verb spec: " or "") .. msg
.. ": <" .. inside .. ">")
end
local function parse_qualifiers(separated_group)
local qualifiers
for j = 2, #separated_group - 1, 2 do
if separated_group[j + 1] ~= "" then
parse_err("Extraneous text after bracketed qualifiers: '" .. table.concat(separated_group) .. "'")
end
if not qualifiers then
qualifiers = {}
end
local r_spec = separated_group[j]:match("^%[r:(.*)%]$")
if r_spec then
local expanded_ref = require(com_module).parse_abbreviated_references_spec(r_spec)
table.insert(qualifiers, "[ref:" .. expanded_ref .. "]")
else
table.insert(qualifiers, separated_group[j])
end
end
return qualifiers
end
local function fetch_specs(comma_separated_group, allow_blank)
local colon_separated_groups = split_alternating_runs_and_strip_spaces(comma_separated_group, ":")
if allow_blank and #colon_separated_groups == 1 and #colon_separated_groups[1] == 1 and
colon_separated_groups[1][1] == "" then
return nil
end
local specs = {}
for _, colon_separated_group in ipairs(colon_separated_groups) do
local form = colon_separated_group[1]
if form == "" then
parse_err("Blank form not allowed here, but saw '" ..
table.concat(comma_separated_group) .. "'")
end
if form:find(",") then
parse_err("Comma in form '" .. form .. "', did you mean to use a colon?")
end
local new_spec = {form = form, footnotes = parse_qualifiers(colon_separated_group)}
for _, existing_spec in ipairs(specs) do
if m_table.deepEquals(existing_spec, new_spec) then
parse_err("Duplicate spec '" .. table.concat(colon_separated_group) .. "'")
end
end
table.insert(specs, new_spec)
end
return specs
end
-- Parse present-tense spec of the form PRES^PRES3S or just PRES, and set the appropriate properties in `base`.
-- Used in the PRES^PRES3S,PHIS,PP spec as well as with pres:PRES^PRES3S in conjunction with built-in verbs.
local function parse_present_spec(run)
local cflex_separated_groups = split_alternating_runs_and_strip_spaces(run, "%^")
if #cflex_separated_groups > 2 then
parse_err("At most one circumflex sign (^) can appear in present tense specs")
end
base.principal_part_specs.pres = fetch_specs(cflex_separated_groups[1])
if #cflex_separated_groups == 2 then
base.principal_part_specs.pres3s = fetch_specs(cflex_separated_groups[2])
end
end
local function is_root_stressed(separator)
return separator == "\\" or separator == "\\/" or separator == "/\\"
end
local function get_opt_root_stressed_value(separator)
return separator == "\\/" and "root-first" or separator == "/\\" and "ending-first" or nil
end
local segments = iut.parse_balanced_segment_run(inside, "[", "]")
local dot_separated_groups = split_alternating_runs_and_strip_spaces(segments, "%.")
for i, dot_separated_group in ipairs(dot_separated_groups) do
local first_element = dot_separated_group[1]
if i == 1 then -- first dot-separated group is PRES,PHIS,PP or PRES^PRES3S,PHIS,PP or similar.
local comma_separated_groups = split_alternating_runs_and_strip_spaces(dot_separated_group, "[,\\/]+",
"preserve splitchar")
local presind = 1
local first_separator = #comma_separated_groups > 1 and comma_separated_groups[2][1]
if base.verb.is_reflexive or is_builtin_verb then
if #comma_separated_groups > 1 and first_separator ~= "," then
presind = 3
-- Fetch root-stressed infinitive, if given.
local specs = fetch_specs(comma_separated_groups[1], "allow blank")
if is_root_stressed(first_separator) then
-- For verbs like [[scegliersi]] and [[proporsi]], allow either 'é\scélgo' or '\é\scélgo'
-- and similarly either 'ó+\propóngo' or '\ó+\propóngo'.
if specs == nil then
if #comma_separated_groups > 3 and is_root_stressed(comma_separated_groups[4][1]) then
base.principal_part_specs.root_stressed_inf = fetch_specs(comma_separated_groups[3])
presind = 5
else
base.principal_part_specs.root_stressed_inf = {{form = "+"}}
end
else
base.principal_part_specs.root_stressed_inf = specs
end
base.props.opt_root_stressed_inf = get_opt_root_stressed_value(first_separator)
elseif specs ~= nil then
local errpref = is_builtin_verb and "With built-in verb" or "With reflexive verb"
parse_err(errpref .. ", can't specify anything before initial slash, but saw '"
.. table.concat(comma_separated_groups[1]))
end
elseif not is_builtin_verb and rfind(base.verb.raw_verb, "er$") then
parse_err("With reflexive -ere verb, must precede present spec with / or \\ to indicate whether infinitive is root-stressed")
end
if not is_builtin_verb then
base.principal_part_specs.aux = {{form = "èssere"}}
end
else -- non-reflexive
if #comma_separated_groups == 1 or first_separator == "," then
parse_err("With non-reflexive verb, use a spec like AUX/PRES, AUX\\PRES, AUX/PRES,PAST,PP or similar")
end
presind = 3
-- Fetch auxiliary or auxiliaries.
local colon_separated_groups = split_alternating_runs_and_strip_spaces(comma_separated_groups[1], ":")
for _, colon_separated_group in ipairs(colon_separated_groups) do
local aux = colon_separated_group[1]
if aux == "a" then
aux = "avére"
elseif aux == "e" then
aux = "èssere"
elseif aux == "-" then
if #colon_separated_group > 1 then
parse_err("No footnotes allowed with '-' spec for auxiliary")
end
aux = nil
elseif aux == "?" then
-- remains as-is
else
parse_err("Unrecognized auxiliary '" .. aux ..
"', should be 'a' (for [[avere]]), 'e' (for [[essere]]), or '-' if no past participle")
end
if aux then
if base.principal_part_specs.aux then
for _, existing_aux in ipairs(base.principal_part_specs.aux) do
if existing_aux.form == aux then
parse_err("Auxiliary '" .. aux .. "' specified twice")
end
end
else
base.principal_part_specs.aux = {}
end
table.insert(base.principal_part_specs.aux, {form = aux, footnotes = parse_qualifiers(colon_separated_group)})
end
end
-- Fetch root-stressed infinitive, if given.
if is_root_stressed(first_separator) then
if #comma_separated_groups > 3 and is_root_stressed(comma_separated_groups[4][1]) then
base.principal_part_specs.root_stressed_inf = fetch_specs(comma_separated_groups[3])
presind = 5
else
base.principal_part_specs.root_stressed_inf = {{form = "+"}}
end
base.props.opt_root_stressed_inf = get_opt_root_stressed_value(first_separator)
end
end
if #comma_separated_groups == presind and comma_separated_groups[presind][1] == "@" then
-- We will find the conjugation for the built-in verb later, after we've seen whether there is an
-- '.rre' property.
base.props.builtin = true
else
-- Parse present
parse_present_spec(comma_separated_groups[presind])
-- Parse past historic
if #comma_separated_groups > presind then if comma_separated_groups[presind + 1][1] ~= "," then
parse_err("Use a comma not slash to separate present from past historic")
end
base.principal_part_specs.phis = fetch_specs(comma_separated_groups[presind + 2])
end
-- Parse past participle
if #comma_separated_groups > presind + 2 then
if comma_separated_groups[presind + 3][1] ~= "," then
parse_err("Use a comma not slash to separate past historic from past participle")
end
base.principal_part_specs.pp = fetch_specs(comma_separated_groups[presind + 4])
end
if #comma_separated_groups > presind + 4 then
parse_err("Extraneous text after past participle")
end
end
elseif first_element == "impers" or first_element == "thirdonly" or first_element == "rre" or
first_element == "nofinite" or first_element == "no_root_stressed" or first_element == "presonly" then
if #dot_separated_group > 1 then
parse_err("No footnotes allowed with '" .. first_element .. "' spec")
end
base.props[first_element] = true
elseif first_element == "addnote" then
local spec_and_footnotes = parse_qualifiers(dot_separated_group)
if #spec_and_footnotes < 2 then
parse_err("Spec with 'addnote' should be of the form 'addnote[SLOTSPEC][FOOTNOTE][FOOTNOTE][...]'")
end
local slot_spec = table.remove(spec_and_footnotes, 1)
local slot_spec_inside = rmatch(slot_spec, "^%[(.*)%]$")
if not slot_spec_inside then
parse_err("Internal error: slot_spec " .. slot_spec .. " should be surrounded with brackets")
end
local slot_specs = rsplit(slot_spec_inside, ",")
for j, spec in ipairs(slot_specs) do
slot_specs[j] = strip_spaces(spec)
end
table.insert(base.addnote_specs, {slot_specs = slot_specs, footnotes = spec_and_footnotes})
else
local first_element_prefix, first_element_minus_prefix = rmatch(first_element,
"^%s*([a-z0-9_!]+)%s*:%s*(.-)%s*$")
if not first_element_prefix then
parse_err("Dot-separated element should be either 'impers', 'thirdonly', 'nofinite', 'no_root_stressed', 'presonly', 'rre' or be of the form "
.. "'PREFIX:SPEC', but saw '" .. table.concat(dot_separated_group) .. "'")
end
dot_separated_group[1] = first_element_minus_prefix
if first_element_prefix == "stem" then
base.principal_part_specs.explicit_stem_spec = fetch_specs(dot_separated_group)
elseif first_element_prefix == "phisstem" then
base.principal_part_specs.explicit_phis_stem_spec = fetch_specs(dot_separated_group)
elseif first_element_prefix == "unstressed_stem" then
base.principal_part_specs.explicit_unstressed_stem_spec = fetch_specs(dot_separated_group)
elseif first_element_prefix == "pres" then
if first_element_minus_prefix == "-" and #dot_separated_group == 1 then
-- Allow 'pres:-' to be given to suppress the present. We implement this using the
-- equivalent of a row override for each of the present forms; this also cancels out the
-- present subjunctive and the imperative.
local hyphen_form = {{form = "-"}}
base.row_override_specs.pres = {
["1s"] = hyphen_form, ["2s"] = hyphen_form, ["3s"] = hyphen_form,
["1p"] = hyphen_form, ["2p"] = hyphen_form, ["3p"] = hyphen_form,
}
else
if not base.props.builtin then
parse_err("Can't specify 'pres:' override except when '@' is given to request a built-in verb")
end
parse_present_spec(dot_separated_group)
end
elseif row_conjugation_map[first_element_prefix] then
local no_explicit_pp = row_conjugation_map[first_element_prefix].no_explicit_principal_part
if no_explicit_pp == true or not base.props.builtin and no_explicit_pp == "builtin" then
parse_err("Can't specify principal part for " .. row_conjugation_map[first_element_prefix].desc
.. " using '" .. first_element_prefix .. ":'; use the specification PRES^PRES3S.PHIS.PP")
else
base.principal_part_specs[first_element_prefix] = fetch_specs(dot_separated_group)
end
elseif overridable_slot_set[first_element_prefix] then
base.single_override_specs[first_element_prefix] = fetch_specs(dot_separated_group)
elseif first_element_prefix:find("!$") then
local late_override_slot = rmatch(first_element_prefix, "^(.*)!$")
if late_overridable_slot_set[late_override_slot] then
base.late_single_override_specs[late_override_slot] = fetch_specs(dot_separated_group)
else
parse_err("Late override " .. first_element_prefix .. " refers to an unrecognized slot in '" ..
table.concat(dot_separated_group) .. "'")
end
elseif first_element_prefix:find("row$") then
local row_override_slot = rmatch(first_element_prefix, "^(.*)row$")
if row_conjugation_map[row_override_slot] then
local rowspec = row_conjugation_map[row_override_slot]
if rowspec.no_row_overrides then
-- This happens with e.g. pp and negimp. Doesn't make sense with pp because it's a single form
-- that can be specified completely using the explicit principal part. Rarely if ever useful
-- for negimp; use single overrides if absolutely necessary.
parse_err("Can't specify row override for " .. rowspec.desc .. " using " .. row_override_slot
.. "row:; use an explicit principal part or single overrides (if allowed)")
end
local comma_separated_groups = split_alternating_runs_and_strip_spaces(dot_separated_group, ",")
local persnums = rowspec.row_override_persnums or rowspec.persnums
if #comma_separated_groups ~= #persnums then
parse_err("For " .. row_override_slot .. "row:, expected " .. #persnums
.. " comma-separated forms but saw " .. #comma_separated_groups .. " in '"
.. table.concat(dot_separated_group) .. "'")
end
base.row_override_specs[row_override_slot] = {}
for i, persnum in ipairs(persnums) do
base.row_override_specs[row_override_slot][persnum] = fetch_specs(comma_separated_groups[i])
end
else
local row_override_slots = {}
for row_override_slot, _ in pairs(row_conjugation_map) do
table.insert(row_override_slots, row_override_slot .. "row:")
end
table.sort(row_override_slots)
parse_err("Row override spec should begin with one of " .. m_table.serialCommaJoin(row_override_slots)
.. ", but saw '" .. table.concat(dot_separated_group) .. "'")
end
else
parse_err("Unrecognized prefix '" .. first_element_prefix .. "' in '"
.. table.concat(dot_separated_group) .. "'")
end
end
end
end
local function create_base()
	-- Construct a fresh, empty `base` object: the per-verb record that is filled in by parsing the indicator spec
	-- and then by conjugation. Two fields are not created here but are attached by the caller:
	--   `lemma`: the verb lemma, as specified by the user.
	--   `verb`: various properties of the verb derived from the lemma by analyze_verb().
	--
	-- Fields created here:
	--
	-- `forms`: the final per-slot forms, processed further in [[Module:0inflection-utilities]]. Indexed by slot
	--   (e.g. "pres1s"). Each value is a list of items {form = FORM, footnotes = FOOTNOTES}, where FORM is the
	--   generated form and FOOTNOTES is nil or a list of footnotes (each surrounded by brackets, e.g. "[archaic]").
	--
	-- `principal_part_specs`: forms specified by the user in various fashions. Same shape as `forms`, except that
	--   FORM is the raw user-specified spec (e.g. "#è") that still has to be processed into an actual form. A spec
	--   may be "+" (insert the default-generated form or forms) or "-" (this form doesn't exist). Sources:
	--   (a) prefixes 'imperf:', 'fut:', 'sub:', 'impsub:', 'imp:', etc. (key is "imperf", "fut", etc.);
	--   (b) specs such as "vèngo:vègno[archaic or poetic]^viène,vénni,venùto" or "é:#è" (key is one of "pres",
	--       "pres3s", "phis" or "pp" as appropriate);
	--   (c) an explicit stem given with 'stem:' (key is "explicit_stem_spec");
	--   (d) a root-stressed infinitive spec such as "ó+" in "a\ó+\compóngo,compósi,compósto" (key is
	--       "root_stressed_inf"); the value is "+" (take the vowel spec from the present tense) when a spec like
	--       "a\è" has only one backslash, and the key is absent when a slash is used instead, as in "a/è";
	--   (e) an auxiliary given as e.g. "a[transitive]:e[intransitive]/è" (key is "aux"; the stored form is the
	--       actual auxiliary rather than "a" or "e").
	--
	-- `principal_part_footnotes`: per-row footnotes derived from overriding principal part specs of the form
	--   '+[footnote]' used in conjunction with a built-in verb.
	--
	-- `principal_part_forms`: the processed versions of the specs in `principal_part_specs`; same keys, and values
	--   in the same format as `forms`.
	--
	-- `row_override_specs`: user-specified forms for a whole tense/aspect row given with 'presrow:', 'subrow:',
	--   etc. Keyed by "pres", "sub", etc. (without the "row" suffix); each value is a table keyed by person/number
	--   suffix ("1s", "2s", ... for "pres"; "123s", "1p", "2p", ... for "sub") whose values have the same format
	--   as `principal_part_specs`.
	--
	-- `single_override_specs`: user-specified forms given with 'pres1s:', 'sub3p:', etc. Keyed by slot; values
	--   have the same format as `principal_part_specs`.
	--
	-- `late_single_override_specs`: user-specified forms given with 'pres1s!:', 'sub3p!:', etc.; these late
	--   overrides take effect after forms have been copied from one place to another and after reflexive clitics
	--   have been added. Keyed by slot; values have the same format as `principal_part_specs`.
	--
	-- `addnote_specs`: footnotes to add to individual slots or collections of slots, given with
	--   'addnote[SLOTSPEC,SLOTSPEC][FOOTNOTE][FOOTNOTE][...]'. Each element is an object
	--   {slot_specs = {SLOTSPEC, ...}, footnotes = {FOOTNOTE, ...}}.
	--
	-- `is_irreg`: indexed by individual slot ("pres1s", "sub2s", "pp", etc.); the value is true or false according
	--   to whether that form is irregular. Currently used only by [[Module:0it-headword]] to decide whether to
	--   show irregular principal parts.
	--
	-- `props`: miscellaneous Boolean properties. Current properties:
	--   - `impers`: impersonal verb, with only third-singular forms
	--   - `thirdonly`: third-person only verb
	--   - `nofinite`: verb is missing all finite forms
	--   - `no_root_stressed`: verb is missing all root-stressed forms
	--   - `presonly`: verb is missing all finite forms except the present indicative
	--   - `rre`: user gave the 'rre' indicator with a syncopated reflexive verb like [[contrarsi]] (reflexive of
	--     [[contrarre]]), which would otherwise be interpreted as the reflexive of [[contrare]]
	--   - `syncopated`: verb is syncopated, i.e. the infinitive ends in '-rre'; covers verbs with '-rre'
	--     infinitive, reflexive verbs ending in '-orsi' or '-ursi', and verbs where the user gave 'rre'
	--   - `builtin`: verb uses a built-in conjugation in [[Module:0it-verb/builtin]]
	--   - `opt_root_stressed_inf`: verb used the \/ or /\ notation for an optionally root-stressed infinitive; to
	--     find out whether the \ notation was used, look for a value in
	--     base.principal_part_specs.root_stressed_inf
	--
	-- `rowprops`: a table of tables of row-specific Boolean properties, one subtable per property:
	--   - `rowprops.irreg`: the row is irregular, i.e. at least one slot has an irregular form. Currently used to
	--     decide whether to add categories like [[:Category:Italian verbs with irregular imperfect subjunctive]]
	--     and whether to display the row's principal part in the headword line.
	--   - `rowprops.defective`: the row is defective (missing one or more forms). Forms expected to be missing
	--     because of 'impers' or 'thirdonly' don't count.
	--   - `rowprops.all_defective`: the row is missing all forms. A row should be considered completely defective
	--     if `rowprops.defective[row]` and `rowprops.all_defective[row]` (both checks are needed because of
	--     expected missing rows, such as the imperative with 'impers' or 'thirdonly').
	--   - `rowprops.unknown`: the row has at least one unknown form.
	--   - `rowprops.all_unknown`: all forms of the row are unknown.
	--
	-- There should be no other properties set directly at the `base` level.
	return {
		forms = {},
		principal_part_specs = {},
		principal_part_footnotes = {},
		principal_part_forms = {},
		row_override_specs = {},
		single_override_specs = {},
		late_single_override_specs = {},
		addnote_specs = {},
		is_irreg = {},
		props = {},
		rowprops = {
			irreg = {},
			defective = {},
			all_defective = {},
			unknown = {},
			all_unknown = {},
		},
	}
end
-- Parse the indicator spec of an Italian verb, e.g. '<a/é>'. `angle_bracket_spec` is the indicator spec itself,
-- surrounded by angle brackets. `lemma` is the verb lemma specified before the indicator spec, possibly with brackets
-- if so specified by the user, and an empty string if no lemma was given. `pagename` is the pagename, either the
-- actual name of the page or the value of pagename= if given.
--
-- For example:
-- * If the user said {{it-conj|a/é}} or {{it-conj|<a/é>}} then angle_bracket_spec == "<a/é>" and lemma == "".
-- * If the user said {{it-conj|partecipare<a/é>}} then angle_bracket_spec == "<a/é>" and lemma == "partecipare".
-- * If the user said {{it-conj|[[annunciare|annunciarsi]]<ù>}} then angle_bracket_spec == "<ù>" and
-- lemma == "[[annunciare|annunciarsi]]".
--
-- This function returns a `base` object (see create_base()) describing the parsed spec.
local function parse_indicator_spec(angle_bracket_spec, lemma, pagename)
	-- Parameters:
	--   angle_bracket_spec: the indicator spec including its surrounding angle brackets, e.g. "<a/é>".
	--   lemma: the lemma given before the spec (possibly containing links), or "" to use `pagename`.
	--   pagename: the page name used when `lemma` is blank.
	-- Returns a `base` object (see create_base()). When the spec requests a built-in verb with '@', the returned
	-- object is a new `base` seeded from the built-in conjugation with the user's specs layered on top.
	-- Raises an error (suffixed with the indicator spec) on malformed or inconsistent specs.
	local base = create_base()
	if lemma == "" then
		lemma = pagename
	end
	base.lemma = m_links.remove_links(lemma)
	base.verb = analyze_verb(lemma)
	local inside = angle_bracket_spec:match("^<(.*)>$")
	assert(inside)
	parse_inside(base, inside, false)

	-- Throw an error that quotes the whole indicator spec for context.
	local function parse_err(msg)
		error(msg .. ": " .. angle_bracket_spec)
	end

	-- Set up base.verb.verb. This must be done after parse_inside() because it depends on the '.rre' indicator.
	set_up_base_verb(base)

	if not base.props.builtin then
		return base
	end

	local prefix, main_verb, conj = find_builtin_verb(base.verb.verb)
	if not prefix then
		parse_err("Unable to find built-in verb corresponding to '" .. base.verb.verb .. "'")
	end

	-- Create a new `base`, fill it with properties from the built-in verb, and copy over the user-specified
	-- properties on top of it.
	local nbase = create_base()
	nbase.lemma = base.lemma
	-- Note that `nbase.verb` and `base.verb` are the same table, so the assignments below affect both.
	nbase.verb = base.verb
	nbase.verb.prefix = prefix
	nbase.verb.verb = prefix .. main_verb
	nbase.verb.raw_verb = prefix .. nbase.verb.raw_verb
	parse_inside(nbase, conj, "is builtin")

	if nbase.principal_part_specs.root_stressed_inf then
		local base_rsi = base.principal_part_specs.root_stressed_inf
		-- We are dealing with a built-in verb whose spec has a backslash in it, such as [[scegliere]] with spec
		-- "é\scélgo,scélsi,scélto", or [[porre]] with spec "ó\póngo,pósi,pósto". The user should have specified
		-- "a\@" or similar. We need to merge the specs appropriately.
		if not base_rsi then
			parse_err(("Built-in verb [[%s]] has a root-stressed infinitive, and so the user specification "
				.. "should have a backslash (\\) preceding the @ sign"):format(main_verb))
		end
		if #base_rsi ~= 1 or base_rsi[1].form ~= "+" or base_rsi[1].footnotes then
			-- Reflexive verbs take no auxiliary before the backslash; others are illustrated with 'a'.
			local aux_spec = base.verb.is_reflexive and "" or "a"
			parse_err(("Built-in verb [[%s]] has a root-stressed infinitive, and so the user specification "
				.. "should use a single-backslash spec like '%s\\@', not a double-backslash one like '%s\\ó\\@'"):
				format(main_verb, aux_spec, aux_spec))
		end
		-- Cancel out the user's spec so it doesn't override the built-in spec.
		base.principal_part_specs.root_stressed_inf = nil
	end

	-- If there's a prefix, add it now to all the specs derived from the built-in verb.
	if prefix ~= "" then
		-- Specs that are directives rather than actual forms must not have the prefix attached.
		local function form_should_be_preserved(form)
			local _, bare_form = parse_decorated_form(form)
			return bare_form == "+" or bare_form == "+isc" or bare_form == "-" or bare_form == "?" or is_single_vowel_spec(bare_form)
		end

		local function add_prefix_to_forms(tbl, slot)
			local saw_preserve_accent = false
			for _, formobj in ipairs(tbl[slot]) do
				local _, _, postspec = parse_decorated_form(formobj.form)
				if rfind(postspec, "!") then
					saw_preserve_accent = true
					break
				end
			end
			if saw_preserve_accent then
				-- We have to do it the "hard" way, re-inserting the forms, in case of redundancy.
				local existing_forms = tbl[slot]
				tbl[slot] = {}
				iut.insert_forms(tbl, slot, iut.map_forms(existing_forms, function(form)
					if form_should_be_preserved(form) then
						return form
					else
						-- If there is a ! after the form (indicating that monosyllabic accents should be
						-- preserved), remove it.
						local prespec, bare_form, postspec = parse_decorated_form(form)
						return prespec .. prefix .. bare_form .. rsub(postspec, "!", "")
					end
				end))
			else
				-- To save on memory, side-effect the existing forms.
				map_side_effecting_forms(tbl[slot], function(form)
					if form_should_be_preserved(form) then
						return form
					else
						local prespec, bare_form, postspec = parse_decorated_form(form)
						return prespec .. prefix .. bare_form .. postspec
					end
				end)
			end
		end

		for _, prop_table in ipairs { "principal_part_specs", "single_override_specs", "late_single_override_specs" } do
			for slot, _ in pairs(nbase[prop_table]) do
				add_prefix_to_forms(nbase[prop_table], slot)
			end
		end
		-- nbase.row_override_specs is in a different format.
		for _, prop in pairs(nbase.row_override_specs) do
			for persnum, _ in pairs(prop) do
				add_prefix_to_forms(prop, persnum)
			end
		end

		-- We also need to hack any single-vowel specs that don't already specify +, - or -- to use ++, which is
		-- like + (take the right vowel of two) but won't throw an error if there aren't two matching vowels.
		-- This is needed for e.g. [[porre]] with prefix com-, where [[comporre]] ends up with two of the same vowel.
		local function hack_single_vowel_spec(specs)
			if specs then
				for _, spec in ipairs(specs) do
					local prespec, bare_form, postspec = parse_decorated_form(spec.form)
					if rfind(bare_form, "^" .. AV .. "$") then
						spec.form = prespec .. bare_form .. "++" .. postspec
					end
				end
			end
		end
		hack_single_vowel_spec(nbase.principal_part_specs.root_stressed_inf)
		hack_single_vowel_spec(nbase.principal_part_specs.pres)
		hack_single_vowel_spec(nbase.principal_part_specs.pres3s)
	end

	-- Copy user-specified principal part specs unless '+' is used, which we handle specially. The reason for this
	-- is that + is normally used in conjunction with footnotes such as 'phis:+[rare]' where the intention is to add
	-- a footnote to all the forms of that row; but if we don't do the following, the + instead requests the default
	-- principal part generated from the infinitive.
	for slot, specs in pairs(base.principal_part_specs) do
		local saw_plus, saw_non_plus
		for _, spec in ipairs(specs) do
			local _, bare_form = parse_decorated_form(spec.form)
			if bare_form == "+" then
				saw_plus = true
			else
				saw_non_plus = true
			end
		end
		if saw_plus and saw_non_plus then
			parse_err(("For principal part '%s:', can't specify both + and something else when overriding a built-in verb"):
				format(slot))
		elseif saw_plus and slot ~= "root_stressed_inf" then
			-- Need a special case for root_stressed_inf. We do want to copy a + from the user-specified specs for
			-- root_stressed_inf to `nbase`, e.g. for [[condurre]]. The built-in spec doesn't indicate that this is
			-- root-stressed; this comes from the user-specified 'a\@' or similar, and if we don't copy it, the code in
			-- add_infinitive() triggers do_ending_stressed_infinitive(), which throws an error on -rre verbs.
			for _, spec in ipairs(specs) do
				local _, bare_form = parse_decorated_form(spec.form)
				if bare_form ~= "+" then
					parse_err(("Internal error: Saw '%s' for principal part form for slot '%s' but expected '+'"):
						format(spec.form, slot))
				end
				if spec.footnotes then
					-- Accumulate onto any footnotes already recorded for this slot.
					nbase.principal_part_footnotes[slot] = iut.combine_footnotes(
						nbase.principal_part_footnotes[slot], spec.footnotes)
				end
			end
		else
			nbase.principal_part_specs[slot] = specs
		end
	end

	-- Now copy remaining user-specified specs into the built-in verb `nbase`.
	for _, prop_table in ipairs { "row_override_specs", "single_override_specs", "late_single_override_specs",
		"props" } do
		for slot, prop in pairs(base[prop_table]) do
			nbase[prop_table][slot] = prop
		end
	end
	for _, prop_list in ipairs { "addnote_specs" } do
		for _, prop in ipairs(base[prop_list]) do
			m_table.insertIfNot(nbase[prop_list], prop)
		end
	end
	return nbase
end
-- Normalize all lemmas: add links to the text before and after each word spec (unless `args.noautolinktext` is
-- set) and remove any links from the lemmas themselves.
local function normalize_all_lemmas(alternant_multiword_spec)
	local top_spec = alternant_multiword_spec

	-- Step 1: link all the literal text surrounding the words, unless the caller turned auto-linking off.
	local autolink = not top_spec.args.noautolinktext
	if autolink then
		top_spec.post_text = add_links(top_spec.post_text)
		for _, outer_spec in ipairs(top_spec.alternant_or_word_specs) do
			outer_spec.before_text = add_links(outer_spec.before_text)
			local alternants = outer_spec.alternants
			if alternants then
				-- An alternant group: descend into each alternative and link its texts as well.
				for _, alternative in ipairs(alternants) do
					alternative.post_text = add_links(alternative.post_text)
					for _, inner_word in ipairs(alternative.word_specs) do
						inner_word.before_text = add_links(inner_word.before_text)
					end
				end
			end
		end
	end

	-- Step 2: strip any links from each lemma.
	local function unlink_lemma(base)
		base.lemma = m_links.remove_links(base.lemma)
	end
	iut.map_word_specs(top_spec, unlink_lemma)
end
-- Detect inconsistencies in indicator specs. This checks that the properties 'impers' and 'thirdonly' are consistent
-- across all verbs; checks that if the past participle is given as -, the auxiliary is also given as -; and propagates
-- certain properties (the `from_headword` property and the template args) down to each `base`.
local function detect_all_indicator_specs(alternant_multiword_spec, from_headword)
	-- Parameters:
	--   alternant_multiword_spec: the overall spec object; its `props` field is (re)initialized here.
	--   from_headword: stored unchanged on each `base` as `base.from_headword`.
	-- Raises an error if 'impers'/'thirdonly' are used inconsistently, or if a non-reflexive verb gives the past
	-- participle as '-' while still specifying an auxiliary.
	alternant_multiword_spec.props = {}
	local props_that_must_be_consistent = {"impers", "thirdonly"}

	-- Propagate some settings up or down.
	iut.map_word_specs(alternant_multiword_spec, function(base)
		for _, prop in ipairs(props_that_must_be_consistent) do
			if base.props[prop] then
				alternant_multiword_spec.props[prop] = true
			end
		end
		base.from_headword = from_headword
		base.args = alternant_multiword_spec.args
	end)

	-- If any verb has one of these properties, every verb must have it.
	for _, prop in ipairs(props_that_must_be_consistent) do
		if alternant_multiword_spec.props[prop] then
			iut.map_word_specs(alternant_multiword_spec, function(base)
				if not base.props[prop] then
					error("If some alternants specify '" .. prop .. "', all must")
				end
			end)
		end
	end

	iut.map_word_specs(alternant_multiword_spec, function(base)
		if base.props.impers and base.props.thirdonly then
			error("'impers' and 'thirdonly' cannot both be specified")
		end
		-- Check for missing past participle -> missing auxiliary.
		if not base.verb.is_reflexive then
			-- The past participle counts as missing only when the spec is exactly the single form "-".
			local pp_specs = base.principal_part_specs.pp
			local pp_is_missing = pp_specs and #pp_specs == 1 and pp_specs[1].form == "-"
			local aux_is_missing = not base.principal_part_specs.aux
			if pp_is_missing and not aux_is_missing then
				error("If past participle given as '-', auxiliary must be explicitly specified as '-'")
			end
		end
	end)
end
-- Propagate indications of irregularity, defectiveness and other properties upward from individual `base` forms to the
-- overall `alternant_multiword_spec`. The overall indications of irregularity/defectiveness are used in
-- [[Module:0it-headword]] to show irregular/defective principal parts, and other properties are used similarly in
-- [[Module:0it-headword]]. This needs to be done later than detect_all_indicator_specs() because it depends on the
-- result of parsing and conjugating the angle bracket spec.
local function propagate_properties_upward(alternant_multiword_spec)
	-- OR the per-slot values of source_table[slotprop] into dest_table[slotprop], creating the destination
	-- subtable on first use. A value already truthy in the destination is left alone.
	local function copy_property_table(dest_table, source_table, slotprop)
		local dest = dest_table[slotprop]
		if not dest then
			dest = {}
			dest_table[slotprop] = dest
		end
		for slot, propval in pairs(source_table[slotprop]) do
			if not dest[slot] then
				dest[slot] = propval
			end
		end
	end

	local function merge_base(base)
		copy_property_table(alternant_multiword_spec, base, "is_irreg")

		if not alternant_multiword_spec.rowprops then
			alternant_multiword_spec.rowprops = {}
		end
		local overall_rowprops = alternant_multiword_spec.rowprops
		for subtable_key in pairs(base.rowprops) do
			copy_property_table(overall_rowprops, base.rowprops, subtable_key)
		end

		local overall_props = alternant_multiword_spec.props
		-- An explicit stem spec causes the imperfect principal part to be displayed explicitly even when it
		-- isn't marked as irregular.
		if base.principal_part_specs.explicit_stem_spec then
			overall_props.has_explicit_stem_spec = true
		end
		-- One pronominal verb is enough to display the overall lemma as 'pronominal'.
		if base.verb.is_pronominal then
			overall_props.is_pronominal = true
		end
		-- One non-reflexive verb is enough to show the auxiliary.
		if not base.verb.is_reflexive then
			overall_props.is_non_reflexive = true
		end
	end

	iut.map_word_specs(alternant_multiword_spec, merge_base)
end
-- Set the overall auxiliary or auxiliaries. We can't do this using the normal inflection
-- code as it will produce e.g. '[[avere|avére]] [[e]] [[avere|avére]]' for conjoined verbs.
local function compute_auxiliary(alternant_multiword_spec)
	-- Turn a single auxiliary form into its linked equivalent.
	local function link_aux(form)
		return add_links(form)
	end

	-- For each verb, add its linked auxiliaries to the overall "aux" slot.
	iut.map_word_specs(alternant_multiword_spec, function(base)
		local aux_specs = base.principal_part_specs.aux
		iut.insert_forms(alternant_multiword_spec.forms, "aux", iut.map_forms(aux_specs, link_aux))
	end)
end
-- Add the categories and annotation for `base` into the appropriate structures in `alternant_multiword_spec`.
local function add_categories_and_annotation(alternant_multiword_spec, base, multiword_lemma, from_headword)
	local annotation = alternant_multiword_spec.annotation

	-- Record `value` under annotation type `anntype`, skipping duplicates.
	local function insert_ann(anntype, value)
		m_table.insertIfNot(annotation[anntype], value)
	end

	-- Category insertion is currently disabled: the insertion statement is commented out, so calling this has
	-- no effect. The guard is kept so that multiword terms stay out of categories like
	-- 'Italian verbs ending in -are' if the insertion is ever re-enabled.
	local function insert_cat(cat, also_when_multiword)
		if also_when_multiword or not multiword_lemma then
			--m_table.insertIfNot(alternant_multiword_spec.categories, "Italian " .. cat)
		end
	end

	-- Walk the slots in order and stop at the first unlinked form whose page doesn't exist.
	local function find_red_link()
		for _, slot_and_accel in ipairs(all_verb_slots) do
			local slot_forms = base.forms[slot_and_accel[1]]
			if slot_forms then
				for _, formobj in ipairs(slot_forms) do
					if not formobj.form:find("%[%[") then
						local title = mw.title.new(formobj.form)
						if title and not title.exists then
							--insert_cat("verbs with red links in their inflection tables")
							return true
						end
					end
				end
			end
		end
		return false
	end

	if check_for_red_links and not from_headword and not multiword_lemma then
		find_red_link()
	end

	-- Conjugation class.
	local conj_ann
	if base.props.syncopated then
		conj_ann = "syncopated"
		--insert_cat("syncopated verbs")
	elseif base.principal_part_specs.root_stressed_inf then
		conj_ann = "root-stressed -ere"
		--insert_cat("verbs with root-stressed infinitive")
		--insert_cat("verbs ending in -ere")
	else
		local ending
		if base.conj_vowel == "à" then
			ending = "are"
		elseif base.conj_vowel == "é" then
			ending = "ere"
		else
			ending = "ire"
		end
		conj_ann = "-" .. ending
		--insert_cat("verbs ending in -" .. ending)
	end
	insert_ann("conj", conj_ann)

	-- Impersonal / third-person-only status.
	local third_only_ann
	if base.props.impers then
		third_only_ann = "impersonal"
		--insert_cat("impersonal verbs")
	elseif base.props.thirdonly then
		third_only_ann = "third-person only"
		--insert_cat("third-person-only verbs")
	else
		third_only_ann = "regular"
	end
	insert_ann("third_only", third_only_ann)

	-- Scan every row for irregularity and defectiveness.
	local is_irreg = false
	local is_defective = false
	local irreg_rows = base.rowprops.irreg
	local defective_rows = base.rowprops.defective
	for _, rowconj in ipairs(row_conjugations) do
		local rowslot = rowconj[1]
		if irreg_rows[rowslot] then
			if not is_irreg then
				is_irreg = true
				insert_cat("rendhagyó igék")
			end
			--insert_cat("verbs with irregular " .. rowspec.desc)
		end
		if defective_rows[rowslot] then
			is_defective = true
			--insert_cat("defective verbs") (on first defective row only)
			--insert_cat("verbs with missing " .. rowspec.desc)
		end
	end

	-- A non-reflexive verb without an auxiliary also counts as defective.
	local has_own_aux = not base.verb.is_reflexive and base.principal_part_specs.aux
	if not base.verb.is_reflexive and not base.principal_part_specs.aux then
		is_defective = true
		--insert_cat("defective verbs") (unless already added)
		--insert_cat("verbs lacking composed tenses")
	end

	insert_ann("irreg", is_irreg and "irregular" or "regular")
	insert_ann("defective", is_defective and "defective" or "regular")

	if has_own_aux then
		for _, auxform in ipairs(base.principal_part_specs.aux) do
			if auxform.form ~= "?" then
				-- No auxiliaries end in a stressed vowel so this is safe.
				local aux_no_accents = remove_accents(auxform.form)
				insert_ann("aux", aux_no_accents)
				--insert_cat("verbs taking " .. aux_no_accents .. " as auxiliary")
			end
		end
	end

	if base.verb.is_pronominal then
		--insert_cat("pronominal verbs")
		-- FIXME: Should we use this instead? This is what Spanish does.
		--insert_cat("verbs with lexical clitics")
	end

	if base.verb.is_reflexive then
		insert_cat("visszaható igék")
	end
end
-- Compute the categories to add the verb to, as well as the annotation to display in the conjugation title bar. We
-- combine the code to do these functions as both categories and title bar contain similar information.
local function compute_categories_and_annotation(alternant_multiword_spec, from_headword)
	alternant_multiword_spec.categories = {}

	-- While the individual verbs are being processed, `annotation` is a table of lists, one per annotation
	-- type; at the end it is replaced by the final display string.
	local ann = {conj = {}, third_only = {}, irreg = {}, defective = {}, aux = {}}
	alternant_multiword_spec.annotation = ann

	-- The lemma counts as multiword if any infinitive form contains a space.
	local multiword_lemma = false
	for _, inf_form in ipairs(alternant_multiword_spec.forms.inf) do
		if inf_form.form:find(" ") then
			multiword_lemma = true
			break
		end
	end

	iut.map_word_specs(alternant_multiword_spec, function(base)
		add_categories_and_annotation(alternant_multiword_spec, base, multiword_lemma, from_headword)
	end)

	-- The conjugation class is always shown; the other parts only when they say something other than "regular".
	local ann_parts = {table.concat(ann.conj, " or ")}
	for _, anntype in ipairs {"third_only", "irreg", "defective"} do
		local joined = table.concat(ann[anntype], " or ")
		if joined ~= "" and joined ~= "regular" then
			ann_parts[#ann_parts + 1] = joined
		end
	end
	alternant_multiword_spec.annotation = table.concat(ann_parts, "; ")
end
-- Convert the forms associated with each slot into their display form (a string).
local function show_forms(alternant_multiword_spec)
	local all_forms = alternant_multiword_spec.forms
	-- The lemmas are the infinitive forms passed through remove_reflexive_indicators.
	local lemmas = iut.map_forms(all_forms.inf, remove_reflexive_indicators)
	-- Saved for later use in make_table().
	alternant_multiword_spec.lemmas = lemmas
	iut.show_forms(all_forms, {lang = lang, lemmas = lemmas, slot_list = all_verb_slots})
end
-- Wikitext wrapper for the footnote box shown under the conjugation table. make_table() formats it with the
-- forms table (filling in `{footnote}`) and stores the result as `notes_clause`, but only when the footnote
-- text is not the empty string.
local notes_template = [=[
<div style="width:100%;text-align:left;background:#d9ebff">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]=]
-- Wikitext template for the collapsible conjugation table. make_table() passes it to
-- m_string_utilities.format() together with the forms table, so each `{name}` placeholder is looked up there:
-- the slot names ({inf}, {pres1s}, {negimp3p}, ...) as well as {title}, {description} and {notes_clause}, which
-- make_table() sets itself.
-- NOTE(review): `{\op}` and `{\cl}` presumably expand to literal opening/closing braces so that the wikitable
-- delimiters survive formatting -- confirm against the string utilities module.
local basic_table = [=[
{description}<div class="NavFrame">
<div class="NavHead" align=center> Conjugation of {title} (See [[Appendix:Italian verbs]])</div>
<div class="NavContent">
{\op}| style="background:#F0F0F0;border-collapse:separate;border-spacing:2px;width:100%" class="inflection-table"
|-
! style="background:#e2e4c0" | <span title="infinito">infinitive</span>
| {inf}
|-
! colspan="2" style="background:#e2e4c0" | <span title="verbo ausiliare">auxiliary verb</span>
| {aux}
! colspan="2" style="background:#e2e4c0" | <span title="gerundio">gerund</span>
| colspan="2" | {ger}
|-
! colspan="2" style="background:#e2e4c0" | <span title="participio presente">present participle</span>
| {presp}
! colspan="2" style="background:#e2e4c0" | <span title="participio passato">past participle</span>
| colspan="2" | {pp}
|-
! rowspan="2" style="background:#C0C0C0" | person
! colspan="3" style="background:#C0C0C0" | singular
! colspan="3" style="background:#C0C0C0" | plural
|-
! style="background:#C0C0C0;width:12.5%" | first
! style="background:#C0C0C0;width:12.5%" | second
! style="background:#C0C0C0;width:12.5%" | third
! style="background:#C0C0C0;width:12.5%" | first
! style="background:#C0C0C0;width:12.5%" | second
! style="background:#C0C0C0;width:12.5%" | third
|-
! style="background:#c0cfe4" | <span title="indicativo">indicative</span>
! style="background:#c0cfe4" | io
! style="background:#c0cfe4" | tu
! style="background:#c0cfe4" | lui/lei, esso/essa
! style="background:#c0cfe4" | noi
! style="background:#c0cfe4" | voi
! style="background:#c0cfe4" | loro, essi/esse
|-
! style="height:3em;background:#c0cfe4" | <span title="presente">present</span>
| {pres1s}
| {pres2s}
| {pres3s}
| {pres1p}
| {pres2p}
| {pres3p}
|-
! style="height:3em;background:#c0cfe4" | <span title="imperfetto">imperfect</span>
| {imperf1s}
| {imperf2s}
| {imperf3s}
| {imperf1p}
| {imperf2p}
| {imperf3p}
|-
! style="height:3em;background:#c0cfe4" | <span title="passato remoto">past historic</span>
| {phis1s}
| {phis2s}
| {phis3s}
| {phis1p}
| {phis2p}
| {phis3p}
|-
! style="height:3em;background:#c0cfe4" | <span title="futuro semplice">future</span>
| {fut1s}
| {fut2s}
| {fut3s}
| {fut1p}
| {fut2p}
| {fut3p}
|-
! style="background:#c0d8e4" | <span title="condizionale">conditional</span>
! style="background:#c0d8e4" | io
! style="background:#c0d8e4" | tu
! style="background:#c0d8e4" | lui/lei, esso/essa
! style="background:#c0d8e4" | noi
! style="background:#c0d8e4" | voi
! style="background:#c0d8e4" | loro, essi/esse
|-
! style="height:3em;background:#c0d8e4" | <span title="condizionale presente">present</span>
| {cond1s}
| {cond2s}
| {cond3s}
| {cond1p}
| {cond2p}
| {cond3p}
|-
! style="background:#c0e4c0" | <span title="congiuntivo">subjunctive</span>
! style="background:#c0e4c0" | che io
! style="background:#c0e4c0" | che tu
! style="background:#c0e4c0" | che lui/che lei, che esso/che essa
! style="background:#c0e4c0" | che noi
! style="background:#c0e4c0" | che voi
! style="background:#c0e4c0" | che loro, che essi/che esse
|-
! style="height:3em;background:#c0e4c0" | <span title="congiuntivo presente">present</span>
| {sub1s}
| {sub2s}
| {sub3s}
| {sub1p}
| {sub2p}
| {sub3p}
|-
! style="height:3em;background:#c0e4c0" | <span title="congiuntivo imperfetto">imperfect</span>
| {impsub1s}
| {impsub2s}
| {impsub3s}
| {impsub1p}
| {impsub2p}
| {impsub3p}
|-
! rowspan="2" style="height:3em;background:#e4d4c0" | <span title="imperativo">imperative</span>
! style="background:#e4d4c0" | —
! style="background:#e4d4c0" | tu
! style="background:#e4d4c0" | Lei
! style="background:#e4d4c0" | noi
! style="background:#e4d4c0" | voi
! style="background:#e4d4c0" | Loro
|-
|
| {imp2s}
| {imp3s}
| {imp1p}
| {imp2p}
| {imp3p}
|-
! style="height:3em;background:#e4d4c0" | <span title="imperativo negativo">negative imperative</span>
|
| {negimp2s}
| {negimp3s}
| {negimp1p}
| {negimp2p}
| {negimp3p}
|{\cl}{notes_clause}</div></div>
]=]
-- Build the wikitext of the conjugation table. Expects show_forms() to have run already, since it reads
-- `alternant_multiword_spec.lemmas`. Adds the `title`, `description` and `notes_clause` keys to the forms table
-- and then formats `basic_table` with it.
local function make_table(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms

	-- Title: every lemma as a link, joined with " or ", plus the annotation in parentheses when there is one.
	local linked_lemmas = {}
	for _, lemma in ipairs(alternant_multiword_spec.lemmas) do
		linked_lemmas[#linked_lemmas + 1] = m_links.full_link({ lang = lang, term = lemma.form }, "term")
	end
	local title = table.concat(linked_lemmas, " or ")
	local annotation = alternant_multiword_spec.annotation
	if annotation ~= "" then
		title = title .. " (" .. annotation .. ")"
	end
	forms.title = title
	forms.description = ""

	-- `forms` is the same table as `alternant_multiword_spec.forms`, so this assignment leaves the value as is.
	forms.footnote = alternant_multiword_spec.forms.footnote
	-- The notes box is only emitted when the footnote text is not the empty string.
	local notes_clause = ""
	if forms.footnote ~= "" then
		notes_clause = m_string_utilities.format(notes_template, forms) or ""
	end
	forms.notes_clause = notes_clause

	return m_string_utilities.format(basic_table, forms)
end
-- Externally callable function to conjugate a verb.
--
-- `args` supplies the conjugation spec in `args[1]`, plus the optional `pagename` and `json` fields.
-- `from_headword` is passed on to detect_all_indicator_specs() and compute_categories_and_annotation().
-- `headword_head`, if given, is used instead of the page name as the head from which the lemma is taken.
--
-- Returns ALTERNANT_MULTIWORD_SPEC, an object where the conjugated forms are in `ALTERNANT_MULTIWORD_SPEC.forms`
-- for each slot. If there are no values for a slot, the slot key will be missing. The value for a given slot is a
-- list of objects {form=FORM, footnotes=FOOTNOTES}. If `args.json` is set, the JSON serialization of that object
-- is returned instead (a string).
function export.do_generate_forms(args, from_headword, headword_head)
	local pagename = args.pagename or mw.title.getCurrentTitle().text
	local head = headword_head or pagename
	local arg1 = args[1]
	local incorporated_headword_head_into_lemma = false

	-- Whether `spec` already has a top-level <...>. Without any "<" it cannot. Otherwise the balanced [...] and
	-- <...> runs have to be parsed, because angle brackets may also occur inside [...] (HTML tags in qualifiers
	-- or <<name:...>> and such in references).
	local function has_top_level_angle_brackets(spec)
		if not spec:find("<") then
			return false
		end
		local segments = iut.parse_multi_delimiter_balanced_segment_run(spec, {{"<", ">"}, {"[", "]"}})
		for i = 2, #segments, 2 do
			if segments[i]:find("^<.*>$") then
				return true
			end
		end
		return false
	end

	if not has_top_level_angle_brackets(arg1) then
		if not head:find(" ") then
			arg1 = "<" .. arg1 .. ">"
			-- Will be incorporated through `head` below in the call to parse_indicator_spec().
			incorporated_headword_head_into_lemma = true
		else
			-- Multiword lemma without <...>: try to attach the spec to the first word of the head.
			-- `wrapped` stays nil when that cannot be done automatically.
			local wrapped
			if not arg1:find("%(%(") then
				-- Try to preserve the brackets in the part after the verb, but don't do it if there aren't the
				-- same number of left and right brackets in the verb (which means the verb was linked as part of
				-- a larger expression).
				local first_word, rest = rmatch(head, "^(.-)( .*)$")
				local opening = rsub(first_word, "[^%[]", "")
				local closing = rsub(first_word, "[^%]]", "")
				if #opening == #closing then
					wrapped = iut.remove_redundant_links(first_word) .. "<" .. arg1 .. ">" .. rest
					incorporated_headword_head_into_lemma = true
				else
					-- Try again using the form without links.
					local linkless_head = m_links.remove_links(head)
					if linkless_head:find(" ") then
						first_word, rest = rmatch(linkless_head, "^(.-)( .*)$")
						wrapped = first_word .. "<" .. arg1 .. ">" .. rest
					end
				end
			end
			if not wrapped then
				error("Multiword argument without <> and with alternants, a multiword linked verb or " ..
					"unbalanced brackets; please include <> explicitly: " .. arg1)
			end
			arg1 = wrapped
		end
	end

	local parse_props = {
		-- Bind `head` so that the indicator-spec parser can fall back to it.
		parse_indicator_spec = function(angle_bracket_spec, lemma)
			return parse_indicator_spec(angle_bracket_spec, lemma, head)
		end,
		allow_default_indicator = true,
		allow_blank_lemma = true,
	}
	local alternant_multiword_spec = iut.parse_inflected_text(escape_reflexive_indicators(arg1), parse_props)
	-- NOTE(review): `pos` is not declared in this function; presumably a file-level name. If it is not defined
	-- anywhere, this always falls back to "verbs" -- confirm.
	alternant_multiword_spec.pos = pos or "verbs"
	alternant_multiword_spec.args = args
	alternant_multiword_spec.incorporated_headword_head_into_lemma = incorporated_headword_head_into_lemma

	normalize_all_lemmas(alternant_multiword_spec)
	detect_all_indicator_specs(alternant_multiword_spec, from_headword)
	iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, {
		slot_list = all_verb_slots,
		inflect_word_spec = conjugate_verb,
		-- We add links around the generated verbal forms rather than allow the entire multiword
		-- expression to be a link, so ensure that user-specified links get included as well.
		include_user_specified_links = true,
	})
	propagate_properties_upward(alternant_multiword_spec)
	compute_auxiliary(alternant_multiword_spec)
	convert_accented_links(alternant_multiword_spec)
	compute_categories_and_annotation(alternant_multiword_spec, from_headword)

	if args.json then
		return require("Module:0JSON").toJSON(alternant_multiword_spec)
	end
	return alternant_multiword_spec
end
-- Externally callable function to parse user-specified arguments and conjugate a verb.
-- `parent_args` is the raw argument table of the calling template; it is validated with Module:0parameters and
-- then passed to export.do_generate_forms(), whose result is returned unchanged: normally
-- ALTERNANT_MULTIWORD_SPEC, an object where the conjugated forms are in `ALTERNANT_MULTIWORD_SPEC.forms` for each
-- slot (a missing key means no values; each value is a list of objects {form=FORM, footnotes=FOOTNOTES}), or a
-- JSON string when |json= is set.
function export.parse_args_and_generate_forms(parent_args)
	local m_parameters = require("Module:0parameters")
	local param_specs = {
		-- NOTE(review): `def` is not declared in this function; presumably a file-level name -- confirm.
		[1] = {required = true, default = def or "mettere<a\\é,mìsi,mésso>"},
		["noautolinktext"] = {type = "boolean"},
		["noautolinkverb"] = {type = "boolean"},
		["pagename"] = {}, -- for testing
		["json"] = {type = "boolean"}, -- for bot use
	}
	local args = m_parameters.process(parent_args, param_specs)
	return export.do_generate_forms(args)
end
-- Entry point for {{it-conj}}. Template-callable function that conjugates a verb from the arguments of the
-- calling template and returns the wikitext of the conjugation table followed by the formatted categories.
-- If the generator returns a string instead of a spec object (the JSON case), that string is returned as is.
function export.show(frame)
	local result = export.parse_args_and_generate_forms(frame:getParent().args)
	if type(result) == "string" then
		return result
	end
	show_forms(result)
	local table_text = make_table(result)
	-- NOTE(review): `force_cat` is not declared in this function; presumably a file-level name -- confirm.
	local category_text = require("Module:0utilities").format_categories(result.categories, lang, nil, nil,
		force_cat)
	return table_text .. category_text
end
-- Serialize all slots into one string of the form "SLOT=FORM,FORM,...|SLOT=FORM,FORM,...|...", visiting the
-- slots in the order of `all_verb_slots`. The per-slot text comes from iut.concat_forms_in_slot(); slots for
-- which it returns nothing are left out. Per the original documentation, embedded pipe symbols (as might occur
-- in embedded links) are converted to <!>. INCLUDE_PROPS is accepted for bot callers but currently has no effect
-- (there are no additional properties to include).
local function concat_forms(alternant_multiword_spec, include_props)
	local slot_forms = alternant_multiword_spec.forms
	local pieces = {}
	for _, slot_and_accel in ipairs(all_verb_slots) do
		local slot = slot_and_accel[1]
		local joined = iut.concat_forms_in_slot(slot_forms[slot])
		if joined then
			pieces[#pieces + 1] = slot .. "=" .. joined
		end
	end
	return table.concat(pieces, "|")
end
-- Template-callable function to parse and conjugate a verb given user-specified arguments and return
-- the forms as a string "SLOT=FORM,FORM,...|SLOT=FORM,FORM,...|...". Embedded pipe symbols (as might
-- occur in embedded links) are converted to <!>. If |include_props=1 is given, also include
-- additional properties (currently, none). This is for use by bots.
--
-- If the calling template passes |json=1, the generator returns a JSON string rather than a spec object; that
-- string is returned unchanged, the same way export.show() handles it.
function export.generate_forms(frame)
	local include_props = frame.args["include_props"]
	local parent_args = frame:getParent().args
	local alternant_multiword_spec = export.parse_args_and_generate_forms(parent_args)
	if type(alternant_multiword_spec) == "string" then
		-- A string has no `forms` field, so passing it on to concat_forms() would raise an
		-- "attempt to index a nil value" error.
		return alternant_multiword_spec
	end
	return concat_forms(alternant_multiword_spec, include_props)
end
-- Hand the table of externally callable functions back to whoever loads this module.
return export