i18Next: Add exceptions for mixed v3/v4 plural forms

This commit is contained in:
Samantaz Fox 2023-10-06 08:18:26 +02:00
parent bb14f79496
commit 32310b7c9f
No known key found for this signature in database
GPG key ID: F42821059186176E
2 changed files with 103 additions and 35 deletions
spec
src/invidious/helpers

View file

@ -15,12 +15,15 @@ FORM_TESTS = {
"ar" => I18next::Plurals::PluralForms::Special_Arabic,
"be" => I18next::Plurals::PluralForms::Dual_Slavic,
"cy" => I18next::Plurals::PluralForms::Special_Welsh,
"fr" => I18next::Plurals::PluralForms::Special_French_Portuguese,
"en" => I18next::Plurals::PluralForms::Single_not_one,
"fr" => I18next::Plurals::PluralForms::Single_gt_one,
"es" => I18next::Plurals::PluralForms::Special_Spanish_Italian,
"ga" => I18next::Plurals::PluralForms::Special_Irish,
"gd" => I18next::Plurals::PluralForms::Special_Scottish_Gaelic,
"he" => I18next::Plurals::PluralForms::Special_Hebrew,
"hr" => I18next::Plurals::PluralForms::Special_Hungarian_Serbian,
"is" => I18next::Plurals::PluralForms::Special_Icelandic,
"it" => I18next::Plurals::PluralForms::Special_Spanish_Italian,
"jv" => I18next::Plurals::PluralForms::Special_Javanese,
"kw" => I18next::Plurals::PluralForms::Special_Cornish,
"lt" => I18next::Plurals::PluralForms::Special_Lithuanian,
@ -30,13 +33,13 @@ FORM_TESTS = {
"mt" => I18next::Plurals::PluralForms::Special_Maltese,
"or" => I18next::Plurals::PluralForms::Special_Odia,
"pl" => I18next::Plurals::PluralForms::Special_Polish_Kashubian,
"pt" => I18next::Plurals::PluralForms::Single_gt_one,
"pt-PT" => I18next::Plurals::PluralForms::Single_not_one,
"pt-BR" => I18next::Plurals::PluralForms::Single_gt_one,
"pt" => I18next::Plurals::PluralForms::Special_French_Portuguese,
"pt-PT" => I18next::Plurals::PluralForms::Special_French_Portuguese,
"ro" => I18next::Plurals::PluralForms::Special_Romanian,
"su" => I18next::Plurals::PluralForms::None,
"sk" => I18next::Plurals::PluralForms::Special_Czech_Slovak,
"sl" => I18next::Plurals::PluralForms::Special_Slovenian,
"su" => I18next::Plurals::PluralForms::None,
"sr" => I18next::Plurals::PluralForms::Special_Hungarian_Serbian,
}
SUFFIX_TESTS = {
@ -73,10 +76,18 @@ SUFFIX_TESTS = {
{num: 1, suffix: ""},
{num: 10, suffix: "_plural"},
],
"es" => [
{num: 0, suffix: "_2"},
{num: 1, suffix: "_0"},
{num: 10, suffix: "_2"},
{num: 6_000_000, suffix: "_1"},
],
"fr" => [
{num: 0, suffix: ""},
{num: 1, suffix: ""},
{num: 10, suffix: "_plural"},
{num: 0, suffix: "_0"},
{num: 1, suffix: "_0"},
{num: 10, suffix: "_2"},
{num: 4_000_000, suffix: "_1"},
{num: 6_260_000, suffix: "_2"},
],
"ga" => [
{num: 1, suffix: "_0"},
@ -156,30 +167,24 @@ SUFFIX_TESTS = {
{num: 5, suffix: "_2"},
],
"pt" => [
{num: 0, suffix: ""},
{num: 1, suffix: ""},
{num: 10, suffix: "_plural"},
{num: 0, suffix: "_0"},
{num: 1, suffix: "_0"},
{num: 10, suffix: "_2"},
{num: 42, suffix: "_2"},
{num: 9_000_000, suffix: "_1"},
],
"pt-PT" => [
{num: 0, suffix: "_plural"},
{num: 1, suffix: ""},
{num: 10, suffix: "_plural"},
],
"pt-BR" => [
{num: 0, suffix: ""},
{num: 1, suffix: ""},
{num: 10, suffix: "_plural"},
{num: 0, suffix: "_0"},
{num: 1, suffix: "_0"},
{num: 10, suffix: "_2"},
{num: 42, suffix: "_2"},
{num: 9_000_000, suffix: "_1"},
],
"ro" => [
{num: 0, suffix: "_1"},
{num: 1, suffix: "_0"},
{num: 20, suffix: "_2"},
],
"su" => [
{num: 0, suffix: "_0"},
{num: 1, suffix: "_0"},
{num: 10, suffix: "_0"},
],
"sk" => [
{num: 0, suffix: "_2"},
{num: 1, suffix: "_0"},
@ -191,6 +196,18 @@ SUFFIX_TESTS = {
{num: 2, suffix: "_2"},
{num: 3, suffix: "_3"},
],
"su" => [
{num: 0, suffix: "_0"},
{num: 1, suffix: "_0"},
{num: 10, suffix: "_0"},
],
"sr" => [
{num: 1, suffix: "_0"},
{num: 51, suffix: "_0"},
{num: 32, suffix: "_1"},
{num: 100, suffix: "_2"},
{num: 100_000, suffix: "_2"},
],
}
Spectator.describe "i18next_Plural_Resolver" do

View file

@ -35,19 +35,25 @@ module I18next::Plurals
Special_Slovenian = 21
Special_Hebrew = 22
Special_Odia = 23
# Mixed v3/v4 rules in Weblate
# See: https://github.com/translate/translate/issues/4873
Special_French_Portuguese
Special_Hungarian_Serbian
Special_Spanish_Italian
end
private PLURAL_SETS = {
PluralForms::Single_gt_one => [
"ach", "ak", "am", "arn", "br", "fil", "fr", "gun", "ln", "mfe", "mg",
"mi", "oc", "pt", "pt-BR", "tg", "tl", "ti", "tr", "uz", "wa",
"ach", "ak", "am", "arn", "br", "fil", "gun", "ln", "mfe", "mg",
"mi", "oc", "tg", "tl", "ti", "tr", "uz", "wa",
],
PluralForms::Single_not_one => [
"af", "an", "ast", "az", "bg", "bn", "ca", "da", "de", "dev", "el", "en",
"eo", "es", "et", "eu", "fi", "fo", "fur", "fy", "gl", "gu", "ha", "hi",
"hu", "hy", "ia", "it", "kk", "kn", "ku", "lb", "mai", "ml", "mn", "mr",
"eo", "et", "eu", "fi", "fo", "fur", "fy", "gl", "gu", "ha", "hi",
"hu", "hy", "ia", "kk", "kn", "ku", "lb", "mai", "ml", "mn", "mr",
"nah", "nap", "nb", "ne", "nl", "nn", "no", "nso", "pa", "pap", "pms",
"ps", "pt-PT", "rm", "sco", "se", "si", "so", "son", "sq", "sv", "sw",
"ps", "rm", "sco", "se", "si", "so", "son", "sq", "sv", "sw",
"ta", "te", "tk", "ur", "yo",
],
PluralForms::None => [
@ -55,7 +61,7 @@ module I18next::Plurals
"lo", "ms", "sah", "su", "th", "tt", "ug", "vi", "wo", "zh",
],
PluralForms::Dual_Slavic => [
"be", "bs", "cnr", "dz", "hr", "ru", "sr", "uk",
"be", "bs", "cnr", "dz", "ru", "uk",
],
}
@ -81,6 +87,13 @@ module I18next::Plurals
"ro" => PluralForms::Special_Romanian,
"sk" => PluralForms::Special_Czech_Slovak,
"sl" => PluralForms::Special_Slovenian,
# Mixed v3/v4 rules
"es" => PluralForms::Special_Spanish_Italian,
"fr" => PluralForms::Special_French_Portuguese,
"hr" => PluralForms::Special_Hungarian_Serbian,
"it" => PluralForms::Special_Spanish_Italian,
"pt" => PluralForms::Special_French_Portuguese,
"sr" => PluralForms::Special_Hungarian_Serbian,
}
# These are the v1 and v2 compatible suffixes.
@ -150,11 +163,8 @@ module I18next::Plurals
end
def get_plural_form(locale : String) : PluralForms
# Extract the ISO 639-1 or 639-2 code from an RFC 5646 language code,
# except for pt-BR and pt-PT which needs to be kept as-is.
if !locale.matches?(/^pt-(BR|PT)$/)
locale = locale.split('-')[0]
end
# Extract the ISO 639-1 or 639-2 code from an RFC 5646 language code
locale = locale.split('-')[0]
return self.forms[locale] if self.forms[locale]?
@ -246,6 +256,10 @@ module I18next::Plurals
when .special_slovenian? then return special_slovenian(count)
when .special_hebrew? then return special_hebrew(count)
when .special_odia? then return special_odia(count)
# Mixed v3/v4 forms
when .special_spanish_italian? then return special_cldr_Spanish_Italian(count)
when .special_french_portuguese? then return special_cldr_French_Portuguese(count)
when .special_hungarian_serbian? then return special_cldr_Hungarian_Serbian(count)
else
# default, if nothing matched above
return 0_u8
@ -507,5 +521,42 @@ module I18next::Plurals
def self.special_odia(count : Int) : UInt8
  # Two plural indexes only: 0 when count is exactly 1, 1 for every
  # other value (including zero).
  if count == 1
    0_u8
  else
    1_u8
  end
end
# -------------------
# "v3.5" rules
# -------------------
# Plural index for Spanish and Italian.
#
# Per the original author's note, this is mostly compliant with CLDR v42.
#
# Returns:
#  - 0 ("one")   when count is exactly 1
#  - 1 ("many")  when count is a non-zero multiple of one million
#  - 2 ("other") for everything else, zero included
#
def self.special_cldr_Spanish_Italian(count : Int) : UInt8
  if count == 1
    0_u8 # one
  elsif count != 0 && count % 1_000_000 == 0
    1_u8 # many
  else
    2_u8 # other
  end
end
# Plural index for French and Portuguese.
#
# Per the original author's note, this is mostly compliant with CLDR v42.
#
# Returns:
#  - 0 ("one")   when count is 0 or 1
#  - 1 ("many")  when count is a multiple of one million
#                (zero never reaches this branch; it is caught above)
#  - 2 ("other") for everything else
#
def self.special_cldr_French_Portuguese(count : Int) : UInt8
  case
  when count == 0 || count == 1
    0_u8 # one
  when count % 1_000_000 == 0
    1_u8 # many
  else
    2_u8 # other
  end
end
# Plural index for the "Hungarian_Serbian" form.
#
# Per the original author's note, this is mostly compliant with CLDR v42.
#
# NOTE(review): despite the name, the forms table in this file maps this
# rule to the "hr" and "sr" locale codes, while "hu" remains listed under
# Single_not_one. "Hungarian" looks like a misnomer for Croatian ("hr");
# confirm before renaming anything, since the method name is referenced
# by the dispatch `case`.
#
# Returns:
#  - 0 ("one")   when count ends in 1, but not in 11
#  - 1 ("few")   when count ends in 2, 3 or 4, but not in 12, 13 or 14
#  - 2 ("other") for everything else
#
def self.special_cldr_Hungarian_Serbian(count : Int) : UInt8
  last_digit = count % 10
  last_two_digits = count % 100

  ends_in_teen = last_two_digits >= 12 && last_two_digits <= 14

  if last_digit == 1 && last_two_digits != 11
    0_u8 # one
  elsif last_digit >= 2 && last_digit <= 4 && !ends_in_teen
    1_u8 # few
  else
    2_u8 # other
  end
end
end
end