diff --git a/documents/how_to_add_new_language.md b/documents/how_to_add_new_language.md
index b0ff36d3..5813dfb7 100644
--- a/documents/how_to_add_new_language.md
+++ b/documents/how_to_add_new_language.md
@@ -10,6 +10,9 @@ The following steps with help you identify files that need to be added or change
NOTE: Take a look at [PR #40](https://github.com/unicode-org/inflection/pull/40) and [PR #111](https://github.com/unicode-org/inflection/pull/111) for example on how to add initial language support based on dictionary lookup only.
In general, to bootstrap your progress look for grammatically similar language that's already supported, e.g. if you are adding Serbian look for existing Russian implementation.
This will help you find most of the files you need to add/change and will speed up implementation of the rules and lexicons.
+We recommend that you spend about a week researching the language and its components before you begin adding or modifying the files below. Review the relevant files in the project, such as the tokenizers, configuration files, grammar files, and lookup functions, to see what you will need; this will save you a lot of time in the end. We strongly suggest that you avoid hardcoded logic and rely on the dictionary lookup instead. Pay particular attention to the grammemes, the tokenizer logic, and multi-word phrase handling.
+
+Before you add new language support, go to the README.md in the inflection subfolder (inflection/inflection/README.md), build the project, and make sure all the tests run on your computer.
## Mark your language as supported
* UPDATE: inflection/src/inflection/util/LocaleUtils.hpp
@@ -29,13 +32,13 @@ TODO: We need to expand what each of these do.
* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer.hpp
* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer.cpp
* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer_*Xx*DisplayFunction.hpp
-* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer_*Xx*DisplayFunction.hpp
+* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer_*Xx*DisplayFunction.cpp
* UPDATE: inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp
* UPDATE: inflection/src/inflection/grammar/synthesis/fwd.hpp
## Add language specific properties for lists, quantities and related topics
* ADD: inflection/src/inflection/dialog/language/*Xx*CommonConceptFactory.hpp
-* ADD: inflection/src/inflection/dialog/language/*Xx*CommonConceptFactory.hpp
+* ADD: inflection/src/inflection/dialog/language/*Xx*CommonConceptFactory.cpp
* UPDATE: inflection/src/inflection/dialog/language/fwd.hpp
## Define and create lexion
diff --git a/inflection/resources/org/unicode/inflection/dictionary/.gitattributes b/inflection/resources/org/unicode/inflection/dictionary/.gitattributes
index fe535cd8..9dc3b5af 100644
--- a/inflection/resources/org/unicode/inflection/dictionary/.gitattributes
+++ b/inflection/resources/org/unicode/inflection/dictionary/.gitattributes
@@ -8,6 +8,7 @@ dictionary_he.lst filter=lfs diff=lfs merge=lfs -text
dictionary_hi.lst filter=lfs diff=lfs merge=lfs -text
dictionary_it.lst filter=lfs diff=lfs merge=lfs -text
dictionary_ko.lst filter=lfs diff=lfs merge=lfs -text
+dictionary_ml.lst filter=lfs diff=lfs merge=lfs -text
dictionary_nb.lst filter=lfs diff=lfs merge=lfs -text
dictionary_nl.lst filter=lfs diff=lfs merge=lfs -text
dictionary_pt.lst filter=lfs diff=lfs merge=lfs -text
@@ -23,6 +24,7 @@ inflectional_fr.xml filter=lfs diff=lfs merge=lfs -text
inflectional_he.xml filter=lfs diff=lfs merge=lfs -text
inflectional_hi.xml filter=lfs diff=lfs merge=lfs -text
inflectional_it.xml filter=lfs diff=lfs merge=lfs -text
+inflectional_ml.xml filter=lfs diff=lfs merge=lfs -text
inflectional_nb.xml filter=lfs diff=lfs merge=lfs -text
inflectional_nl.xml filter=lfs diff=lfs merge=lfs -text
inflectional_pt.xml filter=lfs diff=lfs merge=lfs -text
diff --git a/inflection/resources/org/unicode/inflection/dictionary/dictionary_ml.lst b/inflection/resources/org/unicode/inflection/dictionary/dictionary_ml.lst
new file mode 100644
index 00000000..320b3589
--- /dev/null
+++ b/inflection/resources/org/unicode/inflection/dictionary/dictionary_ml.lst
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bda9371a2aa17c08328381e678b77e769269f4ee74749dd4f9e0bd5890cf59c
+size 53958746
diff --git a/inflection/resources/org/unicode/inflection/dictionary/inflectional_ml.xml b/inflection/resources/org/unicode/inflection/dictionary/inflectional_ml.xml
new file mode 100644
index 00000000..d6be1b3b
--- /dev/null
+++ b/inflection/resources/org/unicode/inflection/dictionary/inflectional_ml.xml
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1868dab352ff2648c2ba495bc08a3877409eadf177f573817fd03ae07174b12f
+size 613479
diff --git a/inflection/resources/org/unicode/inflection/features/grammar.xml b/inflection/resources/org/unicode/inflection/features/grammar.xml
index 6a620220..f7b60a2a 100644
--- a/inflection/resources/org/unicode/inflection/features/grammar.xml
+++ b/inflection/resources/org/unicode/inflection/features/grammar.xml
@@ -1624,6 +1624,97 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/inflection/resources/org/unicode/inflection/inflection/pronoun_ml.csv b/inflection/resources/org/unicode/inflection/inflection/pronoun_ml.csv
new file mode 100644
index 00000000..5134f9da
--- /dev/null
+++ b/inflection/resources/org/unicode/inflection/inflection/pronoun_ml.csv
@@ -0,0 +1,75 @@
+അവൻ,third,singular,nominative,masculine
+അവൾ,third,singular,nominative,feminine
+അത്,third,singular,nominative,neuter
+അവനെ,third,singular,accusative,masculine
+അവന്റെ,third,singular,genitive,masculine,determination=dependent
+അവന്റെത്,third,singular,genitive,masculine,determination=independent
+അവളെ,third,singular,accusative,feminine
+അവളുടെ,third,singular,genitive,feminine,determination=dependent
+അവളുടേതു്,third,singular,genitive,feminine,determination=independent
+അതിനെ,third,singular,accusative,neuter
+അതിന്റെ,third,singular,genitive,neuter,determination=dependent
+അതിന്റേതു്,third,singular,genitive,neuter,determination=independent
+അവനിൽ,third,singular,locative,masculine
+അവനാൽ,third,singular,instrumental,masculine
+അവനോടു്,third,singular,sociative,masculine
+അവളിൽ,third,singular,locative,feminine
+അവളാൽ,third,singular,instrumental,feminine
+അവളോടു്,third,singular,sociative,feminine
+അതിൽ,third,singular,locative,neuter
+അതാൽ,third,singular,instrumental,neuter
+അതോടു്,third,singular,sociative,neuter
+അവർ,third,plural,nominative
+അവരെ,third,plural,accusative
+അവരുടെ,third,plural,genitive,determination=dependent
+അവരുടേതു്,third,plural,genitive,determination=independent
+അവരിൽ,third,plural,locative
+അവരാൽ,third,plural,instrumental
+അവരോടു്,third,plural,sociative
+നീ,second,singular,nominative,informal
+താങ്കൾ,second,singular,nominative,formal
+നിനക്ക്,second,singular,dative,informal
+താങ്കൾക്ക്,second,singular,dative,formal
+നിന്നെ,second,singular,accusative,informal
+താങ്കളെ,second,singular,accusative,formal
+നിന്റെ,second,singular,genitive,informal,determination=dependent
+നിന്റേതു്,second,singular,genitive,informal,determination=independent
+താങ്കളുടെ,second,singular,genitive,formal,determination=dependent
+താങ്കളുടേതു്,second,singular,genitive,formal,determination=independent
+നിന്നിൽ,second,singular,locative,informal
+നിന്നാൽ,second,singular,instrumental,informal
+നിന്നോടു്,second,singular,sociative,informal
+താങ്കളിൽ,second,singular,locative,formal
+താങ്കളാൽ,second,singular,instrumental,formal
+താങ്കളോടു്,second,singular,sociative,formal
+നിങ്ങൾ,second,plural,nominative,formal
+നിങ്ങളെ,second,plural,accusative,formal
+നിങ്ങൾക്ക്,second,plural,dative,formal
+നിങ്ങളുടെ,second,plural,genitive,formal,determination=dependent
+നിങ്ങളുടേതു്,second,plural,genitive,formal,determination=independent
+നിങ്ങളിൽ,second,plural,locative,formal
+നിങ്ങളാൽ,second,plural,instrumental,formal
+നിങ്ങളോടു്,second,plural,sociative,formal
+ഞാൻ,first,singular,nominative,exclusive
+എനിക്ക്,first,singular,dative
+നമുക്ക്,first,plural,dative,inclusive
+എന്നെ,first,singular,accusative,exclusive
+നമ്മെ,first,plural,accusative,inclusive
+എന്റെ,first,singular,genitive,determination=dependent,exclusive
+എന്റേത്,first,singular,genitive,determination=independent,exclusive
+എന്നിൽ,first,singular,locative
+എന്നാൽ,first,singular,instrumental
+എന്നോടു്,first,singular,sociative
+ഞങ്ങൾ,first,plural,nominative,exclusive
+നാം,first,plural,nominative,inclusive
+ഞങ്ങളെ,first,plural,accusative,exclusive
+ഞങ്ങൾക്ക്,first,plural,dative,exclusive
+ഞങ്ങളുടെ,first,plural,genitive,exclusive,determination=dependent
+ഞങ്ങളുടേത്,first,plural,genitive,exclusive,determination=independent
+നമ്മുടെ,first,plural,genitive,inclusive,determination=dependent
+നമ്മുടേതു്,first,plural,genitive,inclusive,determination=independent
+ഞങ്ങളിൽ,first,plural,locative,exclusive
+ഞങ്ങളാൽ,first,plural,instrumental,exclusive
+ഞങ്ങളോടു്,first,plural,sociative,exclusive
+താൻ,third,singular,nominative,reflexive
+തങ്ങൾ,third,plural,nominative,formal,reflexive
\ No newline at end of file
diff --git a/inflection/resources/org/unicode/inflection/locale/supported-locales.properties b/inflection/resources/org/unicode/inflection/locale/supported-locales.properties
index 6815591d..43741bca 100644
--- a/inflection/resources/org/unicode/inflection/locale/supported-locales.properties
+++ b/inflection/resources/org/unicode/inflection/locale/supported-locales.properties
@@ -15,6 +15,7 @@ locale.group.it=it_IT,it_CH
locale.group.ja=ja_JP
locale.group.ko=ko_KR
locale.group.ms=ms_MY
+locale.group.ml=ml_IN
locale.group.nb=nb_NO
locale.group.nl=nl_NL,nl_BE
locale.group.pt=pt_BR,pt_PT
diff --git a/inflection/resources/org/unicode/inflection/tokenizer/config_ml.properties b/inflection/resources/org/unicode/inflection/tokenizer/config_ml.properties
new file mode 100644
index 00000000..d9652dc0
--- /dev/null
+++ b/inflection/resources/org/unicode/inflection/tokenizer/config_ml.properties
@@ -0,0 +1,7 @@
+#
+# Copyright 2025 Unicode Incorporated and others. All rights reserved.
+#
+tokenizer.implementation.class=DefaultTokenizer
+tokenizer.nonDecompound.file=/org/unicode/inflection/tokenizer/ml/nondecompound.tok
+tokenizer.decompound=(ശ്രീ)(.+?)(ഗുരു|സര്ക്കാര്)|(.+?)(ഗുരു|സര്ക്കാര്|ഉണ്ട്|ആണ്|ഇല്ല|ഒടൊപ്പം|ഉടൻ|ഓടെ|ഓട്|ഒപ്പം|തന്നെ|പോലും|പോലെ|ഉം|യ്|കളുടെ|ങ്ങളുടെ|ത്തിന്റെ|ൻ്റെ|ന്റെ|യുടേ|യുടെ|യാൽ|യിൽ|ഇൽ|ല്|ൽ|ക്ക്|മാർ|ങ്ങൾ|കൾ|നെ|യെ)
+
diff --git a/inflection/resources/org/unicode/inflection/tokenizer/ml/nondecompound.tok b/inflection/resources/org/unicode/inflection/tokenizer/ml/nondecompound.tok
new file mode 100644
index 00000000..c62b299c
--- /dev/null
+++ b/inflection/resources/org/unicode/inflection/tokenizer/ml/nondecompound.tok
@@ -0,0 +1,35 @@
+അമ്മ
+അച്ഛൻ
+അച്ഛി
+അമ്മൻ
+മകൻ
+മകൾ
+കുട്ടി
+കുട്ടികൾ
+ആൺകുട്ടി
+ആൺകുട്ടികൾ
+പെൺകുട്ടി
+പെൺകുട്ടികൾ
+കഥ
+ചിത്രം
+ചിത്രങ്ങൾ
+ഗ്രന്ഥം
+ഗ്രന്ഥങ്ങൾ
+മക്കൾ
+ഞാൻ
+നീ
+നിങ്ങൾ
+അവൻ
+അവൾ
+അവ
+അവർ
+ഇത്
+അത്
+ഇവ
+അവ
+ശ്രീ
+നാരായണ
+ഗുരു
+കേരളം
+സര്ക്കാര്
+കേരളസര്ക്കാര്
diff --git a/inflection/src/inflection/dialog/PronounConcept.cpp b/inflection/src/inflection/dialog/PronounConcept.cpp
index 5ced24eb..dfd5ccc2 100644
--- a/inflection/src/inflection/dialog/PronounConcept.cpp
+++ b/inflection/src/inflection/dialog/PronounConcept.cpp
@@ -228,7 +228,7 @@ PronounConcept::PronounConcept(const SemanticFeatureModel& model, std::u16string
for (int32_t idx = 0; idx < pronounData->numValues(); idx++) {
const auto& pronounEntry = pronounData->getPronounEntry(idx);
std::u16string_view displayString(pronounEntry.first);
- if (displayString.back() == u' ') {
+ if (!displayString.empty() && displayString.back() == u' ') {
displayString.remove_suffix(1);
}
auto status = U_ZERO_ERROR;
diff --git a/inflection/src/inflection/dialog/language/MlCommonConceptFactory.cpp b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.cpp
new file mode 100644
index 00000000..e426c9e4
--- /dev/null
+++ b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2025 Unicode Incorporated and others. All rights reserved.
+ */
+
+#include
+#include
+#include
+
+namespace inflection::dialog::language {
+
+/**
+ * Constructs the Malayalam concept factory for the given language locale.
+ * The locale is forwarded to the CommonConceptFactoryImpl base class.
+ * NOTE(review): assumes CommonConceptFactoryImpl has a constructor taking a single ULocale — confirm.
+ */
+MlCommonConceptFactory::MlCommonConceptFactory(const ::inflection::util::ULocale& language)
+    : super(language)
+{
+}
+
+/**
+ * Out-of-line destructor matching the declaration in MlCommonConceptFactory.hpp.
+ */
+MlCommonConceptFactory::~MlCommonConceptFactory()
+{
+}
+
+/**
+ * Joins a formatted number and a noun phrase with a single space.
+ *
+ * When countType is Plurality::Rule::ONE the noun phrase is placed first
+ * ("noun number"); for every other count type the number is placed first
+ * ("number noun"). The measure word is ignored.
+ *
+ * NOTE(review): an earlier note here stated that Malayalam numbers generally
+ * follow the noun, which matches only the ONE branch — confirm the intended
+ * order for the other count types.
+ *
+ * @param formattedNumber The already formatted number.
+ * @param nounPhrase The noun phrase being quantified.
+ * @param countType The plural rule selected for the number.
+ * @return The space-separated combination of the two inputs.
+ */
+::inflection::dialog::SpeakableString
+MlCommonConceptFactory::quantifiedJoin(const ::inflection::dialog::SpeakableString& formattedNumber,
+                                       const ::inflection::dialog::SpeakableString& nounPhrase,
+                                       const ::std::u16string& /*measureWord*/,
+                                       Plurality::Rule countType) const
+{
+    ::inflection::dialog::SpeakableString space(u" ");
+    if (countType == Plurality::Rule::ONE) {
+        return nounPhrase + space + formattedNumber;
+    }
+    return formattedNumber + space + nounPhrase;
+}
+
+} // namespace inflection::dialog::language
diff --git a/inflection/src/inflection/dialog/language/MlCommonConceptFactory.hpp b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.hpp
new file mode 100644
index 00000000..7bfab1dd
--- /dev/null
+++ b/inflection/src/inflection/dialog/language/MlCommonConceptFactory.hpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2025 Unicode Incorporated and others. All rights reserved.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace inflection::dialog::language {
+
+/**
+ * CommonConceptFactoryImpl specialization for Malayalam.
+ * It overrides how a formatted number and a noun phrase are joined.
+ */
+class MlCommonConceptFactory
+    : public CommonConceptFactoryImpl
+{
+    using super = CommonConceptFactoryImpl;
+
+public:
+    /**
+     * Creates the factory.
+     * @param language The locale this factory is created for.
+     */
+    explicit MlCommonConceptFactory(const ::inflection::util::ULocale& language);
+
+    ~MlCommonConceptFactory() override;
+
+protected:
+    /**
+     * Override of the base-class hook that combines a formatted number with a noun phrase.
+     * @param formattedNumber The already formatted number.
+     * @param nounPhrase The noun phrase being quantified.
+     * @param measureWord The measure word associated with the quantity.
+     * @param countType The plural rule selected for the number.
+     * @return The combined phrase.
+     */
+    ::inflection::dialog::SpeakableString quantifiedJoin(const ::inflection::dialog::SpeakableString& formattedNumber,
+                                                         const ::inflection::dialog::SpeakableString& nounPhrase,
+                                                         const ::std::u16string& measureWord,
+                                                         ::inflection::dialog::Plurality::Rule countType) const override;
+};
+
+} // namespace inflection::dialog::language
diff --git a/inflection/src/inflection/dialog/language/fwd.hpp b/inflection/src/inflection/dialog/language/fwd.hpp
index 6429ca3a..e952df27 100644
--- a/inflection/src/inflection/dialog/language/fwd.hpp
+++ b/inflection/src/inflection/dialog/language/fwd.hpp
@@ -1,4 +1,5 @@
/*
+ * Copyright 2025 Unicode Incorporated and others. All rights reserved.
* Copyright 2017-2024 Apple Inc. All rights reserved.
*/
// Forward declarations for inflection.dialog.language
@@ -28,6 +29,7 @@ namespace inflection
class JaCommonConceptFactory;
class KoCommonConceptFactory;
class KoCommonConceptFactory_KoAndList;
+ class MlCommonConceptFactory;
class MsCommonConceptFactory;
class NbCommonConceptFactory;
class NlCommonConceptFactory;
diff --git a/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp b/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp
index ecb31303..242101af 100644
--- a/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp
+++ b/inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp
@@ -1,4 +1,5 @@
/*
+ * Copyright 2025 Unicode Incorporated and others. All rights reserved.
* Copyright 2017-2024 Apple Inc. All rights reserved.
*/
#include
@@ -13,6 +14,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -41,6 +43,7 @@ static const ::std::map<::inflection::util::ULocale, addSemanticFeatures>& GRAMM
{::inflection::util::LocaleUtils::HINDI(), &HiGrammarSynthesizer::addSemanticFeatures},
{::inflection::util::LocaleUtils::ITALIAN(), &ItGrammarSynthesizer::addSemanticFeatures},
{::inflection::util::LocaleUtils::KOREAN(), &KoGrammarSynthesizer::addSemanticFeatures},
+ {::inflection::util::LocaleUtils::MALAYALAM(), &MlGrammarSynthesizer::addSemanticFeatures},
{::inflection::util::LocaleUtils::NORWEGIAN(), &NbGrammarSynthesizer::addSemanticFeatures},
{::inflection::util::LocaleUtils::DUTCH(), &NlGrammarSynthesizer::addSemanticFeatures},
{::inflection::util::LocaleUtils::PORTUGUESE(), &PtGrammarSynthesizer::addSemanticFeatures},
diff --git a/inflection/src/inflection/grammar/synthesis/GrammemeConstants.cpp b/inflection/src/inflection/grammar/synthesis/GrammemeConstants.cpp
index 52d1f31f..9fa24c65 100644
--- a/inflection/src/inflection/grammar/synthesis/GrammemeConstants.cpp
+++ b/inflection/src/inflection/grammar/synthesis/GrammemeConstants.cpp
@@ -159,6 +159,12 @@ const ::std::u16string& GrammemeConstants::CASE_PREPOSITIONAL()
return *npc(CASE_PREPOSITIONAL_);
}
+/**
+ * Returns the grammeme value string u"sociative".
+ * The string is allocated on the first call and the same object is returned on every later call.
+ */
+const ::std::u16string& GrammemeConstants::CASE_SOCIATIVE()
+{
+    // Function-local static: the allocation runs once and is not released in this function.
+    static ::std::u16string* sociativeValue = new ::std::u16string(u"sociative");
+    return *npc(sociativeValue);
+}
+
const ::std::u16string& GrammemeConstants::CASE_TRANSLATIVE()
{
static auto CASE_TRANSLATIVE_ = new ::std::u16string(u"translative");
@@ -279,6 +285,12 @@ const ::std::u16string& GrammemeConstants::MOOD_INDICATIVE()
return *npc(MOOD_INDICATIVE_);
}
+/**
+ * Returns the grammeme value string u"subjunctive".
+ * The string is allocated on the first call and the same object is returned on every later call.
+ */
+const ::std::u16string& GrammemeConstants::MOOD_SUBJUNCTIVE()
+{
+    // Function-local static: the allocation runs once and is not released in this function.
+    static ::std::u16string* subjunctiveValue = new ::std::u16string(u"subjunctive");
+    return *npc(subjunctiveValue);
+}
+
const ::std::u16string& GrammemeConstants::TENSE_PAST()
{
static auto TENSE_PAST_ = new ::std::u16string(u"past");
diff --git a/inflection/src/inflection/grammar/synthesis/GrammemeConstants.hpp b/inflection/src/inflection/grammar/synthesis/GrammemeConstants.hpp
index 4010c1ba..60cc2133 100644
--- a/inflection/src/inflection/grammar/synthesis/GrammemeConstants.hpp
+++ b/inflection/src/inflection/grammar/synthesis/GrammemeConstants.hpp
@@ -42,6 +42,7 @@ class inflection::grammar::synthesis::GrammemeConstants final
static const ::std::u16string& CASE_OBLIQUE();
static const ::std::u16string& CASE_PARTITIVE();
static const ::std::u16string& CASE_PREPOSITIONAL();
+ static const ::std::u16string& CASE_SOCIATIVE();
static const ::std::u16string& CASE_TRANSLATIVE();
static const ::std::u16string& CASE_VOCATIVE();
@@ -75,6 +76,7 @@ class inflection::grammar::synthesis::GrammemeConstants final
static constexpr auto MOOD = u"mood";
static const ::std::u16string& MOOD_IMPERATIVE();
static const ::std::u16string& MOOD_INDICATIVE();
+ static const ::std::u16string& MOOD_SUBJUNCTIVE();
static constexpr auto TENSE = u"tense";
static const ::std::u16string& TENSE_PAST();
diff --git a/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.cpp b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.cpp
new file mode 100644
index 00000000..abcc7b65
--- /dev/null
+++ b/inflection/src/inflection/grammar/synthesis/MlGrammarSynthesizer.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2025 Unicode Incorporated and others. All rights reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include