diff --git a/Build/Windows.targets b/Build/Windows.targets index 046805c114..d96c66e6ee 100644 --- a/Build/Windows.targets +++ b/Build/Windows.targets @@ -205,7 +205,7 @@ Condition="'$(SkipXsltCompilation)'!='true'" /> + + + + + + + + +
+

+ The following error + + + s were + + + was + + + found while the parser was loading the information getting ready to parse words. +

+
    + + + + + + + + + + + + +
  • + The phoneme " + + " does not have a grapheme defined for the default vernacular writing system. + + + + + (Click here to see the phoneme.) + +
  • +
    + +
  • + The phoneme " + + " has the same grapheme as another phoneme. + + + + + (Click here to see the phoneme.) + +
  • +
    + +
  • + The + + + left hand side + + + right hand side + + + of the affix process rule " + + " contains an invalid natural class or phoneme. + + + + + (Click here to see the entry.) + +
  • +
    + +
  • + The environment " + + " in allomorph " + + " is invalid. The environment will be ignored. Reason: + + + + + + + (Click here to see the entry.) + +
  • +
    + +
  • + The reduplication form " + + " is invalid. Reason: + + + + + + + (Click here to see the entry.) + +
  • +
    + + +
  • + +
  • +
    +
    +
    +
+
+
+
+
+ + + + + + + +
  • + There is at least one undefined phoneme in the form " + + ". The following phonemes were parsed: " + + ". The problem begins with character/diacritic number + + -- that is, in the part of the form " + + ". Please make sure all phonemes in the form have been defined. The Hermit Crab parser will ignore this entry until it is fixed. + + + + + (Click here to see the lexical entry.) + +
  • +
    + + + + +
    +

    + The following data issue + + + s were + + + was + + + found that may affect how the synthesis works. When the Hermit Crab synthesizer uses a natural class during its synthesis process, the natural class will use the phonological features which are the intersection of the features of all the phonemes in the class while trying to see if a segment matches the natural class. The implied phonological features are shown for each class below and mean that it will match any of the predicted phonemes shown. (If the implied features field is blank, then it will match *all* phonemes.) For each of the natural classes shown below, the set of predicted phonemes is not the same as the set of actual phonemes. You will need to rework your phonological feature system and the assignment of these features to phonemes to make it correct. +

    + + + + + + + + +
    + + + + + + + +
    + +
    + [ + + ] +
    +
    + + + + + + + + + + + + + +
    Implied Features + +
    Predicted Phonemes + +
    Actual Phonemes + +
    +
    + +
    + +
    +
    +
    +
    diff --git a/Src/Transforms/Presentation/HCSynthByGlossFormatHCTrace.xsl b/Src/Transforms/Presentation/HCSynthByGlossFormatHCTrace.xsl new file mode 100644 index 0000000000..bd6da4eade --- /dev/null +++ b/Src/Transforms/Presentation/HCSynthByGlossFormatHCTrace.xsl @@ -0,0 +1,1704 @@ + + + + + + + + file:///C:/fwrepo/fw/DistFiles/Language Explorer/Configuration/Words/Analyses/TraceParse/ + + + Times New Roman + + + 10pt + + + Charis SIL + + + 20pt + + + N + + + true + + + + + + + + + + + true + + + false + + + + + + green; font-weight:bold + + + red + + + blue + + + + + + + + + + +

    + Synthesis of + + + color: + + ; font-family: + + + + + . +

    + + + + + + +
    + + + + + + + + + + + + + + + Y + + + + + + + ; font-family: + + ; font-size: + + + + + + + + + + + + + + + + ; font-family: + + ; font-size: + + + + + + + + + + + + , + + + + + + + , + + + + + +

    Result

    + + +

    + This analysis synthesized successfully. The following are the forms that synthesized (the first line is the form, the second line shows the morpheme breaks via a plus sign): +

    +
    + + direction:rtl; text-align:right + + +
    +
    + +

    + + + color: + + + This word failed to synthesize successfully. + +

    + +

    + + + font-size:larger; color: + + + An error was detected! + + +

    +
    +
    +
    + + +
    + + + + + + + + ; font-size:smaller + + + + Category = + + + + + + + + + + ?? + + + + + + ; Slot = + + ( + + + + + + + + + ) + + + + ; Unspecified slot or category + + + + + From category = + + + + + + + + + + ?? + + + + ; To category = + + + + + + + + + + ?? + + + + + ; To inflection class = + + + + + + + + + + unclassified affix + + + + + + + + + + + + + + ; Category = + + + + + + + + + + ?? + + + ; Attaches to: + + + + + , + + or + + + + + + + + + + + + Any category + + + + + + + clitic + + ; Category = + + + + + + + + + + + Category = + + + + + + + + + + ?? + + + + + ; Inflection class = + + + + + + + + + + + + + + + + + + + + + + + + + + + + color: + + ; font-size:smaller + +   (Reason: This is a duplicate synthesis and has been pruned.) + + + + + + + color: + + ; font-size:smaller + +   (Reason: + + + Ad-hoc prohibition rule failed. The + + , + + + + cursor:default + + + + + + + + + + + + + + + + + , cannot occur + + + adjacent before + + + adjacent after + + + somewhere before + + + somewhere after + + + anywhere around + + + + + + these items: + + + this item: + + + + + + + + + + + , + + + + + + cursor:default + + + + + + + + + , + + + . + + + The synthesized surface form does not match the input word. + + + A bound stem or root was found completely by itself. These must have at least one other morpheme present. + + + The valid synthesis ' + + + + + + + ' takes precedence over this synthesis. + + + The synthesis's inflection features ' + + ' conflict with the following features required by the allomorph ' + + + cursor:default + + + + + + + + ': + + . + + + The allomorph ' + + + cursor:default + + + + + + + + ' has a stem name of ' + + ', therefore it requires some inflectional affixes with inflection features for that stem name, but there aren't any such inflectional affixes. 
+ + + The synthesis contains inflectional features that match the stem name ' + + ', which was specified by another allomorph in the stem/root entry ' + + + cursor:default + + + + + + + + '. + + + + + Environment incorrect for allomorph ' + + + cursor:default + + + + + + + + + + + ( + + ) + + + ': + + + Environment incorrect: + + + + + + + + + + + , + + + . + + + The synthesized form does not match the input side of this affix process rule. + + + The synthesis's inflection features ' + + ' conflict with the following required features: + + . + + + The stem/root does not have the stem name ' + + '. + + + + + + + + This null affix can only attach to an irregularly inflected form. + + + This affix cannot attach to an irregularly inflected form. + + + This synthesis does not include all analyzed morphemes. + + + Further derivation is required after a non-final template. + + + Applicable affix templates were found, but none were applied. + + + The synthesis's part of speech ' + + ' conflicts with the following required parts of speech: + + . + + + Further derivation is prohibited after a final template. + + + Further derivation is required after a non-final template, but this affix is not derivational. + + + An affix cannot be applied more than once. + + + ) + + + + + + + + + + + + + + + + + + singleminus + + + beginminus + + + lastminus + + + minus + + + + RTL + + .gif + + + + + singleplus + + + beginplus + + + lastplus + + + plus + + + + RTL + + .gif + + + + + + + \ + + + + Toggle(this.parentNode, " + + /", + + + 8) + + + 0) + + + + + + + + + + + + cursor:default;color: + + + + + + + + + + + + + + + + + ; text-align:right; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + direction:rtl + + + + + + + + + + + +
    + + + direction:ltr + + + + + + + + + + + +
    + + direction:ltr + + + + + + + + + + +
    + + + direction:rtl + + ; font-size:smaller + + Template = + + + + + + +
    + + + direction:rtl + + ; font-size:smaller + + Output = + + + + + + +
    +
    +
    + + + + + + cursor:default;color: + + + ; text-align:right; + + + + + + + + + + + + + + + + + +
    + + + direction:rtl + + + + +
    + + + direction:rtl + + + + +
    + + + direction:ltr + + + + + + + + +   + + +
    + + direction:ltr + + + + +
    +
    +
    + + + + + + + + + + + + + + + + + + + + +
    Input + + + direction:rtl + + + + + Rule not applied because...
    + + + + + + + + + + + + + + + + + + Rule + + + + + + + direction:rtl + + + + + + + + The stem's category is + + , but this rule only applies when the stem's category is + + + + + + + + + + + + + + +
    +
    + + + + + + + inflection classes + + + exception features + + + + + + The stem has the following + + : + + + + + + + + + + The stem does not have any + + , + + + + + but this rule only applies when the stem has the following + + + but this rule only applies when the stem has none of the following + + + + : + + + + + + + and + + + or + + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + + + + + + + color: + + + + + + + + + + + + + + +
    + + + + + + + +
    + +
    + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + color: + + + Phonological rules applied: + + + +
    + + color: + + + Synthesis completed. +
    + + + + color: + + + ; text-align:right; + + + + + + + + + + +
    + + + direction:rtl + + ; font-size:smaller + + Result = + + + + + + +
    + + + direction:ltr + + ; font-size:smaller + + + + + (Synthesis succeeded)   + + +   (Synthesis succeeded!) + + +
    +
    +
    + +
    + + + + + +
    +
    +
    + + + + display:block + + + display:none + + + + + + + + + + + + + + + + + + + +
    +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +

    Synthesis Details

    +

    + In the following, any item shown in + + + color: + + + green + + has a path to a successful synthesis. Any item shown in + + + color: + + + red + + does not have a path to a successful synthesis (i.e. this path failed to produce a successful form). The reason for the failure is shown at the end of a line as + + + color: + + ; font-size:smaller + + (Reason: XXX) + + , where XXX is the reason. Sometimes you have to follow a path to find a reason. +

    +

    + This particular synthesizer works as follows: +

      +
    1. Given a possible analysis, it then builds the word in a generative fashion, from the root out.
    2. +
    3. When there are any affixes, it tries various permutations of them.
    4. +
    5. When there are any inflectional templates that might apply, it applies each template in the order of suffixes from the root out followed by prefixes from the root out.
    6. +
    7. Then it applies any phonological rules in their generative order.
    8. +
    9. The result is then declared to be successful.
    10. +
    +

    +

    Click on the box by a morpheme to follow a path.

    +
    + + direction:rtl; text-align:right + + + + + + +

    + + color: + + ; font-size:larger + + There was an error trying to synthesize this analysis. +
    + +

    +
    + + + + + +
    + + + + + +
    +
    +
    +
    +
    +
    + + +

    + +

    +
    +
    diff --git a/Src/Utilities/HCSynthByGloss/.gitignore b/Src/Utilities/HCSynthByGloss/.gitignore new file mode 100644 index 0000000000..60fa6ac6a8 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/.gitignore @@ -0,0 +1,20 @@ +.vs +*~ +*.bak +temp +bin/ +obj/ +packages/ +Language Explorer/ +NUnit-2.6.4/ +NUnit-2.6.4.zip +*.lock +XLingPaperPDFTemp/ +UserDocumentation.css +UserDocumentation.htm +*.csproj.user +GenerateHCConfig4FLExTransTest/ +RegressionTests/ +results.txt +.gitB4/ +.githubB4/ diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/App.config b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/App.config new file mode 100644 index 0000000000..34d6e93055 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/App.config @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/ConsoleLogger.cs b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/ConsoleLogger.cs new file mode 100644 index 0000000000..67c9d1ae17 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/ConsoleLogger.cs @@ -0,0 +1,127 @@ +using System; +using System.ComponentModel; +using SIL.LCModel; +using SIL.FieldWorks.WordWorks.Parser; + +namespace GenerateHCConfig +{ + internal class ConsoleLogger : ILcmUI, IHCLoadErrorLogger + { + private readonly ISynchronizeInvoke m_synchronizeInvoke; + + public ConsoleLogger(ISynchronizeInvoke synchronizeInvoke) + { + m_synchronizeInvoke = synchronizeInvoke; + } + + public ISynchronizeInvoke SynchronizeInvoke + { + get { return m_synchronizeInvoke; } + } + + public bool ConflictingSave() + { + throw new NotImplementedException(); + } + + public DateTime LastActivityTime + { + get { return DateTime.Now; } + } + + public FileSelection ChooseFilesToUse() + { + throw new NotImplementedException(); + } + + public bool RestoreLinkedFilesInProjectFolder() + { + throw new 
NotImplementedException(); + } + + public YesNoCancel CannotRestoreLinkedFilesToOriginalLocation() + { + throw new NotImplementedException(); + } + + public void DisplayMessage(MessageType type, string message, string caption, string helpTopic) + { + Console.WriteLine(message); + } + + public void ReportException(Exception error, bool isLethal) + { + Console.WriteLine(error.Message); + } + + public void ReportDuplicateGuids(string errorText) + { + Console.WriteLine(errorText); + } + + public void DisplayCircularRefBreakerReport(string msg, string caption) + { + Console.WriteLine("{0}: {1}", caption, msg); + } + + public bool Retry(string msg, string caption) + { + throw new NotImplementedException(); + } + + public bool OfferToRestore(string projectPath, string backupPath) + { + throw new NotImplementedException(); + } + + public void InvalidShape(string str, int errorPos, IMoMorphSynAnalysis msa) + { + Console.WriteLine("The form \"{0}\" contains an undefined phoneme at {1}.", str, errorPos); + } + + public void InvalidAffixProcess(IMoAffixProcess affixProcess, bool isInvalidLhs, IMoMorphSynAnalysis msa) + { + Console.WriteLine("The affix process \"{0}\" is invalid.", affixProcess.Form.BestVernacularAlternative.Text); + } + + public void InvalidPhoneme(IPhPhoneme phoneme) + { + Console.WriteLine("The phoneme \"{0}\" does not contain any valid graphemes.", phoneme.Name.BestAnalysisVernacularAlternative.Text); + } + + public void DuplicateGrapheme(IPhPhoneme phoneme) + { + Console.WriteLine("The phoneme \"{0}\" has the same grapheme as another phoneme.", phoneme.Name.BestAnalysisVernacularAlternative.Text); + } + + public void InvalidEnvironment(IMoForm form, IPhEnvironment env, string reason, IMoMorphSynAnalysis msa) + { + Console.WriteLine("The environment \"{0}\" is invalid. 
Reason: {1}", env.StringRepresentation.Text, reason); + } + + public void InvalidReduplicationForm(IMoForm form, string reason, IMoMorphSynAnalysis msa) + { + Console.WriteLine("The reduplication form \"{0}\" is invalid. Reason: {1}", form.Form.VernacularDefaultWritingSystem.Text, reason); + } + + public void InvalidRewriteRule(IPhRegularRule rule, string reason) + { + Console.WriteLine("The rewrite rule \"{0}\" is invalid. Reason: {1}", rule.Name.BestAnalysisVernacularAlternative.Text, reason); + } + + public void InvalidStrata(string strata, string reason) + { + Console.WriteLine(reason); + } + + public void OutOfScopeSlot(IMoInflAffixSlot slot, IMoInflAffixTemplate template, string reason) + { + Console.WriteLine(reason); + } + + public void UnmatchedReduplicationIndexedClass(IMoForm form, string reason, string environment) + { + throw new NotImplementedException(); + } + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/DuplicateGlossChecker.cs b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/DuplicateGlossChecker.cs new file mode 100644 index 0000000000..ffdc71a916 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/DuplicateGlossChecker.cs @@ -0,0 +1,55 @@ +// Copyright (c) 2023 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using SIL.Machine.Morphology.HermitCrab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Xml.Linq; +using System.Xml.XPath; + +namespace SIL.GenerateHCConfigForFLExTrans +{ + public class DuplicateGlossChecker + { + Language Language { get; set; } + XDocument HCConfiguration { get; set; } + + public DuplicateGlossChecker(string HCConfig) + { + HCConfiguration = XDocument.Load(HCConfig); + } + + public void ReportAnyDuplicateGlosses() + { + var glosses = new List(); + var duplicateGlosses = new List(); + + var query = + from c in 
HCConfiguration.Root.Descendants("MorphologicalRule").Descendants("Gloss") + select c; + foreach (XElement g in query) + { + XElement name = g.XPathSelectElement("preceding-sibling::Name"); + string sName = (name != null) ? name.Value : ""; + glosses.Add(new DuplicateGlossInfo(sName, g.Value)); + } + glosses.Sort(); + DuplicateGlossInfo lastInfo = new DuplicateGlossInfo("", ""); + foreach (DuplicateGlossInfo dupInfo in glosses) + { + if (lastInfo.Gloss == dupInfo.Gloss) + { + Console.WriteLine( + "Duplicate gloss found for \"" + + lastInfo.ToString() + + "\" and \"" + + dupInfo.ToString() + "\"" + ); + } + lastInfo = dupInfo; + } + } + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/DuplicateGlossInfo.cs b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/DuplicateGlossInfo.cs new file mode 100644 index 0000000000..f45995f0e6 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/DuplicateGlossInfo.cs @@ -0,0 +1,39 @@ +// Copyright (c) 2023 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace SIL.GenerateHCConfigForFLExTrans +{ + public class DuplicateGlossInfo : IComparable + { + public DuplicateGlossInfo(string name, string gloss) + { + Name = name; + Gloss = gloss; + } + + public string Name { get; set; } + public string Gloss { get; set; } + + public int CompareTo(object obj) + { + DuplicateGlossInfo dup2 = (DuplicateGlossInfo)obj; + int compare = Gloss.CompareTo(dup2.Gloss); + if (compare == 0) + { + // the gloss is the same; sort by name + compare = Name.CompareTo(dup2.Name); + } + return compare; + } + + public override string ToString() => $"({Gloss}, {Name})"; + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/GenerateHCConfigForFLExTrans.cs 
b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/GenerateHCConfigForFLExTrans.cs new file mode 100644 index 0000000000..d3db7e2d47 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/GenerateHCConfigForFLExTrans.cs @@ -0,0 +1,107 @@ +// Copyright (c) 2016-2023 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System; +using System.IO; +using SIL.FieldWorks.Common.FwUtils; +using SIL.FieldWorks.WordWorks.Parser; +using SIL.LCModel; +using SIL.LCModel.Utils; +using SIL.Machine.Annotations; +using SIL.Machine.Morphology.HermitCrab; +using SIL.WritingSystems; +using GenerateHCConfig; + +namespace SIL.GenerateHCConfigForFLExTrans +{ + public class GenerateHCConfigForFLExTrans + { + static int Main(string[] args) + { + if (args.Length < 2) + { + WriteHelp(); + return 0; + } + + if (!File.Exists(args[0])) + { + Console.WriteLine("The FieldWorks project file could not be found."); + return 1; + } + return Generate(args[0], args[1]); + } + + public static int Generate(string flexDB, string hcXML) + { + FwRegistryHelper.Initialize(); + FwUtils.InitializeIcu(); + Sldr.Initialize(); + var synchronizeInvoke = new SingleThreadedSynchronizeInvoke(); + + var projectId = new ProjectIdentifier(flexDB); + var logger = new ConsoleLogger(synchronizeInvoke); + var dirs = new NullFdoDirectories(); + var settings = new LcmSettings { DisableDataMigration = true }; + var progress = new NullThreadedProgress(synchronizeInvoke); + Console.WriteLine("Loading FieldWorks project..."); + try + { + using ( + LcmCache cache = LcmCache.CreateCacheFromExistingData( + projectId, + "en", + logger, + dirs, + settings, + progress + ) + ) + { + Language language = HCLoaderForFLExTrans.Load(cache, logger); + Console.WriteLine("Loading completed."); + Console.WriteLine("Writing HC configuration file..."); + XmlLanguageWriter.Save(language, hcXML); + Console.WriteLine("Checking for 
duplicate glosses."); + var dupChecker = new DuplicateGlossChecker(hcXML); + dupChecker.ReportAnyDuplicateGlosses(); + Console.WriteLine("Writing completed."); + } + return 0; + } + catch (LcmFileLockedException) + { + Console.WriteLine("Loading failed."); + Console.WriteLine( + "The FieldWorks project is currently open in another application." + ); + Console.WriteLine("Close the application and try to run this command again."); + return 2; + } + catch (LcmDataMigrationForbiddenException) + { + Console.WriteLine("Loading failed."); + Console.WriteLine( + "The FieldWorks project was created with an older version of FLEx." + ); + Console.WriteLine( + "Migrate the project to the latest version by opening it in FLEx." + ); + return 3; + } + } + + private static void WriteHelp() + { + Console.WriteLine( + "Generates a HermitCrab configuration file from a FieldWorks project, suitable for using HermitCrab synthesis with FLExTrans." + ); + Console.WriteLine(); + Console.WriteLine("generatehcconfig "); + Console.WriteLine(); + Console.WriteLine(" Specifies the FieldWorks project path."); + Console.WriteLine(" Specifies the HC configuration path."); + } + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/GenerateHCConfigForFLExTrans.csproj b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/GenerateHCConfigForFLExTrans.csproj new file mode 100644 index 0000000000..a4714e994d --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/GenerateHCConfigForFLExTrans.csproj @@ -0,0 +1,134 @@ + + + + + Debug + AnyCPU + {39D15298-8C61-48F6-91CB-2809531CC94F} + Exe + SIL.GenerateHCConfigForFLExTrans + GenerateHCConfigForFLExTrans + v4.6.2 + 512 + true + true + + + + AnyCPU + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + AnyCPU + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + true + ..\..\..\..\Output\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + true + + + bin\x64\Release\ + 
TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + true + + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\CommonServiceLocator.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\FwUtils.dll + + + ..\..\..\..\..\..\Users\Andy Black\Documents\FieldWorks\FLExTrans\HCSynthByGloss\HCSynthByGlossLib\bin\x64\Debug\HCSynthByGlossLib.dll + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\icu.net.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Archiving.dll + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Core.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.LCModel.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.LCModel.Core.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.LCModel.Utils.dll + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Machine.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Machine.Morphology.HermitCrab.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.WritingSystems.dll + + + + + False + ..\..\..\..\Output\Debug\System.ValueTuple.dll + + + + + + + + ..\..\..\..\Output\Debug\xWorks.dll + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/HCLoaderForFLExTrans.cs b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/HCLoaderForFLExTrans.cs new file mode 100644 index 0000000000..150e7d788c --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/HCLoaderForFLExTrans.cs @@ -0,0 +1,3203 @@ +// Copyright (c) 2015-2023 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System; +using System.Collections.Generic; +using System.ComponentModel; +using System.Globalization; +using System.Linq; +using System.Text; +using System.Xml; +using System.Xml.Linq; +using SIL.Collections; +using SIL.Extensions; +using SIL.LCModel.Core.Phonology; 
+using SIL.LCModel.Core.WritingSystems; +using SIL.LCModel; +using SIL.Machine.Annotations; +using SIL.Machine.DataStructures; +using SIL.Machine.FeatureModel; +using SIL.Machine.Matching; +using SIL.Machine.Morphology.HermitCrab; +using SIL.Machine.Morphology.HermitCrab.MorphologicalRules; +using SIL.Machine.Morphology.HermitCrab.PhonologicalRules; + +namespace SIL.FieldWorks.WordWorks.Parser +{ + // This is basically the same as HCLoader from LexText.ParserCore + // We redid the GetGloss method to use Apertium-style "glosses" + // Also we force root forms and root glosses to be lower case + public class HCLoaderForFLExTrans + { + public static Language Load(LcmCache cache, IHCLoadErrorLogger logger) + { + var loader = new HCLoaderForFLExTrans(cache, logger); + loader.LoadLanguage(); + return loader.m_language; + } + + private static readonly string[] VariableNames = + { + "α", + "β", + "γ", + "δ", + "ε", + "ζ", + "η", + "θ", + "ι", + "κ", + "λ", + "μ", + "ν", + "ξ", + "ο", + "π", + "ρ", + "σ", + "τ", + "υ", + "φ", + "χ", + "ψ", + "ω" + }; + + // A caret ^ is invalid in IDs. Plain reduplication patterns have them and can be used in IDs. + // We replace them with a period (which is allowed in IDs). 
+ private static readonly string kCaretInID = "."; + + private readonly LcmCache m_cache; + private readonly Dictionary> m_allomorphs; + private readonly Dictionary> m_morphemes; + private readonly Dictionary m_stemNames; + private readonly Dictionary m_mprFeatures; + + private Language m_language; + private CharacterDefinitionTable m_table; + private Stratum m_morphophonemic; + private Stratum m_clitic; + private ComplexFeature m_headFeature; + private SymbolicFeature m_posFeature; + + private readonly IHCLoadErrorLogger m_logger; + private readonly PhonEnvRecognizer m_envValidator; + private readonly Dictionary m_naturalClassLookup; + private readonly Dictionary m_naturalClasses; + private readonly Dictionary m_charDefs; + + private readonly bool m_noDefaultCompounding; + private readonly bool m_notOnClitics; + private readonly bool m_acceptUnspecifiedGraphemes; + + private SimpleContext m_any; + private CharacterDefinition m_null; + private CharacterDefinition m_morphBdry; + + // Following moved from HCParser as we do not need anything else from HCParser + internal const string CRuleID = "ID"; + internal const string FormID = "ID"; + internal const string FormID2 = "ID2"; + internal const string InflTypeID = "InflTypeID"; + internal const string MsaID = "ID"; + internal const string PRuleID = "ID"; + internal const string SlotID = "SlotID"; + internal const string TemplateID = "ID"; + + internal const string IsNull = "IsNull"; + internal const string IsPrefix = "IsPrefix"; + internal const string Env = "Env"; + internal const string PrefixEnv = "PrefixEnv"; + internal const string SuffixEnv = "SuffixEnv"; + + private HCLoaderForFLExTrans(LcmCache cache, IHCLoadErrorLogger logger) + { + m_cache = cache; + m_logger = logger; + m_allomorphs = new Dictionary>(); + m_morphemes = new Dictionary>(); + m_stemNames = new Dictionary(); + m_mprFeatures = new Dictionary(); + + m_envValidator = new PhonEnvRecognizer( + 
RemoveDottedCircles(m_cache.LangProject.PhonologicalDataOA.AllPhonemes().ToArray()), + m_cache.LangProject.PhonologicalDataOA.AllNaturalClassAbbrs().ToArray() + ); + + m_naturalClassLookup = new Dictionary(); + foreach ( + IPhNaturalClass nc in m_cache.LanguageProject.PhonologicalDataOA.NaturalClassesOS + ) + m_naturalClassLookup[nc.Abbreviation.BestAnalysisAlternative.Text] = nc; + + XElement parserParamsElem = XElement.Parse( + m_cache.LanguageProject.MorphologicalDataOA.ParserParameters + ); + XElement hcElem = parserParamsElem.Element("HC"); + m_noDefaultCompounding = + hcElem != null && ((bool?)hcElem.Element("NoDefaultCompounding") ?? false); + m_notOnClitics = hcElem == null || ((bool?)hcElem.Element("NotOnClitics") ?? true); + m_acceptUnspecifiedGraphemes = + hcElem != null && ((bool?)hcElem.Element("AcceptUnspecifiedGraphemes") ?? false); + + m_naturalClasses = new Dictionary(); + m_charDefs = new Dictionary(); + } + + private string[] RemoveDottedCircles(string[] phonemes) + { + return phonemes.Select(RemoveDottedCircles).ToArray(); + } + + private string RemoveDottedCircles(string text) + { + string dottedCircle = "\u25CC"; + return text?.Replace(dottedCircle, string.Empty); + } + + private void LoadLanguage() + { + m_language = new Language { Name = m_cache.ProjectId.Name }; + + var inflClassesGroup = new MprFeatureGroup + { + Name = "inflClasses", + MatchType = MprFeatureGroupMatchType.Any + }; + var posSymbols = new List(); + foreach (IPartOfSpeech pos in m_cache.LanguageProject.AllPartsOfSpeech) + { + posSymbols.Add( + new FeatureSymbol("pos" + pos.Hvo) + { + Description = pos.Abbreviation.BestAnalysisAlternative.Text + } + ); + foreach (IMoInflClass inflClass in pos.InflectionClassesOC) + LoadInflClassMprFeature(inflClass, inflClassesGroup); + } + if (inflClassesGroup.MprFeatures.Count > 0) + m_language.MprFeatureGroups.Add(inflClassesGroup); + + var prodRestrictsGroup = new MprFeatureGroup + { + Name = "exceptionFeatures", + MatchType = 
MprFeatureGroupMatchType.All + }; + foreach ( + ICmPossibility prodRestrict in m_cache + .LanguageProject + .MorphologicalDataOA + .ProdRestrictOA + .ReallyReallyAllPossibilities + ) + LoadMprFeature(prodRestrict, prodRestrictsGroup); + if (prodRestrictsGroup.MprFeatures.Count > 0) + m_language.MprFeatureGroups.Add(prodRestrictsGroup); + + var lexEntryInflTypesGroup = new MprFeatureGroup + { + Name = "lexEntryInflTypes", + MatchType = MprFeatureGroupMatchType.All + }; + foreach ( + ILexEntryInflType inflType in m_cache.ServiceLocator + .GetInstance() + .AllInstances() + ) + LoadMprFeature(inflType, lexEntryInflTypesGroup); + if (lexEntryInflTypesGroup.MprFeatures.Count > 0) + m_language.MprFeatureGroups.Add(lexEntryInflTypesGroup); + + m_posFeature = m_language.SyntacticFeatureSystem.AddPartsOfSpeech(posSymbols); + m_headFeature = m_language.SyntacticFeatureSystem.AddHeadFeature(); + LoadFeatureSystem( + m_cache.LanguageProject.MsFeatureSystemOA, + m_language.SyntacticFeatureSystem + ); + + LoadFeatureSystem( + m_cache.LanguageProject.PhFeatureSystemOA, + m_language.PhonologicalFeatureSystem + ); + + var anyNC = new NaturalClass(FeatureStruct.New().Value) { Name = "Any" }; + m_language.NaturalClasses.Add(anyNC); + m_any = new SimpleContext(anyNC, Enumerable.Empty()); + + LoadCharacterDefinitionTable( + m_cache.LanguageProject.PhonologicalDataOA.PhonemeSetsOS[0] + ); + + foreach ( + IMoStemName stemName in m_cache.ServiceLocator + .GetInstance() + .AllInstances() + ) + { + var pos = stemName.OwnerOfClass(); + var regions = new List(); + foreach (IFsFeatStruc fs in stemName.RegionsOC.Where(fs => !fs.IsEmpty)) + { + var hcFS = new FeatureStruct(); + hcFS.AddValue( + m_headFeature, + LoadFeatureStruct(fs, m_language.SyntacticFeatureSystem) + ); + hcFS.AddValue(m_posFeature, LoadAllPartsOfSpeech(pos)); + hcFS.Freeze(); + regions.Add(hcFS); + } + + if (regions.Count > 0) + { + var hcStemName = new StemName(regions) + { + Name = stemName.Name.BestAnalysisAlternative.Text 
+ }; + m_stemNames[stemName] = hcStemName; + m_language.StemNames.Add(hcStemName); + } + } + + m_morphophonemic = new Stratum(m_table) + { + Name = "Morphophonemic", + MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered + }; + m_language.Strata.Add(m_morphophonemic); + + m_clitic = new Stratum(m_table) + { + Name = "Clitic", + MorphologicalRuleOrder = MorphologicalRuleOrder.Unordered + }; + m_language.Strata.Add(m_clitic); + + m_language.Strata.Add(new Stratum(m_table) { Name = "Surface" }); + + if ( + m_cache.LanguageProject.MorphologicalDataOA.CompoundRulesOS.Count == 0 + && !m_noDefaultCompounding + ) + { + m_morphophonemic.MorphologicalRules.AddRange(DefaultCompoundingRules()); + } + else + { + foreach ( + IMoCompoundRule compoundRule in m_cache.LanguageProject.MorphologicalDataOA.CompoundRulesOS.Where( + r => !r.Disabled + ) + ) + { + switch (compoundRule.ClassID) + { + case MoEndoCompoundTags.kClassId: + m_morphophonemic.MorphologicalRules.Add( + LoadEndoCompoundingRule((IMoEndoCompound)compoundRule) + ); + break; + + case MoExoCompoundTags.kClassId: + m_morphophonemic.MorphologicalRules.AddRange( + LoadExoCompoundingRule((IMoExoCompound)compoundRule) + ); + break; + } + } + } + + foreach (ILexEntry entry in m_cache.LanguageProject.LexDbOA.Entries) + { + var stemAllos = new List(); + var cliticStemAllos = new List(); + var affixAllos = new List(); + var cliticAffixAllos = new List(); + + foreach (IMoForm form in entry.AlternateFormsOS.Concat(entry.LexemeFormOA)) + { + if (form == null) + continue; + + if (IsValidLexEntryForm(form)) + { + if (IsCliticType(form.MorphTypeRA)) + cliticStemAllos.Add((IMoStemAllomorph)form); + else + stemAllos.Add((IMoStemAllomorph)form); + } + + if (IsValidRuleForm(form)) + { + if (IsCliticType(form.MorphTypeRA)) + cliticAffixAllos.Add(form); + else + affixAllos.Add(form); + } + } + + if (stemAllos.Count > 0) + LoadLexEntries(m_morphophonemic, entry, stemAllos); + if (cliticStemAllos.Count > 0) + LoadLexEntries(m_clitic, 
entry, cliticStemAllos); + if (affixAllos.Count > 0) + LoadMorphologicalRules(m_morphophonemic, entry, affixAllos); + if (cliticAffixAllos.Count > 0) + LoadMorphologicalRules(m_clitic, entry, cliticAffixAllos); + } + + foreach ( + IMoInflAffixTemplate template in m_cache.ServiceLocator + .GetInstance() + .AllInstances() + .Where(t => !t.Disabled) + ) + { + IMoInflAffixSlot[] slots = template.SuffixSlotsRS + .Concat(template.PrefixSlotsRS.Reverse()) + .Where(s => s.Affixes.Any(msa => m_morphemes.ContainsKey(msa))) + .ToArray(); + if (slots.Length > 0) + m_morphophonemic.AffixTemplates.Add(LoadAffixTemplate(template, slots)); + } + + foreach ( + IPhSegmentRule prule in m_cache.LanguageProject.PhonologicalDataOA.PhonRulesOS + .Where(r => !r.Disabled) + .OrderBy(r => r.OrderNumber) + ) + { + switch (prule.ClassID) + { + case PhRegularRuleTags.kClassId: + var regRule = (IPhRegularRule)prule; + if ( + regRule.StrucDescOS.Count > 0 + || regRule.RightHandSidesOS.Any(rhs => rhs.StrucChangeOS.Count > 0) + ) + { + RewriteRule hcRegRule = LoadRewriteRule(regRule); + if (hcRegRule == null) + continue; + // Choose which stratum the phonological rules apply on. + if (!m_notOnClitics) + m_clitic.PhonologicalRules.Add(hcRegRule); + else + m_morphophonemic.PhonologicalRules.Add(hcRegRule); + m_language.PhonologicalRules.Add(hcRegRule); + } + break; + + case PhMetathesisRuleTags.kClassId: + var metaRule = (IPhMetathesisRule)prule; + if (metaRule.LeftSwitchIndex != -1 && metaRule.RightSwitchIndex != -1) + { + MetathesisRule hcMetaRule = LoadMetathesisRule(metaRule); + + // Choose which stratum the phonological rules apply on. 
+ if (!m_notOnClitics) + m_clitic.PhonologicalRules.Add(hcMetaRule); + else + m_morphophonemic.PhonologicalRules.Add(hcMetaRule); + m_language.PhonologicalRules.Add(hcMetaRule); + } + break; + } + } + + m_language.NaturalClasses.AddRange(m_naturalClasses.Values.Where(nc => nc != null)); + + foreach ( + IMoAlloAdhocProhib alloAdhocProhib in m_cache.ServiceLocator + .GetInstance() + .AllInstances() + .Where( + a => !a.Disabled && a.FirstAllomorphRA != null && a.RestOfAllosRS.Count > 0 + ) + ) + { + LoadAllomorphCoOccurrenceRules(alloAdhocProhib); + } + + foreach ( + IMoMorphAdhocProhib morphAdhocProhib in m_cache.ServiceLocator + .GetInstance() + .AllInstances() + .Where( + a => !a.Disabled && a.FirstMorphemeRA != null && a.RestOfMorphsRS.Count > 0 + ) + ) + { + LoadMorphemeCoOccurrenceRules(morphAdhocProhib); + } + } + + private void LoadInflClassMprFeature( + IMoInflClass inflClass, + MprFeatureGroup inflClassesGroup + ) + { + LoadMprFeature(inflClass, inflClassesGroup); + foreach (IMoInflClass subclass in inflClass.SubclassesOC) + LoadInflClassMprFeature(subclass, inflClassesGroup); + } + + private bool HasValidRuleForm(ILexEntry entry) + { + if (entry.IsCircumfix() && entry.LexemeFormOA is IMoAffixAllomorph) + { + bool hasPrefix = false, + hasSuffix = false; + foreach (IMoForm form in entry.AlternateFormsOS.Where(IsValidRuleForm)) + { + if (form.MorphTypeRA.Guid == MoMorphTypeTags.kguidMorphPrefix) + hasPrefix = true; + else if (form.MorphTypeRA.Guid == MoMorphTypeTags.kguidMorphSuffix) + hasSuffix = true; + if (hasPrefix && hasSuffix) + return true; + } + return false; + } + return entry.AllAllomorphs.Any(IsValidRuleForm); + } + + private bool IsValidRuleForm(IMoForm form) + { + var affixProcess = form as IMoAffixProcess; + if (affixProcess != null) + return affixProcess.InputOS.Count > 1 || affixProcess.OutputOS.Count > 1; + + string formStr = RemoveDottedCircles(form.Form.VernacularDefaultWritingSystem.Text); + if (form.IsAbstract || 
string.IsNullOrEmpty(formStr)) + return false; + + if (form.MorphTypeRA != null) + { + switch (form.MorphTypeRA.Guid.ToString()) + { + case MoMorphTypeTags.kMorphProclitic: + case MoMorphTypeTags.kMorphEnclitic: + return true; + + case MoMorphTypeTags.kMorphPrefix: + case MoMorphTypeTags.kMorphPrefixingInterfix: + case MoMorphTypeTags.kMorphSuffix: + case MoMorphTypeTags.kMorphSuffixingInterfix: + if (formStr.Contains("[") && !formStr.Contains("[...]")) + return ((IMoAffixAllomorph)form).PhoneEnvRC.Any( + env => IsValidEnvironment(env.StringRepresentation.Text) + ); + return true; + + case MoMorphTypeTags.kMorphInfix: + case MoMorphTypeTags.kMorphInfixingInterfix: + return ((IMoAffixAllomorph)form).PositionRS.Any( + env => IsValidEnvironment(env.StringRepresentation.Text) + ); + } + } + + return false; + } + + private void LoadMprFeature(ICmObject obj, MprFeatureGroup group) + { + if (obj.ShortName != null) + { + var feat = new MprFeature { Name = obj.ShortName }; + group.MprFeatures.Add(feat); + m_mprFeatures[obj] = feat; + m_language.MprFeatures.Add(feat); + } + } + + private bool IsValidLexEntryForm(IMoForm form) + { + if (!(form is IMoStemAllomorph)) + return false; + + string formStr = RemoveDottedCircles(form.Form.VernacularDefaultWritingSystem.Text); + if (form.IsAbstract || string.IsNullOrEmpty(formStr)) + return false; + + return IsStemType(form.MorphTypeRA) || IsCliticType(form.MorphTypeRA); + } + + private static bool IsStemType(IMoMorphType type) + { + if (type == null) + return false; + + switch (type.Guid.ToString()) + { + case MoMorphTypeTags.kMorphRoot: + case MoMorphTypeTags.kMorphStem: + case MoMorphTypeTags.kMorphBoundRoot: + case MoMorphTypeTags.kMorphBoundStem: + case MoMorphTypeTags.kMorphPhrase: + return true; + } + + return false; + } + + private static bool IsCliticType(IMoMorphType type) + { + if (type == null) + return false; + + switch (type.Guid.ToString()) + { + case MoMorphTypeTags.kMorphClitic: + case MoMorphTypeTags.kMorphEnclitic: 
+ case MoMorphTypeTags.kMorphProclitic: + case MoMorphTypeTags.kMorphParticle: + return true; + } + + return false; + } + + private void LoadLexEntries(Stratum stratum, ILexEntry entry, IList allos) + { + if (entry.SensesOS.Count == 0) + { + foreach (ILexEntryRef lexEntryRef in entry.EntryRefsOS) + { + foreach (ILexEntryInflType inflType in GetInflTypes(lexEntryRef)) + { + foreach (ICmObject component in lexEntryRef.ComponentLexemesRS) + { + var mainEntry = component as ILexEntry; + if (mainEntry != null) + { + foreach ( + IMoStemMsa msa in mainEntry.MorphoSyntaxAnalysesOC.OfType() + ) + LoadLexEntryOfVariant(stratum, inflType, entry, msa, allos); + } + else + { + ILexSense sense = (ILexSense)component; + LoadLexEntryOfVariant( + stratum, + inflType, + entry, + (IMoStemMsa)sense.MorphoSyntaxAnalysisRA, + allos + ); + } + } + } + } + } + + foreach (ILexSense sense in entry.SensesOS) + { + IMoMorphSynAnalysis msaOfSense = sense.MorphoSyntaxAnalysisRA; + if (msaOfSense != null && msaOfSense.ClassID == MoStemMsaTags.kClassId) + { + IMoStemMsa msa = msaOfSense as IMoStemMsa; + string gloss = GetGlossForStem(sense, msa, false); + LoadLexEntry(stratum, gloss, msa, allos); + } + } + } + + private IEnumerable GetInflTypes(ILexEntryRef lexEntryRef) + { + if (lexEntryRef.VariantEntryTypesRS.Count == 0) + { + yield return null; + yield break; + } + + bool normalTypeFound = false; + foreach (ILexEntryType type in lexEntryRef.VariantEntryTypesRS) + { + var inflType = type as ILexEntryInflType; + if (inflType != null) + { + yield return inflType; + } + else if (!normalTypeFound) + { + yield return null; + normalTypeFound = true; + } + } + } + + private void AddEntry(Stratum stratum, LexEntry hcEntry, IMoMorphSynAnalysis msa) + { + if (hcEntry.Allomorphs.Count > 0) + { + stratum.Entries.Add(hcEntry); + m_morphemes.GetOrCreate(msa, () => new List()).Add(hcEntry); + } + } + + private void LoadLexEntry( + Stratum stratum, + string gloss, + IMoStemMsa msa, + IList allos + ) + { + var 
hcEntry = new LexEntry(); + + IMoInflClass inflClass = GetInflClass(msa); + if (inflClass != null) + hcEntry.MprFeatures.Add(m_mprFeatures[inflClass]); + + foreach (ICmPossibility prodRestrict in msa.ProdRestrictRC) + hcEntry.MprFeatures.Add(m_mprFeatures[prodRestrict]); + + // hcEntry.Gloss = GetGloss(msa, false); + hcEntry.Gloss = gloss; + + var fs = new FeatureStruct(); + if (msa.PartOfSpeechRA != null) + fs.AddValue( + m_posFeature, + m_posFeature.PossibleSymbols["pos" + msa.PartOfSpeechRA.Hvo] + ); + else + hcEntry.IsPartial = true; + if (msa.MsFeaturesOA != null && !msa.MsFeaturesOA.IsEmpty) + fs.AddValue( + m_headFeature, + LoadFeatureStruct(msa.MsFeaturesOA, m_language.SyntacticFeatureSystem) + ); + fs.Freeze(); + hcEntry.SyntacticFeatureStruct = fs; + + hcEntry.Properties[MsaID] = msa.Hvo; + + foreach (IMoStemAllomorph allo in allos) + { + try + { + RootAllomorph hcAllo = LoadRootAllomorph(allo, msa); + hcEntry.Allomorphs.Add(hcAllo); + m_allomorphs.GetOrCreate(allo, () => new List()).Add(hcAllo); + } + catch (InvalidShapeException ise) + { + m_logger.InvalidShape(ise.String, ise.Position, msa); + } + } + + AddEntry(stratum, hcEntry, msa); + } + + private void LoadLexEntryOfVariant( + Stratum stratum, + ILexEntryInflType inflType, + ILexEntry entry, + IMoStemMsa msa, + IList allos + ) + { + var hcEntry = new LexEntry(); + + IMoInflClass inflClass = GetInflClass(msa); + if (inflClass != null) + hcEntry.MprFeatures.Add(m_mprFeatures[inflClass]); + + foreach (ICmPossibility prodRestrict in msa.ProdRestrictRC) + hcEntry.MprFeatures.Add(m_mprFeatures[prodRestrict]); + + // TODO: irregularly inflected forms should be handled by rule blocking in HC + if (inflType != null) + hcEntry.MprFeatures.Add(m_mprFeatures[inflType]); + + var glossSB = new StringBuilder(); + // we ignore any prepend material + //if (inflType != null) + //{ + // string prepend = inflType.GlossPrepend.BestAnalysisAlternative.Text; + // if (prepend != "***") + // glossSB.Append(prepend); + //} 
+ glossSB.Append(GetGlossOfVariant(entry)); + // we ignore any append material + //if (inflType != null) + //{ + // string append = inflType.GlossAppend.BestAnalysisAlternative.Text; + // if (append != "***") + // glossSB.Append(append); + //} + // we add the first sense number of this msa + int senseIndex = 1; + ILexEntry ownerEntry = msa.Owner as ILexEntry; + if (ownerEntry != null) + { + int index = ownerEntry.AllSenses.IndexOf( + s => s.MorphoSyntaxAnalysisRA.Hvo == msa.Hvo + ); + if (index != -1) + { + senseIndex = index + 1; + } + } + glossSB.Append("."); + glossSB.Append(senseIndex); + glossSB.Append("_variant_"); + hcEntry.Gloss = glossSB.ToString(); + + var fs = new FeatureStruct(); + if (msa.PartOfSpeechRA != null) + fs.AddValue( + m_posFeature, + m_posFeature.PossibleSymbols["pos" + msa.PartOfSpeechRA.Hvo] + ); + else + hcEntry.IsPartial = true; + FeatureStruct headFS = null; + if (msa.MsFeaturesOA != null && !msa.MsFeaturesOA.IsEmpty) + headFS = LoadFeatureStruct(msa.MsFeaturesOA, m_language.SyntacticFeatureSystem); + if (inflType != null) + { + if ( + inflType.SlotsRC.Count == 0 + && inflType.InflFeatsOA != null + && !inflType.InflFeatsOA.IsEmpty + ) + { + FeatureStruct inflFS = LoadFeatureStruct( + inflType.InflFeatsOA, + m_language.SyntacticFeatureSystem + ); + if (headFS == null) + headFS = inflFS; + else + headFS.Add(inflFS); + } + } + if (headFS != null) + fs.AddValue(m_headFeature, headFS); + fs.Freeze(); + hcEntry.SyntacticFeatureStruct = fs; + + hcEntry.Properties[MsaID] = msa.Hvo; + if (inflType != null) + hcEntry.Properties[InflTypeID] = inflType.Hvo; + + foreach (IMoStemAllomorph allo in allos) + { + try + { + RootAllomorph hcAllo = LoadRootAllomorph(allo, msa); + hcEntry.Allomorphs.Add(hcAllo); + m_allomorphs.GetOrCreate(allo, () => new List()).Add(hcAllo); + } + catch (InvalidShapeException ise) + { + m_logger.InvalidShape(ise.String, ise.Position, msa); + } + } + + AddEntry(stratum, hcEntry, msa); + } + + private string 
GetGlossOfVariant(ILexEntry entry) + { + StringBuilder sb = new StringBuilder(); + string result = ""; + if (entry != null) + { + sb.Append(entry.HeadWord.Text); + int homograph = entry.HomographNumber; + if (homograph == 0) + { + sb.Append("1"); + } + result = sb.ToString().Normalize(NormalizationForm.FormD); + } + return result; + ; + } + + private RootAllomorph LoadRootAllomorph(IMoStemAllomorph allo, IMoMorphSynAnalysis msa) + { + string form = FormatRootForm(RemoveDottedCircles(allo.Form.VernacularDefaultWritingSystem.Text)); + Shape shape = Segment(form); + var hcAllo = new RootAllomorph(new Segments(m_table, form, shape)); + + foreach (IPhEnvironment env in allo.PhoneEnvRC) + { + string error; + if (IsValidEnvironment(env.StringRepresentation.Text, out error)) + { + Tuple contexts = SplitEnvironment(env); + hcAllo.Environments.Add( + new AllomorphEnvironment( + ConstraintType.Require, + LoadEnvironmentPattern(contexts.Item1, true), + LoadEnvironmentPattern(contexts.Item2, false) + ) + { + Name = env.StringRepresentation.Text + } + ); + } + else + { + m_logger.InvalidEnvironment(allo, env, error, msa); + } + } + + StemName hcStemName; + if (allo.StemNameRA != null && m_stemNames.TryGetValue(allo.StemNameRA, out hcStemName)) + hcAllo.StemName = hcStemName; + + switch (allo.MorphTypeRA.Guid.ToString()) + { + case MoMorphTypeTags.kMorphBoundRoot: + case MoMorphTypeTags.kMorphBoundStem: + hcAllo.IsBound = true; + break; + } + + hcAllo.Properties[FormID] = allo.Hvo; + return hcAllo; + } + + private void LoadMorphologicalRules(Stratum stratum, ILexEntry entry, IList allos) + { + if (!HasValidRuleForm(entry)) + return; + + if (entry.SensesOS.Count == 0) + { + foreach (ILexEntryRef lexEntryRef in entry.EntryRefsOS) + { + foreach (ICmObject component in lexEntryRef.ComponentLexemesRS) + { + var mainEntry = component as ILexEntry; + if (mainEntry != null) + { + foreach (IMoMorphSynAnalysis msa in mainEntry.MorphoSyntaxAnalysesOC) + { + int variantIndex = + 
lexEntryRef.ComponentLexemesRS.IndexOf(component) + 1; + LoadMorphologicalRule(stratum, entry, allos, msa, variantIndex); + } + } + else + { + var sense = (ILexSense)component; + LoadMorphologicalRule( + stratum, + entry, + allos, + sense.MorphoSyntaxAnalysisRA, + -1 + ); + } + } + } + } + + foreach (IMoMorphSynAnalysis msa in entry.MorphoSyntaxAnalysesOC) + LoadMorphologicalRule(stratum, entry, allos, msa, -1); + } + + private void LoadMorphologicalRule( + Stratum stratum, + ILexEntry entry, + IList allos, + IMoMorphSynAnalysis msa, + int variantIndex + ) + { + AffixProcessRule mrule = null; + Stratum s = stratum; + bool isCliticAffix = false; + switch (msa.ClassID) + { + case MoDerivAffMsaTags.kClassId: + mrule = LoadDerivAffixProcessRule(entry, (IMoDerivAffMsa)msa, allos); + break; + + case MoInflAffMsaTags.kClassId: + var inflMsa = (IMoInflAffMsa)msa; + if (inflMsa.SlotsRC.Count > 0) + s = null; + mrule = LoadInflAffixProcessRule(entry, inflMsa, allos); + break; + + case MoUnclassifiedAffixMsaTags.kClassId: + mrule = LoadUnclassifiedAffixProcessRule( + entry, + (IMoUnclassifiedAffixMsa)msa, + allos + ); + break; + + case MoStemMsaTags.kClassId: + mrule = LoadCliticAffixProcessRule(entry, (IMoStemMsa)msa, allos); + isCliticAffix = true; + break; + } + + if (mrule != null) + { + mrule.Gloss = GetGloss(msa, isCliticAffix); + if (variantIndex > 0) + { + mrule.Gloss += "_variant." 
+ variantIndex + "_"; + } + AddMorphologicalRule(s, mrule, msa); + } + } + + private string GetGlossForStem(ILexSense sense, IMoMorphSynAnalysis msa, bool isCliticAffix) + { + string result = GetGlossViaSense(msa, isCliticAffix, sense); + return result; + } + + private string GetGloss(IMoMorphSynAnalysis msa, bool isCliticAffix) + { + ILexSense sense = msa.OwnerOfClass().SenseWithMsa(msa); + string result = GetGlossViaSense(msa, isCliticAffix, sense); + return result; + } + + private string GetGlossViaSense( + IMoMorphSynAnalysis msa, + bool isCliticAffix, + ILexSense sense + ) + { + string result = + sense == null + ? null + : sense.Gloss.BestAnalysisAlternative.Text.Normalize(NormalizationForm.FormD); + if (msa is IMoStemMsa && sense != null && !isCliticAffix) + { + ILexEntry entry = sense.Entry; + if (entry != null) + { + StringBuilder sb = new StringBuilder(); + string formatted = entry.HeadWord.Text; + formatted = formatted.Replace("#", " "); + sb.Append(formatted); + int homograph = entry.HomographNumber; + if (homograph == 0) + { + sb.Append("1"); + } + sb.Append("."); + int index = entry.SensesOS.IndexOf(sense); + sb.Append(index + 1); + result = sb.ToString().Normalize(NormalizationForm.FormD); + } + } + + return result; + } + + private void AddMorphologicalRule( + Stratum stratum, + AffixProcessRule rule, + IMoMorphSynAnalysis msa + ) + { + if (rule.Allomorphs.Count > 0) + { + if (stratum != null) + stratum.MorphologicalRules.Add(rule); + m_morphemes.GetOrCreate(msa, () => new List()).Add(rule); + } + } + + private AffixProcessRule LoadDerivAffixProcessRule( + ILexEntry entry, + IMoDerivAffMsa msa, + IList allos + ) + { + var mrule = new AffixProcessRule { Name = entry.ShortName }; + + var requiredFS = new FeatureStruct(); + if (msa.FromPartOfSpeechRA != null) + requiredFS.AddValue(m_posFeature, LoadAllPartsOfSpeech(msa.FromPartOfSpeechRA)); + if (msa.FromMsFeaturesOA != null && !msa.FromMsFeaturesOA.IsEmpty) + requiredFS.AddValue( + m_headFeature, + 
LoadFeatureStruct(msa.FromMsFeaturesOA, m_language.SyntacticFeatureSystem) + ); + requiredFS.Freeze(); + mrule.RequiredSyntacticFeatureStruct = requiredFS; + + var outFS = new FeatureStruct(); + if (msa.ToPartOfSpeechRA != null) + outFS.AddValue( + m_posFeature, + m_posFeature.PossibleSymbols["pos" + msa.ToPartOfSpeechRA.Hvo] + ); + if (msa.ToMsFeaturesOA != null && !msa.ToMsFeaturesOA.IsEmpty) + outFS.AddValue( + m_headFeature, + LoadFeatureStruct(msa.ToMsFeaturesOA, m_language.SyntacticFeatureSystem) + ); + outFS.Freeze(); + mrule.OutSyntacticFeatureStruct = outFS; + + var requiredMprFeatures = new List(); + if (msa.FromInflectionClassRA != null) + requiredMprFeatures.AddRange(LoadAllInflClasses(msa.FromInflectionClassRA)); + + foreach (ICmPossibility prodRestrict in msa.FromProdRestrictRC) + requiredMprFeatures.Add(m_mprFeatures[prodRestrict]); + + var outMprFeatures = new List(); + if (msa.ToInflectionClassRA != null) + outMprFeatures.Add(m_mprFeatures[msa.ToInflectionClassRA]); + + foreach (ICmPossibility prodRestrict in msa.ToProdRestrictRC) + outMprFeatures.Add(m_mprFeatures[prodRestrict]); + + StemName hcStemName; + if ( + msa.FromStemNameRA != null + && m_stemNames.TryGetValue(msa.FromStemNameRA, out hcStemName) + ) + mrule.RequiredStemName = hcStemName; + + mrule.Properties[MsaID] = msa.Hvo; + + foreach (AffixProcessAllomorph hcAllo in LoadAffixProcessAllomorphs(msa, allos)) + { + hcAllo.RequiredMprFeatures.AddRange(requiredMprFeatures); + hcAllo.OutMprFeatures.AddRange(outMprFeatures); + mrule.Allomorphs.Add(hcAllo); + } + + return mrule; + } + + private AffixProcessRule LoadInflAffixProcessRule( + ILexEntry entry, + IMoInflAffMsa msa, + IList allos + ) + { + // TODO: use realizational affix process rules + var mrule = new AffixProcessRule + { + Name = entry.ShortName, + IsPartial = msa.SlotsRC.Count == 0 + }; + + var requiredFS = new FeatureStruct(); + if (msa.PartOfSpeechRA != null) + requiredFS.AddValue(m_posFeature, 
LoadAllPartsOfSpeech(msa.PartOfSpeechRA)); + if (msa.InflFeatsOA != null && !msa.InflFeatsOA.IsEmpty) + requiredFS.AddValue( + m_headFeature, + LoadFeatureStruct(msa.InflFeatsOA, m_language.SyntacticFeatureSystem) + ); + requiredFS.Freeze(); + mrule.RequiredSyntacticFeatureStruct = requiredFS; + + var requiredMprFeatures = new List(); + foreach (ICmPossibility prodRestrict in msa.FromProdRestrictRC) + requiredMprFeatures.Add(m_mprFeatures[prodRestrict]); + + mrule.Properties[MsaID] = msa.Hvo; + + foreach (AffixProcessAllomorph hcAllo in LoadAffixProcessAllomorphs(msa, allos)) + { + hcAllo.RequiredMprFeatures.AddRange(requiredMprFeatures); + mrule.Allomorphs.Add(hcAllo); + } + + return mrule; + } + + private AffixProcessRule LoadUnclassifiedAffixProcessRule( + ILexEntry entry, + IMoUnclassifiedAffixMsa msa, + IList allos + ) + { + var mrule = new AffixProcessRule { Name = entry.ShortName, IsPartial = true }; + + var requiredFS = new FeatureStruct(); + if (msa.PartOfSpeechRA != null) + requiredFS.AddValue(m_posFeature, LoadAllPartsOfSpeech(msa.PartOfSpeechRA)); + requiredFS.Freeze(); + mrule.RequiredSyntacticFeatureStruct = requiredFS; + + mrule.Properties[MsaID] = msa.Hvo; + + foreach (AffixProcessAllomorph hcAllo in LoadAffixProcessAllomorphs(msa, allos)) + mrule.Allomorphs.Add(hcAllo); + + return mrule; + } + + private AffixProcessRule LoadCliticAffixProcessRule( + ILexEntry entry, + IMoStemMsa msa, + IList allos + ) + { + var mrule = new AffixProcessRule { Name = entry.ShortName }; + + var requiredFS = new FeatureStruct(); + if (msa.FromPartsOfSpeechRC.Count > 0) + requiredFS.AddValue(m_posFeature, LoadAllPartsOfSpeech(msa.FromPartsOfSpeechRC)); + requiredFS.Freeze(); + mrule.RequiredSyntacticFeatureStruct = requiredFS; + + mrule.Properties[MsaID] = msa.Hvo; + + foreach (AffixProcessAllomorph hcAllo in LoadAffixProcessAllomorphs(msa, allos)) + mrule.Allomorphs.Add(hcAllo); + + return mrule; + } + + private IEnumerable LoadAffixProcessAllomorphs( + 
IMoMorphSynAnalysis msa, + IList allos + ) + { + var entry = msa.OwnerOfClass(); + if (entry.IsCircumfix() && entry.LexemeFormOA is IMoAffixAllomorph) + { + foreach ( + IMoAffixAllomorph prefixAllo in allos + .OfType() + .Where(a => a.MorphTypeRA.Guid == MoMorphTypeTags.kguidMorphPrefix) + ) + { + MprFeature[] requiredMprFeatures = null; + if (msa is IMoInflAffMsa) + requiredMprFeatures = LoadAllInflClasses(prefixAllo.InflectionClassesRC) + .ToArray(); + foreach ( + IMoAffixAllomorph suffixAllo in allos + .OfType() + .Where(a => a.MorphTypeRA.Guid == MoMorphTypeTags.kguidMorphSuffix) + ) + { + foreach ( + IPhEnvironment prefixEnv in GetAffixAllomorphEnvironments( + prefixAllo, + msa + ) + ) + { + foreach ( + IPhEnvironment suffixEnv in GetAffixAllomorphEnvironments( + suffixAllo, + msa + ) + ) + { + AffixProcessAllomorph hcAllo = null; + try + { + hcAllo = LoadCircumfixAffixProcessAllomorph( + prefixAllo, + prefixEnv, + suffixAllo, + suffixEnv + ); + if (requiredMprFeatures != null) + hcAllo.RequiredMprFeatures.AddRange(requiredMprFeatures); + m_allomorphs + .GetOrCreate( + entry.LexemeFormOA, + () => new List() + ) + .Add(hcAllo); + } + catch (InvalidShapeException ise) + { + m_logger.InvalidShape(ise.String, ise.Position, msa); + } + if (hcAllo != null) + yield return hcAllo; + } + } + } + } + } + else + { + foreach (IMoForm allo in allos) + { + switch (allo.ClassID) + { + case MoAffixProcessTags.kClassId: + var affixProcess = (IMoAffixProcess)allo; + AffixProcessAllomorph hcAffixProcessAllo = null; + try + { + hcAffixProcessAllo = LoadAffixProcessAllomorph(affixProcess); + if (msa is IMoInflAffMsa) + hcAffixProcessAllo.RequiredMprFeatures.AddRange( + LoadAllInflClasses(affixProcess.InflectionClassesRC) + ); + m_allomorphs + .GetOrCreate(allo, () => new List()) + .Add(hcAffixProcessAllo); + } + catch (InvalidShapeException ise) + { + m_logger.InvalidShape(ise.String, ise.Position, msa); + } + catch (InvalidAffixProcessException iape) + { + 
m_logger.InvalidAffixProcess(affixProcess, iape.IsInvalidLhs, msa); + } + if (hcAffixProcessAllo != null) + yield return hcAffixProcessAllo; + break; + + case MoAffixAllomorphTags.kClassId: + var affixAllo = (IMoAffixAllomorph)allo; + MprFeature[] requiredMprFeatures = null; + if (msa is IMoInflAffMsa) + requiredMprFeatures = LoadAllInflClasses( + affixAllo.InflectionClassesRC + ) + .ToArray(); + foreach ( + IPhEnvironment env in GetAffixAllomorphEnvironments(affixAllo, msa) + ) + { + AffixProcessAllomorph hcAffixAllo = null; + try + { + hcAffixAllo = LoadFormAffixProcessAllomorph(affixAllo, env); + if (requiredMprFeatures != null) + hcAffixAllo.RequiredMprFeatures.AddRange( + requiredMprFeatures + ); + var requiredFS = new FeatureStruct(); + if ( + affixAllo.MsEnvFeaturesOA != null + && !affixAllo.MsEnvFeaturesOA.IsEmpty + ) + requiredFS.AddValue( + m_headFeature, + LoadFeatureStruct( + affixAllo.MsEnvFeaturesOA, + m_language.SyntacticFeatureSystem + ) + ); + requiredFS.Freeze(); + hcAffixAllo.RequiredSyntacticFeatureStruct = requiredFS; + m_allomorphs + .GetOrCreate(allo, () => new List()) + .Add(hcAffixAllo); + } + catch (InvalidShapeException ise) + { + m_logger.InvalidShape(ise.String, ise.Position, msa); + } + catch (InvalidReduplicationFormException iee) + { + m_logger.InvalidReduplicationForm(affixAllo, iee.Message, msa); + } + if (hcAffixAllo != null) + yield return hcAffixAllo; + } + break; + + case MoStemAllomorphTags.kClassId: + var stemAllo = (IMoStemAllomorph)allo; + foreach ( + IPhEnvironment env in GetStemAllomorphEnvironments(stemAllo, msa) + ) + { + AffixProcessAllomorph hcStemAllo = null; + try + { + hcStemAllo = LoadFormAffixProcessAllomorph(allo, env); + m_allomorphs + .GetOrCreate(allo, () => new List()) + .Add(hcStemAllo); + } + catch (InvalidShapeException ise) + { + m_logger.InvalidShape(ise.String, ise.Position, msa); + } + if (hcStemAllo != null) + yield return hcStemAllo; + } + break; + } + } + } + } + + private IEnumerable 
GetAffixAllomorphEnvironments( + IMoAffixAllomorph allo, + IMoMorphSynAnalysis msa + ) + { + return GetValidEnvironments(allo.PhoneEnvRC.Concat(allo.PositionRS), allo, msa); + } + + private IEnumerable GetStemAllomorphEnvironments( + IMoStemAllomorph allo, + IMoMorphSynAnalysis msa + ) + { + return GetValidEnvironments(allo.PhoneEnvRC, allo, msa); + } + + private IEnumerable GetValidEnvironments( + IEnumerable envs, + IMoForm allo, + IMoMorphSynAnalysis msa + ) + { + IPhEnvironment[] envArray = envs.ToArray(); + bool hasBlankEnv = envArray.Length == 0; + foreach (IPhEnvironment env in envArray) + { + string error; + if (IsValidEnvironment(env.StringRepresentation.Text, out error)) + { + yield return env; + } + else + { + m_logger.InvalidEnvironment(allo, env, error, msa); + hasBlankEnv = true; + } + } + + if (hasBlankEnv) + yield return null; + } + + private bool IsValidEnvironment(string env) + { + string error; + return IsValidEnvironment(env, out error); + } + + private bool IsValidEnvironment(string env, out string error) + { + if (m_envValidator.Recognize(env)) + { + int start = 0; + do + { + start = env.IndexOf("[", start, StringComparison.Ordinal); + if (start != -1) + { + int end = env.IndexOf("]", start + 1, StringComparison.Ordinal); + string ncAbbr = env.Substring(start + 1, end - start - 1).Trim(); + int caretIndex = ncAbbr.IndexOf("^", StringComparison.Ordinal); + if (caretIndex != -1) + ncAbbr = ncAbbr.Substring(0, caretIndex).Trim(); + NaturalClass hcNaturalClass; + if (!TryLoadNaturalClass(m_naturalClassLookup[ncAbbr], out hcNaturalClass)) + { + error = string.Format( + "The natural class \"{0}\" contains an invalid phoneme.", + ncAbbr + ); + return false; + } + start = end + 1; + } + } while (start != -1); + error = null; + return true; + } + + error = null; + try + { + XElement errorElem = XElement.Parse(m_envValidator.ErrorMessage); + var status = (string)errorElem.Attribute("status"); + var pos = (int)errorElem.Attribute("pos") + 1; + switch 
(status) + { + case "class": + error = string.Format("Unrecognized natural class at position {0}.", pos); + break; + case "segment": + error = string.Format("Unrecognized phoneme at position {0}.", pos); + break; + case "missingClosingParen": + error = string.Format("Missing closing parenthesis at position {0}.", pos); + break; + case "missingOpeningParen": + error = string.Format("Missing opening parenthesis at position {0}.", pos); + break; + case "missingClosingSquareBracket": + error = string.Format("Missing closing bracket at position {0}.", pos); + break; + case "missingOpeningSquareBracket": + error = string.Format("Missing opening bracket at position {0}.", pos); + break; + case "syntax": + error = string.Format("Invalid format at position {0}.", pos); + break; + } + } + catch (XmlException) { } + + if (error == null) + error = "Invalid format."; + return false; + } + + private AffixProcessAllomorph LoadCircumfixAffixProcessAllomorph( + IMoAffixAllomorph prefixAllo, + IPhEnvironment prefixEnv, + IMoAffixAllomorph suffixAllo, + IPhEnvironment suffixEnv + ) + { + var hcAllo = new AffixProcessAllomorph(); + + Pattern leftEnvPattern = null, + rightEnvPattern = null; + var pattern = new Pattern("stem"); + if (prefixEnv == null && suffixEnv == null) + { + pattern.Children.AddRange(AnyPlus()); + } + else + { + if (prefixEnv != null) + { + pattern.Children.Add(PrefixNull()); + Tuple prefixContexts = SplitEnvironment(prefixEnv); + pattern.Children.AddRange(LoadPatternNodes(prefixContexts.Item2)); + + if (!string.IsNullOrEmpty(prefixContexts.Item1)) + leftEnvPattern = LoadEnvironmentPattern(prefixContexts.Item1, true); + } + pattern.Children.AddRange(AnyStar()); + if (suffixEnv != null) + { + Tuple suffixContexts = SplitEnvironment(suffixEnv); + pattern.Children.AddRange(LoadPatternNodes(suffixContexts.Item1)); + pattern.Children.Add(SuffixNull()); + + if (!string.IsNullOrEmpty(suffixContexts.Item2)) + rightEnvPattern = LoadEnvironmentPattern(suffixContexts.Item2, 
false); + } + } + pattern.Freeze(); + hcAllo.Lhs.Add(pattern); + + hcAllo.Rhs.Add( + new InsertSegments( + Segments(RemoveDottedCircles(prefixAllo.Form.VernacularDefaultWritingSystem.Text).Trim() + "+") + ) + ); + hcAllo.Rhs.Add(new CopyFromInput("stem")); + hcAllo.Rhs.Add( + new InsertSegments( + Segments("+" + RemoveDottedCircles(suffixAllo.Form.VernacularDefaultWritingSystem.Text).Trim()) + ) + ); + + if (leftEnvPattern != null || rightEnvPattern != null) + { + string name; + if (leftEnvPattern != null && rightEnvPattern == null) + name = prefixEnv.StringRepresentation.Text; + else if (leftEnvPattern == null) + name = suffixEnv.StringRepresentation.Text; + else + name = string.Format( + "{0}, {1}", + prefixEnv.StringRepresentation.Text, + suffixEnv.StringRepresentation.Text + ); + hcAllo.Environments.Add( + new AllomorphEnvironment( + ConstraintType.Require, + leftEnvPattern, + rightEnvPattern + ) + { + Name = name + } + ); + } + + hcAllo.Properties[FormID] = prefixAllo.Hvo; + hcAllo.Properties[FormID2] = suffixAllo.Hvo; + if (prefixEnv != null) + hcAllo.Properties[PrefixEnv] = prefixEnv.StringRepresentation.Text; + if (suffixEnv != null) + hcAllo.Properties[SuffixEnv] = suffixEnv.StringRepresentation.Text; + return hcAllo; + } + + private AffixProcessAllomorph LoadAffixProcessAllomorph(IMoAffixProcess allo) + { + var hcAllo = new AffixProcessAllomorph(); + int i = 1; + foreach (IPhContextOrVar ctxtOrVar in allo.InputOS) + { + var var = ctxtOrVar as IPhVariable; + if (var != null) + { + var pattern = new Pattern( + i.ToString(CultureInfo.InvariantCulture), + AnyStar() + ); + pattern.Freeze(); + hcAllo.Lhs.Add(pattern); + } + else + { + PatternNode n; + if (LoadPatternNode((IPhPhonContext)ctxtOrVar, out n)) + { + var pattern = new Pattern( + i.ToString(CultureInfo.InvariantCulture), + n + ); + pattern.Freeze(); + hcAllo.Lhs.Add(pattern); + } + else + { + throw new InvalidAffixProcessException(allo, true); + } + } + i++; + } + + foreach (IMoRuleMapping mapping in 
allo.OutputOS) + { + switch (mapping.ClassID) + { + case MoInsertNCTags.kClassId: + var insertNC = (IMoInsertNC)mapping; + if (insertNC.ContentRA != null) + { + SimpleContext ctxt; + if (!TryLoadSimpleContext(insertNC.ContentRA, out ctxt)) + throw new InvalidAffixProcessException(allo, false); + hcAllo.Rhs.Add(new InsertSimpleContext(ctxt)); + } + break; + + case MoCopyFromInputTags.kClassId: + var copyFromInput = (IMoCopyFromInput)mapping; + if (copyFromInput.ContentRA != null) + { + string partName = (copyFromInput.ContentRA.IndexInOwner + 1).ToString( + CultureInfo.InvariantCulture + ); + hcAllo.Rhs.Add(new CopyFromInput(partName)); + } + break; + + case MoInsertPhonesTags.kClassId: + var insertPhones = (IMoInsertPhones)mapping; + if (insertPhones.ContentRS.Count > 0) + { + var sb = new StringBuilder(); + foreach (IPhTerminalUnit termUnit in insertPhones.ContentRS) + { + IPhCode code = termUnit.CodesOS[0]; + string strRep = + termUnit.ClassID == PhBdryMarkerTags.kClassId + ? RemoveDottedCircles(code.Representation.BestVernacularAlternative.Text) + : RemoveDottedCircles(code.Representation.VernacularDefaultWritingSystem.Text); + if (strRep != null) + strRep = strRep.Trim(); + if (string.IsNullOrEmpty(strRep)) + throw new InvalidAffixProcessException(allo, false); + sb.Append(strRep); + } + hcAllo.Rhs.Add(new InsertSegments(Segments(sb.ToString()))); + } + break; + + case MoModifyFromInputTags.kClassId: + var modifyFromInput = (IMoModifyFromInput)mapping; + if ( + modifyFromInput.ContentRA != null + && modifyFromInput.ModificationRA != null + ) + { + SimpleContext ctxt; + if (!TryLoadSimpleContext(modifyFromInput.ModificationRA, out ctxt)) + throw new InvalidAffixProcessException(allo, false); + string partName = (modifyFromInput.ContentRA.IndexInOwner + 1).ToString( + CultureInfo.InvariantCulture + ); + hcAllo.Rhs.Add(new ModifyFromInput(partName, ctxt)); + } + break; + } + } + + if (allo.MorphTypeRA != null) + { + switch (allo.MorphTypeRA.Guid.ToString()) + { + 
/// <summary>
/// Converts a form-based affix allomorph, together with one of its environments,
/// into a Hermit Crab <c>AffixProcessAllomorph</c>.
/// </summary>
/// <remarks>
/// Three shapes of form are handled:
/// (1) a form containing "[...]": the whole stem is copied, "+" plus the text before
/// '[' is inserted, the stem is copied again, and any text after ']' is inserted;
/// (2) any other form containing "[": the form must validate as an environment and is
/// expanded through the reduplication pattern/output-action loaders;
/// (3) a plain form: attached as an infix, suffix or prefix according to the morph type.
/// A morph type matching none of the handled cases adds nothing to Lhs or Rhs.
/// </remarks>
/// <param name="allo">The allomorph whose default vernacular form is loaded.</param>
/// <param name="env">
/// The environment for this allomorph; may be null, in which case both contexts are
/// empty strings.
/// </param>
/// <returns>The populated allomorph, tagged with the FormID (and Env) properties.</returns>
/// <exception cref="InvalidReduplicationFormException">
/// The form contains "[" (but not "[...]") and "/_" + form is not a valid environment.
/// </exception>
private AffixProcessAllomorph LoadFormAffixProcessAllomorph(
    IMoForm allo,
    IPhEnvironment env
)
{
    var hcAllo = new AffixProcessAllomorph();
    string form = RemoveDottedCircles(allo.Form.VernacularDefaultWritingSystem.Text.Trim());
    // Item1 is the text left of the '_' in the environment, Item2 the text right of it.
    var contexts = SplitEnvironment(env);
    if (form.Contains("["))
    {
        if (form.Contains("[...]"))
        {
            var stemPattern = new Pattern("stem", AnyPlus());
            stemPattern.Freeze();
            hcAllo.Lhs.Add(stemPattern);

            hcAllo.Rhs.Add(new CopyFromInput("stem"));
            int beforePos = form.IndexOf('[');
            string beforeStr = form.Substring(0, beforePos).Trim();
            hcAllo.Rhs.Add(new InsertSegments(Segments("+" + beforeStr)));
            hcAllo.Rhs.Add(new CopyFromInput("stem"));
            int afterPos = form.IndexOf(']');
            string afterStr = form.Substring(afterPos + 1).Trim();
            if (!string.IsNullOrEmpty(afterStr))
                hcAllo.Rhs.Add(new InsertSegments(Segments(afterStr)));

            switch (allo.MorphTypeRA.Guid.ToString())
            {
                case MoMorphTypeTags.kMorphPrefix:
                    hcAllo.ReduplicationHint = ReduplicationHint.Prefix;
                    break;

                case MoMorphTypeTags.kMorphSuffix:
                    hcAllo.ReduplicationHint = ReduplicationHint.Suffix;
                    break;
            }
        }
        else
        {
            string environment = "/_" + form;
            string error;
            // A form containing a reduplication expression should look like an environment
            if (!IsValidEnvironment(environment, out error))
                throw new InvalidReduplicationFormException(error);

            var stemPattern = new Pattern("stem", AnyStar());
            stemPattern.Freeze();
            switch (allo.MorphTypeRA.Guid.ToString())
            {
                case MoMorphTypeTags.kMorphSuffix:
                case MoMorphTypeTags.kMorphSuffixingInterfix:
                case MoMorphTypeTags.kMorphEnclitic:
                    // Input: stem, then the parts named by the left context, then trailing nulls.
                    hcAllo.Lhs.Add(stemPattern);
                    hcAllo.Lhs.AddRange(LoadReduplicationPatterns(contexts.Item1));
                    var suffixNull = new Pattern(
                        "suffixNull",
                        SuffixNull()
                    );
                    suffixNull.Freeze();
                    hcAllo.Lhs.Add(suffixNull);

                    // Output: the input unchanged, a boundary, then the reduplicated material.
                    hcAllo.Rhs.Add(new CopyFromInput("stem"));
                    hcAllo.Rhs.AddRange(LoadReduplicationOutputActions(contexts.Item1));
                    hcAllo.Rhs.Add(new CopyFromInput("suffixNull"));
                    hcAllo.Rhs.Add(new InsertSegments(Segments("+")));
                    hcAllo.Rhs.AddRange(LoadReduplicationOutputActions(form));
                    break;

                case MoMorphTypeTags.kMorphPrefix:
                case MoMorphTypeTags.kMorphPrefixingInterfix:
                case MoMorphTypeTags.kMorphProclitic:
                    // Mirror image of the suffix case, driven by the right context.
                    var prefixNull = new Pattern(
                        "prefixNull",
                        PrefixNull()
                    );
                    prefixNull.Freeze();
                    hcAllo.Lhs.Add(prefixNull);
                    hcAllo.Lhs.AddRange(LoadReduplicationPatterns(contexts.Item2));
                    hcAllo.Lhs.Add(stemPattern);

                    hcAllo.Rhs.AddRange(LoadReduplicationOutputActions(form));
                    hcAllo.Rhs.Add(new InsertSegments(Segments("+")));
                    hcAllo.Rhs.Add(new CopyFromInput("prefixNull"));
                    hcAllo.Rhs.AddRange(LoadReduplicationOutputActions(contexts.Item2));
                    hcAllo.Rhs.Add(new CopyFromInput("stem"));
                    break;
            }
        }
    }
    else
    {
        switch (allo.MorphTypeRA.Guid.ToString())
        {
            case MoMorphTypeTags.kMorphInfix:
            case MoMorphTypeTags.kMorphInfixingInterfix:
                // The environment splits the stem into a "left" and a "right" part;
                // the infix is inserted between them.
                var leftInfixPattern = new Pattern("left");
                if (contexts.Item1.StartsWith("#"))
                    leftInfixPattern.Children.Add(PrefixNull());
                else
                    leftInfixPattern.Children.AddRange(AnyStar());
                leftInfixPattern.Children.AddRange(LoadPatternNodes(contexts.Item1));
                leftInfixPattern.Freeze();
                hcAllo.Lhs.Add(leftInfixPattern);

                var rightInfixPattern = new Pattern("right");
                rightInfixPattern.Children.AddRange(LoadPatternNodes(contexts.Item2));
                if (contexts.Item2.EndsWith("#"))
                    rightInfixPattern.Children.Add(SuffixNull());
                else
                    rightInfixPattern.Children.AddRange(AnyStar());
                rightInfixPattern.Freeze();
                hcAllo.Lhs.Add(rightInfixPattern);

                hcAllo.Rhs.Add(new CopyFromInput("left"));
                hcAllo.Rhs.Add(new InsertSegments(Segments("+" + form + "+")));
                hcAllo.Rhs.Add(new CopyFromInput("right"));
                break;

            case MoMorphTypeTags.kMorphSuffix:
            case MoMorphTypeTags.kMorphSuffixingInterfix:
            case MoMorphTypeTags.kMorphEnclitic:
                // The left context constrains the end of the stem itself.
                var suffixPattern = new Pattern("stem");
                if (string.IsNullOrEmpty(contexts.Item1))
                {
                    suffixPattern.Children.AddRange(AnyPlus());
                }
                else
                {
                    if (contexts.Item1.StartsWith("#"))
                        suffixPattern.Children.Add(PrefixNull());
                    else
                        suffixPattern.Children.AddRange(AnyStar());

                    suffixPattern.Children.AddRange(LoadPatternNodes(contexts.Item1));
                    suffixPattern.Children.Add(SuffixNull());
                }
                suffixPattern.Freeze();
                hcAllo.Lhs.Add(suffixPattern);

                hcAllo.Rhs.Add(new CopyFromInput("stem"));
                hcAllo.Rhs.Add(new InsertSegments(Segments("+" + form)));

                // The right context becomes a required environment. It can only be
                // non-empty when env is non-null, so env is safe to dereference here.
                if (!string.IsNullOrEmpty(contexts.Item2))
                    hcAllo.Environments.Add(
                        new AllomorphEnvironment(
                            ConstraintType.Require,
                            null,
                            LoadEnvironmentPattern(contexts.Item2, false)
                        )
                        {
                            Name = env.StringRepresentation.Text
                        }
                    );
                break;

            case MoMorphTypeTags.kMorphPrefix:
            case MoMorphTypeTags.kMorphPrefixingInterfix:
            case MoMorphTypeTags.kMorphProclitic:
                // The right context constrains the start of the stem itself.
                var prefixPattern = new Pattern("stem");
                if (string.IsNullOrEmpty(contexts.Item2))
                {
                    prefixPattern.Children.AddRange(AnyPlus());
                }
                else
                {
                    prefixPattern.Children.Add(PrefixNull());
                    prefixPattern.Children.AddRange(LoadPatternNodes(contexts.Item2));
                    if (contexts.Item2.EndsWith("#"))
                        prefixPattern.Children.Add(SuffixNull());
                    else
                        prefixPattern.Children.AddRange(AnyStar());
                }
                prefixPattern.Freeze();
                hcAllo.Lhs.Add(prefixPattern);

                hcAllo.Rhs.Add(new InsertSegments(Segments(form + "+")));
                hcAllo.Rhs.Add(new CopyFromInput("stem"));

                // The left context becomes a required environment (env is non-null here,
                // for the same reason as in the suffix case).
                if (!string.IsNullOrEmpty(contexts.Item1))
                    hcAllo.Environments.Add(
                        new AllomorphEnvironment(
                            ConstraintType.Require,
                            LoadEnvironmentPattern(contexts.Item1, true),
                            null
                        )
                        {
                            Name = env.StringRepresentation.Text
                        }
                    );
                break;
        }
    }

    hcAllo.Properties[FormID] = allo.Hvo;
    if (env != null)
        hcAllo.Properties[Env] = env.StringRepresentation.Text;
    return hcAllo;
}
/// <summary>
/// Builds the rule named "Null" for a slot: it copies the stem and inserts "^0"
/// on the prefix or suffix side, and requires the MPR feature registered for
/// <paramref name="type"/>.
/// </summary>
/// <param name="type">Inflection type supplying the MPR feature and any inflection features.</param>
/// <param name="template">Template whose prefix slot list decides the side of the null affix.</param>
/// <param name="slot">Slot the null rule is created for.</param>
/// <returns>The new rule, carrying the InflTypeID and SlotID properties.</returns>
private AffixProcessRule LoadNullAffixProcessRule(
    ILexEntryInflType type,
    IMoInflAffixTemplate template,
    IMoInflAffixSlot slot
)
{
    var nullRule = new AffixProcessRule { Name = "Null" };

    // Output features: the type's inflection features, when it has any.
    var outputFS = new FeatureStruct();
    bool hasInflFeatures = type.InflFeatsOA != null && !type.InflFeatsOA.IsEmpty;
    if (hasInflFeatures)
    {
        outputFS.AddValue(
            m_headFeature,
            LoadFeatureStruct(type.InflFeatsOA, m_language.SyntacticFeatureSystem)
        );
    }
    outputFS.Freeze();
    nullRule.OutSyntacticFeatureStruct = outputFS;

    var nullAllo = new AffixProcessAllomorph();
    nullAllo.RequiredMprFeatures.Add(m_mprFeatures[type]);

    var wholeStem = new Pattern("stem");
    wholeStem.Children.AddRange(AnyPlus());
    wholeStem.Freeze();
    nullAllo.Lhs.Add(wholeStem);

    bool onPrefixSide = template.PrefixSlotsRS.Contains(slot);
    if (onPrefixSide)
    {
        nullAllo.Rhs.Add(new InsertSegments(Segments("^0+")));
        nullAllo.Rhs.Add(new CopyFromInput("stem"));
    }
    else
    {
        nullAllo.Rhs.Add(new CopyFromInput("stem"));
        nullAllo.Rhs.Add(new InsertSegments(Segments("+^0")));
    }

    nullRule.Allomorphs.Add(nullAllo);

    nullRule.Properties[InflTypeID] = type.Hvo;
    nullRule.Properties[SlotID] = slot.Hvo;
    nullAllo.Properties[IsNull] = true;
    nullAllo.Properties[IsPrefix] = onPrefixSide;

    return nullRule;
}
/// <summary>
/// Converts an endocentric compound rule into a Hermit Crab compounding rule with a
/// single subrule that joins head and non-head with a "+" boundary.
/// </summary>
/// <param name="compoundRule">
/// The rule to convert. Its HeadLast flag decides whether the right or the left member
/// is the head.
/// </param>
/// <returns>The compounding rule, tagged with the CRuleID property.</returns>
private CompoundingRule LoadEndoCompoundingRule(IMoEndoCompound compoundRule)
{
    bool headIsLast = compoundRule.HeadLast;

    var headFS = new FeatureStruct();
    var nonheadFS = new FeatureStruct();

    // The right member constrains the head when the head comes last; otherwise the left one does.
    FeatureStruct rightMemberFS = headIsLast ? headFS : nonheadFS;
    FeatureStruct leftMemberFS = headIsLast ? nonheadFS : headFS;

    if (compoundRule.RightMsaOA.PartOfSpeechRA != null)
    {
        rightMemberFS.AddValue(
            m_posFeature,
            LoadAllPartsOfSpeech(compoundRule.RightMsaOA.PartOfSpeechRA)
        );
    }
    if (compoundRule.LeftMsaOA.PartOfSpeechRA != null)
    {
        leftMemberFS.AddValue(
            m_posFeature,
            LoadAllPartsOfSpeech(compoundRule.LeftMsaOA.PartOfSpeechRA)
        );
    }
    headFS.Freeze();
    nonheadFS.Freeze();

    // Output category comes from the overriding MSA, when it names a part of speech.
    var resultFS = new FeatureStruct();
    if (compoundRule.OverridingMsaOA.PartOfSpeechRA != null)
    {
        string posSymbolId = "pos" + compoundRule.OverridingMsaOA.PartOfSpeechRA.Hvo;
        resultFS.AddValue(m_posFeature, m_posFeature.PossibleSymbols[posSymbolId]);
    }
    resultFS.Freeze();

    var headInput = new Pattern("head", AnyPlus());
    headInput.Freeze();
    var nonheadInput = new Pattern("nonhead", AnyPlus());
    nonheadInput.Freeze();

    var hcRule = new CompoundingRule
    {
        Name = compoundRule.Name.BestAnalysisAlternative.Text,
        HeadRequiredSyntacticFeatureStruct = headFS,
        NonHeadRequiredSyntacticFeatureStruct = nonheadFS,
        OutSyntacticFeatureStruct = resultFS,
        Properties = { { CRuleID, compoundRule.Hvo } }
    };

    var onlySubrule = new CompoundingSubrule();

    if (compoundRule.OverridingMsaOA.InflectionClassRA != null)
    {
        onlySubrule.OutMprFeatures.Add(
            m_mprFeatures[compoundRule.OverridingMsaOA.InflectionClassRA]
        );
    }

    onlySubrule.HeadLhs.Add(headInput);
    onlySubrule.NonHeadLhs.Add(nonheadInput);

    // Surface order: non-head before head when the head is last, head first otherwise.
    string firstPart;
    string secondPart;
    if (headIsLast)
    {
        firstPart = "nonhead";
        secondPart = "head";
    }
    else
    {
        firstPart = "head";
        secondPart = "nonhead";
    }
    onlySubrule.Rhs.Add(new CopyFromInput(firstPart));
    onlySubrule.Rhs.Add(new InsertSegments(Segments("+")));
    onlySubrule.Rhs.Add(new CopyFromInput(secondPart));

    hcRule.Subrules.Add(onlySubrule);
    return hcRule;
}
compoundRule.Name.BestAnalysisAlternative.Text, + HeadRequiredSyntacticFeatureStruct = leftRequiredFS, + NonHeadRequiredSyntacticFeatureStruct = rightRequiredFS, + OutSyntacticFeatureStruct = outFS, + Properties = { { CRuleID, compoundRule.Hvo } } + }; + + var leftSubrule = new CompoundingSubrule(); + + if (compoundRule.ToMsaOA.InflectionClassRA != null) + leftSubrule.OutMprFeatures.Add( + m_mprFeatures[compoundRule.ToMsaOA.InflectionClassRA] + ); + + leftSubrule.HeadLhs.Add(headPattern); + leftSubrule.NonHeadLhs.Add(nonheadPattern); + + leftSubrule.Rhs.Add(new CopyFromInput("head")); + leftSubrule.Rhs.Add(new InsertSegments(Segments("+"))); + leftSubrule.Rhs.Add(new CopyFromInput("nonhead")); + + hcLeftCompoundRule.Subrules.Add(leftSubrule); + + yield return hcLeftCompoundRule; + } + + private RewriteRule LoadRewriteRule(IPhRegularRule prule) + { + var variables = new Dictionary(); + int i = 0; + foreach (IPhFeatureConstraint var in prule.FeatureConstraints) + { + variables[var] = VariableNames[i]; + i++; + } + + var hcPrule = new RewriteRule { Name = prule.Name.BestAnalysisAlternative.Text }; + + switch (prule.Direction) + { + case 0: + hcPrule.Direction = Machine.DataStructures.Direction.LeftToRight; + hcPrule.ApplicationMode = RewriteApplicationMode.Iterative; + break; + + case 1: + hcPrule.Direction = Machine.DataStructures.Direction.RightToLeft; + hcPrule.ApplicationMode = RewriteApplicationMode.Iterative; + break; + + case 2: + hcPrule.Direction = Machine.DataStructures.Direction.LeftToRight; + hcPrule.ApplicationMode = RewriteApplicationMode.Simultaneous; + break; + } + + if (prule.StrucDescOS.Count > 0) + { + var lhsPattern = new Pattern(); + foreach (IPhSimpleContext ctxt in prule.StrucDescOS) + { + PatternNode node; + if (LoadPatternNode(ctxt, variables, out node)) + lhsPattern.Children.Add(node); + } + lhsPattern.Freeze(); + hcPrule.Lhs = lhsPattern; + } + hcPrule.Properties[PRuleID] = prule.Hvo; + + foreach (IPhSegRuleRHS rhs in prule.RightHandSidesOS) 
/// <summary>
/// Converts a metathesis rule into a Hermit Crab <c>MetathesisRule</c>.
/// </summary>
/// <remarks>
/// Every structural-description context that can be loaded is wrapped in a named group:
/// "r", "l", "leftEnv", "rightEnv" or "middle" when its position matches the corresponding
/// structural-change index, otherwise its own position as a string. Word-boundary anchors
/// are added at the start and end of the pattern when the description begins or ends with
/// a word boundary.
/// </remarks>
/// <param name="prule">The metathesis rule to convert.</param>
/// <returns>The rule, tagged with the PRuleID property.</returns>
private MetathesisRule LoadMetathesisRule(IPhMetathesisRule prule)
{
    var hcPrule = new MetathesisRule { Name = prule.Name.BestAnalysisAlternative.Text };

    switch (prule.Direction)
    {
        case 0:
        case 2:
            hcPrule.Direction = Machine.DataStructures.Direction.LeftToRight;
            break;

        case 1:
            hcPrule.Direction = Machine.DataStructures.Direction.RightToLeft;
            break;
    }

    bool isMiddleWithLeftSwitch;
    int[] indices = prule.GetStrucChangeIndices(out isMiddleWithLeftSwitch);

    // NOTE(review): the left switch is bound to the group named "r" and the right switch
    // to "l"; this looks deliberate (the groups are named after their input positions) —
    // confirm against the Hermit Crab MetathesisRule contract.
    hcPrule.LeftSwitchName = "r";
    hcPrule.RightSwitchName = "l";

    var pattern = new Pattern();
    if (IsWordInitial(prule.StrucDescOS))
        pattern.Children.Add(
            new Constraint(HCFeatureSystem.LeftSideAnchor)
        );
    for (int i = 0; i < prule.StrucDescOS.Count; i++)
    {
        PatternNode node;
        if (LoadPatternNode(prule.StrucDescOS[i], out node))
        {
            string name = null;
            if (i == indices[PhMetathesisRuleTags.kidxRightSwitch])
                name = "r";
            else if (i == indices[PhMetathesisRuleTags.kidxLeftSwitch])
                name = "l";
            else if (i == indices[PhMetathesisRuleTags.kidxLeftEnv])
                name = "leftEnv";
            else if (i == indices[PhMetathesisRuleTags.kidxRightEnv])
                name = "rightEnv";
            else if (i == indices[PhMetathesisRuleTags.kidxMiddle])
                name = "middle";
            else
            {
                // we need a unique, non-null name for these as Hermit Crab uses a dictionary with unique keys
                // in AnalysisMetathesisRuleSpec() constructor
                // See LT-20038
                name = i.ToString();
            }
            pattern.Children.Add(new Group(name, node));
        }
    }
    if (IsWordFinal(prule.StrucDescOS))
        pattern.Children.Add(
            new Constraint(HCFeatureSystem.RightSideAnchor)
        );
    pattern.Freeze();
    hcPrule.Pattern = pattern;

    // A metathesis rule is a phonological rule, so its id belongs under the same key the
    // rewrite-rule loader uses (PRuleID); CRuleID is the key the compounding-rule loaders use.
    hcPrule.Properties[PRuleID] = prule.Hvo;

    return hcPrule;
}
/// <summary>
/// Translates the integer adjacency code stored on an ad hoc prohibition into the
/// corresponding <c>MorphCoOccurrenceAdjacency</c> value.
/// </summary>
/// <param name="adj">Adjacency code; 0 through 4 are recognised.</param>
/// <returns>The adjacency value for the code.</returns>
/// <exception cref="InvalidEnumArgumentException">The code is outside 0..4.</exception>
private static MorphCoOccurrenceAdjacency GetAdjacency(int adj)
{
    // Indexed by adjacency code.
    MorphCoOccurrenceAdjacency[] byCode =
    {
        MorphCoOccurrenceAdjacency.Anywhere,
        MorphCoOccurrenceAdjacency.SomewhereToLeft,
        MorphCoOccurrenceAdjacency.SomewhereToRight,
        MorphCoOccurrenceAdjacency.AdjacentToLeft,
        MorphCoOccurrenceAdjacency.AdjacentToRight
    };

    if (adj < 0 || adj >= byCode.Length)
        throw new InvalidEnumArgumentException();

    return byCode[adj];
}
/// <summary>
/// Builds a frozen pattern from one side of an environment string.
/// </summary>
/// <param name="patternStr">The context text for that side of the environment.</param>
/// <param name="left">
/// True for the left context, where a leading "#" adds a left-side anchor; false for the
/// right context, where a trailing "#" adds a right-side anchor.
/// </param>
/// <returns>The pattern, or null when <paramref name="patternStr"/> is null or empty.</returns>
private Pattern LoadEnvironmentPattern(string patternStr, bool left)
{
    if (string.IsNullOrEmpty(patternStr))
    {
        return null;
    }

    bool anchoredAtStart = left && patternStr.StartsWith("#");
    bool anchoredAtEnd = !left && patternStr.EndsWith("#");

    var result = new Pattern();
    if (anchoredAtStart)
    {
        result.Children.Add(new Constraint(HCFeatureSystem.LeftSideAnchor));
    }

    result.Children.AddRange(LoadPatternNodes(patternStr));

    if (anchoredAtEnd)
    {
        result.Children.Add(new Constraint(HCFeatureSystem.RightSideAnchor));
    }

    result.Freeze();
    return result;
}
if (LoadPatternNode(member, variables, out n)) + nodes.Add(n); + } + if (nodes.Count > 0) + { + node = nodes.Count == 1 ? nodes.First() : new Group(nodes); + return true; + } + break; + + case PhIterationContextTags.kClassId: + var iterCtxt = (IPhIterationContext)ctxt; + PatternNode childNode; + if (LoadPatternNode(iterCtxt.MemberRA, variables, out childNode)) + { + node = new Quantifier( + iterCtxt.Minimum, + iterCtxt.Maximum, + childNode + ); + return true; + } + break; + + case PhSimpleContextBdryTags.kClassId: + var bdryCtxt = (IPhSimpleContextBdry)ctxt; + IPhBdryMarker bdry = bdryCtxt.FeatureStructureRA; + if (bdry != null && bdry.Guid != LangProjectTags.kguidPhRuleWordBdry) + { + CharacterDefinition cd; + if (m_charDefs.TryGetValue(bdry, out cd)) + { + node = new Constraint(cd.FeatureStruct) { Tag = cd }; + return true; + } + } + break; + + case PhSimpleContextSegTags.kClassId: + var segCtxt = (IPhSimpleContextSeg)ctxt; + IPhPhoneme phoneme = segCtxt.FeatureStructureRA; + if (phoneme != null) + { + CharacterDefinition cd; + if (m_charDefs.TryGetValue(phoneme, out cd)) + { + node = new Constraint(cd.FeatureStruct) { Tag = cd }; + return true; + } + } + break; + + case PhSimpleContextNCTags.kClassId: + var ncCtxt = (IPhSimpleContextNC)ctxt; + SimpleContext hcCtxt; + if (TryLoadSimpleContext(ncCtxt, variables, out hcCtxt)) + { + node = new Constraint(hcCtxt.FeatureStruct) + { + Tag = hcCtxt + }; + return true; + } + break; + } + + node = null; + return false; + } + + private IEnumerable> LoadPatternNodes(string patternStr) + { + foreach (string token in TokenizeContext(patternStr)) + { + switch (token[0]) + { + case '#': + break; + + case '[': + IPhNaturalClass nc = m_naturalClassLookup[ + token.Substring(1, token.Length - 2).Trim() + ]; + SimpleContext ctxt; + TryLoadSimpleContext(nc, out ctxt); + yield return new Constraint(ctxt.FeatureStruct) + { + Tag = ctxt + }; + break; + + case '(': + yield return new Quantifier( + 0, + 1, + new Group( + 
/// <summary>
/// Splits an environment context string into tokens: "#" on its own, a bracketed
/// "[...]" group, a parenthesised "(...)" group, or a run of other characters.
/// Spaces separate tokens and are not returned.
/// </summary>
/// <param name="contextStr">The context text to tokenize.</param>
/// <returns>The tokens in the order they occur; evaluated lazily.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// A '[' or '(' has no matching closing character. The exception is raised while
/// enumerating, when the unterminated group is reached.
/// </exception>
private IEnumerable TokenizeContext(string contextStr)
{
    // Characters that end a run of ordinary characters.
    char[] delimiters = { '#', '[', '(', ' ' };

    int pos = 0;
    while (pos < contextStr.Length)
    {
        char current = contextStr[pos];
        switch (current)
        {
            case '#':
                yield return "#";
                pos++;
                break;

            case '[':
            case '(':
                char closer = current == '[' ? ']' : ')';
                int endPos = contextStr.IndexOf(closer, pos);
                if (endPos == -1)
                {
                    // Without this check an unterminated group at position 0 produced an
                    // endless stream of empty tokens (Substring(0, 0) with pos reset to 0),
                    // and elsewhere failed inside Substring with an uninformative message.
                    // The exception type matches what Substring already threw.
                    throw new ArgumentOutOfRangeException(
                        nameof(contextStr),
                        contextStr,
                        "Missing '" + closer + "' for the '" + current + "' at position " + pos + "."
                    );
                }
                yield return contextStr.Substring(pos, endPos - pos + 1);
                pos = endPos + 1;
                break;

            case ' ':
                pos++;
                break;

            default:
                int endRepPos = contextStr.IndexOfAny(delimiters, pos);
                if (endRepPos == -1)
                    endRepPos = contextStr.Length;
                yield return contextStr.Substring(pos, endRepPos - pos);
                pos = endRepPos;
                break;
        }
    }
}
/// <summary>
/// Converts a feature structure into a Hermit Crab <c>FeatureStruct</c>,
/// recursing into complex values.
/// </summary>
/// <param name="fs">The structure to convert; null yields an empty result.</param>
/// <param name="featSys">Feature system in which "feat" + Hvo identifiers are looked up.</param>
/// <returns>
/// A new feature structure. Closed values whose "sym" + Hvo symbol is not among the
/// feature's possible symbols are left out.
/// </returns>
private FeatureStruct LoadFeatureStruct(IFsFeatStruc fs, FeatureSystem featSys)
{
    var result = new FeatureStruct();
    if (fs == null)
    {
        return result;
    }

    foreach (IFsFeatureSpecification spec in fs.FeatureSpecsOC)
    {
        if (spec is IFsClosedValue closed)
        {
            var symbolicFeature = featSys.GetFeature("feat" + closed.FeatureRA.Hvo);
            // TODO: should we display something to the user if a FS has an invalid value?
            bool known = symbolicFeature.PossibleSymbols.TryGet(
                "sym" + closed.ValueRA.Hvo,
                out var symbol
            );
            if (known)
            {
                result.AddValue(symbolicFeature, symbol);
            }
            continue;
        }

        // Anything that is not a closed value is treated as a complex (nested) value.
        var complex = (IFsComplexValue)spec;
        var complexFeature = featSys.GetFeature("feat" + complex.FeatureRA.Hvo);
        result.AddValue(
            complexFeature,
            LoadFeatureStruct((IFsFeatStruc)complex.ValueOA, featSys)
        );
    }

    return result;
}
+ /// + public static bool IsLexicalPattern(string form) + { + // This assumes that "[" and "]" are not part of any phonemes. + return form.Contains("[") && form.Contains("]"); + } + + private string FormatRootForm(string formStr) + { + var formatted = formStr.Trim().Replace(' ', '#'); + return formatted; + } + + private static string FormatForm(string formStr) + { + return formStr.Trim().Replace(' ', '.'); + } + + private IEnumerable LoadAllPartsOfSpeech(IPartOfSpeech pos) + { + return LoadAllPartsOfSpeech(pos.ToEnumerable()); + } + + private IEnumerable LoadAllPartsOfSpeech(IEnumerable poss) + { + foreach (IPartOfSpeech pos in poss) + { + yield return m_posFeature.PossibleSymbols["pos" + pos.Hvo]; + foreach ( + FeatureSymbol symbol in LoadAllPartsOfSpeech( + pos.SubPossibilitiesOS.Cast() + ) + ) + yield return symbol; + } + } + + private IEnumerable LoadAllInflClasses(IMoInflClass inflClass) + { + yield return m_mprFeatures[inflClass]; + foreach (MprFeature mprFeat in LoadAllInflClasses(inflClass.SubclassesOC)) + yield return mprFeat; + } + + private IEnumerable LoadAllInflClasses(IEnumerable inflClasses) + { + foreach (IMoInflClass inflClass in inflClasses) + { + yield return m_mprFeatures[inflClass]; + foreach (MprFeature mprFeat in LoadAllInflClasses(inflClass.SubclassesOC)) + yield return mprFeat; + } + } + + private IEnumerable LoadMprFeatures(IPhPhonRuleFeat ruleFeat) + { + switch (ruleFeat.ItemRA.ClassID) + { + case MoInflClassTags.kClassId: + foreach ( + MprFeature mprFeat in LoadAllInflClasses((IMoInflClass)ruleFeat.ItemRA) + ) + yield return mprFeat; + break; + + case CmPossibilityTags.kClassId: + yield return m_mprFeatures[ruleFeat.ItemRA]; + break; + } + } + + private static IMoInflClass GetInflClass(IMoStemMsa msa) + { + if (msa.InflectionClassRA != null) + return msa.InflectionClassRA; + if (msa.PartOfSpeechRA != null) + return GetDefaultInflClass(msa.PartOfSpeechRA); + return null; + } + + private static IMoInflClass 
GetDefaultInflClass(IPartOfSpeech pos) + { + while (true) + { + if (pos.DefaultInflectionClassRA != null) + { + return pos.DefaultInflectionClassRA; + } + if (!(pos.Owner is IPartOfSpeech parentPos)) + { + return null; + } + pos = parentPos; + } + } + + private static void LoadFeatureSystem(IFsFeatureSystem featSys, FeatureSystem hcFeatSys) + { + foreach (IFsFeatDefn feature in featSys.FeaturesOC) + { + var closedFeature = feature as IFsClosedFeature; + if (closedFeature != null) + { + hcFeatSys.Add( + new SymbolicFeature( + "feat" + closedFeature.Hvo, + closedFeature.ValuesOC.Select( + sfv => + new FeatureSymbol("sym" + sfv.Hvo) + { + Description = sfv.Abbreviation.BestAnalysisAlternative.Text + } + ) + ) + { + Description = feature.Abbreviation.BestAnalysisAlternative.Text + } + ); + } + else + { + hcFeatSys.Add( + new ComplexFeature("feat" + feature.Hvo) + { + Description = feature.Abbreviation.BestAnalysisAlternative.Text + } + ); + } + } + hcFeatSys.Freeze(); + } + + private void LoadCharacterDefinitionTable(IPhPhonemeSet phonemeSet) + { + m_table = new CharacterDefinitionTable + { + Name = phonemeSet.Name.BestAnalysisAlternative.Text + }; + foreach (IPhPhoneme phoneme in phonemeSet.PhonemesOC) + { + FeatureStruct fs = null; + if (phoneme.FeaturesOA != null && phoneme.FeaturesOA.FeatureSpecsOC.Count > 0) + fs = LoadFeatureStruct( + phoneme.FeaturesOA, + m_language.PhonologicalFeatureSystem + ); + + string[] reps = phoneme.CodesOS + .Where( + c => + !string.IsNullOrEmpty( + RemoveDottedCircles(c.Representation.VernacularDefaultWritingSystem.Text) + ) + ) + .Select(c => RemoveDottedCircles(c.Representation.VernacularDefaultWritingSystem.Text)) + .ToArray(); + if (reps.Length == 0) + { + // did not find a grapheme for this phoneme + m_logger.InvalidPhoneme(phoneme); + } + else if (reps.Any(r => m_table.Contains(r))) + { + // another phoneme has defined the same grapheme + m_logger.DuplicateGrapheme(phoneme); + } + else + { + CharacterDefinition cd = 
m_table.AddSegment(reps, fs); + m_charDefs[phoneme] = cd; + } + } + + foreach ( + IPhBdryMarker bdry in phonemeSet.BoundaryMarkersOC.Where( + bdry => bdry.Guid != LangProjectTags.kguidPhRuleWordBdry + ) + ) + { + string[] reps = bdry.CodesOS + .Where( + c => !string.IsNullOrEmpty(RemoveDottedCircles(c.Representation.BestVernacularAlternative.Text)) + ) + .Select(c => RemoveDottedCircles(c.Representation.BestVernacularAlternative.Text)) + .ToArray(); + if (reps.Length > 0) + { + CharacterDefinition cd = m_table.AddBoundary(reps); + m_charDefs[bdry] = cd; + } + } + + m_null = m_table.AddBoundary(new[] { "^0", "*0", "&0", "∅" }); + m_table.AddBoundary("."); + m_morphBdry = m_table["+"]; + + if (m_acceptUnspecifiedGraphemes) + { + // load valid characters from the default vernacular writing system into symbol table + var ws = m_cache.ServiceLocator.WritingSystems.DefaultVernacularWritingSystem; + var validChars = ValidCharacters.Load(ws); + foreach (string wordFormingChar in validChars.WordFormingCharacters) + { + if (!m_table.Contains(wordFormingChar)) + m_table.AddSegment(wordFormingChar); + } + + foreach (string otherChar in validChars.OtherCharacters) + { + if (!m_table.Contains(otherChar)) + m_table.AddBoundary(otherChar); + } + } + // Add natural classes to table for lexical patterns. 
+ foreach (NaturalClass hcNaturalClass in m_language.NaturalClasses) + { + m_table.AddNaturalClass(hcNaturalClass); + } + foreach (string ncName in m_naturalClassLookup.Keys) + { + NaturalClass hcNaturalClass; + if (TryLoadNaturalClass(m_naturalClassLookup[ncName], out hcNaturalClass)) + m_table.AddNaturalClass(hcNaturalClass); + } + m_language.CharacterDefinitionTables.Add(m_table); + } + + private bool TryLoadSimpleContext( + IPhSimpleContextNC ctxt, + Dictionary variables, + out SimpleContext hcCtxt + ) + { + if (ctxt.FeatureStructureRA == null) + { + hcCtxt = null; + return false; + } + + NaturalClass hcNaturalClass; + if (!TryLoadNaturalClass(ctxt.FeatureStructureRA, out hcNaturalClass)) + { + hcCtxt = null; + return false; + } + + hcCtxt = new SimpleContext( + hcNaturalClass, + GetVariables(variables, ctxt.PlusConstrRS, true) + .Concat(GetVariables(variables, ctxt.MinusConstrRS, false)) + ); + return true; + } + + private IEnumerable GetVariables( + Dictionary variables, + IEnumerable constraints, + bool agree + ) + { + foreach (IPhFeatureConstraint constraint in constraints) + { + string varName = variables[constraint]; + var feat = m_language.PhonologicalFeatureSystem.GetFeature( + "feat" + constraint.FeatureRA.Hvo + ); + yield return new SymbolicFeatureValue(feat, varName, agree); + } + } + + private bool TryLoadSimpleContext(IPhNaturalClass naturalClass, out SimpleContext ctxt) + { + NaturalClass hcNaturalClass; + if (!TryLoadNaturalClass(naturalClass, out hcNaturalClass)) + { + ctxt = null; + return false; + } + + ctxt = new SimpleContext(hcNaturalClass, Enumerable.Empty()); + return true; + } + + private bool TryLoadNaturalClass( + IPhNaturalClass naturalClass, + out NaturalClass hcNaturalClass + ) + { + if (naturalClass == null) + { + hcNaturalClass = null; + return false; + } + + if (m_naturalClasses.TryGetValue(naturalClass, out hcNaturalClass)) + return hcNaturalClass != null; + + var segNC = naturalClass as IPhNCSegments; + if (segNC != null) + { + 
var segments = new List(); + foreach (IPhPhoneme phoneme in segNC.SegmentsRC) + { + CharacterDefinition cd; + if (m_charDefs.TryGetValue(phoneme, out cd)) + { + segments.Add(cd); + } + else + { + m_naturalClasses[naturalClass] = null; + return false; + } + } + + hcNaturalClass = new SegmentNaturalClass(segments); + } + else + { + var featNC = (IPhNCFeatures)naturalClass; + FeatureStruct fs = LoadFeatureStruct( + featNC.FeaturesOA, + m_language.PhonologicalFeatureSystem + ); + hcNaturalClass = new NaturalClass(fs); + } + hcNaturalClass.Name = naturalClass.Abbreviation.BestAnalysisAlternative.Text; + + m_naturalClasses[naturalClass] = hcNaturalClass; + return true; + } + + private Segments Segments(string representation) + { + representation = FormatForm(representation); + return new Segments(m_table, representation, Segment(representation)); + } + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/IHCLoadErrorLogger.cs b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/IHCLoadErrorLogger.cs new file mode 100644 index 0000000000..bda3c328be --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/IHCLoadErrorLogger.cs @@ -0,0 +1,24 @@ +using SIL.LCModel; + +namespace SIL.FieldWorks.WordWorks.Parser +{ + public interface IHCLoadErrorLogger + { + void InvalidShape(string str, int errorPos, IMoMorphSynAnalysis msa); + void InvalidAffixProcess( + IMoAffixProcess affixProcess, + bool isInvalidLhs, + IMoMorphSynAnalysis msa + ); + void InvalidPhoneme(IPhPhoneme phoneme); + void DuplicateGrapheme(IPhPhoneme phoneme); + void InvalidEnvironment( + IMoForm form, + IPhEnvironment env, + string reason, + IMoMorphSynAnalysis msa + ); + void InvalidReduplicationForm(IMoForm form, string reason, IMoMorphSynAnalysis msa); + void UnmatchedReduplicationIndexedClass(IMoForm form, string reason, string environment); + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/InvalidAffixProcessException.cs 
b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/InvalidAffixProcessException.cs new file mode 100644 index 0000000000..cdd6c9871c --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/InvalidAffixProcessException.cs @@ -0,0 +1,27 @@ +using System; +using SIL.LCModel; + +namespace SIL.FieldWorks.WordWorks.Parser +{ + internal class InvalidAffixProcessException : Exception + { + private readonly IMoAffixProcess m_affixProcess; + private readonly bool m_invalidLhs; + + public InvalidAffixProcessException(IMoAffixProcess affixProcess, bool invalidLhs) + { + m_affixProcess = affixProcess; + m_invalidLhs = invalidLhs; + } + + public IMoAffixProcess AffixProcess + { + get { return m_affixProcess; } + } + + public bool IsInvalidLhs + { + get { return m_invalidLhs; } + } + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/InvalidReduplicationFormException.cs b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/InvalidReduplicationFormException.cs new file mode 100644 index 0000000000..f187186963 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/InvalidReduplicationFormException.cs @@ -0,0 +1,14 @@ +// Copyright (c) 2015 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System; + +namespace SIL.FieldWorks.WordWorks.Parser +{ + internal class InvalidReduplicationFormException : Exception + { + public InvalidReduplicationFormException(string message) + : base(message) { } + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/NullFdoDirectories.cs b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/NullFdoDirectories.cs new file mode 100644 index 0000000000..dc7d4b06a8 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/NullFdoDirectories.cs @@ -0,0 +1,11 @@ +using SIL.LCModel; + +namespace SIL.GenerateHCConfigForFLExTrans +{ + internal class 
NullFdoDirectories : ILcmDirectories + { + public string ProjectsDirectory => null; + + public string TemplateDirectory => null; + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/NullThreadedProgress.cs b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/NullThreadedProgress.cs new file mode 100644 index 0000000000..40d66726b9 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/NullThreadedProgress.cs @@ -0,0 +1,72 @@ +using System; +using System.ComponentModel; +using SIL.LCModel.Utils; + +namespace SIL.GenerateHCConfigForFLExTrans +{ + internal class NullThreadedProgress : IThreadedProgress + { + private readonly ISynchronizeInvoke m_synchronizeInvoke; + + public NullThreadedProgress(ISynchronizeInvoke synchronizeInvoke) + { + m_synchronizeInvoke = synchronizeInvoke; + } + + public void Step(int amount) + { + Position += amount * StepSize; + } + + public string Title { get; set; } + + public string Message { get; set; } + + public int Position { get; set; } + + public int StepSize { get; set; } + + public int Minimum { get; set; } + + public int Maximum { get; set; } + + public ISynchronizeInvoke SynchronizeInvoke + { + get { return m_synchronizeInvoke; } + } + + public bool IsIndeterminate { get; set; } + + public bool AllowCancel { get; set; } + + public bool IsCanceling + { + get { return false; } + } + + public event CancelEventHandler Canceling + { + add { } + remove { } + } + + public object RunTask( + Func backgroundTask, + params object[] parameters + ) + { + return RunTask(true, backgroundTask, parameters); + } + + public object RunTask( + bool fDisplayUi, + Func backgroundTask, + params object[] parameters + ) + { + return backgroundTask(this, parameters); + } + + public bool Canceled { get; set; } + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/ProjectIdentifier.cs b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/ProjectIdentifier.cs new file mode 100644 index 
0000000000..13ff5d3908 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/ProjectIdentifier.cs @@ -0,0 +1,69 @@ +using System; +using SIL.LCModel; + +namespace SIL.GenerateHCConfigForFLExTrans +{ + internal class ProjectIdentifier : IProjectIdentifier + { + private readonly BackendProviderType m_backendProviderType; + + public ProjectIdentifier(string projectPath) + { + Path = System.IO.Path.GetFullPath(projectPath); + string ext = System.IO.Path.GetExtension(Path); + switch (ext.ToLowerInvariant()) + { + case LcmFileHelper.ksFwDataXmlFileExtension: + m_backendProviderType = BackendProviderType.kXML; + break; + } + } + + public bool IsLocal + { + get { return true; } + } + + public string Path { get; set; } + + public string ProjectFolder + { + get { return System.IO.Path.GetDirectoryName(Path); } + } + + public string SharedProjectFolder + { + get { return ProjectFolder; } + } + + public string ServerName + { + get { return null; } + } + + public string Handle + { + get { return Name; } + } + + public string PipeHandle + { + get { throw new NotImplementedException(); } + } + + public string Name + { + get { return System.IO.Path.GetFileNameWithoutExtension(Path); } + } + + public BackendProviderType Type + { + get { return m_backendProviderType; } + } + + public string UiName + { + get { return Name; } + } + } +} diff --git a/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/Properties/AssemblyInfo.cs b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..f40fe9f0cb --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/GenerateHCConfig4FLExTrans/Properties/AssemblyInfo.cs @@ -0,0 +1,31 @@ +// Copyright (c) 2023-2025 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System.Reflection; + +// General Information about an assembly is controlled through the following +// set of 
attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("GenerateHCConfigForFLExTrans")] +[assembly: AssemblyDescription( + "This program extracts a Hermit Crab configuration file from a FLEx project.\r\nLexical glosses are in the form needed by FLExTrans.\r\nIt is compatible with FLEx database version 7000072 such as in version 9.1.12 Beta of FLEx.\r\n\r\nThis software is licensed under the LGPL, version 2.1 or later\r\n(http://www.gnu.org/licenses/lgpl-2.1.html)" +)] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("SIL International")] +[assembly: AssemblyProduct("GenerateHCConfigForFLExTrans")] +[assembly: AssemblyCopyright("Copyright © 2023-2026")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +[assembly: AssemblyVersion("1.8.0.0")] +[assembly: AssemblyFileVersion("1.8.0.0")] diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGloss/App.config b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/App.config new file mode 100644 index 0000000000..731b8431b9 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/App.config @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGloss/HCSynthByGloss.cs b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/HCSynthByGloss.cs new file mode 100644 index 0000000000..aa6d107655 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/HCSynthByGloss.cs @@ -0,0 +1,76 @@ +// Copyright (c) 2022-2023 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System; +using System.Collections.Generic; +using System.IO; 
+using System.Linq; +using System.Reflection; +using System.Text; +using System.Threading.Tasks; +using System.Xml.Linq; +using System.Xml.XPath; +using System.Xml.Xsl; +using HCSynthByGloss; +using SIL.Machine.Morphology; +using SIL.Machine.Morphology.HermitCrab; +using SIL.Machine.Morphology.HermitCrab.MorphologicalRules; +using SIL.Machine.Translation; +using SIL.HCSynthByGloss; +using SIL.Utils; + +namespace SIL.HCSynthByGloss +{ + class HCSynthByGloss + { + static void Main(string[] args) + { + bool doTracing = false; + bool showTracing = false; + int argCount = args.Count(); + if (argCount != 6 || args[0] != "-h" || args[2] != "-g" || args[4] != "-o") + { + if (argCount == 7 && args[6] == "-t") + { + doTracing = true; + } + else if (argCount == 8 && args[6] == "-t" && args[7] == "-s") + { + doTracing = true; + showTracing = true; + } + else + { + Console.WriteLine("Usage:"); + Console.WriteLine( + "HCSynthByGloss -h HC.xml_file -g gloss_file -o output (-t (-s))" + ); + Console.WriteLine("\t-t = turn on tracing"); + Console.WriteLine( + "\t-s = show the tracing result in the system default web browser; -s is only valid when also using -t" + ); + Environment.Exit(1); + } + } + if (!File.Exists(args[1])) + { + Console.WriteLine("Could not find file '" + args[1] + "'."); + Environment.Exit(2); + } + if (!File.Exists(args[3])) + { + Console.WriteLine("Could not find file '" + args[3] + "'."); + Environment.Exit(3); + } + + var dll = new HCSynthByGlossDll(args[5]); + dll.HcXmlFile = args[1]; + dll.GlossFile = args[3]; + dll.DoTracing = doTracing; + dll.ShowTracing = showTracing; + var result = dll.Process(); + Console.WriteLine("Processing result: " + result + "."); + } + } +} diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGloss/HCSynthByGloss.csproj b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/HCSynthByGloss.csproj new file mode 100644 index 0000000000..564d570ced --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/HCSynthByGloss.csproj @@ -0,0 
+1,113 @@ + + + + + Debug + AnyCPU + {765F99FE-C613-42B4-AC05-D5EB3D343BF7} + Exe + SIL.HCSynthByGloss + HCSynthByGloss + v4.6.2 + 512 + true + true + + + + AnyCPU + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + AnyCPU + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + true + ..\..\..\..\Output\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + true + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + true + + + + False + ..\..\..\..\Output\Debug\HCSynthByGlossDll.dll + + + ..\HCSynthByGlossLib\bin\x64\Debug\HCSynthByGlossLib.dll + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\Newtonsoft.Json.dll + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Core.dll + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Machine.dll + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Machine.Morphology.HermitCrab.dll + + + + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\System.Net.Http.dll + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\System.ValueTuple.dll + + + + + + + + False + ..\..\..\..\Output\Debug\XMLUtils.dll + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGloss/Properties/AssemblyInfo.cs b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..f1c6eeb743 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/Properties/AssemblyInfo.cs @@ -0,0 +1,40 @@ +// Copyright (c) 2023-2025 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. 
Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("HCSynthByGloss")] +[assembly: AssemblyDescription("Console app to run HermitCrab synthesis based on a set of glosses")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("SIL International")] +[assembly: AssemblyProduct("HCSynthByGloss")] +[assembly: AssemblyCopyright("Copyright © 2023-2026")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("765f99fe-c613-42b4-ac05-d5eb3d343bf7")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.7.0.0")] +[assembly: AssemblyFileVersion("1.7.0.0")] diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGloss/TestData/IndonesianAnalyses.txt b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/TestData/IndonesianAnalyses.txt new file mode 100644 index 0000000000..0669c8ee8b --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/TestData/IndonesianAnalyses.txt @@ -0,0 +1,60 @@ +^ajar1.1$ +^amat1.1$ +^amat1.1$ +^ambil1.1$ +^baca1.1$ +^bagi1.1$ +^bantu1.1$ +^beli1.1$ +^cari1.1$ +^catat1.1$ +^dengar1.1$ +^ekspor1.1$ +^erang1.1$ +^fitnah1.1$ +^gambar1.1$ +^ganggu1.1$ +^ganti1.1$ +^hilang1.1$ +^hitung1.1$ +^isi1.1$ +^jahit1.1$ +^jual1.1$ +^kaca1.1$ +^karang1.1$ +^kayuh1.1$ +^khawatir1.1$ +^kirim1.1$ +^langit1.1$ +^lempar1.1$ +^lihat1.1$ +^masak1.1$ 
+^minta1.1$ +^nanti1.1$ +^ngaco1.1$ +^nganga1.1$ +^nikah1.1$ +^nyanyi1.1$ +^olah1.1$ +^pakai1.1$ +^pijit1.1$ +^pukul1.1$ +^rancang1.1$ +^rasa1.1$ +^satu1.1$ +^sewa1.1$ +^sewa1.1$ +^syarat1.1$ +^tulis1.1$ +^tulis1.1$ +^undang1.1$ +^urus1.1$ +^wakil1.1$ +^yakin1.1$ +^ziarah1.1$ +^yakin1.1$ +^yakin1.1$ +^yakinxyz1.1$ +^wakil1.1$ ^tulis1.1$ +^karang1.1$ +^karang1.1$ diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGloss/TestData/expectedWordForms.txt b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/TestData/expectedWordForms.txt new file mode 100644 index 0000000000..a412167019 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/TestData/expectedWordForms.txt @@ -0,0 +1,60 @@ +mengajar +%2%mengamati-mengamati%mengamati-amati% +%2%mengamat-mengamati%mengamat-amati% +mengambil +membaca +%2%membagi-membagi%membagi-bagi% +membantu +membeli +mencari +mencatat +mendengar +mengekspor +mengerang +memfitnah +menggambar +mengganggu +mengganti +menghilang +menghitung +mengisi +menjahit +menjual +mengaca +mengarang +%2%mengayuh-mengayuh%mengayuh-ngayuh% +%0%^khawatir1.1$% Duplicate gloss(es) found for 'Caus'; synthesis may not work. +mengirim +melangit +melempar +melihat +memasak +%2%meminta-meminta%meminta-minta% +menanti +mengaco +menganga +menikah +menyanyi +mengolah +memakai +%2%memijit-memijit%memijit-mijit% +memukul +merancang +merasa +menyatu +menyewa +%2%menyewa-menyewa%menyewa-nyewa% +%0%^syarat1.1$% Duplicate gloss(es) found for 'Caus'; synthesis may not work. 
+menulis +%2%menulis-menulis%menulis-nulis% +mengundang +mengurus +mewakili +meyakini +menziarahi +%0%^yakin1.1$% One or more glosses not found: 'AVxyz'; +%0%^yakin1.1$% One or more glosses not found: 'CAUSiXyz'; +%0%^yakinxyz1.1$% One or more glosses not found: 'AVxyz'; 'yakinxyz1.1'; +mewakili,menulis +karang-karangi +kemengarangian diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGloss/TestData/indoHC4FLExrans.xml b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/TestData/indoHC4FLExrans.xml new file mode 100644 index 0000000000..38a3b426cf --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGloss/TestData/indoHC4FLExrans.xml @@ -0,0 +1,2487 @@ + + + + + Indonesian-HermitCrab + + + adv + + + n + + + pro-form + + + pro + + + v + + + adj + + + + + dr + + + + - + + + + OrthPlace + + velar + labial + alveolar + ? + palatal + glottal + + + + syl + + - + + + + + + nas + + - + + + + + + approx + + - + + + + + + back + + + + - + + + + high + + - + + + + + + cons + + + + - + + + + lat + + + + - + + + + NO→N + + + + - + + + + son + + - + + + + + + low + + - + + + + + + voice + + + + - + + + + cont + + + + - + + + + + + Does not undergo voiceless obstruent deletion + Irregularly Inflected Form + Past + Plural + + exceptionFeatures + + + lexEntryInflTypes + + + + Main phoneme set + + + + e + + + + + + + + + + + + + + + + + + p + + + + + + + + + + + + + + + + j + + + + + + + + + + + + + + + + + + + t + + + + + + + + + + + + + + + + l + + + + + + + + + + + + + + + + c + + + + + + + + + + + + + + + + + + + sy + + + + + + + + + + + + + + + + a + + + + + + + + + + + + + + + + + + h + + + + + + + + + + + + + + + + + + + b + + + + + + + + + + + + + + + + i + + + + + + + + + + + + + + + + + + n + + + + + + + + + + + + + + + + u + + + + + + + + + + + + + + + + + + d + + + + + + + + + + + + + + + + ny + + + + + + + + + + + + + + + + ng + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + kh + + + + + + + + + + + + + + + + + + + f + + + + + + + + + + + + + + + + k 
+ + + + + + + + + + + + + + + + + + + z + + + + + + + + + + + + + + + + m + + + + + + + + + + + + + + + + o + + + + + + + + + + + + + + + + + + s + + + + + + + + + + + + + + + + r + + + + + + + + + + + + + + + + y + + + + + + + + + + + + + + + + + + + w + + + + + + + + + + + + + + + + + + + g + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ^0 + *0 + &0 + Ø + + + + + + . + + + + + + + Any + + + V + + + + + + + + *** + + + + + *** + + + + + + + *** + + + + + + + + *** + + + + + + + A + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + *** + + + + *** + + + + *** + + + + *** + + + + + + Unspecified nasal default + + + + + + + + + + + + + + + + + + + + + + + + + + + Nasal deletion + + + + + + + + + + + + + + + + + + + + + + + Nasalization in reduplication + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Nasal assimilation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Voiceless obstruent deletion + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Morphophonemic + + + Default Left Head Compounding + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Default Right Head Compounding + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + per- + + + + + + + + + + + + + + + + + + + + per+ + + + + + 15952 + + + + Caus + + 15277 + + + + -kan + + + + + + + + + + + + + + + + + + + + + +kan + + + + 15342 + + + + Caus + + 13245 + + + + ke- -an2 + + + + + + + + + + + + + + + + + + + + ke+ + + + + +an + + + + 14466 + 6662 + + + + NMLZR2 + + 8412 + + + + ke- -an1 + + + + + + + + + + + + + + + + + + + + k + + + e + + + + + + + + + + + + a + + + n + + + + 15809 + + + + NMLZR + + 5466 + + + + -Cont + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + 42 + + + + Cont + + 11813 + + + + peⁿ- -an + + + + + + + + + + + + + + + + + + + + p + + + e + + + + + + + + 
+ + + + + + + a + + + n + + + + 14633 + + + + NMLZR3 + + 13251 + + + + -i + + + + + + + + + + + + + + + + + + + + + +i + + + + 1047 + + + + APPL + + 6104 + + + + -i + + + + + + + + + + + + + + + + + + + + + +i + + + + 1047 + + + + LOC + + 6636 + + + + -i + + + + + + + + + + + + + + + + + + + + + +i + + + + 1047 + + + + CAUSi + + 10386 + + + + -nya + + + + + + + + + + + + + + + + + + + + + +nya + + + + 9265 + + + + his + + 15675 + + + + -Pl + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + 12595 + + + + pl + + 11722 + + + + meN + + + + + + + + + + + + + + + + + + + + meⁿ+ + + + + + 15755 + + + + AV + + 14849 + + + + REDUP-meN + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + 3860 + + + + RECIP + + 15045 + + + + + + + + pakai + + 2483 + + + + pakai1.1 + + 13454 + + + + + + lihat + + 8502 + + + + lihat1.1 + + 1616 + + + + + + nyanyi + + 7560 + + + + nyanyi1.1 + + 9972 + + + + + + nikah + + 161 + + + + nikah1.1 + + 11125 + + + + + + rasa + + 12859 + + + + rasa1.1 + + 12474 + + + + + + orang + + 12698 + + + + orang1.1 + + 12514 + + + + + + ngaco + + 11088 + + + + ngaco1.1 + + 11463 + + + + + + ekspor + + 7937 + + + + ekspor1.1 + + 15733 + + + + + + nanti + + 9102 + + + + nanti1.1 + + 4334 + + + + + + nganga + + 11739 + + + + nganga1.1 + + 2594 + + + + + + proklamasi + + 11375 + + + + proklamasi1.1 + + 1459 + + + + + + satu + + 14581 + + + + satu1.1 + + 14690 + + + + + + bagi + + 8926 + + + + bagi1.1 + + 282 + + + + + + baca + + 184 + + + + baca1.1 + + 11790 + + + + + + lempar + + 9965 + + + + lempar1.1 + + 10012 + + + + + + amat + + 6434 + + + + amat1.1 + + 8254 + + + + + + gambar + + 3215 + + + + gambar1.1 + + 1525 + + + + + + tangan + + 1446 + + + + tangan1.1 + + 8945 + + + + + + klasifikasi + + 1740 + + + + klasifikasi1.1 + + 2818 + + + + + + jual + + 5386 + + + + jual1.1 + + 14132 + + + + + + rancang + + 9408 + + + + rancang1.1 + + 8983 + + + + + + erang + + 10283 + + + + erang1.1 + + 1782 + + + + + + urut + + 15561 + + + 
+ urut1.1 + + 7621 + + + + + + hilang + + 6965 + + + + hilang1.1 + + 9513 + + + + + + ajar + + 3176 + + + + ajar1.1 + + 6858 + + + + + + ajar + + 31761 + + + + aja´r1.2 + + 68581 + + + + + + ajar + + 31762 + + + + ajár1.3 + + 68582 + + + + + + ajar + + 3176 + + + + ajar1.2 + + 7724 + + + + + + kaca + + 9509 + + + + kaca1.1 + + 4683 + + + + + + syarat + + 15976 + + + + syarat1.1 + + 10790 + + + + + + perpustakaan + + 632 + + + + perpustakaan1.1 + + 5267 + + + + + + undang + + 9738 + + + + undang1.1 + + 3715 + + + + + + buku + + 2244 + + + + buku1.1 + + 212 + + + + + + ambil + + 1090 + + + + ambil1.1 + + 9261 + + + + + + yakin + + 4862 + + + + yakin1.1 + + 4086 + + + + + + swadaya + + 1859 + + + + swadaya1.1 + + 14462 + + + + + + pukul + + 5859 + + + + pukul1.1 + + 4741 + + + + + + khawatir + + 7206 + + + + khawatir1.1 + + 10660 + + + + + + beli + + 7092 + + + + beli1.1 + + 12718 + + + + + + karang + + 11489 + + + + karang1.1 + + 11754 + + + + + + sewa + + 13870 + + + + sewa1.1 + + 4900 + + + + + + tulis + + 9341 + + + + tulis1.1 + + 286 + + + + + + traktir + + 4720 + + + + traktir1.1 + + 13436 + + + + + + isi + + 474 + + + + isi1.1 + + 6308 + + + + + + ziarah + + 9707 + + + + ziarah1.1 + + 14406 + + + + + + bantu + + 10269 + + + + bantu1.1 + + 925 + + + + + + cari + + 7493 + + + + cari1.1 + + 11690 + + + + + + pel + + 9352 + + + + pel1.1 + + 13782 + + + + + + ganti + + 3926 + + + + ganti1.1 + + 7904 + + + + + + lebar + + 2085 + + + + lebar1.1 + + 11150 + + + + + + langit + + 6585 + + + + langit1.1 + + 10906 + + + + + + oleh + + 1698 + + + + oleh1.1 + + 11369 + + + + + + jahit + + 15449 + + + + jahit1.1 + + 1467 + + + + + + wakil + + 11033 + + + + wakil1.1 + + 7648 + + + + + + olah + + 14790 + + + + olah1.1 + + 5421 + + + + + + ganggu + + 7518 + + + + ganggu1.1 + + 12532 + + + + + + hitung + + 9364 + + + + hitung1.1 + + 4864 + + + + + + kayuh + + 6109 + + + + kayuh1.1 + + 15257 + + + + + + kirim + + 10157 + + + + kirim1.1 + + 9966 + + + + + + urus + + 7550 + + + + 
using HCSynthByGloss;
using SIL.Machine.Morphology.HermitCrab;
using SIL.Utils;
using System;
using System.IO;
using System.Reflection;
using System.Text;
using System.Xml.XPath;
using System.Xml.Xsl;

namespace SIL.HCSynthByGloss
{
    /// <summary>
    /// Library entry point that loads a HermitCrab XML language definition and a
    /// gloss file, synthesizes word forms for the glosses, and writes the result
    /// to <see cref="OutputFile"/>. When tracing is enabled, the synthesizer trace
    /// is also written to a temporary XML file and transformed to HTML.
    /// </summary>
    public class HCSynthByGlossDll
    {
        /// <summary>When true, the trace manager records a trace during synthesis.</summary>
        public bool DoTracing { get; set; } = false;
        /// <summary>When true (and tracing is on), the generated HTML trace is opened after processing.</summary>
        public bool ShowTracing { get; set; } = false;
        /// <summary>Path of the HermitCrab XML file; normally set through <see cref="SetHcXmlFile"/>.</summary>
        public string HcXmlFile { get; set; } = "";
        /// <summary>Path of the gloss file; normally set through <see cref="SetGlossFile"/>.</summary>
        public string GlossFile { get; set; } = "";
        /// <summary>Path of the file that receives the synthesized word forms.</summary>
        public string OutputFile { get; set; } = "";

        // Message fragments; callers (including the unit tests) compose the expected
        // result strings from these, so their values must not change.
        public string kSuccess { get; } = "Success!";
        public string kError1 { get; } = "Could not find ";
        public string kHCXmlFile { get; } = "HC XML file";
        public string kGlossFile { get; } = "Gloss file";
        public string kError2 { get; } = " '";
        public string kError3 { get; } = "'";

        // Cached language and gloss text, together with the paths they were loaded
        // from. The paths let Process() notice that HcXmlFile/GlossFile were assigned
        // directly through the public setters and (re)load instead of running with a
        // null language or stale glosses.
        Language synLang;
        string glosses = "";
        string loadedHcXmlFile;
        string loadedGlossFile;

        /// <summary>Creates an instance that writes its results to <paramref name="output"/>.</summary>
        /// <param name="output">Path of the output file.</param>
        public HCSynthByGlossDll(string output)
        {
            OutputFile = output;
        }

        /// <summary>
        /// Sets the HermitCrab XML file and loads the language from it.
        /// </summary>
        /// <param name="value">Path of the HermitCrab XML file.</param>
        /// <returns><see cref="kSuccess"/>, or a "Could not find ..." message when the file does not exist.</returns>
        public string SetHcXmlFile(string value)
        {
            if (!File.Exists(value))
            {
                return kError1 + kHCXmlFile + kError2 + value + kError3;
            }
            HcXmlFile = value;
            LoadLanguage();
            return kSuccess;
        }

        /// <summary>
        /// Sets the gloss file and reads its content as UTF-8.
        /// </summary>
        /// <param name="value">Path of the gloss file.</param>
        /// <returns><see cref="kSuccess"/>, or a "Could not find ..." message when the file does not exist.</returns>
        public string SetGlossFile(string value)
        {
            if (!File.Exists(value))
            {
                return kError1 + kGlossFile + kError2 + value + kError3;
            }
            GlossFile = value;
            LoadGlosses();
            return kSuccess;
        }

        /// <summary>
        /// Synthesizes word forms for the loaded glosses and writes them to
        /// <see cref="OutputFile"/> as UTF-8. If tracing is enabled, also produces
        /// the XML and HTML trace files in the temp directory and, when
        /// <see cref="ShowTracing"/> is set, opens the HTML file.
        /// </summary>
        /// <returns><see cref="kSuccess"/>, or a "Could not find ..." message when either input file is missing.</returns>
        public string Process()
        {
            if (!File.Exists(HcXmlFile))
            {
                return kError1 + kHCXmlFile + kError2 + HcXmlFile + kError3;
            }
            if (!File.Exists(GlossFile))
            {
                return kError1 + kGlossFile + kError2 + GlossFile + kError3;
            }
            // The inputs are normally loaded once by the Set* methods so repeated
            // Process() calls are cheap. Only (re)load here when the cached data does
            // not correspond to the current property values.
            if (synLang == null || loadedHcXmlFile != HcXmlFile)
            {
                LoadLanguage();
            }
            if (loadedGlossFile != GlossFile)
            {
                LoadGlosses();
            }

            var hcTraceManager = new HcXmlTraceManager();
            hcTraceManager.IsTracing = DoTracing;
            var srcMorpher = new Morpher(hcTraceManager, synLang);
            var synthesizer = Synthesizer.Instance;
            string synthesizedWordForms = synthesizer.SynthesizeGlosses(
                glosses,
                srcMorpher,
                synLang,
                hcTraceManager
            );
            // NOTE(review): '#' appears to be a marker emitted by the synthesizer that
            // should be shown as a space -- confirm against Synthesizer.
            synthesizedWordForms = synthesizedWordForms.Replace("#", " ");
            File.WriteAllText(OutputFile, synthesizedWordForms, Encoding.UTF8);
            if (hcTraceManager.IsTracing)
            {
                // we want to create a temp XML file and stuff synthesizer.Trace into it
                // then transform it to an html file and show the html file
                var tempXMlResult = CreateXmlFile(synthesizer);
                string tempHtmResult = CreateHtmResult(tempXMlResult, synthesizer);
                if (ShowTracing)
                {
                    System.Diagnostics.Process.Start(tempHtmResult);
                }
            }

            return kSuccess;
        }

        // Loads the language from HcXmlFile and remembers which path it came from.
        private void LoadLanguage()
        {
            synLang = XmlLanguageLoader.Load(HcXmlFile);
            loadedHcXmlFile = HcXmlFile;
        }

        // Reads the gloss text from GlossFile and remembers which path it came from.
        private void LoadGlosses()
        {
            glosses = File.ReadAllText(GlossFile, Encoding.UTF8);
            loadedGlossFile = GlossFile;
        }

        /// <summary>
        /// Transforms the XML trace file to "HCSynthTrace.htm" in the temp directory
        /// using the "HCSynthByGlossFormatHCTrace" presentation transform.
        /// </summary>
        /// <param name="xmlFile">Path of the XML trace file to transform.</param>
        /// <param name="synthesizer">Not used by this method; kept for signature compatibility.</param>
        /// <returns>Path of the generated HTML file.</returns>
        private static string CreateHtmResult(string xmlFile, Synthesizer synthesizer)
        {
            string tempHtmResult = Path.Combine(Path.GetTempPath(), "HCSynthTrace.htm");
            Uri uriBase = new Uri(Assembly.GetExecutingAssembly().CodeBase);
            var rootdir = Path.GetDirectoryName(Uri.UnescapeDataString(uriBase.AbsolutePath));
            string basedir = rootdir;
            // Ordinal comparison: this is a path fragment, not linguistic text.
            int i = rootdir.LastIndexOf("bin", StringComparison.Ordinal);
            if (i >= 0)
            {
                // rootdir is in development environment; adjust the value
                basedir = rootdir.Substring(0, i);
            }
            string iconPath = Path.Combine(
                basedir,
                "Language Explorer",
                "Configuration",
                "Words",
                "Analyses",
                "TraceParse"
            );
            var traceTransform = XmlUtils.CreateTransform("HCSynthByGlossFormatHCTrace", "PresentationTransforms");
            XPathDocument doc = new XPathDocument(xmlFile);

            XsltArgumentList argList = new XsltArgumentList();
            argList.AddParam("prmIconPath", "", iconPath);
            // we do not have access to any of the following; use defaults
            //argList.AddParam("prmAnalysisFont", "", m_language.NTFontFace);
            //argList.AddParam("prmAnalysisFontSize", "", m_language.NTFontSize.ToString());
            //argList.AddParam("prmVernacularFont", "", m_language.LexFontFace);
            //argList.AddParam("prmVernacularFontSize", "", m_language.LexFontSize.ToString());
            //argList.AddParam("prmVernacularRTL", "", m_language.NTColorName);
            argList.AddParam("prmShowTrace", "", "true");
            // Dispose the writer even when the transform throws, so the output file
            // handle is not left open.
            using (StreamWriter result = new StreamWriter(tempHtmResult))
            {
                traceTransform.Transform(doc, argList, result);
            }
            return tempHtmResult;
        }

        /// <summary>
        /// Writes the synthesizer's trace to "HCSynthTrace.xml" in the temp directory.
        /// </summary>
        /// <param name="synthesizer">Synthesizer whose <c>Trace</c> is written.</param>
        /// <returns>Path of the generated XML file.</returns>
        private string CreateXmlFile(Synthesizer synthesizer)
        {
            string tempXmlResult = Path.Combine(Path.GetTempPath(), "HCSynthTrace.xml");
            File.WriteAllText(tempXmlResult, synthesizer.Trace.ToString());
            return tempXmlResult;
        }

    }
}
b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDll/HCSynthByGlossDll.csproj @@ -0,0 +1,98 @@ + + + + + Debug + AnyCPU + {7E5A90A2-F882-48F8-A92B-8D7AE0447B58} + Library + Properties + SIL.HCSynthByGlossDll + HCSynthByGlossDll + v4.6.2 + 512 + true + + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + true + ..\..\..\..\Output\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + + + + False + ..\..\..\..\Output\Debug\HCSynthByGlossLib.dll + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\Newtonsoft.Json.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Core.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Machine.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Machine.Morphology.HermitCrab.dll + + + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\System.Net.Http.dll + + + + + + + + False + ..\..\..\..\Output\Debug\XMLUtils.dll + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossDll/Properties/AssemblyInfo.cs b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDll/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..ed046d4608 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDll/Properties/AssemblyInfo.cs @@ -0,0 +1,40 @@ +// Copyright (c) 2023-2025 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. 
+[assembly: AssemblyTitle("HCSynthByGlossDll")] +[assembly: AssemblyDescription("DLL app to run HermitCrab synthesis based on a set of glosses")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("SIL International")] +[assembly: AssemblyProduct("HCSynthByGlossDll")] +[assembly: AssemblyCopyright("Copyright © 2024-2026")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("7e5a90a2-f882-48f8-a92b-8d7ae0447b58")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.7.0.0")] +[assembly: AssemblyFileVersion("1.7.0.0")] diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossDll/packages.config b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDll/packages.config new file mode 100644 index 0000000000..ec2a9b334f --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDll/packages.config @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossDllTest/App.config b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDllTest/App.config new file mode 100644 index 0000000000..ba917941ae --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDllTest/App.config @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossDllTest/HCSynthByGlossDllTest.cs 
using NUnit.Framework;
using SIL.FieldWorks.Common.FwUtils;
using SIL.HCSynthByGloss;
using SIL.Machine.Morphology.HermitCrab;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;


namespace SIL.HCSynthByGlossDllTest
{
    /// <summary>
    /// Tests for <see cref="HCSynthByGlossDll"/> using the Indonesian test data in
    /// the HCSynthByGloss TestData directory.
    /// </summary>
    public class HCSynthByGlossDllTest
    {
        string glossFile = "";
        string testDataDir = "";
        string expectedWordFormsFile = "";
        string tempFile;
        HCSynthByGlossDll dll;
        string hcConfig = "";
        string result = "";

        /// <summary>
        /// Resolves the test data paths and creates a fresh DLL instance that writes
        /// to a temp file unique to this test, so tests cannot read each other's
        /// (or an earlier run's) output.
        /// </summary>
        [SetUp]
        public void Setup()
        {
            testDataDir = Path.Combine(FwDirectoryFinder.SourceDirectory, "Utilities", "HCSynthByGloss", "HCSynthByGloss", "TestData");
            hcConfig = Path.Combine(testDataDir, "indoHC4FLExrans.xml");
            glossFile = Path.Combine(testDataDir, "IndonesianAnalyses.txt");
            tempFile = Path.Combine(Path.GetTempPath(), "results-" + Guid.NewGuid().ToString("N") + ".txt");
            dll = new HCSynthByGlossDll(tempFile);
        }

        /// <summary>Removes the per-test output file so test runs do not litter the temp directory.</summary>
        [TearDown]
        public void TearDown()
        {
            if (!string.IsNullOrEmpty(tempFile) && File.Exists(tempFile))
            {
                File.Delete(tempFile);
            }
        }

        /// <summary>Both setters report a "could not find" message for a missing file and success for an existing one.</summary>
        [Test]
        public void SetFilesTest()
        {
            string result = dll.SetHcXmlFile("abc");
            Assert.AreEqual(dll.kError1 + dll.kHCXmlFile + dll.kError2 + "abc" + dll.kError3, result);
            result = dll.SetHcXmlFile(hcConfig);
            Assert.AreEqual(dll.kSuccess, result);
            result = dll.SetGlossFile("abc");
            Assert.AreEqual(dll.kError1 + dll.kGlossFile + dll.kError2 + "abc" + dll.kError3, result);
            result = dll.SetGlossFile(glossFile);
            Assert.AreEqual(dll.kSuccess, result);
        }

        /// <summary>
        /// Processing the Indonesian glosses yields the expected word forms, both on
        /// the first run and on a second run that reuses the already-loaded inputs.
        /// </summary>
        [Test]
        public void ProcessTest()
        {
            Stopwatch stopwatch = Stopwatch.StartNew();
            dll = new HCSynthByGlossDll(tempFile);
            result = dll.SetHcXmlFile(hcConfig);
            Assert.AreEqual(dll.kSuccess, result);
            result = dll.SetGlossFile(glossFile);
            Assert.AreEqual(dll.kSuccess, result);
            result = dll.Process();
            stopwatch.Stop();
            Console.WriteLine("Initial processing time = " + stopwatch.ElapsedMilliseconds);
            Assert.AreEqual(dll.kSuccess, result);
            expectedWordFormsFile = Path.Combine(testDataDir, "expectedWordForms.txt");
            // Strip carriage returns so the comparison is independent of line-ending style.
            string expectedWordForms = File.ReadAllText(expectedWordFormsFile, Encoding.UTF8)
                .Replace("\r", "");
            string synthesizedWordForms = File.ReadAllText(tempFile, Encoding.UTF8)
                .Replace("\r", "");
            //Console.Write(synthesizedWordForms);
            Assert.AreEqual(expectedWordForms, synthesizedWordForms);
            // Process again without initializing the input files
            stopwatch.Restart();
            result = dll.Process();
            stopwatch.Stop();
            Console.WriteLine("Restart processing time = " + stopwatch.ElapsedMilliseconds);
            Assert.AreEqual(dll.kSuccess, result);
            synthesizedWordForms = File.ReadAllText(tempFile, Encoding.UTF8)
                .Replace("\r", "");
            //Console.Write(synthesizedWordForms);
            Assert.AreEqual(expectedWordForms, synthesizedWordForms);
        }

        /// <summary>Process reports the missing HC XML file first, then the missing gloss file once the HC XML file is set.</summary>
        [Test]
        public void ProcessBadInputFilesTest()
        {
            dll = new HCSynthByGlossDll(tempFile);
            result = dll.Process();
            Assert.AreEqual(dll.kError1 + dll.kHCXmlFile + dll.kError2 + "" + dll.kError3, result);
            result = dll.SetHcXmlFile(hcConfig);
            Assert.AreEqual(dll.kSuccess, result);
            result = dll.Process();
            Assert.AreEqual(dll.kError1 + dll.kGlossFile + dll.kError2 + "" + dll.kError3, result);
        }
    }
}
pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + true + ..\..\..\..\Output\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + true + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + + + + False + ..\..\..\..\Output\Debug\HCSynthByGlossDll.dll + + + False + ..\..\..\..\Output\Debug\HCSynthByGlossLib.dll + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\Newtonsoft.Json.dll + + + False + ..\..\..\..\packages\NUnit.3.13.3\lib\net45\nunit.framework.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Machine.Morphology.HermitCrab.dll + + + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\System.Net.Http.dll + + + + + + + + + + + + + + + + + + {89EC1097-4786-4611-B6CB-2B8BC01CDDED} + FwUtils + + + + \ No newline at end of file diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossDllTest/Properties/AssemblyInfo.cs b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDllTest/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..85db4519f4 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDllTest/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("HCSynthByGlossDllTest")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("HCSynthByGlossDllTest")] +[assembly: AssemblyCopyright("Copyright © 2024")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. 
If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("7ef3805f-2979-4941-8260-2d99cebdbc0d")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossDllTest/packages.config b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDllTest/packages.config new file mode 100644 index 0000000000..0ff18eca63 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossDllTest/packages.config @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/AnalysesCreator.cs b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/AnalysesCreator.cs new file mode 100644 index 0000000000..6c13c39c2e --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/AnalysesCreator.cs @@ -0,0 +1,137 @@ +// Copyright (c) 2023 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using SIL.Machine.Morphology; +using SIL.Machine.Morphology.HermitCrab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace HCSynthByGloss +{ + public class AnalysesCreator + { + private static readonly AnalysesCreator instance = new AnalysesCreator(); + public string category { get; set; } = ""; + const char closingWedge = '>'; + const char openingWedge = '<'; + public List Forms { get; } = new List(); + 
public int RootIndex { get; set; } = -1; + + public static AnalysesCreator Instance + { + get { return instance; } + } + + public List ExtractMorphemes(string analysis, Morpher srcMorpher) + { + List morphemes = new List(); + instance.Forms.Clear(); + var state = State.BEGIN; + int index = 0; + int morphIndex = 0; + RootIndex = -1; + while (index < analysis.Length) + { + switch (state) + { + case State.BEGIN: + index++; + if (analysis[index] == openingWedge) + state = State.PREFIX; + else + state = State.ROOT; + break; + case State.PREFIX: + morphIndex++; + index = AddMorph(analysis, srcMorpher, morphemes, ++index, closingWedge); + if (analysis[index] == openingWedge) + state = State.PREFIX; + else + state = State.ROOT; + break; + case State.ROOT: + if (RootIndex == -1) + { + RootIndex = morphIndex; + // we need the prefixes to be synthesized in the reverse order + morphemes.Reverse(); + } + morphIndex++; + index = AddMorph(analysis, srcMorpher, morphemes, index, openingWedge); + state = State.CATEGORY; + break; + case State.CATEGORY: + int indexEnd = analysis.Substring(index).IndexOf(closingWedge) + index; + category = analysis.Substring(index, indexEnd - index); + index = indexEnd + 1; + if (analysis[index] == openingWedge) + state = State.SUFFIX; + else + state = State.END; + break; + case State.SUFFIX: + morphIndex++; + index = AddMorph(analysis, srcMorpher, morphemes, ++index, closingWedge); + if (analysis[index] == openingWedge) + state = State.SUFFIX; + else + state = State.END; + break; + case State.END: + index = analysis.Length; + // we need the suffixes to be synthesized in the reverse order + if (morphemes.Count > RootIndex + 1) + { + morphemes.Reverse(RootIndex + 1, morphemes.Count - (RootIndex + 1)); + } + break; + } + } + return morphemes; + } + + private static int AddMorph( + string analysis, + Morpher srcMorpher, + List morphemes, + int index, + char endMarker + ) + { + int indexEnd = analysis.Substring(index).IndexOf(endMarker) + index; + string 
shape = analysis.Substring(index, indexEnd - index); + // Note: for testing we ignore the morpher. + if (srcMorpher != null) + { + IMorpheme morph = srcMorpher.Morphemes.FirstOrDefault( + m => + m.Gloss != null + && m.Gloss.Normalize(NormalizationForm.FormD) + == shape.Normalize(NormalizationForm.FormD) + ); + morphemes.Add(morph as Morpheme); + } + else + { + morphemes.Add(null); + } + instance.Forms.Add(shape); + index = indexEnd + 1; + return index; + } + } + + enum State + { + BEGIN, + PREFIX, + ROOT, + CATEGORY, + SUFFIX, + END + } +} diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/HCSynthByGlossLib.csproj b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/HCSynthByGlossLib.csproj new file mode 100644 index 0000000000..9763a08c52 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/HCSynthByGlossLib.csproj @@ -0,0 +1,90 @@ + + + + + Debug + AnyCPU + {5E31B48C-00A1-4799-A9F7-589E01B1BBD5} + Library + Properties + HCSynthByGlossLib + HCSynthByGlossLib + v4.6.2 + 512 + true + + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + true + ..\..\..\..\Output\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + + + + False + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\Newtonsoft.Json.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Core.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Machine.dll + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\SIL.Machine.Morphology.HermitCrab.dll + + + + + ..\..\..\..\..\..\..\fwrepo\fw\Output\Debug\System.Net.Http.dll + + + + + + + + False + ..\..\..\..\Output\Debug\XMLUtils.dll + + + + + + + + + + + + \ No newline at end of file diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/HcXmlTraceManager.cs b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/HcXmlTraceManager.cs new file mode 
100644 index 0000000000..4f02831f2a --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/HcXmlTraceManager.cs @@ -0,0 +1,838 @@ +// Copyright (c) 2022-2023 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Xml.Linq; +using System.Text; +using System.Threading.Tasks; +using SIL.Machine.Morphology.HermitCrab; +using SIL.Machine.Morphology.HermitCrab.MorphologicalRules; +using SIL.Machine.Morphology.HermitCrab.PhonologicalRules; +using SIL.Machine.FeatureModel; + +namespace HCSynthByGloss +{ + // This is based on FwXmlTraceManager from LexText.ParserCore + // Since there is no LcmCache, we use other things + public class HcXmlTraceManager : ISynTraceManager + { + // Following moved from HCParser as we do not need anything else from HCParser + internal const string CRuleID = "ID"; + internal const string FormID = "ID"; + internal const string FormID2 = "ID2"; + internal const string InflTypeID = "InflTypeID"; + internal const string MsaID = "ID"; + internal const string PRuleID = "ID"; + internal const string SlotID = "SlotID"; + internal const string TemplateID = "ID"; + + internal const string IsNull = "IsNull"; + internal const string IsPrefix = "IsPrefix"; + internal const string Env = "Env"; + internal const string PrefixEnv = "PrefixEnv"; + internal const string SuffixEnv = "SuffixEnv"; + + Language _lang; + + public HcXmlTraceManager() { } + + public bool IsTracing { get; set; } + + public object GenerateWords(Language lang) + { + _lang = lang; + XElement affixPermutation = new XElement("AffixPermutation"); + return affixPermutation; + } + + public void GenerateWords(string analysis, Word input) + { + // we're not using this + } + + public void AnalyzeWord(Language lang, Word input) + { + input.CurrentTrace = new XElement( + "WordAnalysisTrace", + new XElement( + "InputWord", + 
input.Shape.ToString(lang.SurfaceStratum.CharacterDefinitionTable, true) + ) + ); + } + + public void BeginUnapplyStratum(Stratum stratum, Word input) { } + + public void EndUnapplyStratum(Stratum stratum, Word output) { } + + public void PhonologicalRuleUnapplied( + IPhonologicalRule rule, + int subruleIndex, + Word input, + Word output + ) + { + ((XElement)output.CurrentTrace).Add( + new XElement( + "PhonologicalRuleAnalysisTrace", + CreateHCRuleElement("PhonologicalRule", rule), + CreateWordElement("Input", input, true), + CreateWordElement("Output", output, true) + ) + ); + } + + public void PhonologicalRuleNotUnapplied( + IPhonologicalRule rule, + int subruleIndex, + Word input + ) + { + ((XElement)input.CurrentTrace).Add( + new XElement( + "PhonologicalRuleAnalysisTrace", + CreateHCRuleElement("PhonologicalRule", rule), + CreateWordElement("Input", input, true), + CreateWordElement("Output", input, true) + ) + ); + } + + public void BeginUnapplyTemplate(AffixTemplate template, Word input) + { + ((XElement)input.CurrentTrace).Add( + new XElement( + "TemplateAnalysisTraceIn", + CreateHCRuleElement("AffixTemplate", template), + CreateWordElement("Input", input, true) + ) + ); + } + + public void EndUnapplyTemplate(AffixTemplate template, Word output, bool unapplied) + { + ((XElement)output.CurrentTrace).Add( + new XElement( + "TemplateAnalysisTraceOut", + CreateHCRuleElement("AffixTemplate", template), + CreateWordElement("Output", unapplied ? 
output : null, true) + ) + ); + } + + public void MorphologicalRuleUnapplied( + IMorphologicalRule rule, + int subruleIndex, + Word input, + Word output + ) + { + var trace = new XElement( + "MorphologicalRuleAnalysisTrace", + CreateMorphologicalRuleElement(rule) + ); + var aprule = rule as AffixProcessRule; + if (aprule != null) + trace.Add(CreateAllomorphElement(aprule.Allomorphs[subruleIndex])); + trace.Add(CreateWordElement("Output", output, true)); + ((XElement)output.CurrentTrace).Add(trace); + output.CurrentTrace = trace; + } + + public void MorphologicalRuleNotUnapplied( + IMorphologicalRule rule, + int subruleIndex, + Word input + ) + { } + + public void LexicalLookup(Stratum stratum, Word input) + { + var trace = new XElement( + "LexLookupTrace", + new XElement("Stratum", stratum.Name), + new XElement( + "Shape", + input.Shape.ToRegexString(stratum.CharacterDefinitionTable, true) + ) + ); + ((XElement)input.CurrentTrace).Add(trace); + } + + public void SynthesizeWord(Language lang, Word input) + { + var trace = new XElement( + "WordSynthesisTrace", + CreateAllomorphElement(input.RootAllomorph), + new XElement( + "MorphologicalRules", + input.MorphologicalRules.Select(CreateMorphologicalRuleElement) + ) + ); + var curTrace = (XElement)input.CurrentTrace; + var lookupTrace = (XElement)curTrace.LastNode; + if (lookupTrace == null) + lookupTrace = curTrace; + lookupTrace.Add(trace); + input.CurrentTrace = trace; + } + + public void BeginApplyStratum(Stratum stratum, Word input) { } + + public void NonFinalTemplateAppliedLast(Stratum stratum, Word word) + { + ((XElement)word.CurrentTrace).Add( + new XElement("FailureReason", new XAttribute("type", "nonFinalTemplate")) + ); + } + + public void ApplicableTemplatesNotApplied(Stratum stratum, Word word) + { + ((XElement)word.CurrentTrace).Add( + new XElement("FailureReason", new XAttribute("type", "noTemplatesApplied")) + ); + } + + public void EndApplyStratum(Stratum stratum, Word output) { } + + public void 
PhonologicalRuleApplied( + IPhonologicalRule rule, + int subruleIndex, + Word input, + Word output + ) + { + ((XElement)output.CurrentTrace).Add( + new XElement( + "PhonologicalRuleSynthesisTrace", + CreateHCRuleElement("PhonologicalRule", rule), + CreateWordElement("Input", input, false), + CreateWordElement("Output", output, false) + ) + ); + } + + public void PhonologicalRuleNotApplied( + IPhonologicalRule rule, + int subruleIndex, + Word input, + FailureReason reason, + object failureObj + ) + { + var pruleTrace = new XElement( + "PhonologicalRuleSynthesisTrace", + CreateHCRuleElement("PhonologicalRule", rule), + CreateWordElement("Input", input, false), + CreateWordElement("Output", input, false) + ); + + var rewriteRule = rule as RewriteRule; + if (rewriteRule != null) + { + RewriteSubrule sr = rewriteRule.Subrules[subruleIndex]; + switch (reason) + { + case FailureReason.RequiredSyntacticFeatureStruct: + pruleTrace.Add( + new XElement( + "FailureReason", + new XAttribute("type", "category"), + new XElement( + "Category", + input.SyntacticFeatureStruct.PartsOfSpeech().FirstOrDefault() + ), + new XElement( + "RequiredCategories", + sr.RequiredSyntacticFeatureStruct + .PartsOfSpeech() + .Select(pos => new XElement("Category", pos)) + ) + ) + ); + break; + + case FailureReason.RequiredMprFeatures: + pruleTrace.Add( + CreateMprFeaturesFailureElement( + true, + (MprFeatureGroup)failureObj, + sr.RequiredMprFeatures, + input + ) + ); + break; + + case FailureReason.ExcludedMprFeatures: + pruleTrace.Add( + CreateMprFeaturesFailureElement( + false, + (MprFeatureGroup)failureObj, + sr.ExcludedMprFeatures, + input + ) + ); + break; + } + } + + ((XElement)input.CurrentTrace).Add(pruleTrace); + } + + private static XElement CreateMprFeaturesFailureElement( + bool required, + MprFeatureGroup group, + MprFeatureSet feats, + Word input + ) + { + return new XElement( + "FailureReason", + new XAttribute("type", "mprFeatures"), + new XElement("MatchType", required ? 
"required" : "excluded"), + new XElement("Group", group), + new XElement( + "MprFeatures", + input.MprFeatures + .Where(mf => mf.Group == group) + .Select(f => new XElement("MprFeature", f)) + ), + new XElement( + "ConstrainingMprFeatrues", + feats.Where(mf => mf.Group == group).Select(f => new XElement("MprFeature", f)) + ) + ); + } + + public void BeginApplyTemplate(AffixTemplate template, Word input) + { + ((XElement)input.CurrentTrace).Add( + new XElement( + "TemplateSynthesisTraceIn", + CreateHCRuleElement("AffixTemplate", template), + CreateWordElement("Input", input, false) + ) + ); + } + + public void EndApplyTemplate(AffixTemplate template, Word output, bool applied) + { + ((XElement)output.CurrentTrace).Add( + new XElement( + "TemplateSynthesisTraceOut", + CreateHCRuleElement("AffixTemplate", template), + CreateWordElement("Output", applied ? output : null, false) + ) + ); + } + + public void MorphologicalRuleApplied( + IMorphologicalRule rule, + int subruleIndex, + Word input, + Word output + ) + { + var trace = new XElement( + "MorphologicalRuleSynthesisTrace", + CreateMorphologicalRuleElement(rule) + ); + var aprule = rule as AffixProcessRule; + if (aprule != null) + trace.Add(CreateAllomorphElement(aprule.Allomorphs[subruleIndex])); + trace.Add(CreateWordElement("Output", output, false)); + ((XElement)output.CurrentTrace).Add(trace); + output.CurrentTrace = trace; + } + + public void MorphologicalRuleNotApplied( + IMorphologicalRule rule, + int subruleIndex, + Word input, + FailureReason reason, + object failureObj + ) + { + var trace = new XElement( + "MorphologicalRuleSynthesisTrace", + CreateMorphologicalRuleElement(rule) + ); + var aprule = rule as AffixProcessRule; + if (aprule != null) + trace.Add( + CreateAllomorphElement( + subruleIndex == -1 + ? 
aprule.Allomorphs.Last() + : aprule.Allomorphs[subruleIndex] + ) + ); + trace.Add(new XElement("Output", "*None*")); + switch (reason) + { + case FailureReason.RequiredSyntacticFeatureStruct: + //Debug.Assert(aprule != null); + var requiredFS = (FeatureStruct)failureObj; + FeatureSymbol[] requiredPos = requiredFS.PartsOfSpeech().ToArray(); + FeatureSymbol[] inputPos = input.SyntacticFeatureStruct + .PartsOfSpeech() + .ToArray(); + if (requiredPos.Intersect(inputPos).Any()) + { + trace.Add( + new XElement( + "FailureReason", + new XAttribute("type", "inflFeats"), + CreateInflFeaturesElement( + "InflFeatures", + input.SyntacticFeatureStruct + ), + CreateInflFeaturesElement("RequiredInflFeatures", requiredFS) + ) + ); + } + else + { + trace.Add( + new XElement( + "FailureReason", + new XAttribute("type", "pos"), + new XElement( + "Pos", + string.Join(", ", inputPos.Select(s => s.Description)) + ), + new XElement( + "RequiredPos", + string.Join(", ", requiredPos.Select(s => s.Description)) + ) + ) + ); + } + break; + + case FailureReason.RequiredStemName: + trace.Add( + new XElement( + "FailureReason", + new XAttribute("type", "fromStemName"), + new XElement("StemName", failureObj) + ) + ); + break; + + case FailureReason.RequiredMprFeatures: + //Debug.Assert(aprule != null); + var group = (MprFeatureGroup)failureObj; + trace.Add( + group.Name == "lexEntryInflTypes" + ? 
new XElement( + "FailureReason", + new XAttribute("type", "requiredInflType") + ) + : CreateMprFeaturesFailureElement( + true, + group, + aprule.Allomorphs[subruleIndex].RequiredMprFeatures, + input + ) + ); + break; + + case FailureReason.ExcludedMprFeatures: + trace.Add( + new XElement("FailureReason", new XAttribute("type", "excludedInflType")) + ); + break; + + case FailureReason.Pattern: + //Debug.Assert(aprule != null); + var env = (string)aprule.Allomorphs[subruleIndex].Properties[Env]; + var prefixEnv = (string)aprule.Allomorphs[subruleIndex].Properties[PrefixEnv]; + var suffixEnv = (string)aprule.Allomorphs[subruleIndex].Properties[SuffixEnv]; + if (env != null || prefixEnv != null || suffixEnv != null) + { + var reasonElem = new XElement( + "FailureReason", + new XAttribute("type", "environment") + ); + if (env != null) + reasonElem.Add(new XElement("Environment", env)); + if (prefixEnv != null) + reasonElem.Add(new XElement("Environment", env)); + if (suffixEnv != null) + reasonElem.Add(new XElement("Environment", env)); + trace.Add(reasonElem); + } + else + { + trace.Add( + new XElement("FailureReason", new XAttribute("type", "affixProcess")) + ); + } + break; + + case FailureReason.MaxApplicationCount: + trace.Add(new XElement("FailureReason", new XAttribute("type", "maxAppCount"))); + break; + + case FailureReason.NonPartialRuleProhibitedAfterFinalTemplate: + trace.Add( + new XElement( + "FailureReason", + new XAttribute("type", "nonPartialRuleAfterFinalTemplate") + ) + ); + break; + + case FailureReason.NonPartialRuleRequiredAfterNonFinalTemplate: + trace.Add( + new XElement( + "FailureReason", + new XAttribute("type", "partialRuleAfterNonFinalTemplate") + ) + ); + break; + + default: + return; + } + ((XElement)input.CurrentTrace).Add(trace); + } + + public void Blocked(IHCRule rule, Word output) { } + + public void Successful(Language lang, Word word) + { + ((XElement)word.CurrentTrace).Add( + new XElement( + "ParseCompleteTrace", + new 
XAttribute("success", true), + CreateWordElement("Result", word, false) + ) + ); + } + + public void Failed( + Language lang, + Word word, + FailureReason reason, + Allomorph allomorph, + object failureObj + ) + { + XElement trace; + switch (reason) + { + case FailureReason.AllomorphCoOccurrenceRules: + var alloRule = (AllomorphCoOccurrenceRule)failureObj; + trace = CreateParseCompleteElement( + word, + new XElement( + "FailureReason", + new XAttribute("type", "adhocProhibitionRule"), + new XElement("RuleType", "Allomorph"), + CreateAllomorphElement(allomorph), + new XElement("Others", alloRule.Others.Select(CreateAllomorphElement)), + new XElement("Adjacency", alloRule.Adjacency) + ) + ); + break; + + case FailureReason.MorphemeCoOccurrenceRules: + var morphemeRule = (MorphemeCoOccurrenceRule)failureObj; + trace = CreateParseCompleteElement( + word, + new XElement( + "FailureReason", + new XAttribute("type", "adhocProhibitionRule"), + new XElement("RuleType", "Morpheme"), + CreateMorphemeElement(allomorph.Morpheme), + new XElement( + "Others", + morphemeRule.Others.Select(CreateMorphemeElement) + ), + new XElement("Adjacency", morphemeRule.Adjacency) + ) + ); + break; + + case FailureReason.Environments: + trace = CreateParseCompleteElement( + word, + new XElement( + "FailureReason", + new XAttribute("type", "environment"), + CreateAllomorphElement(allomorph), + new XElement("Environment", failureObj) + ) + ); + break; + + case FailureReason.SurfaceFormMismatch: + trace = CreateParseCompleteElement( + word, + new XElement("FailureReason", new XAttribute("type", "formMismatch")) + ); + break; + + case FailureReason.RequiredSyntacticFeatureStruct: + trace = CreateParseCompleteElement( + word, + new XElement( + "FailureReason", + new XAttribute("type", "affixInflFeats"), + CreateAllomorphElement(allomorph), + CreateInflFeaturesElement("InflFeatures", word.SyntacticFeatureStruct), + CreateInflFeaturesElement( + "RequiredInflFeatures", + (FeatureStruct)failureObj + ) + 
) + ); + break; + + case FailureReason.RequiredStemName: + trace = CreateParseCompleteElement( + word, + new XElement( + "FailureReason", + new XAttribute("type", "requiredStemName"), + CreateAllomorphElement(allomorph), + new XElement("StemName", failureObj) + ) + ); + break; + + case FailureReason.ExcludedStemName: + trace = CreateParseCompleteElement( + word, + new XElement( + "FailureReason", + new XAttribute("type", "excludedStemName"), + CreateAllomorphElement(allomorph), + new XElement("StemName", failureObj) + ) + ); + break; + + case FailureReason.BoundRoot: + trace = CreateParseCompleteElement( + word, + new XElement("FailureReason", new XAttribute("type", "boundStem")) + ); + break; + + case FailureReason.DisjunctiveAllomorph: + XElement failureEelement = null; + if (failureObj is Word) + { + failureEelement = CreateWordElement("Word", (Word)failureObj, false); + } + else if (failureObj is AffixProcessAllomorph) + { + failureEelement = CreateWordElement("Word", word, false); + } + + trace = CreateParseCompleteElement( + word, + new XElement( + "FailureReason", + new XAttribute("type", "disjunctiveAllomorph"), + failureEelement + ) + ); + break; + + case FailureReason.PartialParse: + trace = CreateParseCompleteElement( + word, + new XElement("FailureReason", new XAttribute("type", "partialParse")) + ); + break; + + default: + return; + } + ((XElement)word.CurrentTrace).Add(trace); + } + + private static XElement CreateParseCompleteElement(Word word, XElement reasonElem) + { + return new XElement( + "ParseCompleteTrace", + new XAttribute("success", false), + CreateWordElement("Result", word, false), + reasonElem + ); + } + + private static XElement CreateInflFeaturesElement(string name, FeatureStruct fs) + { + return new XElement(name, fs.Head().ToString().Replace(",", "")); + } + + private static XElement CreateWordElement(string name, Word word, bool analysis) + { + string wordStr; + if (word == null) + wordStr = "*None*"; + else + wordStr = analysis + ? 
word.Shape.ToRegexString(word.Stratum.CharacterDefinitionTable, true) + : word.Shape.ToString(word.Stratum.CharacterDefinitionTable, true); + return new XElement(name, wordStr); + } + + private XElement CreateMorphemeElement(Morpheme morpheme) + { + var msaID = (int?)morpheme.Properties[MsaID] ?? 0; + //IMoMorphSynAnalysis msa; + //if (msaID == 0 || !m_cache.ServiceLocator.GetInstance().TryGetObject(msaID, out msa)) + // return null; + + //var inflTypeID = (int?)morpheme.Properties[InflTypeID] ?? 0; + //ILexEntryInflType inflType = null; + //if (inflTypeID != 0 && !m_cache.ServiceLocator.GetInstance().TryGetObject(inflTypeID, out inflType)) + return null; + + //return CreateMorphemeElement(msa, inflType); + } + + private static XElement CreateMorphologicalRuleElement(IMorphologicalRule rule) + { + XElement elem = CreateHCRuleElement("MorphologicalRule", rule); + elem.Add(new XAttribute("type", rule is AffixProcessRule ? "affix" : "compound")); + return elem; + } + + private static XElement CreateHCRuleElement(string name, IHCRule rule) + { + string id = "Unknown"; + var morpheme = rule as Morpheme; + if (morpheme != null) + id = morpheme.Gloss; + //id = (int?)morpheme.Properties[MsaID] ?? 0; + return new XElement(name, new XAttribute("id", id), rule.Name); + } + + private XElement CreateAllomorphElement(Allomorph allomorph) + { + string morphId = String.IsNullOrEmpty(allomorph.Morpheme.Id) + ? "??" + : allomorph.Morpheme.Id; + Morpheme morph = allomorph.Morpheme; + string form = (morph != null) ? morph.ToString() : "unknown"; + string cat = "unknown"; + if (morph != null) + { + string posIndex = (morph.Category != null) ? 
morph.Category : null; + if (!String.IsNullOrEmpty(posIndex)) + { + var feat = + _lang.SyntacticFeatureSystem.PartOfSpeechFeature.PossibleSymbols.FirstOrDefault( + f => f.ID == posIndex + ); + if (feat != null) + { + cat = feat.Description; + } + } + } + return new XElement( + "Allomorph", + new XAttribute("id", allomorph.Index), + new XElement("Form", form), + new XElement( + "Morpheme", + new XAttribute("id", morphId), + new XAttribute("type", allomorph.Morpheme.MorphemeType) + ), + new XElement("Gloss", allomorph.Morpheme.Gloss), + new XElement("Category", cat) + ); + + //bool isNull = (bool?)allomorph.Properties[IsNull] ?? false; + //if (isNull) + //{ + // var slotID = (int)allomorph.Morpheme.Properties[SlotID]; + // //IMoInflAffixSlot slot; + // //if (!m_cache.ServiceLocator.GetInstance().TryGetObject(slotID, out slot)) + // // return null; + + // var nullInflTypeID = (int)allomorph.Morpheme.Properties[InflTypeID]; + // //ILexEntryInflType nullInflType; + // //if (!m_cache.ServiceLocator.GetInstance().TryGetObject(nullInflTypeID, out nullInflType)) + // // return null; + + // //var isPrefix = (bool)allomorph.Properties[IsPrefix]; + // //return new XElement("Allomorph", new XAttribute("id", 0), new XAttribute("type", isPrefix ? MoMorphTypeTags.kMorphPrefix : MoMorphTypeTags.kMorphSuffix), + // // new XElement("Form", "^0"), + // // new XElement("Morpheme", new XAttribute("id", 0), new XAttribute("type", "infl"), + // // new XElement("HeadWord", string.Format("Automatically generated null affix for the {0} irregularly inflected form", nullInflType.Name.BestAnalysisAlternative.Text)), + // // new XElement("Gloss", (nullInflType.GlossPrepend.BestAnalysisAlternative.Text == "***" ? "" : nullInflType.GlossPrepend.BestAnalysisAlternative.Text) + // // + (nullInflType.GlossAppend.BestAnalysisAlternative.Text == "***" ? 
"" : nullInflType.GlossAppend.BestAnalysisAlternative.Text)), + // // new XElement("Category", slot.OwnerOfClass().Abbreviation.BestAnalysisAlternative.Text), + // // new XElement("Slot", new XAttribute("optional", slot.Optional), slot.Name.BestAnalysisAlternative.Text))); + //} + + //var formID = allomorph.Properties[FormID]; + + ////var formID = (int?)allomorph.Properties[FormID] ?? 0; + ////IMoForm form; + ////if (formID == 0 || !m_cache.ServiceLocator.GetInstance().TryGetObject(formID, out form)) + //// return null; + //var formID2 = allomorph.Properties[FormID2]; + ////var formID2 = (int?)allomorph.Properties[FormID2] ?? 0; + + //var msaID = allomorph.Morpheme.Properties[MsaID]; + ////IMoMorphSynAnalysis msa; + ////if (!m_cache.ServiceLocator.GetInstance().TryGetObject(msaID, out msa)) + //// return null; + + //var inflTypeID = allomorph.Morpheme.Properties[InflTypeID]; + ////var inflTypeID = (int?)allomorph.Morpheme.Properties[InflTypeID] ?? 0; + ////ILexEntryInflType inflType = null; + ////if (inflTypeID != 0 && !m_cache.ServiceLocator.GetInstance().TryGetObject(inflTypeID, out inflType)) + //// return null; + + ////return CreateAllomorphElement("Allomorph", form, msa, inflType, formID2 != 0); + //return new XElement("sumpn"); + } + + void ITraceManager.CompoundingRuleNotUnapplied(IMorphologicalRule rule, int subruleIndex, Word input, FailureReason reason, object failureObj) + { + var trace = new XElement("CompoundingRuleAnalysisTrace", CreateMorphologicalRuleElement(rule)); + var crule = rule as CompoundingRule; + if (crule != null) + { + var stremProdRestricts = failureObj as MprFeatureSet; + if (stremProdRestricts != null) + { + trace.Add(new XElement("FailureReason", new XAttribute("type", "missingProdRestrict"), + new XElement("StemProdRestricts", stremProdRestricts.Select(f => new XElement("MprFeature", f))), + new XElement("RuleProdRestricts", crule.NonHeadProdRestrictionsMprFeatures.Select(f => new XElement("MprFeature", f))))); + } + } + 
trace.Add(new XElement("Output", "*None*")); + ((XElement)input.CurrentTrace).Add(trace); + + } + + void ITraceManager.CompoundingRuleNotApplied(IMorphologicalRule rule, int subruleIndex, Word input, FailureReason reason, object failureObj) + { + var trace = new XElement("CompoundingRuleSynthesisTrace", CreateMorphologicalRuleElement(rule)); + var crule = rule as CompoundingRule; + if (crule != null) + { + trace.Add(new XElement("FailureReason", new XAttribute("type", "missingProdRestrict"), + new XElement("StemProdRestricts", input.MprFeatures.Select(f => new XElement("MprFeature", f))), + new XElement("RuleProdRestricts", crule.HeadProdRestrictionsMprFeatures.Select(f => new XElement("MprFeature", f))))); + } + trace.Add(new XElement("Output", "*None*")); + ((XElement)input.CurrentTrace).Add(trace); + + } + } +} diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/ISynTraceManager.cs b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/ISynTraceManager.cs new file mode 100644 index 0000000000..a9249e9f0c --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/ISynTraceManager.cs @@ -0,0 +1,18 @@ +// Copyright (c) 2022-2023 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using SIL.Machine.Morphology.HermitCrab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace HCSynthByGloss +{ + public interface ISynTraceManager : ITraceManager + { + void GenerateWords(string analysis, Word input); + } +} diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/Properties/AssemblyInfo.cs b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..b335815582 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/Properties/AssemblyInfo.cs @@ -0,0 +1,40 @@ +// Copyright (c) 2023-2025 SIL International +// This software is licensed 
under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("HCSynthByGlossLib")] +[assembly: AssemblyDescription("Library for common classes for running HermitCrab synthesis based on a set of glosses")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("SIL International")] +[assembly: AssemblyProduct("HCSynthByGlossLib")] +[assembly: AssemblyCopyright("Copyright © 2024-2026")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. 
+[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("5e31b48c-00a1-4799-a9f7-589e01b1bbd5")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.7.0.0")] +[assembly: AssemblyFileVersion("1.7.0.0")] diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/SynthesizedWordFomatter.cs b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/SynthesizedWordFomatter.cs new file mode 100644 index 0000000000..3bd37e53d3 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossLib/SynthesizedWordFomatter.cs @@ -0,0 +1,52 @@ +// Copyright (c) 2023 SIL International +// This software is licensed under the LGPL, version 2.1 or later +// (http://www.gnu.org/licenses/lgpl-2.1.html) + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace HCSynthByGloss +{ + public class SynthesizedWordFomatter + { + private static readonly SynthesizedWordFomatter instance = new SynthesizedWordFomatter(); + const string separator = "%"; + const string failure = "%0%"; + + public static SynthesizedWordFomatter Instance + { + get { return instance; } + } + + public string Format(IEnumerable forms, string analysis) + { + StringBuilder sb = new StringBuilder(); + int count = forms.Count(); + switch (count) + { + case 0: + sb.Append(failure); + sb.Append(analysis); + sb.Append(separator); + break; + case 1: + sb.Append(forms.ElementAt(0)); + break; + default: + sb.Append(separator); + sb.Append(count); + sb.Append(separator); + for (int i = 0; i < count; i++) + { + sb.Append(forms.ElementAt(i)); + sb.Append(separator); + } + break; + } + return 
// Copyright (c) 2023 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

using SIL.Machine.FeatureModel;
using SIL.Machine.Morphology;
using SIL.Machine.Morphology.HermitCrab;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml.Linq;
using System.Xml.XPath;

namespace HCSynthByGloss
{
    /// <summary>
    /// Turns gloss-based analyses of the form <c>^...$</c> into synthesized word forms
    /// by handing the morphemes of each analysis to a Hermit Crab <see cref="Morpher"/>.
    /// </summary>
    public class Synthesizer
    {
        private static readonly Synthesizer instance = new Synthesizer();

        /// <summary>
        /// When the trace manager is tracing, holds the "Synthesis" element built for the
        /// most recently processed analysis (it is overwritten for every analysis).
        /// </summary>
        public object Trace { get; set; }

        /// <summary>The single shared instance of this class.</summary>
        public static Synthesizer Instance
        {
            get { return instance; }
        }

        /// <summary>
        /// Synthesizes every <c>^...$</c> analysis found in <paramref name="glosses"/>.
        /// </summary>
        /// <param name="glosses">
        /// Text containing zero or more analyses, each starting with '^' and ending with '$'.
        /// Null is treated like the empty string.
        /// </param>
        /// <param name="morpher">Morpher used to look up morphemes and generate words.</param>
        /// <param name="lang">Not used by this method; kept for the callers' signature.</param>
        /// <param name="traceManager">
        /// Consulted for <c>IsTracing</c>; when tracing, <see cref="Trace"/> is filled in.
        /// </param>
        /// <returns>
        /// The formatted result of each analysis. Results are separated by "," when the
        /// text between two analyses contains no newline and by "\n" otherwise; the last
        /// result is followed by "\n". Returns "" when no complete analysis is found.
        /// </returns>
        public string SynthesizeGlosses(
            string glosses,
            Morpher morpher,
            Language lang,
            ISynTraceManager traceManager
        )
        {
            StringBuilder sb = new StringBuilder();
            if (string.IsNullOrEmpty(glosses))
                return sb.ToString();

            var analysesCreator = AnalysesCreator.Instance;
            var formatter = SynthesizedWordFomatter.Instance;
            int indexBeg = glosses.IndexOf('^');
            // Look for the closing '$' only after the opening '^'; a '$' that precedes
            // the caret would otherwise produce a negative Substring length.
            int indexEnd = indexBeg >= 0 ? glosses.IndexOf('$', indexBeg) : -1;
            while (indexBeg >= 0 && indexEnd >= 0)
            {
                string analysis = glosses.Substring(indexBeg, (indexEnd - indexBeg) + 1);
                List<Morpheme> morphemes = analysesCreator.ExtractMorphemes(analysis, morpher);
                if (morphemes.Contains(null))
                {
                    // At least one gloss could not be resolved to a morpheme:
                    // emit an empty synthesis followed by a description of the problem.
                    sb.Append(formatter.Format(new List<string>(), analysis));
                    CollectMissingGlosses(sb, analysesCreator, morphemes);
                    CheckForDuplicates(morpher, sb, morphemes);
                    AddToTracing(traceManager, sb, analysis);
                }
                else
                {
                    WordAnalysis wordAnalysis = new WordAnalysis(
                        morphemes,
                        analysesCreator.RootIndex,
                        analysesCreator.category
                    );
                    IEnumerable<string> newSyntheses = new List<string>();
                    if (traceManager.IsTracing)
                    {
                        LexEntry rootEntry = morphemes[analysesCreator.RootIndex] as LexEntry;
                        FeatureStruct realizationalFS = new FeatureStruct();
                        var results = new HashSet<string>();
                        object trace = null;
                        XElement topLevelTrace = CreateTopLevelElement(analysis);

                        foreach (
                            Stack<Morpheme> otherMorphemes in PermuteOtherMorphemes(
                                morphemes,
                                wordAnalysis.RootMorphemeIndex - 1,
                                wordAnalysis.RootMorphemeIndex + 1
                            )
                        )
                        {
                            results.UnionWith(
                                morpher.GenerateWords(
                                    rootEntry,
                                    otherMorphemes,
                                    realizationalFS,
                                    out trace
                                )
                            );
                            // output of trace makes more sense if we invert the order
                            topLevelTrace.AddFirst(trace);
                        }
                        Trace = topLevelTrace;
#if ChangePeriodToSpace
                        IEnumerable<XElement> list = topLevelTrace.XPathSelectElements(
                            "//ParseCompleteTrace[@success='true']/Result"
                        );
                        for (int i = 0; i < results.Count && i < list.Count(); i++)
                        {
                            string fromHC = results.ElementAt(i);
                            string fromTrace = list.ElementAt(i).Value;
                            if (fromTrace.Contains("."))
                            {
                                fromHC = fromTrace.Replace(".", " ");
                            }
                            ((List<string>)newSyntheses).Add(fromHC);
                        }
#else
                        newSyntheses = results;
#endif
                    }
                    else
                    {
                        newSyntheses = morpher.GenerateWords(wordAnalysis);
                    }
                    string result = formatter.Format(newSyntheses, analysis);
                    sb.Append(result);
                    if (CheckForDuplicates(morpher, sb, morphemes))
                    {
                        AddToTracing(traceManager, sb, analysis);
                    }
                }

                int lastIndexEnd = indexEnd;
                int caretOffset = AppendBetweenWordsContent(glosses, sb, lastIndexEnd);
                if (caretOffset < 0)
                    break; // no further complete analysis
                indexBeg = lastIndexEnd + caretOffset;
                indexEnd = glosses.IndexOf('$', indexBeg);
            }
            return sb.ToString();
        }

        /// <summary>
        /// Appends " One or more glosses not found:" followed by every form of the
        /// analyses creator that is not the gloss of one of the resolved morphemes.
        /// </summary>
        private void CollectMissingGlosses(
            StringBuilder sb,
            AnalysesCreator analysesCreator,
            List<Morpheme> morphemes
        )
        {
            sb.Append(" One or more glosses not found:");
            var glossesFound = new HashSet<string>(
                morphemes.Where(m => m != null).Select(m => m.Gloss)
            );
            foreach (string form in analysesCreator.Forms)
            {
                if (!glossesFound.Contains(form))
                {
                    sb.Append(" '");
                    sb.Append(form);
                    sb.Append("';");
                }
            }
        }

        /// <summary>
        /// When tracing, replaces <see cref="Trace"/> with a "Synthesis" element for
        /// <paramref name="analysis"/> holding a single "error" child. The error text is
        /// the complete content of <paramref name="sb"/> accumulated so far.
        /// </summary>
        private void AddToTracing(ISynTraceManager traceManager, StringBuilder sb, string analysis)
        {
            if (traceManager.IsTracing)
            {
                XElement topLevelTrace = CreateTopLevelElement(analysis);
                XElement error = new XElement("error");
                error.Add(sb.ToString());
                topLevelTrace.Add(error);
                Trace = topLevelTrace;
            }
        }

        /// <summary>
        /// Appends a warning for every non-null morpheme whose gloss is shared by a
        /// different morpheme known to the morpher.
        /// </summary>
        /// <returns>True when at least one duplicate gloss was reported.</returns>
        private bool CheckForDuplicates(Morpher morpher, StringBuilder sb, List<Morpheme> morphemes)
        {
            bool duplicateFound = false;
            foreach (Morpheme morph in morphemes)
            {
                if (morph == null)
                    continue;
                bool hasDuplicate = morpher.Morphemes.Any(
                    m => m.Gloss == morph.Gloss && m != morph
                );
                if (!hasDuplicate)
                    continue;

                // Only the first reported gloss carries the introductory text.
                sb.Append(duplicateFound ? " '" : " Duplicate gloss(es) found for '");
                duplicateFound = true;
                sb.Append(morph.Gloss);
                sb.Append("';");
            }
            if (duplicateFound)
            {
                sb.Append(" synthesis may not work.");
            }
            return duplicateFound;
        }

        /// <summary>
        /// Creates a "Synthesis" element whose "analysis" attribute is
        /// <paramref name="analysis"/> without a leading '^' and a trailing '$'.
        /// An empty analysis yields an empty attribute value.
        /// </summary>
        private static XElement CreateTopLevelElement(string analysis)
        {
            XElement traceRemember = new XElement("Synthesis");
            int indexBegin = analysis.Length > 0 && analysis[0] == '^' ? 1 : 0;
            int indexEndExclusive = analysis.Length;
            if (indexEndExclusive > indexBegin && analysis[indexEndExclusive - 1] == '$')
                indexEndExclusive--;
            var inputAnalysis = new XAttribute(
                "analysis",
                analysis.Substring(indexBegin, indexEndExclusive - indexBegin)
            );
            traceRemember.Add(inputAnalysis);
            return traceRemember;
        }

        // The following is borrowed from Morpher; we could make the Morpher one be public.
        /// <summary>
        /// Recursively enumerates the orders in which the morphemes to the left of
        /// <paramref name="leftIndex"/> (inclusive) and to the right of
        /// <paramref name="rightIndex"/> (inclusive) can be taken, one neighbour at a
        /// time. Each result is a stack; when both indexes have run off the list an
        /// empty stack is produced, and each recursion level pushes its morpheme on the
        /// stack returned by the deeper level.
        /// </summary>
        private IEnumerable<Stack<Morpheme>> PermuteOtherMorphemes(
            List<Morpheme> morphemes,
            int leftIndex,
            int rightIndex
        )
        {
            if (leftIndex == -1 && rightIndex == morphemes.Count)
            {
                yield return new Stack<Morpheme>();
            }
            else
            {
                if (rightIndex < morphemes.Count)
                {
                    foreach (
                        Stack<Morpheme> p in PermuteOtherMorphemes(
                            morphemes,
                            leftIndex,
                            rightIndex + 1
                        )
                    )
                    {
                        p.Push(morphemes[rightIndex]);
                        yield return p;
                    }
                }

                if (leftIndex > -1)
                {
                    foreach (
                        Stack<Morpheme> p in PermuteOtherMorphemes(
                            morphemes,
                            leftIndex - 1,
                            rightIndex
                        )
                    )
                    {
                        p.Push(morphemes[leftIndex]);
                        yield return p;
                    }
                }
            }
        }

        /// <summary>
        /// Appends the separator that follows the analysis ending at
        /// <paramref name="lastIndexEnd"/> (the index of its '$') and locates the next
        /// analysis.
        /// </summary>
        /// <returns>
        /// The offset of the next '^' relative to <paramref name="lastIndexEnd"/>, or -1
        /// when no further complete analysis ('^' followed somewhere by '$') exists.
        /// </returns>
        private static int AppendBetweenWordsContent(
            string glosses,
            StringBuilder sb,
            int lastIndexEnd
        )
        {
            int indexAfterDollar = lastIndexEnd + 1;
            int indexNextCaret = glosses.IndexOf('^', lastIndexEnd);
            // A caret that is never closed by '$' does not start a usable analysis;
            // treat it like the end of the input so the output still ends with "\n".
            if (indexNextCaret >= 0 && glosses.IndexOf('$', indexNextCaret) < 0)
                indexNextCaret = -1;

            if (indexNextCaret < 0)
            {
                sb.Append("\n");
                return -1;
            }

            string afterDollar = glosses.Substring(
                indexAfterDollar,
                indexNextCaret - indexAfterDollar
            );
            // Analyses on the same input line are joined with ','; a line break in the
            // input is carried over to the output.
            sb.Append(afterDollar.Contains("\n") ? "\n" : ",");
            return indexNextCaret - lastIndexEnd;
        }
    }
}
// Copyright (c) 2023 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using SIL.Machine.Morphology;
using SIL.Machine.Morphology.HermitCrab;
using System.Reflection;
using System.IO;
using HCSynthByGloss;
using SIL.FieldWorks.Common.FwUtils;

namespace SIL.HCSynthByGlossTest
{
    /// <summary>
    /// NUnit tests for AnalysesCreator and Synthesizer. Both tests run against a
    /// Hermit Crab configuration and gloss files read from the TestData directory.
    /// </summary>
    public class HCSynthByGlossTest
    {
        // Morpher built in Setup() from the loaded language.
        Morpher morpher = null;
        // Language loaded from the Hermit Crab configuration file.
        Language synLang;
        // Trace manager handed to the Morpher constructor.
        TraceManager hcTraceManager;
        // Input text passed to Synthesizer.SynthesizeGlosses.
        string glosses = "";
        // Full path of the file holding the analyses to synthesize.
        string glossFile = "";
        // Full path of the file holding the expected synthesis output.
        string expectedWordFormsFile = "";

        /// <summary>
        /// Resolves the test data paths below FwDirectoryFinder.SourceDirectory, loads
        /// the language from the Hermit Crab configuration and creates the morpher.
        /// </summary>
        [SetUp]
        public void Setup()
        {
            string testDataDir = Path.Combine(FwDirectoryFinder.SourceDirectory, "Utilities", "HCSynthByGloss", "HCSynthByGloss", "TestData");
            string hcConfig = Path.Combine(testDataDir, "indoHC4FLExrans.xml");
            glossFile = Path.Combine(testDataDir, "IndonesianAnalyses.txt");
            expectedWordFormsFile = Path.Combine(testDataDir, "expectedWordForms.txt");
            synLang = XmlLanguageLoader.Load(hcConfig);
            hcTraceManager = new TraceManager();
            morpher = new Morpher(hcTraceManager, synLang);
        }

        /// <summary>
        /// Checks the morphemes, category, forms and root index that
        /// AnalysesCreator.ExtractMorphemes reports for several analyses.
        /// </summary>
        // NOTE(review): the assertions below expect affix glosses (AV, AVxyz, LOC, APPL,
        // NMLZR) and the category "v" that do not appear in the analysis literals as
        // shown here; the literals have presumably lost markup -- confirm against the
        // upstream test before relying on them.
        [Test]
        public void AnalysesCreatorTest()
        {
            var creator = AnalysesCreator.Instance;
            // One prefix gloss followed by the root.
            string analysis = "^ajar1.1$";
            List morphemes = creator.ExtractMorphemes(analysis, morpher);
            Assert.AreEqual(2, morphemes.Count);
            Assert.AreEqual("AV", morphemes.ElementAt(0).Gloss);
            Assert.AreEqual("ajar1.1", morphemes.ElementAt(1).Gloss);
            Assert.AreEqual("v", creator.category);
            Assert.AreEqual(1, creator.RootIndex);

            // A gloss that is expected not to be found: its slot is null, but the form
            // is still recorded in creator.Forms.
            analysis = "^ajar1.1$";
            morphemes = creator.ExtractMorphemes(analysis, morpher);
            Assert.AreEqual(2, morphemes.Count);
            Assert.AreEqual(null, morphemes.ElementAt(0));
            Assert.AreEqual("ajar1.1", morphemes.ElementAt(1).Gloss);
            Assert.AreEqual("v", creator.category);
            Assert.AreEqual("AVxyz", creator.Forms[0]);
            Assert.AreEqual(1, creator.RootIndex);

            // Root followed by two suffix glosses.
            analysis = "^ajar1.1$";
            morphemes = creator.ExtractMorphemes(analysis, morpher);
            Assert.AreEqual(3, morphemes.Count);
            Assert.AreEqual("ajar1.1", morphemes.ElementAt(0).Gloss);
            Assert.AreEqual("v", creator.category);
            Assert.AreEqual("LOC", morphemes.ElementAt(1).Gloss);
            Assert.AreEqual("APPL", morphemes.ElementAt(2).Gloss);
            Assert.AreEqual(0, creator.RootIndex);

            // Two prefix glosses, the root, and one suffix gloss.
            analysis = "^karang1.1$";
            morphemes = creator.ExtractMorphemes(analysis, morpher);
            Assert.AreEqual(4, morphemes.Count);
            Assert.AreEqual("AV", morphemes.ElementAt(0).Gloss);
            Assert.AreEqual("NMLZR", morphemes.ElementAt(1).Gloss);
            Assert.AreEqual("karang1.1", morphemes.ElementAt(2).Gloss);
            Assert.AreEqual("v", creator.category);
            Assert.AreEqual("LOC", morphemes.ElementAt(3).Gloss);
            Assert.AreEqual(2, creator.RootIndex);

            // NFD case
            analysis = "^aja´r1.2$";
            morphemes = creator.ExtractMorphemes(analysis, morpher);
            Assert.AreEqual(1, morphemes.Count);
            Assert.AreEqual("aja´r1.2", morphemes.ElementAt(0).Gloss);
            Assert.AreEqual("v", creator.category);
            Assert.AreEqual(0, creator.RootIndex);

            // NFC case
            analysis = "^ajár1.3$";
            morphemes = creator.ExtractMorphemes(analysis, morpher);
            Assert.AreEqual(1, morphemes.Count);
            Assert.AreEqual("ajár1.3", morphemes.ElementAt(0).Gloss);
            Assert.AreEqual("v", creator.category);
            Assert.AreEqual(0, creator.RootIndex);
        }

        /// <summary>
        /// Checks that empty input synthesizes to an empty string and that the analyses
        /// in the gloss file synthesize to the content of the expected word forms file.
        /// </summary>
        [Test]
        public void SynthesizerTest()
        {
            ISynTraceManager traceManager = new HcXmlTraceManager();
            var synthesizer = Synthesizer.Instance;
            // Empty input must produce empty output.
            glosses = "";
            string synthesizedWordForms = synthesizer.SynthesizeGlosses(
                glosses,
                morpher,
                synLang,
                traceManager
            );
            Assert.AreEqual("", synthesizedWordForms);

            glosses = File.ReadAllText(glossFile, Encoding.UTF8);
            // Sanity check that the expected input file was read.
            // NOTE(review): the length presumably depends on the file's line endings --
            // confirm it holds for both LF and CRLF checkouts.
            Assert.AreEqual(1309, glosses.Length);
            synthesizedWordForms = synthesizer.SynthesizeGlosses(
                glosses,
                morpher,
                synLang,
                traceManager
            );
            // Remove the comment on the next line to see the current results.
            //Console.Write(synthesizedWordForms);
            // Carriage returns are dropped from the expected text before comparing.
            string expectedWordForms = File.ReadAllText(expectedWordFormsFile, Encoding.UTF8)
                .Replace("\r", "");
            Assert.AreEqual(expectedWordForms, synthesizedWordForms);
        }
    }
}
/dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossTest/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("HCSynthByGlossTest")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("HCSynthByGlossTest")] +[assembly: AssemblyCopyright("Copyright © 2023")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("cd9e5e81-f590-4b06-9c43-ceaf4aad71db")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/Src/Utilities/HCSynthByGloss/HCSynthByGlossTest/packages.config b/Src/Utilities/HCSynthByGloss/HCSynthByGlossTest/packages.config new file mode 100644 index 0000000000..0ff18eca63 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/HCSynthByGlossTest/packages.config @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Src/Utilities/HCSynthByGloss/ReadMe.txt b/Src/Utilities/HCSynthByGloss/ReadMe.txt new file mode 100644 index 
0000000000..103a8274b7 --- /dev/null +++ b/Src/Utilities/HCSynthByGloss/ReadMe.txt @@ -0,0 +1,2 @@ +This folder was taken from https://github.com/sillsdev/HCSynthByGloss on February 13, 2026. +See that repo for the earlier history of these files.