URI: 
       tAdd choiceoverride option - transliterate - Transliteration engine
  HTML git clone git://lumidify.org/transliterate.git
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit c3b7a4e83d73104c627e8e6eafaf9e17c47e51ce
   DIR parent 468067a8fdf246671997930166c26957b052d8cf
  HTML Author: lumidify <nobody@lumidify.org>
       Date:   Mon,  6 Apr 2020 09:43:30 +0200
       
       Add choiceoverride option
       
       Diffstat:
         M transliterate.pl                    |      47 +++++++++++++++++++++++++------
       
       1 file changed, 38 insertions(+), 9 deletions(-)
       ---
   DIR diff --git a/transliterate.pl b/transliterate.pl
       t@@ -248,6 +248,11 @@ sub prompt_choose_word {
                my @replacements;
                foreach (0..$#$substrings) {
                        if ($substrings->[$_]->[1] =~ /\Q$config->{choicesep}\E/) {
       +                        if (exists $config->{"choiceoverride"} &&
       +                                exists $config->{"choiceoverride"}->{$substrings->[$_]->[1]}) {
       +                                $substrings->[$_]->[1] = $config->{"choiceoverride"}->{$substrings->[$_]->[1]};
       +                                next;
       +                        }
                                # This ugly bit of code is here as a special case for transliterating
                                # Hindi to Urdu text - if there are *exactly* two choices and one
                                # contains diacritics but the other one doesn't, the one with diacritics
       t@@ -256,7 +261,6 @@ sub prompt_choose_word {
                                        my @choices = split /\Q$config->{choicesep}\E/, $substrings->[$_]->[1];
                                        my @diacritics = @{$config->{"targetdiacritics"}};
                                        if (@choices == 2) {
       -                                        @choices = map {NFD($_)} @choices;
                                                my $first_matches = grep {$choices[0] =~ /$_/} @diacritics;
                                                my $second_matches = grep {$choices[1] =~ /$_/} @diacritics;
                                                if ($first_matches && !$second_matches) {
       t@@ -370,7 +374,7 @@ sub prompt_choose_word {
                                foreach my $choice (@choices) {
                                        $choice_nums{$choice} = 0;
                                        foreach my $diacritic (@{$config->{"targetdiacritics"}}) {
       -                                        my @matches = NFD($choice) =~ /$diacritic/;
       +                                        my @matches = $choice =~ /$diacritic/;
                                                $choice_nums{$choice} += scalar @matches if @matches;
                                        }
                                }
       t@@ -538,10 +542,10 @@ sub load_table {
                        my $replacement;
                        if ($revert) {
                                $word = NFD $words[1];
       -                        $replacement = $words[0];
       +                        $replacement = NFD $words[0];
                        } else {
                                $word = NFD $words[0];
       -                        $replacement = $words[1];
       +                        $replacement = NFD $words[1];
                        }
                        my @word_choices = split /\Q$config->{choicesep}\E/, $word;
                        foreach my $word_choice (@word_choices) {
       t@@ -692,7 +696,8 @@ sub interpret_config {
                        "group" => [],
                        "endgroup" => [],
                        "diacritics" => [$STRING],
       -                "targetdiacritics" => [$STRING]
       +                "targetdiacritics" => [$STRING],
       +                "choiceoverride" => [$STRING]
                );
                my $in_group = 0;
                foreach my $cmd (@$config_list) {
       t@@ -718,7 +723,7 @@ sub interpret_config {
                                                $table = $path_to_table{$table_path};
                                        } else {
                                                $table = load_table $table_path, $args, \%config, $table_args{"revert"};
       -                                        return if !$table;
       +                                        return if !defined $table;
                                                $path_to_table{$table_path} = $table;
                                        }
                                        if ($table_args{"revert"}) {
       t@@ -730,6 +735,15 @@ sub interpret_config {
                                        # this is a hash to avoid duplicates if the same file
                                        # is loaded multiple times
                                        $config{"display_tables"}->{$table_path} = 1 if !exists $table_args{"nodisplay"};
       +                        } elsif ($cmd_name eq "choiceoverride") {
       +                                my $table_path = $cmd->[1]->{"value"};
       +                                my $table = load_table $table_path, $args, \%config;
       +                                return if !defined $table;
       +                                if (exists $config{"choiceoverride"}) {
       +                                        warn "Duplicate specification of \"choiceoverride\" option.\n";
       +                                        return;
       +                                }
       +                                $config{"choiceoverride"} = $table;
                                } elsif ($cmd_name eq "expand") {
                                        my $orig_table_id = $cmd->[1]->{"value"};
                                        my $ending_table_id = $cmd->[2]->{"value"};
       t@@ -776,7 +790,7 @@ sub interpret_config {
                                        push @{$config{"replacements"}}, {
                                                "type" => "match",
                                                "search" => NFD($cmd->[1]->{"value"}),
       -                                        "replace" => $cmd->[2]->{"value"}};
       +                                        "replace" => NFD($cmd->[2]->{"value"})};
                                        for (3..$#$cmd) {
                                                # add optional arguments as keys in replacement config
                                                $config{"replacements"}->[-1]->{$cmd->[$_]->{"value"}} = 1;
       t@@ -834,7 +848,7 @@ sub interpret_config {
                                } elsif ($cmd_name eq "ignore") {
                                        $config{"ignore"} = $cmd->[1]->{"value"};
                                        my $table = load_ignore_table $cmd->[1]->{"value"}, $args;
       -                                return if !$table;
       +                                return if !defined $table;
                                        $config{"ignore_words"} = $table;
                                } else {
                                        warn "ERROR: unknown command \"" . $cmd_name . "\" in config.\n";
       t@@ -1300,7 +1314,7 @@ sub replace {
                        }
        
                        foreach (@$substrings) {
       -                        print $outputfh $_->[1];
       +                        print $outputfh NFC($_->[1]);
                        }
                }
                close $fh;
       t@@ -1796,6 +1810,21 @@ cannot currently think of any reason why someone would want to load a file both
        with and without B<revert> in the same config, but I still wanted to add this
        warning just in case.
        
       +=item B<choiceoverride> <table path>
       +
       +Reads the mapping in the table at C<< <table path> >> and uses it to override
       +the choice mechanism.
       +
       +The table contains a mapping of choices (separated by B<choicesep>) to single
       +replacements. This was added to help in Urdu<->Hindi transliteration with the
       +same database, since sometimes words with and without diacritics that actually
       +mean the same thing are added for one direction but should default to one of
       +them in the other direction.
       +
       +Note that the choices are not sorted before comparison and have to match
       +exactly, so whenever a new choice is added, it also needs to be added to
       +the corresponding entry in this mapping, in exactly the same order.
       +
        =item B<expand> <table identifier> <word ending table> [noroot]
        
        Expand the table C<< <table identifier> >>, i.e. generate all the word forms using