URI: 
       tFix 'Add replacement' with words requiring expansions in the GUI; tests currently broken - transliterate - Transliteration engine
  HTML git clone git://lumidify.org/transliterate.git
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit 186074f5c746c6f3452da864722a9fd7da2dd964
   DIR parent 1adf70462c0ec838b8611ce505c139862a768e6d
  HTML Author: lumidify <nobody@lumidify.org>
       Date:   Mon, 30 Mar 2020 13:04:56 +0200
       
       Fix 'Add replacement' with words requiring expansions in the GUI; tests currently broken
       
       Diffstat:
         A tests/data/endings2.txt             |       2 ++
         M tests/data/words.txt                |       1 +
         A tests/test4/config                  |      18 ++++++++++++++++++
         A tests/test4/descr.txt               |       1 +
         A tests/test4/err.txt                 |       3 +++
         A tests/test4/expected.txt            |       3 +++
         A tests/test4/input.txt               |       1 +
         M transliterate.pl                    |     196 +++++++++++++++++++++----------
       
       8 files changed, 163 insertions(+), 62 deletions(-)
       ---
   DIR diff --git a/tests/data/endings2.txt b/tests/data/endings2.txt
       t@@ -0,0 +1,2 @@
       +end4        end4_replaced
       +end5        end5_replaced
   DIR diff --git a/tests/data/words.txt b/tests/data/words.txt
       t@@ -8,3 +8,4 @@ word6        word6_replaced
        word7        word7_replaced
        word8        word8_replaced
        word9        word9_replaced
       +word10        word10_replaced
   DIR diff --git a/tests/test4/config b/tests/test4/config
       t@@ -0,0 +1,18 @@
       +split "\s+"
       +beforeword "\s"
       +afterword "\s"
       +
       +ignore "../data/ignore.txt"
       +table words "../data/words.txt"
       +table words1 "../data/words.txt"
       +table endings "../data/endings.txt"
       +table endings2 "../data/endings2.txt"
       +
       +expand words endings noroot
       +expand words endings2 noroot
       +expand words1 endings2 noroot
       +
       +group beginword endword
       +replace words
       +replace words1
       +endgroup
   DIR diff --git a/tests/test4/descr.txt b/tests/test4/descr.txt
       t@@ -0,0 +1 @@
       +matchignore only endword; expand noroot
   DIR diff --git a/tests/test4/err.txt b/tests/test4/err.txt
       t@@ -0,0 +1,3 @@
       +Unknown word: "word0"
       +Word "word1_replacedend1r1|word1_replacedend1r2" with 2 word choices.
       +Unknown word: "-dword9end2"
   DIR diff --git a/tests/test4/expected.txt b/tests/test4/expected.txt
       t@@ -0,0 +1,3 @@
       +ignore
       +word0   word1_replacedend1r1|word1_replacedend1r2
       +-dword9end2 word9_replacedend2r-d
   DIR diff --git a/tests/test4/input.txt b/tests/test4/input.txt
       t@@ -0,0 +1 @@
       +word10end1end5 word10end5
   DIR diff --git a/transliterate.pl b/transliterate.pl
       t@@ -561,28 +561,7 @@ sub load_ignore_table {
        # 0 - an error occurred
        # 1 - everything's fine
        sub expand_table {
       -        my ($cmd, $tables, $config) = @_;
       -        my $table_name = $cmd->[1]->{"value"};
       -        my $new_table_name = $cmd->[2]->{"value"};
       -        my $forms_name = $cmd->[3]->{"value"};
       -        if ($new_table_name eq "same") {
       -                $new_table_name = $table_name;
       -        }
       -        if (!exists($tables->{$table_name})) {
       -                warn "expand_table: table \"$table_name\" does not exist.\n";
       -                return 0;
       -        }
       -        if (!exists($tables->{$forms_name})) {
       -                warn "expand_table: table \"$forms_name\" does not exist.\n";
       -                return 0;
       -        }
       -        my $table = $tables->{$table_name};
       -        my $forms = $tables->{$forms_name};
       -
       -        my $noroot = 0;
       -        if ($#$cmd >= 4 && $cmd->[4]->{"value"} eq "noroot") {
       -                $noroot = 1;
       -        }
       +        my ($table, $forms, $noroot, $config) = @_;
        
                my %new_table;
                foreach my $word (keys %$table) {
       t@@ -603,8 +582,7 @@ sub expand_table {
                                $new_table{$word} = $table->{$word};
                        }
                }
       -        $tables->{$new_table_name} = \%new_table;
       -        return 1;
       +        return \%new_table;
        }
        
        # Check if the number and types of arguments given to a config command are right
       t@@ -648,9 +626,19 @@ sub check_args {
        # $config_list - the list returned by parse_config
        sub interpret_config {
                my ($config_list, $args) = @_;
       +        my %tables;
                my %config;
       +        # table_paths stores a list of all table and replacement ids that are
       +        # impacted by the path so the replacement can be added on the fly when
       +        # a new replacement is added from the GUI
       +        # the "replacement id" is just the number of the replacement group,
       +        # starting at 0 with the first group in the config
                $config{"table_paths"} = {};
       -        $config{"tables"} = {};
       +        # a mapping between the table ids and tables for all tables used as
       +        # ending tables in expand statements - so expansions can be done
       +        # on the fly when adding a replacement word from the GUI
       +        $config{"ending_tables"} = {};
       +        # ignore is the path to the ignore file, ignore_words the actual table
                $config{"ignore"} = "";
                $config{"ignore_words"} = {};
                $config{"split"} = "\\s";
       t@@ -658,12 +646,16 @@ sub interpret_config {
                $config{"afterword"} = "\\s";
                $config{"tablesep"} = "\t";
                $config{"choicesep"} = "\$";
       +        # a list of "replacement configs", which specify the type and any
       +        # other arguments (this is given to replace_match, etc.
                $config{"replacements"} = [];
       -        my %tmp_table_paths;
       +        # these are temporary mappings used while loading the config
       +        my %path_to_table;
       +        my %table_id_to_path;
                my %mandatory_args = (
                        "ignore" => [$STRING],
                        "table" => [$ID],
       -                "expand" => [$ID, $ID, $ID],
       +                "expand" => [$ID, $ID],
                        "match" => [$STRING, $STRING],
                        "matchignore" => [$STRING],
                        "replace" => [$ID],
       t@@ -688,17 +680,42 @@ sub interpret_config {
                                        return undef;
                                }
                                if ($cmd->[0]->{"value"} eq "table") {
       -                                my $table = load_table $cmd->[2]->{"value"}, $args, \%config;
       -                                if (!$table) {
       -                                        return undef;
       +                                my $table_path = $cmd->[2]->{"value"};
       +                                my $table;
       +                                # add to temporary path-to-table mapping so tables aren't
       +                                # loaded unnecessarily
       +                                if (exists $path_to_table{$table_path}) {
       +                                        $table = $path_to_table{$table_path};
       +                                } else {
       +                                        $table = load_table $table_path, $args, \%config;
       +                                        $path_to_table{$table_path} = $table;
       +                                        return if !$table;
                                        }
       -                                $config{tables}->{$cmd->[1]->{"value"}} = $table;
       -                                $tmp_table_paths{$cmd->[1]->{"value"}} = $cmd->[2]->{"value"};
       +                                my $table_id = $cmd->[1]->{"value"};
       +                                $tables{$table_id} = $table;
       +                                $table_id_to_path{$table_id} = $table_path;
                                } elsif ($cmd->[0]->{"value"} eq "expand") {
       -                                # FIXME: need to handle table paths when a new table name is used for the expansion
       -                                if (!expand_table($cmd, $config{tables}, \%config)) {
       -                                        return undef;
       +                                my $orig_table_id = $cmd->[1]->{"value"};
       +                                my $ending_table_id = $cmd->[2]->{"value"};
       +                                my $noroot = 0;
       +                                if ($#$cmd >= 3 && $cmd->[3]->{"value"} eq "noroot") {
       +                                        $noroot = 1;
       +                                }
       +                                if (!exists $tables{$orig_table_id}) {
       +                                        warn "expand: table \"$orig_table_id\" doesn't exist\n";
       +                                        return;
       +                                } elsif (!exists $tables{$ending_table_id}) {
       +                                        warn "expand: table \"$ending_table_id\" doesn't exist\n";
       +                                        return
                                        }
       +
       +                                $config{"ending_tables"}->{$ending_table_id} = $tables{$ending_table_id};
       +                                $config{"expands"}->{$orig_table_id} = [] if !exists $config{"expands"}->{$orig_table_id};
       +                                push @{$config{"expands"}->{$orig_table_id}}, [$ending_table_id, $noroot];
       +
       +                                my $new_table = expand_table($tables{$orig_table_id}, $tables{$ending_table_id}, $noroot, \%config);
       +                                return if !defined $new_table;
       +                                $tables{$orig_table_id} = $new_table;
                                } elsif ($cmd->[0]->{"value"} eq "group") {
                                        if ($in_group) {
                                                warn "ERROR: New group started without ending last one in config\n";
       t@@ -725,6 +742,7 @@ sub interpret_config {
                                                "search" => NFD($cmd->[1]->{"value"}),
                                                "replace" => $cmd->[2]->{"value"}};
                                        for (3..$#$cmd) {
       +                                        # add optional arguments as keys in replacement config
                                                $config{"replacements"}->[-1]->{$cmd->[$_]->{"value"}} = 1;
                                        }
                                } elsif ($cmd->[0]->{"value"} eq "matchignore") {
       t@@ -742,17 +760,24 @@ sub interpret_config {
                                                return undef;
                                        }
                                        my $table = $cmd->[1]->{"value"};
       -                                if (!exists($config{tables}->{$table})) {
       +                                if (!exists($tables{$table})) {
                                                warn "ERROR: nonexistent table \"$table\" in replace statement.\n";
                                                return undef;
                                        }
       -                                if (exists($tmp_table_paths{$table})) {
       -                                        $config{"table_paths"}->{$tmp_table_paths{$table}} = [$#{$config{"replacements"}}, $table];
       -                                }
       +
       +                                # make a list of all replacements that are affected by this
       +                                # file so that they can be updated when a word is added
       +                                # through the gui
       +                                my $table_path = $table_id_to_path{$table};
       +                                my $replacement_id = $#{$config{"replacements"}};
       +                                $config{"table_paths"}->{$table_path} = [] if !exists $config{"table_paths"}->{$table_path};
       +                                push @{$config{"table_paths"}->{$table_path}}, [$replacement_id, $table];
       +
                                        # Note: we don't need to check if $table{"choicesep"} was defined
                                        # here since we can't ever get this far without first having
                                        # loaded a table anyways
       -                                add_to_trie($table, $config{"replacements"}->[-1]->{"words"}, $config{tables}->{$table}, $args, \%config);
       +                                my $trie_root = $config{"replacements"}->[-1]->{"words"};
       +                                add_to_trie($table, $trie_root, $tables{$table}, $args, \%config);
                                } elsif ($cmd->[0]->{"value"} eq "split") {
                                        $config{"split"} = $cmd->[1]->{"value"};
                                } elsif ($cmd->[0]->{"value"} eq "beforeword") {
       t@@ -857,9 +882,21 @@ sub handle_unknown_word_action {
                        }
                        print($fh $word . $config->{tablesep} . $replace_word . "\n");
                        close($fh);
       -                my $replacement_id = $config->{"table_paths"}->{$table_path}->[0];
       -                my $trie = $config->{"replacements"}->[$replacement_id]->{"words"};
       -                add_to_trie($config->{"table_paths"}->{$table_path}->[1], $trie, {$word => $replace_word}, $args, $config);
       +                # loop over all table ids that are impacted by this file
       +                foreach my $replacement (@{$config->{"table_paths"}->{$table_path}}) {
       +                        my $replacement_id = $replacement->[0];
       +                        my $table_id = $replacement->[1];
       +                        my $trie = $config->{"replacements"}->[$replacement_id]->{"words"};
       +                        my $final_table = {$word => $replace_word};
       +                        # handle expansions
       +                        foreach my $expand (@{$config->{"expands"}->{$table_id}}) {
       +                                my $ending_table_id = $expand->[0];
       +                                my $noroot = $expand->[1];
       +                                my $endings_table = $config->{"ending_tables"}->{$ending_table_id};
       +                                $final_table = expand_table $final_table, $endings_table, $noroot, $config;
       +                        }
       +                        add_to_trie($table_id, $trie, $final_table, $args, $config);
       +                }
                        return 1;
                } elsif ($action->[0] eq "reload") {
                        my $new_config = load_config $args;
       t@@ -1451,18 +1488,15 @@ pressed.
        
        The filtering for which table files are actually shown here is
        currently a bit rudimentary. First, all paths that are used in the
       -B<table> statements in the config are put into a list. Then, the
       -paths corresponding to any tables used for word endings in the
       -B<expand> statements are removed from the list, and that is what
       -is shown in this window. The reason for removing those paths from
       -the list is that it gets somewhat confusing when all the tables
       -that are only used for word endings are also in the list, and it
       -is very unlikely that an unknown word would need to be written to
       -one of those files. If necessary, the word can always be added
       -manually and the config reloaded. Note also that only actual
       -table paths are shown here, not the tables themselves - this is
       -not necessarily a one-to-one mapping since new tables can be
       -generated with the B<expand> statements.
       +B<table> statements in the config are put into a list. Then, only
       +the paths corresponding to table IDs actually used in B<replace>
       +statements are added to the list that gets shown in the GUI. The
       +reason for not showing all table paths in the list is that it gets
       +somewhat confusing when all the tables that are only used for word
       +endings are also in the list, and it is very unlikely that an
       +unknown word would need to be written to one of those files. If
       +necessary, the word can always be added manually and the config
       +reloaded.
        
        =item Reload config
        
       t@@ -1570,6 +1604,17 @@ cannot be used as part of the replacement string in B<match> statements.
        Lookaheads and lookbehinds are fine, though, and could be useful in
        certain cases.
        
       +All tables must be loaded before they are used, or there will be an
       +error that the table does not exist.
       +
       +Warning: If a B<replace> statement is located before an B<expand>
       +statement that would have impacted the table used, there will be no
       +error but the expand statement won't have any impact.
       +
       +Basic rule of thumb: Always put the B<table> statements before the
       +B<expand> statements and the B<expand> statements before the B<replace>
       +statements.
       +
        =over 8
        
        =item B<split> <regex string>
       t@@ -1643,12 +1688,21 @@ original script first and the replacement word second. The replacement word
        can optionally have several parts separated by B<choicesep>, which will cause the
        user to be prompted to choose one of the options.
        
       -=item B<expand> <table identifier> <new table identifier> <word ending table> [noroot]
       +If, for whatever reason, the same table is needed twice, but with different endings,
       +the table can simply be loaded twice with different IDs. If the same path is loaded,
       +the table that has already been loaded will be reused.
       +
       +=item B<expand> <table identifier> <word ending table> [noroot]
        
        Expand the table C<< <table identifier> >>, i.e. generate all the word forms using
        the word endings in C<< <word ending table> >>, saving the result as a table with the
       -identifier C<< <new table identifier> >>. If C<same> is specified as C<< <new table
       -identifier> >>, the original C<< <table identifier> >> is used instead.
       +identifier C<< <new table identifier> >>.
       +
       +Note: There used to be a C<< <new table identifier> >> argument to create a new
       +table in case one table had to be expanded with different endings. This has been
       +removed because it was a bit ugly, especially since there wasn't a proper mapping
       +from table IDs to filenames anymore. If this functionality is needed, the same table
       +file can simply be loaded multiple times. See the B<table> section above.
        
        If B<noroot> is set, the root forms of the words are not kept.
        
       t@@ -1660,10 +1714,6 @@ Note that each of the root words is also split into its choices (if necessary)
        during the expanding, so it is possible to use B<choicesep> in both the endings
        and root words.
        
       -Note that the paths of all tables used for C<< <word ending table> >> are removed from the
       -list of paths that is later shown in the unknown word window. See L</"UNKNOWN WORD
       -WINDOW"> for details.
       -
        =item B<match> <regex string> <replacement string> [beginword] [endword] [nofinal]
        
        Perform a RegEx match using the given C<< <regex string> >>, replacing it with
       t@@ -1701,6 +1751,28 @@ End a replacement group.
        
        =back
        
       +=head1 BUGS
       +
       +Although it may not seem like it, one of the ugliest parts of the program is the
       +GUI functionality that allows the user to add a replacement word. The problem is
       +that all information about the B<expand> and B<replace> statements has to be kept
       +in order to properly handle adding a word to one of the files and simultaneously
       +adding it to the currently loaded tables I<without reloading the entire config>.
       +The way it currently works, the replacement word is directly written to the file,
       +then all B<expand> statements that would have impacted the words from this file
       +are redone (just for the newly added word) and the resulting words are added to
       +the appropriate tables (or, technically, the appropriate 'trie'). Since a file
       +can be mapped to multiple table IDs and a table ID can occur in multiple replace
       +statements, this is more complicated than it sounds, and thus it is very likely
       +that there are bugs lurking here somewhere. Do note that "Reload config" will
       +B<always> reload the entire configuration, so that's safe to do even if the
       +on-the-fly replacing doesn't work.
       +
       +In general, I have tested the GUI code much less than the rest since you can't
       +really test it automatically very well.
       +
       +Tell me if you find any bugs.
       +
        =head1 SEE ALSO
        
        perlre, perlretut