lumidify.org/1/git/transliterate/commit/9bece82fb8385f9b72a49e7c2b98a9b4f2006182.gph

  URI:

       tAdd comment option for text files; default to STDIN for input file - transliterate - Transliteration engine
  HTML git clone git://lumidify.org/transliterate.git
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit 9bece82fb8385f9b72a49e7c2b98a9b4f2006182
   DIR parent 32b57ea48795b5d6a406fcf15e3543d47618b666
  HTML Author: lumidify <nobody@lumidify.org>
       Date:   Wed,  8 Apr 2020 08:00:04 +0200
       
       Add comment option for text files; default to STDIN for input file
       
       Diffstat:
         M tests/test6/config                  |       1 +
         M tests/test6/input.txt               |       1 +
         M transliterate.pl                    |      51 +++++++++++++++++++++-----------
       
       3 files changed, 36 insertions(+), 17 deletions(-)
       ---
   DIR diff --git a/tests/test6/config b/tests/test6/config
       t@@ -1,6 +1,7 @@
        split "[ \n]+"
        beforeword " "
        afterword "[ \n]"
       +comment "#"
        
        ignore "../data/ignore.txt"
        table words "../data/words.txt"
   DIR diff --git a/tests/test6/input.txt b/tests/test6/input.txt
       t@@ -1,3 +1,4 @@
       +word1#sfsafafasfs#sdfdsfsfs
        word1 word2
        123word1       word9   123   word4
        word20 word01231    word0
   DIR diff --git a/transliterate.pl b/transliterate.pl
       t@@ -469,7 +469,7 @@ sub parse_config {
                        $state = 0;
                        push(@commands, []);
                        foreach my $char (split(//, $line)) {
       -                        if ($char eq "#") {
       +                        if ($char eq "#" && !($state & $IN_STR)) {
                                        last;
                                } elsif ($char eq '"') {
                                        if ($state & $IN_STR) {
       t@@ -698,6 +698,7 @@ sub interpret_config {
                        "afterword" => [$STRING],
                        "tablesep" => [$STRING],
                        "choicesep" => [$STRING],
       +                "comment" => [$STRING],
                        "group" => [],
                        "endgroup" => [],
                        "retrywithout" => [$STRING, $STRING],
       t@@ -837,7 +838,7 @@ sub interpret_config {
                                        }
                                } elsif ($cmd_name eq "split" || $cmd_name eq "beforeword" ||
                                        $cmd_name eq "afterword" || $cmd_name eq "tablesep" ||
       -                                $cmd_name eq "choicesep") {
       +                                $cmd_name eq "choicesep" || $cmd_name eq "comment") {
                                        $config{$cmd_name} = $cmd->[1]->{"value"};
                                } elsif ($cmd_name eq "ignore") {
                                        $config{"ignore"} = $cmd->[1]->{"value"};
       t@@ -1272,15 +1273,12 @@ sub get_unknown_words {
        # to the file handle $outputfh, prompting the user for unknown words or
        # word choices (if those aren't disabled on the command line)
        sub replace {
       -        my ($config, $args, $outputfh) = @_;
       -        # Is there *really* no more efficient way to get the total number of lines?
       -        open my $fh, "<", $args->{"input"} or die "ERROR: Cannot open input file \"$args->{input}\" for reading.\n";
       -        my $total_lines = 0;
       -        while (<$fh>) {$total_lines++};
       -        close $fh;
       -        open $fh, "<", $args->{"input"} or die "ERROR: Cannot open input file \"$args->{input}\" for reading.\n";
       -        while (my $line = <$fh>) {
       +        my ($config, $args, $total_lines, $inputfh, $outputfh) = @_;
       +        while (my $line = <$inputfh>) {
                        next if $. < $args->{"start"};
       +                if (exists $config->{"comment"}) {
       +                        $line =~ s/\Q$config->{comment}\E.*$//;
       +                }
                        my $nfd_line = NFD($line);
                        my $substrings = replace_line($config, $nfd_line);
        
       t@@ -1311,7 +1309,6 @@ sub replace {
                                print $outputfh $_->[1];
                        }
                }
       -        close $fh;
        }
        
        my %args = ("config" => "config", "start" => 1, "errors" => "", "output" => "");
       t@@ -1324,7 +1321,7 @@ GetOptions(
                "checkduplicates") or pod2usage(1);
        
        pod2usage(-exitval => 0, -verbose => 2) if $args{"help"};
       -pod2usage(1) if $#ARGV != 0 && !$args{"checkduplicates"};
       +pod2usage(-exitval => 1, -verbose => 1) if @ARGV > 1;
        
        if (!-f $args{"config"}) {
                die "ERROR: config file \"$args{config}\" does not exist or is not a file.\n";
       t@@ -1335,11 +1332,19 @@ if (!$config) {
        }
        exit 0 if ($args{"checkduplicates"});
        
       -my $input = $ARGV[0];
       -if (!-f $input) {
       -        die "ERROR: input file \"$input\" does not exist or is not a file.\n";
       +my $inputfh;
       +my $total_lines = "UNKNOWN";
       +if (@ARGV < 1) {
       +        warn "WARNING: no input file supplied; taking input from STDIN\n";
       +        $inputfh = \*STDIN;
       +} else {
       +        open $inputfh, "<", $ARGV[0] or die "ERROR: Cannot open input file \"$ARGV[0]\" for reading.\n";
       +        # Is there *really* no more efficient way to get the total number of lines?
       +        $total_lines = 0;
       +        while (<$inputfh>) {$total_lines++};
       +        close $inputfh;
       +        open $inputfh, "<", $ARGV[0] or die "ERROR: Cannot open input file \"$ARGV[0]\" for reading.\n";
        }
       -$args{"input"} = $input;
        
        if (-f $args{"errors"} && !$args{"force"}) {
                my $choice = "";
       t@@ -1373,7 +1378,8 @@ if ($args{"output"} eq "") {
                open $outputfh, ">", $args{"output"} or die "ERROR: cannot open \"$args{output}\" for writing.\n";
        }
        
       -replace($config, \%args, $outputfh);
       +replace($config, \%args, $total_lines, $inputfh, $outputfh);
       +close $inputfh;
        close $outputfh;
        
        __END__
       t@@ -1387,6 +1393,7 @@ transliterate.pl - Transliterate text files
        transliterate.pl [options][input file]
        
        Start the transliteration engine with the given file as input.
       +The input file defaults to STDIN if no filename is given.
        
        =head1 OPTIONS
        
       t@@ -1762,6 +1769,16 @@ prompting the user.
        
        B<Default:> C<$>
        
       +=item B<comment> <string>
       +
       +If enabled, everything from the first occurrence of C<< <string> >> to the
       +end of the line will be ignored on all lines in the input file.
       +
       +Note that this is really just a "dumb replacement": there is no way to
       +escape the comment string, so any line containing it is always truncated
       +there. Try to always set this to a string that does not occur anywhere in
       +the actual text (or don't use the option at all).
       +
        =item B<ignore> <filename>
        
        Sets the file of words to ignore.