URI: 
       tImprove book sorting - lsg - Lumidify Site Generator
  HTML git clone git://lumidify.org/lsg.git (fast, but not encrypted)
  HTML git clone https://lumidify.org/git/lsg.git (encrypted, but very slow)
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit 901aadb474e06693524e3fc6f2092f4d94bb1d07
   DIR parent 60cba53ab7684b466185fd3886fb19d22b688e53
  HTML Author: lumidify <nobody@lumidify.org>
       Date:   Wed, 27 Dec 2023 15:26:40 +0100
       
       Improve book sorting
       
       Diffstat:
         M LSG/Config.pm                       |       2 ++
         M LSG/Generate.pm                     |      85 ++++++++++++++++++++++---------
         M LSG/Markdown.pm                     |      17 +++++++++++------
         M LSG/UserFuncs.pm                    |     190 ++++++++++++++++++++++++++++---
         M lsg.pl                              |      24 ++++++++++++------------
       
       5 files changed, 257 insertions(+), 61 deletions(-)
       ---
   DIR diff --git a/LSG/Config.pm b/LSG/Config.pm
       t@@ -25,6 +25,7 @@ use Exporter qw(import);
        our @EXPORT_OK = qw($config);
        
        # Yes, I know this isn't just used for real config
       +# FIXME: separate fields from config.ini from other parts to avoid conflicts
        our $config;
        
        sub read_modified_dates {
       t@@ -81,6 +82,7 @@ sub read_config {
                                $section = $1;
                                next;
                        }
       +                # FIXME: report errors properly
                        my ($key, $value) = split("=", $_, 2);
                        if ($value =~ /:/) {
                                my @value = split(":", $value);
   DIR diff --git a/LSG/Generate.pm b/LSG/Generate.pm
       t@@ -26,32 +26,60 @@ use File::Path qw(make_path);
        use LSG::Markdown;
        use LSG::Config qw($config);
        
       -sub gen_files() {
       -        foreach my $pageid (keys %{$config->{"metadata"}}) {
       -                foreach my $lang (keys %{$config->{"langs"}}) {
       -                        my $template = $config->{"metadata"}->{$pageid}->{"template"} . ".$lang.html";
       -                        if (
       -                                exists($config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"}) &&
       -                                exists($config->{"modified_dates"}->{"templates"}->{$template}) &&
       -                                $config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"} eq $config->{"metadata"}->{$pageid}->{"modified"}->{$lang} &&
       -                                $config->{"modified_dates"}->{"templates"}->{$template} eq $config->{"templates"}->{$template}->{"modified"} &&
       -                                (!exists $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ||
       -                                 $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ne "true")
       -                        ) {
       -                                next;
       -                        }
       -                        print("Processing $pageid.$lang\n");
       -                        my $html_dir = catfile("site", $lang, $config->{"metadata"}->{$pageid}->{"dirname"});
       -                        make_path($html_dir);
       -                        my $fullname = catfile("pages", "$pageid.$lang");
       -                        my $html = LSG::Markdown::parse_md($lang, $pageid, $fullname);
       -                        my $final_html = LSG::Template::render_template($html, $lang, $pageid);
       -                        my $html_file = catfile("site", $lang, $pageid) . ".html";
       -                        open(my $in, ">", $html_file) or die "ERROR: can't open $html_file for writing\n";
       -                        print $in $final_html;
       -                        close($in);
       -                }
       +sub gen_page {
       +        my ($pageid, $html_pages) = @_;
       +        my @ret_pages;
       +        foreach my $lang (keys %{$config->{"langs"}}) {
       +                my $template = $config->{"metadata"}->{$pageid}->{"template"} . ".$lang.html";
       +                # FIXME: also check if the html file actually exists
       +                # -> maybe compare with modified date of html instead of markdown
       +                if (
       +                        exists($config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"}) &&
       +                        exists($config->{"modified_dates"}->{"templates"}->{$template}) &&
       +                        $config->{"modified_dates"}->{"pages"}->{"$pageid.$lang"} eq $config->{"metadata"}->{$pageid}->{"modified"}->{$lang} &&
       +                        $config->{"modified_dates"}->{"templates"}->{$template} eq $config->{"templates"}->{$template}->{"modified"} &&
       +                        (!exists $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ||
       +                         $config->{"metadata"}->{$pageid}->{$lang}->{"always_update"} ne "true")
       +                ) {
       +                        next;
       +                }
       +                #print("Processing $pageid.$lang\n");
       +                my $html_dir = catfile("site", $lang, $config->{"metadata"}->{$pageid}->{"dirname"});
       +                make_path($html_dir);
       +                my $html;
       +                if (defined($html_pages)) {
       +                        if (!exists($html_pages->{$lang})) {
       +                                die "Page $pageid does not exist for language $lang\n";
       +                        }
       +                        $html = $html_pages->{$lang};
       +                } else {
       +                        my $fullname = catfile("pages", "$pageid.$lang");
       +                        my @tmp_pages;
       +                        ($html, @tmp_pages) = LSG::Markdown::parse_md($lang, $pageid, $fullname);
       +                        push(@ret_pages, @tmp_pages);
       +                }
       +                my $final_html = LSG::Template::render_template($html, $lang, $pageid);
       +                my $html_file = catfile("site", $lang, $pageid) . ".html";
       +                open(my $in, ">", $html_file) or die "ERROR: can't open $html_file for writing\n";
       +                print $in $final_html;
       +                close($in);
       +        }
       +        return @ret_pages;
       +}
       +
       +sub gen_files {
       +        my %extra_pages;
       +        for my $pageid (keys %{$config->{"metadata"}}) {
       +                for my $page (gen_page($pageid)) {
       +                        if (exists $extra_pages{$page->[0]}->{$page->[1]}) {
       +                                die "Duplicate page $page->[0] for language $page->[1]\n";
       +                        }
       +                        $extra_pages{$page->[0]}->{$page->[1]} = $page->[2];
       +                }
                }
       +        for my $pageid (keys %extra_pages) {
       +                gen_page($pageid, $extra_pages{$pageid});
       +        }
        }
        
        sub delete_obsolete_recurse {
       t@@ -74,6 +102,13 @@ sub delete_obsolete_recurse {
                        }
                }
                closedir($dh);
       +        opendir($dh, $dir) or die "Unable to open directory \"" . getcwd() . "/$dir\": $!\n";
       +        if (scalar(grep { $_ ne "." && $_ ne ".." } readdir($dh)) == 0) {
       +                print("Deleting old directory \"" . getcwd() . "/$dir\".\n");
       +                rmdir($dir);
       +        }
       +        closedir($dh);
       +        # FIXME: remove empty dirs
                foreach (@dirs) {
                        delete_obsolete_recurse($_);
                }
   DIR diff --git a/LSG/Markdown.pm b/LSG/Markdown.pm
       t@@ -128,15 +128,17 @@ sub finish_child {
                my $file = shift;
                my $parent = $child->{"parent"};
        
       +        my ($html, @extra_pages);
                if ($child->{"type"} eq "img") {
       -                $parent->{"txt"} .= handle_img($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
       +                $html = handle_img($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
                } elsif ($child->{"type"} eq "lnk") {
       -                $parent->{"txt"} .= handle_lnk($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
       +                $html = handle_lnk($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
                } elsif ($child->{"type"} eq "fnc") {
       -                $parent->{"txt"} .= handle_fnc($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
       +                ($html, @extra_pages) = handle_fnc($pageid, $lang, $line, $file, $child->{"txt"}, $child->{"url"});
                }
       +        $parent->{"txt"} .= $html;
        
       -        return $parent;
       +        return ($parent, @extra_pages);
        }
        
        # FIXME: This whole "parser" is extremely primitive and will break on a lot of input.
       t@@ -160,6 +162,7 @@ sub parse_md {
                my %structure = (txt => "", child => {});
                my $cur_child_ref = \%structure;
                my @states = (0);
       +        my @extra_pages;
                foreach (<$in>) {
                        foreach my $char (split //, $_) {
                                if ($char eq "\\") {
       t@@ -196,7 +199,9 @@ sub parse_md {
                                        $states[-1] |= $IN_URL;
                                } elsif ($char eq ")" && ($states[-1] & $IN_URL)) {
                                        pop(@states);
       -                                $cur_child_ref = finish_child($cur_child_ref, $pageid, $lang, $_, $inpath);
       +                                my @tmp_pages;
       +                                ($cur_child_ref, @tmp_pages) = finish_child($cur_child_ref, $pageid, $lang, $_, $inpath);
       +                                push(@extra_pages, @tmp_pages);
                                } else {
                                        if ($states[-1] & $IN_IMG_START) {
                                                pop(@states);
       t@@ -213,7 +218,7 @@ sub parse_md {
                        }
                }
        
       -        return markdown($structure{"txt"});
       +        return (markdown($structure{"txt"}), @extra_pages);
        }
        
        1;
   DIR diff --git a/LSG/UserFuncs.pm b/LSG/UserFuncs.pm
       t@@ -31,6 +31,14 @@ use LSG::Misc;
        # 2:  page language
        # 3-: other args (e.g. for func call)
        
       +# Return value:
       +# Usually just the html text.
       +# Optionally, a list of array references of the form [$pageid, $lang, $html]
       +# defining further pages, together with the complete body html text of the
       +# page. The returned text is always taken verbatim as the html code of the
       +# page body, there is no option to interpret it as markdown.
       +# When called from templates, the extra pages are ignored.
       +
        # Yeah, this is extremely inefficient, but it's
        # not like we're comparing billions of books.
        sub sort_numeric {
       t@@ -58,33 +66,176 @@ sub sort_numeric {
        }
        
        sub sort_books {
       -        my $pageid = shift;
       -        my $lang = shift;
       -        my $sort_by = shift;
       -        my $create_subheadings = shift;
       -        if (!$sort_by) {die "ERROR: not enough arguments to function call in $pageid\n"}
       -        my $output = "";
       -        my %tmp_md = ();
       +        # FIXME: 'list' currently doesn't make much sense - the
       +        # sorting should be changed to just be alphabetical by
       +        # title when 'list' is used
       +
       +        # $mode == list: just list books
       +        # $mode == combined: create subheadings for @sort_by
       +        # $mode == separate: create separate pages for @sort_by
       +        # $dir: directory to search for pages to sort
       +        # (new pages are also created in this directory)
       +        # @sort_by: list of metadata attributes to sort by
       +        # (this is a hierarchical sorting, i.e. if the second
       +        # category in @sort_by is the same for two pages,
       +        # the first category must also be the same, and so
       +        # on, otherwise there will probably be an error at
       +        # some point, or the result will just be weird)
       +        my ($pageid, $lang, $dir, $mode, @sort_by) = @_;
       +        if (!defined($dir) || !defined($mode)) {
       +                die "ERROR: Too few arguments to sort_by.\n";
       +        }
       +        if ($mode eq "list") {
       +                $mode = 0;
       +        } elsif ($mode eq "combined") {
       +                $mode = 1;
       +        } elsif ($mode eq "separate") {
       +                $mode = 2;
       +        } else {
       +                die "ERROR: Invalid mode $mode for sort_books.\n";
       +        }
       +        my %tmp_md;
                foreach my $id (keys %{$config->{"metadata"}}) {
       -                if ($config->{"metadata"}->{$id}->{"dirname"} eq "books") {
       +                # pages generated by sort_books need to be skipped so when this
       +                # function is called again for other languages, it doesn't try
       +                # to sort all the generated pages (yes, this is really ugly)
       +
       +                # prevent autovivification of $config->{"metadata"}->{$id}->{$lang}
       +                next if (exists($config->{"metadata"}->{$id}->{$lang}) &&
       +                        $config->{"metadata"}->{$id}->{$lang}->{"generated:sort_books"});
       +                if ($config->{"metadata"}->{$id}->{"dirname"} eq $dir) {
                                $tmp_md{$id} = $config->{"metadata"}->{$id};
       -                        if (!exists($config->{"metadata"}->{$id}->{$lang}->{$sort_by})) {
       -                                die "ERROR: $pageid: can't sort by \"$sort_by\"\n";
       +                        my $found = 0;
       +                        for my $sb (@sort_by) {
       +                                if (!exists($config->{"metadata"}->{$id}->{$lang}) ||
       +                                    !exists($config->{"metadata"}->{$id}->{$lang}->{$sb})) {
       +                                        $found = 1;
       +                                } else {
       +                                        if ($found) {
       +                                                # there can't be any "undef gaps" - as soon as one sort key
       +                                                # is undef, all the ones afterwards are ignored (in the
       +                                                # final output, the page is located on the same "level" as
       +                                                # the category of the first undef sort key)
       +                                                die "ERROR: $pageid: metadata $sb defined but previous " .
       +                                                     "sort key already undef.\n";
       +                                        }
       +                                        my $val = $config->{"metadata"}->{$id}->{$lang}->{$sb};
       +                                        if (!exists($config->{"$sb:$lang"}->{$val})) {
       +                                                die "No display value configured for sort key $sb=$val (language $lang).\n";
       +                                        }
       +                                }
                                }
                        }
                }
       -        my $current = "";
       -        foreach my $id (sort {$tmp_md{$a}->{$lang}->{$sort_by} cmp $tmp_md{$b}->{$lang}->{$sort_by} or
       -                              sort_numeric($tmp_md{$a}->{$lang}->{"title"}, $tmp_md{$b}->{$lang}->{"title"})} (keys %tmp_md)) {
       -                if ($create_subheadings && $create_subheadings eq "true" && $current ne $tmp_md{$id}->{$lang}->{$sort_by}) {
       -                        $current = $tmp_md{$id}->{$lang}->{$sort_by};
       -                        $output .= "<h3>$current</h3>\n";
       +        # I could do a Schwartzian transform here, but I won't because I'm too lazy.
       +        my @sorted = sort {
       +                for my $sb (@sort_by) {
       +                        # if a sort_by value is undef, use the title of the page instead
       +                        # so entries on the same level are sorted properly even if some
       +                        # are actual pages and others are categories
       +                        my $sort_a = exists($tmp_md{$a}->{$lang}->{$sb}) ?
       +                                $config->{"$sb:$lang"}->{$tmp_md{$a}->{$lang}->{$sb}} :
       +                                $tmp_md{$a}->{$lang}->{"title"};
       +                        my $sort_b = exists($tmp_md{$b}->{$lang}->{$sb}) ?
       +                                $config->{"$sb:$lang"}->{$tmp_md{$b}->{$lang}->{$sb}} :
       +                                $tmp_md{$b}->{$lang}->{"title"};
       +                        if ((my $ret = sort_numeric($sort_a, $sort_b))) {
       +                                return $ret;
       +                        }
                        }
       +                return sort_numeric($tmp_md{$a}->{$lang}->{"title"}, $tmp_md{$b}->{$lang}->{"title"});
       +        } (keys %tmp_md);
       +        my $output = "";
       +        my %current;
       +        my @extra_pages;
       +        my @page_stack = ([$pageid, $lang, ""]);
       +        my $margin_dir = $config->{"lang_dirs"}->{$lang} eq "rtl" ? "right" : "left";
       +        foreach my $id (@sorted) {
                        my $rel_lnk = LSG::Misc::gen_relative_link("$lang/$pageid", "$lang/$id.html");
       -                $output .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
       +                if ($mode == 1 || $mode == 2) {
       +                        my $indent = 0;
       +                        my $found_unequal = 0;
       +                        for my $i (0..$#sort_by) {
       +                                my $sb = $sort_by[$i];
       +                                # Note: it would be possible to use exists instead of
       +                                # defined here, but using defined makes the code a bit simpler
       +                                if (defined($current{$sb}) != defined($tmp_md{$id}->{$lang}->{$sb}) ||
       +                                    (defined($current{$sb}) && $current{$sb} ne $tmp_md{$id}->{$lang}->{$sb})) {
       +                                        $found_unequal = 1;
       +                                        $current{$sb} = $tmp_md{$id}->{$lang}->{$sb};
       +                                        for my $j ($indent+1..$#page_stack) {
       +                                                push(@extra_pages, pop(@page_stack));
       +                                        }
       +                                        if (defined($current{$sb})) {
       +                                                my $name = $config->{"$sb:$lang"}->{$current{$sb}};
       +                                                # This is currently hard-coded. Up to four heading sizes are
       +                                                # used (starting at <h3>), then they just stay the same
       +                                                if ($mode == 1) {
       +                                                        my $h_sz = $indent + 3 > 6 ? 6 : $indent + 3;
       +                                                        $output .= "<h$h_sz style=\"margin-$margin_dir: " .
       +                                                                ($indent * 15). "pt;\">$name</h$h_sz>\n";
       +                                                } else {
       +                                                        my $new_id = "$dir/$sb/$current{$sb}";
       +                                                        if (exists $config->{"metadata"}->{$new_id}->{$lang}) {
       +                                                                die "ERROR: Duplicate page $new_id (lang $lang).\n";
       +                                                        }
       +                                                        my $cat_lnk = LSG::Misc::gen_relative_link(
       +                                                                "$lang/$page_stack[-1]->[0]", "$lang/$new_id.html"
       +                                                        );
       +                                                        $page_stack[-1]->[2] .= "<p><a href=\"$cat_lnk\">$name</a></p>\n";
       +                                                        push(@page_stack, [
       +                                                                $new_id,
       +                                                                $lang,
       +                                                                "<h3>$name</h3>\n"
       +                                                        ]);
       +                                                        $config->{"metadata"}->{$new_id}->{$lang} = {
       +                                                                title => $name,
       +                                                                "generated:sort_books" => 1
       +                                                        };
       +                                                        # FIXME: maybe check if these overwrite a different value
       +                                                        $config->{"metadata"}->{$new_id}->{"template"} = $config->{"metadata"}->{$pageid}->{"template"};
       +                                                        $config->{"metadata"}->{$new_id}->{"dirname"} = "$dir/$sb";
       +                                                        $config->{"metadata"}->{$new_id}->{"basename"} = $current{$sb};
       +                                                }
       +                                        }
       +                                } elsif ($found_unequal && defined($current{$sb})) {
       +                                        die "ERROR: $sb same as previous page in list for page $id, but higher-level category different (lang $lang).\n";
       +                                }
       +                                if (!defined($current{$sb})) {
       +                                        # as soon as one sort key is undef, the other ones should
       +                                        # also be undef for it to make sense
       +                                        for my $j ($i+1..$#sort_by) {
       +                                                if (defined($tmp_md{$id}->{$lang}->{$sort_by[$j]})) {
       +                                                        die "ERROR: $sort_by[$j] set for page $id, but $sb unset (lang $lang).\n";
       +                                                }
       +                                                $current{$sort_by[$j]} = undef;
       +                                        }
       +                                        last;
       +                                }
       +                                $indent++;
       +                        }
       +                        if ($mode == 1) {
       +                                $output .= "<p style=\"margin-$margin_dir: " . ($indent * 15) . "pt;\">" .
       +                                        "<a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
       +                        } else {
       +                                $rel_lnk = LSG::Misc::gen_relative_link("$lang/$page_stack[-1]->[0]", "$lang/$id.html");
       +                                $page_stack[-1]->[2] .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
       +                        }
       +                } else {
       +                        $output .= "<p><a href=\"$rel_lnk\">" . $tmp_md{$id}->{$lang}->{"title"} . "</a></p>\n";
       +                }
                }
        
       -        return $output;
       +        if ($mode == 2) {
       +                for my $i (1..$#page_stack) {
       +                        push(@extra_pages, pop(@page_stack));
       +                }
       +                $output = $page_stack[0]->[2];
       +                shift @page_stack;
       +                return ($output, @extra_pages);
       +        } else {
       +                return $output;
       +        }
        }
        
        sub gen_lang_selector {
       t@@ -115,6 +266,9 @@ sub gen_nav {
                #}
                foreach my $nav_page (@nav) {
                        my $title = $config->{"metadata"}->{$nav_page}->{$lang}->{"title"};
       +                if (!defined($title)) {
       +                        die "Unable to find title for navigation page \"$nav_page\"\n";
       +                }
                        my $url = LSG::Misc::gen_relative_link("$lang/$pageid", "$lang/$nav_page.html");
                        $output .= "<li><a href=\"$url\">$title</a></li>\n";
                }
   DIR diff --git a/lsg.pl b/lsg.pl
       t@@ -222,22 +222,22 @@ B<sort_books>
        
        B<Parameters:>
        
       -- attribute to sort by
       +- directory to take books from
        
       -- create heading when attribute changes or not
       +- mode
        
       -B<Purpose:>
       -
       -Generate sorted list of all books, first by the given attribute, which can be anything
       -in the metadata, then by the titles. The second attribute can be used to create, for
       -instance, category titles. This does not make sense though when the attribute is just
       -the title which changes every time anyways. If the second argument is left out, it
       -defaults to "false". The attribute to be sorted by (obviously) needs to be defined for
       -each book.
       +- attribute(s) to sort by
        
       -B<Example:>
       +B<Purpose:>
        
       -C<{sort_books}(category false)>
       +Generate sorted list of all pages in the given directory, first by the given attribute(s),
       +which can be anything in the metadata, then by the titles. The mode argument can be used
       +to either just list all books ("list"), list all books with subheadings for the different
       +sorting attributes ("combined"), or generate separate pages for the different values of
       +the sorting attributes ("separate"). Note that the display names for the attributes need to be defined
       +in the configuration file config.ini. For instance, if a page contains metadata
       +"category:stuff", config.ini must contain a section "[category:$lang]" for each language
       +that contains a line "stuff=Display Name".
        
        This function was created for a book site, but it could probably be used for articles
        as well.