From 941b33368579e215e1e3c179cffe9963ed78d22c Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Fri, 12 Dec 2025 23:22:26 +0000 Subject: [PATCH 1/3] Create cross-references between duplicate issues in second step This separates cross-linking of duplicates from the process of turning the issue text and issue resolution into HTML, as suggested in comments in the code. --- src/issues.h | 2 +- src/lists.cpp | 35 ++++++++++++++++++++--------------- src/report_generator.cpp | 22 ++++++++++------------ 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/issues.h b/src/issues.h index 8bfbcda84c..9cf3fb2207 100644 --- a/src/issues.h +++ b/src/issues.h @@ -25,7 +25,7 @@ struct issue { std::string submitter; // original submitter of the issue chrono::year_month_day date; // date the issue was filed chrono::year_month_day mod_date; // date the issue was last changed - std::set duplicates; // sorted list of duplicate issues, stored as html anchor references. + std::map duplicates; // duplicate issues, as number and formatted html anchor std::string text; // text representing the issue int priority = 99; // severity, 1 = critical, 4 = minor concern, 0 = trivial to resolve, 99 = not yet prioritised std::string owner; // person identified as taking ownership of drafting/progressing the issue diff --git a/src/lists.cpp b/src/lists.cpp index 43cdb2f969..e601aeab64 100644 --- a/src/lists.cpp +++ b/src/lists.cpp @@ -230,7 +230,7 @@ std::string paper_title_attr(std::string paper_number, lwg::metadata& meta) { } void format_issue_as_html(lwg::issue & is, - std::span issues, + std::span issues, lwg::metadata & meta) { auto& section_db = meta.section_db; @@ -302,10 +302,10 @@ void format_issue_as_html(lwg::issue & is, // note

[NOTE CONTENTS]

// !-- comments are simply erased // - // In addition, as duplicate issues are discovered, the duplicates are marked up - // in the supplied range [first_issue,last_issue). Similarly, if an unexpected - // (unknown) section is discovered, it will be inserted into the supplied - // section index, 'section_db'. + // In addition, as duplicate issues are discovered, the duplicates are recorded + // in the issue for later processing. + // Similarly, if an unexpected (unknown) section is discovered, + // it will be inserted into the supplied section index, 'section_db'. // // The behavior is undefined unless the issues in the supplied span are sorted by issue-number. // @@ -414,8 +414,7 @@ void format_issue_as_html(lwg::issue & is, } if (!tag_stack.empty() and tag_stack.back() == "duplicate") { - n->duplicates.insert(make_html_anchor(is)); - is.duplicates.insert(make_html_anchor(*n)); + is.duplicates[num] = make_html_anchor(*n); r.clear(); } else { @@ -488,16 +487,22 @@ void format_issue_as_html(lwg::issue & is, void prepare_issues(std::span issues, lwg::metadata & meta) { // Initially sort the issues by issue number, so each issue can be correctly 'format'ted - std::ranges::sort(issues, {}, &lwg::issue::num); - - // Then we format the issues, which should be the last time we need to touch the issues themselves - // We may turn this into a two-stage process, analysing duplicates and then applying the links - // This will allow us to better express constness when the issues are used purely for reference. - // Currently, the 'format' function takes a span of non-const-issues purely to - // mark up information related to duplicates, so processing duplicates in a separate pass may - // clarify the code. + std::ranges::sort(issues, {}, &lwg::issue::num); + + // Then we format the issues, which should be the last time we need to touch the issues themselves. + // The full list of issues is passed so that elements can be resolved to an issue. 
for (auto & i : issues) { format_issue_as_html(i, issues, meta); } + // Process the duplicates found while formatting the HTML. + // Ensure that each issue in i->duplicates has i in its own set of duplicates. + for (auto& i : issues) { + for (auto& dup : i.duplicates) { + auto& dupi = *std::ranges::lower_bound(issues, dup.first, {}, &lwg::issue::num); + if (auto& rev = dupi.duplicates[i.num]; rev.empty()) + rev = make_html_anchor(i); + } + } + // Issues will be routinely re-sorted in later code, but contents should be fixed after formatting. // This suggests we may want to be storing some kind of issue handle in the functions that keep // re-sorting issues, and so minimize the churn on the larger objects. diff --git a/src/report_generator.cpp b/src/report_generator.cpp index 4870e1c244..6fc716afc8 100644 --- a/src/report_generator.cpp +++ b/src/report_generator.cpp @@ -146,16 +146,6 @@ auto to_string(major_section_key sn) -> std::string { return out.str(); } -template -void print_list(std::ostream & out, Container const & source, char const * separator) { - char const * sep{""}; - for (auto const & x : source) { - out << sep << x; - sep = separator; - } -} - - void print_file_header(std::ostream& out, std::string const & title, std::string url_filename = {}, std::string desc = {}) { out << @@ -301,7 +291,11 @@ R"( // Duplicates out << "\n" << "\n"; } @@ -380,7 +374,11 @@ void print_issue(std::ostream & out, lwg::issue const & iss, lwg::section_map & // duplicates if (!iss.duplicates.empty()) { out << "

Duplicate of: "; - print_list(out, iss.duplicates, ", "); + char const* sep = ""; + for (auto const& x : iss.duplicates) { + out << sep << x.second; + sep = ", "; + } out << "

\n"; } From e5b42c25b6ad4be09a8f6be31ad488feaf7b8af1 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Sat, 13 Dec 2025 11:24:54 +0000 Subject: [PATCH 2/3] Make most lwg::report_generator member functions const The remaining functions access the section_db and so can't be const yet. Ideally we would avoid making any changes to that section_db after parsing the XML files and converting the issue text to HTML. That would allow all functions that generate HTML files to be const, which would allow them to run in parallel. Generating the individual HTML files currently takes about 25% of the total time to generate the lists, but in theory it should be possible to generate those files concurrently. Also make the paper_title_attr function take a const reference to the lwg::metadata object. It needs to look up paper titles in the map, but it can use map::find instead of map::operator[], so that it doesn't need a non-const reference. --- src/lists.cpp | 18 ++++++++++++++++-- src/report_generator.cpp | 20 ++++++++++---------- src/report_generator.h | 20 ++++++++++---------- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/lists.cpp b/src/lists.cpp index e601aeab64..ac41d1ca30 100644 --- a/src/lists.cpp +++ b/src/lists.cpp @@ -218,8 +218,10 @@ namespace } // The title of the specified paper, formatted as an HTML title="..." attribute. 
-std::string paper_title_attr(std::string paper_number, lwg::metadata& meta) { - auto title = meta.paper_titles[paper_number]; +std::string paper_title_attr(std::string const& paper_number, lwg::metadata const& meta) { + std::string title; + if (auto pos = meta.paper_titles.find(paper_number); pos != meta.paper_titles.end()) + title = pos->second; if (!title.empty()) { title = lwg::replace_reserved_char(std::move(title), '&', "&"); @@ -721,6 +723,18 @@ void check_is_directory(fs::path const & directory) { } } +// Notes on performance (as of December 2025): +// Reading the XML files for each issues takes about 7% of the total run time. +// Converting the issue text to HTML takes about 7%. +// Generating the three main lists (active, defects, closed) takes about 50%. +// Generating the individual HTML pages for each issue takes about 25%. +// +// The cost of copying the vectors of issues and sorting them repeatedly is insignificant. +// +// Converting issues to HTML cannot be parallelized currently because it +// involves non-const accesses to the section_db, but it's not worth doing +// when it only takes 7% of the total time anyway. + int main(int argc, char* argv[]) { try { fs::path path; diff --git a/src/report_generator.cpp b/src/report_generator.cpp index 6fc716afc8..28693e7a31 100644 --- a/src/report_generator.cpp +++ b/src/report_generator.cpp @@ -477,7 +477,7 @@ namespace lwg // A precondition for calling any of these functions is that the list of issues is sorted in numerical order, by issue number. // While nothing disastrous will happen if this precondition is violated, the published issues list will list items // in the wrong order. 
-void report_generator::make_active(std::span issues, fs::path const & path, std::string const & diff_report) { +void report_generator::make_active(std::span issues, fs::path const & path, std::string const & diff_report) const { assert(std::ranges::is_sorted(issues, {}, &issue::num)); fs::path filename{path / "lwg-active.html"}; @@ -496,7 +496,7 @@ void report_generator::make_active(std::span issues, fs::path const } -void report_generator::make_defect(std::span issues, fs::path const & path, std::string const & diff_report) { +void report_generator::make_defect(std::span issues, fs::path const & path, std::string const & diff_report) const { assert(std::ranges::is_sorted(issues, {}, &issue::num)); fs::path filename{path / "lwg-defects.html"}; @@ -514,7 +514,7 @@ void report_generator::make_defect(std::span issues, fs::path const } -void report_generator::make_closed(std::span issues, fs::path const & path, std::string const & diff_report) { +void report_generator::make_closed(std::span issues, fs::path const & path, std::string const & diff_report) const { assert(std::ranges::is_sorted(issues, {}, &issue::num)); fs::path filename{path / "lwg-closed.html"}; @@ -533,7 +533,7 @@ void report_generator::make_closed(std::span issues, fs::path const // Additional non-standard documents, useful for running LWG meetings -void report_generator::make_tentative(std::span issues, fs::path const & path) { +void report_generator::make_tentative(std::span issues, fs::path const & path) const { // publish a document listing all tentative issues that may be acted on during a meeting. 
assert(std::ranges::is_sorted(issues, {}, &issue::num)); @@ -553,7 +553,7 @@ void report_generator::make_tentative(std::span issues, fs::path co } -void report_generator::make_unresolved(std::span issues, fs::path const & path) { +void report_generator::make_unresolved(std::span issues, fs::path const & path) const { // publish a document listing all non-tentative, non-ready issues that must be reviewed during a meeting. assert(std::ranges::is_sorted(issues, {}, &issue::num)); @@ -572,7 +572,7 @@ void report_generator::make_unresolved(std::span issues, fs::path c print_file_trailer(out); } -void report_generator::make_immediate(std::span issues, fs::path const & path) { +void report_generator::make_immediate(std::span issues, fs::path const & path) const { // publish a document listing all non-tentative, non-ready issues that must be reviewed during a meeting. assert(std::ranges::is_sorted(issues, {}, &issue::num)); @@ -607,7 +607,7 @@ out << R"(

C++ Standard Library Issues Resolved Directly In [INSERT CURRENT M print_file_trailer(out); } -void report_generator::make_ready(std::span issues, fs::path const & path) { +void report_generator::make_ready(std::span issues, fs::path const & path) const { // publish a document listing all ready issues for a formal vote assert(std::ranges::is_sorted(issues, {}, &issue::num)); @@ -642,7 +642,7 @@ out << R"(

C++ Standard Library Issues to be moved in [INSERT CURRENT MEETING print_file_trailer(out); } -void report_generator::make_editors_issues(std::span issues, fs::path const & path) { +void report_generator::make_editors_issues(std::span issues, fs::path const & path) const { // publish a single document listing all 'Voting' and 'Immediate' resolutions (only). assert(std::ranges::is_sorted(issues, {}, &issue::num)); @@ -657,7 +657,7 @@ void report_generator::make_editors_issues(std::span issues, fs::pa print_file_trailer(out); } -void report_generator::make_sort_by_num(std::span issues, fs::path const & filename) { +void report_generator::make_sort_by_num(std::span issues, fs::path const & filename) const { std::ranges::sort(issues, {}, &issue::num); std::ofstream out{filename}; @@ -747,7 +747,7 @@ sorted by priority.

print_file_trailer(out); } -void report_generator::make_sort_by_status_impl(std::span issues, fs::path const & filename, std::string title) { +void report_generator::make_sort_by_status_impl(std::span issues, fs::path const & filename, std::string title) const { std::ofstream out{filename}; if (!out) throw std::runtime_error{"Failed to open " + filename.string()}; diff --git a/src/report_generator.h b/src/report_generator.h index 74feaed165..0f35929d02 100644 --- a/src/report_generator.h +++ b/src/report_generator.h @@ -27,27 +27,27 @@ struct report_generator { // A precondition for calling any of these functions is that the list of issues is sorted in numerical order, by issue number. // While nothing disastrous will happen if this precondition is violated, the published issues list will list items // in the wrong order. - void make_active(std::span issues, fs::path const & path, std::string const & diff_report); + void make_active(std::span issues, fs::path const & path, std::string const & diff_report) const; - void make_defect(std::span issues, fs::path const & path, std::string const & diff_report); + void make_defect(std::span issues, fs::path const & path, std::string const & diff_report) const; - void make_closed(std::span issues, fs::path const & path, std::string const & diff_report); + void make_closed(std::span issues, fs::path const & path, std::string const & diff_report) const; // Additional non-standard documents, useful for running LWG meetings - void make_tentative(std::span issues, fs::path const & path); + void make_tentative(std::span issues, fs::path const & path) const; // publish a document listing all tentative issues that may be acted on during a meeting. - void make_unresolved(std::span issues, fs::path const & path); + void make_unresolved(std::span issues, fs::path const & path) const; // publish a document listing all non-tentative, non-ready issues that must be reviewed during a meeting. 
- void make_immediate(std::span issues, fs::path const & path); + void make_immediate(std::span issues, fs::path const & path) const; // publish a document listing all non-tentative, non-ready issues that must be reviewed during a meeting. - void make_ready(std::span issues, fs::path const & path); + void make_ready(std::span issues, fs::path const & path) const; // publish a document listing all ready issues for a formal vote - void make_sort_by_num(std::span issues, fs::path const & filename); + void make_sort_by_num(std::span issues, fs::path const & filename) const; void make_sort_by_priority(std::span issues, fs::path const & filename); @@ -57,12 +57,12 @@ struct report_generator { void make_sort_by_section(std::span issues, fs::path const & filename, bool active_only = false); - void make_editors_issues(std::span issues, fs::path const & path); + void make_editors_issues(std::span issues, fs::path const & path) const; void make_individual_issues(std::span issues, fs::path const & path); private: - void make_sort_by_status_impl(std::span issues, fs::path const & filename, std::string title); + void make_sort_by_status_impl(std::span issues, fs::path const & filename, std::string title) const; mailing_info const & lwg_issues_xml; section_map & section_db; From 5a10e592807f4ff33cf77024397e88f97a0046c3 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Sat, 13 Dec 2025 12:07:43 +0000 Subject: [PATCH 3/3] Generate issue lists concurrently By using four threads to create the issues lists we reduce the total runtime from 1.5s to 1s. We could do much better if we could generate the individual HTML files for each issue in parallel, or even just do that in parallel with creating the main lists. Currently we have to finish generating the main lists before starting to generate the individual issues. 
--- src/lists.cpp | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/lists.cpp b/src/lists.cpp index ac41d1ca30..5cbe3356b9 100644 --- a/src/lists.cpp +++ b/src/lists.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include #include #include +#include #include #include @@ -724,16 +726,16 @@ void check_is_directory(fs::path const & directory) { } // Notes on performance (as of December 2025): -// Reading the XML files for each issues takes about 7% of the total run time. -// Converting the issue text to HTML takes about 7%. -// Generating the three main lists (active, defects, closed) takes about 50%. -// Generating the individual HTML pages for each issue takes about 25%. +// Reading the XML files for each issue takes about 10% of the total run time. +// Converting the issue text to HTML takes about 10%. +// Generating the three main lists (active, defects, closed) takes about 20%. +// Generating the individual HTML pages for each issue takes about 35%. // // The cost of copying the vectors of issues and sorting them repeatedly is insignificant. // // Converting issues to HTML cannot be parallelized currently because it // involves non-const accesses to the section_db, but it's not worth doing -// when it only takes 7% of the total time anyway. +// when it only takes 10% of the total time anyway. 
int main(int argc, char* argv[]) { try { @@ -823,20 +825,34 @@ int main(int argc, char* argv[]) { : std::back_inserter(unresolved_issues); std::copy_if(issues.begin(), issues.end(), ready_inserter, [](lwg::issue const & iss){ return lwg::is_ready(iss.stat); } ); + using span = std::span; + const auto launch = std::launch::async; // use deferred to serialize + // First generate the primary 3 standard issues lists - generator.make_active(issues, target_path, diff_report); - generator.make_defect(issues, target_path, diff_report); - generator.make_closed(issues, target_path, diff_report); - // unofficial documents - generator.make_tentative (issues, target_path); - generator.make_unresolved(issues, target_path); - generator.make_immediate (issues, target_path); - generator.make_ready (issues, target_path); - generator.make_editors_issues(issues, target_path); - generator.make_individual_issues(issues, target_path); + auto fut_active = std::async(launch, &lwg::report_generator::make_active, + std::cref(generator), span(issues), std::cref(target_path), std::cref(diff_report)); + auto fut_defects = std::async(launch, &lwg::report_generator::make_defect, + std::cref(generator), span(issues), std::cref(target_path), std::cref(diff_report)); + auto fut_closed = std::async(launch, &lwg::report_generator::make_closed, + std::cref(generator), span(issues), std::cref(target_path), std::cref(diff_report)); + // unofficial documents + std::as_const(generator).make_tentative (issues, target_path); + std::as_const(generator).make_unresolved(issues, target_path); + std::as_const(generator).make_immediate (issues, target_path); + std::as_const(generator).make_ready (issues, target_path); + std::as_const(generator).make_editors_issues(issues, target_path); + + // We need to join the concurrent tasks because make_individual_issues is non-const + // We could run the three make_sort_by_num calls before joining the futures, + // because those are const, except for re-sorting the issues 
span by issue number, + // but as they run first the issues are actually already sorted. + fut_active.wait(); + fut_defects.wait(); + fut_closed.wait(); + generator.make_individual_issues(issues, target_path); // Now we have a parsed and formatted set of issues, we can write the standard set of HTML documents // Note that each of these functions is going to re-sort the 'issues' vector for its own purposes

"; - print_list(out, i.duplicates, ", "); + char const* sep = ""; + for (auto const& x : i.duplicates) { + out << sep << x.second; + sep = ", "; + } out << "