From c8f4e9c5d5e744ab8e04dc7c8c24ed1432f71d30 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Sun, 23 Nov 2025 20:00:48 -0700 Subject: [PATCH 1/4] fix --- README.rst | 18 ++- qtp_job_output_folder/summary.py | 66 +++++++---- .../tests/test_data/folder_1/index.html | 0 .../test_data/folder_a/folder_b/index.html | 0 qtp_job_output_folder/tests/test_summary.py | 111 +++++++++++------- 5 files changed, 128 insertions(+), 67 deletions(-) create mode 100644 qtp_job_output_folder/tests/test_data/folder_1/index.html create mode 100644 qtp_job_output_folder/tests/test_data/folder_a/folder_b/index.html diff --git a/README.rst b/README.rst index 6a32705..1af47fb 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,20 @@ Job Output Folder Data Type Plugin ================================== -This is the data type plugin for Qiita jobs that only create a folder and the command is responsible of generating all the files. +The goal of this Qiita type plugin is to validate and summarize any kind of folder output. + +Note that `job-output-folder` expects a single folder and this will become an artifact that will live in +`[qiita-base-path]/job-output-folder/[artifact-id]/[output-folder]` and this plugin will generate: + +- `summary.html`: a browser friendly file listing that will include all files at `[artifact-id]/[output-folder]` and + any `index.html` files in any subfolder. As a reminder, the Qiita nginx basic configuration allows displaying/loading any + available HTML/JS files; thus, it is able to properly display any available `index.html` files +- `MANIFEST.txt`: a comprehensive list of all available files in the folder. + +The two main plugins using this output are: + +- https://github.com/qiita-spots/qp-knight-lab-processing: which will generate an `[output-folder]` containing all the logs, + files and summaries from BCL to clean FASTQ processing. Note that multiqc results are part of this and the outputs are + properly displayed in Qiita using this method. 
+- https://github.com/qiita-spots/qp-pacbio: `PacBio processing`, the output are MAG, LCG and other output, which will be used + for dowstream analyses. diff --git a/qtp_job_output_folder/summary.py b/qtp_job_output_folder/summary.py index 3892ff1..6257c35 100644 --- a/qtp_job_output_folder/summary.py +++ b/qtp_job_output_folder/summary.py @@ -6,36 +6,58 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from os.path import exists, isdir, join, dirname -from glob import glob from json import dumps +from os import sep, walk +from os.path import basename, dirname, exists, isdir, join def _folder_listing(folder): - results = [] - for f in glob(f'{folder}/*'): - if isdir(f): - results.append(('folder', f)) - results.extend(_folder_listing(f'{f}/*')) - else: - results.append(('file', f)) - return results + index, manifest = [], [] + # only adding main files on top directory + # and index.html at any level + separator = "|--" + for dpath, _, files in walk(folder): + # if we are at the top, we should add + # all files + if dpath == folder: + for f in files: + index.append(("file", f"{dpath}/{f}")) + # if we are not at the top, we should only add + # the index.html files + elif "index.html" in files: + index.append(("file", f"{dpath}/index.html")) + + depth = dpath.replace(folder, "").count(sep) + space = separator * depth + manifest.append(f"{space} {basename(dpath)}/") + for filename in files: + manifest.append(f"{space}{separator} {filename}") + + return index, manifest def _generate_html_summary(jid, folder, out_dir): - summary = f'

{folder} does not exist.

' + summary = f"

{folder} does not exist.

" + manifest_fp = join(folder, "MANIFEST.txt") + index_fp = join(out_dir, "summary.html") if exists(folder) and isdir(folder): # calculating the "trimming" for the fullpaths, +1 is to remove / tname = len(dirname(folder)) + 1 tlink = len(dirname(dirname(folder))) - summary = '
\n'.join([ - f'' - f'{f[tname:]}' - for ft, f in _folder_listing(folder)]) + link = '%s' + index, manifest = _folder_listing(folder) - index_fp = join(out_dir, "summary.html") - with open(index_fp, 'w') as of: + with open(manifest_fp, "w") as of: + of.write("\n".join(manifest)) + + links = [link % (manifest_fp[tlink:], "file", manifest_fp[tname:])] + for ft, f in index: + links.append(link % (f[tlink:], ft, f[tname:])) + + summary = "
\n".join(links) + + with open(index_fp, "w") as of: of.write(summary) # we could add a support folder for the summary @@ -69,22 +91,22 @@ def generate_html_summary(qclient, job_id, parameters, out_dir): # we are going to use the "raw" code for retrieving artifact_info vs. the # qiita_client.artifact_and_preparation_files method because this only # expects a single filepath - artifact_id = parameters['input_data'] + artifact_id = parameters["input_data"] qclient_url = "/qiita_db/artifacts/%s/" % artifact_id artifact_info = qclient.get(qclient_url) # [0] there is only one directory - folder = artifact_info['files']['directory'][0]['filepath'] + folder = artifact_info["files"]["directory"][0]["filepath"] # 2. Generate summary index_fp, viz_fp = _generate_html_summary(job_id, folder, out_dir) # Step 3: add the new file to the artifact using REST api success = True - error_msg = '' + error_msg = "" try: - fps = dumps({'html': index_fp, 'dir': viz_fp}) - qclient.patch(qclient_url, 'add', '/html_summary/', value=fps) + fps = dumps({"html": index_fp, "dir": viz_fp}) + qclient.patch(qclient_url, "add", "/html_summary/", value=fps) except Exception as e: success = False error_msg = str(e) diff --git a/qtp_job_output_folder/tests/test_data/folder_1/index.html b/qtp_job_output_folder/tests/test_data/folder_1/index.html new file mode 100644 index 0000000..e69de29 diff --git a/qtp_job_output_folder/tests/test_data/folder_a/folder_b/index.html b/qtp_job_output_folder/tests/test_data/folder_a/folder_b/index.html new file mode 100644 index 0000000..e69de29 diff --git a/qtp_job_output_folder/tests/test_summary.py b/qtp_job_output_folder/tests/test_summary.py index a4aae89..e86fb4c 100644 --- a/qtp_job_output_folder/tests/test_summary.py +++ b/qtp_job_output_folder/tests/test_summary.py @@ -6,13 +6,13 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main -from tempfile import mkdtemp -from os import remove -from os.path import exists, isdir, join, dirname, abspath from inspect import currentframe, getfile -from shutil import rmtree, copytree from json import dumps +from os import remove +from os.path import abspath, dirname, exists, isdir, join +from shutil import copytree, rmtree +from tempfile import mkdtemp +from unittest import main from qiita_client.testing import PluginTestCase @@ -23,8 +23,8 @@ class SummaryTests(PluginTestCase): def setUp(self): self.out_dir = mkdtemp() - self.source_dir = join(mkdtemp(), 'test_data') - source = join(dirname(abspath(getfile(currentframe()))), 'test_data') + self.source_dir = join(mkdtemp(), "test_data") + source = join(dirname(abspath(getfile(currentframe()))), "test_data") copytree(source, self.source_dir) self._clean_up_files = [self.out_dir] @@ -37,21 +37,29 @@ def tearDown(self): remove(fp) def test_summary(self): - files = [(self.source_dir, 'directory')] - data = {'filepaths': dumps(files), 'type': 'job-output-folder', - 'name': "A name", 'data_type': 'Job Output Folder'} - aid = self.qclient.post('/apitest/artifact/', data=data)['artifact'] - parameters = {'input_data': aid} - data = {'command': dumps(['qtp-job-output-folder', __version__, - 'Generate HTML summary']), - 'parameters': dumps(parameters), - 'status': 'running'} - job_id = self.qclient.post( - '/apitest/processing_job/', data=data)['job'] + files = [(self.source_dir, "directory")] + data = { + "filepaths": dumps(files), + "type": "job-output-folder", + "name": "A name", + "data_type": "Job Output Folder", + } + aid = self.qclient.post("/apitest/artifact/", data=data)["artifact"] + parameters = {"input_data": aid} + data = { + "command": dumps( + ["qtp-job-output-folder", __version__, "Generate HTML summary"] + ), + "parameters": dumps(parameters), + "status": "running", + } + url = 
"/apitest/processing_job/" + job_id = self.qclient.post(url, data=data)["job"] # Run the test obs_success, obs_ainfo, obs_error = generate_html_summary( - self.qclient, job_id, parameters, self.out_dir) + self.qclient, job_id, parameters, self.out_dir + ) # asserting reply self.assertTrue(obs_success) @@ -61,36 +69,51 @@ def test_summary(self): # asserting content of html res = self.qclient.get("/qiita_db/artifacts/%s/" % aid) # cleaning artifact files, to avoid errors - [self._clean_up_files.extend([ff['filepath']]) - for f in res['files'].values() for ff in f] - html_fp = res['files']['html_summary'][0]['filepath'] + [ + self._clean_up_files.extend([ff["filepath"]]) + for f in res["files"].values() + for ff in f + ] + html_fp = res["files"]["html_summary"][0]["filepath"] with open(html_fp) as html_f: html = html_f.read() - self.assertCountEqual( - sorted(html.replace('
', '').split('\n')), - sorted(EXP_HTML.format(aid=aid).replace('
', '').split('\n'))) + self.assertEqual(html, EXP_HTML.format(aid=aid)) + + # verifying the new MANIFEST.txt + mfp = join(res["files"]["directory"][0]["filepath"], "MANIFEST.txt") + self.assertTrue(exists(f"{mfp}")) + with open(mfp, "r") as f: + obs = f.readlines() + self.assertCountEqual(obs, EXP_MANIFEST) EXP_HTML = ( - '' - 'test_data/folder_a
\n' - 'test_data/folder_a/folder_b/folder_c
\n' - '' - 'test_data/file_2
\n' + '' + "test_data/MANIFEST.txt
\n" '' - 'test_data/file_1
\n' - '' - 'test_data/test_data
\n' - 'test_data/test_data/folder_a/folder_b
\n' - 'test_data/test_data/folder_a/folder_b/' - 'folder_c/file_c
\n' - 'test_data/test_data/folder_a/file_a') - - -if __name__ == '__main__': + "test_data/file_1
\n" + '' + "test_data/file_2
\n" + 'test_data/folder_a/folder_b/index.html
\n' + 'test_data/folder_1/index.html' +) +EXP_MANIFEST = [ + " test_data/\n", + "|-- file_1\n", + "|-- file_2\n", + "|-- folder_a/\n", + "|--|-- file_a\n", + "|--|-- folder_b/\n", + "|--|--|-- index.html\n", + "|--|--|-- folder_c/\n", + "|--|--|--|-- file_c\n", + "|-- folder_1/\n", + "|--|-- index.html", +] + + +if __name__ == "__main__": main() From 2b0b8bbafb4489eb1349fdca2d901cccaebe73b5 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Mon, 24 Nov 2025 07:08:28 -0700 Subject: [PATCH 2/4] files.sort() --- qtp_job_output_folder/summary.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qtp_job_output_folder/summary.py b/qtp_job_output_folder/summary.py index 6257c35..e797a22 100644 --- a/qtp_job_output_folder/summary.py +++ b/qtp_job_output_folder/summary.py @@ -17,6 +17,9 @@ def _folder_listing(folder): # and index.html at any level separator = "|--" for dpath, _, files in walk(folder): + # assuring same order, mainly for testing + files.sort() + # if we are at the top, we should add # all files if dpath == folder: From d5e53c919c977373ad72f04b9aa697c91e8f5d9d Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Mon, 24 Nov 2025 08:46:18 -0700 Subject: [PATCH 3/4] debug --- qtp_job_output_folder/tests/test_summary.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/qtp_job_output_folder/tests/test_summary.py b/qtp_job_output_folder/tests/test_summary.py index e86fb4c..3fa73db 100644 --- a/qtp_job_output_folder/tests/test_summary.py +++ b/qtp_job_output_folder/tests/test_summary.py @@ -78,6 +78,13 @@ def test_summary(self): with open(html_fp) as html_f: html = html_f.read() + print("-------------") + print("-------------") + print(html) + print("-------------") + print(EXP_HTML.format(aid=aid)) + print("-------------") + print("-------------") self.assertEqual(html, EXP_HTML.format(aid=aid)) # verifying the new MANIFEST.txt From b1abc8f3df0953864cce15c628e8b80f0d34d3d1 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Mon, 24 Nov 2025 
10:20:20 -0700 Subject: [PATCH 4/4] avoid duplicated links --- qtp_job_output_folder/summary.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qtp_job_output_folder/summary.py b/qtp_job_output_folder/summary.py index e797a22..c520f52 100644 --- a/qtp_job_output_folder/summary.py +++ b/qtp_job_output_folder/summary.py @@ -56,7 +56,10 @@ def _generate_html_summary(jid, folder, out_dir): links = [link % (manifest_fp[tlink:], "file", manifest_fp[tname:])] for ft, f in index: - links.append(link % (f[tlink:], ft, f[tname:])) + # to avoid any duplication of lines: + _link = link % (f[tlink:], ft, f[tname:]) + if _link not in links: + links.append(_link) summary = "
\n".join(links)