diff --git a/README.rst b/README.rst index 6a32705..1af47fb 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,20 @@ Job Output Folder Data Type Plugin ================================== -This is the data type plugin for Qiita jobs that only create a folder and the command is responsible of generating all the files. +The goal of this Qiita type plugin is to validate and summarize any kind of folder output. + +Note that `job-output-folder` expects a single folder and this will become an artifact that will live in +`[qiita-base-path]/job-output-folder/[artifact-id]/[output-folder]` and this plugin will generate: + +- `summary.html`: a browser friendly file listing that will include all files at `[artifact-id]/[output-folder]` and + any `index.html` files in any subfolder. As a reminder, the Qiita nginx basic configuration allows displaying/loading any + available html/JS files; thus, it can properly display the available `index.html` files +- `MANIFEST.txt`: a comprehensive list of all available files in the folder. + +The two main plugins using this output are: + +- https://github.com/qiita-spots/qp-knight-lab-processing: which will generate an `[output-folder]` containing all the logs, + files and summaries from BCL to clean FASTQ processing. Note that multiqc results are part of this and the outputs are + properly displayed in Qiita using this method. +- https://github.com/qiita-spots/qp-pacbio: `PacBio processing`, the outputs are MAG, LCG and other outputs, which will be used + for downstream analyses. diff --git a/qtp_job_output_folder/summary.py b/qtp_job_output_folder/summary.py index 019f272..e797a22 100644 --- a/qtp_job_output_folder/summary.py +++ b/qtp_job_output_folder/summary.py @@ -6,37 +6,60 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from glob import glob from json import dumps -from os.path import dirname, exists, isdir, join +from os import sep, walk +from os.path import basename, dirname, exists, isdir, join def _folder_listing(folder): - results = [] - for f in glob(f"{folder}/*"): - if isdir(f): - results.append(("folder", f)) - results.extend(_folder_listing(f"{f}/*")) - else: - results.append(("file", f)) - return results + index, manifest = [], [] + # only adding main files on top directory + # and index.html at any level + separator = "|--" + for dpath, _, files in walk(folder): + # assuring same order, mainly for testing + files.sort() + + # if we are at the top, we should add + # all files + if dpath == folder: + for f in files: + index.append(("file", f"{dpath}/{f}")) + # if we are not at the top, we should only add + # the index.html files + elif "index.html" in files: + index.append(("file", f"{dpath}/index.html")) + + depth = dpath.replace(folder, "").count(sep) + space = separator * depth + manifest.append(f"{space} {basename(dpath)}/") + for filename in files: + manifest.append(f"{space}{separator} {filename}") + + return index, manifest def _generate_html_summary(jid, folder, out_dir): summary = f"

{folder} does not exist.

" + manifest_fp = join(folder, "MANIFEST.txt") + index_fp = join(out_dir, "summary.html") if exists(folder) and isdir(folder): # calculating the "trimming" for the fullpaths, +1 is to remove / tname = len(dirname(folder)) + 1 tlink = len(dirname(dirname(folder))) - summary = "
\n".join( - [ - f'{f[tname:]}' - for ft, f in _folder_listing(folder) - ] - ) + link = '%s' + index, manifest = _folder_listing(folder) + + with open(manifest_fp, "w") as of: + of.write("\n".join(manifest)) + + links = [link % (manifest_fp[tlink:], "file", manifest_fp[tname:])] + for ft, f in index: + links.append(link % (f[tlink:], ft, f[tname:])) + + summary = "
\n".join(links) - index_fp = join(out_dir, "summary.html") with open(index_fp, "w") as of: of.write(summary) diff --git a/qtp_job_output_folder/tests/test_data/folder_1/index.html b/qtp_job_output_folder/tests/test_data/folder_1/index.html new file mode 100644 index 0000000..e69de29 diff --git a/qtp_job_output_folder/tests/test_data/folder_a/folder_b/index.html b/qtp_job_output_folder/tests/test_data/folder_a/folder_b/index.html new file mode 100644 index 0000000..e69de29 diff --git a/qtp_job_output_folder/tests/test_summary.py b/qtp_job_output_folder/tests/test_summary.py index 2eeacaf..f4c387b 100644 --- a/qtp_job_output_folder/tests/test_summary.py +++ b/qtp_job_output_folder/tests/test_summary.py @@ -23,7 +23,7 @@ class SummaryTests(PluginTestCase): def setUp(self): self.out_dir = mkdtemp() - self.source_dir = join(mkdtemp(), "test_data") + self.source_dir = join(mkdtemp(), "result") source = join(dirname(abspath(getfile(currentframe()))), "test_data") copytree(source, self.source_dir) self._clean_up_files = [self.out_dir] @@ -77,31 +77,41 @@ def test_summary(self): with open(html_fp) as html_f: html = html_f.read() - self.assertCountEqual( - sorted(html.replace("
", "").split("\n")), - sorted(EXP_HTML.format(aid=aid).replace("
", "").split("\n")), - ) + self.assertEqual(html, EXP_HTML.format(aid=aid)) + + # verifying the new MANIFEST.txt + mfp = join(res["files"]["directory"][0]["filepath"], "MANIFEST.txt") + self.assertTrue(exists(f"{mfp}")) + with open(mfp, "r") as f: + obs = f.readlines() + self.assertCountEqual(obs, EXP_MANIFEST) EXP_HTML = ( - '' - "test_data/folder_a
\n" - 'test_data/folder_a/folder_b/folder_c
\n' - '' - "test_data/file_2
\n" - '' - "test_data/file_1
\n" - '' - "test_data/test_data
\n" - 'test_data/test_data/folder_a/folder_b
\n' - 'test_data/test_data/folder_a/folder_b/' - "folder_c/file_c
\n" - 'test_data/test_data/folder_a/file_a' + '' + "result/MANIFEST.txt
\n" + '' + "result/file_1
\n" + '' + "result/file_2
\n" + 'result/folder_a/folder_b/index.html
\n' + 'result/folder_1/index.html' ) +EXP_MANIFEST = [ + " result/\n", + "|-- file_1\n", + "|-- file_2\n", + "|-- folder_a/\n", + "|--|-- file_a\n", + "|--|-- folder_b/\n", + "|--|--|-- index.html\n", + "|--|--|-- folder_c/\n", + "|--|--|--|-- file_c\n", + "|-- folder_1/\n", + "|--|-- index.html", +] if __name__ == "__main__":