qiita-spots · antgonza · Nov 24, 2025 · Nov 24, 2025 · Nov 24, 2025 · Nov 24, 2025
diff --git a/README.rst b/README.rst
@@ -1,4 +1,20 @@
 Job Output Folder Data Type Plugin
 ==================================
 
-This is the data type plugin for Qiita jobs that only create a folder and the command is responsible of generating all the files.
+The goal of this Qiita type plugin is to validate and summarize any kind of folder output.
+
+Note that `job-output-folder` expects a single folder and this will become an artifact that will live in
+`[qiita-base-path]/job-output-folder/[artifact-id]/[output-folder]` and this plugin will generate:
+
+- `summary.html`: a browser friendly file listing that will include all files at `[artifact-id]/[output-folder]` and
+  any `index.html` files in any subfolder. As a reminder, the Qiita nginx basic configuration allows to display/load any
+  html/JS available files; thus, able to display properly `index.html` files available
+- `MANIFEST.txt`: a comprehensive list of all available files in the folder.
+
+The two main plugins using this output are:
+
+- https://github.com/qiita-spots/qp-knight-lab-processing: which will generate an `[output-folder]` contaning all the logs,
+  files and summaries from BCL to clean FASTQ processing. Note that multiqc resoults are part of this and the outputs are
+  properly displayed in Qiita using this method.
+- https://github.com/qiita-spots/qp-pacbio: `PacBio processing`, the output are MAG, LCG and other output, which will be used
+  for dowstream analyses.
diff --git a/qtp_job_output_folder/summary.py b/qtp_job_output_folder/summary.py
@@ -6,37 +6,60 @@
 # The full license is in the file LICENSE, distributed with this software.
 # -----------------------------------------------------------------------------
 
-from glob import glob
 from json import dumps
-from os.path import dirname, exists, isdir, join
+from os import sep, walk
+from os.path import basename, dirname, exists, isdir, join
 
 
 def _folder_listing(folder):
-    results = []
-    for f in glob(f"{folder}/*"):
-        if isdir(f):
-            results.append(("folder", f))
-            results.extend(_folder_listing(f"{f}/*"))
-        else:
-            results.append(("file", f))
-    return results
+    index, manifest = [], []
+    # only adding main files on top directory
+    # and index.html at any level
+    separator = "|--"
+    for dpath, _, files in walk(folder):
+        # assuring same order, mainly for testing
+        files.sort()
+
+        # if we are at the top, we should add
+        # all files
+        if dpath == folder:
+            for f in files:
+                index.append(("file", f"{dpath}/{f}"))
+        # if we are not at the top, we should only add
+        # the index.html files
+        elif "index.html" in files:
+            index.append(("file", f"{dpath}/index.html"))
+
+        depth = dpath.replace(folder, "").count(sep)
+        space = separator * depth
+        manifest.append(f"{space} {basename(dpath)}/")
+        for filename in files:
+            manifest.append(f"{space}{separator} {filename}")
+
+    return index, manifest
 
 
 def _generate_html_summary(jid, folder, out_dir):
     summary = f"<h3><b>{folder}</b> does not exist.</h3>"
+    manifest_fp = join(folder, "MANIFEST.txt")
+    index_fp = join(out_dir, "summary.html")
 
     if exists(folder) and isdir(folder):
         # calculating the "trimming" for the fullpaths, +1 is to remove /
         tname = len(dirname(folder)) + 1
         tlink = len(dirname(dirname(folder)))
-        summary = "<br/>\n".join(
-            [
-                f'<a href=".{f[tlink:]}" type="{ft}" target="_blank">{f[tname:]}</a>'
-                for ft, f in _folder_listing(folder)
-            ]
-        )
+        link = '<a href=".%s" type="%s" target="_blank">%s</a>'
+        index, manifest = _folder_listing(folder)
+
+        with open(manifest_fp, "w") as of:
+            of.write("\n".join(manifest))
+
+        links = [link % (manifest_fp[tlink:], "file", manifest_fp[tname:])]
+        for ft, f in index:
+            links.append(link % (f[tlink:], ft, f[tname:]))
+
+        summary = "<br/>\n".join(links)
 
-    index_fp = join(out_dir, "summary.html")
     with open(index_fp, "w") as of:
         of.write(summary)
 

diff --git a/qtp_job_output_folder/tests/test_data/folder_1/index.html b/qtp_job_output_folder/tests/test_data/folder_1/index.html
diff --git a/qtp_job_output_folder/tests/test_data/folder_a/folder_b/index.html b/qtp_job_output_folder/tests/test_data/folder_a/folder_b/index.html
diff --git a/qtp_job_output_folder/tests/test_summary.py b/qtp_job_output_folder/tests/test_summary.py
@@ -23,7 +23,7 @@
 class SummaryTests(PluginTestCase):
     def setUp(self):
         self.out_dir = mkdtemp()
-        self.source_dir = join(mkdtemp(), "test_data")
+        self.source_dir = join(mkdtemp(), "result")
         source = join(dirname(abspath(getfile(currentframe()))), "test_data")
         copytree(source, self.source_dir)
         self._clean_up_files = [self.out_dir]
@@ -77,31 +77,41 @@ def test_summary(self):
         with open(html_fp) as html_f:
             html = html_f.read()
 
-        self.assertCountEqual(
-            sorted(html.replace("<br/>", "").split("\n")),
-            sorted(EXP_HTML.format(aid=aid).replace("<br/>", "").split("\n")),
-        )
+        self.assertEqual(html, EXP_HTML.format(aid=aid))
+
+        # verifying the new MANIFEST.txt
+        mfp = join(res["files"]["directory"][0]["filepath"], "MANIFEST.txt")
+        self.assertTrue(exists(f"{mfp}"))
+        with open(mfp, "r") as f:
+            obs = f.readlines()
+        self.assertCountEqual(obs, EXP_MANIFEST)
 
 
 EXP_HTML = (
-    '<a href="./{aid}/test_data/folder_a" type="folder" target="_blank">'
-    "test_data/folder_a</a><br/>\n"
-    '<a href="./{aid}/test_data/folder_a/folder_b/folder_c" type="folder" '
-    'target="_blank">test_data/folder_a/folder_b/folder_c</a><br/>\n'
-    '<a href="./{aid}/test_data/file_2" type="file" target="_blank">'
-    "test_data/file_2</a><br/>\n"
-    '<a href="./{aid}/test_data/file_1" type="file" target="_blank">'
-    "test_data/file_1</a><br/>\n"
-    '<a href="./{aid}/test_data/test_data" type="folder" target="_blank">'
-    "test_data/test_data</a><br/>\n"
-    '<a href="./{aid}/test_data/test_data/folder_a/folder_b" type="folder" '
-    'target="_blank">test_data/test_data/folder_a/folder_b</a><br/>\n'
-    '<a href="./{aid}/test_data/test_data/folder_a/folder_b/folder_c/file_c" '
-    'type="file" target="_blank">test_data/test_data/folder_a/folder_b/'
-    "folder_c/file_c</a><br/>\n"
-    '<a href="./{aid}/test_data/test_data/folder_a/file_a" type="file" '
-    'target="_blank">test_data/test_data/folder_a/file_a</a>'
+    '<a href="./{aid}/result/MANIFEST.txt" type="file" target="_blank">'
+    "result/MANIFEST.txt</a><br/>\n"
+    '<a href="./{aid}/result/file_1" type="file" target="_blank">'
+    "result/file_1</a><br/>\n"
+    '<a href="./{aid}/result/file_2" type="file" target="_blank">'
+    "result/file_2</a><br/>\n"
+    '<a href="./{aid}/result/folder_a/folder_b/index.html" type="file" '
+    'target="_blank">result/folder_a/folder_b/index.html</a><br/>\n'
+    '<a href="./{aid}/result/folder_1/index.html" type="file" '
+    'target="_blank">result/folder_1/index.html</a>'
 )
+EXP_MANIFEST = [
+    " result/\n",
+    "|-- file_1\n",
+    "|-- file_2\n",
+    "|-- folder_a/\n",
+    "|--|-- file_a\n",
+    "|--|-- folder_b/\n",
+    "|--|--|-- index.html\n",
+    "|--|--|-- folder_c/\n",
+    "|--|--|--|-- file_c\n",
+    "|-- folder_1/\n",
+    "|--|-- index.html",
+]
 
 
 if __name__ == "__main__":