diff --git a/README.rst b/README.rst index 6a32705..1af47fb 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,20 @@ Job Output Folder Data Type Plugin ================================== -This is the data type plugin for Qiita jobs that only create a folder and the command is responsible of generating all the files. +The goal of this Qiita type plugin is to validate and summarize any kind of folder output. + +Note that `job-output-folder` expects a single folder and this will become an artifact that will live in +`[qiita-base-path]/job-output-folder/[artifact-id]/[output-folder]` and this plugin will generate: + +- `summary.html`: a browser-friendly file listing that will include all files at `[artifact-id]/[output-folder]` and + any `index.html` files in any subfolder. As a reminder, the basic Qiita nginx configuration allows any available + HTML/JS file to be displayed/loaded; thus, any available `index.html` file is displayed properly. +- `MANIFEST.txt`: a comprehensive list of all available files in the folder. + +The two main plugins using this output are: + +- https://github.com/qiita-spots/qp-knight-lab-processing: which will generate an `[output-folder]` containing all the logs, + files and summaries from BCL to clean FASTQ processing. Note that multiqc results are part of this and the outputs are + properly displayed in Qiita using this method. +- https://github.com/qiita-spots/qp-pacbio: `PacBio processing`, the outputs include MAG, LCG and others, which will be used + for downstream analyses. diff --git a/qtp_job_output_folder/summary.py b/qtp_job_output_folder/summary.py index 019f272..e797a22 100644 --- a/qtp_job_output_folder/summary.py +++ b/qtp_job_output_folder/summary.py @@ -6,37 +6,60 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from glob import glob from json import dumps -from os.path import dirname, exists, isdir, join +from os import sep, walk +from os.path import basename, dirname, exists, isdir, join def _folder_listing(folder): - results = [] - for f in glob(f"{folder}/*"): - if isdir(f): - results.append(("folder", f)) - results.extend(_folder_listing(f"{f}/*")) - else: - results.append(("file", f)) - return results + index, manifest = [], [] + # only adding main files on top directory + # and index.html at any level + separator = "|--" + for dpath, _, files in walk(folder): + # assuring same order, mainly for testing + files.sort() + + # if we are at the top, we should add + # all files + if dpath == folder: + for f in files: + index.append(("file", f"{dpath}/{f}")) + # if we are not at the top, we should only add + # the index.html files + elif "index.html" in files: + index.append(("file", f"{dpath}/index.html")) + + depth = dpath.replace(folder, "").count(sep) + space = separator * depth + manifest.append(f"{space} {basename(dpath)}/") + for filename in files: + manifest.append(f"{space}{separator} {filename}") + + return index, manifest def _generate_html_summary(jid, folder, out_dir): summary = f"