From 8763170935a678cad12b66e50663651091af267a Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 19 Dec 2024 14:21:26 -0700 Subject: [PATCH 1/6] fix #3389 --- qiita_pet/handlers/download.py | 130 +++++++++++++++++- .../templates/study_ajax/prep_summary.html | 3 + qiita_pet/webserver.py | 5 +- 3 files changed, 135 insertions(+), 3 deletions(-) diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index 5ce61aa6a..3ce3402ab 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -9,7 +9,7 @@ from tornado.web import authenticated, HTTPError from tornado.gen import coroutine -from os.path import basename, getsize, join, isdir +from os.path import basename, getsize, join, isdir, getctime from os import walk from .base_handlers import BaseHandler @@ -23,7 +23,7 @@ from qiita_db.util import (filepath_id_to_rel_path, get_db_files_base_dir, get_filepath_information, get_mountpoint, filepath_id_to_object_id, get_data_types, - retrieve_filepaths) + retrieve_filepaths, get_work_base_dir) from qiita_db.meta_util import validate_filepath_access_by_user from qiita_db.metadata_template.sample_template import SampleTemplate from qiita_db.metadata_template.prep_template import PrepTemplate @@ -35,6 +35,10 @@ from uuid import uuid4 from base64 import b64encode from datetime import datetime, timedelta, timezone +from tempfile import mkdtemp +from zipfile import ZipFile +from io import BytesIO +from shutil import copyfile class BaseHandlerDownload(BaseHandler): @@ -374,6 +378,128 @@ def get(self, path): self.finish() +class DownloadDataReleaseFromPrep(BaseHandlerDownload): + @authenticated + @coroutine + @execute_as_transaction + def get(self, prep_template_id): + user = self.current_user + if user.level not in ('admin', 'web-lab admin'): + raise HTTPError(403, reason="%s doesn't have access to download " + "the data release files" % user.email) + + pid = int(prep_template_id) + pt = PrepTemplate(pid) + sid = pt.study_id + st = 
SampleTemplate(sid) + date = datetime.now().strftime('%m%d%y-%H%M%S') + td = mkdtemp(dir=get_work_base_dir()) + + files = [] + readme = [ + f'Delivery created on {date}', + '', + f'Host (human) removal: {pt.artifact.human_reads_filter_method}', + '', + # this is not changing in the near future so just leaving + # hardcoded for now + 'Main woltka reference: WoLr2, more info visit: ' + 'https://ftp.microbio.me/pub/wol2/', + '', + f"Qiita's prep: https://qiita.ucsd.edu/study/description/{sid}" + f"?prep_id={pid}", + '', + ] + + human_names = { + 'ec.biom': 'KEGG Enzyme (EC)', + 'per-gene.biom': 'Per gene Predictions', + 'none.biom': 'Per genome Predictions', + 'cell_counts.biom': 'Cell counts', + 'pathway.biom': 'KEGG Pathway', + 'ko.biom': 'KEGG Ontology (KO)', + 'rna_copy_counts.biom': 'RNA copy counts' + } + + fn = join(td, f'sample_information_from_prep_{pid}.tsv') + readme.append(f'Sample information: {basename(fn)}') + files.append(fn) + st.to_dataframe(samples=list(pt)).to_csv(fn, sep='\t') + + fn = join(td, f'prep_information_{pid}.tsv') + readme.append(f'Prep information: {basename(fn)}') + files.append(fn) + pt.to_dataframe().to_csv(fn, sep='\t') + + readme.append('') + + bioms = dict() + coverages = None + for a in Study(sid).artifacts(artifact_type='BIOM'): + if a.prep_templates[0].id != pid: + continue + biom = None + for fp in a.filepaths: + if fp['fp_type'] == 'biom': + biom = fp + if coverages is None and 'coverages.tgz' == basename(fp['fp']): + coverages = fp['fp'] + if biom is None: + continue + biom_fn = basename(biom['fp']) + if biom_fn not in bioms: + bioms[biom_fn] = [a, biom] + else: + if getctime(biom['fp']) > getctime(bioms[biom_fn][1]['fp']): + bioms[biom_fn] = [a, biom] + + for fn, (a, fp) in bioms.items(): + aname = basename(fp["fp"]) + nname = f'{a.id}_{aname}' + nfile = join(td, nname) + copyfile(fp['fp'], nfile) + files.append(nfile) + + hname = '' + if aname in human_names: + hname = human_names[aname] + 
readme.append(f'{nname}\t{hname}') + + for an in a.ancestors.nodes(): + p = an.processing_parameters + if p is not None: + c = p.command + cn = c.name + s = c.software + sn = s.name + sv = s.version + pd = p.dump() + readme.append(f'\t{cn}\t{sn}\t{sv}\t{pd}') + + if coverages is not None: + aname = basename(fp["fp"]) + nfile = join(td, aname) + copyfile(fp['fp'], nfile) + files.append(nfile) + + fn = join(td, 'README.txt') + with open(fn, 'w') as fp: + fp.write('\n'.join(readme)) + files.append(fn) + + zp_fn = f'data_release_{pid}_{date}.zip' + zp = BytesIO() + with ZipFile(zp, 'w') as zipf: + for fp in files: + zipf.write(fp, basename(fp)) + + self.set_header('Content-Type', 'application/zip') + self.set_header("Content-Disposition", f"attachment; filename={zp_fn}") + self.write(zp.getvalue()) + zp.close() + self.finish() + + class DownloadPublicHandler(BaseHandlerDownload): @coroutine @execute_as_transaction diff --git a/qiita_pet/templates/study_ajax/prep_summary.html b/qiita_pet/templates/study_ajax/prep_summary.html index 25674e8a1..adbf63415 100644 --- a/qiita_pet/templates/study_ajax/prep_summary.html +++ b/qiita_pet/templates/study_ajax/prep_summary.html @@ -447,6 +447,9 @@

{% end %} {% if editable %}
+ {% if user_level in ('admin', 'wet-lab admin') %} + Dowload Data Release + {% end %} {% if deprecated %} Remove Deprecation {% else%} diff --git a/qiita_pet/webserver.py b/qiita_pet/webserver.py index 74367489b..17fbf7af8 100644 --- a/qiita_pet/webserver.py +++ b/qiita_pet/webserver.py @@ -55,7 +55,8 @@ DownloadHandler, DownloadStudyBIOMSHandler, DownloadRelease, DownloadRawData, DownloadEBISampleAccessions, DownloadEBIPrepAccessions, DownloadUpload, DownloadPublicHandler, DownloadPublicArtifactHandler, - DownloadSampleInfoPerPrep, DownloadPrivateArtifactHandler) + DownloadSampleInfoPerPrep, DownloadPrivateArtifactHandler, + DownloadDataReleaseFromPrep) from qiita_pet.handlers.prep_template import ( PrepTemplateHandler, PrepTemplateGraphHandler, PrepTemplateJobHandler) from qiita_pet.handlers.ontology import OntologyHandler @@ -194,6 +195,8 @@ def __init__(self): (r"/software/", SoftwareHandler), (r"/workflows/", WorkflowsHandler), (r"/download/(.*)", DownloadHandler), + (r"/download_data_release_from_prep/(.*)", + DownloadDataReleaseFromPrep), (r"/download_study_bioms/(.*)", DownloadStudyBIOMSHandler), (r"/download_raw_data/(.*)", DownloadRawData), (r"/download_ebi_accessions/samples/(.*)", From faf6d7cb08befc4a8a5445650add9725a158d92f Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 19 Dec 2024 14:58:13 -0700 Subject: [PATCH 2/6] add coverages --- qiita_pet/handlers/download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index 3ce3402ab..bff64cd1c 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -477,9 +477,9 @@ def get(self, prep_template_id): readme.append(f'\t{cn}\t{sn}\t{sv}\t{pd}') if coverages is not None: - aname = basename(fp["fp"]) + aname = basename(coverages) nfile = join(td, aname) - copyfile(fp['fp'], nfile) + copyfile(coverages, nfile) files.append(nfile) fn = join(td, 'README.txt') From 
56eb4f32718a0dde9671b7267289cee6502e7f07 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 19 Dec 2024 15:18:21 -0700 Subject: [PATCH 3/6] update html [no ci] --- qiita_pet/templates/study_ajax/prep_summary.html | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/qiita_pet/templates/study_ajax/prep_summary.html b/qiita_pet/templates/study_ajax/prep_summary.html index adbf63415..001f313b6 100644 --- a/qiita_pet/templates/study_ajax/prep_summary.html +++ b/qiita_pet/templates/study_ajax/prep_summary.html @@ -447,8 +447,10 @@

{% end %} {% if editable %}
- {% if user_level in ('admin', 'wet-lab admin') %} - Dowload Data Release + {% if user_level in ('admin', 'wet-lab admin') and data_type in {'Metagenomic', 'Metatranscriptomic'} %} + {% end %} {% if deprecated %} Remove Deprecation From e83913dde365869c1518bbea85b516c97e1ff0b9 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Fri, 20 Dec 2024 07:05:43 -0700 Subject: [PATCH 4/6] no copyfile --- qiita_pet/handlers/download.py | 37 +++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index bff64cd1c..5cf9cd987 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -38,7 +38,6 @@ from tempfile import mkdtemp from zipfile import ZipFile from io import BytesIO -from shutil import copyfile class BaseHandlerDownload(BaseHandler): @@ -383,6 +382,10 @@ class DownloadDataReleaseFromPrep(BaseHandlerDownload): @coroutine @execute_as_transaction def get(self, prep_template_id): + """ This method constructs an on the fly ZIP with all the files + required for a data-prep release/data-delivery. 
Mainly sample, prep + info, bioms and coverage + """ user = self.current_user if user.level not in ('admin', 'web-lab admin'): raise HTTPError(403, reason="%s doesn't have access to download " @@ -411,6 +414,7 @@ def get(self, prep_template_id): '', ] + # helper dict to add "user/human" friendly names to the bioms human_names = { 'ec.biom': 'KEGG Enzyme (EC)', 'per-gene.biom': 'Per gene Predictions', @@ -421,18 +425,21 @@ def get(self, prep_template_id): 'rna_copy_counts.biom': 'RNA copy counts' } + # sample-info creation fn = join(td, f'sample_information_from_prep_{pid}.tsv') readme.append(f'Sample information: {basename(fn)}') - files.append(fn) + files.append([fn, basename(fn)]) st.to_dataframe(samples=list(pt)).to_csv(fn, sep='\t') + # prep-info creation fn = join(td, f'prep_information_{pid}.tsv') readme.append(f'Prep information: {basename(fn)}') - files.append(fn) + files.append([fn, basename(fn)]) pt.to_dataframe().to_csv(fn, sep='\t') readme.append('') + # finding the bioms to be added bioms = dict() coverages = None for a in Study(sid).artifacts(artifact_type='BIOM'): @@ -447,25 +454,27 @@ def get(self, prep_template_id): if biom is None: continue biom_fn = basename(biom['fp']) + # there is a small but real chance that the same prep has the same + # artifacts so using the latest if biom_fn not in bioms: bioms[biom_fn] = [a, biom] else: if getctime(biom['fp']) > getctime(bioms[biom_fn][1]['fp']): bioms[biom_fn] = [a, biom] + # once we have all the bioms, we can add them to the list of zips + # and to the readme the biom details and all the processing for fn, (a, fp) in bioms.items(): aname = basename(fp["fp"]) nname = f'{a.id}_{aname}' - nfile = join(td, nname) - copyfile(fp['fp'], nfile) - files.append(nfile) + files.append(fp['fp'], nname) hname = '' if aname in human_names: hname = human_names[aname] readme.append(f'{nname}\t{hname}') - for an in a.ancestors.nodes(): + for an in set(a.ancestors.nodes()): p = an.processing_parameters if p is not None: 
c = p.command @@ -476,22 +485,22 @@ def get(self, prep_template_id): pd = p.dump() readme.append(f'\t{cn}\t{sn}\t{sv}\t{pd}') + # if a coverage was found, add it to the list of files if coverages is not None: - aname = basename(coverages) - nfile = join(td, aname) - copyfile(coverages, nfile) - files.append(nfile) + fn = basename(coverages) + readme.append(f'{fn}\tcoverage files') + files.append([coverages, fn]) fn = join(td, 'README.txt') with open(fn, 'w') as fp: fp.write('\n'.join(readme)) - files.append(fn) + files.append([fn, basename(fn)]) zp_fn = f'data_release_{pid}_{date}.zip' zp = BytesIO() with ZipFile(zp, 'w') as zipf: - for fp in files: - zipf.write(fp, basename(fp)) + for fp, fn in files: + zipf.write(fp, fn) self.set_header('Content-Type', 'application/zip') self.set_header("Content-Disposition", f"attachment; filename={zp_fn}") From b21348f3093f7f20d26f6dd3daff425bc538029d Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Fri, 20 Dec 2024 07:13:20 -0700 Subject: [PATCH 5/6] missing [ - no ci --- qiita_pet/handlers/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index 5cf9cd987..60e029f29 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -467,7 +467,7 @@ def get(self, prep_template_id): for fn, (a, fp) in bioms.items(): aname = basename(fp["fp"]) nname = f'{a.id}_{aname}' - files.append(fp['fp'], nname) + files.append([fp['fp'], nname]) hname = '' if aname in human_names: From 428413114f4094366b90e11b52baa5ff428b83e3 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 2 Jan 2025 06:37:13 -0700 Subject: [PATCH 6/6] Dowload -> Download --- qiita_pet/support_files/doc/source/faq.rst | 2 +- qiita_pet/templates/study_ajax/prep_summary.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qiita_pet/support_files/doc/source/faq.rst b/qiita_pet/support_files/doc/source/faq.rst index 
be56ae7b2..96e8f4154 100755 --- a/qiita_pet/support_files/doc/source/faq.rst +++ b/qiita_pet/support_files/doc/source/faq.rst @@ -316,7 +316,7 @@ To take advantage of this feature you need to: server that is OK to give access to the key created to your storage-shed; note that if you want to completely stop that key to work you can open that file and remove the line with the name of this key. -#. Dowload your new generated key `qiita-key` (the file) to your local computer and use it +#. Download your newly generated key `qiita-key` (the file) to your local computer and use it in the `Key` option of "Upload via Remote Server (ADVANCED)". Using this key you can `List Files` to test the connection and verify the list of study files. Then, diff --git a/qiita_pet/templates/study_ajax/prep_summary.html b/qiita_pet/templates/study_ajax/prep_summary.html index 001f313b6..a36dcbd1c 100644 --- a/qiita_pet/templates/study_ajax/prep_summary.html +++ b/qiita_pet/templates/study_ajax/prep_summary.html @@ -449,7 +449,7 @@


{% if user_level in ('admin', 'wet-lab admin') and data_type in {'Metagenomic', 'Metatranscriptomic'} %} {% end %} {% if deprecated %}