From 063cd858bd22e9c0991a5adf3b851eec24732084 Mon Sep 17 00:00:00 2001
From: Antonio Gonzalez
Date: Fri, 5 Dec 2025 10:55:37 -0700
Subject: [PATCH 1/3] flake8 to ruff

---
 .github/workflows/qiita-ci.yml | 6 +-
 .../generate-allocation-summary-arrays.py | 299 +-
 .../generate-allocation-summary.py | 123 +-
 qiita_core/configuration_manager.py | 267 +-
 qiita_core/environment_manager.py | 62 +-
 qiita_core/exceptions.py | 18 +-
 qiita_core/qiita_settings.py | 10 +-
 qiita_core/testing.py | 9 +-
 .../tests/test_configuration_manager.py | 148 +-
 qiita_core/tests/test_util.py | 41 +-
 qiita_core/util.py | 57 +-
 qiita_db/__init__.py | 69 +-
 qiita_db/analysis.py | 381 +-
 qiita_db/archive.py | 57 +-
 qiita_db/artifact.py | 488 ++-
 qiita_db/base.py | 19 +-
 qiita_db/commands.py | 127 +-
 qiita_db/download_link.py | 12 +-
 qiita_db/environment_manager.py | 228 +-
 qiita_db/exceptions.py | 50 +-
 qiita_db/handlers/analysis.py | 21 +-
 qiita_db/handlers/archive.py | 19 +-
 qiita_db/handlers/artifact.py | 207 +-
 qiita_db/handlers/core.py | 5 +-
 qiita_db/handlers/oauth2.py | 211 +-
 qiita_db/handlers/plugin.py | 97 +-
 qiita_db/handlers/prep_template.py | 59 +-
 qiita_db/handlers/processing_job.py | 48 +-
 qiita_db/handlers/reference.py | 14 +-
 qiita_db/handlers/sample_information.py | 5 +-
 qiita_db/handlers/studies.py | 11 +-
 qiita_db/handlers/tests/oauthbase.py | 54 +-
 qiita_db/handlers/tests/test_analysis.py | 148 +-
 qiita_db/handlers/tests/test_archive.py | 39 +-
 qiita_db/handlers/tests/test_artifact.py | 484 +--
 qiita_db/handlers/tests/test_core.py | 12 +-
 qiita_db/handlers/tests/test_oauth2.py | 381 +-
 qiita_db/handlers/tests/test_plugin.py | 302 +-
 qiita_db/handlers/tests/test_prep_template.py | 307 +-
 .../handlers/tests/test_processing_job.py | 372 +-
 qiita_db/handlers/tests/test_reference.py | 28 +-
 .../handlers/tests/test_sample_information.py | 111 +-
 qiita_db/handlers/tests/test_studies.py | 19 +-
 qiita_db/handlers/tests/test_user.py | 50 +-
 qiita_db/handlers/tests/test_util.py | 14 +-
 qiita_db/handlers/user.py | 17 +-
 qiita_db/handlers/util.py | 2 +-
 qiita_db/investigation.py | 1 +
 qiita_db/logger.py | 16 +-
 qiita_db/meta_util.py | 395 +-
 qiita_db/metadata_template/__init__.py | 5 +-
 .../base_metadata_template.py | 396 +-
 qiita_db/metadata_template/constants.py | 92 +-
 qiita_db/metadata_template/prep_template.py | 328 +-
 qiita_db/metadata_template/sample_template.py | 72 +-
 .../test/test_base_metadata_template.py | 186 +-
 .../test/test_prep_template.py | 2277 ++++++-----
 .../test/test_sample_template.py | 3384 ++++++++++-------
 qiita_db/metadata_template/test/test_util.py | 899 +++--
 qiita_db/metadata_template/util.py | 159 +-
 qiita_db/ontology.py | 12 +-
 qiita_db/portal.py | 73 +-
 qiita_db/processing_job.py | 1050 ++---
 qiita_db/reference.py | 36 +-
 qiita_db/software.py | 365 +-
 qiita_db/sql_connection.py | 90 +-
 qiita_db/study.py | 273 +-
 qiita_db/test/support_files/worker.py | 33 +-
 qiita_db/test/test_analysis.py | 649 ++--
 qiita_db/test/test_archive.py | 86 +-
 qiita_db/test/test_artifact.py | 1281 ++++---
 qiita_db/test/test_base.py | 10 +-
 qiita_db/test/test_commands.py | 317 +-
 qiita_db/test/test_logger.py | 44 +-
 qiita_db/test/test_meta_util.py | 351 +-
 qiita_db/test/test_ontology.py | 47 +-
 qiita_db/test/test_portal.py | 141 +-
 qiita_db/test/test_processing_job.py | 1125 +++---
 qiita_db/test/test_reference.py | 33 +-
 qiita_db/test/test_setup.py | 8 +-
 qiita_db/test/test_software.py | 1306 ++++---
 qiita_db/test/test_sql.py | 146 +-
 qiita_db/test/test_sql_connection.py | 183 +-
 qiita_db/test/test_study.py | 859 +++--
 qiita_db/test/test_user.py | 418 +-
 qiita_db/test/test_util.py | 1304 ++++---
 qiita_db/user.py | 152 +-
 qiita_db/util.py | 987 ++---
 qiita_pet/exceptions.py | 9 +-
 qiita_pet/handlers/admin_processing_job.py | 112 +-
 .../handlers/analysis_handlers/__init__.py | 33 +-
 .../analysis_handlers/base_handlers.py | 142 +-
 .../analysis_handlers/listing_handlers.py | 105 +-
 .../analysis_handlers/sharing_handlers.py | 37 +-
 .../tests/test_base_handlers.py | 575 ++-
 .../tests/test_listing_handlers.py | 15 +-
 .../tests/test_sharing_handlers.py | 45 +-
 .../analysis_handlers/tests/test_util.py | 12 +-
 qiita_pet/handlers/analysis_handlers/util.py | 3 +-
 qiita_pet/handlers/api_proxy/__init__.py | 143 +-
 qiita_pet/handlers/api_proxy/artifact.py | 186 +-
 qiita_pet/handlers/api_proxy/ontology.py | 28 +-
 qiita_pet/handlers/api_proxy/prep_template.py | 376 +-
 qiita_pet/handlers/api_proxy/processing.py | 239 +-
 .../handlers/api_proxy/sample_template.py | 100 +-
 qiita_pet/handlers/api_proxy/studies.py | 282 +-
 .../handlers/api_proxy/tests/test_artifact.py | 498 ++-
 .../handlers/api_proxy/tests/test_ontology.py | 25 +-
 .../api_proxy/tests/test_prep_template.py | 979 +++--
 .../api_proxy/tests/test_processing.py | 365 +-
 .../api_proxy/tests/test_sample_template.py | 518 ++-
 .../handlers/api_proxy/tests/test_studies.py | 845 ++--
 .../handlers/api_proxy/tests/test_user.py | 17 +-
 .../handlers/api_proxy/tests/test_util.py | 32 +-
 qiita_pet/handlers/api_proxy/user.py | 25 +-
 qiita_pet/handlers/api_proxy/util.py | 14 +-
 .../handlers/artifact_handlers/__init__.py | 5 +-
 .../artifact_handlers/base_handlers.py | 274 +-
 .../tests/test_base_handlers.py | 540 +--
 qiita_pet/handlers/auth_handlers.py | 121 +-
 qiita_pet/handlers/base_handlers.py | 54 +-
 qiita_pet/handlers/cloud_handlers/__init__.py | 10 +-
 .../cloud_handlers/file_transfer_handlers.py | 69 +-
 .../tests/test_file_transfer_handlers.py | 73 +-
 qiita_pet/handlers/download.py | 545 +--
 qiita_pet/handlers/logger_handlers.py | 18 +-
 qiita_pet/handlers/ontology.py | 13 +-
 qiita_pet/handlers/portal.py | 45 +-
 qiita_pet/handlers/prep_template.py | 51 +-
 qiita_pet/handlers/public.py | 99 +-
 qiita_pet/handlers/qiita_redbiom.py | 91 +-
 qiita_pet/handlers/resources.py | 164 +-
 qiita_pet/handlers/rest/__init__.py | 52 +-
 qiita_pet/handlers/rest/rest_handler.py | 8 +-
 qiita_pet/handlers/rest/study.py | 128 +-
 qiita_pet/handlers/rest/study_association.py | 78 +-
 qiita_pet/handlers/rest/study_person.py | 45 +-
 qiita_pet/handlers/rest/study_preparation.py | 44 +-
 qiita_pet/handlers/rest/study_samples.py | 109 +-
 qiita_pet/handlers/software.py | 63 +-
 qiita_pet/handlers/stats.py | 64 +-
 qiita_pet/handlers/study_handlers/__init__.py | 113 +-
 qiita_pet/handlers/study_handlers/artifact.py | 143 +-
 qiita_pet/handlers/study_handlers/base.py | 137 +-
 .../handlers/study_handlers/ebi_handlers.py | 170 +-
 .../handlers/study_handlers/edit_handlers.py | 182 +-
 .../study_handlers/listing_handlers.py | 121 +-
 .../handlers/study_handlers/prep_template.py | 122 +-
 .../handlers/study_handlers/processing.py | 66 +-
 .../study_handlers/sample_template.py | 354 +-
 .../study_handlers/tests/test_artifact.py | 295 +-
 .../study_handlers/tests/test_base.py | 74 +-
 .../study_handlers/tests/test_ebi_handlers.py | 23 +-
 .../tests/test_edit_handlers.py | 164 +-
 .../tests/test_listing_handlers.py | 225 +-
 .../tests/test_prep_template.py | 14 +-
 .../study_handlers/tests/test_processing.py | 264 +-
 .../tests/test_sample_template.py | 779 ++--
 .../handlers/study_handlers/vamps_handlers.py | 97 +-
qiita_pet/handlers/upload.py | 201 +- qiita_pet/handlers/user_handlers.py | 255 +- qiita_pet/handlers/util.py | 40 +- qiita_pet/handlers/websocket_handlers.py | 57 +- qiita_pet/nginx_example.conf | 2 +- qiita_pet/portal.py | 29 +- qiita_pet/test/rest/test_base.py | 11 +- qiita_pet/test/rest/test_sample_detail.py | 235 +- qiita_pet/test/rest/test_study.py | 211 +- .../test/rest/test_study_associations.py | 355 +- qiita_pet/test/rest/test_study_person.py | 105 +- qiita_pet/test/rest/test_study_preparation.py | 181 +- qiita_pet/test/rest/test_study_samples.py | 587 +-- .../test_admin_processing_job_handlers.py | 56 +- qiita_pet/test/test_auth_handlers.py | 44 +- qiita_pet/test/test_base_handlers.py | 16 +- qiita_pet/test/test_download.py | 629 +-- qiita_pet/test/test_logger.py | 7 +- qiita_pet/test/test_ontology.py | 10 +- qiita_pet/test/test_portal.py | 51 +- qiita_pet/test/test_prep_template.py | 161 +- qiita_pet/test/test_public.py | 51 +- qiita_pet/test/test_qiita_redbiom.py | 186 +- qiita_pet/test/test_software.py | 430 ++- qiita_pet/test/test_upload.py | 51 +- qiita_pet/test/test_user_handlers.py | 87 +- qiita_pet/test/test_util.py | 59 +- qiita_pet/test/test_websocket_handlers.py | 17 +- qiita_pet/test/tornado_test_base.py | 45 +- qiita_pet/util.py | 98 +- qiita_pet/webserver.py | 265 +- qiita_ware/commands.py | 253 +- qiita_ware/ebi.py | 870 +++-- qiita_ware/exceptions.py | 7 + qiita_ware/metadata_pipeline.py | 51 +- qiita_ware/private_plugin.py | 290 +- qiita_ware/test/test_commands.py | 225 +- qiita_ware/test/test_ebi.py | 1149 +++--- qiita_ware/test/test_metadata_pipeline.py | 105 +- qiita_ware/test/test_private_plugin.py | 793 ++-- scripts/qiita | 376 +- scripts/qiita-auto-processing | 352 +- scripts/qiita-cron-job | 40 +- scripts/qiita-env | 60 +- scripts/qiita-private-launcher | 19 +- scripts/qiita-private-launcher-slurm | 34 +- scripts/qiita-private-plugin | 8 +- scripts/qiita-recover-jobs | 186 +- scripts/qiita-test-install | 337 +- setup.py | 214 +- .../studies/895/prep_template_895.txt | 2 +- 210 files changed, 26856 insertions(+), 20010 deletions(-) diff --git a/.github/workflows/qiita-ci.yml b/.github/workflows/qiita-ci.yml index 3abf6984f..6b6c1bc4b 100644 --- a/.github/workflows/qiita-ci.yml +++ b/.github/workflows/qiita-ci.yml @@ -252,7 +252,7 @@ jobs: lint: runs-on: ubuntu-latest steps: - - name: flake8 + - name: ruff uses: actions/setup-python@v2 with: python-version: "3.9" @@ -262,5 +262,5 @@ jobs: uses: actions/checkout@v2 - name: lint run: | - pip install -q flake8 - flake8 qiita_* setup.py scripts/qiita* notebooks/*/*.py + pip install -q ruff + ruff check qiita_* setup.py scripts/qiita* notebooks/*/*.py diff --git a/notebooks/resource-allocation/generate-allocation-summary-arrays.py b/notebooks/resource-allocation/generate-allocation-summary-arrays.py index eef363e7d..d4ed604a3 100644 --- a/notebooks/resource-allocation/generate-allocation-summary-arrays.py +++ b/notebooks/resource-allocation/generate-allocation-summary-arrays.py @@ -1,10 +1,12 @@ -from qiita_db.util import MaxRSS_helper -from qiita_db.software import Software import datetime from io import StringIO +from os.path import join from subprocess import check_output + import pandas as pd -from os.path import join + +from qiita_db.software import Software +from qiita_db.util import MaxRSS_helper # This is an example script to collect the data we need from SLURM, the plan # is that in the near future we will clean up and add these to the Qiita's main @@ -22,10 +24,9 @@ # Here we are only going to create 
summaries for (*) -sacct = ['sacct', '-p', - '--format=JobName,JobID,ElapsedRaw,MaxRSS,ReqMem', '-j'] +sacct = ["sacct", "-p", "--format=JobName,JobID,ElapsedRaw,MaxRSS,ReqMem", "-j"] # for the non admin jobs, we will use jobs from the last six months -six_months = datetime.date.today() - datetime.timedelta(weeks=6*4) +six_months = datetime.date.today() - datetime.timedelta(weeks=6 * 4) print('The current "sofware - commands" that use job-arrays are:') for s in Software.iter(): @@ -35,122 +36,152 @@ # 1. Command: woltka -fn = join('/panfs', 'qiita', 'jobs_woltka.tsv.gz') +fn = join("/panfs", "qiita", "jobs_woltka.tsv.gz") print(f"Generating the summary for the woltka jobs: {fn}.") -cmds = [c for s in Software.iter(False) - if 'woltka' in s.name for c in s.commands] -jobs = [j for c in cmds for j in c.processing_jobs if j.status == 'success' and - j.heartbeat.date() > six_months and j.input_artifacts] +cmds = [c for s in Software.iter(False) if "woltka" in s.name for c in s.commands] +jobs = [ + j + for c in cmds + for j in c.processing_jobs + if j.status == "success" and j.heartbeat.date() > six_months and j.input_artifacts +] data = [] for j in jobs: - size = sum([fp['fp_size'] for fp in j.input_artifacts[0].filepaths]) + size = sum([fp["fp_size"] for fp in j.input_artifacts[0].filepaths]) jid, mjid = j.external_id.strip().split() - rvals = StringIO(check_output(sacct + [jid]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') - jmem = _d.MaxRSS.apply(lambda x: x if type(x) is not str - else MaxRSS_helper(x)).max() + rvals = StringIO(check_output(sacct + [jid]).decode("ascii")) + _d = pd.read_csv(rvals, sep="|") + jmem = _d.MaxRSS.apply( + lambda x: x if type(x) is not str else MaxRSS_helper(x) + ).max() jwt = _d.ElapsedRaw.max() - rvals = StringIO(check_output(sacct + [mjid]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') - mmem = _d.MaxRSS.apply(lambda x: x if type(x) is not str - else MaxRSS_helper(x)).max() + rvals = StringIO(check_output(sacct + [mjid]).decode("ascii")) + _d = pd.read_csv(rvals, sep="|") + mmem = _d.MaxRSS.apply( + lambda x: x if type(x) is not str else MaxRSS_helper(x) + ).max() mwt = _d.ElapsedRaw.max() - data.append({ - 'jid': j.id, 'sjid': jid, 'mem': jmem, 'wt': jwt, 'type': 'main', - 'db': j.parameters.values['Database'].split('/')[-1]}) data.append( - {'jid': j.id, 'sjid': mjid, 'mem': mmem, 'wt': mwt, 'type': 'merge', - 'db': j.parameters.values['Database'].split('/')[-1]}) + { + "jid": j.id, + "sjid": jid, + "mem": jmem, + "wt": jwt, + "type": "main", + "db": j.parameters.values["Database"].split("/")[-1], + } + ) + data.append( + { + "jid": j.id, + "sjid": mjid, + "mem": mmem, + "wt": mwt, + "type": "merge", + "db": j.parameters.values["Database"].split("/")[-1], + } + ) df = pd.DataFrame(data) -df.to_csv(fn, sep='\t', index=False) +df.to_csv(fn, sep="\t", index=False) # 2. 
qp-meta Sortmerna -fn = join('/panfs', 'qiita', 'jobs_sortmerna.tsv.gz') +fn = join("/panfs", "qiita", "jobs_sortmerna.tsv.gz") print(f"Generating the summary for the woltka jobs: {fn}.") # for woltka we will only use jobs from the last 6 months -cmds = [c for s in Software.iter(False) - if 'minimap2' in s.name.lower() for c in s.commands] -jobs = [j for c in cmds for j in c.processing_jobs if j.status == 'success' and - j.heartbeat.date() > six_months and j.input_artifacts] +cmds = [ + c for s in Software.iter(False) if "minimap2" in s.name.lower() for c in s.commands +] +jobs = [ + j + for c in cmds + for j in c.processing_jobs + if j.status == "success" and j.heartbeat.date() > six_months and j.input_artifacts +] data = [] for j in jobs: - size = sum([fp['fp_size'] for fp in j.input_artifacts[0].filepaths]) + size = sum([fp["fp_size"] for fp in j.input_artifacts[0].filepaths]) jid, mjid = j.external_id.strip().split() - rvals = StringIO(check_output(sacct + [jid]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') - jmem = _d.MaxRSS.apply(lambda x: x if type(x) is not str - else MaxRSS_helper(x)).max() + rvals = StringIO(check_output(sacct + [jid]).decode("ascii")) + _d = pd.read_csv(rvals, sep="|") + jmem = _d.MaxRSS.apply( + lambda x: x if type(x) is not str else MaxRSS_helper(x) + ).max() jwt = _d.ElapsedRaw.max() - rvals = StringIO(check_output(sacct + [mjid]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') - mmem = _d.MaxRSS.apply(lambda x: x if type(x) is not str - else MaxRSS_helper(x)).max() + rvals = StringIO(check_output(sacct + [mjid]).decode("ascii")) + _d = pd.read_csv(rvals, sep="|") + mmem = _d.MaxRSS.apply( + lambda x: x if type(x) is not str else MaxRSS_helper(x) + ).max() mwt = _d.ElapsedRaw.max() - data.append({ - 'jid': j.id, 'sjid': jid, 'mem': jmem, 'wt': jwt, 'type': 'main'}) - data.append( - {'jid': j.id, 'sjid': mjid, 'mem': mmem, 'wt': mwt, 'type': 'merge'}) + data.append({"jid": j.id, "sjid": jid, "mem": jmem, "wt": jwt, "type": "main"}) + data.append({"jid": j.id, "sjid": mjid, "mem": mmem, "wt": mwt, "type": "merge"}) df = pd.DataFrame(data) -df.to_csv(fn, sep='\t', index=False) +df.to_csv(fn, sep="\t", index=False) # 3. Adapter and host filtering. 
Note that there is a new version deployed on # Jan 2024 so the current results will not be the most accurate -fn = join('/panfs', 'qiita', 'jobs_adapter_host.tsv.gz') +fn = join("/panfs", "qiita", "jobs_adapter_host.tsv.gz") print(f"Generating the summary for the woltka jobs: {fn}.") # for woltka we will only use jobs from the last 6 months -cmds = [c for s in Software.iter(False) - if 'meta' in s.name.lower() for c in s.commands] -jobs = [j for c in cmds if 'sortmerna' in c.name.lower() - for j in c.processing_jobs if j.status == 'success' and - j.heartbeat.date() > six_months and j.input_artifacts] +cmds = [c for s in Software.iter(False) if "meta" in s.name.lower() for c in s.commands] +jobs = [ + j + for c in cmds + if "sortmerna" in c.name.lower() + for j in c.processing_jobs + if j.status == "success" and j.heartbeat.date() > six_months and j.input_artifacts +] data = [] for j in jobs: - size = sum([fp['fp_size'] for fp in j.input_artifacts[0].filepaths]) + size = sum([fp["fp_size"] for fp in j.input_artifacts[0].filepaths]) jid, mjid = j.external_id.strip().split() - rvals = StringIO(check_output(sacct + [jid]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') - jmem = _d.MaxRSS.apply(lambda x: x if type(x) is not str - else MaxRSS_helper(x)).max() + rvals = StringIO(check_output(sacct + [jid]).decode("ascii")) + _d = pd.read_csv(rvals, sep="|") + jmem = _d.MaxRSS.apply( + lambda x: x if type(x) is not str else MaxRSS_helper(x) + ).max() jwt = _d.ElapsedRaw.max() - rvals = StringIO(check_output(sacct + [mjid]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') - mmem = _d.MaxRSS.apply(lambda x: x if type(x) is not str - else MaxRSS_helper(x)).max() + rvals = StringIO(check_output(sacct + [mjid]).decode("ascii")) + _d = pd.read_csv(rvals, sep="|") + mmem = _d.MaxRSS.apply( + lambda x: x if type(x) is not str else MaxRSS_helper(x) + ).max() mwt = _d.ElapsedRaw.max() - data.append({ - 'jid': j.id, 'sjid': jid, 'mem': jmem, 'wt': jwt, 'type': 'main'}) - data.append( - {'jid': j.id, 'sjid': mjid, 'mem': mmem, 'wt': mwt, 'type': 'merge'}) + data.append({"jid": j.id, "sjid": jid, "mem": jmem, "wt": jwt, "type": "main"}) + data.append({"jid": j.id, "sjid": mjid, "mem": mmem, "wt": mwt, "type": "merge"}) df = pd.DataFrame(data) -df.to_csv(fn, sep='\t', index=False) +df.to_csv(fn, sep="\t", index=False) # 4. The SPP! 
-fn = join('/panfs', 'qiita', 'jobs_spp.tsv.gz') +fn = join("/panfs", "qiita", "jobs_spp.tsv.gz") print(f"Generating the summary for the SPP jobs: {fn}.") # for the SPP we will look at jobs from the last year year = datetime.date.today() - datetime.timedelta(days=365) -cmds = [c for s in Software.iter(False) - if s.name == 'qp-klp' for c in s.commands] -jobs = [j for c in cmds for j in c.processing_jobs if j.status == 'success' and - j.heartbeat.date() > year] +cmds = [c for s in Software.iter(False) if s.name == "qp-klp" for c in s.commands] +jobs = [ + j + for c in cmds + for j in c.processing_jobs + if j.status == "success" and j.heartbeat.date() > year +] # for the SPP we need to find the jobs that were actually run, this means # looping throught the existing slurm jobs and finding them @@ -159,81 +190,121 @@ data = [] for job in jobs: jei = int(job.external_id) - rvals = StringIO( - check_output(sacct + [str(jei)]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') - mem = _d.MaxRSS.apply( - lambda x: x if type(x) is not str else MaxRSS_helper(x)).max() + rvals = StringIO(check_output(sacct + [str(jei)]).decode("ascii")) + _d = pd.read_csv(rvals, sep="|") + mem = _d.MaxRSS.apply(lambda x: x if type(x) is not str else MaxRSS_helper(x)).max() wt = _d.ElapsedRaw.max() # the current "easy" way to determine if amplicon or other is to check # the file extension of the filename - stype = 'other' - if job.parameters.values['sample_sheet']['filename'].endswith('.txt'): - stype = 'amplicon' - rid = job.parameters.values['run_identifier'] + stype = "other" + if job.parameters.values["sample_sheet"]["filename"].endswith(".txt"): + stype = "amplicon" + rid = job.parameters.values["run_identifier"] data.append( - {'jid': job.id, 'sjid': jei, 'mem': mem, 'stype': stype, 'wt': wt, - 'type': 'main', 'rid': rid, 'name': _d.JobName[0]}) + { + "jid": job.id, + "sjid": jei, + "mem": mem, + "stype": stype, + "wt": wt, + "type": "main", + "rid": rid, + "name": _d.JobName[0], + } + ) # let's look for the convert job for jid in range(jei + 1, jei + max_inter): - rvals = StringIO(check_output(sacct + [str(jid)]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') + rvals = StringIO(check_output(sacct + [str(jid)]).decode("ascii")) + _d = pd.read_csv(rvals, sep="|") if [1 for x in _d.JobName.values if x.startswith(job.id)]: cjid = int(_d.JobID[0]) mem = _d.MaxRSS.apply( - lambda x: x if type(x) is not str else MaxRSS_helper(x)).max() + lambda x: x if type(x) is not str else MaxRSS_helper(x) + ).max() wt = _d.ElapsedRaw.max() data.append( - {'jid': job.id, 'sjid': cjid, 'mem': mem, 'stype': stype, - 'wt': wt, 'type': 'convert', 'rid': rid, - 'name': _d.JobName[0]}) + { + "jid": job.id, + "sjid": cjid, + "mem": mem, + "stype": stype, + "wt": wt, + "type": "convert", + "rid": rid, + "name": _d.JobName[0], + } + ) # now let's look for the next step, if amplicon that's fastqc but # if other that's qc/nuqc for jid in range(cjid + 1, cjid + max_inter): - rvals = StringIO( - check_output(sacct + [str(jid)]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') + rvals = StringIO(check_output(sacct + [str(jid)]).decode("ascii")) + _d = pd.read_csv(rvals, sep="|") if [1 for x in _d.JobName.values if x.startswith(job.id)]: - qc_jid = _d.JobIDRaw.apply( - lambda x: int(x.split('.')[0])).max() + qc_jid = _d.JobIDRaw.apply(lambda x: int(x.split(".")[0])).max() qcmem = _d.MaxRSS.apply( - lambda x: x if type(x) is not str - else MaxRSS_helper(x)).max() + lambda x: x if type(x) is not str else MaxRSS_helper(x) + ).max() qcwt = 
_d.ElapsedRaw.max() - if stype == 'amplicon': + if stype == "amplicon": data.append( - {'jid': job.id, 'sjid': qc_jid, 'mem': qcmem, - 'stype': stype, 'wt': qcwt, 'type': 'fastqc', - 'rid': rid, 'name': _d.JobName[0]}) + { + "jid": job.id, + "sjid": qc_jid, + "mem": qcmem, + "stype": stype, + "wt": qcwt, + "type": "fastqc", + "rid": rid, + "name": _d.JobName[0], + } + ) else: data.append( - {'jid': job.id, 'sjid': qc_jid, 'mem': qcmem, - 'stype': stype, 'wt': qcwt, 'type': 'qc', - 'rid': rid, 'name': _d.JobName[0]}) + { + "jid": job.id, + "sjid": qc_jid, + "mem": qcmem, + "stype": stype, + "wt": qcwt, + "type": "qc", + "rid": rid, + "name": _d.JobName[0], + } + ) for jid in range(qc_jid + 1, qc_jid + max_inter): - rvals = StringIO(check_output( - sacct + [str(jid)]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') - if [1 for x in _d.JobName.values if x.startswith( - job.id)]: + rvals = StringIO( + check_output(sacct + [str(jid)]).decode("ascii") + ) + _d = pd.read_csv(rvals, sep="|") + if [1 for x in _d.JobName.values if x.startswith(job.id)]: fqc_jid = _d.JobIDRaw.apply( - lambda x: int(x.split('.')[0])).max() + lambda x: int(x.split(".")[0]) + ).max() fqcmem = _d.MaxRSS.apply( - lambda x: x if type(x) is not str - else MaxRSS_helper(x)).max() + lambda x: x + if type(x) is not str + else MaxRSS_helper(x) + ).max() fqcwt = _d.ElapsedRaw.max() data.append( - {'jid': job.id, 'sjid': fqc_jid, - 'mem': fqcmem, 'stype': stype, - 'wt': fqcwt, 'type': 'fastqc', - 'rid': rid, 'name': _d.JobName[0]}) + { + "jid": job.id, + "sjid": fqc_jid, + "mem": fqcmem, + "stype": stype, + "wt": fqcwt, + "type": "fastqc", + "rid": rid, + "name": _d.JobName[0], + } + ) break break break df = pd.DataFrame(data) -df.to_csv(fn, sep='\t', index=False) +df.to_csv(fn, sep="\t", index=False) diff --git a/notebooks/resource-allocation/generate-allocation-summary.py b/notebooks/resource-allocation/generate-allocation-summary.py index 6d1203c07..741a42ce1 100644 --- a/notebooks/resource-allocation/generate-allocation-summary.py +++ b/notebooks/resource-allocation/generate-allocation-summary.py @@ -1,35 +1,44 @@ -from subprocess import check_output -import pandas as pd from datetime import datetime, timedelta from io import StringIO from json import loads from os.path import join +from subprocess import check_output + +import pandas as pd -from qiita_db.util import MaxRSS_helper from qiita_db.exceptions import QiitaDBUnknownIDError from qiita_db.processing_job import ProcessingJob from qiita_db.software import Software - +from qiita_db.util import MaxRSS_helper all_commands = [c for s in Software.iter(False) for c in s.commands] # retrieving only the numerice external_id means that we are only focusing # on barnacle2/slurm jobs -main_jobs = [j for c in all_commands for j in c.processing_jobs - if j.status == 'success' and j.external_id.isnumeric()] - -sacct = ['sacct', '-p', '--format=JobID,ElapsedRaw,MaxRSS,Submit,Start,MaxRSS,' - 'CPUTimeRAW,ReqMem,AllocCPUs,AveVMSize', '-j'] +main_jobs = [ + j + for c in all_commands + for j in c.processing_jobs + if j.status == "success" and j.external_id.isnumeric() +] + +sacct = [ + "sacct", + "-p", + "--format=JobID,ElapsedRaw,MaxRSS,Submit,Start,MaxRSS," + "CPUTimeRAW,ReqMem,AllocCPUs,AveVMSize", + "-j", +] data = [] for i, j in enumerate(main_jobs): if i % 1000 == 0: - print(f'{i}/{len(main_jobs)}') + print(f"{i}/{len(main_jobs)}") eid = j.external_id - extra_info = '' - rvals = StringIO(check_output(sacct + [eid]).decode('ascii')) - _d = pd.read_csv(rvals, sep='|') - 
_d['QiitaID'] = j.id + extra_info = "" + rvals = StringIO(check_output(sacct + [eid]).decode("ascii")) + _d = pd.read_csv(rvals, sep="|") + _d["QiitaID"] = j.id cmd = j.command s = j.command.software try: @@ -43,44 +52,49 @@ except TypeError as e: # similar to the except above, exept that for these 2 commands, we have # the study_id as None - if cmd.name in {'create_sample_template', 'delete_sample_template', - 'list_remote_files'}: + if cmd.name in { + "create_sample_template", + "delete_sample_template", + "list_remote_files", + }: continue else: raise e sname = s.name - if cmd.name == 'release_validators': - ej = ProcessingJob(j.parameters.values['job']) + if cmd.name == "release_validators": + ej = ProcessingJob(j.parameters.values["job"]) extra_info = ej.command.name samples, columns, input_size = ej.shape - elif cmd.name == 'complete_job': - artifacts = loads(j.parameters.values['payload'])['artifacts'] + elif cmd.name == "complete_job": + artifacts = loads(j.parameters.values["payload"])["artifacts"] if artifacts is not None: - extra_info = ','.join({ - x['artifact_type'] for x in artifacts.values() - if 'artifact_type' in x}) - elif cmd.name == 'Validate': - input_size = sum([len(x) for x in loads( - j.parameters.values['files']).values()]) + extra_info = ",".join( + {x["artifact_type"] for x in artifacts.values() if "artifact_type" in x} + ) + elif cmd.name == "Validate": + input_size = sum([len(x) for x in loads(j.parameters.values["files"]).values()]) sname = f"{sname} - {j.parameters.values['artifact_type']}" - elif cmd.name == 'Alpha rarefaction curves [alpha_rarefaction]': + elif cmd.name == "Alpha rarefaction curves [alpha_rarefaction]": extra_info = j.parameters.values[ - ('The number of rarefaction depths to include between min_depth ' - 'and max_depth. (steps)')] - - _d['external_id'] = eid - _d['sId'] = s.id - _d['sName'] = sname - _d['sVersion'] = s.version - _d['cId'] = cmd.id - _d['cName'] = cmd.name - _d['samples'] = samples - _d['columns'] = columns - _d['input_size'] = input_size - _d['extra_info'] = extra_info - _d.drop(columns=['Unnamed: 10'], inplace=True) + ( + "The number of rarefaction depths to include between min_depth " + "and max_depth. (steps)" + ) + ] + + _d["external_id"] = eid + _d["sId"] = s.id + _d["sName"] = sname + _d["sVersion"] = s.version + _d["cId"] = cmd.id + _d["cName"] = cmd.name + _d["samples"] = samples + _d["columns"] = columns + _d["input_size"] = input_size + _d["extra_info"] = extra_info + _d.drop(columns=["Unnamed: 10"], inplace=True) data.append(_d) data = pd.concat(data) @@ -100,34 +114,33 @@ # external_id but separate from external_id.batch. 
# Here we are going to merge all this info into a single row + some # other columns -date_fmt = '%Y-%m-%dT%H:%M:%S' +date_fmt = "%Y-%m-%dT%H:%M:%S" df = [] -for eid, __df in data.groupby('external_id'): +for eid, __df in data.groupby("external_id"): tmp = __df.iloc[1].copy() # Calculating WaitTime, basically how long did the job took to start # this is useful for some general profiling - tmp['WaitTime'] = datetime.strptime( - __df.iloc[0].Start, date_fmt) - datetime.strptime( - __df.iloc[0].Submit, date_fmt) + tmp["WaitTime"] = datetime.strptime( + __df.iloc[0].Start, date_fmt + ) - datetime.strptime(__df.iloc[0].Submit, date_fmt) df.append(tmp) df = pd.DataFrame(df) # This is important as we are transforming the MaxRSS to raw value # so we need to confirm that there is no other suffixes -print('Make sure that only 0/K/M exist', set( - df.MaxRSS.apply(lambda x: str(x)[-1]))) +print("Make sure that only 0/K/M exist", set(df.MaxRSS.apply(lambda x: str(x)[-1]))) # Generating new columns -df['MaxRSSRaw'] = df.MaxRSS.apply(lambda x: MaxRSS_helper(str(x))) -df['ElapsedRawTime'] = df.ElapsedRaw.apply( - lambda x: timedelta(seconds=float(x))) +df["MaxRSSRaw"] = df.MaxRSS.apply(lambda x: MaxRSS_helper(str(x))) +df["ElapsedRawTime"] = df.ElapsedRaw.apply(lambda x: timedelta(seconds=float(x))) # Thu, Apr 27, 2023 was the first time Jeff and I changed the old allocations # (from barnacle) to a better allocation so using job 1265533 as the # before/after so we only use the latests for the newest version -df['updated'] = df.external_id.apply( - lambda x: 'after' if int(x) >= 1265533 else 'before') +df["updated"] = df.external_id.apply( + lambda x: "after" if int(x) >= 1265533 else "before" +) -fn = join('/panfs', 'qiita', f'jobs_{df.Start.max()[:10]}.tsv.gz') -df.to_csv(fn, sep='\t', index=False) +fn = join("/panfs", "qiita", f"jobs_{df.Start.max()[:10]}.tsv.gz") +df.to_csv(fn, sep="\t", index=False) diff --git a/qiita_core/configuration_manager.py b/qiita_core/configuration_manager.py index 02fb555c4..afa8c60c7 100644 --- a/qiita_core/configuration_manager.py +++ b/qiita_core/configuration_manager.py @@ -6,17 +6,16 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +import warnings +from base64 import b64encode +from configparser import ConfigParser, Error, NoOptionError from functools import partial -from os.path import join, dirname, abspath, isdir, expanduser, exists from os import environ, mkdir -from base64 import b64encode +from os.path import abspath, dirname, exists, expanduser, isdir, join from uuid import uuid4 -import warnings from .exceptions import MissingConfigSection -from configparser import ConfigParser, Error, NoOptionError - class ConfigurationManager(object): """Holds the QIITA configuration @@ -133,13 +132,13 @@ class ConfigurationManager(object): Error When an option is no longer available. 
""" + def __init__(self): # If conf_fp is None, we default to the test configuration file try: - conf_fp = environ['QIITA_CONFIG_FP'] + conf_fp = environ["QIITA_CONFIG_FP"] except KeyError: - conf_fp = join(dirname(abspath(__file__)), - 'support_files/config_test.cfg') + conf_fp = join(dirname(abspath(__file__)), "support_files/config_test.cfg") self.conf_fp = conf_fp # Parse the configuration file @@ -147,11 +146,10 @@ def __init__(self): with open(conf_fp, newline=None) as conf_file: config.read_file(conf_file) - _required_sections = {'main', 'redis', 'postgres', 'smtp', 'ebi', - 'portal'} + _required_sections = {"main", "redis", "postgres", "smtp", "ebi", "portal"} if not _required_sections.issubset(set(config.sections())): missing = _required_sections - set(config.sections()) - raise MissingConfigSection(', '.join(missing)) + raise MissingConfigSection(", ".join(missing)) self._get_main(config) self._get_smtp(config) @@ -165,228 +163,253 @@ def __init__(self): def _get_main(self, config): """Get the configuration of the main section""" - self.test_environment = config.getboolean('main', 'TEST_ENVIRONMENT') + self.test_environment = config.getboolean("main", "TEST_ENVIRONMENT") install_dir = dirname(dirname(abspath(__file__))) - default_base_data_dir = join(install_dir, 'qiita_db', 'support_files', - 'test_data') - self.base_data_dir = config.get('main', 'BASE_DATA_DIR') or \ - default_base_data_dir + default_base_data_dir = join( + install_dir, "qiita_db", "support_files", "test_data" + ) + self.base_data_dir = ( + config.get("main", "BASE_DATA_DIR") or default_base_data_dir + ) try: - log_path = config.get('main', 'LOG_PATH') + log_path = config.get("main", "LOG_PATH") if log_path: - raise Error('The option LOG_PATH in the main section is no ' - 'longer supported, use LOG_DIR instead.') + raise Error( + "The option LOG_PATH in the main section is no " + "longer supported, use LOG_DIR instead." + ) except NoOptionError: pass - self.log_dir = config.get('main', 'LOG_DIR') + self.log_dir = config.get("main", "LOG_DIR") if self.log_dir: # if the option is a directory, it will exist if not isdir(self.log_dir): - raise ValueError("The LOG_DIR (%s) option is required to be a " - "directory." % self.log_dir) + raise ValueError( + "The LOG_DIR (%s) option is required to be a " + "directory." 
% self.log_dir + ) - self.base_url = config.get('main', 'BASE_URL') + self.base_url = config.get("main", "BASE_URL") if not isdir(self.base_data_dir): - raise ValueError("The BASE_DATA_DIR (%s) folder doesn't exist" % - self.base_data_dir) + raise ValueError( + "The BASE_DATA_DIR (%s) folder doesn't exist" % self.base_data_dir + ) - self.working_dir = config.get('main', 'WORKING_DIR') + self.working_dir = config.get("main", "WORKING_DIR") if not isdir(self.working_dir): - raise ValueError("The WORKING_DIR (%s) folder doesn't exist" % - self.working_dir) - self.max_upload_size = config.getint('main', 'MAX_UPLOAD_SIZE') - self.require_approval = config.getboolean('main', 'REQUIRE_APPROVAL') + raise ValueError( + "The WORKING_DIR (%s) folder doesn't exist" % self.working_dir + ) + self.max_upload_size = config.getint("main", "MAX_UPLOAD_SIZE") + self.require_approval = config.getboolean("main", "REQUIRE_APPROVAL") - self.qiita_env = config.get('main', 'QIITA_ENV') + self.qiita_env = config.get("main", "QIITA_ENV") if not self.qiita_env: self.qiita_env = "" - self.private_launcher = config.get('main', 'PRIVATE_LAUNCHER') + self.private_launcher = config.get("main", "PRIVATE_LAUNCHER") - self.plugin_launcher = config.get('main', 'PLUGIN_LAUNCHER') - self.plugin_dir = config.get('main', 'PLUGIN_DIR') + self.plugin_launcher = config.get("main", "PLUGIN_LAUNCHER") + self.plugin_dir = config.get("main", "PLUGIN_DIR") if not self.plugin_dir: - self.plugin_dir = join(expanduser('~'), '.qiita_plugins') + self.plugin_dir = join(expanduser("~"), ".qiita_plugins") if not exists(self.plugin_dir): mkdir(self.plugin_dir) elif not isdir(self.plugin_dir): - raise ValueError("The PLUGIN_DIR (%s) folder doesn't exist" - % self.plugin_dir) + raise ValueError( + "The PLUGIN_DIR (%s) folder doesn't exist" % self.plugin_dir + ) - self.valid_upload_extension = [ve.strip() for ve in config.get( - 'main', 'VALID_UPLOAD_EXTENSION').split(',')] - if (not self.valid_upload_extension or - self.valid_upload_extension == ['']): + self.valid_upload_extension = [ + ve.strip() for ve in config.get("main", "VALID_UPLOAD_EXTENSION").split(",") + ] + if not self.valid_upload_extension or self.valid_upload_extension == [""]: self.valid_upload_extension = [] - raise ValueError('No files will be allowed to be uploaded.') + raise ValueError("No files will be allowed to be uploaded.") - self.certificate_file = config.get('main', 'CERTIFICATE_FILE') + self.certificate_file = config.get("main", "CERTIFICATE_FILE") if not self.certificate_file: - self.certificate_file = join(install_dir, 'qiita_core', - 'support_files', 'ci_server.crt') + self.certificate_file = join( + install_dir, "qiita_core", "support_files", "ci_server.crt" + ) - self.cookie_secret = config.get('main', 'COOKIE_SECRET') + self.cookie_secret = config.get("main", "COOKIE_SECRET") if not self.cookie_secret: self.cookie_secret = b64encode(uuid4().bytes + uuid4().bytes) warnings.warn("Random cookie secret generated.") - self.jwt_secret = config.get('main', 'JWT_SECRET') + self.jwt_secret = config.get("main", "JWT_SECRET") if not self.jwt_secret: self.jwt_secret = b64encode(uuid4().bytes + uuid4().bytes) - warnings.warn("Random JWT secret generated. Non Public Artifact " - "Download Links will expire upon system restart.") + warnings.warn( + "Random JWT secret generated. Non Public Artifact " + "Download Links will expire upon system restart." 
+ ) - self.key_file = config.get('main', 'KEY_FILE') + self.key_file = config.get("main", "KEY_FILE") if not self.key_file: - self.key_file = join(install_dir, 'qiita_core', 'support_files', - 'ci_server.key') + self.key_file = join( + install_dir, "qiita_core", "support_files", "ci_server.key" + ) - self.help_email = config.get('main', 'HELP_EMAIL') + self.help_email = config.get("main", "HELP_EMAIL") if not self.help_email: raise ValueError( "You did not specify the HELP_EMAIL address in the main " "section of Qiita's config file. This address is essential " "for users to ask for help as it is displayed at various " - "location throughout Qiita's web pages.") - if (self.help_email == 'foo@bar.com') and \ - (self.test_environment is False): + "location throughout Qiita's web pages." + ) + if (self.help_email == "foo@bar.com") and (self.test_environment is False): warnings.warn( - "Using the github fake email for HELP_EMAIL, " - "are you sure this is OK?") + "Using the github fake email for HELP_EMAIL, are you sure this is OK?" + ) - self.sysadmin_email = config.get('main', 'SYSADMIN_EMAIL') + self.sysadmin_email = config.get("main", "SYSADMIN_EMAIL") if not self.sysadmin_email: raise ValueError( "You did not specify the SYSADMIN_EMAIL address in the main " "section of Qiita's config file. Serious issues will " "automatically be reported to a sys admin, an according " - "address is therefore required!") - if (self.sysadmin_email == 'jeff@bar.com') and \ - (self.test_environment is False): + "address is therefore required!" + ) + if (self.sysadmin_email == "jeff@bar.com") and (self.test_environment is False): warnings.warn( "Using the github fake email for SYSADMIN_EMAIL, " - "are you sure this is OK?") + "are you sure this is OK?" + ) def _get_job_scheduler(self, config): """Get the configuration of the job_scheduler section""" self.job_scheduler_owner = config.get( - 'job_scheduler', 'JOB_SCHEDULER_JOB_OWNER', fallback=None) + "job_scheduler", "JOB_SCHEDULER_JOB_OWNER", fallback=None + ) self.job_scheduler_poll_val = config.get( - 'job_scheduler', 'JOB_SCHEDULER_POLLING_VALUE', fallback=None) + "job_scheduler", "JOB_SCHEDULER_POLLING_VALUE", fallback=None + ) self.job_scheduler_dependency_q_cnt = config.get( - 'job_scheduler', 'JOB_SCHEDULER_PROCESSING_QUEUE_COUNT', - fallback=None) + "job_scheduler", "JOB_SCHEDULER_PROCESSING_QUEUE_COUNT", fallback=None + ) if self.job_scheduler_poll_val is not None: self.job_scheduler_poll_val = int(self.job_scheduler_poll_val) if self.job_scheduler_dependency_q_cnt is not None: self.job_scheduler_dependency_q_cnt = int( - self.job_scheduler_dependency_q_cnt) + self.job_scheduler_dependency_q_cnt + ) def _get_postgres(self, config): """Get the configuration of the postgres section""" - self.user = config.get('postgres', 'USER') - self.admin_user = config.get('postgres', 'ADMIN_USER') or None + self.user = config.get("postgres", "USER") + self.admin_user = config.get("postgres", "ADMIN_USER") or None - self.password = config.get('postgres', 'PASSWORD') + self.password = config.get("postgres", "PASSWORD") if not self.password: self.password = None - self.admin_password = config.get('postgres', 'ADMIN_PASSWORD') + self.admin_password = config.get("postgres", "ADMIN_PASSWORD") if not self.admin_password: self.admin_password = None - self.database = config.get('postgres', 'DATABASE') - self.host = config.get('postgres', 'HOST') - self.port = config.getint('postgres', 'PORT') + self.database = config.get("postgres", "DATABASE") + self.host = 
config.get("postgres", "HOST") + self.port = config.getint("postgres", "PORT") def _get_redis(self, config): """Get the configuration of the redis section""" - sec_get = partial(config.get, 'redis') - sec_getint = partial(config.getint, 'redis') + sec_get = partial(config.get, "redis") + sec_getint = partial(config.getint, "redis") - self.redis_host = sec_get('HOST') - self.redis_password = sec_get('PASSWORD') - self.redis_db = sec_getint('DB') - self.redis_port = sec_getint('PORT') + self.redis_host = sec_get("HOST") + self.redis_password = sec_get("PASSWORD") + self.redis_db = sec_getint("DB") + self.redis_port = sec_getint("PORT") def _get_smtp(self, config): - sec_get = partial(config.get, 'smtp') - sec_getint = partial(config.getint, 'smtp') - sec_getbool = partial(config.getboolean, 'smtp') + sec_get = partial(config.get, "smtp") + sec_getint = partial(config.getint, "smtp") + sec_getbool = partial(config.getboolean, "smtp") self.smtp_host = sec_get("HOST") self.smtp_port = sec_getint("PORT") self.smtp_user = sec_get("USER") self.smtp_password = sec_get("PASSWORD") self.smtp_ssl = sec_getbool("SSL") - self.smtp_email = sec_get('EMAIL') + self.smtp_email = sec_get("EMAIL") def _get_ebi(self, config): - sec_get = partial(config.get, 'ebi') + sec_get = partial(config.get, "ebi") - self.ebi_seq_xfer_user = sec_get('EBI_SEQ_XFER_USER') - self.ebi_seq_xfer_pass = sec_get('EBI_SEQ_XFER_PASS') - self.ebi_seq_xfer_url = sec_get('EBI_SEQ_XFER_URL') - self.ebi_dropbox_url = sec_get('EBI_DROPBOX_URL') - self.ebi_center_name = sec_get('EBI_CENTER_NAME') - self.ebi_organization_prefix = sec_get('EBI_ORGANIZATION_PREFIX') + self.ebi_seq_xfer_user = sec_get("EBI_SEQ_XFER_USER") + self.ebi_seq_xfer_pass = sec_get("EBI_SEQ_XFER_PASS") + self.ebi_seq_xfer_url = sec_get("EBI_SEQ_XFER_URL") + self.ebi_dropbox_url = sec_get("EBI_DROPBOX_URL") + self.ebi_center_name = sec_get("EBI_CENTER_NAME") + self.ebi_organization_prefix = sec_get("EBI_ORGANIZATION_PREFIX") def _get_vamps(self, config): - self.vamps_user = config.get('vamps', 'USER') - self.vamps_pass = config.get('vamps', 'PASSWORD') - self.vamps_url = config.get('vamps', 'URL') + self.vamps_user = config.get("vamps", "USER") + self.vamps_pass = config.get("vamps", "PASSWORD") + self.vamps_url = config.get("vamps", "URL") def _get_portal(self, config): - self.portal_fp = config.get('portal', 'PORTAL_FP') - self.portal = config.get('portal', 'PORTAL') - self.portal_dir = config.get('portal', 'PORTAL_DIR') + self.portal_fp = config.get("portal", "PORTAL_FP") + self.portal = config.get("portal", "PORTAL") + self.portal_dir = config.get("portal", "PORTAL_DIR") if self.portal_dir: - if not self.portal_dir.startswith('/'): + if not self.portal_dir.startswith("/"): self.portal_dir = "/%s" % self.portal_dir - if self.portal_dir.endswith('/'): + if self.portal_dir.endswith("/"): self.portal_dir = self.portal_dir[:-1] else: self.portal_dir = "" - msg = ("The value %s for %s you set in Qiita's configuration file " - "(section 'portal') for the Stats world map cannot be " - "intepreted as a float! %s") + msg = ( + "The value %s for %s you set in Qiita's configuration file " + "(section 'portal') for the Stats world map cannot be " + "intepreted as a float! 
%s" + ) lat_default = 40.01027 # Boulder CO, USA try: self.stats_map_center_latitude = config.get( - 'portal', 'STATS_MAP_CENTER_LATITUDE', fallback=lat_default) - if self.stats_map_center_latitude == '': + "portal", "STATS_MAP_CENTER_LATITUDE", fallback=lat_default + ) + if self.stats_map_center_latitude == "": self.stats_map_center_latitude = lat_default - self.stats_map_center_latitude = float( - self.stats_map_center_latitude) + self.stats_map_center_latitude = float(self.stats_map_center_latitude) except ValueError as e: - raise ValueError(msg % (self.stats_map_center_latitude, - 'STATS_MAP_CENTER_LATITUDE', e)) + raise ValueError( + msg % (self.stats_map_center_latitude, "STATS_MAP_CENTER_LATITUDE", e) + ) lon_default = -105.24827 # Boulder CO, USA try: self.stats_map_center_longitude = config.get( - 'portal', 'STATS_MAP_CENTER_LONGITUDE', fallback=lon_default) - if self.stats_map_center_longitude == '': + "portal", "STATS_MAP_CENTER_LONGITUDE", fallback=lon_default + ) + if self.stats_map_center_longitude == "": self.stats_map_center_longitude = lon_default - self.stats_map_center_longitude = float( - self.stats_map_center_longitude) + self.stats_map_center_longitude = float(self.stats_map_center_longitude) except ValueError as e: - raise ValueError(msg % (self.stats_map_center_longitude, - 'STATS_MAP_CENTER_LONGITUDE', e)) - for (name, val) in [('latitude', self.stats_map_center_latitude), - ('longitude', self.stats_map_center_longitude)]: - msg = ("The %s of %s you set in Qiita's configuration file " - "(section 'portal') for the Stats world map cannot be %s!") + raise ValueError( + msg % (self.stats_map_center_longitude, "STATS_MAP_CENTER_LONGITUDE", e) + ) + for name, val in [ + ("latitude", self.stats_map_center_latitude), + ("longitude", self.stats_map_center_longitude), + ]: + msg = ( + "The %s of %s you set in Qiita's configuration file " + "(section 'portal') for the Stats world map cannot be %s!" + ) if val < -180: - raise ValueError(msg % (name, val, 'smaller than -180°')) + raise ValueError(msg % (name, val, "smaller than -180°")) if val > 180: - raise ValueError(msg % (name, val, 'larger than 180°')) + raise ValueError(msg % (name, val, "larger than 180°")) def _iframe(self, config): - self.iframe_qiimp = config.get('iframe', 'QIIMP', fallback=None) + self.iframe_qiimp = config.get("iframe", "QIIMP", fallback=None) diff --git a/qiita_core/environment_manager.py b/qiita_core/environment_manager.py index cfecf2849..4dd11332f 100644 --- a/qiita_core/environment_manager.py +++ b/qiita_core/environment_manager.py @@ -6,47 +6,48 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from sys import exc_info, stderr import traceback - +from sys import exc_info, stderr MAX_TEST_WAIT = 5 -TEST_RUNNERS = ('local', 'remote', 'all') +TEST_RUNNERS = ("local", "remote", "all") def _test_wrapper_local(func): """Execute a function locally""" try: - return ('SUCCESS', func()) + return ("SUCCESS", func()) except Exception: - return ('FAIL', traceback.format_exception(*exc_info())) + return ("FAIL", traceback.format_exception(*exc_info())) def _test_result(test_type, name, state, result, expected): """Write out the results of the test""" correct_result = result == expected - to_write = ["**** Name: %s" % name, - "**** Runner: %s" % test_type, - "**** Execution: %s" % state] + to_write = [ + "**** Name: %s" % name, + "**** Runner: %s" % test_type, + "**** Execution: %s" % state, + ] if correct_result: - to_write.append('**** Correct result: %s' % str(correct_result)) + to_write.append("**** Correct result: %s" % str(correct_result)) else: - to_write.append('#### EXPECTED RESULT: %s' % str(expected)) - to_write.append('#### OBSERVED RESULT: %s' % str(result)) + to_write.append("#### EXPECTED RESULT: %s" % str(expected)) + to_write.append("#### OBSERVED RESULT: %s" % str(result)) - stderr.write('\n'.join(to_write)) - stderr.write('\n') + stderr.write("\n".join(to_write)) + stderr.write("\n") - if state == 'FAIL': - stderr.write('#' * 80) - stderr.write('\n') - stderr.write(''.join(result)) - stderr.write('#' * 80) - stderr.write('\n') + if state == "FAIL": + stderr.write("#" * 80) + stderr.write("\n") + stderr.write("".join(result)) + stderr.write("#" * 80) + stderr.write("\n") - stderr.write('\n') + stderr.write("\n") def test(runner): @@ -57,17 +58,23 @@ def test(runner): Tests are performed both on the server and ipengines. """ + def redis_test(**kwargs): """Put and get a key from redis""" from uuid import uuid4 + from redis import Redis + from qiita_core.configuration_manager import ConfigurationManager + config = ConfigurationManager() - r_client = Redis(host=config.redis_host, - port=config.redis_port, - password=config.redis_password, - db=config.redis_db) + r_client = Redis( + host=config.redis_host, + port=config.redis_port, + password=config.redis_password, + db=config.redis_db, + ) key = str(uuid4()) r_client.set(key, 42, ex=1) return int(r_client.get(key)) @@ -75,12 +82,13 @@ def redis_test(**kwargs): def postgres_test(**kwargs): """Open a connection and query postgres""" from qiita_db.sql_connection import TRN + with TRN: TRN.add("SELECT 42") return TRN.execute_fetchflatten()[0] - if runner == 'all': - runner = ('local', ) + if runner == "all": + runner = ("local",) else: runner = [runner] @@ -91,7 +99,7 @@ def postgres_test(**kwargs): def _test_runner(test_type, name, func, expected): """Dispatch to the corresponding runner""" - if test_type == 'local': + if test_type == "local": state, result = _test_wrapper_local(func) else: raise ValueError("Unknown test type: %s" % test_type) diff --git a/qiita_core/exceptions.py b/qiita_core/exceptions.py index 30d08674e..17a3a2f51 100644 --- a/qiita_core/exceptions.py +++ b/qiita_core/exceptions.py @@ -1,5 +1,6 @@ #!/usr/bin/env python from configparser import Error as ConfigParser_Error + # ----------------------------------------------------------------------------- # Copyright (c) 2014--, The Qiita Development Team. 
# @@ -11,63 +12,76 @@ class QiitaError(Exception): """Base clase for all Qiita exceptions""" + pass class IncompetentQiitaDeveloperError(QiitaError): """Exception for developer errors""" + pass class QiitaSearchError(QiitaError): """Exception for errors when using search objects""" + pass class QiitaUserError(QiitaError): """Exception for error when handling with user objects""" + pass class QiitaAnalysisError(QiitaError): """Exception for error when handling with analysis objects""" + pass class QiitaJobError(QiitaError): """Exception for error when handling with job objects""" + pass class QiitaStudyError(QiitaError): """Exception for error when handling with study objects""" + pass class IncorrectPasswordError(QiitaError): """User passes wrong password""" + pass class IncorrectEmailError(QiitaError): """Email fails validation""" + pass class UnverifiedEmailError(QiitaError): """Email has not been validated""" + pass class QiitaEnvironmentError(QiitaError): """Exception for error when dealing with the environment""" + pass class MissingConfigSection(ConfigParser_Error): """Exception when the config file is missing a required section""" + def __init__(self, section): - super(MissingConfigSection, self).__init__('Missing section(s): %r' % - (section,)) + super(MissingConfigSection, self).__init__( + "Missing section(s): %r" % (section,) + ) self.section = section self.args = (section,) diff --git a/qiita_core/qiita_settings.py b/qiita_core/qiita_settings.py index e539ccf93..7c0967621 100644 --- a/qiita_core/qiita_settings.py +++ b/qiita_core/qiita_settings.py @@ -12,7 +12,9 @@ qiita_config = ConfigurationManager() -r_client = Redis(host=qiita_config.redis_host, - port=qiita_config.redis_port, - password=qiita_config.redis_password, - db=qiita_config.redis_db) +r_client = Redis( + host=qiita_config.redis_host, + port=qiita_config.redis_port, + password=qiita_config.redis_password, + db=qiita_config.redis_db, +) diff --git a/qiita_core/testing.py b/qiita_core/testing.py index 159d2823b..432ff88be 100644 --- a/qiita_core/testing.py +++ b/qiita_core/testing.py @@ -10,7 +10,6 @@ from time import sleep from qiita_core.qiita_settings import r_client - from qiita_db.processing_job import ProcessingJob @@ -30,14 +29,14 @@ def wait_for_prep_information_job(prep_id, raise_if_none=True): AssertionError If `raise_if_none` is True and the correspondent redis key is not set """ - res = r_client.get('prep_template_%d' % prep_id) + res = r_client.get("prep_template_%d" % prep_id) if raise_if_none and res is None: raise AssertionError("unexpectedly None") if res is not None: payload = loads(res) - job_id = payload['job_id'] + job_id = payload["job_id"] wait_for_processing_job(job_id) @@ -50,9 +49,9 @@ def wait_for_processing_job(job_id): Job id """ job = ProcessingJob(job_id) - while job.status not in ('success', 'error'): + while job.status not in ("success", "error"): sleep(0.8) # this print is useful for debugging - if job.status == 'error': + if job.status == "error": print("==> %s: %s" % (job.id, job.log.msg)) sleep(0.8) diff --git a/qiita_core/tests/test_configuration_manager.py b/qiita_core/tests/test_configuration_manager.py index 17bc4b007..6ffcbe616 100644 --- a/qiita_core/tests/test_configuration_manager.py +++ b/qiita_core/tests/test_configuration_manager.py @@ -6,26 +6,25 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main -from os import environ, close, remove -from tempfile import mkstemp -from functools import partial import warnings +from configparser import ConfigParser +from functools import partial +from os import close, environ, remove +from tempfile import mkstemp +from unittest import TestCase, main -from qiita_core.exceptions import MissingConfigSection from qiita_core.configuration_manager import ConfigurationManager - -from configparser import ConfigParser +from qiita_core.exceptions import MissingConfigSection class ConfigurationManagerTests(TestCase): def setUp(self): - self.old_conf_fp = environ.get('QIITA_CONFIG_FP') - fd, self.conf_fp = mkstemp(suffix='.txt') + self.old_conf_fp = environ.get("QIITA_CONFIG_FP") + fd, self.conf_fp = mkstemp(suffix=".txt") close(fd) - with open(self.conf_fp, 'w') as f: + with open(self.conf_fp, "w") as f: f.write(CONF) - environ['QIITA_CONFIG_FP'] = self.conf_fp + environ["QIITA_CONFIG_FP"] = self.conf_fp self.conf = ConfigParser() with open(self.conf_fp, newline=None) as f: @@ -33,9 +32,9 @@ def setUp(self): def tearDown(self): if self.old_conf_fp is not None: - environ['QIITA_CONFIG_FP'] = self.old_conf_fp + environ["QIITA_CONFIG_FP"] = self.old_conf_fp else: - del environ['QIITA_CONFIG_FP'] + del environ["QIITA_CONFIG_FP"] remove(self.conf_fp) def test_init(self): @@ -49,12 +48,13 @@ def test_init(self): self.assertEqual(obs.max_upload_size, 100) self.assertTrue(obs.require_approval) self.assertEqual(obs.qiita_env, "source activate qiita") - self.assertEqual(obs.private_launcher, 'qiita-private-launcher') + self.assertEqual(obs.private_launcher, "qiita-private-launcher") self.assertEqual(obs.plugin_launcher, "qiita-plugin-launcher") self.assertEqual(obs.plugin_dir, "/tmp/") self.assertEqual( obs.valid_upload_extension, - ["fastq", "fastq.gz", "txt", "tsv", "sff", "fna", "qual"]) + ["fastq", "fastq.gz", "txt", "tsv", "sff", "fna", "qual"], + ) self.assertEqual(obs.certificate_file, "/tmp/server.cert") self.assertEqual(obs.cookie_secret, "SECRET") self.assertEqual(obs.key_file, "/tmp/server.key") @@ -93,15 +93,17 @@ def test_init(self): self.assertEqual(obs.ebi_seq_xfer_url, "webin.ebi.ac.uk") self.assertEqual( obs.ebi_dropbox_url, - "https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/") + "https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/", + ) self.assertEqual(obs.ebi_center_name, "qiita-test") self.assertEqual(obs.ebi_organization_prefix, "example_organization") # VAMPS section self.assertEqual(obs.vamps_user, "user") self.assertEqual(obs.vamps_pass, "password") - self.assertEqual(obs.vamps_url, - "https://vamps.mbl.edu/mobe_workshop/getfile.php") + self.assertEqual( + obs.vamps_url, "https://vamps.mbl.edu/mobe_workshop/getfile.php" + ) # Portal section self.assertEqual(obs.portal_fp, "/tmp/portal.cfg") @@ -112,7 +114,7 @@ def test_init(self): self.assertIsNone(obs.iframe_qiimp) def test_init_error(self): - with open(self.conf_fp, 'w') as f: + with open(self.conf_fp, "w") as f: f.write("\n") with self.assertRaises(MissingConfigSection): @@ -121,62 +123,64 @@ def test_init_error(self): def test_get_main(self): obs = ConfigurationManager() - conf_setter = partial(self.conf.set, 'main') - conf_setter('COOKIE_SECRET', '') - conf_setter('JWT_SECRET', '') - conf_setter('BASE_DATA_DIR', '') - conf_setter('PLUGIN_DIR', '') - conf_setter('CERTIFICATE_FILE', '') - conf_setter('KEY_FILE', '') - conf_setter('QIITA_ENV', '') + conf_setter = 
partial(self.conf.set, "main") + conf_setter("COOKIE_SECRET", "") + conf_setter("JWT_SECRET", "") + conf_setter("BASE_DATA_DIR", "") + conf_setter("PLUGIN_DIR", "") + conf_setter("CERTIFICATE_FILE", "") + conf_setter("KEY_FILE", "") + conf_setter("QIITA_ENV", "") # Warning raised if No files will be allowed to be uploaded # Warning raised if no cookie_secret - self.conf.set('main', 'HELP_EMAIL', 'ignore@me') - self.conf.set('main', 'SYSADMIN_EMAIL', 'ignore@me') + self.conf.set("main", "HELP_EMAIL", "ignore@me") + self.conf.set("main", "SYSADMIN_EMAIL", "ignore@me") with warnings.catch_warnings(record=True) as warns: obs._get_main(self.conf) obs_warns = [str(w.message) for w in warns] - exp_warns = ['Random cookie secret generated.', - 'Random JWT secret generated. Non Public Artifact ' - 'Download Links will expire upon system restart.'] + exp_warns = [ + "Random cookie secret generated.", + "Random JWT secret generated. Non Public Artifact " + "Download Links will expire upon system restart.", + ] self.assertCountEqual(obs_warns, exp_warns) self.assertNotEqual(obs.cookie_secret, "SECRET") # Test default base_data_dir - self.assertTrue( - obs.base_data_dir.endswith("/qiita_db/support_files/test_data")) + self.assertTrue(obs.base_data_dir.endswith("/qiita_db/support_files/test_data")) # Test default plugin dir self.assertTrue(obs.plugin_dir.endswith("/.qiita_plugins")) # Default certificate_file self.assertTrue( - obs.certificate_file.endswith( - "/qiita_core/support_files/ci_server.crt")) + obs.certificate_file.endswith("/qiita_core/support_files/ci_server.crt") + ) # Default key_file self.assertTrue( - obs.key_file.endswith("/qiita_core/support_files/ci_server.key")) + obs.key_file.endswith("/qiita_core/support_files/ci_server.key") + ) # BASE_DATA_DIR does not exist - conf_setter('BASE_DATA_DIR', '/surprised/if/this/dir/exists') + conf_setter("BASE_DATA_DIR", "/surprised/if/this/dir/exists") with self.assertRaises(ValueError): obs._get_main(self.conf) # WORKING_DIR does not exist - conf_setter('BASE_DATA_DIR', '/tmp') - conf_setter('WORKING_DIR', '/surprised/if/this/dir/exists') + conf_setter("BASE_DATA_DIR", "/tmp") + conf_setter("WORKING_DIR", "/surprised/if/this/dir/exists") with self.assertRaises(ValueError): obs._get_main(self.conf) # PLUGIN_DIR does not exist - conf_setter('WORKING_DIR', '/tmp') - conf_setter('PLUGIN_DIR', '/surprised/if/this/dir/exists') + conf_setter("WORKING_DIR", "/tmp") + conf_setter("PLUGIN_DIR", "/surprised/if/this/dir/exists") with self.assertRaises(ValueError): obs._get_main(self.conf) # No files can be uploaded - conf_setter('PLUGIN_DIR', '/tmp') - conf_setter('VALID_UPLOAD_EXTENSION', '') + conf_setter("PLUGIN_DIR", "/tmp") + conf_setter("VALID_UPLOAD_EXTENSION", "") with self.assertRaises(ValueError): obs._get_main(self.conf) @@ -187,61 +191,61 @@ def test_help_email(self): with warnings.catch_warnings(record=True) as warns: # warning get only issued when in non test environment - self.conf.set('main', 'TEST_ENVIRONMENT', 'FALSE') + self.conf.set("main", "TEST_ENVIRONMENT", "FALSE") obs._get_main(self.conf) - self.assertEqual(obs.help_email, 'foo@bar.com') - self.assertEqual(obs.sysadmin_email, 'jeff@bar.com') + self.assertEqual(obs.help_email, "foo@bar.com") + self.assertEqual(obs.sysadmin_email, "jeff@bar.com") obs_warns = [str(w.message) for w in warns] exp_warns = [ - 'Using the github fake email for HELP_EMAIL, ' - 'are you sure this is OK?', - 'Using the github fake email for SYSADMIN_EMAIL, ' - 'are you sure this is OK?'] + "Using the github 
fake email for HELP_EMAIL, are you sure this is OK?", + "Using the github fake email for SYSADMIN_EMAIL, " + "are you sure this is OK?", + ] self.assertCountEqual(obs_warns, exp_warns) # test if it falls back to qiita.help@gmail.com - self.conf.set('main', 'HELP_EMAIL', '') + self.conf.set("main", "HELP_EMAIL", "") with self.assertRaises(ValueError): obs._get_main(self.conf) # test if it falls back to qiita.help@gmail.com - self.conf.set('main', 'SYSADMIN_EMAIL', '') + self.conf.set("main", "SYSADMIN_EMAIL", "") with self.assertRaises(ValueError): obs._get_main(self.conf) def test_get_job_scheduler(self): obs = ConfigurationManager() - conf_setter = partial(self.conf.set, 'job_scheduler') - conf_setter('JOB_SCHEDULER_JOB_OWNER', '') + conf_setter = partial(self.conf.set, "job_scheduler") + conf_setter("JOB_SCHEDULER_JOB_OWNER", "") obs._get_job_scheduler(self.conf) - self.assertEqual('', obs.job_scheduler_owner) + self.assertEqual("", obs.job_scheduler_owner) def test_get_postgres(self): obs = ConfigurationManager() - conf_setter = partial(self.conf.set, 'postgres') - conf_setter('PASSWORD', '') - conf_setter('ADMIN_PASSWORD', '') + conf_setter = partial(self.conf.set, "postgres") + conf_setter("PASSWORD", "") + conf_setter("ADMIN_PASSWORD", "") obs._get_postgres(self.conf) self.assertIsNone(obs.password) self.assertIsNone(obs.admin_password) def test_get_portal(self): obs = ConfigurationManager() - conf_setter = partial(self.conf.set, 'portal') + conf_setter = partial(self.conf.set, "portal") # Default portal_dir - conf_setter('PORTAL_DIR', '') + conf_setter("PORTAL_DIR", "") obs._get_portal(self.conf) self.assertEqual(obs.portal_dir, "") # Portal dir does not start with / - conf_setter('PORTAL_DIR', 'gold_portal') + conf_setter("PORTAL_DIR", "gold_portal") obs._get_portal(self.conf) self.assertEqual(obs.portal_dir, "/gold_portal") # Portal dir endswith / - conf_setter('PORTAL_DIR', '/gold_portal/') + conf_setter("PORTAL_DIR", "/gold_portal/") obs._get_portal(self.conf) self.assertEqual(obs.portal_dir, "/gold_portal") @@ -253,39 +257,39 @@ def test_get_portal_latlong(self): self.assertEqual(obs.stats_map_center_longitude, -105.24827) # a string cannot be parsed as a float - self.conf.set('portal', 'STATS_MAP_CENTER_LATITUDE', 'kurt') + self.conf.set("portal", "STATS_MAP_CENTER_LATITUDE", "kurt") with self.assertRaises(ValueError): obs._get_portal(self.conf) # check for illegal float values - self.conf.set('portal', 'STATS_MAP_CENTER_LATITUDE', "-200") + self.conf.set("portal", "STATS_MAP_CENTER_LATITUDE", "-200") with self.assertRaises(ValueError): obs._get_portal(self.conf) - self.conf.set('portal', 'STATS_MAP_CENTER_LATITUDE', "200") + self.conf.set("portal", "STATS_MAP_CENTER_LATITUDE", "200") with self.assertRaises(ValueError): obs._get_portal(self.conf) # check if value defaults if option is missing altogether - self.conf.remove_option('portal', 'STATS_MAP_CENTER_LATITUDE') + self.conf.remove_option("portal", "STATS_MAP_CENTER_LATITUDE") obs._get_portal(self.conf) self.assertEqual(obs.stats_map_center_latitude, 40.01027) # same as above, but for longitude # a string cannot be parsed as a float - self.conf.set('portal', 'STATS_MAP_CENTER_LONGITUDE', 'kurt') + self.conf.set("portal", "STATS_MAP_CENTER_LONGITUDE", "kurt") with self.assertRaises(ValueError): obs._get_portal(self.conf) # check for illegal float values - self.conf.set('portal', 'STATS_MAP_CENTER_LONGITUDE', "-200") + self.conf.set("portal", "STATS_MAP_CENTER_LONGITUDE", "-200") with self.assertRaises(ValueError): 
obs._get_portal(self.conf) - self.conf.set('portal', 'STATS_MAP_CENTER_LONGITUDE', "200") + self.conf.set("portal", "STATS_MAP_CENTER_LONGITUDE", "200") with self.assertRaises(ValueError): obs._get_portal(self.conf) # check if value defaults if option is missing altogether - self.conf.remove_option('portal', 'STATS_MAP_CENTER_LONGITUDE') + self.conf.remove_option("portal", "STATS_MAP_CENTER_LONGITUDE") obs._get_portal(self.conf) self.assertEqual(obs.stats_map_center_longitude, -105.24827) @@ -470,5 +474,5 @@ def test_get_portal_latlong(self): [iframe] """ -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_core/tests/test_util.py b/qiita_core/tests/test_util.py index a3fef6942..c0565e60e 100644 --- a/qiita_core/tests/test_util.py +++ b/qiita_core/tests/test_util.py @@ -8,12 +8,18 @@ from unittest import TestCase, main +import qiita_db as qdb from qiita_core.util import ( - qiita_test_checker, execute_as_transaction, get_qiita_version, - is_test_environment, get_release_info) + execute_as_transaction, + get_qiita_version, + get_release_info, + is_test_environment, + qiita_test_checker, +) from qiita_db.meta_util import ( - generate_biom_and_metadata_release, generate_plugin_releases) -import qiita_db as qdb + generate_biom_and_metadata_release, + generate_plugin_releases, +) class UtilTests(TestCase): @@ -26,14 +32,16 @@ def test_send_email_fail(self): """testing send email functionality""" # the default configuration is not correct and should fail with self.assertRaises(IOError): - qdb.util.send_email("antgonza@gmail.com", "This is a test", - "This is the body of the test") + qdb.util.send_email( + "antgonza@gmail.com", "This is a test", "This is the body of the test" + ) def test_is_test_environment(self): self.assertTrue(is_test_environment()) def test_qiita_test_checker(self): """testing qiita test checker""" + @qiita_test_checker() class test_class: pass @@ -41,12 +49,14 @@ class test_class: def test_qiita_test_checker_fail(self): """testing qiita test checker fail""" with self.assertRaises(RuntimeError): + @qiita_test_checker(test=True) class test_class_fail: pass def test_execute_as_transaction(self): """testing that execute as transaction returns 2 different wrappers""" + @execute_as_transaction def function(): # retrieve transaction id @@ -66,22 +76,21 @@ def test_get_qiita_version(self): def test_get_release_info(self): # making sure there is a release - generate_biom_and_metadata_release('private') + generate_biom_and_metadata_release("private") # just checking that is not empty cause the MD5 will change on every # run - biom_metadata_release, archive_release = get_release_info('private') + biom_metadata_release, archive_release = get_release_info("private") # note that we are testing not equal as we should have some information # and then we will test that at least the 2nd element is correct - self.assertNotEqual(biom_metadata_release, ('', '', '')) - self.assertEqual(biom_metadata_release[1], - 'releases/QIITA-private.tgz') - self.assertEqual(archive_release, ('', '', '')) + self.assertNotEqual(biom_metadata_release, ("", "", "")) + self.assertEqual(biom_metadata_release[1], "releases/QIITA-private.tgz") + self.assertEqual(archive_release, ("", "", "")) generate_plugin_releases() - biom_metadata_release, archive_release = get_release_info('public') - self.assertEqual(biom_metadata_release, ('', '', '')) - self.assertNotEqual(archive_release, ('', '', '')) + biom_metadata_release, archive_release = get_release_info("public") + 
self.assertEqual(biom_metadata_release, ("", "", "")) + self.assertNotEqual(archive_release, ("", "", "")) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_core/util.py b/qiita_core/util.py index b3f6b4142..c987ddb08 100644 --- a/qiita_core/util.py +++ b/qiita_core/util.py @@ -7,16 +7,16 @@ # ----------------------------------------------------------------------------- from functools import wraps from os.path import dirname + from git import Repo from git.exc import InvalidGitRepositoryError from qiita_core.qiita_settings import qiita_config, r_client -from qiita_pet import __version__ as qiita_pet_lib_version -from qiita_db.sql_connection import TRN from qiita_db.environment_manager import reset_test_database +from qiita_db.sql_connection import TRN +from qiita_pet import __version__ as qiita_pet_lib_version - -REDIS_QIITA_GIT_SHA_KEY = 'qiita-git-sha' +REDIS_QIITA_GIT_SHA_KEY = "qiita-git-sha" def is_test_environment(): @@ -54,11 +54,14 @@ def qiita_test_checker(test=False): RuntimeError If Qiita is set up to work in a production environment """ + def class_modifier(cls): if not is_test_environment() or test: - raise RuntimeError("Working in a production environment. Not " - "executing the tests to keep the production " - "database safe.") + raise RuntimeError( + "Working in a production environment. Not " + "executing the tests to keep the production " + "database safe." + ) # Now, we decorate the setup and teardown functions class DecoratedClass(cls): @@ -71,16 +74,20 @@ def tearDownClass(cls): pass return DecoratedClass + return class_modifier def execute_as_transaction(func): """Decorator to make a method execute inside a transaction""" + @wraps(func) def wrapper(*args, **kwargs): from qiita_db.sql_connection import TRN + with TRN: return func(*args, **kwargs) + return wrapper @@ -94,7 +101,7 @@ def update_redis_qiita_sha_version(): sha = repo.active_branch.commit.hexsha repo.__del__() except (InvalidGitRepositoryError, TypeError): - sha = '' + sha = "" r_client.set(REDIS_QIITA_GIT_SHA_KEY, sha) @@ -109,12 +116,12 @@ def get_qiita_version(): """ sha = r_client.get(REDIS_QIITA_GIT_SHA_KEY) if sha is None: - sha = '' + sha = "" return (qiita_pet_lib_version, sha) -def get_release_info(study_status='public'): +def get_release_info(study_status="public"): """Returns the studies and the archive release details Parameters @@ -130,24 +137,24 @@ def get_release_info(study_status='public'): The release MD5, filepath and timestamp """ portal = qiita_config.portal - md5sum = r_client.get('%s:release:%s:md5sum' % (portal, study_status)) - filepath = r_client.get('%s:release:%s:filepath' % (portal, study_status)) - timestamp = r_client.get('%s:release:%s:time' % (portal, study_status)) + md5sum = r_client.get("%s:release:%s:md5sum" % (portal, study_status)) + filepath = r_client.get("%s:release:%s:filepath" % (portal, study_status)) + timestamp = r_client.get("%s:release:%s:time" % (portal, study_status)) # replacing None values for empty strings as the text is displayed nicely # in the GUI - md5sum = '' if md5sum is None else md5sum.decode('ascii') - filepath = '' if filepath is None else filepath.decode('ascii') - timestamp = '' if timestamp is None else timestamp.decode('ascii') - biom_metadata_release = ((md5sum, filepath, timestamp)) - - md5sum = r_client.get('release-archive:md5sum') - filepath = r_client.get('release-archive:filepath') - timestamp = r_client.get('release-archive:time') + md5sum = "" if md5sum is None else md5sum.decode("ascii") + filepath = 
"" if filepath is None else filepath.decode("ascii") + timestamp = "" if timestamp is None else timestamp.decode("ascii") + biom_metadata_release = (md5sum, filepath, timestamp) + + md5sum = r_client.get("release-archive:md5sum") + filepath = r_client.get("release-archive:filepath") + timestamp = r_client.get("release-archive:time") # replacing None values for empty strings as the text is displayed nicely # in the GUI - md5sum = '' if md5sum is None else md5sum.decode('ascii') - filepath = '' if filepath is None else filepath.decode('ascii') - timestamp = '' if timestamp is None else timestamp.decode('ascii') - archive_release = ((md5sum, filepath, timestamp)) + md5sum = "" if md5sum is None else md5sum.decode("ascii") + filepath = "" if filepath is None else filepath.decode("ascii") + timestamp = "" if timestamp is None else timestamp.decode("ascii") + archive_release = (md5sum, filepath, timestamp) return (biom_metadata_release, archive_release) diff --git a/qiita_db/__init__.py b/qiita_db/__init__.py index 12e9e3939..fed025057 100644 --- a/qiita_db/__init__.py +++ b/qiita_db/__init__.py @@ -6,31 +6,50 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from . import base -from . import util -from . import sql_connection -from . import metadata_template -from . import analysis -from . import artifact -from . import archive -from . import commands -from . import environment_manager -from . import exceptions -from . import investigation -from . import logger -from . import meta_util -from . import ontology -from . import portal -from . import reference -from . import software -from . import study -from . import user -from . import processing_job +from . import ( + analysis, + archive, + artifact, + base, + commands, + environment_manager, + exceptions, + investigation, + logger, + meta_util, + metadata_template, + ontology, + portal, + processing_job, + reference, + software, + sql_connection, + study, + user, + util, +) __version__ = "2025.11" -__all__ = ["analysis", "artifact", "archive", "base", "commands", - "environment_manager", "exceptions", "investigation", "logger", - "meta_util", "ontology", "portal", "reference", - "software", "sql_connection", "study", "user", "util", - "metadata_template", "processing_job"] +__all__ = [ + "analysis", + "artifact", + "archive", + "base", + "commands", + "environment_manager", + "exceptions", + "investigation", + "logger", + "meta_util", + "ontology", + "portal", + "reference", + "software", + "sql_connection", + "study", + "user", + "util", + "metadata_template", + "processing_job", +] diff --git a/qiita_db/analysis.py b/qiita_db/analysis.py index dc9126691..0ccd088c2 100644 --- a/qiita_db/analysis.py +++ b/qiita_db/analysis.py @@ -16,21 +16,21 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- +from collections import defaultdict from itertools import product -from os.path import join, exists +from json import dump, loads from os import mkdir -from collections import defaultdict +from os.path import exists, join +from re import sub +import pandas as pd from biom import load_table -from biom.util import biom_open from biom.exception import DisjointIDError -from re import sub -import pandas as pd +from biom.util import biom_open +import qiita_db as qdb from qiita_core.exceptions import IncompetentQiitaDeveloperError from qiita_core.qiita_settings import qiita_config -import qiita_db as qdb -from json import loads, dump class Analysis(qdb.base.QiitaObject): @@ -67,7 +67,7 @@ class Analysis(qdb.base.QiitaObject): _table = "analysis" _portal_table = "analysis_portal" - _analysis_id_column = 'analysis_id' + _analysis_id_column = "analysis_id" @classmethod def iter(cls): @@ -102,7 +102,7 @@ def get_by_status(cls, status): with qdb.sql_connection.TRN: # Sandboxed analyses are the analyses that have not been started # and hence they don't have an artifact yet - if status == 'sandbox': + if status == "sandbox": sql = """SELECT DISTINCT analysis FROM qiita.analysis JOIN qiita.analysis_portal USING (analysis_id) @@ -122,13 +122,20 @@ def get_by_status(cls, status): qdb.sql_connection.TRN.add(sql, [status, qiita_config.portal]) return set( - cls(aid) - for aid in qdb.sql_connection.TRN.execute_fetchflatten()) + cls(aid) for aid in qdb.sql_connection.TRN.execute_fetchflatten() + ) @classmethod - def create(cls, owner, name, description, from_default=False, - merge_duplicated_sample_ids=False, categories=None, - reservation=None): + def create( + cls, + owner, + name, + description, + from_default=False, + merge_duplicated_sample_ids=False, + categories=None, + reservation=None, + ): """Creates a new analysis on the database Parameters @@ -158,15 +165,15 @@ def create(cls, owner, name, description, from_default=False, """ with qdb.sql_connection.TRN: portal_id = qdb.util.convert_to_id( - qiita_config.portal, 'portal_type', 'portal') + qiita_config.portal, "portal_type", "portal" + ) # Create the row in the analysis table sql = """INSERT INTO qiita.{0} (email, name, description) VALUES (%s, %s, %s) RETURNING analysis_id""".format(cls._table) - qdb.sql_connection.TRN.add( - sql, [owner.id, name, description]) + qdb.sql_connection.TRN.add(sql, [owner.id, name, description]) a_id = qdb.sql_connection.TRN.execute_fetchlast() if from_default: @@ -183,9 +190,8 @@ def create(cls, owner, name, description, from_default=False, VALUES (%s, %s)""" args = [[a_id, portal_id]] - if qiita_config.portal != 'QIITA': - qp_id = qdb.util.convert_to_id( - 'QIITA', 'portal_type', 'portal') + if qiita_config.portal != "QIITA": + qp_id = qdb.util.convert_to_id("QIITA", "portal_type", "portal") args.append([a_id, qp_id]) qdb.sql_connection.TRN.add(sql, args, many=True) @@ -195,16 +201,17 @@ def create(cls, owner, name, description, from_default=False, # Once the analysis is created, we can create the mapping file and # the initial set of artifacts - plugin = qdb.software.Software.from_name_and_version( - 'Qiita', 'alpha') - cmd = plugin.get_command('build_analysis_files') + plugin = qdb.software.Software.from_name_and_version("Qiita", "alpha") + cmd = plugin.get_command("build_analysis_files") params = qdb.software.Parameters.load( - cmd, values_dict={ - 'analysis': a_id, - 'merge_dup_sample_ids': merge_duplicated_sample_ids, - 
'categories': categories}) - job = qdb.processing_job.ProcessingJob.create( - owner, params, True) + cmd, + values_dict={ + "analysis": a_id, + "merge_dup_sample_ids": merge_duplicated_sample_ids, + "categories": categories, + }, + ) + job = qdb.processing_job.ProcessingJob.create(owner, params, True) sql = """INSERT INTO qiita.analysis_processing_job (analysis_id, processing_job_id) VALUES (%s, %s)""" @@ -257,20 +264,23 @@ def delete(cls, _id): qdb.sql_connection.TRN.add(sql, [_id]) if qdb.sql_connection.TRN.execute_fetchlast(): raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Can't delete analysis %d, has artifacts attached" - % _id) + "Can't delete analysis %d, has artifacts attached" % _id + ) sql = "DELETE FROM qiita.analysis_filepath WHERE {0} = %s".format( - cls._analysis_id_column) + cls._analysis_id_column + ) args = [_id] qdb.sql_connection.TRN.add(sql, args) sql = "DELETE FROM qiita.analysis_portal WHERE {0} = %s".format( - cls._analysis_id_column) + cls._analysis_id_column + ) qdb.sql_connection.TRN.add(sql, args) sql = "DELETE FROM qiita.analysis_sample WHERE {0} = %s".format( - cls._analysis_id_column) + cls._analysis_id_column + ) qdb.sql_connection.TRN.add(sql, args) sql = """DELETE FROM qiita.analysis_processing_job @@ -280,7 +290,8 @@ def delete(cls, _id): # TODO: issue #1176 sql = """DELETE FROM qiita.{0} WHERE {1} = %s""".format( - cls._table, cls._analysis_id_column) + cls._table, cls._analysis_id_column + ) qdb.sql_connection.TRN.add(sql, args) qdb.sql_connection.TRN.execute() @@ -306,8 +317,9 @@ def exists(cls, analysis_id): JOIN qiita.analysis_portal USING (analysis_id) JOIN qiita.portal_type USING (portal_type_id) WHERE {1}=%s - AND portal=%s)""".format(cls._table, - cls._analysis_id_column) + AND portal=%s)""".format( + cls._table, cls._analysis_id_column + ) qdb.sql_connection.TRN.add(sql, [analysis_id, qiita_config.portal]) return qdb.sql_connection.TRN.execute_fetchlast() @@ -322,7 +334,8 @@ def owner(self): """ with qdb.sql_connection.TRN: sql = "SELECT email FROM qiita.{0} WHERE analysis_id = %s".format( - self._table) + self._table + ) qdb.sql_connection.TRN.add(sql, [self._id]) return qdb.user.User(qdb.sql_connection.TRN.execute_fetchlast()) @@ -337,7 +350,8 @@ def name(self): """ with qdb.sql_connection.TRN: sql = "SELECT name FROM qiita.{0} WHERE analysis_id = %s".format( - self._table) + self._table + ) qdb.sql_connection.TRN.add(sql, [self._id]) return qdb.sql_connection.TRN.execute_fetchlast() @@ -450,8 +464,10 @@ def shared_with(self): sql = """SELECT email FROM qiita.analysis_users WHERE analysis_id = %s""" qdb.sql_connection.TRN.add(sql, [self._id]) - return [qdb.user.User(uid) - for uid in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + qdb.user.User(uid) + for uid in qdb.sql_connection.TRN.execute_fetchflatten() + ] @property def artifacts(self): @@ -460,8 +476,10 @@ def artifacts(self): FROM qiita.analysis_artifact WHERE analysis_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) - return [qdb.artifact.Artifact(aid) - for aid in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + qdb.artifact.Artifact(aid) + for aid in qdb.sql_connection.TRN.execute_fetchflatten() + ] @property def mapping_file(self): @@ -473,9 +491,13 @@ def mapping_file(self): The filepath id of the analysis mapping file or None if not generated """ - fp = [x['fp_id'] for x in qdb.util.retrieve_filepaths( - "analysis_filepath", "analysis_id", self._id) - if x['fp_type'] == 'plain_text'] + fp = [ + x["fp_id"] + for x in 
qdb.util.retrieve_filepaths( + "analysis_filepath", "analysis_id", self._id + ) + if x["fp_type"] == "plain_text" + ] if fp: # returning the actual filepath id vs. an array @@ -506,11 +528,12 @@ def metadata_categories(self): metadata = defaultdict(dict) for sid, aid in qdb.sql_connection.TRN.execute_fetchindex(): if sid not in metadata: - metadata[sid]['sample'] = set(ST(sid).categories) - metadata[sid]['prep'] = set() + metadata[sid]["sample"] = set(ST(sid).categories) + metadata[sid]["prep"] = set() for pt in qdb.artifact.Artifact(aid).prep_templates: - metadata[sid]['prep'] = metadata[sid]['prep'] | set( - PT(pt.id).categories) + metadata[sid]["prep"] = metadata[sid]["prep"] | set( + PT(pt.id).categories + ) return metadata @@ -523,9 +546,13 @@ def tgz(self): str or None full filepath to the mapping file or None if not generated """ - fp = [x['fp'] for x in qdb.util.retrieve_filepaths( - "analysis_filepath", "analysis_id", self._id) - if x['fp_type'] == 'tgz'] + fp = [ + x["fp"] + for x in qdb.util.retrieve_filepaths( + "analysis_filepath", "analysis_id", self._id + ) + if x["fp_type"] == "tgz" + ] if fp: # returning the actual path vs. an array @@ -547,8 +574,10 @@ def jobs(self): FROM qiita.analysis_processing_job WHERE analysis_id = %s""" qdb.sql_connection.TRN.add(sql, [self._id]) - return [qdb.processing_job.ProcessingJob(jid) - for jid in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + qdb.processing_job.ProcessingJob(jid) + for jid in qdb.sql_connection.TRN.execute_fetchflatten() + ] @property def pmid(self): @@ -561,7 +590,8 @@ def pmid(self): """ with qdb.sql_connection.TRN: sql = "SELECT pmid FROM qiita.{0} WHERE analysis_id = %s".format( - self._table) + self._table + ) qdb.sql_connection.TRN.add(sql, [self._id]) return qdb.sql_connection.TRN.execute_fetchlast() @@ -608,7 +638,7 @@ def can_be_publicized(self): ORDER BY artifact_id""" qdb.sql_connection.TRN.add(sql, [self.id]) for aid in qdb.sql_connection.TRN.execute_fetchflatten(): - if qdb.artifact.Artifact(aid).visibility != 'public': + if qdb.artifact.Artifact(aid).visibility != "public": non_public.append(aid) return (non_public == [], non_public) @@ -632,7 +662,7 @@ def is_public(self): qdb.sql_connection.TRN.add(sql, [self.id]) visibilities = set(qdb.sql_connection.TRN.execute_fetchflatten()) - return visibilities == {'public'} + return visibilities == {"public"} def make_public(self): """Makes an analysis public @@ -645,9 +675,10 @@ def make_public(self): with qdb.sql_connection.TRN: can_be_publicized, non_public = self.can_be_publicized if not can_be_publicized: - raise ValueError('Not all artifacts that generated this ' - 'analysis are public: %s' % ', '.join( - map(str, non_public))) + raise ValueError( + "Not all artifacts that generated this " + "analysis are public: %s" % ", ".join(map(str, non_public)) + ) # getting all root artifacts / command_id IS NULL sql = """SELECT artifact_id @@ -657,7 +688,7 @@ def make_public(self): qdb.sql_connection.TRN.add(sql, [self.id]) aids = qdb.sql_connection.TRN.execute_fetchflatten() for aid in aids: - qdb.artifact.Artifact(aid).visibility = 'public' + qdb.artifact.Artifact(aid).visibility = "public" def add_artifact(self, artifact): """Adds an artifact to the analysis @@ -675,8 +706,9 @@ def add_artifact(self, artifact): FROM qiita.analysis_artifact WHERE analysis_id = %s AND artifact_id = %s)""" - qdb.sql_connection.TRN.add(sql, [self.id, artifact.id, - self.id, artifact.id]) + qdb.sql_connection.TRN.add( + sql, [self.id, artifact.id, self.id, artifact.id] + ) 
def set_error(self, error_msg): """Sets the analysis error @@ -686,7 +718,7 @@ def set_error(self, error_msg): error_msg : str The error message """ - le = qdb.logger.LogEntry.create('Runtime', error_msg) + le = qdb.logger.LogEntry.create("Runtime", error_msg) sql = """UPDATE qiita.analysis SET logging_id = %s WHERE analysis_id = %s""" @@ -707,11 +739,15 @@ def has_access(self, user): """ with qdb.sql_connection.TRN: # if admin or superuser, just return true - if user.level in {'superuser', 'admin'}: + if user.level in {"superuser", "admin"}: return True - return self in Analysis.get_by_status('public') | \ - user.private_analyses | user.shared_analyses + return ( + self + in Analysis.get_by_status("public") + | user.private_analyses + | user.shared_analyses + ) def can_edit(self, user): """Returns whether the given user can edit the analysis @@ -728,8 +764,11 @@ def can_edit(self, user): """ # The analysis is editable only if the user is the owner, is in the # shared list or the user is an admin - return (user.level in {'superuser', 'admin'} or self.owner == user or - user in self.shared_with) + return ( + user.level in {"superuser", "admin"} + or self.owner == user + or user in self.shared_with + ) def summary_data(self): """Return number of studies, artifacts, and samples selected @@ -791,7 +830,8 @@ def _lock_samples(self): qdb.sql_connection.TRN.add(sql, [self.id]) if not qdb.sql_connection.TRN.execute_fetchlast(): raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Can't add/remove samples from this analysis") + "Can't add/remove samples from this analysis" + ) def add_samples(self, samples): """Adds samples to the analysis @@ -849,8 +889,7 @@ def remove_samples(self, artifacts=None, samples=None): AND sample_id = %s""" # Build the SQL arguments to remove the samples of the # given artifacts. - args = [[self._id, a.id, s] - for a, s in product(artifacts, samples)] + args = [[self._id, a.id, s] for a, s in product(artifacts, samples)] elif artifacts: sql = """DELETE FROM qiita.analysis_sample WHERE analysis_id = %s AND artifact_id = %s""" @@ -861,8 +900,8 @@ def remove_samples(self, artifacts=None, samples=None): args = [[self._id, s] for s in samples] else: raise IncompetentQiitaDeveloperError( - "Must provide list of samples and/or proc_data for " - "removal") + "Must provide list of samples and/or proc_data for removal" + ) qdb.sql_connection.TRN.add(sql, args, many=True) qdb.sql_connection.TRN.execute() @@ -909,12 +948,11 @@ def build_files(self, merge_duplicated_sample_ids, categories=None): post_processing_cmds = dict() for aid, asamples in samples.items(): # find the artifact info, [0] there should be only one info - ainfo = [bi for bi in bioms_info - if bi['artifact_id'] == aid][0] - data_type = ainfo['data_type'] + ainfo = [bi for bi in bioms_info if bi["artifact_id"] == aid][0] + data_type = ainfo["data_type"] # ainfo['algorithm'] is the original merging scheme - label = "%s || %s" % (data_type, ainfo['algorithm']) + label = "%s || %s" % (data_type, ainfo["algorithm"]) if label not in grouped_samples: aparams = qdb.artifact.Artifact(aid).processing_parameters if aparams is not None: @@ -922,52 +960,51 @@ def build_files(self, merge_duplicated_sample_ids, categories=None): if cmd is not None: # preserve label, in case it's needed. 
merging_scheme = sub( - ', BIOM: [0-9a-zA-Z-.]+', '', - ainfo['algorithm']) - post_processing_cmds[ainfo['algorithm']] = ( - merging_scheme, cmd) + ", BIOM: [0-9a-zA-Z-.]+", "", ainfo["algorithm"] + ) + post_processing_cmds[ainfo["algorithm"]] = ( + merging_scheme, + cmd, + ) grouped_samples[label] = [] grouped_samples[label].append((aid, asamples)) # We need to negate merge_duplicated_sample_ids because in # _build_mapping_file is acually rename: merge yes == rename no rename_dup_samples = not merge_duplicated_sample_ids - self._build_mapping_file( - samples, rename_dup_samples, categories=categories) + self._build_mapping_file(samples, rename_dup_samples, categories=categories) if post_processing_cmds: biom_files = self._build_biom_tables( - grouped_samples, - rename_dup_samples, - post_processing_cmds=post_processing_cmds) + grouped_samples, + rename_dup_samples, + post_processing_cmds=post_processing_cmds, + ) else: # preserve the legacy path biom_files = self._build_biom_tables( - grouped_samples, - rename_dup_samples) + grouped_samples, rename_dup_samples + ) # if post_processing_cmds exists, biom_files will be a triplet, # instead of a pair; the final element in the tuple will be an # file path to the new phylogenetic tree. return biom_files - def _build_biom_tables(self, - grouped_samples, - rename_dup_samples=False, - post_processing_cmds=None): + def _build_biom_tables( + self, grouped_samples, rename_dup_samples=False, post_processing_cmds=None + ): """Build tables and add them to the analysis""" with qdb.sql_connection.TRN: # creating per analysis output folder _, base_fp = qdb.util.get_mountpoint(self._table)[0] - base_fp = join(base_fp, 'analysis_%d' % self.id) + base_fp = join(base_fp, "analysis_%d" % self.id) if not exists(base_fp): mkdir(base_fp) biom_files = [] for label, tables in grouped_samples.items(): - - data_type, algorithm = [ - line.strip() for line in label.split('||')] + data_type, algorithm = [line.strip() for line in label.split("||")] new_table = None artifact_ids = [] @@ -981,28 +1018,28 @@ def _build_biom_tables(self, # only have one biom biom_table_fp = None for x in artifact.filepaths: - if x['fp_type'] == 'biom': - biom_table_fp = x['fp'] + if x["fp_type"] == "biom": + biom_table_fp = x["fp"] break if not biom_table_fp: raise RuntimeError( - "Artifact %s does not have a biom table associated" - % aid) + "Artifact %s does not have a biom table associated" % aid + ) # loading the found biom table biom_table = load_table(biom_table_fp) # filtering samples to keep those selected by the user biom_table_samples = set(biom_table.ids()) selected_samples = biom_table_samples.intersection(samples) - biom_table.filter(selected_samples, axis='sample', - inplace=True) + biom_table.filter(selected_samples, axis="sample", inplace=True) if len(biom_table.ids()) == 0: continue if rename_dup_samples: - ids_map = {_id: "%d.%s" % (aid, _id) - for _id in biom_table.ids()} - biom_table.update_ids(ids_map, 'sample', True, True) + ids_map = { + _id: "%d.%s" % (aid, _id) for _id in biom_table.ids() + } + biom_table.update_ids(ids_map, "sample", True, True) if new_table is None: new_table = biom_table @@ -1015,44 +1052,52 @@ def _build_biom_tables(self, if not new_table or len(new_table.ids()) == 0: # if we get to this point the only reason for failure is # rarefaction - raise RuntimeError("All samples filtered out from " - "analysis due to rarefaction level") + raise RuntimeError( + "All samples filtered out from " + "analysis due to rarefaction level" + ) # write out the file 
# data_type and algorithm values become part of the file # name(s). info = "%s_%s" % ( - sub('[^0-9a-zA-Z]+', '', data_type), - sub('[^0-9a-zA-Z]+', '', algorithm)) + sub("[^0-9a-zA-Z]+", "", data_type), + sub("[^0-9a-zA-Z]+", "", algorithm), + ) fn = "%d_analysis_%s.biom" % (self._id, info) biom_fp = join(base_fp, fn) # save final biom here - with biom_open(biom_fp, 'w') as f: + with biom_open(biom_fp, "w") as f: new_table.to_hdf5( - f, "Generated by Qiita, analysis id: %d, info: %s" % ( - self._id, label)) + f, + "Generated by Qiita, analysis id: %d, info: %s" + % (self._id, label), + ) # let's add the regular biom without post processing biom_files.append((data_type, biom_fp, None)) # post_processing_cmds can be None, default, or a dict of # algorithm: merging_scheme, command - if (post_processing_cmds is not None and - algorithm in post_processing_cmds): + if ( + post_processing_cmds is not None + and algorithm in post_processing_cmds + ): merging_scheme, pp_cmd = post_processing_cmds[algorithm] # assuming all commands require archives, obtain # archives once, instead of for every cmd. - features = load_table(biom_fp).ids(axis='observation') + features = load_table(biom_fp).ids(axis="observation") features = list(features) archives = qdb.archive.Archive.retrieve_feature_values( - archive_merging_scheme=merging_scheme, - features=features) + archive_merging_scheme=merging_scheme, features=features + ) # remove archives that SEPP could not match - archives = {f: loads(archives[f]) - for f, plc - in archives.items() - if plc != ''} + archives = { + f: loads(archives[f]) + for f, plc in archives.items() + if plc != "" + } # since biom_fp uses base_fp as its location, assume it's # suitable for other files as well. @@ -1060,10 +1105,9 @@ def _build_biom_tables(self, if not exists(output_dir): mkdir(output_dir) - fp_archive = join(output_dir, - 'archive_%d.json' % (self._id)) + fp_archive = join(output_dir, "archive_%d.json" % (self._id)) - with open(fp_archive, 'w') as out_file: + with open(fp_archive, "w") as out_file: dump(archives, out_file) # assume archives file is passed as: @@ -1074,13 +1118,17 @@ def _build_biom_tables(self, # --fp_biom= # concatenate any other parameters into a string - params = ' '.join(["%s=%s" % (k, v) for k, v in - pp_cmd['script_params'].items()]) + params = " ".join( + ["%s=%s" % (k, v) for k, v in pp_cmd["script_params"].items()] + ) # append archives file and output dir parameters - params = ("%s --fp_biom=%s --fp_archive=%s " - "--output_dir=%s" % ( - params, biom_fp, fp_archive, output_dir)) + params = "%s --fp_biom=%s --fp_archive=%s --output_dir=%s" % ( + params, + biom_fp, + fp_archive, + output_dir, + ) # if environment is successfully activated, # run script with parameters @@ -1088,7 +1136,10 @@ def _build_biom_tables(self, # script_path e.g.: # python 'qiita_db/test/support_files/worker.py' cmd = "%s %s %s" % ( - pp_cmd['script_env'], pp_cmd['script_path'], params) + pp_cmd["script_env"], + pp_cmd["script_path"], + params, + ) p_out, p_err, rv = qdb.processing_job._system_call(cmd) p_out = p_out.rstrip() # based on the set of commands ran, we could get a @@ -1098,21 +1149,19 @@ def _build_biom_tables(self, # the file path to the new tree, depending on p's # return code. 
if rv != 0: - raise ValueError('Error %d: %s' % (rv, p_err)) + raise ValueError("Error %d: %s" % (rv, p_err)) p_out = loads(p_out) - if p_out['archive'] is not None: - biom_files.append( - (data_type, p_out['biom'], p_out['archive'])) + if p_out["archive"] is not None: + biom_files.append((data_type, p_out["biom"], p_out["archive"])) # return the biom files, either with or without needed tree, to # the user. return biom_files - def _build_mapping_file(self, samples, rename_dup_samples=False, - categories=None): + def _build_mapping_file(self, samples, rename_dup_samples=False, categories=None): """Builds the combined mapping file for all samples - Code modified slightly from qiime.util.MetadataMap.__add__""" + Code modified slightly from qiime.util.MetadataMap.__add__""" with qdb.sql_connection.TRN: all_ids = set() to_concat = [] @@ -1123,26 +1172,24 @@ def _build_mapping_file(self, samples, rename_dup_samples=False, if si not in sample_infos: si_df = si.to_dataframe() if categories is not None: - si_df = si_df[list(set(categories) & - set(si_df.columns))] + si_df = si_df[list(set(categories) & set(si_df.columns))] sample_infos[si] = si_df pt = artifact.prep_templates[0] pt_df = pt.to_dataframe() if categories is not None: - pt_df = pt_df[list(set(categories) & - set(pt_df.columns))] + pt_df = pt_df[list(set(categories) & set(pt_df.columns))] qm = pt_df.join(sample_infos[si], lsuffix="_prep") # if we are not going to merge the duplicated samples # append the aid to the sample name - qm['qiita_artifact_id'] = aid - qm['qiita_prep_deprecated'] = pt.deprecated + qm["qiita_artifact_id"] = aid + qm["qiita_prep_deprecated"] = pt.deprecated if rename_dup_samples: - qm['original_SampleID'] = qm.index - qm['#SampleID'] = "%d." % aid + qm.index - samps = set(['%d.%s' % (aid, _id) for _id in samps]) - qm.set_index('#SampleID', inplace=True, drop=True) + qm["original_SampleID"] = qm.index + qm["#SampleID"] = "%d." 
% aid + qm.index + samps = set(["%d.%s" % (aid, _id) for _id in samps]) + qm.set_index("#SampleID", inplace=True, drop=True) else: samps = set(samps) - all_ids all_ids.update(samps) @@ -1151,11 +1198,11 @@ def _build_mapping_file(self, samples, rename_dup_samples=False, study = qdb.artifact.Artifact(aid).study study_owner = study.owner study_info = study.info - pi = study_info['principal_investigator'] - qm['qiita_study_title'] = study.title - qm['qiita_study_alias'] = study.info['study_alias'] - qm['qiita_owner'] = study_owner.info['name'] - qm['qiita_principal_investigator'] = pi.name + pi = study_info["principal_investigator"] + qm["qiita_study_title"] = study.title + qm["qiita_study_alias"] = study.info["study_alias"] + qm["qiita_owner"] = study_owner.info["name"] + qm["qiita_principal_investigator"] = pi.name qm = qm.loc[list(samps)] to_concat.append(qm) @@ -1165,8 +1212,13 @@ def _build_mapping_file(self, samples, rename_dup_samples=False, # Save the mapping file _, base_fp = qdb.util.get_mountpoint(self._table)[0] mapping_fp = join(base_fp, "%d_analysis_mapping.txt" % self._id) - merged_map.to_csv(mapping_fp, index_label='#SampleID', - na_rep='unknown', sep='\t', encoding='utf-8') + merged_map.to_csv( + mapping_fp, + index_label="#SampleID", + na_rep="unknown", + sep="\t", + encoding="utf-8", + ) self._add_file("%d_analysis_mapping.txt" % self._id, "plain_text") @@ -1181,11 +1233,11 @@ def _add_file(self, filename, filetype, data_type=None): data_type : str, optional """ with qdb.sql_connection.TRN: - filetype_id = qdb.util.convert_to_id(filetype, 'filepath_type') - _, mp = qdb.util.get_mountpoint('analysis')[0] - fpid = qdb.util.insert_filepaths([ - (join(mp, filename), filetype_id)], -1, 'analysis', - move_files=False)[0] + filetype_id = qdb.util.convert_to_id(filetype, "filepath_type") + _, mp = qdb.util.get_mountpoint("analysis")[0] + fpid = qdb.util.insert_filepaths( + [(join(mp, filename), filetype_id)], -1, "analysis", move_files=False + )[0] col = "" dtid = "" @@ -1219,10 +1271,10 @@ def slurm_reservation(self): """ slurm_reservation = self._slurm_reservation() - if slurm_reservation and slurm_reservation[0] != '': + if slurm_reservation and slurm_reservation[0] != "": cmd = f"scontrol show reservations {slurm_reservation[0]}" p_out, p_err, rv = qdb.processing_job._system_call(cmd) - if rv == 0 and p_out != 'No reservations in the system\n': + if rv == 0 and p_out != "No reservations in the system\n": return slurm_reservation[0] return None @@ -1239,5 +1291,4 @@ def slurm_reservation(self, slurm_reservation): sql = """UPDATE qiita.{0} SET slurm_reservation = %s WHERE analysis_id = %s""".format(self._table) - qdb.sql_connection.perform_as_transaction( - sql, [slurm_reservation, self._id]) + qdb.sql_connection.perform_as_transaction(sql, [slurm_reservation, self._id]) diff --git a/qiita_db/archive.py b/qiita_db/archive.py index d411d598a..5869f5cda 100644 --- a/qiita_db/archive.py +++ b/qiita_db/archive.py @@ -59,7 +59,8 @@ def _inserting_main_steps(cls, ms, features): vals = [[amsi, _id, val] for _id, val in features.items()] qdb.sql_connection.TRN.add( - "SELECT archive_upsert(%s, %s, %s)", vals, many=True) + "SELECT archive_upsert(%s, %s, %s)", vals, many=True + ) qdb.sql_connection.TRN.execute() @classmethod @@ -81,18 +82,15 @@ def insert_from_artifact(cls, artifact, features): """ with qdb.sql_connection.TRN: atype = artifact.artifact_type - if atype != 'BIOM': - raise ValueError( - "To archive artifact must be BIOM but %s" % atype) + if atype != "BIOM": + raise 
ValueError("To archive artifact must be BIOM but %s" % atype) - bfps = [x['fp'] for x in artifact.filepaths - if x['fp_type'] == 'biom'] + bfps = [x["fp"] for x in artifact.filepaths if x["fp_type"] == "biom"] if not bfps: raise ValueError("The artifact has no biom files") # [0] as it returns a list - ms = qdb.util.get_artifacts_information( - [artifact.id])[0]['algorithm'] + ms = qdb.util.get_artifacts_information([artifact.id])[0]["algorithm"] cls._inserting_main_steps(ms, features) @@ -126,20 +124,30 @@ def get_merging_scheme_from_job(cls, job): parent_cmd_name = pcmd.name parent_parameters = parent_pparameters.values parent_merging_scheme = pcmd.merging_scheme - if not parent_merging_scheme['ignore_parent_command']: + if not parent_merging_scheme["ignore_parent_command"]: gp = parent.parents[0] gp_params = gp.processing_parameters if gp_params is not None: gp_cmd = gp_params.command phms = qdb.util.human_merging_scheme( - parent_cmd_name, parent_merging_scheme, - gp_cmd.name, gp_cmd.merging_scheme, - parent_parameters, [], gp_params.values) + parent_cmd_name, + parent_merging_scheme, + gp_cmd.name, + gp_cmd.merging_scheme, + parent_parameters, + [], + gp_params.values, + ) hms = qdb.util.human_merging_scheme( - acmd.name, acmd.merging_scheme, - parent_cmd_name, parent_merging_scheme, - job.parameters.values, [], parent_parameters) + acmd.name, + acmd.merging_scheme, + parent_cmd_name, + parent_merging_scheme, + job.parameters.values, + [], + parent_parameters, + ) if phms is not None: hms = qdb.util.merge_overlapping_strings(hms, phms) @@ -147,8 +155,7 @@ def get_merging_scheme_from_job(cls, job): return hms @classmethod - def retrieve_feature_values(cls, archive_merging_scheme=None, - features=None): + def retrieve_feature_values(cls, archive_merging_scheme=None, features=None): r"""Retrieves all features/values from the archive Parameters @@ -175,8 +182,13 @@ def retrieve_feature_values(cls, archive_merging_scheme=None, # the features elements can be string or bytes; making sure # everything is string for SQL vals.append( - tuple([f.decode('ascii') if isinstance(f, bytes) else f - for f in features])) + tuple( + [ + f.decode("ascii") if isinstance(f, bytes) else f + for f in features + ] + ) + ) sql = """SELECT archive_feature, archive_feature_value FROM qiita.archive_feature_value @@ -185,10 +197,10 @@ def retrieve_feature_values(cls, archive_merging_scheme=None, ORDER BY archive_merging_scheme, archive_feature""" if extras: - sql = sql.format('WHERE ' + ' AND '.join(extras)) + sql = sql.format("WHERE " + " AND ".join(extras)) qdb.sql_connection.TRN.add(sql, vals) else: - qdb.sql_connection.TRN.add(sql.format('')) + qdb.sql_connection.TRN.add(sql.format("")) return dict(qdb.sql_connection.TRN.execute_fetchindex()) @@ -211,6 +223,7 @@ def insert_features(cls, merging_scheme, features): cls._inserting_main_steps(merging_scheme, features) inserted = cls.retrieve_feature_values( - archive_merging_scheme=merging_scheme, features=features.keys()) + archive_merging_scheme=merging_scheme, features=features.keys() + ) return inserted diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index e5eb581eb..4ea95160a 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -5,23 +5,21 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from itertools import chain +from collections import namedtuple from datetime import datetime +from itertools import chain +from json import dumps from os import remove from os.path import isfile, relpath from shutil import rmtree -from collections import namedtuple -from json import dumps -from qiita_db.util import create_nested_path import networkx as nx import qiita_db as qdb - from qiita_core.qiita_settings import qiita_config +from qiita_db.util import create_nested_path - -TypeNode = namedtuple('TypeNode', ['id', 'job_id', 'name', 'type']) +TypeNode = namedtuple("TypeNode", ["id", "job_id", "name", "type"]) class Artifact(qdb.base.QiitaObject): @@ -56,6 +54,7 @@ class Artifact(qdb.base.QiitaObject): -------- qiita_db.QiitaObject """ + _table = "artifact" @classmethod @@ -123,9 +122,14 @@ def types(): return qdb.sql_connection.TRN.execute_fetchindex() @staticmethod - def create_type(name, description, can_be_submitted_to_ebi, - can_be_submitted_to_vamps, is_user_uploadable, - filepath_types): + def create_type( + name, + description, + can_be_submitted_to_ebi, + can_be_submitted_to_vamps, + is_user_uploadable, + filepath_types, + ): """Creates a new artifact type in the system Parameters @@ -157,22 +161,31 @@ def create_type(name, description, can_be_submitted_to_ebi, qdb.sql_connection.TRN.add(sql, [name]) if qdb.sql_connection.TRN.execute_fetchlast(): raise qdb.exceptions.QiitaDBDuplicateError( - 'artifact type', 'name: %s' % name) + "artifact type", "name: %s" % name + ) sql = """INSERT INTO qiita.artifact_type (artifact_type, description, can_be_submitted_to_ebi, can_be_submitted_to_vamps, is_user_uploadable) VALUES (%s, %s, %s, %s, %s) RETURNING artifact_type_id""" qdb.sql_connection.TRN.add( - sql, [name, description, can_be_submitted_to_ebi, - can_be_submitted_to_vamps, is_user_uploadable]) + sql, + [ + name, + description, + can_be_submitted_to_ebi, + can_be_submitted_to_vamps, + is_user_uploadable, + ], + ) at_id = qdb.sql_connection.TRN.execute_fetchlast() sql = """INSERT INTO qiita.artifact_type_filepath_type (artifact_type_id, filepath_type_id, required) VALUES (%s, %s, %s)""" sql_args = [ - [at_id, qdb.util.convert_to_id(fpt, 'filepath_type'), req] - for fpt, req in filepath_types] + [at_id, qdb.util.convert_to_id(fpt, "filepath_type"), req] + for fpt, req in filepath_types + ] qdb.sql_connection.TRN.add(sql, sql_args, many=True) # When creating a type is expected that a new mountpoint is created @@ -180,9 +193,8 @@ def create_type(name, description, can_be_submitted_to_ebi, # extra path for the mountpoint, which is useful for the test # environment qc = qiita_config - mp = relpath(qc.working_dir, qc.base_data_dir).replace( - 'working_dir', '') - mp = mp + name if mp != '/' and mp != '' else name + mp = relpath(qc.working_dir, qc.base_data_dir).replace("working_dir", "") + mp = mp + name if mp != "/" and mp != "" else name sql = """INSERT INTO qiita.data_directory (data_type, mountpoint, subdirectory, active) VALUES (%s, %s, %s, %s)""" @@ -213,15 +225,13 @@ def copy(cls, artifact, prep_template): visibility_id = qdb.util.convert_to_id("sandbox", "visibility") atype = artifact.artifact_type atype_id = qdb.util.convert_to_id(atype, "artifact_type") - dtype_id = qdb.util.convert_to_id( - prep_template.data_type(), "data_type") + dtype_id = qdb.util.convert_to_id(prep_template.data_type(), "data_type") sql = """INSERT INTO qiita.artifact ( generated_timestamp, visibility_id, artifact_type_id, 
data_type_id, submitted_to_vamps) VALUES (%s, %s, %s, %s, %s) RETURNING artifact_id""" - sql_args = [datetime.now(), visibility_id, atype_id, dtype_id, - False] + sql_args = [datetime.now(), visibility_id, atype_id, dtype_id, False] qdb.sql_connection.TRN.add(sql, sql_args) a_id = qdb.sql_connection.TRN.execute_fetchlast() @@ -243,9 +253,8 @@ def copy(cls, artifact, prep_template): qdb.sql_connection.TRN.add(sql, sql_args) # Associate the artifact with its filepaths - filepaths = [(x['fp'], x['fp_type']) for x in artifact.filepaths] - fp_ids = qdb.util.insert_filepaths( - filepaths, a_id, atype, copy=True) + filepaths = [(x["fp"], x["fp_type"]) for x in artifact.filepaths] + fp_ids = qdb.util.insert_filepaths(filepaths, a_id, atype, copy=True) sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) VALUES (%s, %s)""" @@ -256,9 +265,18 @@ def copy(cls, artifact, prep_template): return instance @classmethod - def create(cls, filepaths, artifact_type, name=None, prep_template=None, - parents=None, processing_parameters=None, move_files=True, - analysis=None, data_type=None): + def create( + cls, + filepaths, + artifact_type, + name=None, + prep_template=None, + parents=None, + processing_parameters=None, + move_files=True, + analysis=None, + data_type=None, + ): r"""Creates a new artifact in the system The parameters depend on how the artifact was generated: @@ -332,29 +350,34 @@ def create(cls, filepaths, artifact_type, name=None, prep_template=None, # We need at least one file if not filepaths: raise qdb.exceptions.QiitaDBArtifactCreationError( - "at least one filepath is required.") + "at least one filepath is required." + ) # Check that the combination of parameters is correct - counts = (int(bool(parents or processing_parameters)) + - int(prep_template is not None) + - int(bool(analysis or data_type))) + counts = ( + int(bool(parents or processing_parameters)) + + int(prep_template is not None) + + int(bool(analysis or data_type)) + ) if counts != 1: # More than one parameter has been provided raise qdb.exceptions.QiitaDBArtifactCreationError( "One and only one of parents, prep template or analysis must " - "be provided") + "be provided" + ) elif bool(parents) != bool(processing_parameters): # When provided, parents and processing parameters both should be # provided (this is effectively doing an XOR) raise qdb.exceptions.QiitaDBArtifactCreationError( "When provided, both parents and processing parameters should " - "be provided") + "be provided" + ) elif bool(analysis) and not bool(data_type): # When provided, analysis and data_type both should be # provided (this is effectively doing an XOR) raise qdb.exceptions.QiitaDBArtifactCreationError( - "When provided, both analysis and data_type should " - "be provided") + "When provided, both analysis and data_type should be provided" + ) # There are three different ways of creating an Artifact, but all of # them execute a set of common operations. 
Declare functions to avoid @@ -372,8 +395,15 @@ def _common_creation_steps(atype, cmd_id, data_type, cmd_parameters): artifact_type_id, submitted_to_vamps) VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING artifact_id""" - sql_args = [gen_timestamp, cmd_id, dtype_id, - cmd_parameters, visibility_id, atype_id, False] + sql_args = [ + gen_timestamp, + cmd_id, + dtype_id, + cmd_parameters, + visibility_id, + atype_id, + False, + ] qdb.sql_connection.TRN.add(sql, sql_args) a_id = qdb.sql_connection.TRN.execute_fetchlast() qdb.sql_connection.TRN.execute() @@ -423,12 +453,14 @@ def _associate_with_analysis(instance, analysis_id): raise qdb.exceptions.QiitaDBArtifactCreationError( "All the parents from an artifact should be either " "from the analysis pipeline or all from the processing" - " pipeline") + " pipeline" + ) elif len_studies > 1 or len_studies > 1: raise qdb.exceptions.QiitaDBArtifactCreationError( "Parents from multiple studies/analyses provided. " "Analyses: %s. Studies: %s." - % (', '.join(analyses), ', '.join(studies))) + % (", ".join(analyses), ", ".join(studies)) + ) elif len_studies == 1: # This artifact is part of the processing pipeline study_id = studies.pop() @@ -436,24 +468,30 @@ def _associate_with_analysis(instance, analysis_id): # one dtype if len(dtypes) > 1: raise qdb.exceptions.QiitaDBArtifactCreationError( - "parents have multiple data types: %s" - % ", ".join(dtypes)) + "parents have multiple data types: %s" % ", ".join(dtypes) + ) instance = _common_creation_steps( - artifact_type, processing_parameters.command.id, - dtypes.pop(), processing_parameters.dump()) + artifact_type, + processing_parameters.command.id, + dtypes.pop(), + processing_parameters.dump(), + ) _associate_with_study( - instance, study_id, parents[0].prep_templates[0].id) + instance, study_id, parents[0].prep_templates[0].id + ) else: # This artifact is part of the analysis pipeline analysis_id = analyses.pop() # In the processing pipeline, artifact parents can have # more than one data type - data_type = ("Multiomic" - if len(dtypes) > 1 else dtypes.pop()) + data_type = "Multiomic" if len(dtypes) > 1 else dtypes.pop() instance = _common_creation_steps( - artifact_type, processing_parameters.command.id, - data_type, processing_parameters.dump()) + artifact_type, + processing_parameters.command.id, + data_type, + processing_parameters.dump(), + ) _associate_with_analysis(instance, analysis_id) # Associate the artifact with its parents @@ -466,35 +504,40 @@ def _associate_with_analysis(instance, analysis_id): # inheriting visibility visibilities = {a.visibility for a in instance.parents} # set based on the "lowest" visibility - if 'sandbox' in visibilities: - instance.visibility = 'sandbox' - elif 'private' in visibilities: - instance.visibility = 'private' + if "sandbox" in visibilities: + instance.visibility = "sandbox" + elif "private" in visibilities: + instance.visibility = "private" else: - instance._set_visibility('public') + instance._set_visibility("public") elif prep_template: # This artifact is uploaded by the user in the # processing pipeline instance = _common_creation_steps( - artifact_type, None, prep_template.data_type(), None) + artifact_type, None, prep_template.data_type(), None + ) # Associate the artifact with the prep template prep_template.artifact = instance # Associate the artifact with the study _associate_with_study( - instance, prep_template.study_id, prep_template.id) + instance, prep_template.study_id, prep_template.id + ) else: # This artifact is an initial artifact of an 
analysis - instance = _common_creation_steps( - artifact_type, None, data_type, None) + instance = _common_creation_steps(artifact_type, None, data_type, None) # Associate the artifact with the analysis if bool(analysis): analysis.add_artifact(instance) # Associate the artifact with its filepaths fp_ids = qdb.util.insert_filepaths( - filepaths, instance.id, artifact_type, - move_files=move_files, copy=(not move_files)) + filepaths, + instance.id, + artifact_type, + move_files=move_files, + copy=(not move_files), + ) sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) VALUES (%s, %s)""" @@ -528,9 +571,10 @@ def delete(cls, artifact_id): instance = cls(artifact_id) # Check if the artifact is public - if instance.visibility == 'public': + if instance.visibility == "public": raise qdb.exceptions.QiitaDBArtifactDeletionError( - artifact_id, "it is public") + artifact_id, "it is public" + ) all_artifacts = list(set(instance.descendants.nodes())) all_artifacts.reverse() @@ -546,26 +590,30 @@ def delete(cls, artifact_id): qdb.sql_connection.TRN.add(sql, [all_ids]) analyses = qdb.sql_connection.TRN.execute_fetchindex() if analyses: - analyses = '\n'.join( - ['Analysis id: %s, Owner: %s' % (aid, email) - for email, aid in analyses]) + analyses = "\n".join( + [ + "Analysis id: %s, Owner: %s" % (aid, email) + for email, aid in analyses + ] + ) raise qdb.exceptions.QiitaDBArtifactDeletionError( - artifact_id, 'it or one of its children has been ' - 'analyzed by: \n %s' % analyses) + artifact_id, + "it or one of its children has been analyzed by: \n %s" % analyses, + ) # Check if the artifacts have been submitted to EBI for a in all_artifacts: if a.can_be_submitted_to_ebi and a.ebi_run_accessions: raise qdb.exceptions.QiitaDBArtifactDeletionError( - artifact_id, "Artifact %d has been submitted to " - "EBI" % a.id) + artifact_id, "Artifact %d has been submitted to EBI" % a.id + ) # Check if the artifacts have been submitted to VAMPS for a in all_artifacts: if a.can_be_submitted_to_vamps and a.is_submitted_to_vamps: raise qdb.exceptions.QiitaDBArtifactDeletionError( - artifact_id, "Artifact %d has been submitted to " - "VAMPS" % a.id) + artifact_id, "Artifact %d has been submitted to VAMPS" % a.id + ) # Check if there is a job queued, running, waiting or # in_construction that will use/is using the artifact @@ -584,14 +632,15 @@ def delete(cls, artifact_id): # but we also need to check that if it's only 1 job, that the # job is not the delete_artifact actual job raise_error = True - job_name = qdb.processing_job.ProcessingJob( - jobs[0]).command.name - if len(jobs) == 1 and job_name == 'delete_artifact': + job_name = qdb.processing_job.ProcessingJob(jobs[0]).command.name + if len(jobs) == 1 and job_name == "delete_artifact": raise_error = False if raise_error: raise qdb.exceptions.QiitaDBArtifactDeletionError( - artifact_id, "there is a queued/running job that " - "uses this artifact or one of it's children") + artifact_id, + "there is a queued/running job that " + "uses this artifact or one of it's children", + ) # We can now remove the artifacts filepaths = [f for a in all_artifacts for f in a.filepaths] @@ -624,12 +673,12 @@ def delete(cls, artifact_id): # move the files to the uploads folder. 
We also need # to nullify the column in the prep template table if not instance.parents and study is not None: - qdb.util.move_filepaths_to_upload_folder(study.id, - filepaths) + qdb.util.move_filepaths_to_upload_folder(study.id, filepaths) # there are cases that an artifact would not be linked to a # study - pt_ids = [tuple([pt.id]) for a in all_artifacts - for pt in a.prep_templates] + pt_ids = [ + tuple([pt.id]) for a in all_artifacts for pt in a.prep_templates + ] if pt_ids: sql = """UPDATE qiita.prep_template SET artifact_id = NULL @@ -678,49 +727,61 @@ def archive(cls, artifact_id, clean_ancestors=True): """ artifact = cls(artifact_id) - if artifact.visibility != 'public': + if artifact.visibility != "public": raise qdb.exceptions.QiitaDBOperationNotPermittedError( - 'Only public artifacts can be archived') - if artifact.artifact_type != 'BIOM': + "Only public artifacts can be archived" + ) + if artifact.artifact_type != "BIOM": raise qdb.exceptions.QiitaDBOperationNotPermittedError( - 'Only BIOM artifacts can be archived') + "Only BIOM artifacts can be archived" + ) if artifact.analysis is not None: raise qdb.exceptions.QiitaDBOperationNotPermittedError( - 'Only non analysis artifacts can be archived') + "Only non analysis artifacts can be archived" + ) if not artifact.parents: raise qdb.exceptions.QiitaDBOperationNotPermittedError( - 'Only non raw artifacts can be archived') + "Only non raw artifacts can be archived" + ) to_delete = [] if clean_ancestors: # let's find all ancestors that can be deleted (it has parents and # no ancestors (that have no descendants), and delete them - to_delete = [x for x in artifact.ancestors.nodes() - if x.id != artifact_id and x.parents and - not [y for y in x.descendants.nodes() - if y.id not in (artifact_id, x.id)]] + to_delete = [ + x + for x in artifact.ancestors.nodes() + if x.id != artifact_id + and x.parents + and not [ + y for y in x.descendants.nodes() if y.id not in (artifact_id, x.id) + ] + ] # ignore artifacts that can and has been submitted to EBI - to_delete = [x for x in to_delete if not x.can_be_submitted_to_ebi - and not x.is_submitted_to_ebi - and not x.is_submitted_to_vamps] + to_delete = [ + x + for x in to_delete + if not x.can_be_submitted_to_ebi + and not x.is_submitted_to_ebi + and not x.is_submitted_to_vamps + ] # get the log file so we can delete - fids = [x['fp_id'] for x in artifact.filepaths - if x['fp_type'] == 'log'] + fids = [x["fp_id"] for x in artifact.filepaths if x["fp_type"] == "log"] archive_data = dumps({"merging_scheme": artifact.merging_scheme}) with qdb.sql_connection.TRN: - artifact._set_visibility('archived', propagate=False) - sql = 'DELETE FROM qiita.parent_artifact WHERE artifact_id = %s' + artifact._set_visibility("archived", propagate=False) + sql = "DELETE FROM qiita.parent_artifact WHERE artifact_id = %s" qdb.sql_connection.TRN.add(sql, [artifact_id]) - sql = '''DELETE FROM qiita.artifact_output_processing_job - WHERE artifact_id = %s''' + sql = """DELETE FROM qiita.artifact_output_processing_job + WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [artifact_id]) if fids: - sql = '''DELETE FROM qiita.artifact_filepath - WHERE filepath_id IN %s''' + sql = """DELETE FROM qiita.artifact_filepath + WHERE filepath_id IN %s""" qdb.sql_connection.TRN.add(sql, [tuple(fids)]) sql = """UPDATE qiita.{0} @@ -732,7 +793,7 @@ def archive(cls, artifact_id, clean_ancestors=True): # cleaning the extra artifacts for x in to_delete: - x._set_visibility('sandbox', propagate=False) + x._set_visibility("sandbox", 
propagate=False) cls.delete(x.id) @property @@ -806,7 +867,8 @@ def processing_parameters(self): if res[0] is None: return None return qdb.software.Parameters.load( - qdb.software.Command(res[0]), values_dict=res[1]) + qdb.software.Command(res[0]), values_dict=res[1] + ) @property def visibility(self): @@ -866,7 +928,7 @@ def visibility(self, value): # then let's check that the sample/prep info files have the correct # restrictions - if value != 'sandbox' and study is not None: + if value != "sandbox" and study is not None: reply = study.sample_template.validate_restrictions() success = [not reply[0]] message = [reply[1]] @@ -876,7 +938,8 @@ def visibility(self, value): message.append(reply[1]) if any(success): raise ValueError( - "Errors in your info files:%s" % '\n'.join(message)) + "Errors in your info files:%s" % "\n".join(message) + ) self._set_visibility(value) @@ -929,8 +992,9 @@ def can_be_submitted_to_ebi(self): # words has more that one processing step behind it fine_to_send = [] fine_to_send.extend([pt.artifact for pt in self.prep_templates]) - fine_to_send.extend([c for a in fine_to_send if a is not None - for c in a.children]) + fine_to_send.extend( + [c for a in fine_to_send if a is not None for c in a.children] + ) if self not in fine_to_send: return False @@ -958,7 +1022,8 @@ def is_submitted_to_ebi(self): with qdb.sql_connection.TRN: if not self.can_be_submitted_to_ebi: raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Artifact %s cannot be submitted to EBI" % self.id) + "Artifact %s cannot be submitted to EBI" % self.id + ) sql = """SELECT EXISTS( SELECT * FROM qiita.ebi_run_accession @@ -983,13 +1048,16 @@ def ebi_run_accessions(self): with qdb.sql_connection.TRN: if not self.can_be_submitted_to_ebi: raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Artifact %s cannot be submitted to EBI" % self.id) + "Artifact %s cannot be submitted to EBI" % self.id + ) sql = """SELECT sample_id, ebi_run_accession FROM qiita.ebi_run_accession WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) - return {s_id: ebi_acc for s_id, ebi_acc in - qdb.sql_connection.TRN.execute_fetchindex()} + return { + s_id: ebi_acc + for s_id, ebi_acc in qdb.sql_connection.TRN.execute_fetchindex() + } @ebi_run_accessions.setter def ebi_run_accessions(self, values): @@ -1009,7 +1077,8 @@ def ebi_run_accessions(self, values): with qdb.sql_connection.TRN: if not self.can_be_submitted_to_ebi: raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Artifact %s cannot be submitted to EBI" % self.id) + "Artifact %s cannot be submitted to EBI" % self.id + ) sql = """SELECT EXISTS(SELECT * FROM qiita.ebi_run_accession @@ -1017,13 +1086,15 @@ def ebi_run_accessions(self, values): qdb.sql_connection.TRN.add(sql, [self.id]) if qdb.sql_connection.TRN.execute_fetchlast(): raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Artifact %s already submitted to EBI" % self.id) + "Artifact %s already submitted to EBI" % self.id + ) sql = """INSERT INTO qiita.ebi_run_accession (sample_id, artifact_id, ebi_run_accession) VALUES (%s, %s, %s)""" - sql_args = [[sample, self.id, accession] - for sample, accession in values.items()] + sql_args = [ + [sample, self.id, accession] for sample, accession in values.items() + ] qdb.sql_connection.TRN.add(sql, sql_args, many=True) qdb.sql_connection.TRN.execute() @@ -1061,7 +1132,8 @@ def is_submitted_to_vamps(self): with qdb.sql_connection.TRN: if not self.can_be_submitted_to_vamps: raise qdb.exceptions.QiitaDBOperationNotPermittedError( - 
"Artifact %s cannot be submitted to VAMPS" % self.id) + "Artifact %s cannot be submitted to VAMPS" % self.id + ) sql = """SELECT submitted_to_vamps FROM qiita.artifact WHERE artifact_id = %s""" @@ -1084,7 +1156,8 @@ def is_submitted_to_vamps(self, value): """ if not self.can_be_submitted_to_vamps: raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Artifact %s cannot be submitted to VAMPS" % self.id) + "Artifact %s cannot be submitted to VAMPS" % self.id + ) sql = """UPDATE qiita.artifact SET submitted_to_vamps = %s WHERE artifact_id = %s""" @@ -1100,7 +1173,8 @@ def filepaths(self): A list of dict as defined by qiita_db.util.retrieve_filepaths """ return qdb.util.retrieve_filepaths( - "artifact_filepath", "artifact_id", self.id, sort='ascending') + "artifact_filepath", "artifact_id", self.id, sort="ascending" + ) @property def html_summary_fp(self): @@ -1111,15 +1185,16 @@ def html_summary_fp(self): tuple of (int, str) The filepath id and the path to the HTML summary """ - fps = qdb.util.retrieve_filepaths("artifact_filepath", "artifact_id", - self.id, fp_type='html_summary') + fps = qdb.util.retrieve_filepaths( + "artifact_filepath", "artifact_id", self.id, fp_type="html_summary" + ) if fps: # If fps is not the empty list, then we have exactly one file # retrieve_filepaths returns a list of lists of 3 values: the # filepath id, the filepath and the filepath type. We don't want # to return the filepath type here, so just grabbing the first and # second element of the list - res = (fps[0]['fp_id'], fps[0]['fp']) + res = (fps[0]["fp_id"], fps[0]["fp"]) else: res = None @@ -1144,9 +1219,9 @@ def set_html_summary(self, html_fp, support_dir=None): # files, if necessary to_delete_ids = [] for x in self.filepaths: - if x['fp_type'] in ('html_summary', 'html_summary_dir'): - to_delete_ids.append([x['fp_id']]) - to_delete_fps.append(x['fp']) + if x["fp_type"] in ("html_summary", "html_summary_dir"): + to_delete_ids.append([x["fp_id"]]) + to_delete_fps.append(x["fp"]) # From the artifact_filepath table sql = """DELETE FROM qiita.artifact_filepath WHERE filepath_id = %s""" @@ -1156,11 +1231,10 @@ def set_html_summary(self, html_fp, support_dir=None): qdb.sql_connection.TRN.add(sql, to_delete_ids, many=True) # Add the new HTML summary - filepaths = [(html_fp, 'html_summary')] + filepaths = [(html_fp, "html_summary")] if support_dir is not None: - filepaths.append((support_dir, 'html_summary_dir')) - fp_ids = qdb.util.insert_filepaths( - filepaths, self.id, self.artifact_type) + filepaths.append((support_dir, "html_summary_dir")) + fp_ids = qdb.util.insert_filepaths(filepaths, self.id, self.artifact_type) sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) VALUES (%s, %s)""" @@ -1172,8 +1246,8 @@ def set_html_summary(self, html_fp, support_dir=None): # that check after the previous transaction is commited if to_delete_fps: for x in self.filepaths: - if x['fp'] in to_delete_fps: - to_delete_fps.remove(x['fp']) + if x["fp"] in to_delete_fps: + to_delete_fps.remove(x["fp"]) for fp in to_delete_fps: if isfile(fp): @@ -1195,8 +1269,9 @@ def parents(self): FROM qiita.parent_artifact WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) - return [Artifact(p_id) - for p_id in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + Artifact(p_id) for p_id in qdb.sql_connection.TRN.execute_fetchflatten() + ] def _create_lineage_graph_from_edge_list(self, edge_list): """Generates an artifact graph from the given `edge_list` @@ -1215,8 +1290,9 @@ def 
_create_lineage_graph_from_edge_list(self, edge_list): # In case the edge list is empty, only 'self' is present in the graph if edge_list: # By creating all the artifacts here we are saving DB calls - nodes = {a_id: Artifact(a_id) - for a_id in set(chain.from_iterable(edge_list))} + nodes = { + a_id: Artifact(a_id) for a_id in set(chain.from_iterable(edge_list)) + } for parent, child in edge_list: lineage.add_edge(nodes[parent], nodes[child]) @@ -1266,6 +1342,7 @@ def descendants_with_jobs(self): networkx.DiGraph The descendants of the artifact """ + def _add_edge(edges, src, dest): """Aux function to add the edge (src, dest) to edges""" edge = (src, dest) @@ -1282,12 +1359,11 @@ def _add_edge(edges, src, dest): def _helper(sql_edges, edges, nodes): for jid, pid, cid in sql_edges: if jid not in nodes: - nodes[jid] = ('job', - qdb.processing_job.ProcessingJob(jid)) + nodes[jid] = ("job", qdb.processing_job.ProcessingJob(jid)) if pid not in nodes: - nodes[pid] = ('artifact', qdb.artifact.Artifact(pid)) + nodes[pid] = ("artifact", qdb.artifact.Artifact(pid)) if cid not in nodes: - nodes[cid] = ('artifact', qdb.artifact.Artifact(cid)) + nodes[cid] = ("artifact", qdb.artifact.Artifact(cid)) edges.add((nodes[pid], nodes[jid])) edges.add((nodes[jid], nodes[cid])) @@ -1299,17 +1375,18 @@ def _helper(sql_edges, edges, nodes): if sql_edges: _helper(sql_edges, edges, nodes) else: - nodes[self.id] = ('artifact', self) + nodes[self.id] = ("artifact", self) lineage.add_node(nodes[self.id]) # if this is an Analysis we need to check if there are extra # edges/nodes as there is a chance that there are connecions # between them if self.analysis is not None: - roots = [a for a in self.analysis.artifacts - if not a.parents and a != self] + roots = [ + a for a in self.analysis.artifacts if not a.parents and a != self + ] for r in roots: # add the root to the options then their children - extra_nodes[r.id] = ('artifact', r) + extra_nodes[r.id] = ("artifact", r) qdb.sql_connection.TRN.add(sql, [r.id]) sql_edges = qdb.sql_connection.TRN.execute_fetchindex() _helper(sql_edges, extra_edges, extra_nodes) @@ -1325,54 +1402,61 @@ def _helper(sql_edges, edges, nodes): if current not in visited: visited.add(current) n_type, n_obj = nodes[current] - if n_type == 'artifact': + if n_type == "artifact": # Add all the jobs to the queue for job in n_obj.jobs(): queue.append(job.id) if job.id not in nodes: - nodes[job.id] = ('job', job) + nodes[job.id] = ("job", job) - elif n_type == 'job': + elif n_type == "job": # skip private and artifact definition jobs as they # don't create new artifacts and they would create # edges without artifacts + they can be safely ignored if n_obj.command.software.type in { - 'private', 'artifact definition'}: + "private", + "artifact definition", + }: continue jstatus = n_obj.status # If the job is in success we don't need to do anything # else since it would've been added by the code above - if jstatus != 'success': - - if jstatus != 'error': + if jstatus != "success": + if jstatus != "error": # If the job is not errored, we can add the # future outputs and the children jobs to # the graph. 
# Add all the job outputs as new nodes for o_name, o_type in n_obj.command.outputs: - node_id = '%s:%s' % (n_obj.id, o_name) + node_id = "%s:%s" % (n_obj.id, o_name) node = TypeNode( - id=node_id, job_id=n_obj.id, - name=o_name, type=o_type) + id=node_id, + job_id=n_obj.id, + name=o_name, + type=o_type, + ) queue.append(node_id) if node_id not in nodes: - nodes[node_id] = ('type', node) + nodes[node_id] = ("type", node) # Add all his children jobs to the queue for cjob in n_obj.children: queue.append(cjob.id) if cjob.id not in nodes: - nodes[cjob.id] = ('job', cjob) + nodes[cjob.id] = ("job", cjob) # including the outputs for o_name, o_type in cjob.command.outputs: - node_id = '%s:%s' % (cjob.id, o_name) + node_id = "%s:%s" % (cjob.id, o_name) node = TypeNode( - id=node_id, job_id=cjob.id, - name=o_name, type=o_type) + id=node_id, + job_id=cjob.id, + name=o_name, + type=o_type, + ) if node_id not in nodes: - nodes[node_id] = ('type', node) + nodes[node_id] = ("type", node) # Connect the job with his input artifacts, the # input artifacts may or may not exist yet, so we @@ -1387,17 +1471,18 @@ def _helper(sql_edges, edges, nodes): pending = n_obj.pending for pred_id in pending: for pname in pending[pred_id]: - in_node_id = '%s:%s' % ( - pred_id, pending[pred_id][pname]) - _add_edge(edges, nodes[in_node_id], - nodes[n_obj.id]) + in_node_id = "%s:%s" % ( + pred_id, + pending[pred_id][pname], + ) + _add_edge(edges, nodes[in_node_id], nodes[n_obj.id]) - elif n_type == 'type': + elif n_type == "type": # Connect this 'future artifact' with the job that will # generate it _add_edge(edges, nodes[n_obj.job_id], nodes[current]) else: - raise ValueError('Unrecognized type: %s' % n_type) + raise ValueError("Unrecognized type: %s" % n_type) # Add all edges to the lineage graph - adding the edges creates the # nodes in networkx @@ -1420,8 +1505,9 @@ def children(self): FROM qiita.parent_artifact WHERE parent_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) - return [Artifact(c_id) - for c_id in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + Artifact(c_id) for c_id in qdb.sql_connection.TRN.execute_fetchflatten() + ] @property def youngest_artifact(self): @@ -1440,7 +1526,8 @@ def youngest_artifact(self): ORDER BY generated_timestamp DESC LIMIT 1""" qdb.sql_connection.TRN.add( - sql, [self.id, qdb.util.artifact_visibilities_to_skip()]) + sql, [self.id, qdb.util.artifact_visibilities_to_skip()] + ) a_id = qdb.sql_connection.TRN.execute_fetchindex() # If the current artifact has no children, the previous call will # return an empty list, so the youngest artifact in the lineage is @@ -1463,8 +1550,10 @@ def prep_templates(self): FROM qiita.preparation_artifact WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) - templates = [qdb.metadata_template.prep_template.PrepTemplate(pt_id) # noqa - for pt_id in qdb.sql_connection.TRN.execute_fetchflatten()] # noqa + templates = [ + qdb.metadata_template.prep_template.PrepTemplate(pt_id) + for pt_id in qdb.sql_connection.TRN.execute_fetchflatten() + ] if len(templates) > 1: # We never expect an artifact to be associated with multiple @@ -1527,17 +1616,16 @@ def merging_scheme(self): WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) archive_data = qdb.sql_connection.TRN.execute_fetchlast() - merging_schemes = [archive_data['merging_scheme'][0]] - parent_softwares = [archive_data['merging_scheme'][1]] + merging_schemes = [archive_data["merging_scheme"][0]] + parent_softwares = [archive_data["merging_scheme"][1]] 
else: processing_params = self.processing_parameters if processing_params is None: - return '', '' + return "", "" cmd_name = processing_params.command.name ms = processing_params.command.merging_scheme - afps = [x['fp'] for x in self.filepaths - if x['fp'].endswith('biom')] + afps = [x["fp"] for x in self.filepaths if x["fp"].endswith("biom")] merging_schemes = [] parent_softwares = [] @@ -1551,21 +1639,28 @@ def merging_scheme(self): parent_cmd_name = None parent_merging_scheme = None parent_pp = None - parent_software = 'N/A' + parent_software = "N/A" else: parent_cmd_name = pparent.command.name parent_merging_scheme = pparent.command.merging_scheme parent_pp = pparent.values psoftware = pparent.command.software - parent_software = '%s v%s' % ( - psoftware.name, psoftware.version) - - merging_schemes.append(qdb.util.human_merging_scheme( - cmd_name, ms, parent_cmd_name, parent_merging_scheme, - processing_params.values, afps, parent_pp)) + parent_software = "%s v%s" % (psoftware.name, psoftware.version) + + merging_schemes.append( + qdb.util.human_merging_scheme( + cmd_name, + ms, + parent_cmd_name, + parent_merging_scheme, + processing_params.values, + afps, + parent_pp, + ) + ) parent_softwares.append(parent_software) - return ', '.join(merging_schemes), ', '.join(parent_softwares) + return ", ".join(merging_schemes), ", ".join(parent_softwares) @property def being_deleted_by(self): @@ -1603,9 +1698,9 @@ def has_human(self): tgs = qdb.metadata_template.constants.TARGET_GENE_DATA_TYPES ntg = any([pt.data_type() not in tgs for pt in pts]) chf = any([not pt.current_human_filtering for pt in pts]) - if ntg and chf and self.artifact_type == 'per_sample_FASTQ': + if ntg and chf and self.artifact_type == "per_sample_FASTQ": st = self.study.sample_template - if 'env_package' in st.categories: + if "env_package" in st.categories: sql = f"""SELECT DISTINCT sample_values->>'env_package' FROM qiita.sample_{st.id} WHERE sample_id in ( SELECT sample_id from qiita.preparation_artifact @@ -1616,7 +1711,7 @@ def has_human(self): qdb.sql_connection.TRN.add(sql) for v in qdb.sql_connection.TRN.execute_fetchflatten(): # str is needed as v could be None - if str(v).startswith('human-'): + if str(v).startswith("human-"): has_human = True break @@ -1661,8 +1756,10 @@ def jobs(self, cmd=None, status=None, show_hidden=False): sql_args.append(False) qdb.sql_connection.TRN.add(sql, sql_args) - return [qdb.processing_job.ProcessingJob(jid) - for jid in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + qdb.processing_job.ProcessingJob(jid) + for jid in qdb.sql_connection.TRN.execute_fetchflatten() + ] @property def get_commands(self): @@ -1690,9 +1787,12 @@ def get_commands(self): # get the workflows that match this artifact so we can filter # the available commands based on the commands in the worflows # for that artifact - except is the artifact_type == 'BIOM' - if self.artifact_type != 'BIOM': - dws = [w for w in qdb.software.DefaultWorkflow.iter() - if self.data_type in w.data_type] + if self.artifact_type != "BIOM": + dws = [ + w + for w in qdb.software.DefaultWorkflow.iter() + if self.data_type in w.data_type + ] else: sql += " AND is_analysis = True" @@ -1700,8 +1800,9 @@ def get_commands(self): cids = set(qdb.sql_connection.TRN.execute_fetchflatten()) if dws: - cmds = {n.default_parameter.command.id - for w in dws for n in w.graph.nodes} + cmds = { + n.default_parameter.command.id for w in dws for n in w.graph.nodes + } cids = cmds & cids return [qdb.software.Command(cid) for cid in cids] @@ 
-1746,8 +1847,7 @@ def human_reads_filter_method(self, value): idx = qdb.sql_connection.TRN.execute_fetchflatten() if len(idx) == 0: - raise ValueError( - f'"{value}" is not a valid human_reads_filter_method') + raise ValueError(f'"{value}" is not a valid human_reads_filter_method') sql = """UPDATE qiita.artifact SET human_reads_filter_method_id = %s diff --git a/qiita_db/base.py b/qiita_db/base.py index 5a069359b..51ed61e76 100644 --- a/qiita_db/base.py +++ b/qiita_db/base.py @@ -23,9 +23,9 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +import qiita_db as qdb from qiita_core.exceptions import IncompetentQiitaDeveloperError from qiita_core.qiita_settings import qiita_config -import qiita_db as qdb class QiitaObject(object): @@ -109,7 +109,8 @@ def _check_subclass(cls): """ if cls._table is None: raise IncompetentQiitaDeveloperError( - "Could not instantiate an object of the base class") + "Could not instantiate an object of the base class" + ) def _check_id(self, id_): r"""Check that the provided ID actually exists on the database @@ -173,10 +174,11 @@ def __init__(self, id_): # as strings (e.g., '5'). Therefore, explicit type-checking is needed # here to accommodate these possibilities. if not isinstance(id_, (int, str)): - raise TypeError("id_ must be a numerical or text type (not %s) " - "when instantiating " - "%s" % (id_.__class__.__name__, - self.__class__.__name__)) + raise TypeError( + "id_ must be a numerical or text type (not %s) " + "when instantiating " + "%s" % (id_.__class__.__name__, self.__class__.__name__) + ) if isinstance(id_, (str)): if id_.isdigit(): @@ -187,7 +189,7 @@ def __init__(self, id_): try: _id = self._check_id(id_) except ValueError as error: - if 'INVALID_TEXT_REPRESENTATION' not in str(error): + if "INVALID_TEXT_REPRESENTATION" not in str(error): raise error _id = False @@ -197,7 +199,8 @@ def __init__(self, id_): if not self._check_portal(id_): raise qdb.exceptions.QiitaDBError( "%s with id %d inaccessible in current portal: %s" - % (self.__class__.__name__, id_, qiita_config.portal)) + % (self.__class__.__name__, id_, qiita_config.portal) + ) self._id = id_ diff --git a/qiita_db/commands.py b/qiita_db/commands.py index d0c5a48d3..70ce22a8f 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -6,17 +6,17 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- +from configparser import ConfigParser from functools import partial from json import loads import qiita_db as qdb -from configparser import ConfigParser - - -SUPPORTED_PARAMS = ['preprocessed_sequence_illumina_params', - 'preprocessed_sequence_454_params', - 'processed_params_sortmerna'] +SUPPORTED_PARAMS = [ + "preprocessed_sequence_illumina_params", + "preprocessed_sequence_454_params", + "processed_params_sortmerna", +] def load_study_from_cmd(owner, title, info): @@ -36,18 +36,29 @@ def load_study_from_cmd(owner, title, info): config = ConfigParser() config.readfp(info) - optional = dict(config.items('optional')) + optional = dict(config.items("optional")) def get_optional(name): return optional.get(name, None) - get_required = partial(config.get, 'required') - required_fields = ['timeseries_type_id', 'mixs_compliant', - 'reprocess', 'study_alias', - 'study_description', 'study_abstract', - 'metadata_complete', 'principal_investigator'] - optional_fields = ['funding', 'most_recent_contact', 'spatial_series', - 'vamps_id', 'study_id'] + get_required = partial(config.get, "required") + required_fields = [ + "timeseries_type_id", + "mixs_compliant", + "reprocess", + "study_alias", + "study_description", + "study_abstract", + "metadata_complete", + "principal_investigator", + ] + optional_fields = [ + "funding", + "most_recent_contact", + "spatial_series", + "vamps_id", + "study_id", + ] infodict = {} for value in required_fields: infodict[value] = get_required(value) @@ -58,24 +69,32 @@ def get_optional(name): infodict[value] = optvalue with qdb.sql_connection.TRN: - lab_name_email = get_optional('lab_person') + lab_name_email = get_optional("lab_person") if lab_name_email is not None: - lab_name, lab_email, lab_affiliation = lab_name_email.split(',') - infodict['lab_person_id'] = qdb.study.StudyPerson.create( - lab_name.strip(), lab_email.strip(), lab_affiliation.strip()) + lab_name, lab_email, lab_affiliation = lab_name_email.split(",") + infodict["lab_person_id"] = qdb.study.StudyPerson.create( + lab_name.strip(), lab_email.strip(), lab_affiliation.strip() + ) - pi_name_email = infodict.pop('principal_investigator') - pi_name, pi_email, pi_affiliation = pi_name_email.split(',', 2) - infodict['principal_investigator_id'] = qdb.study.StudyPerson.create( - pi_name.strip(), pi_email.strip(), pi_affiliation.strip()) + pi_name_email = infodict.pop("principal_investigator") + pi_name, pi_email, pi_affiliation = pi_name_email.split(",", 2) + infodict["principal_investigator_id"] = qdb.study.StudyPerson.create( + pi_name.strip(), pi_email.strip(), pi_affiliation.strip() + ) return qdb.study.Study.create(qdb.user.User(owner), title, infodict) -def load_artifact_from_cmd(filepaths, filepath_types, artifact_type, - prep_template=None, parents=None, - dflt_params_id=None, required_params=None, - optional_params=None): +def load_artifact_from_cmd( + filepaths, + filepath_types, + artifact_type, + prep_template=None, + parents=None, + dflt_params_id=None, + required_params=None, + optional_params=None, +): r"""Adds an artifact to the system Parameters @@ -109,21 +128,26 @@ def load_artifact_from_cmd(filepaths, filepath_types, artifact_type, length """ if len(filepaths) != len(filepath_types): - raise ValueError("Please provide exactly one filepath_type for each " - "and every filepath") + raise ValueError( + "Please provide exactly one filepath_type for each and every filepath" + ) with qdb.sql_connection.TRN: 
fp_types_dict = qdb.util.get_filepath_types() - fps = [(fp, fp_types_dict[ftype]) - for fp, ftype in zip(filepaths, filepath_types)] + fps = [ + (fp, fp_types_dict[ftype]) for fp, ftype in zip(filepaths, filepath_types) + ] if prep_template: prep_template = qdb.metadata_template.prep_template.PrepTemplate( - prep_template) + prep_template + ) if parents: if len(parents) > 1 and required_params is None: - raise ValueError("When you pass more than 1 parent you need " - "to also pass required_params") + raise ValueError( + "When you pass more than 1 parent you need " + "to also pass required_params" + ) parents = [qdb.artifact.Artifact(pid) for pid in parents] params = None @@ -136,11 +160,17 @@ def load_artifact_from_cmd(filepaths, filepath_types, artifact_type, optional_dict = loads(optional_params) if optional_params else None params = qdb.software.Parameters.from_default_params( qdb.software.DefaultParameters(dflt_params_id), - required_dict, optional_dict) + required_dict, + optional_dict, + ) return qdb.artifact.Artifact.create( - fps, artifact_type, prep_template=prep_template, parents=parents, - processing_parameters=params) + fps, + artifact_type, + prep_template=prep_template, + parents=parents, + processing_parameters=params, + ) def load_sample_template_from_cmd(sample_temp_path, study_id): @@ -154,9 +184,11 @@ def load_sample_template_from_cmd(sample_temp_path, study_id): The study id to which the sample template belongs """ sample_temp = qdb.metadata_template.util.load_template_to_dataframe( - sample_temp_path) + sample_temp_path + ) return qdb.metadata_template.sample_template.SampleTemplate.create( - sample_temp, qdb.study.Study(study_id)) + sample_temp, qdb.study.Study(study_id) + ) def load_prep_template_from_cmd(prep_temp_path, study_id, data_type): @@ -171,10 +203,10 @@ def load_prep_template_from_cmd(prep_temp_path, study_id, data_type): data_type : str The data type of the prep template """ - prep_temp = qdb.metadata_template.util.load_template_to_dataframe( - prep_temp_path) + prep_temp = qdb.metadata_template.util.load_template_to_dataframe(prep_temp_path) return qdb.metadata_template.prep_template.PrepTemplate.create( - prep_temp, qdb.study.Study(study_id), data_type) + prep_temp, qdb.study.Study(study_id), data_type + ) def update_artifact_from_cmd(filepaths, filepath_types, artifact_id): @@ -199,20 +231,21 @@ def update_artifact_from_cmd(filepaths, filepath_types, artifact_id): If 'filepaths' and 'filepath_types' do not have the same length """ if len(filepaths) != len(filepath_types): - raise ValueError("Please provide exactly one filepath_type for each " - "and every filepath") + raise ValueError( + "Please provide exactly one filepath_type for each and every filepath" + ) with qdb.sql_connection.TRN: artifact = qdb.artifact.Artifact(artifact_id) fp_types_dict = qdb.util.get_filepath_types() - fps = [(fp, fp_types_dict[ftype]) - for fp, ftype in zip(filepaths, filepath_types)] + fps = [ + (fp, fp_types_dict[ftype]) for fp, ftype in zip(filepaths, filepath_types) + ] old_fps = artifact.filepaths sql = "DELETE FROM qiita.artifact_filepath WHERE artifact_id = %s" qdb.sql_connection.TRN.add(sql, [artifact.id]) qdb.sql_connection.TRN.execute() qdb.util.move_filepaths_to_upload_folder(artifact.study.id, old_fps) - fp_ids = qdb.util.insert_filepaths( - fps, artifact.id, artifact.artifact_type) + fp_ids = qdb.util.insert_filepaths(fps, artifact.id, artifact.artifact_type) sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) VALUES (%s, %s)""" sql_args = 
[[artifact.id, fp_id] for fp_id in fp_ids] diff --git a/qiita_db/download_link.py b/qiita_db/download_link.py index 2fd7c971f..7683314d8 100644 --- a/qiita_db/download_link.py +++ b/qiita_db/download_link.py @@ -6,10 +6,11 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -import qiita_db as qdb +from datetime import datetime, timezone from jose import jwt as jose_jwt -from datetime import datetime, timezone + +import qiita_db as qdb from qiita_core.qiita_settings import qiita_config @@ -46,16 +47,13 @@ def create(cls, jwt): If the jti already exists in the database """ - jwt_data = jose_jwt.decode(jwt, - qiita_config.jwt_secret, - algorithms='HS256') + jwt_data = jose_jwt.decode(jwt, qiita_config.jwt_secret, algorithms="HS256") jti = jwt_data["jti"] exp = datetime.utcfromtimestamp(jwt_data["exp"] / 1000) with qdb.sql_connection.TRN: if cls.exists(jti): - raise qdb.exceptions.QiitaDBDuplicateError( - "JTI Already Exists") + raise qdb.exceptions.QiitaDBDuplicateError("JTI Already Exists") # insert token into database sql = """INSERT INTO qiita.{0} (jti, jwt, exp) diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py index 12226c0af..427dd2f68 100644 --- a/qiita_db/environment_manager.py +++ b/qiita_db/environment_manager.py @@ -6,31 +6,29 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from os.path import abspath, dirname, join, exists, basename, splitext -from shutil import copytree -from functools import partial -from os import mkdir import gzip +from functools import partial from glob import glob +from os import mkdir +from os.path import abspath, basename, dirname, exists, join, splitext +from shutil import copytree +from urllib.request import urlretrieve + from natsort import natsorted +import qiita_db as qdb from qiita_core.exceptions import QiitaEnvironmentError from qiita_core.qiita_settings import qiita_config, r_client -import qiita_db as qdb - -from urllib.request import urlretrieve - -get_support_file = partial(join, join(dirname(abspath(__file__)), - 'support_files')) +get_support_file = partial(join, join(dirname(abspath(__file__)), "support_files")) reference_base_dir = join(qiita_config.base_data_dir, "reference") get_reference_fp = partial(join, reference_base_dir) -SETTINGS_FP = get_support_file('qiita-db-settings.sql') -LAYOUT_FP = get_support_file('qiita-db-unpatched.sql') -POPULATE_FP = get_support_file('populate_test_db.sql') -PATCHES_DIR = get_support_file('patches') +SETTINGS_FP = get_support_file("qiita-db-settings.sql") +LAYOUT_FP = get_support_file("qiita-db-unpatched.sql") +POPULATE_FP = get_support_file("populate_test_db.sql") +PATCHES_DIR = get_support_file("patches") def create_layout(test=False, verbose=False): @@ -43,7 +41,7 @@ def create_layout(test=False, verbose=False): """ with qdb.sql_connection.TRN: if verbose: - print('Building SQL layout') + print("Building SQL layout") # Create the schema with open(LAYOUT_FP, newline=None) as f: qdb.sql_connection.TRN.add(f.read()) @@ -58,26 +56,26 @@ def _populate_test_db(): def _add_ontology_data(): - print('Loading Ontology Data') + print("Loading Ontology Data") if not exists(reference_base_dir): mkdir(reference_base_dir) - fp = get_reference_fp('ontologies.sql.gz') + fp = get_reference_fp("ontologies.sql.gz") if exists(fp): - print("SKIPPING download of ontologies: File 
already exists at %s. " - "To download the file again, delete the existing file first." - % fp) + print( + "SKIPPING download of ontologies: File already exists at %s. " + "To download the file again, delete the existing file first." % fp + ) else: - url = 'ftp://ftp.microbio.me/pub/qiita/qiita_ontoandvocab.sql.gz' + url = "ftp://ftp.microbio.me/pub/qiita/qiita_ontoandvocab.sql.gz" try: urlretrieve(url, fp) except Exception: - raise IOError("Error: Could not fetch ontologies file from %s" % - url) + raise IOError("Error: Could not fetch ontologies file from %s" % url) with qdb.sql_connection.TRN: - with gzip.open(fp, 'rb') as f: + with gzip.open(fp, "rb") as f: qdb.sql_connection.TRN.add(f.read()) qdb.sql_connection.TRN.execute() @@ -94,44 +92,58 @@ def _insert_processed_params(ref): def _download_reference_files(): - print('Downloading reference files') + print("Downloading reference files") if not exists(reference_base_dir): mkdir(reference_base_dir) - files = {'tree': (get_reference_fp('gg_13_8-97_otus.tree'), - 'ftp://ftp.microbio.me/greengenes_release/' - 'gg_13_8_otus/trees/97_otus.tree'), - 'taxonomy': (get_reference_fp('gg_13_8-97_otu_taxonomy.txt'), - 'ftp://ftp.microbio.me/greengenes_release/' - 'gg_13_8_otus/taxonomy/97_otu_taxonomy.txt'), - 'sequence': (get_reference_fp('gg_13_8-97_otus.fasta'), - 'ftp://ftp.microbio.me/greengenes_release/' - 'gg_13_8_otus/rep_set/97_otus.fasta')} + files = { + "tree": ( + get_reference_fp("gg_13_8-97_otus.tree"), + "ftp://ftp.microbio.me/greengenes_release/gg_13_8_otus/trees/97_otus.tree", + ), + "taxonomy": ( + get_reference_fp("gg_13_8-97_otu_taxonomy.txt"), + "ftp://ftp.microbio.me/greengenes_release/" + "gg_13_8_otus/taxonomy/97_otu_taxonomy.txt", + ), + "sequence": ( + get_reference_fp("gg_13_8-97_otus.fasta"), + "ftp://ftp.microbio.me/greengenes_release/" + "gg_13_8_otus/rep_set/97_otus.fasta", + ), + } for file_type, (local_fp, url) in files.items(): # Do not download the file if it exists already if exists(local_fp): - print("SKIPPING %s: file already exists at %s. To " - "download the file again, erase the existing file first" % - (file_type, local_fp)) + print( + "SKIPPING %s: file already exists at %s. To " + "download the file again, erase the existing file first" + % (file_type, local_fp) + ) else: try: urlretrieve(url, local_fp) except Exception: - raise IOError("Error: Could not fetch %s file from %s" % - (file_type, url)) + raise IOError( + "Error: Could not fetch %s file from %s" % (file_type, url) + ) with qdb.sql_connection.TRN: ref = qdb.reference.Reference.create( - 'Greengenes', '13_8', files['sequence'][0], - files['taxonomy'][0], files['tree'][0]) + "Greengenes", + "13_8", + files["sequence"][0], + files["taxonomy"][0], + files["tree"][0], + ) _insert_processed_params(ref) def create_mountpoints(): r"""In a fresh qiita setup, sub-directories under - qiita_config.base_data_dir might not yet exist. To avoid failing in - later steps, they are created here. + qiita_config.base_data_dir might not yet exist. To avoid failing in + later steps, they are created here. 
""" with qdb.sql_connection.TRN: sql = """SELECT DISTINCT mountpoint FROM qiita.data_directory @@ -139,23 +151,28 @@ def create_mountpoints(): qdb.sql_connection.TRN.add(sql) created_subdirs = [] for mountpoint in qdb.sql_connection.TRN.execute_fetchflatten(): - for (ddid, subdir) in qdb.util.get_mountpoint(mountpoint, - retrieve_all=True): + for ddid, subdir in qdb.util.get_mountpoint(mountpoint, retrieve_all=True): if not exists(join(qiita_config.base_data_dir, subdir)): if qiita_config.test_environment: # if in test mode, we want to potentially fill the # new directory with according test data - copytree(get_support_file('test_data', mountpoint), - join(qiita_config.base_data_dir, subdir)) + copytree( + get_support_file("test_data", mountpoint), + join(qiita_config.base_data_dir, subdir), + ) else: # in production mode, an empty directory is created mkdir(join(qiita_config.base_data_dir, subdir)) created_subdirs.append(subdir) if len(created_subdirs) > 0: - print("Created %i sub-directories as 'mount points':\n%s" - % (len(created_subdirs), - ''.join(map(lambda x: ' - %s\n' % x, created_subdirs)))) + print( + "Created %i sub-directories as 'mount points':\n%s" + % ( + len(created_subdirs), + "".join(map(lambda x: " - %s\n" % x, created_subdirs)), + ) + ) def make_environment(load_ontologies, download_reference, add_demo_user): @@ -180,28 +197,32 @@ def make_environment(load_ontologies, download_reference, add_demo_user): If the environment already exists """ if load_ontologies and qiita_config.test_environment: - raise EnvironmentError("Cannot load ontologies in a test environment! " - "Pass --no-load-ontologies, or set " - "TEST_ENVIRONMENT = FALSE in your " - "configuration") + raise EnvironmentError( + "Cannot load ontologies in a test environment! " + "Pass --no-load-ontologies, or set " + "TEST_ENVIRONMENT = FALSE in your " + "configuration" + ) # Connect to the postgres server with qdb.sql_connection.TRNADMIN: - sql = 'SELECT datname FROM pg_database WHERE datname = %s' + sql = "SELECT datname FROM pg_database WHERE datname = %s" qdb.sql_connection.TRNADMIN.add(sql, [qiita_config.database]) if qdb.sql_connection.TRNADMIN.execute_fetchflatten(): raise QiitaEnvironmentError( "Database {0} already present on the system. 
You can drop it " - "by running 'qiita-env drop'".format(qiita_config.database)) + "by running 'qiita-env drop'".format(qiita_config.database) + ) # Create the database - print('Creating database') + print("Creating database") create_settings_table = True try: with qdb.sql_connection.TRNADMIN: qdb.sql_connection.TRNADMIN.add( - 'CREATE DATABASE %s' % qiita_config.database) + "CREATE DATABASE %s" % qiita_config.database + ) qdb.sql_connection.TRNADMIN.execute() qdb.sql_connection.TRN.close() except ValueError as error: @@ -215,7 +236,7 @@ def make_environment(load_ontologies, download_reference, add_demo_user): qdb.sql_connection.TRN.add(sql) is_test = qdb.sql_connection.TRN.execute_fetchlast() if not is_test: - print('Not a test database') + print("Not a test database") raise create_settings_table = False else: @@ -223,7 +244,7 @@ def make_environment(load_ontologies, download_reference, add_demo_user): qdb.sql_connection.TRNADMIN.close() with qdb.sql_connection.TRN: - print('Inserting database metadata') + print("Inserting database metadata") test = qiita_config.test_environment verbose = True if create_settings_table: @@ -236,9 +257,8 @@ def make_environment(load_ontologies, download_reference, add_demo_user): (test, base_data_dir, base_work_dir) VALUES (%s, %s, %s)""" qdb.sql_connection.TRN.add( - sql, [test, - qiita_config.base_data_dir, - qiita_config.working_dir]) + sql, [test, qiita_config.base_data_dir, qiita_config.working_dir] + ) qdb.sql_connection.TRN.execute() create_layout(test=test, verbose=verbose) patch(verbose=verbose, test=test) @@ -249,9 +269,8 @@ def make_environment(load_ontologies, download_reference, add_demo_user): # these values can only be added if the environment is being loaded # with the ontologies, thus this cannot exist inside intialize.sql # because otherwise loading the ontologies would be a requirement - ontology = qdb.ontology.Ontology( - qdb.util.convert_to_id('ENA', 'ontology')) - ontology.add_user_defined_term('Amplicon Sequencing') + ontology = qdb.ontology.Ontology(qdb.util.convert_to_id("ENA", "ontology")) + ontology.add_user_defined_term("Amplicon Sequencing") if download_reference: _download_reference_files() @@ -278,25 +297,26 @@ def make_environment(load_ontologies, download_reference, add_demo_user): # Add default analysis to all portals sql = "SELECT portal_type_id FROM qiita.portal_type" qdb.sql_connection.TRN.add(sql) - args = [[analysis_id, p_id] - for p_id in qdb.sql_connection.TRN.execute_fetchflatten()] + args = [ + [analysis_id, p_id] + for p_id in qdb.sql_connection.TRN.execute_fetchflatten() + ] sql = """INSERT INTO qiita.analysis_portal (analysis_id, portal_type_id) VALUES (%s, %s)""" qdb.sql_connection.TRN.add(sql, args, many=True) qdb.sql_connection.TRN.execute() - print('Demo user successfully created') + print("Demo user successfully created") if qiita_config.test_environment: - print('Test environment successfully created') + print("Test environment successfully created") else: - print('Production environment successfully created') + print("Production environment successfully created") def drop_environment(ask_for_confirmation): - """Drops the database specified in the configuration - """ + """Drops the database specified in the configuration""" # The transaction has an open connection to the database, so we need # to close it in order to drop the environment qdb.sql_connection.TRN.close() @@ -304,8 +324,7 @@ def drop_environment(ask_for_confirmation): with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add("SELECT test FROM 
settings") try: - is_test_environment = \ - qdb.sql_connection.TRN.execute_fetchflatten()[0] + is_test_environment = qdb.sql_connection.TRN.execute_fetchflatten()[0] except ValueError as e: # if settings doesn't exist then is fine to treat this as a test # environment and clean up @@ -319,31 +338,31 @@ def drop_environment(ask_for_confirmation): do_drop = True else: if ask_for_confirmation: - confirm = '' - while confirm not in ('Y', 'y', 'N', 'n'): - confirm = input("THIS IS NOT A TEST ENVIRONMENT.\n" - "Proceed with drop? (y/n)") + confirm = "" + while confirm not in ("Y", "y", "N", "n"): + confirm = input( + "THIS IS NOT A TEST ENVIRONMENT.\nProceed with drop? (y/n)" + ) - do_drop = confirm in ('Y', 'y') + do_drop = confirm in ("Y", "y") else: do_drop = True if do_drop: with qdb.sql_connection.TRNADMIN: - qdb.sql_connection.TRNADMIN.add( - 'DROP DATABASE %s' % qiita_config.database) + qdb.sql_connection.TRNADMIN.add("DROP DATABASE %s" % qiita_config.database) qdb.sql_connection.TRNADMIN.execute() else: - print('ABORTING') + print("ABORTING") def drop_and_rebuild_tst_database(drop_labcontrol=False): """Drops the qiita schema and rebuilds the test database - Parameters - ---------- - drop_labcontrol : bool - Whether or not to drop labcontrol + Parameters + ---------- + drop_labcontrol : bool + Whether or not to drop labcontrol """ with qdb.sql_connection.TRN: r_client.flushdb() @@ -354,8 +373,7 @@ def drop_and_rebuild_tst_database(drop_labcontrol=False): qdb.sql_connection.TRN.add("DROP SCHEMA IF EXISTS labman CASCADE") qdb.sql_connection.TRN.add("DROP SCHEMA IF EXISTS qiita CASCADE") # Set the database to unpatched - qdb.sql_connection.TRN.add( - "UPDATE settings SET current_patch = 'unpatched'") + qdb.sql_connection.TRN.add("UPDATE settings SET current_patch = 'unpatched'") # Create the database and apply patches create_layout(test=True) patch(test=True) @@ -390,14 +408,17 @@ def clean_test_environment(): test_db = qdb.sql_connection.TRN.execute_fetchflatten()[0] if not qiita_config.test_environment or not test_db: - raise RuntimeError("Working in a production environment. Not " - "executing the test cleanup to keep the production " - "database safe.") + raise RuntimeError( + "Working in a production environment. Not " + "executing the test cleanup to keep the production " + "database safe." 
+ ) # wrap the dummy function and execute it @reset_test_database def dummyfunc(): pass + dummyfunc() @@ -411,13 +432,13 @@ def patch(patches_dir=PATCHES_DIR, verbose=False, test=False): qdb.sql_connection.TRN.add("SELECT current_patch FROM settings") current_patch = qdb.sql_connection.TRN.execute_fetchlast() current_sql_patch_fp = join(patches_dir, current_patch) - corresponding_py_patch = partial(join, patches_dir, 'python_patches') - corresponding_test_sql = partial(join, patches_dir, 'test_db_sql') + corresponding_py_patch = partial(join, patches_dir, "python_patches") + corresponding_test_sql = partial(join, patches_dir, "test_db_sql") - sql_glob = join(patches_dir, '*.sql') + sql_glob = join(patches_dir, "*.sql") sql_patch_files = natsorted(glob(sql_glob)) - if current_patch == 'unpatched': + if current_patch == "unpatched": next_patch_index = 0 elif current_sql_patch_fp not in sql_patch_files: raise RuntimeError("Cannot find patch file %s" % current_patch) @@ -436,21 +457,19 @@ def patch(patches_dir=PATCHES_DIR, verbose=False, test=False): sql_patch_filename = basename(sql_patch_fp) patch_prefix = splitext(basename(sql_patch_fp))[0] - py_patch_fp = corresponding_py_patch(f'{patch_prefix}.py') - test_sql_fp = corresponding_test_sql(f'{patch_prefix}.sql') + py_patch_fp = corresponding_py_patch(f"{patch_prefix}.py") + test_sql_fp = corresponding_test_sql(f"{patch_prefix}.sql") with qdb.sql_connection.TRN: with open(sql_patch_fp, newline=None) as patch_file: if verbose: - print('\tApplying patch %s...' % sql_patch_filename) + print("\tApplying patch %s..." % sql_patch_filename) qdb.sql_connection.TRN.add(patch_file.read()) - qdb.sql_connection.TRN.add( - patch_update_sql, [sql_patch_filename]) + qdb.sql_connection.TRN.add(patch_update_sql, [sql_patch_filename]) if test and exists(test_sql_fp): if verbose: - print('\t\tApplying test SQL %s...' - % basename(test_sql_fp)) + print("\t\tApplying test SQL %s..." % basename(test_sql_fp)) with open(test_sql_fp) as test_sql: qdb.sql_connection.TRN.add(test_sql.read()) @@ -458,8 +477,7 @@ def patch(patches_dir=PATCHES_DIR, verbose=False, test=False): if exists(py_patch_fp): if verbose: - print('\t\tApplying python patch %s...' - % basename(py_patch_fp)) + print("\t\tApplying python patch %s..." 
% basename(py_patch_fp)) with open(py_patch_fp) as py_patch: exec(py_patch.read(), globals()) diff --git a/qiita_db/exceptions.py b/qiita_db/exceptions.py index 69e63e6dc..1f02867d5 100644 --- a/qiita_db/exceptions.py +++ b/qiita_db/exceptions.py @@ -12,41 +12,49 @@ class QiitaDBError(QiitaError): """Base class for all qiita_db exceptions""" + pass class QiitaDBNotImplementedError(QiitaDBError): """""" + pass class QiitaDBExecutionError(QiitaDBError): """Exception for error when executing SQL queries""" + pass class QiitaDBConnectionError(QiitaDBError): """Exception for error when connecting to the db""" + pass class QiitaDBColumnError(QiitaDBError): """Exception when missing table information or excess information passed""" + pass class QiitaDBLookupError(QiitaDBError, LookupError): """Exception when converting or getting non-existant values in DB""" + pass class QiitaDBOperationNotPermittedError(QiitaDBError): """Exception when perofrming an operation not permitted""" + pass class QiitaDBArtifactCreationError(QiitaDBError): """Exception when creating an artifact""" + def __init__(self, reason): super(QiitaDBArtifactCreationError, self).__init__() self.args = (f"Cannot create artifact: {reason}",) @@ -54,6 +62,7 @@ def __init__(self, reason): class QiitaDBArtifactDeletionError(QiitaDBError): """Exception when deleting an artifact""" + def __init__(self, a_id, reason): super(QiitaDBArtifactDeletionError, self).__init__() self.args = (f"Cannot delete artifact {a_id}: {reason}",) @@ -61,53 +70,70 @@ def __init__(self, a_id, reason): class QiitaDBDuplicateError(QiitaDBError): """Exception when duplicating something in the database""" + def __init__(self, obj_name, attributes): super(QiitaDBDuplicateError, self).__init__() - self.args = ("The '%s' object with attributes (%s) already exists." - % (obj_name, attributes),) + self.args = ( + "The '%s' object with attributes (%s) already exists." + % (obj_name, attributes), + ) class QiitaDBStatusError(QiitaDBError): """Exception when editing is done with an unallowed status""" + pass class QiitaDBUnknownIDError(QiitaDBError): """Exception for error when an object does not exists in the DB""" + def __init__(self, missing_id, table): super(QiitaDBUnknownIDError, self).__init__() - self.args = ("The object with ID '%s' does not exists in table '%s'" - % (missing_id, table),) + self.args = ( + "The object with ID '%s' does not exists in table '%s'" + % (missing_id, table), + ) class QiitaDBDuplicateHeaderError(QiitaDBError): """Exception for error when a MetadataTemplate has duplicate columns""" + def __init__(self, repeated_headers): super(QiitaDBDuplicateHeaderError, self).__init__() - self.args = ("Duplicate headers found in MetadataTemplate. Note " - "that the headers are not case-sensitive, repeated " - "header(s): %s." % ', '.join(repeated_headers),) + self.args = ( + "Duplicate headers found in MetadataTemplate. Note " + "that the headers are not case-sensitive, repeated " + "header(s): %s." % ", ".join(repeated_headers), + ) class QiitaDBDuplicateSamplesError(QiitaDBError): """Exception for error when a MetadataTemplate has duplicate columns""" + def __init__(self, repeated_samples): super(QiitaDBDuplicateSamplesError, self).__init__() - self.args = ("Duplicate samples found in MetadataTemplate: %s." - % ', '.join(repeated_samples),) + self.args = ( + "Duplicate samples found in MetadataTemplate: %s." 
+ % ", ".join(repeated_samples), + ) class QiitaDBIncompatibleDatatypeError(QiitaDBError): """When arguments are used with incompatible operators in a query""" + def __init__(self, operator, argument_type): super(QiitaDBIncompatibleDatatypeError, self).__init__() - self.args = ("The %s operator is not for use with data of type %s" % - (operator, str(argument_type))) + self.args = "The %s operator is not for use with data of type %s" % ( + operator, + str(argument_type), + ) class QiitaDBWarning(UserWarning): """Warning specific for the QiitaDB domain""" + pass -warnings.simplefilter('always', QiitaDBWarning) +warnings.simplefilter("always", QiitaDBWarning) diff --git a/qiita_db/handlers/analysis.py b/qiita_db/handlers/analysis.py index 1d4dddd5e..f1e19c75f 100644 --- a/qiita_db/handlers/analysis.py +++ b/qiita_db/handlers/analysis.py @@ -6,11 +6,13 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +from json import dumps + from tornado import gen from tornado.web import HTTPError -from json import dumps import qiita_db as qdb + from .oauth2 import OauthBaseHandler, authenticate_oauth @@ -39,8 +41,9 @@ def _get_analysis(a_id): except qdb.exceptions.QiitaDBUnknownIDError: raise HTTPError(404) except Exception as e: - raise HTTPError(500, reason='Error instantiating analysis %s: %s' - % (a_id, str(e))) + raise HTTPError( + 500, reason="Error instantiating analysis %s: %s" % (a_id, str(e)) + ) return a @@ -69,18 +72,18 @@ async def get(self, analysis_id): response = None with qdb.sql_connection.TRN: a = _get_analysis(analysis_id) - mf_fp = qdb.util.get_filepath_information( - a.mapping_file)['fullpath'] + mf_fp = qdb.util.get_filepath_information(a.mapping_file)["fullpath"] if mf_fp is not None: df = qdb.metadata_template.util.load_template_to_dataframe( - mf_fp, index='#SampleID') - response = dumps(df.to_dict(orient='index')) + mf_fp, index="#SampleID" + ) + response = dumps(df.to_dict(orient="index")) if response is not None: - crange = range(chunk_len, len(response)+chunk_len, chunk_len) + crange = range(chunk_len, len(response) + chunk_len, chunk_len) for i, (win) in enumerate(crange): # sending the chunk and flushing - chunk = response[i*chunk_len:win] + chunk = response[i * chunk_len : win] self.write(chunk) await self.flush() diff --git a/qiita_db/handlers/archive.py b/qiita_db/handlers/archive.py index 54224c116..9301a7d2e 100644 --- a/qiita_db/handlers/archive.py +++ b/qiita_db/handlers/archive.py @@ -6,11 +6,13 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from .oauth2 import OauthBaseHandler, authenticate_oauth -from qiita_db.processing_job import ProcessingJob -from qiita_db.archive import Archive from json import loads +from qiita_db.archive import Archive +from qiita_db.processing_job import ProcessingJob + +from .oauth2 import OauthBaseHandler, authenticate_oauth + class APIArchiveObservations(OauthBaseHandler): @authenticate_oauth @@ -32,12 +34,13 @@ def post(self): Feature identifiers not found in the archive won't be included in the return dictionary. 
""" - job_id = self.get_argument('job_id') - features = self.request.arguments['features'] + job_id = self.get_argument("job_id") + features = self.request.arguments["features"] ms = Archive.get_merging_scheme_from_job(ProcessingJob(job_id)) response = Archive.retrieve_feature_values( - archive_merging_scheme=ms, features=features) + archive_merging_scheme=ms, features=features + ) self.write(response) @@ -52,8 +55,8 @@ def patch(self): Argument "value" is a json string, i.e. result of a json.dump(obj) of a dictionary, keyed with feature identifiers. """ - req_path = self.get_argument('path') - req_value = self.get_argument('value') + req_path = self.get_argument("path") + req_value = self.get_argument("value") ms = Archive.get_merging_scheme_from_job(ProcessingJob(req_path)) diff --git a/qiita_db/handlers/artifact.py b/qiita_db/handlers/artifact.py index a34c7be1f..ab817b796 100644 --- a/qiita_db/handlers/artifact.py +++ b/qiita_db/handlers/artifact.py @@ -6,12 +6,14 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from tornado.web import HTTPError from collections import defaultdict -from json import loads, dumps +from json import dumps, loads + +from tornado.web import HTTPError -from qiita_core.qiita_settings import r_client import qiita_db as qdb +from qiita_core.qiita_settings import r_client + from .oauth2 import OauthBaseHandler, authenticate_oauth @@ -40,8 +42,9 @@ def _get_artifact(a_id): except qdb.exceptions.QiitaDBUnknownIDError: raise HTTPError(404) except Exception as e: - raise HTTPError(500, reason='Error instantiating artifact %s: %s' - % (a_id, str(e))) + raise HTTPError( + 500, reason="Error instantiating artifact %s: %s" % (a_id, str(e)) + ) return artifact @@ -84,35 +87,41 @@ def get(self, artifact_id): study = artifact.study analysis = artifact.analysis response = { - 'name': artifact.name, - 'timestamp': str(artifact.timestamp), - 'visibility': artifact.visibility, - 'type': artifact.artifact_type, - 'data_type': artifact.data_type, - 'can_be_submitted_to_ebi': artifact.can_be_submitted_to_ebi, - 'can_be_submitted_to_vamps': - artifact.can_be_submitted_to_vamps, - 'prep_information': [p.id for p in artifact.prep_templates], - 'study': study.id if study else None, - 'analysis': analysis.id if analysis else None, - 'parents': [p.id for p in artifact.parents]} + "name": artifact.name, + "timestamp": str(artifact.timestamp), + "visibility": artifact.visibility, + "type": artifact.artifact_type, + "data_type": artifact.data_type, + "can_be_submitted_to_ebi": artifact.can_be_submitted_to_ebi, + "can_be_submitted_to_vamps": artifact.can_be_submitted_to_vamps, + "prep_information": [p.id for p in artifact.prep_templates], + "study": study.id if study else None, + "analysis": analysis.id if analysis else None, + "parents": [p.id for p in artifact.parents], + } params = artifact.processing_parameters - response['processing_parameters'] = ( - params.values if params is not None else None) + response["processing_parameters"] = ( + params.values if params is not None else None + ) - response['ebi_run_accessions'] = ( + response["ebi_run_accessions"] = ( artifact.ebi_run_accessions - if response['can_be_submitted_to_ebi'] else None) - response['is_submitted_to_vamps'] = ( + if response["can_be_submitted_to_ebi"] + else None + ) + response["is_submitted_to_vamps"] = ( artifact.is_submitted_to_vamps - if response['can_be_submitted_to_vamps'] else None) + if 
response["can_be_submitted_to_vamps"] + else None + ) # Instead of sending a list of files, provide the files as a # dictionary keyed by filepath type - response['files'] = defaultdict(list) + response["files"] = defaultdict(list) for x in artifact.filepaths: - response['files'][x['fp_type']].append( - {'filepath': x['fp'], 'size': x['fp_size']}) + response["files"][x["fp_type"]].append( + {"filepath": x["fp"], "size": x["fp_size"]} + ) self.write(response) @@ -125,21 +134,21 @@ def patch(self, artifact_id): artifact_id : str The id of the artifact whose information is being updated """ - req_op = self.get_argument('op') - req_path = self.get_argument('path') - req_value = self.get_argument('value') - - if req_op == 'add': - req_path = [v for v in req_path.split('/') if v] - if len(req_path) != 1 or req_path[0] != 'html_summary': - raise HTTPError(400, reason='Incorrect path parameter value') + req_op = self.get_argument("op") + req_path = self.get_argument("path") + req_value = self.get_argument("value") + + if req_op == "add": + req_path = [v for v in req_path.split("/") if v] + if len(req_path) != 1 or req_path[0] != "html_summary": + raise HTTPError(400, reason="Incorrect path parameter value") else: artifact = _get_artifact(artifact_id) try: html_data = loads(req_value) - html_fp = html_data['html'] - html_dir = html_data['dir'] + html_fp = html_data["html"] + html_dir = html_data["dir"] except ValueError: html_fp = req_value html_dir = None @@ -149,8 +158,11 @@ def patch(self, artifact_id): except Exception as e: raise HTTPError(500, reason=str(e)) else: - raise HTTPError(400, reason='Operation "%s" not supported. ' - 'Current supported operations: add' % req_op) + raise HTTPError( + 400, + reason='Operation "%s" not supported. ' + "Current supported operations: add" % req_op, + ) self.finish() @@ -180,18 +192,19 @@ def post(self): -------- qiita_db.artifact.Artifact.create """ - filepaths = loads(self.get_argument('filepaths')) - artifact_type = self.get_argument('type') - prep_template = self.get_argument('prep', None) - analysis = self.get_argument('analysis', None) - name = self.get_argument('name', None) - dtype = self.get_argument('data_type', None) - parents = self.get_argument('parents', None) - job_id = self.get_argument('job_id', None) + filepaths = loads(self.get_argument("filepaths")) + artifact_type = self.get_argument("type") + prep_template = self.get_argument("prep", None) + analysis = self.get_argument("analysis", None) + name = self.get_argument("name", None) + dtype = self.get_argument("data_type", None) + parents = self.get_argument("parents", None) + job_id = self.get_argument("job_id", None) if prep_template is not None: prep_template = qdb.metadata_template.prep_template.PrepTemplate( - prep_template) + prep_template + ) dtype = None if analysis is not None: analysis = qdb.analysis.Analysis(analysis) @@ -206,11 +219,17 @@ def post(self): pp = None a = qdb.artifact.Artifact.create( - filepaths, artifact_type, name=name, prep_template=prep_template, - parents=parents, processing_parameters=pp, - analysis=analysis, data_type=dtype) + filepaths, + artifact_type, + name=name, + prep_template=prep_template, + parents=parents, + processing_parameters=pp, + analysis=analysis, + data_type=dtype, + ) - self.write({'artifact': a.id}) + self.write({"artifact": a.id}) class ArtifactTypeHandler(OauthBaseHandler): @@ -230,7 +249,7 @@ def get(self): # [0][1]: get latest/active and the actual location atypes[atype] = mountpoints[0][1] # add the upload location - 
atypes['uploads'] = qdb.util.get_mountpoint('uploads')[0][1] + atypes["uploads"] = qdb.util.get_mountpoint("uploads")[0][1] self.write(atypes) @@ -254,16 +273,15 @@ def post(self): The list filepath types that the new artifact type supports, and if they're required or not in an artifact instance of this type """ - a_type = self.get_argument('type_name') - a_desc = self.get_argument('description') - ebi = self.get_argument('can_be_submitted_to_ebi') - vamps = self.get_argument('can_be_submitted_to_vamps') - raw = self.get_argument('is_user_uploadable') - fp_types = loads(self.get_argument('filepath_types')) + a_type = self.get_argument("type_name") + a_desc = self.get_argument("description") + ebi = self.get_argument("can_be_submitted_to_ebi") + vamps = self.get_argument("can_be_submitted_to_vamps") + raw = self.get_argument("is_user_uploadable") + fp_types = loads(self.get_argument("filepath_types")) try: - qdb.artifact.Artifact.create_type(a_type, a_desc, ebi, vamps, raw, - fp_types) + qdb.artifact.Artifact.create_type(a_type, a_desc, ebi, vamps, raw, fp_types) except qdb.exceptions.QiitaDBDuplicateError: # Ignoring this error as we want this endpoint in the rest api # to be idempotent. @@ -275,28 +293,31 @@ def post(self): class APIArtifactHandler(OauthBaseHandler): @authenticate_oauth def post(self): - user_email = self.get_argument('user_email') - job_id = self.get_argument('job_id', None) - prep_id = self.get_argument('prep_id', None) - atype = self.get_argument('artifact_type') - aname = self.get_argument('command_artifact_name', 'Name') - files = self.get_argument('files') - add_default_workflow = self.get_argument('add_default_workflow', False) + user_email = self.get_argument("user_email") + job_id = self.get_argument("job_id", None) + prep_id = self.get_argument("prep_id", None) + atype = self.get_argument("artifact_type") + aname = self.get_argument("command_artifact_name", "Name") + files = self.get_argument("files") + add_default_workflow = self.get_argument("add_default_workflow", False) if job_id is None and prep_id is None: - raise HTTPError( - 400, reason='You need to specify a job_id or a prep_id') + raise HTTPError(400, reason="You need to specify a job_id or a prep_id") if job_id is not None and prep_id is not None: raise HTTPError( - 400, reason='You need to specify only a job_id or a prep_id') + 400, reason="You need to specify only a job_id or a prep_id" + ) user = qdb.user.User(user_email) values = { - 'files': files, 'artifact_type': atype, 'name': aname, + "files": files, + "artifact_type": atype, + "name": aname, # leaving here in case we need to add a way to add an artifact # directly to an analysis, for more information see # ProcessingJob._complete_artifact_transformation - 'analysis': None} + "analysis": None, + } PJ = qdb.processing_job.ProcessingJob if job_id is not None: TN = qdb.sql_connection.TRN @@ -308,30 +329,34 @@ def post(self): TN.add(sql, [aname, job.command.id]) results = TN.execute_fetchflatten() if len(results) < 1: - raise HTTPError(400, 'The command_artifact_name does not ' - 'exist in the command') + raise HTTPError( + 400, "The command_artifact_name does not exist in the command" + ) cmd_out_id = results[0] - provenance = {'job': job_id, - 'cmd_out_id': cmd_out_id, - # direct_creation is a flag to avoid having to wait - # for the complete job to create the new artifact, - # which is normally ran during regular processing. 
- # Skipping is fine because we are adding an artifact - # to an existing job outside of regular processing - 'direct_creation': True, - 'name': aname} - values['provenance'] = dumps(provenance) + provenance = { + "job": job_id, + "cmd_out_id": cmd_out_id, + # direct_creation is a flag to avoid having to wait + # for the complete job to create the new artifact, + # which is normally ran during regular processing. + # Skipping is fine because we are adding an artifact + # to an existing job outside of regular processing + "direct_creation": True, + "name": aname, + } + values["provenance"] = dumps(provenance) # inherint the first prep info file from the first input artifact prep_id = job.input_artifacts[0].prep_templates[0].id else: prep_id = int(prep_id) - values['template'] = prep_id + values["template"] = prep_id cmd = qdb.software.Command.get_validator(atype) params = qdb.software.Parameters.load(cmd, values_dict=values) - if add_default_workflow or add_default_workflow == 'True': + if add_default_workflow or add_default_workflow == "True": pwk = qdb.processing_job.ProcessingWorkflow.from_scratch( - user, params, name=f'ProcessingWorkflow for {prep_id}') + user, params, name=f"ProcessingWorkflow for {prep_id}" + ) # the new job is the first job in the workflow new_job = list(pwk.graph.nodes())[0] # adding default pipeline to the preparation @@ -342,7 +367,9 @@ def post(self): new_job = PJ.create(user, params, True) new_job.submit() - r_client.set('prep_template_%d' % prep_id, - dumps({'job_id': new_job.id, 'is_qiita_job': True})) + r_client.set( + "prep_template_%d" % prep_id, + dumps({"job_id": new_job.id, "is_qiita_job": True}), + ) - self.finish({'job_id': new_job.id}) + self.finish({"job_id": new_job.id}) diff --git a/qiita_db/handlers/core.py b/qiita_db/handlers/core.py index e5a8b5709..427b9dedf 100644 --- a/qiita_db/handlers/core.py +++ b/qiita_db/handlers/core.py @@ -6,12 +6,13 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from .oauth2 import OauthBaseHandler, authenticate_oauth import qiita_db as qdb +from .oauth2 import OauthBaseHandler, authenticate_oauth + class ResetAPItestHandler(OauthBaseHandler): @authenticate_oauth def post(self): - drop_labcontrol = self.get_argument('drop_labcontrol', False) + drop_labcontrol = self.get_argument("drop_labcontrol", False) qdb.environment_manager.drop_and_rebuild_tst_database(drop_labcontrol) diff --git a/qiita_db/handlers/oauth2.py b/qiita_db/handlers/oauth2.py index 3abc384fd..4f8b9ddc7 100644 --- a/qiita_db/handlers/oauth2.py +++ b/qiita_db/handlers/oauth2.py @@ -6,44 +6,46 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from base64 import urlsafe_b64decode -from string import ascii_letters, digits import datetime -from random import SystemRandom import functools +from base64 import urlsafe_b64decode +from random import SystemRandom +from string import ascii_letters, digits from traceback import format_exception from tornado.web import RequestHandler -from qiita_core.qiita_settings import r_client -from qiita_core.exceptions import (IncorrectPasswordError, IncorrectEmailError, - UnverifiedEmailError) import qiita_db as qdb +from qiita_core.exceptions import ( + IncorrectEmailError, + IncorrectPasswordError, + UnverifiedEmailError, +) +from qiita_core.qiita_settings import r_client def _oauth_error(handler, error_msg, error): """Set expected status and error formatting for Oauth2 style error - Parameters - ---------- - error_msg : str - Human parsable error message - error : str - Oauth2 controlled vocab error - - Returns - ------- - Writes out Oauth2 formatted error JSON of - {error: error, - error_description: error_msg} + Parameters + ---------- + error_msg : str + Human parsable error message + error : str + Oauth2 controlled vocab error + + Returns + ------- + Writes out Oauth2 formatted error JSON of + {error: error, + error_description: error_msg} - Notes - ----- - Expects handler to be a tornado RequestHandler or subclass + Notes + ----- + Expects handler to be a tornado RequestHandler or subclass """ handler.set_status(400) - handler.write({'error': error, - 'error_description': error_msg}) + handler.write({"error": error, "error_description": error_msg}) handler.finish() @@ -67,33 +69,34 @@ def authenticate_oauth(f): [1] The OAuth 2.0 Authorization Framework. http://tools.ietf.org/html/rfc6749 """ + @functools.wraps(f) def wrapper(handler, *args, **kwargs): - header = handler.request.headers.get('Authorization', None) + header = handler.request.headers.get("Authorization", None) if header is None: - _oauth_error(handler, 'Oauth2 error: invalid access token', - 'invalid_request') + _oauth_error( + handler, "Oauth2 error: invalid access token", "invalid_request" + ) return token_info = header.split() # Based on RFC6750 if reply is not 2 elements in the format of: # ['Bearer', token] we assume a wrong reply - if len(token_info) != 2 or token_info[0] != 'Bearer': - _oauth_error(handler, 'Oauth2 error: invalid access token', - 'invalid_grant') + if len(token_info) != 2 or token_info[0] != "Bearer": + _oauth_error(handler, "Oauth2 error: invalid access token", "invalid_grant") return token = token_info[1] db_token = r_client.hgetall(token) if not db_token: # token has timed out or never existed - _oauth_error(handler, 'Oauth2 error: token has timed out', - 'invalid_grant') + _oauth_error(handler, "Oauth2 error: token has timed out", "invalid_grant") return # Check daily rate limit for key if password style key - if db_token[b'grant_type'] == b'password': - limit_key = '%s_%s_daily_limit' % ( - db_token[b'client_id'].decode('ascii'), - db_token[b'user'].decode('ascii')) + if db_token[b"grant_type"] == b"password": + limit_key = "%s_%s_daily_limit" % ( + db_token[b"client_id"].decode("ascii"), + db_token[b"user"].decode("ascii"), + ) limiter = r_client.get(limit_key) if limiter is None: # Set limit to 5,000 requests per day @@ -102,11 +105,14 @@ def wrapper(handler, *args, **kwargs): r_client.decr(limit_key) if int(r_client.get(limit_key)) <= 0: _oauth_error( - handler, 'Oauth2 error: daily request limit reached', - 
'invalid_grant') + handler, + "Oauth2 error: daily request limit reached", + "invalid_grant", + ) return return f(handler, *args, **kwargs) + return wrapper @@ -129,28 +135,32 @@ def write_error(self, status_code, **kwargs): This function is automatically called by the tornado package on errors, and should never be called directly. """ - exc_info = kwargs['exc_info'] + exc_info = kwargs["exc_info"] # We don't need to log 403, 404 or 405 failures in the logging table if status_code not in {403, 404, 405}: # log the error - error_lines = ['%s\n' % line - for line in format_exception(*exc_info)] - trace_info = ''.join(error_lines) + error_lines = ["%s\n" % line for line in format_exception(*exc_info)] + trace_info = "".join(error_lines) req_dict = self.request.__dict__ # must trim body to 1024 chars to prevent huge error messages - req_dict['body'] = req_dict.get('body', '')[:1024] - request_info = ''.join(['%s: %s\n' % - (k, req_dict[k]) for k in - req_dict.keys() if k != 'files']) + req_dict["body"] = req_dict.get("body", "")[:1024] + request_info = "".join( + [ + "%s: %s\n" % (k, req_dict[k]) + for k in req_dict.keys() + if k != "files" + ] + ) error = exc_info[1] qdb.logger.LogEntry.create( - 'Runtime', - 'ERROR:\n%s\nTRACE:\n%s\nHTTP INFO:\n%s\n' % - (error, trace_info, request_info)) + "Runtime", + "ERROR:\n%s\nTRACE:\n%s\nHTTP INFO:\n%s\n" + % (error, trace_info, request_info), + ) message = str(exc_info[1]) - if hasattr(exc_info[1], 'log_message'): + if hasattr(exc_info[1], "log_message"): message = exc_info[1].log_message self.finish(message) @@ -184,7 +194,7 @@ def generate_access_token(self, length=55): raise ValueError("Invalid token length: %d" % length) pool = ascii_letters + digits - return ''.join((SystemRandom().choice(pool) for _ in range(length))) + return "".join((SystemRandom().choice(pool) for _ in range(length))) def set_token(self, client_id, grant_type, user=None, timeout=3600): """Create access token for the client on redis and send json response @@ -214,26 +224,26 @@ def set_token(self, client_id, grant_type, user=None, timeout=3600): token = self.generate_access_token() token_info = { - 'timestamp': datetime.datetime.now().strftime('%m-%d-%y %H:%M:%S'), - 'client_id': client_id, - 'grant_type': grant_type + "timestamp": datetime.datetime.now().strftime("%m-%d-%y %H:%M:%S"), + "client_id": client_id, + "grant_type": grant_type, } if user: - token_info['user'] = user + token_info["user"] = user r_client.hmset(token, token_info) r_client.expire(token, timeout) - if grant_type == 'password': + if grant_type == "password": # Check if client has access limit key, and if not, create it - limit_key = '%s_%s_daily_limit' % (client_id, user) + limit_key = "%s_%s_daily_limit" % (client_id, user) limiter = r_client.get(limit_key) if limiter is None: # Set limit to 5,000 requests per day r_client.setex(limit_key, 86400, 5000) - self.write({'access_token': token, - 'token_type': 'Bearer', - 'expires_in': timeout}) + self.write( + {"access_token": token, "token_type": "Bearer", "expires_in": timeout} + ) self.finish() def validate_client(self, client_id, client_secret): @@ -262,10 +272,11 @@ def validate_client(self, client_id, client_secret): WHERE client_id = %s AND client_secret = %s)""" qdb.sql_connection.TRN.add(sql, [client_id, client_secret]) if qdb.sql_connection.TRN.execute_fetchlast(): - self.set_token(client_id, 'client') + self.set_token(client_id, "client") else: - _oauth_error(self, 'Oauth2 error: invalid client information', - 'invalid_client') + _oauth_error( + 
self, "Oauth2 error: invalid client information", "invalid_client" + ) def validate_resource_owner(self, username, password, client_id): """Make sure user and client exist, then set the token and send it @@ -290,10 +301,10 @@ def validate_resource_owner(self, username, password, client_id): """ try: qdb.user.User.login(username, password) - except (IncorrectEmailError, IncorrectPasswordError, - UnverifiedEmailError): - _oauth_error(self, 'Oauth2 error: invalid user information', - 'invalid_client') + except (IncorrectEmailError, IncorrectPasswordError, UnverifiedEmailError): + _oauth_error( + self, "Oauth2 error: invalid user information", "invalid_client" + ) return with qdb.sql_connection.TRN: @@ -303,13 +314,14 @@ def validate_resource_owner(self, username, password, client_id): WHERE client_id = %s AND client_secret IS NULL)""" qdb.sql_connection.TRN.add(sql, [client_id]) if qdb.sql_connection.TRN.execute_fetchlast(): - self.set_token(client_id, 'password', user=username) + self.set_token(client_id, "password", user=username) else: - _oauth_error(self, 'Oauth2 error: invalid client information', - 'invalid_client') + _oauth_error( + self, "Oauth2 error: invalid client information", "invalid_client" + ) def post(self): - """ Authenticate given information as per RFC6750 + """Authenticate given information as per RFC6750 Parameters ---------- @@ -350,57 +362,64 @@ def post(self): error: RFC6750 controlled vocabulary of errors error_description: Human readable explanation of error - """ + """ # first check for header version of sending auth, meaning client ID - header = self.request.headers.get('Authorization', None) + header = self.request.headers.get("Authorization", None) if header is not None: header_info = header.split() # Based on RFC6750 if reply is not 2 elements in the format of: # ['Basic', base64 encoded username:password] we assume the header # is invalid - if len(header_info) != 2 or header_info[0] != 'Basic': + if len(header_info) != 2 or header_info[0] != "Basic": # Invalid Authorization header type for this page - _oauth_error(self, 'Oauth2 error: invalid token type', - 'invalid_request') + _oauth_error( + self, "Oauth2 error: invalid token type", "invalid_request" + ) return # Get client information from the header and validate it - grant_type = self.get_argument('grant_type', None) - if grant_type != 'client': - _oauth_error(self, 'Oauth2 error: invalid grant_type', - 'invalid_request') + grant_type = self.get_argument("grant_type", None) + if grant_type != "client": + _oauth_error( + self, "Oauth2 error: invalid grant_type", "invalid_request" + ) return try: - client_id, client_secret = urlsafe_b64decode( - header_info[1]).decode('ascii').split(':') + client_id, client_secret = ( + urlsafe_b64decode(header_info[1]).decode("ascii").split(":") + ) except ValueError: # Split didn't work, so invalid information sent - _oauth_error(self, 'Oauth2 error: invalid base64 encoded info', - 'invalid_request') + _oauth_error( + self, "Oauth2 error: invalid base64 encoded info", "invalid_request" + ) return self.validate_client(client_id, client_secret) return # Otherwise, do either password or client based authentication - client_id = self.get_argument('client_id', None) - grant_type = self.get_argument('grant_type', None) - if grant_type == 'password': - username = self.get_argument('username', None) - password = self.get_argument('password', None) + client_id = self.get_argument("client_id", None) + grant_type = self.get_argument("grant_type", None) + if grant_type == "password": 
+ username = self.get_argument("username", None) + password = self.get_argument("password", None) if not all([username, password, client_id]): - _oauth_error(self, 'Oauth2 error: missing user information', - 'invalid_request') + _oauth_error( + self, "Oauth2 error: missing user information", "invalid_request" + ) else: self.validate_resource_owner(username, password, client_id) - elif grant_type == 'client': - client_secret = self.get_argument('client_secret', None) + elif grant_type == "client": + client_secret = self.get_argument("client_secret", None) if not all([client_id, client_secret]): - _oauth_error(self, 'Oauth2 error: missing client information', - 'invalid_request') + _oauth_error( + self, "Oauth2 error: missing client information", "invalid_request" + ) return self.validate_client(client_id, client_secret) else: - _oauth_error(self, 'Oauth2 error: invalid grant_type', - 'unsupported_grant_type') + _oauth_error( + self, "Oauth2 error: invalid grant_type", "unsupported_grant_type" + ) return diff --git a/qiita_db/handlers/plugin.py b/qiita_db/handlers/plugin.py index 31c900b98..ec9a3fc54 100644 --- a/qiita_db/handlers/plugin.py +++ b/qiita_db/handlers/plugin.py @@ -6,15 +6,16 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from json import loads from glob import glob +from json import loads from os.path import join from tornado.web import HTTPError -from .oauth2 import OauthBaseHandler, authenticate_oauth -from qiita_core.qiita_settings import qiita_config import qiita_db as qdb +from qiita_core.qiita_settings import qiita_config + +from .oauth2 import OauthBaseHandler, authenticate_oauth def _get_plugin(name, version): @@ -43,8 +44,9 @@ def _get_plugin(name, version): except qdb.exceptions.QiitaDBUnknownIDError: raise HTTPError(404) except Exception as e: - raise HTTPError(500, reason='Error instantiating plugin %s %s: %s' - % (name, version, str(e))) + raise HTTPError( + 500, reason="Error instantiating plugin %s %s: %s" % (name, version, str(e)) + ) return plugin @@ -77,14 +79,17 @@ def get(self, name, version): with qdb.sql_connection.TRN: plugin = _get_plugin(name, version) response = { - 'name': plugin.name, - 'version': plugin.version, - 'description': plugin.description, - 'commands': [c.name for c in plugin.commands], - 'publications': [{'DOI': doi, 'PubMed': pubmed} - for doi, pubmed in plugin.publications], - 'type': plugin.type, - 'active': plugin.active} + "name": plugin.name, + "version": plugin.version, + "description": plugin.description, + "commands": [c.name for c in plugin.commands], + "publications": [ + {"DOI": doi, "PubMed": pubmed} + for doi, pubmed in plugin.publications + ], + "type": plugin.type, + "active": plugin.active, + } self.write(response) @@ -103,43 +108,47 @@ def post(self, name, version): with qdb.sql_connection.TRN: plugin = _get_plugin(name, version) - cmd_name = self.get_argument('name') - cmd_desc = self.get_argument('description') - req_params = loads(self.get_argument('required_parameters')) - opt_params = loads(self.get_argument('optional_parameters')) + cmd_name = self.get_argument("name") + cmd_desc = self.get_argument("description") + req_params = loads(self.get_argument("required_parameters")) + opt_params = loads(self.get_argument("optional_parameters")) for p_name, vals in opt_params.items(): - if vals[0].startswith('mchoice'): + if vals[0].startswith("mchoice"): opt_params[p_name] = [vals[0], loads(vals[1])] if 
len(vals) == 2: opt_params[p_name] = [vals[0], loads(vals[1])] elif len(vals) == 4: - opt_params[p_name] = [vals[0], loads(vals[1]), vals[2], - vals[3]] + opt_params[p_name] = [vals[0], loads(vals[1]), vals[2], vals[3]] else: raise qdb.exceptions.QiitaDBError( "Malformed parameters dictionary, the format " "should be either {param_name: [parameter_type, " "default]} or {parameter_name: (parameter_type, " "default, name_order, check_biom_merge)}. Found: " - "%s for parameter name %s" - % (vals, p_name)) + "%s for parameter name %s" % (vals, p_name) + ) # adding an extra element to make sure the parser knows this is # an optional parameter - opt_params[p_name].extend(['qiita_optional_parameter']) + opt_params[p_name].extend(["qiita_optional_parameter"]) - outputs = self.get_argument('outputs', None) + outputs = self.get_argument("outputs", None) if outputs: outputs = loads(outputs) - dflt_param_set = loads(self.get_argument('default_parameter_sets')) - analysis_only = self.get_argument('analysis_only', False) + dflt_param_set = loads(self.get_argument("default_parameter_sets")) + analysis_only = self.get_argument("analysis_only", False) parameters = req_params parameters.update(opt_params) cmd = qdb.software.Command.create( - plugin, cmd_name, cmd_desc, parameters, outputs, - analysis_only=analysis_only) + plugin, + cmd_name, + cmd_desc, + parameters, + outputs, + analysis_only=analysis_only, + ) if dflt_param_set is not None: for name, vals in dflt_param_set.items(): @@ -177,9 +186,11 @@ def _get_command(plugin_name, plugin_version, cmd_name): except qdb.exceptions.QiitaDBUnknownIDError: raise HTTPError(404) except Exception as e: - raise HTTPError(500, reason='Error instantiating cmd %s of plugin ' - '%s %s: %s' % (cmd_name, plugin_name, - plugin_version, str(e))) + raise HTTPError( + 500, + reason="Error instantiating cmd %s of plugin " + "%s %s: %s" % (cmd_name, plugin_name, plugin_version, str(e)), + ) return cmd @@ -214,13 +225,15 @@ def get(self, plugin_name, plugin_version, cmd_name): with qdb.sql_connection.TRN: cmd = _get_command(plugin_name, plugin_version, cmd_name) response = { - 'name': cmd.name, - 'description': cmd.description, - 'required_parameters': cmd.required_parameters, - 'optional_parameters': cmd.optional_parameters, - 'default_parameter_sets': { - p.name: p.values for p in cmd.default_parameter_sets}, - 'outputs': cmd.outputs} + "name": cmd.name, + "description": cmd.description, + "required_parameters": cmd.required_parameters, + "optional_parameters": cmd.optional_parameters, + "default_parameter_sets": { + p.name: p.values for p in cmd.default_parameter_sets + }, + "outputs": cmd.outputs, + } self.write(response) @@ -250,10 +263,10 @@ class ReloadPluginAPItestHandler(OauthBaseHandler): def post(self): """Reloads the plugins""" conf_files = sorted(glob(join(qiita_config.plugin_dir, "*.conf"))) - software = set([qdb.software.Software.from_file(fp, update=True) - for fp in conf_files]) - definition = set( - [s for s in software if s.type == 'artifact definition']) + software = set( + [qdb.software.Software.from_file(fp, update=True) for fp in conf_files] + ) + definition = set([s for s in software if s.type == "artifact definition"]) transformation = software - definition for s in definition: s.activate() diff --git a/qiita_db/handlers/prep_template.py b/qiita_db/handlers/prep_template.py index 997193440..f03565313 100644 --- a/qiita_db/handlers/prep_template.py +++ b/qiita_db/handlers/prep_template.py @@ -9,10 +9,11 @@ from json import loads from os.path import 
basename -from tornado.web import HTTPError import pandas as pd +from tornado.web import HTTPError import qiita_db as qdb + from .oauth2 import OauthBaseHandler, authenticate_oauth @@ -41,8 +42,9 @@ def _get_prep_template(pid): except qdb.exceptions.QiitaDBUnknownIDError: raise HTTPError(404) except Exception as e: - raise HTTPError(500, reason='Error instantiating prep template %s: %s' - % (pid, str(e))) + raise HTTPError( + 500, reason="Error instantiating prep template %s: %s" % (pid, str(e)) + ) return pt @@ -71,23 +73,25 @@ def get(self, prep_id): """ with qdb.sql_connection.TRN: pt = _get_prep_template(prep_id) - prep_files = [fp for _, fp in pt.get_filepaths() - if 'qiime' not in basename(fp)] + prep_files = [ + fp for _, fp in pt.get_filepaths() if "qiime" not in basename(fp) + ] artifact = pt.artifact.id if pt.artifact is not None else None sid = pt.study_id response = { - 'data_type': pt.data_type(), - 'artifact': artifact, - 'investigation_type': pt.investigation_type, - 'study': sid, - 'status': pt.status, + "data_type": pt.data_type(), + "artifact": artifact, + "investigation_type": pt.investigation_type, + "study": sid, + "status": pt.status, # get_filepaths returns an ordered list of [filepath_id, # filepath] and we want the last pair - 'sample-file': qdb.study.Study( - sid).sample_template.get_filepaths()[0][1], + "sample-file": qdb.study.Study(sid).sample_template.get_filepaths()[0][ + 1 + ], # The first element in the prep_files is the newest # prep information file - hence the correct one - 'prep-file': prep_files[0] + "prep-file": prep_files[0], } self.write(response) @@ -108,16 +112,19 @@ def get(self, prep_id): dict The contents of the prep information keyed by sample id """ - sample_info = self.get_argument('sample_information', False) + sample_info = self.get_argument("sample_information", False) with qdb.sql_connection.TRN: pt = _get_prep_template(prep_id) if not sample_info: - response = {'data': pt.to_dataframe().to_dict(orient='index')} + response = {"data": pt.to_dataframe().to_dict(orient="index")} else: ST = qdb.metadata_template.sample_template.SampleTemplate - response = {'data': ST(pt.study_id).to_dataframe( - samples=list(pt)).to_dict(orient='index')} + response = { + "data": ST(pt.study_id) + .to_dataframe(samples=list(pt)) + .to_dict(orient="index") + } self.write(response) @@ -125,17 +132,17 @@ def get(self, prep_id): class PrepTemplateAPIHandler(OauthBaseHandler): @authenticate_oauth def post(self): - prep_info_dict = loads(self.get_argument('prep_info')) - study = self.get_argument('study') - data_type = self.get_argument('data_type') - name = self.get_argument('name', None) - jid = self.get_argument('job-id', None) + prep_info_dict = loads(self.get_argument("prep_info")) + study = self.get_argument("study") + data_type = self.get_argument("data_type") + name = self.get_argument("name", None) + jid = self.get_argument("job-id", None) - metadata = pd.DataFrame.from_dict(prep_info_dict, orient='index') + metadata = pd.DataFrame.from_dict(prep_info_dict, orient="index") pt = qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(study), data_type, name=name, - creation_job_id=jid) - self.write({'prep': pt.id}) + metadata, qdb.study.Study(study), data_type, name=name, creation_job_id=jid + ) + self.write({"prep": pt.id}) class PrepTemplateAPItestHandler(PrepTemplateAPIHandler): diff --git a/qiita_db/handlers/processing_job.py b/qiita_db/handlers/processing_job.py index 832d2407a..13e93fc6d 100644 --- 
a/qiita_db/handlers/processing_job.py +++ b/qiita_db/handlers/processing_job.py @@ -11,6 +11,7 @@ from tornado.web import HTTPError import qiita_db as qdb + from .oauth2 import OauthBaseHandler, authenticate_oauth @@ -40,7 +41,7 @@ def _get_job(job_id): try: job = qdb.processing_job.ProcessingJob(job_id) except Exception as e: - raise HTTPError(500, reason='Error instantiating the job: %s' % str(e)) + raise HTTPError(500, reason="Error instantiating the job: %s" % str(e)) return job @@ -71,10 +72,9 @@ def get(self, job_id): cmd = job.command.name params = job.parameters.values status = job.status - msg = '' if status != 'error' else job.log.msg + msg = "" if status != "error" else job.log.msg - response = {'command': cmd, 'parameters': params, - 'status': status, 'msg': msg} + response = {"command": cmd, "parameters": params, "status": status, "msg": msg} self.write(response) @@ -113,7 +113,7 @@ def post(self, job_id): with qdb.sql_connection.TRN: job = _get_job(job_id) payload = loads(self.request.body) - step = payload['step'] + step = payload["step"] try: job.step = step except qdb.exceptions.QiitaDBOperationNotPermittedError as e: @@ -135,20 +135,20 @@ def post(self, job_id): with qdb.sql_connection.TRN: job = _get_job(job_id) - if job.status != 'running': - raise HTTPError( - 403, "Can't complete job: not in a running state") + if job.status != "running": + raise HTTPError(403, "Can't complete job: not in a running state") - qiita_plugin = qdb.software.Software.from_name_and_version( - 'Qiita', 'alpha') - cmd = qiita_plugin.get_command('complete_job') + qiita_plugin = qdb.software.Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("complete_job") params = qdb.software.Parameters.load( - cmd, values_dict={'job_id': job_id, - 'payload': self.request.body.decode( - 'ascii')}) + cmd, + values_dict={ + "job_id": job_id, + "payload": self.request.body.decode("ascii"), + }, + ) # complete_job are unique so it is fine to force them to be created - job = qdb.processing_job.ProcessingJob.create( - job.user, params, force=True) + job = qdb.processing_job.ProcessingJob.create(job.user, params, force=True) job.submit() self.finish() @@ -157,20 +157,20 @@ def post(self, job_id): class ProcessingJobAPItestHandler(OauthBaseHandler): @authenticate_oauth def post(self): - user = self.get_argument('user', 'test@foo.bar') - s_name, s_version, cmd_name = loads(self.get_argument('command')) - params_dict = self.get_argument('parameters') - status = self.get_argument('status', None) + user = self.get_argument("user", "test@foo.bar") + s_name, s_version, cmd_name = loads(self.get_argument("command")) + params_dict = self.get_argument("parameters") + status = self.get_argument("status", None) cmd = qdb.software.Software.from_name_and_version( - s_name, s_version).get_command(cmd_name) + s_name, s_version + ).get_command(cmd_name) params = qdb.software.Parameters.load(cmd, json_str=params_dict) - job = qdb.processing_job.ProcessingJob.create( - qdb.user.User(user), params, True) + job = qdb.processing_job.ProcessingJob.create(qdb.user.User(user), params, True) if status: job._set_status(status) - self.write({'job': job.id}) + self.write({"job": job.id}) diff --git a/qiita_db/handlers/reference.py b/qiita_db/handlers/reference.py index 446103024..a33225db9 100644 --- a/qiita_db/handlers/reference.py +++ b/qiita_db/handlers/reference.py @@ -8,9 +8,10 @@ from tornado.web import HTTPError -from .oauth2 import OauthBaseHandler, authenticate_oauth import qiita_db as qdb +from .oauth2 
import OauthBaseHandler, authenticate_oauth + def _get_reference(r_id): """Returns the reference with the given id if exists @@ -36,8 +37,7 @@ def _get_reference(r_id): except qdb.exceptions.QiitaDBUnknownIDError: raise HTTPError(404) except Exception as e: - raise HTTPError(500, reason='Error instantiating the reference: ' - '%s' % str(e)) + raise HTTPError(500, reason="Error instantiating the reference: %s" % str(e)) return reference @@ -66,7 +66,7 @@ def get(self, reference_id): with qdb.sql_connection.TRN: reference = _get_reference(reference_id) - fps = {'reference_seqs': reference.sequence_fp} + fps = {"reference_seqs": reference.sequence_fp} tax_fp = reference.taxonomy_fp if tax_fp: fps["reference_tax"] = tax_fp @@ -75,9 +75,9 @@ def get(self, reference_id): fps["reference_tree"] = tree_fp response = { - 'name': reference.name, - 'version': reference.version, - 'files': fps + "name": reference.name, + "version": reference.version, + "files": fps, } self.write(response) diff --git a/qiita_db/handlers/sample_information.py b/qiita_db/handlers/sample_information.py index 99ed212aa..80f43b85d 100644 --- a/qiita_db/handlers/sample_information.py +++ b/qiita_db/handlers/sample_information.py @@ -7,6 +7,7 @@ # ----------------------------------------------------------------------------- import qiita_db as qdb + from .oauth2 import OauthBaseHandler, authenticate_oauth from .util import _get_instance @@ -28,7 +29,7 @@ def get(self, study_id): """ with qdb.sql_connection.TRN: ST = qdb.metadata_template.sample_template.SampleTemplate - st = _get_instance(ST, study_id, 'Error instantiating sample info') - response = {'data': st.to_dataframe().to_dict(orient='index')} + st = _get_instance(ST, study_id, "Error instantiating sample info") + response = {"data": st.to_dataframe().to_dict(orient="index")} self.write(response) diff --git a/qiita_db/handlers/studies.py b/qiita_db/handlers/studies.py index e8b3780c3..c0ae836c9 100644 --- a/qiita_db/handlers/studies.py +++ b/qiita_db/handlers/studies.py @@ -9,6 +9,7 @@ from tornado.web import HTTPError from qiita_db.sql_connection import TRN + from .oauth2 import OauthBaseHandler, authenticate_oauth @@ -25,7 +26,7 @@ def _generate_study_list_for_api(visibility, only_biom=True): list of dict The list of studies and their information """ - artifact_type = '' + artifact_type = "" if only_biom: artifact_type = "AND artifact_type = 'BIOM'" @@ -58,10 +59,10 @@ def get(self, visibility): ------- see qiita_db.util.generate_study_list """ - if visibility not in {'public', 'private'}: + if visibility not in {"public", "private"}: raise HTTPError( - 403, reason='You can only request public or private studies') + 403, reason="You can only request public or private studies" + ) - response = { - 'data': _generate_study_list_for_api(visibility=visibility)} + response = {"data": _generate_study_list_for_api(visibility=visibility)} self.write(response) diff --git a/qiita_db/handlers/tests/oauthbase.py b/qiita_db/handlers/tests/oauthbase.py index 4a36a4f4b..004f0bd0b 100644 --- a/qiita_db/handlers/tests/oauthbase.py +++ b/qiita_db/handlers/tests/oauthbase.py @@ -6,26 +6,27 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -import requests import os import sys -from qiita_core.qiita_settings import r_client, qiita_config +import requests + +from qiita_core.qiita_settings import qiita_config, r_client from qiita_pet.test.tornado_test_base import TestHandlerBase class OauthTestingBase(TestHandlerBase): def setUp(self): - self.token = 'TESTINGOAUTHSTUFF' - self.header = {'Authorization': 'Bearer ' + self.token} - r_client.hset(self.token, 'timestamp', '12/12/12 12:12:00') - r_client.hset(self.token, 'grant_type', 'client') + self.token = "TESTINGOAUTHSTUFF" + self.header = {"Authorization": "Bearer " + self.token} + r_client.hset(self.token, "timestamp", "12/12/12 12:12:00") + r_client.hset(self.token, "grant_type", "client") r_client.expire(self.token, 20) super(OauthTestingBase, self).setUp() self._session = requests.Session() # should point to client certificat file: # /qiita/qiita_core/support_files/ci_rootca.crt - self._verify = os.environ['QIITA_ROOTCA_CERT'] + self._verify = os.environ["QIITA_ROOTCA_CERT"] self._fetch_token() self._files_to_remove = [] @@ -37,34 +38,41 @@ def tearDown(self): def _fetch_token(self): data = { - 'client_id': '4MOBzUBHBtUmwhaC258H7PS0rBBLyGQrVxGPgc9g305bvVhf6h', - 'client_secret': - ('rFb7jwAb3UmSUN57Bjlsi4DTl2owLwRpwCc0SggRN' - 'EVb2Ebae2p5Umnq20rNMhmqN'), - 'grant_type': 'client'} + "client_id": "4MOBzUBHBtUmwhaC258H7PS0rBBLyGQrVxGPgc9g305bvVhf6h", + "client_secret": ( + "rFb7jwAb3UmSUN57Bjlsi4DTl2owLwRpwCc0SggRNEVb2Ebae2p5Umnq20rNMhmqN" + ), + "grant_type": "client", + } resp = self._session.post( "%s/qiita_db/authenticate/" % qiita_config.base_url, - verify=self._verify, data=data, timeout=80) + verify=self._verify, + data=data, + timeout=80, + ) if resp.status_code != 200: raise ValueError("_fetchToken() POST request failed") - self._token = resp.json()['access_token'] - print('obtained access_token = %s' % self._token, file=sys.stderr) + self._token = resp.json()["access_token"] + print("obtained access_token = %s" % self._token, file=sys.stderr) def post_authed(self, url, **kwargs): - if 'headers' not in kwargs: - kwargs['headers'] = {} - if 'Authorization' not in kwargs['headers']: - kwargs['headers']['Authorization'] = 'Bearer %s' % self._token + if "headers" not in kwargs: + kwargs["headers"] = {} + if "Authorization" not in kwargs["headers"]: + kwargs["headers"]["Authorization"] = "Bearer %s" % self._token r = self._session.post( - qiita_config.base_url + url, verify=self._verify, **kwargs) + qiita_config.base_url + url, verify=self._verify, **kwargs + ) r.close() return r def get_authed(self, url): - r = self._session.get(qiita_config.base_url + url, verify=self._verify, - headers={'Authorization': 'Bearer %s' % - self._token}) + r = self._session.get( + qiita_config.base_url + url, + verify=self._verify, + headers={"Authorization": "Bearer %s" % self._token}, + ) r.close() return r diff --git a/qiita_db/handlers/tests/test_analysis.py b/qiita_db/handlers/tests/test_analysis.py index 603b4c326..db20122a0 100644 --- a/qiita_db/handlers/tests/test_analysis.py +++ b/qiita_db/handlers/tests/test_analysis.py @@ -6,14 +6,14 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main, TestCase from json import loads +from unittest import TestCase, main from tornado.web import HTTPError -from qiita_db.handlers.tests.oauthbase import OauthTestingBase -from qiita_db.handlers.analysis import _get_analysis import qiita_db as qdb +from qiita_db.handlers.analysis import _get_analysis +from qiita_db.handlers.tests.oauthbase import OauthTestingBase class UtilTests(TestCase): @@ -29,78 +29,98 @@ def test_get_analysis(self): class APIAnalysisMetadataHandlerTests(OauthTestingBase): def test_get_does_not_exist(self): - obs = self.get('/qiita_db/analysis/100/metadata/', headers=self.header) + obs = self.get("/qiita_db/analysis/100/metadata/", headers=self.header) self.assertEqual(obs.code, 404) def test_get_no_header(self): - obs = self.get('/qiita_db/analysis/1/metadata/') + obs = self.get("/qiita_db/analysis/1/metadata/") self.assertEqual(obs.code, 400) def test_get(self): - obs = self.get('/qiita_db/analysis/1/metadata/', headers=self.header) + obs = self.get("/qiita_db/analysis/1/metadata/", headers=self.header) self.assertEqual(obs.code, 200) obs = loads(obs.body) - exp = ['1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184', - '1.SKM9.640192', '1.SKB7.640196'] + exp = [ + "1.SKM4.640180", + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKM9.640192", + "1.SKB7.640196", + ] self.assertCountEqual(obs, exp) - exp = {'platform': 'Illumina', 'longitude': '95.5088566087', - 'experiment_center': 'ANL', 'center_name': 'ANL', - 'run_center': 'ANL', 'run_prefix': 's_G1_L001_sequences', - 'sample_type': 'ENVO:soil', - 'common_name': 'rhizosphere metagenome', 'samp_size': '.25,g', - 'has_extracted_data': 'True', 'water_content_soil': '0.101', - 'target_gene': '16S rRNA', - 'env_feature': 'ENVO:plant-associated habitat', - 'sequencing_meth': 'Sequencing by synthesis', - 'Description': 'Cannabis Soil Microbiome', 'run_date': '8/1/12', - 'qiita_owner': 'Dude', 'altitude': '0.0', - 'BarcodeSequence': 'TCGACCAAACAC', - 'env_biome': 'ENVO:Temperate grasslands, savannas, and ' - 'shrubland biome', - 'texture': '63.1 sand, 17.7 silt, 19.2 clay', - 'pcr_primers': 'FWD:GTGCCAGCMGCCGCGGTAA; ' - 'REV:GGACTACHVGGGTWTCTAAT', - 'experiment_title': 'Cannabis Soil Microbiome', - 'library_construction_protocol': - 'This analysis was done as in Caporaso et al 2011 Genome ' - 'research. The PCR primers (F515/R806) were developed ' - 'against the V4 region of the 16S rRNA (both bacteria and ' - 'archaea), which we determined would yield optimal ' - 'community clustering with reads of this length using a ' - 'procedure similar to that of ref. 15. [For reference, ' - 'this primer pair amplifies the region 533_786 in the ' - 'Escherichia coli strain 83972 sequence (greengenes ' - 'accession no. prokMSA_id:470367).] 
The reverse PCR primer ' - 'is barcoded with a 12-base error-correcting Golay code to ' - 'facilitate multiplexing of up to 1,500 samples per lane, ' - 'and both PCR primers contain sequencer adapter regions.', - 'experiment_design_description': - 'micro biome of soil and rhizosphere of cannabis plants ' - 'from CA', - 'study_center': 'CCME', 'physical_location': 'ANL', - 'qiita_prep_id': '1', 'taxon_id': '939928', - 'has_physical_specimen': 'True', 'ph': '6.82', - 'description_duplicate': 'Bucu Rhizo', - 'qiita_study_alias': 'Cannabis Soils', 'sample_center': 'ANL', - 'elevation': '114.0', 'illumina_technology': 'MiSeq', - 'assigned_from_geo': 'n', - 'collection_timestamp': '2011-11-11 13:00:00', - 'latitude': '31.7167821863', - 'LinkerPrimerSequence': 'GTGCCAGCMGCCGCGGTAA', - 'qiita_principal_investigator': 'PIDude', 'host_taxid': '3483', - 'samp_salinity': '7.44', 'host_subject_id': '1001:D2', - 'target_subfragment': 'V4', 'season_environment': 'winter', - 'temp': '15.0', 'emp_status': 'EMP', - 'country': 'GAZ:United States of America', - 'instrument_model': 'Illumina MiSeq', - 'qiita_study_title': 'Identification of the Microbiomes for ' - 'Cannabis Soils', - 'tot_nitro': '1.3', 'depth': '0.15', - 'anonymized_name': 'SKM4', 'tot_org_carb': '3.31'} - self.assertEqual(obs['1.SKM4.640180'], exp) + exp = { + "platform": "Illumina", + "longitude": "95.5088566087", + "experiment_center": "ANL", + "center_name": "ANL", + "run_center": "ANL", + "run_prefix": "s_G1_L001_sequences", + "sample_type": "ENVO:soil", + "common_name": "rhizosphere metagenome", + "samp_size": ".25,g", + "has_extracted_data": "True", + "water_content_soil": "0.101", + "target_gene": "16S rRNA", + "env_feature": "ENVO:plant-associated habitat", + "sequencing_meth": "Sequencing by synthesis", + "Description": "Cannabis Soil Microbiome", + "run_date": "8/1/12", + "qiita_owner": "Dude", + "altitude": "0.0", + "BarcodeSequence": "TCGACCAAACAC", + "env_biome": "ENVO:Temperate grasslands, savannas, and shrubland biome", + "texture": "63.1 sand, 17.7 silt, 19.2 clay", + "pcr_primers": "FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT", + "experiment_title": "Cannabis Soil Microbiome", + "library_construction_protocol": "This analysis was done as in Caporaso et al 2011 Genome " + "research. The PCR primers (F515/R806) were developed " + "against the V4 region of the 16S rRNA (both bacteria and " + "archaea), which we determined would yield optimal " + "community clustering with reads of this length using a " + "procedure similar to that of ref. 15. [For reference, " + "this primer pair amplifies the region 533_786 in the " + "Escherichia coli strain 83972 sequence (greengenes " + "accession no. prokMSA_id:470367).] 
The reverse PCR primer " + "is barcoded with a 12-base error-correcting Golay code to " + "facilitate multiplexing of up to 1,500 samples per lane, " + "and both PCR primers contain sequencer adapter regions.", + "experiment_design_description": "micro biome of soil and rhizosphere of cannabis plants " + "from CA", + "study_center": "CCME", + "physical_location": "ANL", + "qiita_prep_id": "1", + "taxon_id": "939928", + "has_physical_specimen": "True", + "ph": "6.82", + "description_duplicate": "Bucu Rhizo", + "qiita_study_alias": "Cannabis Soils", + "sample_center": "ANL", + "elevation": "114.0", + "illumina_technology": "MiSeq", + "assigned_from_geo": "n", + "collection_timestamp": "2011-11-11 13:00:00", + "latitude": "31.7167821863", + "LinkerPrimerSequence": "GTGCCAGCMGCCGCGGTAA", + "qiita_principal_investigator": "PIDude", + "host_taxid": "3483", + "samp_salinity": "7.44", + "host_subject_id": "1001:D2", + "target_subfragment": "V4", + "season_environment": "winter", + "temp": "15.0", + "emp_status": "EMP", + "country": "GAZ:United States of America", + "instrument_model": "Illumina MiSeq", + "qiita_study_title": "Identification of the Microbiomes for Cannabis Soils", + "tot_nitro": "1.3", + "depth": "0.15", + "anonymized_name": "SKM4", + "tot_org_carb": "3.31", + } + self.assertEqual(obs["1.SKM4.640180"], exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_archive.py b/qiita_db/handlers/tests/test_archive.py index 7081ed96e..18c89b7d4 100644 --- a/qiita_db/handlers/tests/test_archive.py +++ b/qiita_db/handlers/tests/test_archive.py @@ -6,16 +6,15 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from os.path import exists, isfile +from json import dumps, loads from os import remove +from os.path import exists, isfile from shutil import rmtree - from unittest import main -from json import loads, dumps +from qiita_db.archive import Archive from qiita_db.handlers.tests.oauthbase import OauthTestingBase from qiita_db.sql_connection import TRN -from qiita_db.archive import Archive class APIArchiveObservationsTests(OauthTestingBase): @@ -55,44 +54,44 @@ def test_full_query_and_insertion(self): exp_all_features = {} for j in jobs: - featureA = 'AA - %s' % j - featureB = 'BB - %s' % j + featureA = "AA - %s" % j + featureB = "BB - %s" % j # testing that nothing is there - data = {'job_id': j, 'features': [featureA, featureB]} + data = {"job_id": j, "features": [featureA, featureB]} obs = self.post( - '/qiita_db/archive/observations/', headers=self.header, - data=data) + "/qiita_db/archive/observations/", headers=self.header, data=data + ) exp = {} self.assertEqual(obs.code, 200) self.assertEqual(loads(obs.body), exp) # inserting and testing insertion - data = {'path': j, - 'value': dumps({featureA: 'CA', featureB: 'CB'})} + data = {"path": j, "value": dumps({featureA: "CA", featureB: "CB"})} obs = self.patch( - '/qiita_db/archive/observations/', headers=self.header, - data=data) - exp = {featureA: 'CA', featureB: 'CB'} + "/qiita_db/archive/observations/", headers=self.header, data=data + ) + exp = {featureA: "CA", featureB: "CB"} self.assertEqual(obs.code, 200) self.assertEqual(loads(obs.body), exp) - exp_all_features[featureA] = 'CA' - exp_all_features[featureB] = 'CB' + exp_all_features[featureA] = "CA" + exp_all_features[featureB] = "CB" # testing retrieve all featues obs = Archive.retrieve_feature_values() 
self.assertEqual(obs, exp_all_features) # this doesn't exist so should be empty - obs = Archive.retrieve_feature_values(archive_merging_scheme='') + obs = Archive.retrieve_feature_values(archive_merging_scheme="") self.assertEqual(obs, {}) obs = Archive.retrieve_feature_values( - archive_merging_scheme='Pick closed-reference OTUs | Split ' - 'libraries FASTQ (barcode_type: golay_12)') + archive_merging_scheme="Pick closed-reference OTUs | Split " + "libraries FASTQ (barcode_type: golay_12)" + ) self.assertEqual(obs, exp_all_features) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_artifact.py b/qiita_db/handlers/tests/test_artifact.py index c42a9120d..9abb3b941 100644 --- a/qiita_db/handlers/tests/test_artifact.py +++ b/qiita_db/handlers/tests/test_artifact.py @@ -6,23 +6,23 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main, TestCase -from json import loads, dumps from functools import partial -from os.path import join, exists, isfile +from json import dumps, loads from os import close, remove +from os.path import exists, isfile, join from shutil import rmtree -from tempfile import mkstemp, mkdtemp +from tempfile import mkdtemp, mkstemp from time import sleep +from unittest import TestCase, main -from tornado.web import HTTPError import pandas as pd from biom import example_table as et from biom.util import biom_open +from tornado.web import HTTPError -from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb from qiita_db.handlers.artifact import _get_artifact +from qiita_db.handlers.tests.oauthbase import OauthTestingBase class UtilTests(TestCase): @@ -52,71 +52,78 @@ def tearDown(self): rmtree(fp) def test_get_artifact_does_not_exist(self): - obs = self.get('/qiita_db/artifacts/100/', headers=self.header) + obs = self.get("/qiita_db/artifacts/100/", headers=self.header) self.assertEqual(obs.code, 404) def test_get_no_header(self): - obs = self.get('/qiita_db/artifacts/100/') + obs = self.get("/qiita_db/artifacts/100/") self.assertEqual(obs.code, 400) def test_get_artifact(self): - obs = self.get('/qiita_db/artifacts/1/', headers=self.header) + obs = self.get("/qiita_db/artifacts/1/", headers=self.header) self.assertEqual(obs.code, 200) - db_test_raw_dir = qdb.util.get_mountpoint('raw_data')[0][1] + db_test_raw_dir = qdb.util.get_mountpoint("raw_data")[0][1] path_builder = partial(join, db_test_raw_dir) exp_fps = { - "raw_forward_seqs": - [{'filepath': path_builder('1_s_G1_L001_sequences.fastq.gz'), - 'size': 58}], - "raw_barcodes": - [{'filepath': path_builder( - '1_s_G1_L001_sequences_barcodes.fastq.gz'), - 'size': 58}] + "raw_forward_seqs": [ + {"filepath": path_builder("1_s_G1_L001_sequences.fastq.gz"), "size": 58} + ], + "raw_barcodes": [ + { + "filepath": path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), + "size": 58, + } + ], } exp = { - 'name': 'Raw data 1', - 'timestamp': '2012-10-01 09:30:27', - 'visibility': 'private', - 'type': 'FASTQ', - 'data_type': '18S', - 'can_be_submitted_to_ebi': False, - 'ebi_run_accessions': None, - 'can_be_submitted_to_vamps': False, - 'is_submitted_to_vamps': None, - 'prep_information': [1], - 'study': 1, - 'analysis': None, - 'parents': [], - 'processing_parameters': None, - 'files': exp_fps} + "name": "Raw data 1", + "timestamp": "2012-10-01 09:30:27", + "visibility": "private", + "type": "FASTQ", + "data_type": "18S", + 
"can_be_submitted_to_ebi": False, + "ebi_run_accessions": None, + "can_be_submitted_to_vamps": False, + "is_submitted_to_vamps": None, + "prep_information": [1], + "study": 1, + "analysis": None, + "parents": [], + "processing_parameters": None, + "files": exp_fps, + } self.assertEqual(loads(obs.body), exp) - obs = self.get('/qiita_db/artifacts/9/', headers=self.header) + obs = self.get("/qiita_db/artifacts/9/", headers=self.header) self.assertEqual(obs.code, 200) - db_test_raw_dir = qdb.util.get_mountpoint('analysis')[0][1] + db_test_raw_dir = qdb.util.get_mountpoint("analysis")[0][1] path_builder = partial(join, db_test_raw_dir) exp_fps = { - "biom": [{'filepath': path_builder('1_analysis_18S.biom'), - 'size': 1093210}]} + "biom": [{"filepath": path_builder("1_analysis_18S.biom"), "size": 1093210}] + } exp = { - 'name': 'noname', - 'visibility': 'sandbox', - 'type': 'BIOM', - 'data_type': '18S', - 'can_be_submitted_to_ebi': False, - 'ebi_run_accessions': None, - 'can_be_submitted_to_vamps': False, - 'is_submitted_to_vamps': None, - 'prep_information': [], - 'study': None, - 'analysis': 1, - 'parents': [8], - 'processing_parameters': {'biom_table': '8', 'depth': '9000', - 'subsample_multinomial': 'False'}, - 'files': exp_fps} + "name": "noname", + "visibility": "sandbox", + "type": "BIOM", + "data_type": "18S", + "can_be_submitted_to_ebi": False, + "ebi_run_accessions": None, + "can_be_submitted_to_vamps": False, + "is_submitted_to_vamps": None, + "prep_information": [], + "study": None, + "analysis": 1, + "parents": [8], + "processing_parameters": { + "biom_table": "8", + "depth": "9000", + "subsample_multinomial": "False", + }, + "files": exp_fps, + } obs = loads(obs.body) # The timestamp is genreated at patch time, so we can't check for it - del obs['timestamp'] + del obs["timestamp"] self.assertEqual(obs, exp) def test_patch(self): @@ -124,13 +131,10 @@ def test_patch(self): close(fd) self._clean_up_files.append(html_fp) # correct argument with a single HTML - arguments = {'op': 'add', 'path': '/html_summary/', - 'value': html_fp} + arguments = {"op": "add", "path": "/html_summary/", "value": html_fp} artifact = qdb.artifact.Artifact(1) self.assertIsNone(artifact.html_summary_fp) - obs = self.patch('/qiita_db/artifacts/1/', - headers=self.header, - data=arguments) + obs = self.patch("/qiita_db/artifacts/1/", headers=self.header, data=arguments) self.assertEqual(obs.code, 200) self.assertIsNotNone(artifact.html_summary_fp) @@ -140,44 +144,39 @@ def test_patch(self): self._clean_up_files.append(html_fp) html_dir = mkdtemp() self._clean_up_files.append(html_dir) - arguments = {'op': 'add', 'path': '/html_summary/', - 'value': dumps({'html': html_fp, 'dir': html_dir})} - obs = self.patch('/qiita_db/artifacts/1/', - headers=self.header, - data=arguments) + arguments = { + "op": "add", + "path": "/html_summary/", + "value": dumps({"html": html_fp, "dir": html_dir}), + } + obs = self.patch("/qiita_db/artifacts/1/", headers=self.header, data=arguments) self.assertEqual(obs.code, 200) self.assertIsNotNone(artifact.html_summary_fp) - html_dir = [x['fp'] for x in artifact.filepaths - if x['fp_type'] == 'html_summary_dir'] + html_dir = [ + x["fp"] for x in artifact.filepaths if x["fp_type"] == "html_summary_dir" + ] self.assertEqual(len(html_dir), 1) # Wrong operation - arguments = {'op': 'wrong', 'path': '/html_summary/', - 'value': html_fp} - obs = self.patch('/qiita_db/artifacts/1/', - headers=self.header, - data=arguments) + arguments = {"op": "wrong", "path": "/html_summary/", "value": 
html_fp} + obs = self.patch("/qiita_db/artifacts/1/", headers=self.header, data=arguments) self.assertEqual(obs.code, 400) - self.assertEqual(obs.reason, 'Operation "wrong" not supported. ' - 'Current supported operations: add') + self.assertEqual( + obs.reason, + 'Operation "wrong" not supported. Current supported operations: add', + ) # Wrong path parameter - arguments = {'op': 'add', 'path': '/wrong/', - 'value': html_fp} - obs = self.patch('/qiita_db/artifacts/1/', - headers=self.header, - data=arguments) + arguments = {"op": "add", "path": "/wrong/", "value": html_fp} + obs = self.patch("/qiita_db/artifacts/1/", headers=self.header, data=arguments) self.assertEqual(obs.code, 400) - self.assertEqual(obs.reason, 'Incorrect path parameter value') + self.assertEqual(obs.reason, "Incorrect path parameter value") # Wrong value parameter - arguments = {'op': 'add', 'path': '/html_summary/', - 'value': html_fp} - obs = self.patch('/qiita_db/artifacts/1/', - headers=self.header, - data=arguments) + arguments = {"op": "add", "path": "/html_summary/", "value": html_fp} + obs = self.patch("/qiita_db/artifacts/1/", headers=self.header, data=arguments) self.assertEqual(obs.code, 500) - self.assertIn('No such file or directory', obs.reason) + self.assertIn("No such file or directory", obs.reason) class ArtifactAPItestHandlerTests(OauthTestingBase): @@ -185,18 +184,21 @@ def setUp(self): super(ArtifactAPItestHandlerTests, self).setUp() metadata_dict = { - 'SKB8.640193': {'center_name': 'ANL', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'}} - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index') - self.prep_template = \ - qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(1), "16S") + "SKB8.640193": { + "center_name": "ANL", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "experiment_design_description": "BBBB", + } + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index") + self.prep_template = qdb.metadata_template.prep_template.PrepTemplate.create( + metadata, qdb.study.Study(1), "16S" + ) self._clean_up_files = [] @@ -208,95 +210,107 @@ def tearDown(self): remove(f) def test_post(self): - fd, fp1 = mkstemp(suffix='_seqs.fastq') + fd, fp1 = mkstemp(suffix="_seqs.fastq") close(fd) self._clean_up_files.append(fp1) - with open(fp1, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" - "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" - "+\n" - "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n") - - fd, fp2 = mkstemp(suffix='_barcodes.fastq') + with open(fp1, "w") as f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" + "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" + "+\n" + "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n" + ) + + fd, fp2 = mkstemp(suffix="_barcodes.fastq") close(fd) self._clean_up_files.append(fp2) - with open(fp2, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n" - "NNNCNNNNNNNNN\n" - "+\n" - "#############\n") - - data = {'filepaths': dumps([(fp1, 'raw_forward_seqs'), - (fp2, 'raw_barcodes')]), - 'type': "FASTQ", - 'name': "New test artifact", - 'prep': self.prep_template.id} - obs = self.post('/apitest/artifact/', headers=self.header, data=data) + 
with open(fp2, "w") as f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n" + "NNNCNNNNNNNNN\n" + "+\n" + "#############\n" + ) + + data = { + "filepaths": dumps([(fp1, "raw_forward_seqs"), (fp2, "raw_barcodes")]), + "type": "FASTQ", + "name": "New test artifact", + "prep": self.prep_template.id, + } + obs = self.post("/apitest/artifact/", headers=self.header, data=data) self.assertEqual(obs.code, 200) obs = loads(obs.body) - self.assertCountEqual(obs.keys(), ['artifact']) + self.assertCountEqual(obs.keys(), ["artifact"]) - a = qdb.artifact.Artifact(obs['artifact']) - self._clean_up_files.extend([x['fp'] for x in a.filepaths]) + a = qdb.artifact.Artifact(obs["artifact"]) + self._clean_up_files.extend([x["fp"] for x in a.filepaths]) self.assertEqual(a.name, "New test artifact") def test_post_analysis(self): - fd, fp = mkstemp(suffix='_table.biom') + fd, fp = mkstemp(suffix="_table.biom") close(fd) - with biom_open(fp, 'w') as f: + with biom_open(fp, "w") as f: et.to_hdf5(f, "test") self._clean_up_files.append(fp) - data = {'filepaths': dumps([(fp, 'biom')]), - 'type': "BIOM", - 'name': "New biom artifact", - 'analysis': 1, - 'data_type': '16S'} - obs = self.post('/apitest/artifact/', headers=self.header, data=data) + data = { + "filepaths": dumps([(fp, "biom")]), + "type": "BIOM", + "name": "New biom artifact", + "analysis": 1, + "data_type": "16S", + } + obs = self.post("/apitest/artifact/", headers=self.header, data=data) self.assertEqual(obs.code, 200) obs = loads(obs.body) - self.assertCountEqual(obs.keys(), ['artifact']) + self.assertCountEqual(obs.keys(), ["artifact"]) - a = qdb.artifact.Artifact(obs['artifact']) - self._clean_up_files.extend([x['fp'] for x in a.filepaths]) + a = qdb.artifact.Artifact(obs["artifact"]) + self._clean_up_files.extend([x["fp"] for x in a.filepaths]) self.assertEqual(a.name, "New biom artifact") def test_post_error(self): - data = {'filepaths': dumps([('Do not exist', 'raw_forward_seqs')]), - 'type': "FASTQ", - 'name': "New test artifact", - 'prep': 1} - obs = self.post('/apitest/artifact/', headers=self.header, data=data) + data = { + "filepaths": dumps([("Do not exist", "raw_forward_seqs")]), + "type": "FASTQ", + "name": "New test artifact", + "prep": 1, + } + obs = self.post("/apitest/artifact/", headers=self.header, data=data) self.assertEqual(obs.code, 500) - self.assertIn("Prep template 1 already has an artifact associated", - obs.body.decode('ascii')) + self.assertIn( + "Prep template 1 already has an artifact associated", + obs.body.decode("ascii"), + ) class ArtifactTypeHandlerTests(OauthTestingBase): def test_post_no_header(self): - obs = self.post('/qiita_db/artifacts/types/', data={}) + obs = self.post("/qiita_db/artifacts/types/", data={}) self.assertEqual(obs.code, 400) def test_post(self): - data = {'type_name': 'new_type', - 'description': 'some_description', - 'can_be_submitted_to_ebi': False, - 'can_be_submitted_to_vamps': False, - 'is_user_uploadable': False, - 'filepath_types': dumps([("log", False), - ("raw_forward_seqs", True)])} - obs = self.post('/qiita_db/artifacts/types/', headers=self.header, - data=data) + data = { + "type_name": "new_type", + "description": "some_description", + "can_be_submitted_to_ebi": False, + "can_be_submitted_to_vamps": False, + "is_user_uploadable": False, + "filepath_types": dumps([("log", False), ("raw_forward_seqs", True)]), + } + obs = self.post("/qiita_db/artifacts/types/", headers=self.header, data=data) self.assertEqual(obs.code, 200) - self.assertIn(['new_type', 
'some_description', False, False, False], - qdb.artifact.Artifact.types()) + self.assertIn( + ["new_type", "some_description", False, False, False], + qdb.artifact.Artifact.types(), + ) - obs = self.post('/qiita_db/artifacts/types/', headers=self.header, - data=data) + obs = self.post("/qiita_db/artifacts/types/", headers=self.header, data=data) self.assertEqual(obs.code, 200) def test_get(self): - obs = self.get('/qiita_db/artifacts/types/', headers=self.header) + obs = self.get("/qiita_db/artifacts/types/", headers=self.header) self.assertEqual(obs.code, 200) basedir = qdb.util.get_db_files_base_dir() @@ -308,7 +322,8 @@ def test_get(self): "FASTQ": f"{basedir}/FASTQ", "per_sample_FASTQ": f"{basedir}/per_sample_FASTQ", "BIOM": f"{basedir}/BIOM", - "uploads": f"{basedir}/uploads"} + "uploads": f"{basedir}/uploads", + } self.assertDictEqual(loads(obs.body), exp) @@ -326,54 +341,55 @@ def tearDown(self): def test_post(self): # no header - obs = self.post('/qiita_db/artifact/', data={}) + obs = self.post("/qiita_db/artifact/", data={}) self.assertEqual(obs.code, 400) - fd, fp = mkstemp(suffix='_table.biom') + fd, fp = mkstemp(suffix="_table.biom") close(fd) # renaming samples - et.update_ids({'S1': '1.SKB1.640202', - 'S2': '1.SKD3.640198', - 'S3': '1.SKM4.640180'}, inplace=True) - with biom_open(fp, 'w') as f: + et.update_ids( + {"S1": "1.SKB1.640202", "S2": "1.SKD3.640198", "S3": "1.SKM4.640180"}, + inplace=True, + ) + with biom_open(fp, "w") as f: et.to_hdf5(f, "test") self._clean_up_files.append(fp) # no job_id or prep_id - data = {'user_email': 'demo@microbio.me', - 'artifact_type': 'BIOM', - 'command_artifact_name': 'OTU table', - 'files': dumps({'biom': [fp]})} + data = { + "user_email": "demo@microbio.me", + "artifact_type": "BIOM", + "command_artifact_name": "OTU table", + "files": dumps({"biom": [fp]}), + } - obs = self.post('/qiita_db/artifact/', headers=self.header, data=data) + obs = self.post("/qiita_db/artifact/", headers=self.header, data=data) self.assertEqual(obs.code, 400) - self.assertIn( - 'You need to specify a job_id or a prep_id', str(obs.error)) + self.assertIn("You need to specify a job_id or a prep_id", str(obs.error)) # both job_id and prep_id defined - data['job_id'] = 'e5609746-a985-41a1-babf-6b3ebe9eb5a9' - data['prep_id'] = 'prep_id' - obs = self.post('/qiita_db/artifact/', headers=self.header, data=data) + data["job_id"] = "e5609746-a985-41a1-babf-6b3ebe9eb5a9" + data["prep_id"] = "prep_id" + obs = self.post("/qiita_db/artifact/", headers=self.header, data=data) self.assertEqual(obs.code, 400) - self.assertIn( - 'You need to specify only a job_id or a prep_id', str(obs.error)) + self.assertIn("You need to specify only a job_id or a prep_id", str(obs.error)) # make sure that all the plugins are on qdb.util.activate_or_update_plugins(update=True) # tests success by inserting a new artifact into an existing job - original_job = qdb.processing_job.ProcessingJob(data['job_id']) + original_job = qdb.processing_job.ProcessingJob(data["job_id"]) input_artifact = original_job.input_artifacts[0] original_children = input_artifact.children self.assertEqual(len(original_children), 3) # send the new data - del data['prep_id'] - obs = self.post('/qiita_db/artifact/', headers=self.header, data=data) - jid = loads(obs.body)['job_id'] + del data["prep_id"] + obs = self.post("/qiita_db/artifact/", headers=self.header, data=data) + jid = loads(obs.body)["job_id"] job = qdb.processing_job.ProcessingJob(jid) - while job.status not in ('error', 'success'): + while job.status not in 
("error", "success"): sleep(0.5) # now the original job should have 4 children and make sure they have @@ -382,8 +398,9 @@ def test_post(self): new_children = list(set(children) - set(original_children))[0] self.assertEqual(len(children), 4) for c in children[1:]: - self.assertCountEqual(children[0].processing_parameters.values, - c.processing_parameters.values) + self.assertCountEqual( + children[0].processing_parameters.values, c.processing_parameters.values + ) self.assertEqual(children[0].parents, c.parents) # making sure the new artifact is part of the descendants, which is a @@ -392,24 +409,35 @@ def test_post(self): # now let's test adding an artifact directly to a new prep new_prep = qdb.metadata_template.prep_template.PrepTemplate.create( - pd.DataFrame({'new_col': {'1.SKB1.640202': 1, - '1.SKD3.640198': 2, - '1.SKM4.640180': 3}}), - qdb.study.Study(1), '16S') - fd, fp = mkstemp(suffix='_table.biom') + pd.DataFrame( + { + "new_col": { + "1.SKB1.640202": 1, + "1.SKD3.640198": 2, + "1.SKM4.640180": 3, + } + } + ), + qdb.study.Study(1), + "16S", + ) + fd, fp = mkstemp(suffix="_table.biom") close(fd) - with biom_open(fp, 'w') as f: + with biom_open(fp, "w") as f: et.to_hdf5(f, "test") self._clean_up_files.append(fp) - data = {'user_email': 'demo@microbio.me', - 'artifact_type': 'BIOM', 'prep_id': new_prep.id, - 'files': dumps({'biom': [fp]})} + data = { + "user_email": "demo@microbio.me", + "artifact_type": "BIOM", + "prep_id": new_prep.id, + "files": dumps({"biom": [fp]}), + } - obs = self.post('/qiita_db/artifact/', headers=self.header, data=data) - jid = loads(obs.body)['job_id'] + obs = self.post("/qiita_db/artifact/", headers=self.header, data=data) + jid = loads(obs.body)["job_id"] job = qdb.processing_job.ProcessingJob(jid) - while job.status not in ('error', 'success'): + while job.status not in ("error", "success"): sleep(0.5) self.assertIsNotNone(new_prep.artifact) @@ -417,54 +445,66 @@ def test_post_insert_artifact_and_add_default_processing(self): # now let's test adding an artifact + default processing to a new # preparation new_prep = qdb.metadata_template.prep_template.PrepTemplate.create( - pd.DataFrame({'new_col': {'1.SKB1.640202': 1, - '1.SKD3.640198': 2, - '1.SKM4.640180': 3}}), - qdb.study.Study(1), '16S') + pd.DataFrame( + { + "new_col": { + "1.SKB1.640202": 1, + "1.SKD3.640198": 2, + "1.SKM4.640180": 3, + } + } + ), + qdb.study.Study(1), + "16S", + ) # creating the fastq files to be added - fd, fp1 = mkstemp(suffix='_seqs.fastq') + fd, fp1 = mkstemp(suffix="_seqs.fastq") close(fd) self._clean_up_files.append(fp1) - with open(fp1, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" - "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" - "+\n" - "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n") - - fd, fp2 = mkstemp(suffix='_barcodes.fastq') + with open(fp1, "w") as f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" + "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" + "+\n" + "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n" + ) + + fd, fp2 = mkstemp(suffix="_barcodes.fastq") close(fd) self._clean_up_files.append(fp2) - with open(fp2, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n" - "NNNCNNNNNNNNN\n" - "+\n" - "#############\n") - - data = {'user_email': 'demo@microbio.me', - 'artifact_type': 'FASTQ', - 'prep_id': new_prep.id, - 'files': dumps([(fp1, 'raw_forward_seqs'), - (fp2, 'raw_barcodes')]), - 'add_default_workflow': False} - obs = self.post('/qiita_db/artifact/', headers=self.header, data=data) + with open(fp2, "w") as f: + f.write( + 
"@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n" + "NNNCNNNNNNNNN\n" + "+\n" + "#############\n" + ) + + data = { + "user_email": "demo@microbio.me", + "artifact_type": "FASTQ", + "prep_id": new_prep.id, + "files": dumps([(fp1, "raw_forward_seqs"), (fp2, "raw_barcodes")]), + "add_default_workflow": False, + } + obs = self.post("/qiita_db/artifact/", headers=self.header, data=data) self.assertEqual(obs.code, 200) - jid = loads(obs.body)['job_id'] + jid = loads(obs.body)["job_id"] # if we got to this point, then we should have a job and that job # should have children jobs (generated by the default workflow) job = qdb.processing_job.ProcessingJob(jid) children = [c.command.name for c in job.children] - grandchildren = [gc.command.name for c in job.children - for gc in c.children] - self.assertEqual('Validate', job.command.name) - self.assertEqual(['Split libraries FASTQ'], children) - self.assertEqual(['Pick closed-reference OTUs'], grandchildren) + grandchildren = [gc.command.name for c in job.children for gc in c.children] + self.assertEqual("Validate", job.command.name) + self.assertEqual(["Split libraries FASTQ"], children) + self.assertEqual(["Pick closed-reference OTUs"], grandchildren) # just to avoid any tentative issues, let's wait for the main job to # finish - while job.status not in ('error', 'success'): + while job.status not in ("error", "success"): sleep(0.5) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_core.py b/qiita_db/handlers/tests/test_core.py index 63be60a2f..481d1427b 100644 --- a/qiita_db/handlers/tests/test_core.py +++ b/qiita_db/handlers/tests/test_core.py @@ -8,18 +8,18 @@ from unittest import main -from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb +from qiita_db.handlers.tests.oauthbase import OauthTestingBase class ResetAPItestHandler(OauthTestingBase): def test_post(self): - qdb.user.User.create('new_user@test.foo', 'password') - self.assertTrue(qdb.user.User.exists('new_user@test.foo')) - obs = self.post('/apitest/reset/', headers=self.header, data="") + qdb.user.User.create("new_user@test.foo", "password") + self.assertTrue(qdb.user.User.exists("new_user@test.foo")) + obs = self.post("/apitest/reset/", headers=self.header, data="") self.assertEqual(obs.code, 200) - self.assertFalse(qdb.user.User.exists('new_user@test.foo')) + self.assertFalse(qdb.user.User.exists("new_user@test.foo")) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_oauth2.py b/qiita_db/handlers/tests/test_oauth2.py index e58a57cfd..56e8e139f 100644 --- a/qiita_db/handlers/tests/test_oauth2.py +++ b/qiita_db/handlers/tests/test_oauth2.py @@ -5,84 +5,100 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main from json import loads +from unittest import main from qiita_core.qiita_settings import r_client - from qiita_pet.test.tornado_test_base import TestHandlerBase class OAuth2BaseHandlerTests(TestHandlerBase): def setUp(self): # Create client test authentication token - self.client_token = 'SOMEAUTHTESTINGTOKENHERE2122' + self.client_token = "SOMEAUTHTESTINGTOKENHERE2122" token_info = { - 'timestamp': '12/12/12 12:12:00', - 'client_id': 'test123123123', - 'grant_type': 'client' - + "timestamp": "12/12/12 12:12:00", + "client_id": "test123123123", + "grant_type": "client", } r_client.hmset(self.client_token, token_info) r_client.expire(self.client_token, 5) # Create username test authentication token - self.user_token = 'SOMEAUTHTESTINGTOKENHEREUSERNAME' + self.user_token = "SOMEAUTHTESTINGTOKENHEREUSERNAME" token_info = { - 'timestamp': '12/12/12 12:12:00', - 'client_id': 'testuser', - 'grant_type': 'password', - 'user': 'test@foo.bar' + "timestamp": "12/12/12 12:12:00", + "client_id": "testuser", + "grant_type": "password", + "user": "test@foo.bar", } r_client.hmset(self.user_token, token_info) r_client.expire(self.user_token, 5) # Create test access limit token - self.user_rate_key = 'testuser_test@foo.bar_daily_limit' + self.user_rate_key = "testuser_test@foo.bar_daily_limit" r_client.setex(self.user_rate_key, 5, 2) super(OAuth2BaseHandlerTests, self).setUp() def test_authenticate_header_client(self): - obs = self.get('/qiita_db/artifacts/1/', headers={ - 'Authorization': 'Bearer ' + self.client_token}) + obs = self.get( + "/qiita_db/artifacts/1/", + headers={"Authorization": "Bearer " + self.client_token}, + ) self.assertEqual(obs.code, 200) def test_authenticate_header_username(self): - obs = self.get('/qiita_db/artifacts/1/', headers={ - 'Authorization': 'Bearer ' + self.user_token}) + obs = self.get( + "/qiita_db/artifacts/1/", + headers={"Authorization": "Bearer " + self.user_token}, + ) self.assertEqual(obs.code, 200) # Check rate limiting works self.assertEqual(int(r_client.get(self.user_rate_key)), 1) - r_client.setex('testuser_test@foo.bar_daily_limit', 1, 0) - obs = self.get('/qiita_db/artifacts/100/', headers={ - 'Authorization': 'Bearer ' + self.user_token}) - exp = {'error': 'invalid_grant', - 'error_description': 'Oauth2 error: daily request limit reached' - } + r_client.setex("testuser_test@foo.bar_daily_limit", 1, 0) + obs = self.get( + "/qiita_db/artifacts/100/", + headers={"Authorization": "Bearer " + self.user_token}, + ) + exp = { + "error": "invalid_grant", + "error_description": "Oauth2 error: daily request limit reached", + } self.assertEqual(loads(obs.body), exp) def test_authenticate_header_missing(self): - obs = self.get('/qiita_db/artifacts/100/') + obs = self.get("/qiita_db/artifacts/100/") self.assertEqual(obs.code, 400) - self.assertEqual(loads(obs.body), { - 'error': 'invalid_request', - 'error_description': 'Oauth2 error: invalid access token'}) + self.assertEqual( + loads(obs.body), + { + "error": "invalid_request", + "error_description": "Oauth2 error: invalid access token", + }, + ) def test_authenticate_header_bad_token(self): - obs = self.get('/qiita_db/artifacts/100/', headers={ - 'Authorization': 'Bearer BADTOKEN'}) + obs = self.get( + "/qiita_db/artifacts/100/", headers={"Authorization": "Bearer BADTOKEN"} + ) self.assertEqual(obs.code, 400) - exp = {'error': 'invalid_grant', - 'error_description': 'Oauth2 error: token has timed out'} + exp = { 
+ "error": "invalid_grant", + "error_description": "Oauth2 error: token has timed out", + } self.assertEqual(loads(obs.body), exp) def test_authenticate_header_bad_header_type(self): - obs = self.get('/qiita_db/artifacts/100/', headers={ - 'Authorization': 'WRONG ' + self.client_token}) + obs = self.get( + "/qiita_db/artifacts/100/", + headers={"Authorization": "WRONG " + self.client_token}, + ) self.assertEqual(obs.code, 400) - exp = {'error': 'invalid_grant', - 'error_description': 'Oauth2 error: invalid access token'} + exp = { + "error": "invalid_grant", + "error_description": "Oauth2 error: invalid access token", + } self.assertEqual(loads(obs.body), exp) @@ -90,212 +106,271 @@ class OAuth2HandlerTests(TestHandlerBase): def test_authenticate_client_header(self): # Authenticate using header obs = self.post( - '/qiita_db/authenticate/', {'grant_type': 'client'}, { - 'Authorization': 'Basic MTluZGtPM29NS3NvQ2hqVlZXbHVGN1FreEhSZl' - 'loVEtTRmJBVnQ4SWhLN2daZ0RhTzQ6SjdGZlE3Q1FkT3' - 'h1S2hRQWYxZW9HZ0JBRTgxTnM4R3UzRUthV0ZtM0lPMk' - 'pLaEFtbUNXWnVhYmUwTzVNcDI4czE='}) + "/qiita_db/authenticate/", + {"grant_type": "client"}, + { + "Authorization": "Basic MTluZGtPM29NS3NvQ2hqVlZXbHVGN1FreEhSZl" + "loVEtTRmJBVnQ4SWhLN2daZ0RhTzQ6SjdGZlE3Q1FkT3" + "h1S2hRQWYxZW9HZ0JBRTgxTnM4R3UzRUthV0ZtM0lPMk" + "pLaEFtbUNXWnVhYmUwTzVNcDI4czE=" + }, + ) self.assertEqual(obs.code, 200) obs_body = loads(obs.body) - exp = {'access_token': obs_body['access_token'], - 'token_type': 'Bearer', - 'expires_in': 3600} + exp = { + "access_token": obs_body["access_token"], + "token_type": "Bearer", + "expires_in": 3600, + } self.assertDictEqual(obs_body, exp) # Make sure token in system with proper ttl - token = r_client.hgetall(obs_body['access_token']) + token = r_client.hgetall(obs_body["access_token"]) exp = { - b'timestamp': token[b'timestamp'], - b'client_id': (b'19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAV' - b't8IhK7gZgDaO4'), - b'grant_type': b'client' + b"timestamp": token[b"timestamp"], + b"client_id": (b"19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDaO4"), + b"grant_type": b"client", } self.assertDictEqual(token, exp) - self.assertEqual(r_client.ttl(obs_body['access_token']), 3600) + self.assertEqual(r_client.ttl(obs_body["access_token"]), 3600) def test_authenticate_client_post(self): # Authenticate using post only obs = self.post( - '/qiita_db/authenticate/', { - 'grant_type': 'client', - 'client_id': '19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDa' - 'O4', - 'client_secret': 'J7FfQ7CQdOxuKhQAf1eoGgBAE81Ns8Gu3EKaWFm3IO2J' - 'KhAmmCWZuabe0O5Mp28s1'}) + "/qiita_db/authenticate/", + { + "grant_type": "client", + "client_id": "19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDaO4", + "client_secret": "J7FfQ7CQdOxuKhQAf1eoGgBAE81Ns8Gu3EKaWFm3IO2J" + "KhAmmCWZuabe0O5Mp28s1", + }, + ) self.assertEqual(obs.code, 200) obs_body = loads(obs.body) - exp = {'access_token': obs_body['access_token'], - 'token_type': 'Bearer', - 'expires_in': 3600} + exp = { + "access_token": obs_body["access_token"], + "token_type": "Bearer", + "expires_in": 3600, + } self.assertDictEqual(obs_body, exp) # Make sure token in system with proper ttl - token = r_client.hgetall(obs_body['access_token']) + token = r_client.hgetall(obs_body["access_token"]) exp = { - b'timestamp': token[b'timestamp'], - b'client_id': (b'19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8' - b'IhK7gZgDaO4'), - b'grant_type': b'client' + b"timestamp": token[b"timestamp"], + b"client_id": (b"19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDaO4"), + b"grant_type": b"client", } 
self.assertDictEqual(token, exp) - self.assertEqual(r_client.ttl(obs_body['access_token']), 3600) + self.assertEqual(r_client.ttl(obs_body["access_token"]), 3600) def test_authenticate_client_bad_base64_hash(self): # Authenticate using bad header obs = self.post( - '/qiita_db/authenticate/', {'grant_type': 'client'}, { - 'Authorization': 'Basic MTluZGtPM29NS3NvQ2hqVlZXbHVGN1FreEhSZl' - 'loVEtTRmJBVnQ4SBADN2daZ0RhTzQ6SjdGZlE3Q1FkT3' - 'h1S2hRQWYxZW9HZ0JBRTgxTnM4R3UzRUthV0ZtM0lPMk' - 'pLaEFtbUNXWnVhYmUwTzVNcDI4czE='}) + "/qiita_db/authenticate/", + {"grant_type": "client"}, + { + "Authorization": "Basic MTluZGtPM29NS3NvQ2hqVlZXbHVGN1FreEhSZl" + "loVEtTRmJBVnQ4SBADN2daZ0RhTzQ6SjdGZlE3Q1FkT3" + "h1S2hRQWYxZW9HZ0JBRTgxTnM4R3UzRUthV0ZtM0lPMk" + "pLaEFtbUNXWnVhYmUwTzVNcDI4czE=" + }, + ) self.assertEqual(obs.code, 400) obs_body = loads(obs.body) - exp = {'error': 'invalid_client', - 'error_description': 'Oauth2 error: invalid client information'} + exp = { + "error": "invalid_client", + "error_description": "Oauth2 error: invalid client information", + } self.assertEqual(obs_body, exp) def test_authenticate_client_bad_header_base64_hash(self): obs = self.post( - '/qiita_db/authenticate/', {'grant_type': 'client'}, { - 'Authorization': 'WRONG MTluZGtPM29NS3NvQ2hqVlZXbHVGN1FreEhSZl' - 'loVEtTRmJBVnQ4SWhLN2daZ0RhTzQ6SjdGZlE3Q1FkT3' - 'h1S2hRQWYxZW9HZ0JBRTgxTnM4R3UzRUthV0ZtM0lPMk' - 'pLaEFtbUNXWnVhYmUwTzVNcDI4czE='}) + "/qiita_db/authenticate/", + {"grant_type": "client"}, + { + "Authorization": "WRONG MTluZGtPM29NS3NvQ2hqVlZXbHVGN1FreEhSZl" + "loVEtTRmJBVnQ4SWhLN2daZ0RhTzQ6SjdGZlE3Q1FkT3" + "h1S2hRQWYxZW9HZ0JBRTgxTnM4R3UzRUthV0ZtM0lPMk" + "pLaEFtbUNXWnVhYmUwTzVNcDI4czE=" + }, + ) obs_body = loads(obs.body) - exp = {'error': 'invalid_request', - 'error_description': 'Oauth2 error: invalid token type'} + exp = { + "error": "invalid_request", + "error_description": "Oauth2 error: invalid token type", + } self.assertEqual(obs_body, exp) def test_authenticate_client_bad_client_id(self): obs = self.post( - '/qiita_db/authenticate/', { - 'grant_type': 'client', - 'client_id': 'BADdkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDa' - 'O4', - 'client_secret': 'J7FfQ7CQdOxuKhQAf1eoGgBAE81Ns8Gu3EKaWFm3IO2J' - 'KhAmmCWZuabe0O5Mp28s1'}) + "/qiita_db/authenticate/", + { + "grant_type": "client", + "client_id": "BADdkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDaO4", + "client_secret": "J7FfQ7CQdOxuKhQAf1eoGgBAE81Ns8Gu3EKaWFm3IO2J" + "KhAmmCWZuabe0O5Mp28s1", + }, + ) self.assertEqual(obs.code, 400) obs_body = loads(obs.body) - exp = {'error': 'invalid_client', - 'error_description': 'Oauth2 error: invalid client information'} + exp = { + "error": "invalid_client", + "error_description": "Oauth2 error: invalid client information", + } self.assertEqual(obs_body, exp) def test_authenticate_client_bad_client_secret(self): obs = self.post( - '/qiita_db/authenticate/', { - 'grant_type': 'client', - 'client_id': '19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDa' - 'O4', - 'client_secret': 'BADfQ7CQdOxuKhQAf1eoGgBAE81Ns8Gu3EKaWFm3IO2J' - 'KhAmmCWZuabe0O5Mp28s1'}) + "/qiita_db/authenticate/", + { + "grant_type": "client", + "client_id": "19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDaO4", + "client_secret": "BADfQ7CQdOxuKhQAf1eoGgBAE81Ns8Gu3EKaWFm3IO2J" + "KhAmmCWZuabe0O5Mp28s1", + }, + ) self.assertEqual(obs.code, 400) obs_body = loads(obs.body) - exp = {'error': 'invalid_client', - 'error_description': 'Oauth2 error: invalid client information'} + exp = { + "error": "invalid_client", + "error_description": "Oauth2 error: invalid 
client information", + } self.assertEqual(obs_body, exp) def test_authenticate_client_missing_info(self): obs = self.post( - '/qiita_db/authenticate/', { - 'grant_type': 'client', - 'client_id': '19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDa' - 'O4'}) + "/qiita_db/authenticate/", + { + "grant_type": "client", + "client_id": "19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDaO4", + }, + ) self.assertEqual(obs.code, 400) obs_body = loads(obs.body) - exp = {'error': 'invalid_request', - 'error_description': 'Oauth2 error: missing client information'} + exp = { + "error": "invalid_request", + "error_description": "Oauth2 error: missing client information", + } self.assertEqual(obs_body, exp) def test_authenticate_password(self): obs = self.post( - '/qiita_db/authenticate/', { - 'grant_type': 'password', - 'client_id': 'DWelYzEYJYcZ4wlqUp0bHGXojrvZVz0CNBJvOqUKcrPQ5p4U' - 'qE', - 'username': 'test@foo.bar', - 'password': 'password'}) + "/qiita_db/authenticate/", + { + "grant_type": "password", + "client_id": "DWelYzEYJYcZ4wlqUp0bHGXojrvZVz0CNBJvOqUKcrPQ5p4UqE", + "username": "test@foo.bar", + "password": "password", + }, + ) self.assertEqual(obs.code, 200) obs_body = loads(obs.body) - exp = {'access_token': obs_body['access_token'], - 'token_type': 'Bearer', - 'expires_in': 3600} + exp = { + "access_token": obs_body["access_token"], + "token_type": "Bearer", + "expires_in": 3600, + } self.assertDictEqual(obs_body, exp) # Make sure token in system with proper ttl - token = r_client.hgetall(obs_body['access_token']) - exp = {b'timestamp': token[b'timestamp'], - b'user': b'test@foo.bar', - b'client_id': token[b'client_id'], - b'grant_type': b'password'} + token = r_client.hgetall(obs_body["access_token"]) + exp = { + b"timestamp": token[b"timestamp"], + b"user": b"test@foo.bar", + b"client_id": token[b"client_id"], + b"grant_type": b"password", + } self.assertDictEqual(token, exp) - self.assertEqual(r_client.ttl(obs_body['access_token']), 3600) + self.assertEqual(r_client.ttl(obs_body["access_token"]), 3600) def test_authenticate_password_non_user_client_id_header(self): obs = self.post( - '/qiita_db/authenticate/', { - 'grant_type': 'password', - 'client_id': '19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDa' - 'O4', - 'username': 'test@foo.bar', - 'password': 'password'}) + "/qiita_db/authenticate/", + { + "grant_type": "password", + "client_id": "19ndkO3oMKsoChjVVWluF7QkxHRfYhTKSFbAVt8IhK7gZgDaO4", + "username": "test@foo.bar", + "password": "password", + }, + ) self.assertEqual(obs.code, 400) obs_body = loads(obs.body) - exp = {'error': 'invalid_client', - 'error_description': 'Oauth2 error: invalid client information'} + exp = { + "error": "invalid_client", + "error_description": "Oauth2 error: invalid client information", + } self.assertEqual(obs_body, exp) def test_authenticate_password_non_user_client_id(self): obs = self.post( - '/qiita_db/authenticate/', { - 'grant_type': 'password', - 'client_id': 'WAAAAAAAAAARG', - 'username': 'test@foo.bar', - 'password': 'password'}) + "/qiita_db/authenticate/", + { + "grant_type": "password", + "client_id": "WAAAAAAAAAARG", + "username": "test@foo.bar", + "password": "password", + }, + ) self.assertEqual(obs.code, 400) obs_body = loads(obs.body) - exp = {'error': 'invalid_client', - 'error_description': 'Oauth2 error: invalid client information'} + exp = { + "error": "invalid_client", + "error_description": "Oauth2 error: invalid client information", + } self.assertEqual(obs_body, exp) def test_authenticate_password_bad_user_id(self): obs = 
self.post( - '/qiita_db/authenticate/', { - 'grant_type': 'password', - 'client_id': 'DWelYzEYJYcZ4wlqUp0bHGXojrvZVz0CNBJvOqUKcrPQ5p4U' - 'qE', - 'username': 'BROKEN@FAKE.COM', - 'password': 'password'}) + "/qiita_db/authenticate/", + { + "grant_type": "password", + "client_id": "DWelYzEYJYcZ4wlqUp0bHGXojrvZVz0CNBJvOqUKcrPQ5p4UqE", + "username": "BROKEN@FAKE.COM", + "password": "password", + }, + ) self.assertEqual(obs.code, 400) obs_body = loads(obs.body) - exp = {'error': 'invalid_client', - 'error_description': 'Oauth2 error: invalid user information'} + exp = { + "error": "invalid_client", + "error_description": "Oauth2 error: invalid user information", + } self.assertEqual(obs_body, exp) def test_authenticate_password_bad_password(self): obs = self.post( - '/qiita_db/authenticate/', { - 'grant_type': 'password', - 'client_id': 'DWelYzEYJYcZ4wlqUp0bHGXojrvZVz0CNBJvOqUKcrPQ5p4U' - 'qE', - 'username': 'test@foo.bar', - 'password': 'NOTAReALPASSworD'}) + "/qiita_db/authenticate/", + { + "grant_type": "password", + "client_id": "DWelYzEYJYcZ4wlqUp0bHGXojrvZVz0CNBJvOqUKcrPQ5p4UqE", + "username": "test@foo.bar", + "password": "NOTAReALPASSworD", + }, + ) self.assertEqual(obs.code, 400) obs_body = loads(obs.body) - exp = {'error': 'invalid_client', - 'error_description': 'Oauth2 error: invalid user information'} + exp = { + "error": "invalid_client", + "error_description": "Oauth2 error: invalid user information", + } self.assertEqual(obs_body, exp) def test_authenticate_password_missing_info(self): obs = self.post( - '/qiita_db/authenticate/', { - 'grant_type': 'password', - 'client_id': 'DWelYzEYJYcZ4wlqUp0bHGXojrvZVz0CNBJvOqUKcrPQ5p4U' - 'qE', - 'username': 'test@foo.bar'}) + "/qiita_db/authenticate/", + { + "grant_type": "password", + "client_id": "DWelYzEYJYcZ4wlqUp0bHGXojrvZVz0CNBJvOqUKcrPQ5p4UqE", + "username": "test@foo.bar", + }, + ) self.assertEqual(obs.code, 400) obs_body = loads(obs.body) - exp = {'error': 'invalid_request', - 'error_description': 'Oauth2 error: missing user information'} + exp = { + "error": "invalid_request", + "error_description": "Oauth2 error: missing user information", + } self.assertEqual(obs_body, exp) diff --git a/qiita_db/handlers/tests/test_plugin.py b/qiita_db/handlers/tests/test_plugin.py index f5291c277..d1a397ad9 100644 --- a/qiita_db/handlers/tests/test_plugin.py +++ b/qiita_db/handlers/tests/test_plugin.py @@ -6,14 +6,14 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main, TestCase -from json import loads, dumps +from json import dumps, loads +from unittest import TestCase, main from tornado.web import HTTPError -from qiita_db.handlers.tests.oauthbase import OauthTestingBase -from qiita_db.handlers.plugin import _get_plugin, _get_command import qiita_db as qdb +from qiita_db.handlers.plugin import _get_command, _get_plugin +from qiita_db.handlers.tests.oauthbase import OauthTestingBase class UtilTests(TestCase): @@ -27,204 +27,234 @@ def test_get_plugin(self): _get_plugin("QiIME", "1.9.1") def test_get_command(self): - obs = _get_command('QIIMEq2', '1.9.1', 'Split libraries FASTQ') + obs = _get_command("QIIMEq2", "1.9.1", "Split libraries FASTQ") exp = qdb.software.Command(1) self.assertEqual(obs, exp) # It does not exist with self.assertRaises(HTTPError): - _get_command('QIIME', '1.9.1', 'UNKNOWN') + _get_command("QIIME", "1.9.1", "UNKNOWN") class PluginHandlerTests(OauthTestingBase): def test_get_plugin_does_not_exist(self): - obs = self.get('/qiita_db/plugins/QIIMEq2/1.9.0/', headers=self.header) + obs = self.get("/qiita_db/plugins/QIIMEq2/1.9.0/", headers=self.header) self.assertEqual(obs.code, 404) def test_get_no_header(self): - obs = self.get('/qiita_db/plugins/QIIMEq2/1.9.0/') + obs = self.get("/qiita_db/plugins/QIIMEq2/1.9.0/") self.assertEqual(obs.code, 400) def test_get(self): - obs = self.get('/qiita_db/plugins/QIIMEq2/1.9.1/', headers=self.header) + obs = self.get("/qiita_db/plugins/QIIMEq2/1.9.1/", headers=self.header) self.assertEqual(obs.code, 200) exp = { - 'name': 'QIIMEq2', - 'version': '1.9.1', - 'description': 'Quantitative Insights Into Microbial Ecology ' - '(QIIME) is an open-source bioinformatics pipeline ' - 'for performing microbiome analysis from raw DNA ' - 'sequencing data', - 'commands': ['Split libraries FASTQ', 'Split libraries', - 'Pick closed-reference OTUs', 'Summarize Taxa', - 'Beta Diversity', 'Alpha Rarefaction', - 'Single Rarefaction'], - 'publications': [{'DOI': '10.1038/nmeth.f.303', - 'PubMed': '20383131'}], - 'type': 'artifact transformation', - 'active': False} + "name": "QIIMEq2", + "version": "1.9.1", + "description": "Quantitative Insights Into Microbial Ecology " + "(QIIME) is an open-source bioinformatics pipeline " + "for performing microbiome analysis from raw DNA " + "sequencing data", + "commands": [ + "Split libraries FASTQ", + "Split libraries", + "Pick closed-reference OTUs", + "Summarize Taxa", + "Beta Diversity", + "Alpha Rarefaction", + "Single Rarefaction", + ], + "publications": [{"DOI": "10.1038/nmeth.f.303", "PubMed": "20383131"}], + "type": "artifact transformation", + "active": False, + } self.assertEqual(loads(obs.body), exp) class CommandListHandlerTests(OauthTestingBase): def test_post(self): data = { - 'name': 'New Command', - 'description': 'Command added for testing', - 'required_parameters': dumps( - {'in_data': ['artifact:["FASTA"]', None]}), - 'optional_parameters': dumps( - {'param1': ['string', ''], - 'param2': ['float', '1.5'], - 'param3': ['boolean', 'True'], - 'param4': ['mchoice:["opt1", "opt2", "opt3"]', - dumps(['opt1', 'opt2'])]}), - 'outputs': dumps({'out1': 'BIOM'}), - 'default_parameter_sets': dumps( - {'dflt1': {'param1': 'test', - 'param2': '2.4', - 'param3': 'False'}}) - } - obs = self.post('/qiita_db/plugins/QIIMEq2/1.9.1/commands/', data=data, - headers=self.header) + "name": "New Command", + "description": "Command added for testing", + "required_parameters": 
dumps({"in_data": ['artifact:["FASTA"]', None]}), + "optional_parameters": dumps( + { + "param1": ["string", ""], + "param2": ["float", "1.5"], + "param3": ["boolean", "True"], + "param4": [ + 'mchoice:["opt1", "opt2", "opt3"]', + dumps(["opt1", "opt2"]), + ], + } + ), + "outputs": dumps({"out1": "BIOM"}), + "default_parameter_sets": dumps( + {"dflt1": {"param1": "test", "param2": "2.4", "param3": "False"}} + ), + } + obs = self.post( + "/qiita_db/plugins/QIIMEq2/1.9.1/commands/", data=data, headers=self.header + ) self.assertEqual(obs.code, 200) - obs = _get_command('QIIMEq2', '1.9.1', 'New Command') - self.assertEqual(obs.name, 'New Command') + obs = _get_command("QIIMEq2", "1.9.1", "New Command") + self.assertEqual(obs.name, "New Command") self.assertFalse(obs.analysis_only) # Create a new command that is analysis only data = { - 'name': 'New analysis command', - 'description': 'Analysis command added for testing', - 'required_parameters': dumps( - {'in_data': ['artifact:["BIOM"]', None]}), - 'optional_parameters': dumps( - {'param1': ['string', 'default'], - 'param4': ['mchoice:["opt1", "opt2", "opt3"]', - dumps(['opt1', 'opt2']), None, True]}), - 'outputs': dumps({'outtable': 'BIOM'}), - 'default_parameter_sets': dumps({'dflt1': {'param1': 'test'}}), - 'analysis_only': True + "name": "New analysis command", + "description": "Analysis command added for testing", + "required_parameters": dumps({"in_data": ['artifact:["BIOM"]', None]}), + "optional_parameters": dumps( + { + "param1": ["string", "default"], + "param4": [ + 'mchoice:["opt1", "opt2", "opt3"]', + dumps(["opt1", "opt2"]), + None, + True, + ], + } + ), + "outputs": dumps({"outtable": "BIOM"}), + "default_parameter_sets": dumps({"dflt1": {"param1": "test"}}), + "analysis_only": True, } - obs = self.post('/qiita_db/plugins/QIIMEq2/1.9.1/commands/', data=data, - headers=self.header) + obs = self.post( + "/qiita_db/plugins/QIIMEq2/1.9.1/commands/", data=data, headers=self.header + ) self.assertEqual(obs.code, 200) - obs = _get_command('QIIMEq2', '1.9.1', 'New analysis command') - self.assertEqual(obs.name, 'New analysis command') + obs = _get_command("QIIMEq2", "1.9.1", "New analysis command") + self.assertEqual(obs.name, "New analysis command") self.assertTrue(obs.analysis_only) - self.assertEqual(obs.merging_scheme, - {'parameters': ['param4'], 'outputs': [], - 'ignore_parent_command': False}) + self.assertEqual( + obs.merging_scheme, + {"parameters": ["param4"], "outputs": [], "ignore_parent_command": False}, + ) class CommandHandlerTests(OauthTestingBase): def test_get_command_does_not_exist(self): - obs = self.get('/qiita_db/plugins/QIIME/1.9.1/commands/UNKNOWN/', - headers=self.header) + obs = self.get( + "/qiita_db/plugins/QIIME/1.9.1/commands/UNKNOWN/", headers=self.header + ) self.assertEqual(obs.code, 404) def test_get_no_header(self): - obs = self.get( - '/qiita_db/plugins/QIIMEq2/1.9.1/commands/Split%20libraries/') + obs = self.get("/qiita_db/plugins/QIIMEq2/1.9.1/commands/Split%20libraries/") self.assertEqual(obs.code, 400) def test_get(self): obs = self.get( - '/qiita_db/plugins/QIIMEq2/1.9.1/commands/Split%20libraries/', - headers=self.header) + "/qiita_db/plugins/QIIMEq2/1.9.1/commands/Split%20libraries/", + headers=self.header, + ) self.assertEqual(obs.code, 200) - exp = {'name': 'Split libraries', - 'description': 'Demultiplexes and applies quality control to ' - 'FASTA data', - 'required_parameters': { - 'input_data': ['artifact', ['FASTA', 'FASTA_Sanger', - 'SFF']]}, - 'optional_parameters': { - 'barcode_type': 
['string', 'golay_12'], - 'disable_bc_correction': ['bool', 'False'], - 'disable_primers': ['bool', 'False'], - 'max_ambig': ['integer', '6'], - 'max_barcode_errors': ['float', '1.5'], - 'max_homopolymer': ['integer', '6'], - 'max_primer_mismatch': ['integer', '0'], - 'max_seq_len': ['integer', '1000'], - 'min_qual_score': ['integer', '25'], - 'min_seq_len': ['integer', '200'], - 'qual_score_window': ['integer', '0'], - 'reverse_primer_mismatches': ['integer', '0'], - 'reverse_primers': ['choice:["disable", "truncate_only", ' - '"truncate_remove"]', 'disable'], - 'trim_seq_length': ['bool', 'False'], - 'truncate_ambi_bases': ['bool', 'False']}, - 'default_parameter_sets': { - 'Defaults with Golay 12 barcodes': { - 'reverse_primers': 'disable', - 'reverse_primer_mismatches': 0, - 'disable_bc_correction': False, - 'max_barcode_errors': 1.5, - 'disable_primers': False, - 'min_seq_len': 200, - 'truncate_ambi_bases': False, - 'max_ambig': 6, - 'min_qual_score': 25, - 'trim_seq_length': False, - 'max_seq_len': 1000, - 'max_primer_mismatch': 0, - 'max_homopolymer': 6, - 'qual_score_window': 0, - 'barcode_type': 'golay_12'}, - 'Defaults with Hamming 8 barcodes': { - 'reverse_primers': 'disable', - 'reverse_primer_mismatches': 0, - 'disable_bc_correction': False, - 'max_barcode_errors': 1.5, - 'disable_primers': False, - 'min_seq_len': 200, - 'truncate_ambi_bases': False, - 'max_ambig': 6, - 'min_qual_score': 25, - 'trim_seq_length': False, - 'max_seq_len': 1000, - 'max_primer_mismatch': 0, - 'max_homopolymer': 6, - 'qual_score_window': 0, - 'barcode_type': 'hamming_8'}}, - 'outputs': [['demultiplexed', 'Demultiplexed']]} + exp = { + "name": "Split libraries", + "description": "Demultiplexes and applies quality control to FASTA data", + "required_parameters": { + "input_data": ["artifact", ["FASTA", "FASTA_Sanger", "SFF"]] + }, + "optional_parameters": { + "barcode_type": ["string", "golay_12"], + "disable_bc_correction": ["bool", "False"], + "disable_primers": ["bool", "False"], + "max_ambig": ["integer", "6"], + "max_barcode_errors": ["float", "1.5"], + "max_homopolymer": ["integer", "6"], + "max_primer_mismatch": ["integer", "0"], + "max_seq_len": ["integer", "1000"], + "min_qual_score": ["integer", "25"], + "min_seq_len": ["integer", "200"], + "qual_score_window": ["integer", "0"], + "reverse_primer_mismatches": ["integer", "0"], + "reverse_primers": [ + 'choice:["disable", "truncate_only", "truncate_remove"]', + "disable", + ], + "trim_seq_length": ["bool", "False"], + "truncate_ambi_bases": ["bool", "False"], + }, + "default_parameter_sets": { + "Defaults with Golay 12 barcodes": { + "reverse_primers": "disable", + "reverse_primer_mismatches": 0, + "disable_bc_correction": False, + "max_barcode_errors": 1.5, + "disable_primers": False, + "min_seq_len": 200, + "truncate_ambi_bases": False, + "max_ambig": 6, + "min_qual_score": 25, + "trim_seq_length": False, + "max_seq_len": 1000, + "max_primer_mismatch": 0, + "max_homopolymer": 6, + "qual_score_window": 0, + "barcode_type": "golay_12", + }, + "Defaults with Hamming 8 barcodes": { + "reverse_primers": "disable", + "reverse_primer_mismatches": 0, + "disable_bc_correction": False, + "max_barcode_errors": 1.5, + "disable_primers": False, + "min_seq_len": 200, + "truncate_ambi_bases": False, + "max_ambig": 6, + "min_qual_score": 25, + "trim_seq_length": False, + "max_seq_len": 1000, + "max_primer_mismatch": 0, + "max_homopolymer": 6, + "qual_score_window": 0, + "barcode_type": "hamming_8", + }, + }, + "outputs": [["demultiplexed", 
"Demultiplexed"]], + } self.assertEqual(loads(obs.body), exp) class CommandActivateHandlerTests(OauthTestingBase): def test_post_command_does_not_exist(self): - obs = self.post('/qiita_db/plugins/QIIMEq2/1.9.1/commands/' - 'UNKNOWN/activate/', - headers=self.header, data={}) + obs = self.post( + "/qiita_db/plugins/QIIMEq2/1.9.1/commands/UNKNOWN/activate/", + headers=self.header, + data={}, + ) self.assertEqual(obs.code, 404) def test_post_no_header(self): - obs = self.post('/qiita_db/plugins/QIIMEq2/1.9.1/commands/' - 'Split%20libraries/activate/', data={}) + obs = self.post( + "/qiita_db/plugins/QIIMEq2/1.9.1/commands/Split%20libraries/activate/", + data={}, + ) self.assertEqual(obs.code, 400) def test_post(self): qdb.software.Software.deactivate_all() self.assertFalse(qdb.software.Command(2).active) - obs = self.post('/qiita_db/plugins/QIIMEq2/1.9.1/commands/' - 'Split%20libraries/activate/', headers=self.header, - data={}) + obs = self.post( + "/qiita_db/plugins/QIIMEq2/1.9.1/commands/Split%20libraries/activate/", + headers=self.header, + data={}, + ) self.assertEqual(obs.code, 200) self.assertTrue(qdb.software.Command(2).active) class ReloadPluginAPItestHandlerTests(OauthTestingBase): def test_post_no_header(self): - obs = self.post('/apitest/reload_plugins/', data={}) + obs = self.post("/apitest/reload_plugins/", data={}) self.assertEqual(obs.code, 400) def test_post(self): - obs = self.post('/apitest/reload_plugins/', headers=self.header, - data={}) + obs = self.post("/apitest/reload_plugins/", headers=self.header, data={}) self.assertEqual(obs.code, 200) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_prep_template.py b/qiita_db/handlers/tests/test_prep_template.py index 32a6abd0d..5fb7d58ea 100644 --- a/qiita_db/handlers/tests/test_prep_template.py +++ b/qiita_db/handlers/tests/test_prep_template.py @@ -6,16 +6,16 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main, TestCase -from json import loads, dumps -from os.path import join from functools import partial +from json import dumps, loads +from os.path import join +from unittest import TestCase, main from tornado.web import HTTPError -from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb from qiita_db.handlers.prep_template import _get_prep_template +from qiita_db.handlers.tests.oauthbase import OauthTestingBase class UtilTests(TestCase): @@ -31,187 +31,248 @@ def test_get_prep_template(self): class PrepTemplateHandlerTests(OauthTestingBase): def test_get_does_not_exist(self): - obs = self.get('/qiita_db/prep_template/100/', headers=self.header) + obs = self.get("/qiita_db/prep_template/100/", headers=self.header) self.assertEqual(obs.code, 404) def test_get_no_header(self): - obs = self.get('/qiita_db/prep_template/1/') + obs = self.get("/qiita_db/prep_template/1/") self.assertEqual(obs.code, 400) def test_get(self): - obs = self.get('/qiita_db/prep_template/1/', headers=self.header) + obs = self.get("/qiita_db/prep_template/1/", headers=self.header) self.assertEqual(obs.code, 200) - db_test_template_dir = qdb.util.get_mountpoint('templates')[0][1] + db_test_template_dir = qdb.util.get_mountpoint("templates")[0][1] path_builder = partial(join, db_test_template_dir) obs = loads(obs.body) # have to check per key because since patch 51 we are updating the # test info files - self.assertEqual(obs['data_type'], '18S') - self.assertEqual(obs['artifact'], 1) - self.assertEqual(obs['investigation_type'], 'Metagenomics') - self.assertEqual(obs['study'], 1) - self.assertEqual(obs['status'], 'private') - self.assertTrue(obs['sample-file'].startswith( - path_builder('1_'))) - self.assertTrue(obs['prep-file'].startswith( - path_builder('1_prep_1_'))) + self.assertEqual(obs["data_type"], "18S") + self.assertEqual(obs["artifact"], 1) + self.assertEqual(obs["investigation_type"], "Metagenomics") + self.assertEqual(obs["study"], 1) + self.assertEqual(obs["status"], "private") + self.assertTrue(obs["sample-file"].startswith(path_builder("1_"))) + self.assertTrue(obs["prep-file"].startswith(path_builder("1_prep_1_"))) class PrepTemplateDataHandlerTests(OauthTestingBase): def test_get_does_not_exist(self): - obs = self.get('/qiita_db/prep_template/100/data/', - headers=self.header) + obs = self.get("/qiita_db/prep_template/100/data/", headers=self.header) self.assertEqual(obs.code, 404) def test_get_no_header(self): - obs = self.get('/qiita_db/prep_template/100/data/') + obs = self.get("/qiita_db/prep_template/100/data/") self.assertEqual(obs.code, 400) def test_get(self): - obs = self.get('/qiita_db/prep_template/1/data/', headers=self.header) + obs = self.get("/qiita_db/prep_template/1/data/", headers=self.header) self.assertEqual(obs.code, 200) obs = loads(obs.body) - self.assertCountEqual(obs.keys(), ['data']) - - obs = obs['data'] - exp = ['1.SKB2.640194', '1.SKM4.640180', '1.SKB3.640195', - '1.SKB6.640176', '1.SKD6.640190', '1.SKM6.640187', - '1.SKD9.640182', '1.SKM8.640201', '1.SKM2.640199', - '1.SKD2.640178', '1.SKB7.640196', '1.SKD4.640185', - '1.SKB8.640193', '1.SKM3.640197', '1.SKD5.640186', - '1.SKB1.640202', '1.SKM1.640183', '1.SKD1.640179', - '1.SKD3.640198', '1.SKB5.640181', '1.SKB4.640189', - '1.SKB9.640200', '1.SKM9.640192', '1.SKD8.640184', - '1.SKM5.640177', '1.SKM7.640188', '1.SKD7.640191'] + self.assertCountEqual(obs.keys(), ["data"]) + + obs = obs["data"] + 
exp = [ + "1.SKB2.640194", + "1.SKM4.640180", + "1.SKB3.640195", + "1.SKB6.640176", + "1.SKD6.640190", + "1.SKM6.640187", + "1.SKD9.640182", + "1.SKM8.640201", + "1.SKM2.640199", + "1.SKD2.640178", + "1.SKB7.640196", + "1.SKD4.640185", + "1.SKB8.640193", + "1.SKM3.640197", + "1.SKD5.640186", + "1.SKB1.640202", + "1.SKM1.640183", + "1.SKD1.640179", + "1.SKD3.640198", + "1.SKB5.640181", + "1.SKB4.640189", + "1.SKB9.640200", + "1.SKM9.640192", + "1.SKD8.640184", + "1.SKM5.640177", + "1.SKM7.640188", + "1.SKD7.640191", + ] self.assertCountEqual(list(obs.keys()), exp) - obs = obs['1.SKB1.640202'] + obs = obs["1.SKB1.640202"] exp = { - 'barcode': 'GTCCGCAAGTTA', - 'center_name': 'ANL', - 'center_project_name': None, - 'emp_status': 'EMP', - 'experiment_center': 'ANL', - 'experiment_design_description': - 'micro biome of soil and rhizosphere of cannabis plants ' - 'from CA', - 'experiment_title': 'Cannabis Soil Microbiome', - 'illumina_technology': 'MiSeq', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': - 'This analysis was done as in Caporaso et al 2011 Genome ' - 'research. The PCR primers (F515/R806) were developed against ' - 'the V4 region of the 16S rRNA (both bacteria and archaea), ' - 'which we determined would yield optimal community clustering ' - 'with reads of this length using a procedure similar to that ' - 'of ref. 15. [For reference, this primer pair amplifies the ' - 'region 533_786 in the Escherichia coli strain 83972 sequence ' - '(greengenes accession no. prokMSA_id:470367).] The reverse ' - 'PCR primer is barcoded with a 12-base error-correcting Golay ' - 'code to facilitate multiplexing of up to 1,500 samples per ' - 'lane, and both PCR primers contain sequencer adapter ' - 'regions.', - 'pcr_primers': 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', - 'platform': 'Illumina', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'run_center': 'ANL', - 'run_date': '8/1/12', - 'run_prefix': 's_G1_L001_sequences', - 'samp_size': '.25,g', - 'sample_center': 'ANL', - 'sequencing_meth': 'Sequencing by synthesis', - 'study_center': 'CCME', - 'target_gene': '16S rRNA', - 'target_subfragment': 'V4', - 'qiita_prep_id': '1'} + "barcode": "GTCCGCAAGTTA", + "center_name": "ANL", + "center_project_name": None, + "emp_status": "EMP", + "experiment_center": "ANL", + "experiment_design_description": "micro biome of soil and rhizosphere of cannabis plants " + "from CA", + "experiment_title": "Cannabis Soil Microbiome", + "illumina_technology": "MiSeq", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "This analysis was done as in Caporaso et al 2011 Genome " + "research. The PCR primers (F515/R806) were developed against " + "the V4 region of the 16S rRNA (both bacteria and archaea), " + "which we determined would yield optimal community clustering " + "with reads of this length using a procedure similar to that " + "of ref. 15. [For reference, this primer pair amplifies the " + "region 533_786 in the Escherichia coli strain 83972 sequence " + "(greengenes accession no. prokMSA_id:470367).] 
The reverse " + "PCR primer is barcoded with a 12-base error-correcting Golay " + "code to facilitate multiplexing of up to 1,500 samples per " + "lane, and both PCR primers contain sequencer adapter " + "regions.", + "pcr_primers": "FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT", + "platform": "Illumina", + "primer": "GTGCCAGCMGCCGCGGTAA", + "run_center": "ANL", + "run_date": "8/1/12", + "run_prefix": "s_G1_L001_sequences", + "samp_size": ".25,g", + "sample_center": "ANL", + "sequencing_meth": "Sequencing by synthesis", + "study_center": "CCME", + "target_gene": "16S rRNA", + "target_subfragment": "V4", + "qiita_prep_id": "1", + } self.assertEqual(obs, exp) def test_get_sample_information(self): obs = self.get( - '/qiita_db/prep_template/1/data/?sample_information=true', - headers=self.header) + "/qiita_db/prep_template/1/data/?sample_information=true", + headers=self.header, + ) self.assertEqual(obs.code, 200) obs = loads(obs.body) - self.assertCountEqual(obs.keys(), ['data']) + self.assertCountEqual(obs.keys(), ["data"]) # let's just check that the samples and the keys from the first element # match - this assures us that is the sample_info, the rest is # basically the same as the regular prep_info - obs = obs['data'] - exp = ['1.SKB2.640194', '1.SKM4.640180', '1.SKB3.640195', - '1.SKB6.640176', '1.SKD6.640190', '1.SKM6.640187', - '1.SKD9.640182', '1.SKM8.640201', '1.SKM2.640199', - '1.SKD2.640178', '1.SKB7.640196', '1.SKD4.640185', - '1.SKB8.640193', '1.SKM3.640197', '1.SKD5.640186', - '1.SKB1.640202', '1.SKM1.640183', '1.SKD1.640179', - '1.SKD3.640198', '1.SKB5.640181', '1.SKB4.640189', - '1.SKB9.640200', '1.SKM9.640192', '1.SKD8.640184', - '1.SKM5.640177', '1.SKM7.640188', '1.SKD7.640191'] + obs = obs["data"] + exp = [ + "1.SKB2.640194", + "1.SKM4.640180", + "1.SKB3.640195", + "1.SKB6.640176", + "1.SKD6.640190", + "1.SKM6.640187", + "1.SKD9.640182", + "1.SKM8.640201", + "1.SKM2.640199", + "1.SKD2.640178", + "1.SKB7.640196", + "1.SKD4.640185", + "1.SKB8.640193", + "1.SKM3.640197", + "1.SKD5.640186", + "1.SKB1.640202", + "1.SKM1.640183", + "1.SKD1.640179", + "1.SKD3.640198", + "1.SKB5.640181", + "1.SKB4.640189", + "1.SKB9.640200", + "1.SKM9.640192", + "1.SKD8.640184", + "1.SKM5.640177", + "1.SKM7.640188", + "1.SKD7.640191", + ] self.assertCountEqual(list(obs.keys()), exp) - exp = ['ph', 'temp', 'depth', 'country', 'texture', 'altitude', - 'latitude', 'taxon_id', 'elevation', 'env_biome', 'longitude', - 'tot_nitro', 'host_taxid', 'common_name', 'description', - 'env_feature', 'env_package', 'sample_type', 'tot_org_carb', - 'dna_extracted', 'samp_salinity', 'anonymized_name', - 'host_subject_id', 'scientific_name', 'assigned_from_geo', - 'season_environment', 'water_content_soil', - 'collection_timestamp', 'description_duplicate', - 'physical_specimen_location', 'physical_specimen_remaining', - 'qiita_study_id'] - self.assertCountEqual(obs['1.SKB2.640194'].keys(), exp) + exp = [ + "ph", + "temp", + "depth", + "country", + "texture", + "altitude", + "latitude", + "taxon_id", + "elevation", + "env_biome", + "longitude", + "tot_nitro", + "host_taxid", + "common_name", + "description", + "env_feature", + "env_package", + "sample_type", + "tot_org_carb", + "dna_extracted", + "samp_salinity", + "anonymized_name", + "host_subject_id", + "scientific_name", + "assigned_from_geo", + "season_environment", + "water_content_soil", + "collection_timestamp", + "description_duplicate", + "physical_specimen_location", + "physical_specimen_remaining", + "qiita_study_id", + ] + 
self.assertCountEqual(obs["1.SKB2.640194"].keys(), exp) class PrepTemplateAPItestHandlerTests(OauthTestingBase): def test_post(self): metadata_dict = { - 'SKB8.640193': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq'}, - 'SKD8.640184': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq'}} - data = {'prep_info': dumps(metadata_dict), - 'study': 1, - 'data_type': '16S'} - obs = self.post('/apitest/prep_template/', headers=self.header, - data=data) + "SKB8.640193": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + }, + "SKD8.640184": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + }, + } + data = {"prep_info": dumps(metadata_dict), "study": 1, "data_type": "16S"} + obs = self.post("/apitest/prep_template/", headers=self.header, data=data) self.assertEqual(obs.code, 200) obs = loads(obs.body) - self.assertCountEqual(obs.keys(), ['prep']) + self.assertCountEqual(obs.keys(), ["prep"]) - pt = qdb.metadata_template.prep_template.PrepTemplate(obs['prep']) + pt = qdb.metadata_template.prep_template.PrepTemplate(obs["prep"]) # default name, and creation_job_id - self.assertTrue(pt.name.startswith('Prep information')) + self.assertTrue(pt.name.startswith("Prep information")) self.assertIsNone(pt.creation_job_id) - self.assertCountEqual(pt.keys(), ['1.SKB8.640193', '1.SKD8.640184']) + self.assertCountEqual(pt.keys(), ["1.SKB8.640193", "1.SKD8.640184"]) # testing that a new prep doesn't break the call due to empty artifact - obs = self.get('/qiita_db/prep_template/%d/' % pt.id, - headers=self.header) + obs = self.get("/qiita_db/prep_template/%d/" % pt.id, headers=self.header) self.assertEqual(obs.code, 200) # testing that using a non default value actually works - data['name'] = 'my long and informative name' - data['job-id'] = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa' - obs = self.post('/apitest/prep_template/', headers=self.header, - data=data) + data["name"] = "my long and informative name" + data["job-id"] = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" + obs = self.post("/apitest/prep_template/", headers=self.header, data=data) obs = loads(obs.body) - pt = qdb.metadata_template.prep_template.PrepTemplate(obs['prep']) - self.assertEqual(pt.name, data['name']) - self.assertEqual(pt.creation_job_id, data['job-id']) + pt = qdb.metadata_template.prep_template.PrepTemplate(obs["prep"]) + self.assertEqual(pt.name, data["name"]) + self.assertEqual(pt.creation_job_id, data["job-id"]) # testing setter - jid = 'aaaaaaaa-aaaa-bbbb-aaaa-aaaaaaaaaaaa' + jid = "aaaaaaaa-aaaa-bbbb-aaaa-aaaaaaaaaaaa" pt.creation_job_id = jid self.assertEqual(pt.creation_job_id, jid) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_processing_job.py b/qiita_db/handlers/tests/test_processing_job.py index b747b1f3e..bbd559052 100644 --- a/qiita_db/handlers/tests/test_processing_job.py +++ b/qiita_db/handlers/tests/test_processing_job.py @@ -6,132 +6,145 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main, TestCase -from tempfile import mkstemp -from json import loads, dumps from datetime import datetime +from json import dumps, loads from os import close, remove from os.path import exists +from tempfile import mkstemp +from unittest import TestCase, main -from tornado.web import HTTPError import numpy.testing as npt import pandas as pd +from tornado.web import HTTPError -from qiita_core.testing import wait_for_processing_job -from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb +from qiita_core.testing import wait_for_processing_job from qiita_db.handlers.processing_job import _get_job +from qiita_db.handlers.tests.oauthbase import OauthTestingBase class UtilTests(TestCase): def test_get_job(self): - obs = _get_job('6d368e16-2242-4cf8-87b4-a5dc40bb890b') - exp = qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b') + obs = _get_job("6d368e16-2242-4cf8-87b4-a5dc40bb890b") + exp = qdb.processing_job.ProcessingJob("6d368e16-2242-4cf8-87b4-a5dc40bb890b") self.assertEqual(obs, exp) with self.assertRaises(HTTPError): - _get_job('do-not-exist') + _get_job("do-not-exist") class JobHandlerTests(OauthTestingBase): def test_get_job_does_not_exists(self): - obs = self.get('/qiita_db/jobs/do-not-exist', headers=self.header) + obs = self.get("/qiita_db/jobs/do-not-exist", headers=self.header) self.assertEqual(obs.code, 404) def test_get(self): - obs = self.get('/qiita_db/jobs/6d368e16-2242-4cf8-87b4-a5dc40bb890b', - headers=self.header) + obs = self.get( + "/qiita_db/jobs/6d368e16-2242-4cf8-87b4-a5dc40bb890b", headers=self.header + ) self.assertEqual(obs.code, 200) - cmd = 'Split libraries FASTQ' - params = {"max_bad_run_length": 3, - "min_per_read_length_fraction": 0.75, "sequence_max_n": 0, - "rev_comp_barcode": False, - "rev_comp_mapping_barcodes": False, "rev_comp": False, - "phred_quality_threshold": 3, "barcode_type": "golay_12", - "max_barcode_errors": 1.5, "input_data": 1, - 'phred_offset': 'auto'} - exp = {'command': cmd, 'parameters': params, 'status': 'success', - 'msg': ''} + cmd = "Split libraries FASTQ" + params = { + "max_bad_run_length": 3, + "min_per_read_length_fraction": 0.75, + "sequence_max_n": 0, + "rev_comp_barcode": False, + "rev_comp_mapping_barcodes": False, + "rev_comp": False, + "phred_quality_threshold": 3, + "barcode_type": "golay_12", + "max_barcode_errors": 1.5, + "input_data": 1, + "phred_offset": "auto", + } + exp = {"command": cmd, "parameters": params, "status": "success", "msg": ""} self.assertEqual(loads(obs.body), exp) def test_get_no_header(self): - obs = self.get('/qiita_db/jobs/6d368e16-2242-4cf8-87b4-a5dc40bb890b') + obs = self.get("/qiita_db/jobs/6d368e16-2242-4cf8-87b4-a5dc40bb890b") self.assertEqual(obs.code, 400) class HeartbeatHandlerTests(OauthTestingBase): def test_post_job_does_not_exists(self): - obs = self.post('/qiita_db/jobs/do-not-exist/heartbeat/', '', - headers=self.header) + obs = self.post( + "/qiita_db/jobs/do-not-exist/heartbeat/", "", headers=self.header + ) self.assertEqual(obs.code, 404) def test_post_job_already_finished(self): obs = self.post( - '/qiita_db/jobs/6d368e16-2242-4cf8-87b4-a5dc40bb890b/heartbeat/', - '', headers=self.header) + "/qiita_db/jobs/6d368e16-2242-4cf8-87b4-a5dc40bb890b/heartbeat/", + "", + headers=self.header, + ) self.assertEqual(obs.code, 403) - self.assertEqual(obs.reason, - "Can't execute heartbeat on job: already completed") + self.assertEqual( + 
obs.reason, "Can't execute heartbeat on job: already completed" + ) def test_post(self): before = datetime.now() obs = self.post( - '/qiita_db/jobs/bcc7ebcd-39c1-43e4-af2d-822e3589f14d/heartbeat/', - '', headers=self.header) + "/qiita_db/jobs/bcc7ebcd-39c1-43e4-af2d-822e3589f14d/heartbeat/", + "", + headers=self.header, + ) self.assertEqual(obs.code, 200) - job = qdb.processing_job.ProcessingJob( - 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d') + job = qdb.processing_job.ProcessingJob("bcc7ebcd-39c1-43e4-af2d-822e3589f14d") self.assertTrue(before < job.heartbeat < datetime.now()) def test_post_no_header(self): obs = self.post( - '/qiita_db/jobs/bcc7ebcd-39c1-43e4-af2d-822e3589f14d/heartbeat/', - '') + "/qiita_db/jobs/bcc7ebcd-39c1-43e4-af2d-822e3589f14d/heartbeat/", "" + ) self.assertEqual(obs.code, 400) def test_post_first_heartbeat(self): before = datetime.now() - job = qdb.processing_job.ProcessingJob( - '063e553b-327c-4818-ab4a-adfe58e49860') - self.assertEqual(job.status, 'queued') + job = qdb.processing_job.ProcessingJob("063e553b-327c-4818-ab4a-adfe58e49860") + self.assertEqual(job.status, "queued") obs = self.post( - '/qiita_db/jobs/063e553b-327c-4818-ab4a-adfe58e49860/heartbeat/', - '', headers=self.header) + "/qiita_db/jobs/063e553b-327c-4818-ab4a-adfe58e49860/heartbeat/", + "", + headers=self.header, + ) self.assertEqual(obs.code, 200) self.assertTrue(before < job.heartbeat < datetime.now()) - self.assertEqual(job.status, 'running') + self.assertEqual(job.status, "running") class ActiveStepHandlerTests(OauthTestingBase): def test_post_no_header(self): - obs = self.post( - '/qiita_db/jobs/063e553b-327c-4818-ab4a-adfe58e49860/step/', '') + obs = self.post("/qiita_db/jobs/063e553b-327c-4818-ab4a-adfe58e49860/step/", "") self.assertEqual(obs.code, 400) def test_post_job_does_not_exists(self): - obs = self.post('/qiita_db/jobs/do-not-exist/step/', '', - headers=self.header) + obs = self.post("/qiita_db/jobs/do-not-exist/step/", "", headers=self.header) self.assertEqual(obs.code, 404) def test_post_non_running_job(self): - payload = dumps({'step': 'Step 1 of 4: demultiplexing'}) + payload = dumps({"step": "Step 1 of 4: demultiplexing"}) obs = self.post( - '/qiita_db/jobs/063e553b-327c-4818-ab4a-adfe58e49860/step/', - payload, headers=self.header) + "/qiita_db/jobs/063e553b-327c-4818-ab4a-adfe58e49860/step/", + payload, + headers=self.header, + ) self.assertEqual(obs.code, 403) - self.assertEqual(obs.reason, "Cannot change the step of a job whose " - "status is not 'running'") + self.assertEqual( + obs.reason, "Cannot change the step of a job whose status is not 'running'" + ) def test_post(self): - payload = dumps({'step': 'Step 1 of 4: demultiplexing'}) + payload = dumps({"step": "Step 1 of 4: demultiplexing"}) obs = self.post( - '/qiita_db/jobs/bcc7ebcd-39c1-43e4-af2d-822e3589f14d/step/', - payload, headers=self.header) + "/qiita_db/jobs/bcc7ebcd-39c1-43e4-af2d-822e3589f14d/step/", + payload, + headers=self.header, + ) self.assertEqual(obs.code, 200) - job = qdb.processing_job.ProcessingJob( - 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d') - self.assertEqual(job.step, 'Step 1 of 4: demultiplexing') + job = qdb.processing_job.ProcessingJob("bcc7ebcd-39c1-43e4-af2d-822e3589f14d") + self.assertEqual(job.step, "Step 1 of 4: demultiplexing") class CompleteHandlerTests(OauthTestingBase): @@ -147,128 +160,162 @@ def tearDown(self): def test_post_no_header(self): obs = self.post( - '/qiita_db/jobs/063e553b-327c-4818-ab4a-adfe58e49860/complete/', - '') + 
"/qiita_db/jobs/063e553b-327c-4818-ab4a-adfe58e49860/complete/", "" + ) self.assertEqual(obs.code, 400) def test_post_job_does_not_exists(self): - obs = self.post('/qiita_db/jobs/do-not-exist/complete/', '', - headers=self.header) + obs = self.post( + "/qiita_db/jobs/do-not-exist/complete/", "", headers=self.header + ) self.assertEqual(obs.code, 404) def test_post_job_not_running(self): - payload = dumps({'success': True, 'artifacts': []}) + payload = dumps({"success": True, "artifacts": []}) obs = self.post( - '/qiita_db/jobs/063e553b-327c-4818-ab4a-adfe58e49860/complete/', - payload, headers=self.header) + "/qiita_db/jobs/063e553b-327c-4818-ab4a-adfe58e49860/complete/", + payload, + headers=self.header, + ) self.assertEqual(obs.code, 403) - self.assertEqual(obs.body.decode('ascii'), - "Can't complete job: not in a running state") + self.assertEqual( + obs.body.decode("ascii"), "Can't complete job: not in a running state" + ) def test_post_job_failure(self): pt = npt.assert_warns( qdb.exceptions.QiitaDBWarning, qdb.metadata_template.prep_template.PrepTemplate.create, - pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), - qdb.study.Study(1), '16S') + pd.DataFrame({"new_col": {"1.SKD6.640190": 1}}), + qdb.study.Study(1), + "16S", + ) job = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), + qdb.user.User("test@foo.bar"), qdb.software.Parameters.load( - qdb.software.Command.get_validator('BIOM'), - values_dict={'template': pt.id, 'files': - dumps({'BIOM': ['file']}), - 'artifact_type': 'BIOM'})) - job._set_status('running') - - payload = dumps({'success': False, 'error': 'Job failure'}) + qdb.software.Command.get_validator("BIOM"), + values_dict={ + "template": pt.id, + "files": dumps({"BIOM": ["file"]}), + "artifact_type": "BIOM", + }, + ), + ) + job._set_status("running") + + payload = dumps({"success": False, "error": "Job failure"}) obs = self.post( - '/qiita_db/jobs/%s/complete/' % job.id, - payload, headers=self.header) + "/qiita_db/jobs/%s/complete/" % job.id, payload, headers=self.header + ) self.assertEqual(obs.code, 200) wait_for_processing_job(job.id) - self.assertEqual(job.status, 'error') - self.assertEqual(job.log, - qdb.logger.LogEntry.newest_records(numrecords=1)[0]) - self.assertEqual(job.log.msg, 'Job failure') + self.assertEqual(job.status, "error") + self.assertEqual(job.log, qdb.logger.LogEntry.newest_records(numrecords=1)[0]) + self.assertEqual(job.log.msg, "Job failure") def test_post_job_success(self): pt = npt.assert_warns( qdb.exceptions.QiitaDBWarning, qdb.metadata_template.prep_template.PrepTemplate.create, - pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), - qdb.study.Study(1), '16S') + pd.DataFrame({"new_col": {"1.SKD6.640190": 1}}), + qdb.study.Study(1), + "16S", + ) job = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), + qdb.user.User("test@foo.bar"), qdb.software.Parameters.load( - qdb.software.Command.get_validator('BIOM'), - values_dict={'template': pt.id, 'files': - dumps({'BIOM': ['file']}), - 'artifact_type': 'BIOM'})) - job._set_status('running') + qdb.software.Command.get_validator("BIOM"), + values_dict={ + "template": pt.id, + "files": dumps({"BIOM": ["file"]}), + "artifact_type": "BIOM", + }, + ), + ) + job._set_status("running") # here we can test that the complete_processing_job is None self.assertIsNone(job.complete_processing_job) - fd, fp = mkstemp(suffix='_table.biom') + fd, fp = mkstemp(suffix="_table.biom") close(fd) - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + 
f.write("\n") self._clean_up_files.append(fp) - exp_artifact_count = qdb.util.get_count('qiita.artifact') + 1 + exp_artifact_count = qdb.util.get_count("qiita.artifact") + 1 payload = dumps( - {'success': True, 'error': '', - 'artifacts': {'OTU table': {'filepaths': [(fp, 'biom')], - 'artifact_type': 'BIOM'}}}) + { + "success": True, + "error": "", + "artifacts": { + "OTU table": {"filepaths": [(fp, "biom")], "artifact_type": "BIOM"} + }, + } + ) obs = self.post( - '/qiita_db/jobs/%s/complete/' % job.id, - payload, headers=self.header) + "/qiita_db/jobs/%s/complete/" % job.id, payload, headers=self.header + ) wait_for_processing_job(job.id) self.assertEqual(obs.code, 200) - self.assertEqual(job.status, 'success') - self.assertEqual(qdb.util.get_count('qiita.artifact'), - exp_artifact_count) + self.assertEqual(job.status, "success") + self.assertEqual(qdb.util.get_count("qiita.artifact"), exp_artifact_count) # and now that is not None cj = job.complete_processing_job self.assertIsNotNone(cj) # additionally we can test that job.print_trace is correct - self.assertEqual(job.trace, [ - f'{job.id} [Not Available] (success): Validate | ' - '-p qiita -N 1 -n 1 --mem 90gb --time 150:00:00 --nice=10000', - f' {cj.id} [{cj.external_id}] (success)| ' - '-p qiita -N 1 -n 1 --mem 16gb --time 10:00:00 --nice=10000']) + self.assertEqual( + job.trace, + [ + f"{job.id} [Not Available] (success): Validate | " + "-p qiita -N 1 -n 1 --mem 90gb --time 150:00:00 --nice=10000", + f" {cj.id} [{cj.external_id}] (success)| " + "-p qiita -N 1 -n 1 --mem 16gb --time 10:00:00 --nice=10000", + ], + ) def test_post_job_success_with_archive(self): pt = npt.assert_warns( qdb.exceptions.QiitaDBWarning, qdb.metadata_template.prep_template.PrepTemplate.create, - pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), - qdb.study.Study(1), '16S') + pd.DataFrame({"new_col": {"1.SKD6.640190": 1}}), + qdb.study.Study(1), + "16S", + ) job = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), + qdb.user.User("test@foo.bar"), qdb.software.Parameters.load( - qdb.software.Command.get_validator('BIOM'), - values_dict={'template': pt.id, 'files': - dumps({'BIOM': ['file']}), - 'artifact_type': 'BIOM'})) - job._set_status('running') - - fd, fp = mkstemp(suffix='_table.biom') + qdb.software.Command.get_validator("BIOM"), + values_dict={ + "template": pt.id, + "files": dumps({"BIOM": ["file"]}), + "artifact_type": "BIOM", + }, + ), + ) + job._set_status("running") + + fd, fp = mkstemp(suffix="_table.biom") close(fd) - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") self._clean_up_files.append(fp) payload = dumps( - {'success': True, 'error': '', - 'artifacts': {'OTU table': {'filepaths': [(fp, 'biom')], - 'artifact_type': 'BIOM'}}, - 'archive': {'AAAA': 'AAA', 'CCC': 'CCC'}}) + { + "success": True, + "error": "", + "artifacts": { + "OTU table": {"filepaths": [(fp, "biom")], "artifact_type": "BIOM"} + }, + "archive": {"AAAA": "AAA", "CCC": "CCC"}, + } + ) obs = self.post( - '/qiita_db/jobs/%s/complete/' % job.id, - payload, headers=self.header) + "/qiita_db/jobs/%s/complete/" % job.id, payload, headers=self.header + ) wait_for_processing_job(job.id) self.assertEqual(obs.code, 200) @@ -276,52 +323,55 @@ def test_post_job_success_with_archive(self): class ProcessingJobAPItestHandlerTests(OauthTestingBase): def test_post_processing_job(self): data = { - 'user': 'demo@microbio.me', - 'command': dumps(['QIIMEq2', '1.9.1', - 'Pick closed-reference OTUs']), - 'parameters': dumps({"reference": 1, 
- "sortmerna_e_value": 1, - "sortmerna_max_pos": 10000, - "similarity": 0.97, - "sortmerna_coverage": 0.97, - "threads": 1, - "input_data": 1}) - } - - obs = self.post('/apitest/processing_job/', headers=self.header, - data=data) + "user": "demo@microbio.me", + "command": dumps(["QIIMEq2", "1.9.1", "Pick closed-reference OTUs"]), + "parameters": dumps( + { + "reference": 1, + "sortmerna_e_value": 1, + "sortmerna_max_pos": 10000, + "similarity": 0.97, + "sortmerna_coverage": 0.97, + "threads": 1, + "input_data": 1, + } + ), + } + + obs = self.post("/apitest/processing_job/", headers=self.header, data=data) self.assertEqual(obs.code, 200) obs = loads(obs.body) - self.assertCountEqual(obs.keys(), ['job']) - self.assertIsNotNone(obs['job']) + self.assertCountEqual(obs.keys(), ["job"]) + self.assertIsNotNone(obs["job"]) def test_post_processing_job_status(self): data = { - 'user': 'demo@microbio.me', - 'command': dumps(['QIIMEq2', '1.9.1', - 'Pick closed-reference OTUs']), - 'status': 'running', - 'parameters': dumps({"reference": 1, - "sortmerna_e_value": 1, - "sortmerna_max_pos": 10000, - "similarity": 0.97, - "sortmerna_coverage": 0.97, - "threads": 1, - "input_data": 1}) - } - - obs = self.post('/apitest/processing_job/', headers=self.header, - data=data) + "user": "demo@microbio.me", + "command": dumps(["QIIMEq2", "1.9.1", "Pick closed-reference OTUs"]), + "status": "running", + "parameters": dumps( + { + "reference": 1, + "sortmerna_e_value": 1, + "sortmerna_max_pos": 10000, + "similarity": 0.97, + "sortmerna_coverage": 0.97, + "threads": 1, + "input_data": 1, + } + ), + } + + obs = self.post("/apitest/processing_job/", headers=self.header, data=data) self.assertEqual(obs.code, 200) obs = loads(obs.body) - self.assertCountEqual(obs.keys(), ['job']) - job_id = obs['job'] + self.assertCountEqual(obs.keys(), ["job"]) + job_id = obs["job"] self.assertTrue(qdb.processing_job.ProcessingJob.exists(job_id)) - self.assertEqual(qdb.processing_job.ProcessingJob(job_id).status, - 'running') + self.assertEqual(qdb.processing_job.ProcessingJob(job_id).status, "running") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_reference.py b/qiita_db/handlers/tests/test_reference.py index 6403de40f..b0523f9a2 100644 --- a/qiita_db/handlers/tests/test_reference.py +++ b/qiita_db/handlers/tests/test_reference.py @@ -6,16 +6,16 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main, TestCase -from json import loads from functools import partial +from json import loads from os.path import join +from unittest import TestCase, main from tornado.web import HTTPError -from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb from qiita_db.handlers.reference import _get_reference +from qiita_db.handlers.tests.oauthbase import OauthTestingBase class UtilTests(TestCase): @@ -29,28 +29,26 @@ def test_get_reference(self): class ReferenceHandler(OauthTestingBase): def test_get_reference_no_header(self): - obs = self.get('/qiita_db/references/1/') + obs = self.get("/qiita_db/references/1/") self.assertEqual(obs.code, 400) def test_get_reference_does_not_exist(self): - obs = self.get('/qiita_db/references/100/', - headers=self.header) + obs = self.get("/qiita_db/references/100/", headers=self.header) self.assertEqual(obs.code, 404) def test_get(self): - obs = self.get('/qiita_db/references/1/', - headers=self.header) + obs = self.get("/qiita_db/references/1/", headers=self.header) self.assertEqual(obs.code, 200) - db_test_raw_dir = qdb.util.get_mountpoint('reference')[0][1] + db_test_raw_dir = qdb.util.get_mountpoint("reference")[0][1] path_builder = partial(join, db_test_raw_dir) fps = { - 'reference_seqs': path_builder("GreenGenes_13_8_97_otus.fasta"), - 'reference_tax': path_builder( - "GreenGenes_13_8_97_otu_taxonomy.txt"), - 'reference_tree': path_builder("GreenGenes_13_8_97_otus.tree")} - exp = {'name': 'Greengenes', 'version': '13_8', 'files': fps} + "reference_seqs": path_builder("GreenGenes_13_8_97_otus.fasta"), + "reference_tax": path_builder("GreenGenes_13_8_97_otu_taxonomy.txt"), + "reference_tree": path_builder("GreenGenes_13_8_97_otus.tree"), + } + exp = {"name": "Greengenes", "version": "13_8", "files": fps} self.assertEqual(loads(obs.body), exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_sample_information.py b/qiita_db/handlers/tests/test_sample_information.py index 620f3da0c..3951b3d03 100644 --- a/qiita_db/handlers/tests/test_sample_information.py +++ b/qiita_db/handlers/tests/test_sample_information.py @@ -6,68 +6,99 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main from json import loads +from unittest import main from qiita_db.handlers.tests.oauthbase import OauthTestingBase class SampleInfoDBHandlerTests(OauthTestingBase): def test_get_does_not_exist(self): - obs = self.get('/qiita_db/sample_information/100/data/', - headers=self.header) + obs = self.get("/qiita_db/sample_information/100/data/", headers=self.header) self.assertEqual(obs.code, 404) def test_get_no_header(self): - obs = self.get('/qiita_db/sample_information/100/data/') + obs = self.get("/qiita_db/sample_information/100/data/") self.assertEqual(obs.code, 400) def test_get(self): - obs = self.get('/qiita_db/sample_information/1/data/', - headers=self.header) + obs = self.get("/qiita_db/sample_information/1/data/", headers=self.header) self.assertEqual(obs.code, 200) obs = loads(obs.body) - self.assertCountEqual(obs.keys(), ['data']) + self.assertCountEqual(obs.keys(), ["data"]) # for simplicity we will only test that the keys are the same # and that one of the key's info is correct - obs = obs['data'] - exp = ['1.SKB2.640194', '1.SKM4.640180', '1.SKB3.640195', - '1.SKB6.640176', '1.SKD6.640190', '1.SKM6.640187', - '1.SKD9.640182', '1.SKM8.640201', '1.SKM2.640199', - '1.SKD2.640178', '1.SKB7.640196', '1.SKD4.640185', - '1.SKB8.640193', '1.SKM3.640197', '1.SKD5.640186', - '1.SKB1.640202', '1.SKM1.640183', '1.SKD1.640179', - '1.SKD3.640198', '1.SKB5.640181', '1.SKB4.640189', - '1.SKB9.640200', '1.SKM9.640192', '1.SKD8.640184', - '1.SKM5.640177', '1.SKM7.640188', '1.SKD7.640191'] + obs = obs["data"] + exp = [ + "1.SKB2.640194", + "1.SKM4.640180", + "1.SKB3.640195", + "1.SKB6.640176", + "1.SKD6.640190", + "1.SKM6.640187", + "1.SKD9.640182", + "1.SKM8.640201", + "1.SKM2.640199", + "1.SKD2.640178", + "1.SKB7.640196", + "1.SKD4.640185", + "1.SKB8.640193", + "1.SKM3.640197", + "1.SKD5.640186", + "1.SKB1.640202", + "1.SKM1.640183", + "1.SKD1.640179", + "1.SKD3.640198", + "1.SKB5.640181", + "1.SKB4.640189", + "1.SKB9.640200", + "1.SKM9.640192", + "1.SKD8.640184", + "1.SKM5.640177", + "1.SKM7.640188", + "1.SKD7.640191", + ] self.assertCountEqual(list(obs.keys()), exp) - obs = obs['1.SKB1.640202'] - exp = {'qiita_study_id': '1', 'physical_specimen_location': 'ANL', - 'tot_org_carb': '5', 'common_name': 'soil metagenome', - 'water_content_soil': '0.164', - 'env_feature': 'ENVO:plant-associated habitat', - 'assigned_from_geo': 'n', 'altitude': '0', - 'env_biome': ('ENVO:Temperate grasslands, savannas, and ' - 'shrubland biome'), - 'texture': '64.6 sand, 17.6 silt, 17.8 clay', - 'scientific_name': '1118232', - 'description_duplicate': 'Burmese bulk', - 'latitude': '4.59216095574', 'ph': '6.94', 'host_taxid': '3483', - 'elevation': '114', 'description': 'Cannabis Soil Microbiome', - 'collection_timestamp': '2011-11-11 13:00:00', - 'physical_specimen_remaining': 'true', 'dna_extracted': 'true', - 'taxon_id': '410658', 'samp_salinity': '7.15', - 'host_subject_id': '1001:M2', 'sample_type': 'ENVO:soil', - 'env_package': 'soil', - 'season_environment': 'winter', 'temp': '15', - 'country': 'GAZ:United States of America', - 'longitude': '63.5115213108', 'tot_nitro': '1.41', - 'depth': '0.15', 'anonymized_name': 'SKB1'} + obs = obs["1.SKB1.640202"] + exp = { + "qiita_study_id": "1", + "physical_specimen_location": "ANL", + "tot_org_carb": "5", + "common_name": "soil metagenome", + "water_content_soil": "0.164", + "env_feature": "ENVO:plant-associated habitat", + "assigned_from_geo": "n", + "altitude": 
"0", + "env_biome": ("ENVO:Temperate grasslands, savannas, and shrubland biome"), + "texture": "64.6 sand, 17.6 silt, 17.8 clay", + "scientific_name": "1118232", + "description_duplicate": "Burmese bulk", + "latitude": "4.59216095574", + "ph": "6.94", + "host_taxid": "3483", + "elevation": "114", + "description": "Cannabis Soil Microbiome", + "collection_timestamp": "2011-11-11 13:00:00", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "taxon_id": "410658", + "samp_salinity": "7.15", + "host_subject_id": "1001:M2", + "sample_type": "ENVO:soil", + "env_package": "soil", + "season_environment": "winter", + "temp": "15", + "country": "GAZ:United States of America", + "longitude": "63.5115213108", + "tot_nitro": "1.41", + "depth": "0.15", + "anonymized_name": "SKB1", + } self.assertEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_studies.py b/qiita_db/handlers/tests/test_studies.py index c54589a97..f7ca3cb57 100644 --- a/qiita_db/handlers/tests/test_studies.py +++ b/qiita_db/handlers/tests/test_studies.py @@ -6,8 +6,8 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main from json import loads +from unittest import main from qiita_db.handlers.tests.oauthbase import OauthTestingBase @@ -17,21 +17,22 @@ def setUp(self): super(TestAPIStudiesListing, self).setUp() def test_get_studies_failure(self): - obs = self.get('/qiita_db/studies/not-valid', headers=self.header) + obs = self.get("/qiita_db/studies/not-valid", headers=self.header) self.assertEqual(obs.code, 403) - self.assertEqual(str(obs.error), 'HTTP 403: You can only request ' - 'public or private studies') + self.assertEqual( + str(obs.error), "HTTP 403: You can only request public or private studies" + ) def test_get_studies_private(self): - obs = self.get('/qiita_db/studies/private', headers=self.header) - exp = {'data': {'1': [4, 5, 6, 7]}} + obs = self.get("/qiita_db/studies/private", headers=self.header) + exp = {"data": {"1": [4, 5, 6, 7]}} self.assertEqual(loads(obs.body), exp) def test_get_studies_public(self): - obs = self.get('/qiita_db/studies/public', headers=self.header) - exp = {'data': {}} + obs = self.get("/qiita_db/studies/public", headers=self.header) + exp = {"data": {}} self.assertEqual(loads(obs.body), exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_user.py b/qiita_db/handlers/tests/test_user.py index 90d412190..3aea8ba93 100644 --- a/qiita_db/handlers/tests/test_user.py +++ b/qiita_db/handlers/tests/test_user.py @@ -6,61 +6,63 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main from json import loads +from unittest import main from qiita_db.handlers.tests.oauthbase import OauthTestingBase class UserInfoDBHandlerTests(OauthTestingBase): def test_get_does_not_exist(self): - obs = self.get('/qiita_db/user/no-exists@foo.bar/data/', - headers=self.header) + obs = self.get("/qiita_db/user/no-exists@foo.bar/data/", headers=self.header) self.assertEqual(obs.code, 404) def test_get_no_header(self): - obs = self.get('/qiita_db/user/no-exists@foo.bar/data/') + obs = self.get("/qiita_db/user/no-exists@foo.bar/data/") self.assertEqual(obs.code, 400) def test_get(self): - obs = self.get('/qiita_db/user/shared@foo.bar/data/', - headers=self.header) + obs = self.get("/qiita_db/user/shared@foo.bar/data/", headers=self.header) self.assertEqual(obs.code, 200) obs = loads(obs.body) - self.assertCountEqual(obs.keys(), ['data']) + self.assertCountEqual(obs.keys(), ["data"]) # for simplicity we will only test that the keys are the same # and that one of the key's info is correct - obs = obs['data'] - exp = {"password": "$2a$12$gnUi8Qg.0tvW243v889BhOBhWLIHyIJjjgaG6dxuRJk" - "UM8nXG9Efe", "email": "shared@foo.bar", "level": "user", - "name": "Shared"} + obs = obs["data"] + exp = { + "password": "$2a$12$gnUi8Qg.0tvW243v889BhOBhWLIHyIJjjgaG6dxuRJkUM8nXG9Efe", + "email": "shared@foo.bar", + "level": "user", + "name": "Shared", + } self.assertEqual(obs, exp) class UsersListDBHandlerTests(OauthTestingBase): def test_get_no_header(self): - obs = self.get('/qiita_db/users/') + obs = self.get("/qiita_db/users/") self.assertEqual(obs.code, 400) def test_get(self): - obs = self.get('/qiita_db/users/', - headers=self.header) + obs = self.get("/qiita_db/users/", headers=self.header) self.assertEqual(obs.code, 200) obs = loads(obs.body) - exp = {'data': [ - {'email': 'shared@foo.bar', 'name': 'Shared'}, - {'email': 'admin@foo.bar', 'name': 'Admin'}, - {'email': 'demo@microbio.me', 'name': 'Demo'}, - {'email': 'test@foo.bar', 'name': 'Dude'}, - {'email': 'justnow@nonvalidat.ed', 'name': 'JustNow'}, - {'email': 'ayearago@nonvalidat.ed', 'name': 'Oldie'}, - {'email': '3Xdays@nonvalidat.ed', 'name': 'TooLate'} - ]} + exp = { + "data": [ + {"email": "shared@foo.bar", "name": "Shared"}, + {"email": "admin@foo.bar", "name": "Admin"}, + {"email": "demo@microbio.me", "name": "Demo"}, + {"email": "test@foo.bar", "name": "Dude"}, + {"email": "justnow@nonvalidat.ed", "name": "JustNow"}, + {"email": "ayearago@nonvalidat.ed", "name": "Oldie"}, + {"email": "3Xdays@nonvalidat.ed", "name": "TooLate"}, + ] + } self.assertEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/tests/test_util.py b/qiita_db/handlers/tests/test_util.py index 4a1bf53fd..b15dfca15 100644 --- a/qiita_db/handlers/tests/test_util.py +++ b/qiita_db/handlers/tests/test_util.py @@ -10,31 +10,31 @@ from tornado.web import HTTPError -from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb +from qiita_db.handlers.tests.oauthbase import OauthTestingBase class UtilTests(OauthTestingBase): def test_get_sample_info(self): ST = qdb.metadata_template.sample_template.SampleTemplate exp = ST(1) - obs = qdb.handlers.util._get_instance(ST, 1, 'error') + obs = qdb.handlers.util._get_instance(ST, 1, "error") self.assertEqual(obs, exp) # It does not exist with self.assertRaises(HTTPError): - qdb.handlers.util._get_instance(ST, 100, 'error') + 
qdb.handlers.util._get_instance(ST, 100, "error") def test_get_user_info(self): US = qdb.user.User - obs = qdb.handlers.util._get_instance(US, 'shared@foo.bar', 'error') - exp = US('shared@foo.bar') + obs = qdb.handlers.util._get_instance(US, "shared@foo.bar", "error") + exp = US("shared@foo.bar") self.assertEqual(obs, exp) # It does not exist with self.assertRaises(HTTPError): - qdb.handlers.util._get_instance(US, 'no-exists@foo.bar', 'error') + qdb.handlers.util._get_instance(US, "no-exists@foo.bar", "error") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/handlers/user.py b/qiita_db/handlers/user.py index bcf00a037..606abe155 100644 --- a/qiita_db/handlers/user.py +++ b/qiita_db/handlers/user.py @@ -7,6 +7,7 @@ # ----------------------------------------------------------------------------- import qiita_db as qdb + from .oauth2 import OauthBaseHandler, authenticate_oauth from .util import _get_instance @@ -27,11 +28,15 @@ def get(self, email): The user information as a dict """ with qdb.sql_connection.TRN: - user = _get_instance(qdb.user.User, email, - 'Error instantiating user') - response = {'data': {'email': email, 'level': user.level, - 'password': user.password, - 'name': user.info['name']}} + user = _get_instance(qdb.user.User, email, "Error instantiating user") + response = { + "data": { + "email": email, + "level": user.level, + "password": user.password, + "name": user.info["name"], + } + } self.write(response) @@ -47,6 +52,6 @@ def get(self): The user information as a dict """ with qdb.sql_connection.TRN: - response = {'data': [dict(d) for d in qdb.user.User.iter()]} + response = {"data": [dict(d) for d in qdb.user.User.iter()]} self.write(response) diff --git a/qiita_db/handlers/util.py b/qiita_db/handlers/util.py index 12620038e..fe32bff06 100644 --- a/qiita_db/handlers/util.py +++ b/qiita_db/handlers/util.py @@ -40,6 +40,6 @@ def _get_instance(klass, oid, reason): except qdb.exceptions.QiitaDBUnknownIDError: raise HTTPError(404) except Exception as e: - raise HTTPError(500, reason=reason + ', id=%s: %s' % (oid, str(e))) + raise HTTPError(500, reason=reason + ", id=%s: %s" % (oid, str(e))) return object diff --git a/qiita_db/investigation.py b/qiita_db/investigation.py index 092bc33ad..a857716e3 100644 --- a/qiita_db/investigation.py +++ b/qiita_db/investigation.py @@ -40,6 +40,7 @@ class Investigation(qdb.base.QiitaObject): add_study Adds a study to the investigation """ + _table = "investigation" @classmethod diff --git a/qiita_db/logger.py b/qiita_db/logger.py index 3934fedcd..913cc8ef3 100644 --- a/qiita_db/logger.py +++ b/qiita_db/logger.py @@ -22,7 +22,7 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from json import loads, dumps +from json import dumps, loads import qiita_db as qdb @@ -42,7 +42,7 @@ class LogEntry(qdb.base.QiitaObject): add_info """ - _table = 'logging' + _table = "logging" @classmethod def newest_records(cls, numrecords=100): @@ -64,8 +64,7 @@ def newest_records(cls, numrecords=100): ORDER BY logging_id DESC LIMIT %s""".format(cls._table) qdb.sql_connection.TRN.add(sql, [numrecords]) - return [cls(i) - for i in qdb.sql_connection.TRN.execute_fetchflatten()] + return [cls(i) for i in qdb.sql_connection.TRN.execute_fetchflatten()] @classmethod def create(cls, severity, msg, info=None): @@ -126,8 +125,7 @@ def time(self): datetime """ with qdb.sql_connection.TRN: - sql = "SELECT time FROM qiita.{} WHERE logging_id = %s".format( - self._table) + sql = "SELECT time FROM qiita.{} WHERE logging_id = %s".format(self._table) qdb.sql_connection.TRN.add(sql, [self.id]) return qdb.sql_connection.TRN.execute_fetchlast() @@ -172,15 +170,13 @@ def msg(self): str """ with qdb.sql_connection.TRN: - sql = "SELECT msg FROM qiita.{0} WHERE logging_id = %s".format( - self._table) + sql = "SELECT msg FROM qiita.{0} WHERE logging_id = %s".format(self._table) qdb.sql_connection.TRN.add(sql, [self.id]) return qdb.sql_connection.TRN.execute_fetchlast() def clear_info(self): - """Resets the list of info dicts to be an empty list - """ + """Resets the list of info dicts to be an empty list""" sql = """UPDATE qiita.{} SET information = %s WHERE logging_id = %s""".format(self._table) qdb.sql_connection.perform_as_transaction(sql, [dumps([]), self.id]) diff --git a/qiita_db/meta_util.py b/qiita_db/meta_util.py index 04b5ad525..58c002464 100644 --- a/qiita_db/meta_util.py +++ b/qiita_db/meta_util.py @@ -15,6 +15,7 @@ get_lat_longs """ + # ----------------------------------------------------------------------------- # Copyright (c) 2014--, The Qiita Development Team. # @@ -22,35 +23,52 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from os import stat -from shutil import move -from os.path import join, relpath, basename -from time import strftime, localtime -import matplotlib.pyplot as plt -import matplotlib as mpl from base64 import b64encode -from urllib.parse import quote -from io import BytesIO +from collections import Counter, defaultdict from datetime import datetime -from collections import defaultdict, Counter -from tarfile import open as topen, TarInfo from hashlib import md5 +from io import BytesIO +from json import dump, dumps, loads +from os import stat +from os.path import basename, join, relpath from re import sub -from json import loads, dump, dumps +from shutil import move +from tarfile import TarInfo +from tarfile import open as topen +from time import localtime, strftime +from urllib.parse import quote + +import matplotlib as mpl +import matplotlib.pyplot as plt -from qiita_db.util import create_nested_path, retrieve_resource_data -from qiita_db.util import resource_allocation_plot -from qiita_core.qiita_settings import qiita_config, r_client -from qiita_core.configuration_manager import ConfigurationManager import qiita_db as qdb +from qiita_core.configuration_manager import ConfigurationManager +from qiita_core.qiita_settings import qiita_config, r_client +from qiita_db.util import ( + create_nested_path, + resource_allocation_plot, + retrieve_resource_data, +) # global constant list used in resource_allocation_page COLUMNS = [ - "sName", "sVersion", "cID", "cName", "processing_job_id", - "parameters", "samples", "columns", "input_size", "extra_info", - "MaxRSSRaw", "ElapsedRaw", "Start", "node_name", "node_model"] -RAW_DATA_ARTIFACT_TYPE = { - 'SFF', 'FASTQ', 'FASTA', 'FASTA_Sanger', 'per_sample_FASTQ'} + "sName", + "sVersion", + "cID", + "cName", + "processing_job_id", + "parameters", + "samples", + "columns", + "input_size", + "extra_info", + "MaxRSSRaw", + "ElapsedRaw", + "Start", + "node_name", + "node_model", +] +RAW_DATA_ARTIFACT_TYPE = {"SFF", "FASTQ", "FASTA", "FASTA_Sanger", "per_sample_FASTQ"} def _get_data_fpids(constructor, object_id): @@ -119,12 +137,12 @@ def validate_filepath_access_by_user(user, filepath_id): # [0] cause we should only have 1 artifact = qdb.artifact.Artifact(arid[0]) - if artifact.visibility == 'public': + if artifact.visibility == "public": # TODO: https://github.com/biocore/qiita/issues/1724 if artifact.artifact_type in RAW_DATA_ARTIFACT_TYPE: study = artifact.study has_access = study.has_access(user, no_public=True) - if (not study.public_raw_download and not has_access): + if not study.public_raw_download and not has_access: return False return True else: @@ -134,8 +152,7 @@ def validate_filepath_access_by_user(user, filepath_id): return artifact.study.has_access(user) else: analysis = artifact.analysis - return analysis in ( - user.private_analyses | user.shared_analyses) + return analysis in (user.private_analyses | user.shared_analyses) # sample info files elif sid: # the visibility of the sample info file is given by the @@ -147,20 +164,18 @@ def validate_filepath_access_by_user(user, filepath_id): # the prep access is given by it's artifacts, if the user has # access to any artifact, it should have access to the prep # [0] cause we should only have 1 - pt = qdb.metadata_template.prep_template.PrepTemplate( - pid[0]) + pt = qdb.metadata_template.prep_template.PrepTemplate(pid[0]) a = pt.artifact # however, the prep info file could not have any artifacts attached # , in that 
case we will use the study access level if a is None: return qdb.study.Study(pt.study_id).has_access(user) else: - if (a.visibility == 'public' or a.study.has_access(user)): + if a.visibility == "public" or a.study.has_access(user): return True else: for c in a.descendants.nodes(): - if ((c.visibility == 'public' or - c.study.has_access(user))): + if c.visibility == "public" or c.study.has_access(user): return True return False # analyses @@ -168,8 +183,9 @@ def validate_filepath_access_by_user(user, filepath_id): # [0] cause we should only have 1 aid = anid[0] analysis = qdb.analysis.Analysis(aid) - return analysis.is_public | (analysis in ( - user.private_analyses | user.shared_analyses)) + return analysis.is_public | ( + analysis in (user.private_analyses | user.shared_analyses) + ) return False @@ -183,8 +199,8 @@ def update_redis_stats(): """ STUDY = qdb.study.Study - number_studies = {'public': 0, 'private': 0, 'sandbox': 0} - number_of_samples = {'public': 0, 'private': 0, 'sandbox': 0} + number_studies = {"public": 0, "private": 0, "sandbox": 0} + number_of_samples = {"public": 0, "private": 0, "sandbox": 0} num_studies_ebi = 0 num_samples_ebi = 0 number_samples_ebi_prep = 0 @@ -197,8 +213,9 @@ def update_redis_stats(): continue # counting samples submitted to EBI-ENA - len_samples_ebi = sum([esa is not None - for esa in st.ebi_sample_accessions.values()]) + len_samples_ebi = sum( + [esa is not None for esa in st.ebi_sample_accessions.values()] + ) if len_samples_ebi != 0: num_studies_ebi += 1 num_samples_ebi += len_samples_ebi @@ -207,47 +224,51 @@ def update_redis_stats(): for pt in study.prep_templates(): pt_samples = list(pt.keys()) pt_status = pt.status - if pt_status == 'public': + if pt_status == "public": per_data_type_stats[pt.data_type()] += len(pt_samples) samples_status[pt_status].update(pt_samples) # counting experiments (samples in preps) submitted to EBI-ENA - number_samples_ebi_prep += sum([ - esa is not None - for esa in pt.ebi_experiment_accessions.values()]) + number_samples_ebi_prep += sum( + [esa is not None for esa in pt.ebi_experiment_accessions.values()] + ) # counting studies - if 'public' in samples_status: - number_studies['public'] += 1 - elif 'private' in samples_status: - number_studies['private'] += 1 + if "public" in samples_status: + number_studies["public"] += 1 + elif "private" in samples_status: + number_studies["private"] += 1 else: # note that this is a catch all for other status; at time of # writing there is status: awaiting_approval - number_studies['sandbox'] += 1 + number_studies["sandbox"] += 1 # counting samples; note that some of these lines could be merged with # the block above but I decided to split it in 2 for clarity - if 'public' in samples_status: - number_of_samples['public'] += len(samples_status['public']) - if 'private' in samples_status: - number_of_samples['private'] += len(samples_status['private']) - if 'sandbox' in samples_status: - number_of_samples['sandbox'] += len(samples_status['sandbox']) + if "public" in samples_status: + number_of_samples["public"] += len(samples_status["public"]) + if "private" in samples_status: + number_of_samples["private"] += len(samples_status["private"]) + if "sandbox" in samples_status: + number_of_samples["sandbox"] += len(samples_status["sandbox"]) # processing filepaths for artifact in study.artifacts(): for adata in artifact.filepaths: try: - s = stat(adata['fp']) + s = stat(adata["fp"]) except OSError: - missing_files.append(adata['fp']) + missing_files.append(adata["fp"]) else: 
stats.append( - (adata['fp_type'], s.st_size, strftime('%Y-%m', - localtime(s.st_mtime)))) + ( + adata["fp_type"], + s.st_size, + strftime("%Y-%m", localtime(s.st_mtime)), + ) + ) - num_users = qdb.util.get_count('qiita.qiita_user') - num_processing_jobs = qdb.util.get_count('qiita.processing_job') + num_users = qdb.util.get_count("qiita.qiita_user") + num_processing_jobs = qdb.util.get_count("qiita.processing_job") lat_longs = dumps(get_lat_longs()) @@ -255,12 +276,23 @@ def update_redis_stats(): all_dates = [] # these are some filetypes that are too small to plot alone so we'll merge # in other - group_other = {'html_summary', 'tgz', 'directory', 'raw_fasta', 'log', - 'raw_sff', 'raw_qual', 'qza', 'html_summary_dir', - 'qza', 'plain_text', 'raw_barcodes'} + group_other = { + "html_summary", + "tgz", + "directory", + "raw_fasta", + "log", + "raw_sff", + "raw_qual", + "qza", + "html_summary_dir", + "qza", + "plain_text", + "raw_barcodes", + } for ft, size, ym in stats: if ft in group_other: - ft = 'other' + ft = "other" if ft not in summary: summary[ft] = {} if ym not in summary[ft]: @@ -280,24 +312,25 @@ def update_redis_stats(): new_list.append(current_value) ordered_summary[dt] = new_list - plot_order = sorted([(k, ordered_summary[k][-1]) for k in ordered_summary], - key=lambda x: x[1]) + plot_order = sorted( + [(k, ordered_summary[k][-1]) for k in ordered_summary], key=lambda x: x[1] + ) # helper function to generate y axis, modified from: # http://stackoverflow.com/a/1094933 def sizeof_fmt(value, position): number = None - for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: + for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: if abs(value) < 1024.0: number = "%3.1f%s" % (value, unit) break value /= 1024.0 if number is None: - number = "%.1f%s" % (value, 'Yi') + number = "%.1f%s" % (value, "Yi") return number all_dates_axis = range(len(all_dates)) - plt.locator_params(axis='y', nbins=10) + plt.locator_params(axis="y", nbins=10) plt.figure(figsize=(20, 10)) for k, v in plot_order: plt.plot(all_dates_axis, ordered_summary[k], linewidth=2, label=k) @@ -308,35 +341,36 @@ def sizeof_fmt(value, position): ax = plt.gca() ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt)) plt.xticks(rotation=90) - plt.xlabel('Date') - plt.ylabel('Storage space per data type') + plt.xlabel("Date") + plt.ylabel("Storage space per data type") plot = BytesIO() - plt.savefig(plot, format='png') + plt.savefig(plot, format="png") plot.seek(0) - img = 'data:image/png;base64,' + quote(b64encode(plot.getbuffer())) + img = "data:image/png;base64," + quote(b64encode(plot.getbuffer())) - time = datetime.now().strftime('%m-%d-%y %H:%M:%S') + time = datetime.now().strftime("%m-%d-%y %H:%M:%S") portal = qiita_config.portal # making sure per_data_type_stats has some data so hmset doesn't fail if per_data_type_stats == {}: - per_data_type_stats['No data'] = 0 + per_data_type_stats["No data"] = 0 vals = [ - ('number_studies', number_studies, r_client.hmset), - ('number_of_samples', number_of_samples, r_client.hmset), - ('per_data_type_stats', dict(per_data_type_stats), r_client.hmset), - ('num_users', num_users, r_client.set), - ('lat_longs', (lat_longs), r_client.set), - ('num_studies_ebi', num_studies_ebi, r_client.set), - ('num_samples_ebi', num_samples_ebi, r_client.set), - ('number_samples_ebi_prep', number_samples_ebi_prep, r_client.set), - ('img', img, r_client.set), - ('time', time, r_client.set), - ('num_processing_jobs', num_processing_jobs, r_client.set)] + ("number_studies", number_studies, 
r_client.hmset), + ("number_of_samples", number_of_samples, r_client.hmset), + ("per_data_type_stats", dict(per_data_type_stats), r_client.hmset), + ("num_users", num_users, r_client.set), + ("lat_longs", (lat_longs), r_client.set), + ("num_studies_ebi", num_studies_ebi, r_client.set), + ("num_samples_ebi", num_samples_ebi, r_client.set), + ("number_samples_ebi_prep", number_samples_ebi_prep, r_client.set), + ("img", img, r_client.set), + ("time", time, r_client.set), + ("num_processing_jobs", num_processing_jobs, r_client.set), + ] for k, v, f in vals: - redis_key = '%s:stats:%s' % (portal, k) + redis_key = "%s:stats:%s" % (portal, k) # important to "flush" variables to avoid errors r_client.delete(redis_key) f(redis_key, v) @@ -360,7 +394,7 @@ def get_lat_longs(): """ with qdb.sql_connection.TRN: # getting all the public studies - studies = qdb.study.Study.get_by_status('public') + studies = qdb.study.Study.get_by_status("public") results = [] if studies: @@ -376,7 +410,7 @@ def get_lat_longs(): isnumeric(sample_values->>'latitude') AND isnumeric(sample_values->>'longitude')""" sql = [sql_query.format(s.id) for s in studies] - sql = ' UNION '.join(sql) + sql = " UNION ".join(sql) qdb.sql_connection.TRN.add(sql) # note that we are returning set to remove duplicates @@ -385,7 +419,7 @@ def get_lat_longs(): return results -def generate_biom_and_metadata_release(study_status='public'): +def generate_biom_and_metadata_release(study_status="public"): """Generate a list of biom/meatadata filepaths and a tgz of those files Parameters @@ -400,66 +434,79 @@ def generate_biom_and_metadata_release(study_status='public'): working_dir = qiita_config.working_dir portal = qiita_config.portal bdir = qdb.util.get_db_files_base_dir() - time = datetime.now().strftime('%m-%d-%y %H:%M:%S') + time = datetime.now().strftime("%m-%d-%y %H:%M:%S") data = [] for s in studies: # [0] latest is first, [1] only getting the filepath sample_fp = relpath(s.sample_template.get_filepaths()[0][1], bdir) - for a in s.artifacts(artifact_type='BIOM'): + for a in s.artifacts(artifact_type="BIOM"): if a.processing_parameters is None or a.visibility != study_status: continue merging_schemes, parent_softwares = a.merging_scheme software = a.processing_parameters.command.software - software = '%s v%s' % (software.name, software.version) + software = "%s v%s" % (software.name, software.version) for x in a.filepaths: - if x['fp_type'] != 'biom' or 'only-16s' in x['fp']: + if x["fp_type"] != "biom" or "only-16s" in x["fp"]: continue - fp = relpath(x['fp'], bdir) + fp = relpath(x["fp"], bdir) for pt in a.prep_templates: categories = pt.categories - platform = '' - target_gene = '' - if 'platform' in categories: - platform = ', '.join( - set(pt.get_category('platform').values())) - if 'target_gene' in categories: - target_gene = ', '.join( - set(pt.get_category('target_gene').values())) + platform = "" + target_gene = "" + if "platform" in categories: + platform = ", ".join(set(pt.get_category("platform").values())) + if "target_gene" in categories: + target_gene = ", ".join( + set(pt.get_category("target_gene").values()) + ) for _, prep_fp in pt.get_filepaths(): - if 'qiime' not in prep_fp: + if "qiime" not in prep_fp: break prep_fp = relpath(prep_fp, bdir) # format: (biom_fp, sample_fp, prep_fp, qiita_artifact_id, # platform, target gene, merging schemes, # artifact software/version, # parent sofware/version) - data.append((fp, sample_fp, prep_fp, a.id, platform, - target_gene, merging_schemes, software, - parent_softwares)) + 
data.append( + ( + fp, + sample_fp, + prep_fp, + a.id, + platform, + target_gene, + merging_schemes, + software, + parent_softwares, + ) + ) # writing text and tgz file - ts = datetime.now().strftime('%m%d%y-%H%M%S') - tgz_dir = join(working_dir, 'releases') + ts = datetime.now().strftime("%m%d%y-%H%M%S") + tgz_dir = join(working_dir, "releases") create_nested_path(tgz_dir) - tgz_name = join(tgz_dir, '%s-%s-building.tgz' % (portal, study_status)) - tgz_name_final = join(tgz_dir, '%s-%s.tgz' % (portal, study_status)) + tgz_name = join(tgz_dir, "%s-%s-building.tgz" % (portal, study_status)) + tgz_name_final = join(tgz_dir, "%s-%s.tgz" % (portal, study_status)) txt_lines = [ "biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t" - "target gene\tmerging scheme\tartifact software\tparent software"] + "target gene\tmerging scheme\tartifact software\tparent software" + ] with topen(tgz_name, "w|gz") as tgz: for biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv in data: - txt_lines.append("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % ( - biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv)) + txt_lines.append( + "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" + % (biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv) + ) tgz.add(join(bdir, biom_fp), arcname=biom_fp, recursive=False) tgz.add(join(bdir, sample_fp), arcname=sample_fp, recursive=False) tgz.add(join(bdir, prep_fp), arcname=prep_fp, recursive=False) - info = TarInfo(name='%s-%s-%s.txt' % (portal, study_status, ts)) + info = TarInfo(name="%s-%s-%s.txt" % (portal, study_status, ts)) txt_hd = BytesIO() - txt_hd.write(bytes('\n'.join(txt_lines), 'ascii')) + txt_hd.write(bytes("\n".join(txt_lines), "ascii")) txt_hd.seek(0) info.size = len(txt_hd.read()) txt_hd.seek(0) @@ -473,70 +520,75 @@ def generate_biom_and_metadata_release(study_status='public'): move(tgz_name, tgz_name_final) vals = [ - ('filepath', tgz_name_final[len(working_dir):], r_client.set), - ('md5sum', md5sum.hexdigest(), r_client.set), - ('time', time, r_client.set)] + ("filepath", tgz_name_final[len(working_dir) :], r_client.set), + ("md5sum", md5sum.hexdigest(), r_client.set), + ("time", time, r_client.set), + ] for k, v, f in vals: - redis_key = '%s:release:%s:%s' % (portal, study_status, k) + redis_key = "%s:release:%s:%s" % (portal, study_status, k) # important to "flush" variables to avoid errors r_client.delete(redis_key) f(redis_key, v) def generate_plugin_releases(): - """Generate releases for plugins - """ + """Generate releases for plugins""" ARCHIVE = qdb.archive.Archive qiita_config = ConfigurationManager() working_dir = qiita_config.working_dir - commands = [c for s in qdb.software.Software.iter(active=True) - for c in s.commands if c.post_processing_cmd is not None] + commands = [ + c + for s in qdb.software.Software.iter(active=True) + for c in s.commands + if c.post_processing_cmd is not None + ] tnow = datetime.now() - ts = tnow.strftime('%m%d%y-%H%M%S') - tgz_dir = join(working_dir, 'releases', 'archive') + ts = tnow.strftime("%m%d%y-%H%M%S") + tgz_dir = join(working_dir, "releases", "archive") create_nested_path(tgz_dir) tgz_dir_release = join(tgz_dir, ts) create_nested_path(tgz_dir_release) for cmd in commands: cmd_name = cmd.name - mschemes = [v for _, v in ARCHIVE.merging_schemes().items() - if cmd_name in v] + mschemes = [v for _, v in ARCHIVE.merging_schemes().items() if cmd_name in v] for ms in mschemes: - ms_name = sub('[^0-9a-zA-Z]+', '', ms) + ms_name = sub("[^0-9a-zA-Z]+", "", ms) ms_fp = join(tgz_dir_release, ms_name) 
create_nested_path(ms_fp) - pfp = join(ms_fp, 'archive.json') - archives = {k: loads(v) - for k, v in ARCHIVE.retrieve_feature_values( - archive_merging_scheme=ms).items() - if v != ''} - with open(pfp, 'w') as f: + pfp = join(ms_fp, "archive.json") + archives = { + k: loads(v) + for k, v in ARCHIVE.retrieve_feature_values( + archive_merging_scheme=ms + ).items() + if v != "" + } + with open(pfp, "w") as f: dump(archives, f) # now let's run the post_processing_cmd ppc = cmd.post_processing_cmd # concatenate any other parameters into a string - params = ' '.join(["%s=%s" % (k, v) for k, v in - ppc['script_params'].items()]) + params = " ".join( + ["%s=%s" % (k, v) for k, v in ppc["script_params"].items()] + ) # append archives file and output dir parameters - params = ("%s --fp_archive=%s --output_dir=%s" % ( - params, pfp, ms_fp)) + params = "%s --fp_archive=%s --output_dir=%s" % (params, pfp, ms_fp) - ppc_cmd = "%s %s %s" % ( - ppc['script_env'], ppc['script_path'], params) + ppc_cmd = "%s %s %s" % (ppc["script_env"], ppc["script_path"], params) p_out, p_err, rv = qdb.processing_job._system_call(ppc_cmd) p_out = p_out.rstrip() if rv != 0: - raise ValueError('Error %d: %s' % (rv, p_out)) + raise ValueError("Error %d: %s" % (rv, p_out)) p_out = loads(p_out) # tgz-ing all files - tgz_name = join(tgz_dir, 'archive-%s-building.tgz' % ts) - tgz_name_final = join(tgz_dir, 'archive.tgz') + tgz_name = join(tgz_dir, "archive-%s-building.tgz" % ts) + tgz_name_final = join(tgz_dir, "archive.tgz") with topen(tgz_name, "w|gz") as tgz: tgz.add(tgz_dir_release, arcname=basename(tgz_dir_release)) # getting the release md5 @@ -546,11 +598,12 @@ def generate_plugin_releases(): md5sum.update(c) move(tgz_name, tgz_name_final) vals = [ - ('filepath', tgz_name_final[len(working_dir):], r_client.set), - ('md5sum', md5sum.hexdigest(), r_client.set), - ('time', tnow.strftime('%m-%d-%y %H:%M:%S'), r_client.set)] + ("filepath", tgz_name_final[len(working_dir) :], r_client.set), + ("md5sum", md5sum.hexdigest(), r_client.set), + ("time", tnow.strftime("%m-%d-%y %H:%M:%S"), r_client.set), + ] for k, v, f in vals: - redis_key = 'release-archive:%s' % k + redis_key = "release-archive:%s" % k # important to "flush" variables to avoid errors r_client.delete(redis_key) f(redis_key, v) @@ -581,9 +634,9 @@ def update_resource_allocation_redis(active=True): Defaults to True. Should only be False when testing. 
""" - time = datetime.now().strftime('%m-%d-%y') + time = datetime.now().strftime("%m-%d-%y") scommands = get_software_commands(active) - redis_key = 'resources:commands' + redis_key = "resources:commands" r_client.set(redis_key, str(scommands)) for sname, versions in scommands.items(): @@ -607,30 +660,35 @@ def update_resource_allocation_redis(active=True): new_fig = plt.figure() new_ax = new_fig.add_subplot(111) line = ax.lines[0] - new_ax.plot(line.get_xdata(), line.get_ydata(), - linewidth=1, color='orange') + new_ax.plot( + line.get_xdata(), line.get_ydata(), linewidth=1, color="orange" + ) handles, labels = ax.get_legend_handles_labels() - for handle, label, scatter_data in zip(handles, - labels, - ax.collections): + for handle, label, scatter_data in zip( + handles, labels, ax.collections + ): color = handle.get_facecolor() - new_ax.scatter(scatter_data.get_offsets()[:, 0], - scatter_data.get_offsets()[:, 1], - s=scatter_data.get_sizes(), label=label, - color=color) - - new_ax.set_xscale('log') - new_ax.set_yscale('log') + new_ax.scatter( + scatter_data.get_offsets()[:, 0], + scatter_data.get_offsets()[:, 1], + s=scatter_data.get_sizes(), + label=label, + color=color, + ) + + new_ax.set_xscale("log") + new_ax.set_yscale("log") new_ax.set_xlabel(ax.get_xlabel()) new_ax.set_ylabel(ax.get_ylabel()) - new_ax.legend(loc='upper left') + new_ax.legend(loc="upper left") new_fig.tight_layout() plot = BytesIO() - new_fig.savefig(plot, format='png') + new_fig.savefig(plot, format="png") plot.seek(0) - img = 'data:image/png;base64,' + quote( - b64encode(plot.getvalue()).decode('ascii')) + img = "data:image/png;base64," + quote( + b64encode(plot.getvalue()).decode("ascii") + ) images[i] = img plt.close(new_fig) plt.close(fig) @@ -639,13 +697,18 @@ def update_resource_allocation_redis(active=True): values = [ ("img_mem", images[0], r_client.set), ("img_time", images[1], r_client.set), - ('time', time, r_client.set), + ("time", time, r_client.set), ("title_mem", titles[0], r_client.set), - ("title_time", titles[1], r_client.set) + ("title_time", titles[1], r_client.set), ] for k, v, f in values: - redis_key = 'resources$#%s$#%s$#%s$#%s:%s' % ( - cname, sname, version, col_name, k) + redis_key = "resources$#%s$#%s$#%s$#%s:%s" % ( + cname, + sname, + version, + col_name, + k, + ) r_client.delete(redis_key) f(redis_key, v) diff --git a/qiita_db/metadata_template/__init__.py b/qiita_db/metadata_template/__init__.py index ab92c9d3a..9a709a615 100644 --- a/qiita_db/metadata_template/__init__.py +++ b/qiita_db/metadata_template/__init__.py @@ -6,9 +6,6 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from . import constants -from . import util -from . import sample_template -from . import prep_template +from . import constants, prep_template, sample_template, util __all__ = ["sample_template", "prep_template", "util", "constants"] diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index 38f1143d9..36db2d207 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -34,40 +34,38 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from itertools import chain +import warnings from copy import deepcopy from datetime import datetime -from json import loads, dumps +from itertools import chain +from json import dumps, loads +from string import ascii_letters, digits -import pandas as pd import numpy as np +import pandas as pd from iteration_utilities import duplicates -import warnings -from qiita_core.exceptions import IncompetentQiitaDeveloperError import qiita_db as qdb - -from string import ascii_letters, digits - +from qiita_core.exceptions import IncompetentQiitaDeveloperError # this is the name of the sample where we store all columns for a sample/prep # information -QIITA_COLUMN_NAME = 'qiita_sample_column_names' +QIITA_COLUMN_NAME = "qiita_sample_column_names" INSDC_NULL_VALUES = { - 'not collected': 'not collected', - 'not provided': 'not provided', - 'restricted access': 'restricted access', - 'not applicable': 'not applicable', - 'unspecified': 'not applicable', - 'not_collected': 'not collected', - 'not_provided': 'not provided', - 'restricted_access': 'restricted access', - 'not_applicable': 'not applicable', - 'missing: not collected': 'not collected', - 'missing: not provided': 'not provided', - 'missing: restricted access': 'restricted access', - 'missing: not applicable': 'not applicable', + "not collected": "not collected", + "not provided": "not provided", + "restricted access": "restricted access", + "not applicable": "not applicable", + "unspecified": "not applicable", + "not_collected": "not collected", + "not_provided": "not provided", + "restricted_access": "restricted access", + "not_applicable": "not applicable", + "missing: not collected": "not collected", + "missing: not provided": "not provided", + "missing: restricted access": "restricted access", + "missing: not applicable": "not applicable", } @@ -118,6 +116,7 @@ class BaseSample(qdb.base.QiitaObject): Sample PrepSample """ + # Used to find the right SQL tables - should be defined on the subclasses _table_prefix = None _id_column = None @@ -161,12 +160,12 @@ def __init__(self, sample_id, md_template): # This test will check that the sample id is actually present on the db if sample_id not in md_template: raise qdb.exceptions.QiitaDBUnknownIDError( - sample_id, self.__class__.__name__) + sample_id, self.__class__.__name__ + ) # Assign private attributes self._id = sample_id self._md_template = md_template - self._dynamic_table = "%s%d" % (self._table_prefix, - self._md_template.id) + self._dynamic_table = "%s%d" % (self._table_prefix, self._md_template.id) def __hash__(self): r"""Defines the hash function so samples are hashable""" @@ -233,7 +232,7 @@ def _to_dict(self): result = qdb.sql_connection.TRN.execute_fetchindex() - return result[0]['sample_values'] + return result[0]["sample_values"] def __len__(self): r"""Returns the number of metadata categories @@ -274,12 +273,12 @@ def __getitem__(self, key): # The key is not available for the sample, so raise a KeyError raise KeyError( "Metadata category %s does not exists for sample %s" - " in template %d" % (key, self._id, self._md_template.id)) + " in template %d" % (key, self._id, self._md_template.id) + ) sql = """SELECT sample_values->>'{0}' as {0} FROM qiita.{1} - WHERE sample_id = %s""".format( - key, self._dynamic_table) + WHERE sample_id = %s""".format(key, self._dynamic_table) qdb.sql_connection.TRN.add(sql, [self._id]) return qdb.sql_connection.TRN.execute_fetchlast() @@ -303,15 +302,16 @@ def 
setitem(self, column, value): # Check if the column exist in the table if column not in self._get_categories(): raise qdb.exceptions.QiitaDBColumnError( - "Column %s does not exist in %s" % - (column, self._dynamic_table)) + "Column %s does not exist in %s" % (column, self._dynamic_table) + ) sql = """UPDATE qiita.{0} SET sample_values = sample_values || %s WHERE sample_id = %s""".format(self._dynamic_table) qdb.sql_connection.perform_as_transaction( - sql, [dumps({column: value}), self.id]) + sql, [dumps({column: value}), self.id] + ) def __setitem__(self, column, value): r"""Sets the metadata value for the category `column` @@ -474,7 +474,7 @@ class MetadataTemplate(qdb.base.QiitaObject): @classmethod def _check_id(cls, id_): r"""Checks that the MetadataTemplate id_ exists on the database""" - return qdb.util.exists_table(f'{cls._table_prefix}{id_}') + return qdb.util.exists_table(f"{cls._table_prefix}{id_}") @classmethod def _table_name(cls, obj_id): @@ -497,12 +497,12 @@ def _table_name(cls, obj_id): """ if not cls._table_prefix: raise IncompetentQiitaDeveloperError( - "_table_prefix should be defined in the subclasses") + "_table_prefix should be defined in the subclasses" + ) return "%s%d" % (cls._table_prefix, obj_id) @classmethod - def _clean_validate_template(cls, md_template, study_id, - current_columns=None): + def _clean_validate_template(cls, md_template, study_id, current_columns=None): """Takes care of all validation and cleaning of metadata templates Parameters @@ -531,16 +531,19 @@ def _clean_validate_template(cls, md_template, study_id, """ cls._check_subclass() invalid_ids = qdb.metadata_template.util.get_invalid_sample_names( - md_template.index) + md_template.index + ) if invalid_ids: raise qdb.exceptions.QiitaDBColumnError( "The following sample names in the template contain invalid " "characters (only alphanumeric characters or periods are " - "allowed): %s." % ", ".join(invalid_ids)) + "allowed): %s." % ", ".join(invalid_ids) + ) if len(set(md_template.index)) != len(md_template.index): raise qdb.exceptions.QiitaDBDuplicateSamplesError( - set(duplicates(md_template.index))) + set(duplicates(md_template.index)) + ) # We are going to modify the md_template. We create a copy so # we don't modify the user one @@ -550,10 +553,10 @@ def _clean_validate_template(cls, md_template, study_id, md_template.columns = [c.lower() for c in md_template.columns] # drop these columns in the result - if 'qiita_study_id' in md_template.columns: - del md_template['qiita_study_id'] - if 'qiita_prep_id' in md_template.columns: - del md_template['qiita_prep_id'] + if "qiita_study_id" in md_template.columns: + del md_template["qiita_study_id"] + if "qiita_prep_id" in md_template.columns: + del md_template["qiita_prep_id"] # validating pgsql reserved words not to be column headers current_headers = set(md_template.columns.values) @@ -562,44 +565,49 @@ def _clean_validate_template(cls, md_template, study_id, # tests. 
pgsql_reserved = cls._identify_pgsql_reserved_words_in_column_names( - current_headers) - invalid = cls._identify_column_names_with_invalid_characters( - current_headers) - forbidden = cls._identify_forbidden_words_in_column_names( - current_headers) + current_headers + ) + invalid = cls._identify_column_names_with_invalid_characters(current_headers) + forbidden = cls._identify_forbidden_words_in_column_names(current_headers) qiime2_reserved = cls._identify_qiime2_reserved_words_in_column_names( - current_headers) + current_headers + ) error = [] if pgsql_reserved: error.append( "These column names are PgSQL reserved words, replace them: " - "~~ %s ~~." % ", ".join(pgsql_reserved)) + "~~ %s ~~." % ", ".join(pgsql_reserved) + ) if invalid: error.append( "These column names contain invalid chars, remove or replace " - "them: ~~ %s ~~." % ", ".join(invalid)) + "them: ~~ %s ~~." % ", ".join(invalid) + ) if forbidden: error.append( "These column names are not valid in this information file, " - "remove them: ~~ %s ~~." % ", ".join(forbidden)) + "remove them: ~~ %s ~~." % ", ".join(forbidden) + ) if qiime2_reserved: error.append( "These columns are QIIME2 reserved words, replace them: " - " ~~ %s ~~." % ", ".join(pgsql_reserved)) + " ~~ %s ~~." % ", ".join(pgsql_reserved) + ) if error: raise qdb.exceptions.QiitaDBColumnError( - "%s\nYou need to modify them." % '\n'.join(error)) + "%s\nYou need to modify them." % "\n".join(error) + ) # Prefix the sample names with the study_id - qdb.metadata_template.util.prefix_sample_names_with_id(md_template, - study_id) + qdb.metadata_template.util.prefix_sample_names_with_id(md_template, study_id) # Check that we don't have duplicate columns if len(set(md_template.columns)) != len(md_template.columns): raise qdb.exceptions.QiitaDBDuplicateHeaderError( - set(duplicates(md_template.columns))) + set(duplicates(md_template.columns)) + ) # validate the INSDC_NULL_VALUES _df = md_template.fillna("").applymap(str).applymap(str.lower) @@ -648,8 +656,7 @@ def _common_creation_steps(cls, md_template, obj_id): values = dumps({"columns": md_template.columns.tolist()}) sql = """INSERT INTO qiita.{0} (sample_id, sample_values) - VALUES ('{1}', %s)""".format( - table_name, QIITA_COLUMN_NAME) + VALUES ('{1}', %s)""".format(table_name, QIITA_COLUMN_NAME) qdb.sql_connection.TRN.add(sql, [values]) values = [(k, df.to_json()) for k, df in md_template.iterrows()] @@ -678,13 +685,11 @@ def metadata_headers(cls): table_name != 'prep_template_sample' AND table_name != 'prep_template_processing_job' AND table_name != 'preparation_artifact' AND - table_name != 'prep_template'""".format( - cls._table_prefix) + table_name != 'prep_template'""".format(cls._table_prefix) qdb.sql_connection.TRN.add(sql) tables = qdb.sql_connection.TRN.execute_fetchflatten() sql = """SELECT sample_values->>'columns' - FROM qiita.%s WHERE sample_id = '{0}'""".format( - QIITA_COLUMN_NAME) + FROM qiita.%s WHERE sample_id = '{0}'""".format(QIITA_COLUMN_NAME) results = [] for t in tables: qdb.sql_connection.TRN.add(sql % t) @@ -710,18 +715,18 @@ def _common_delete_sample_steps(self, sample_names): keys = list(self.keys()) missing = [sn for sn in sample_names if sn not in keys] if missing: - raise qdb.exceptions.QiitaDBUnknownIDError( - ', '.join(missing), self._id) + raise qdb.exceptions.QiitaDBUnknownIDError(", ".join(missing), self._id) with qdb.sql_connection.TRN: # to simplify the sql strings, we are creating a base_sql, which # will be used to create sql1 and sql2. 
sql1 will delete the # sample_names from the main table ([sample | prep]_[id]), then # sql2 will delete the sample_names from [study | prep]_sample - base_sql = 'DELETE FROM qiita.{0} WHERE sample_id=%s' + base_sql = "DELETE FROM qiita.{0} WHERE sample_id=%s" sql1 = base_sql.format(self._table_name(self._id)) - sql2 = '{0} AND {1}=%s'.format( - base_sql.format(self._table), self._id_column) + sql2 = "{0} AND {1}=%s".format( + base_sql.format(self._table), self._id_column + ) for sn in sample_names: qdb.sql_connection.TRN.add(sql1, [sn]) qdb.sql_connection.TRN.add(sql2, [sn, self.id]) @@ -729,14 +734,15 @@ def _common_delete_sample_steps(self, sample_names): # making sure we don't delete all the samples qdb.sql_connection.TRN.add( - "SELECT COUNT(*) FROM qiita.{0}".format( - self._table_name(self._id))) + "SELECT COUNT(*) FROM qiita.{0}".format(self._table_name(self._id)) + ) # 1 as the JSON formated tables have an extra "sample" where we # store the column information if qdb.sql_connection.TRN.execute_fetchlast() <= 1: raise ValueError( - 'You cannot delete all samples from an information file') + "You cannot delete all samples from an information file" + ) self.generate_files(samples=sample_names) @@ -757,13 +763,15 @@ def delete_column(self, column_name): """ if column_name not in self.categories: raise qdb.exceptions.QiitaDBColumnError( - "'%s' not in info file %d" % (column_name, self._id)) + "'%s' not in info file %d" % (column_name, self._id) + ) if not self.can_be_updated(columns={column_name}): raise qdb.exceptions.QiitaDBOperationNotPermittedError( - '%s cannot be deleted' % column_name) + "%s cannot be deleted" % column_name + ) with qdb.sql_connection.TRN: - table_name = 'qiita.{0}{1}'.format(self._table_prefix, self._id) + table_name = "qiita.{0}{1}".format(self._table_prefix, self._id) # deleting from all samples; note that (-) in pgsql jsonb means # delete that key and value sql = """UPDATE {0} @@ -777,8 +785,7 @@ def delete_column(self, column_name): values = '{"columns": %s}' % dumps(columns) sql = """UPDATE {0} SET sample_values = %s - WHERE sample_id = '{1}'""".format( - table_name, QIITA_COLUMN_NAME) + WHERE sample_id = '{1}'""".format(table_name, QIITA_COLUMN_NAME) qdb.sql_connection.TRN.add(sql, [values]) qdb.sql_connection.TRN.execute() @@ -808,8 +815,8 @@ def can_be_extended(self, new_samples, new_cols): This method should be implemented in the subclasses """ raise qdb.exceptions.QiitaDBNotImplementedError( - "The method 'can_be_extended' should be implemented in " - "the subclasses") + "The method 'can_be_extended' should be implemented in the subclasses" + ) def can_be_updated(self, **kwargs): """Whether the template can be updated or not @@ -825,8 +832,8 @@ def can_be_updated(self, **kwargs): This method should be implemented in the subclasses """ raise qdb.exceptions.QiitaDBNotImplementedError( - "The method 'can_be_updated' should be implemented in " - "the subclasses") + "The method 'can_be_updated' should be implemented in the subclasses" + ) def _common_extend_steps(self, md_template): r"""executes the common extend steps @@ -854,8 +861,9 @@ def _common_extend_steps(self, md_template): ms = self.max_samples() nsamples = len(curr_samples) + len(new_samples) if ms is not None and nsamples > ms: - raise ValueError(f'{nsamples} exceeds the max allowed number ' - f'of samples: {ms}') + raise ValueError( + f"{nsamples} exceeds the max allowed number of samples: {ms}" + ) # Check if we are adding new columns headers = md_template.keys().tolist() @@ -864,8 +872,7 @@ 
def _common_extend_steps(self, md_template): if not new_cols and not new_samples: return None, None - is_extendable, error_msg = self.can_be_extended(new_samples, - new_cols) + is_extendable, error_msg = self.can_be_extended(new_samples, new_cols) if not is_extendable: raise qdb.exceptions.QiitaDBError(error_msg) @@ -875,7 +882,8 @@ def _common_extend_steps(self, md_template): warnings.warn( "The following columns have been added to the existing" " template: %s" % ", ".join(sorted(new_cols)), - qdb.exceptions.QiitaDBWarning) + qdb.exceptions.QiitaDBWarning, + ) # If we are adding new columns, add them first (simplifies # code). Sorting the new columns to enforce an order new_cols = sorted(new_cols) @@ -887,7 +895,8 @@ def _common_extend_steps(self, md_template): sql = """UPDATE qiita.{0} SET sample_values = %s WHERE sample_id = '{1}'""".format( - table_name, QIITA_COLUMN_NAME) + table_name, QIITA_COLUMN_NAME + ) qdb.sql_connection.TRN.add(sql, [values]) if existing_samples: @@ -902,14 +911,16 @@ def _common_extend_steps(self, md_template): sql = """UPDATE qiita.{0} SET sample_values = sample_values || %s WHERE sample_id = %s""".format( - self._table_name(self._id)) + self._table_name(self._id) + ) qdb.sql_connection.TRN.add(sql, [dumps(values), sid]) if new_samples: warnings.warn( "The following samples have been added to the existing" " template: %s" % ", ".join(new_samples), - qdb.exceptions.QiitaDBWarning) + qdb.exceptions.QiitaDBWarning, + ) new_samples = sorted(new_samples) @@ -920,13 +931,11 @@ def _common_extend_steps(self, md_template): # Insert new samples to the study sample table values = [[self._id, s_id] for s_id in new_samples] sql = """INSERT INTO qiita.{0} ({1}, sample_id) - VALUES (%s, %s)""".format(self._table, - self._id_column) + VALUES (%s, %s)""".format(self._table, self._id_column) qdb.sql_connection.TRN.add(sql, values, many=True) # inserting new samples to the info file - values = [(k, row.to_json()) - for k, row in md_filtered.iterrows()] + values = [(k, row.to_json()) for k, row in md_filtered.iterrows()] sql = """INSERT INTO qiita.{0} (sample_id, sample_values) VALUES (%s, %s)""".format(table_name) qdb.sql_connection.TRN.add(sql, values, many=True) @@ -963,7 +972,8 @@ def _get_sample_ids(self): """ with qdb.sql_connection.TRN: sql = "SELECT sample_id FROM qiita.{0} WHERE {1}=%s".format( - self._table, self._id_column) + self._table, self._id_column + ) qdb.sql_connection.TRN.add(sql, [self._id]) return set(qdb.sql_connection.TRN.execute_fetchflatten()) @@ -1003,8 +1013,9 @@ def __getitem__(self, key): if key in self: return self._sample_cls(key, self) else: - raise KeyError("Sample id %s does not exists in template %d" - % (key, self._id)) + raise KeyError( + "Sample id %s does not exists in template %d" % (key, self._id) + ) def __setitem__(self, key, value): r"""Sets the metadata values for sample id `key` @@ -1081,8 +1092,10 @@ def values(self): Iterator over Sample obj """ with qdb.sql_connection.TRN: - return iter(self._sample_cls(sample_id, self) - for sample_id in self._get_sample_ids()) + return iter( + self._sample_cls(sample_id, self) + for sample_id in self._get_sample_ids() + ) def items(self): r"""Iterator over (sample_id, values) tuples, in sample id order @@ -1093,8 +1106,10 @@ def items(self): Iterator over (sample_ids, values) tuples """ with qdb.sql_connection.TRN: - return iter((sample_id, self._sample_cls(sample_id, self)) - for sample_id in self._get_sample_ids()) + return iter( + (sample_id, self._sample_cls(sample_id, self)) + for 
sample_id in self._get_sample_ids() + ) def get(self, key): r"""Returns the metadata values for sample id `key`, or None if the @@ -1137,8 +1152,8 @@ def _transform_to_dict(self, values): # Transform the row to a dictionary values_dict = dict(row) # Get the sample id of this row - sid = values_dict['sample_id'] - del values_dict['sample_id'] + sid = values_dict["sample_id"] + del values_dict["sample_id"] # Remove _id_column from this row (if present) if self._id_column in values_dict: del values_dict[self._id_column] @@ -1155,7 +1170,8 @@ def generate_files(self): This method should be implemented by the subclasses """ raise qdb.exceptions.QiitaDBNotImplementedError( - "generate_files should be implemented in the subclass!") + "generate_files should be implemented in the subclass!" + ) def to_file(self, fp, samples=None): r"""Writes the MetadataTemplate to the file `fp` in tab-delimited @@ -1180,8 +1196,9 @@ def to_file(self, fp, samples=None): df.sort_index(axis=1, inplace=True) # Store the template in a file - df.to_csv(fp, index_label='sample_name', na_rep="", sep='\t', - encoding='utf-8') + df.to_csv( + fp, index_label="sample_name", na_rep="", sep="\t", encoding="utf-8" + ) def _common_to_dataframe_steps(self, samples=None): """Perform the common to_dataframe steps @@ -1198,21 +1215,23 @@ def _common_to_dataframe_steps(self, samples=None): sql = """SELECT sample_id, sample_values FROM qiita.{0} WHERE sample_id != '{1}'""".format( - self._table_name(self._id), QIITA_COLUMN_NAME) + self._table_name(self._id), QIITA_COLUMN_NAME + ) if samples is None: qdb.sql_connection.TRN.add(sql) else: - sql += ' AND sample_id IN %s' + sql += " AND sample_id IN %s" qdb.sql_connection.TRN.add(sql, [tuple(samples)]) data = qdb.sql_connection.TRN.execute_fetchindex() - df = pd.DataFrame([d for _, d in data], index=[i for i, _ in data], - dtype=str) - df.index.name = 'sample_name' + df = pd.DataFrame( + [d for _, d in data], index=[i for i, _ in data], dtype=str + ) + df.index.name = "sample_name" df.where((pd.notnull(df)), None) - id_column_name = 'qiita_%sid' % (self._table_prefix) - if id_column_name == 'qiita_sample_id': - id_column_name = 'qiita_study_id' + id_column_name = "qiita_%sid" % (self._table_prefix) + if id_column_name == "qiita_sample_id": + id_column_name = "qiita_study_id" df[id_column_name] = str(self.id) return df @@ -1225,25 +1244,29 @@ def add_filepath(self, filepath, fp_id=None): try: fpp_id = qdb.util.insert_filepaths( - [(filepath, fp_id)], None, "templates", - move_files=False)[0] + [(filepath, fp_id)], None, "templates", move_files=False + )[0] sql = """INSERT INTO qiita.{0} ({1}, filepath_id) - VALUES (%s, %s)""".format(self._filepath_table, - self._id_column) + VALUES (%s, %s)""".format( + self._filepath_table, self._id_column + ) qdb.sql_connection.TRN.add(sql, [self._id, fpp_id]) qdb.sql_connection.TRN.execute() except Exception as e: qdb.logger.LogEntry.create( - 'Runtime', str(e), info={self.__class__.__name__: self.id}) + "Runtime", str(e), info={self.__class__.__name__: self.id} + ) raise e def get_filepaths(self): r"""Retrieves the list of (filepath_id, filepath)""" with qdb.sql_connection.TRN: - return [(x['fp_id'], x['fp']) - for x in qdb.util.retrieve_filepaths( - self._filepath_table, self._id_column, self.id, - sort='descending')] + return [ + (x["fp_id"], x["fp"]) + for x in qdb.util.retrieve_filepaths( + self._filepath_table, self._id_column, self.id, sort="descending" + ) + ] @property def categories(self): @@ -1266,7 +1289,8 @@ def extend(self, md_template): 
""" with qdb.sql_connection.TRN: md_template = self._clean_validate_template( - md_template, self.study_id, current_columns=self.categories) + md_template, self.study_id, current_columns=self.categories + ) new_samples, new_columns = self._common_extend_steps(md_template) if new_samples or new_columns: self.validate(self.columns_restrictions) @@ -1305,25 +1329,23 @@ def _update(self, md_template): samples_diff = set(md_template.index).difference(current_map.index) if samples_diff: raise qdb.exceptions.QiitaDBError( - 'The new template differs from what is stored ' - 'in database by these samples names: %s' - % ', '.join(samples_diff)) + "The new template differs from what is stored " + "in database by these samples names: %s" % ", ".join(samples_diff) + ) if not set(current_map.columns).issuperset(md_template.columns): - columns_diff = set(md_template.columns).difference( - current_map.columns) + columns_diff = set(md_template.columns).difference(current_map.columns) raise qdb.exceptions.QiitaDBError( - 'Some of the columns in your template are not present in ' + "Some of the columns in your template are not present in " 'the system. Use "extend" if you want to add more columns ' - 'to the template. Missing columns: %s' - % ', '.join(columns_diff)) + "to the template. Missing columns: %s" % ", ".join(columns_diff) + ) # In order to speed up some computation, let's compare only the # common columns and rows. current_map.columns and # current_map.index are supersets of md_template.columns and # md_template.index, respectivelly, so this will not fail - current_map = current_map[ - md_template.columns].loc[md_template.index] + current_map = current_map[md_template.columns].loc[md_template.index] # Get the values that we need to change # diff_map is a DataFrame that hold boolean values. 
If a cell is @@ -1342,10 +1364,11 @@ def _update(self, md_template): warnings.warn( "There are no differences between the data stored in the " "DB and the new data provided", - qdb.exceptions.QiitaDBWarning) + qdb.exceptions.QiitaDBWarning, + ) return None, None - changed.index.names = ['sample_name', 'column'] + changed.index.names = ["sample_name", "column"] # the combination of np.where and boolean indexing produces # a numpy array with only the values that actually changed # between the current_map and md_template @@ -1361,7 +1384,7 @@ def _update(self, md_template): # host_subject_id the only one # XX.Sample3 sample_type 10 # physical_specimen_location new location - to_update = pd.DataFrame({'to': changed_to}, index=changed.index) + to_update = pd.DataFrame({"to": changed_to}, index=changed.index) # reset_index will expand the multi-index and convert the example # to: # sample_name column to @@ -1373,7 +1396,7 @@ def _update(self, md_template): to_update.reset_index(inplace=True) new_columns = [] samples_updated = [] - for sid, df in to_update.groupby('sample_name'): + for sid, df in to_update.groupby("sample_name"): samples_updated.append(sid) # getting just columns: column and to, and then using column # as index will generate this for XX.Sample2: @@ -1381,16 +1404,15 @@ def _update(self, md_template): # column # sample_type 5 # host_subject_id the only one - df = df[['column', 'to']].set_index('column') + df = df[["column", "to"]].set_index("column") # finally to_dict in XX.Sample2: # {'to': {'host_subject_id': 'the only one', # 'sample_type': '5'}} - values = df.to_dict()['to'] + values = df.to_dict()["to"] new_columns.extend(values.keys()) sql = """UPDATE qiita.{0} SET sample_values = sample_values || %s - WHERE sample_id = %s""".format( - self._table_name(self._id)) + WHERE sample_id = %s""".format(self._table_name(self._id)) qdb.sql_connection.TRN.add(sql, [dumps(values), sid]) nc = list(set(new_columns).union(set(self.categories))) @@ -1398,8 +1420,7 @@ def _update(self, md_template): values = dumps({"columns": nc}) sql = """UPDATE qiita.{0} SET sample_values = %s - WHERE sample_id = '{1}'""".format( - table_name, QIITA_COLUMN_NAME) + WHERE sample_id = '{1}'""".format(table_name, QIITA_COLUMN_NAME) qdb.sql_connection.TRN.add(sql, [values]) qdb.sql_connection.TRN.execute() @@ -1427,7 +1448,8 @@ def update(self, md_template): with qdb.sql_connection.TRN: # Clean and validate the metadata template given new_map = self._clean_validate_template( - md_template, self.study_id, current_columns=self.categories) + md_template, self.study_id, current_columns=self.categories + ) samples, columns = self._update(new_map) self.validate(self.columns_restrictions) self.generate_files(samples, columns) @@ -1447,7 +1469,8 @@ def extend_and_update(self, md_template): """ with qdb.sql_connection.TRN: md_template = self._clean_validate_template( - md_template, self.study_id, current_columns=self.categories) + md_template, self.study_id, current_columns=self.categories + ) new_samples, new_columns = self._common_extend_steps(md_template) samples, columns = self._update(md_template) if samples is None: @@ -1519,7 +1542,8 @@ def get_category(self, category): COALESCE(sample_values->>'{0}', 'None') AS {0} FROM qiita.{1} WHERE sample_id != '{2}'""".format( - category, self._table_name(self._id), QIITA_COLUMN_NAME) + category, self._table_name(self._id), QIITA_COLUMN_NAME + ) qdb.sql_connection.TRN.add(sql) return dict(qdb.sql_connection.TRN.execute_fetchindex()) @@ -1536,8 +1560,7 @@ def 
check_restrictions(self, restrictions): set of str The missing columns """ - cols = {col for restriction in restrictions - for col in restriction.columns} + cols = {col for restriction in restrictions for col in restriction.columns} return cols.difference(self.categories) @@ -1557,12 +1580,10 @@ def _get_accession_numbers(self, column): with qdb.sql_connection.TRN: sql = """SELECT sample_id, {0} FROM qiita.{1} - WHERE {2}=%s""".format(column, self._table, - self._id_column) + WHERE {2}=%s""".format(column, self._table, self._id_column) qdb.sql_connection.TRN.add(sql, [self.id]) dbresult = qdb.sql_connection.TRN.execute_fetchindex() - result = {sample_id: accession - for sample_id, accession in dbresult} + result = {sample_id: accession for sample_id, accession in dbresult} return result def _update_accession_numbers(self, column, values): @@ -1586,19 +1607,21 @@ def _update_accession_numbers(self, column, values): sql = """SELECT sample_id, {0} FROM qiita.{1} WHERE {2}=%s - AND {0} IS NOT NULL""".format(column, self._table, - self._id_column) + AND {0} IS NOT NULL""".format( + column, self._table, self._id_column + ) qdb.sql_connection.TRN.add(sql, [self.id]) dbresult = qdb.sql_connection.TRN.execute_fetchindex() - db_vals = {sample_id: accession - for sample_id, accession in dbresult} + db_vals = {sample_id: accession for sample_id, accession in dbresult} common_samples = set(db_vals) & set(values) - diff = [sample for sample in common_samples - if db_vals[sample] != values[sample]] + diff = [ + sample for sample in common_samples if db_vals[sample] != values[sample] + ] if diff: raise qdb.exceptions.QiitaDBError( "The following samples already have an accession number: " - "%s" % ', '.join(diff)) + "%s" % ", ".join(diff) + ) # Remove the common samples form the values dictionary values = deepcopy(values) @@ -1606,24 +1629,24 @@ def _update_accession_numbers(self, column, values): del values[sample] if values: - sql_vals = ', '.join(["(%s, %s)"] * len(values)) + sql_vals = ", ".join(["(%s, %s)"] * len(values)) sql = """UPDATE qiita.{0} AS t SET {1}=c.{1} FROM (VALUES {2}) AS c(sample_id, {1}) WHERE c.sample_id = t.sample_id AND t.{3} = %s - """.format(self._table, column, sql_vals, - self._id_column) + """.format(self._table, column, sql_vals, self._id_column) sql_vals = list(chain.from_iterable(values.items())) sql_vals.append(self.id) qdb.sql_connection.TRN.add(sql, sql_vals) qdb.sql_connection.TRN.execute() else: - warnings.warn("No new accession numbers to update", - qdb.exceptions.QiitaDBWarning) + warnings.warn( + "No new accession numbers to update", qdb.exceptions.QiitaDBWarning + ) def validate(self, restriction_dict): - """ Validate the values in the restricted fields in info files + """Validate the values in the restricted fields in info files Parameters ---------- @@ -1642,8 +1665,8 @@ def validate(self, restriction_dict): missing = set(restriction.columns).difference(columns) if missing: warning_msg.append( - "%s: %s" % (restriction.error_msg, - ', '.join(sorted(missing)))) + "%s: %s" % (restriction.error_msg, ", ".join(sorted(missing))) + ) else: valid_null = qdb.metadata_template.constants.EBI_NULL_VALUES for column, datatype in restriction.columns.items(): @@ -1660,9 +1683,13 @@ def validate(self, restriction_dict): val = str(val) formats = [ # 4 digits year - '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M', - '%Y-%m-%d %H', '%Y-%m-%d', '%Y-%m', '%Y' - ] + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%d %H:%M", + "%Y-%m-%d %H", + "%Y-%m-%d", + "%Y-%m", + "%Y", + ] date = None for fmt in formats: 
try: @@ -1671,21 +1698,20 @@ def validate(self, restriction_dict): except ValueError: pass if date is None: - warning_msg.append( - wrong_msg % (sample, column, val)) + warning_msg.append(wrong_msg % (sample, column, val)) else: try: datatype(val) except (ValueError, TypeError): - warning_msg.append( - wrong_msg % (sample, column, val)) + warning_msg.append(wrong_msg % (sample, column, val)) if warning_msg: warnings.warn( "Some functionality will be disabled due to missing " "columns:\n\t%s.\nSee the Templates tutorial for a description" " of these fields." % ";\n\t".join(warning_msg), - qdb.exceptions.QiitaDBWarning) + qdb.exceptions.QiitaDBWarning, + ) @classmethod def _identify_forbidden_words_in_column_names(cls, column_names): @@ -1719,8 +1745,7 @@ def _identify_pgsql_reserved_words_in_column_names(cls, column_names): ---------- .. [1] postgresql SQL-SYNTAX-IDENTIFIERS: https://goo.gl/EF0cUV. """ - return (qdb.metadata_template.util.get_pgsql_reserved_words() & - set(column_names)) + return qdb.metadata_template.util.get_pgsql_reserved_words() & set(column_names) @classmethod def _identify_column_names_with_invalid_characters(cls, column_names): @@ -1736,7 +1761,7 @@ def _identify_column_names_with_invalid_characters(cls, column_names): set of words containing invalid (illegal) characters. """ valid_initial_char = ascii_letters - valid_rest = set(ascii_letters+digits+'_:|') + valid_rest = set(ascii_letters + digits + "_:|") invalid = [] for s in column_names: if s[0] not in valid_initial_char: @@ -1758,8 +1783,9 @@ def _identify_qiime2_reserved_words_in_column_names(cls, column_names): ------ set of words containing QIIME2-reserved words. """ - return (qdb.metadata_template.util.get_qiime2_reserved_words() & - set(column_names)) + return qdb.metadata_template.util.get_qiime2_reserved_words() & set( + column_names + ) @property def restrictions(cls): @@ -1789,7 +1815,7 @@ def validate_restrictions(self): """ with qdb.sql_connection.TRN: # [:-1] removing last _ - name = '%s %d' % (self._table_prefix[:-1], self.id) + name = "%s %d" % (self._table_prefix[:-1], self.id) success = True message = [] restrictions = self.restrictions @@ -1799,8 +1825,8 @@ def validate_restrictions(self): if difference: success = False message.append( - '%s is missing columns "%s"' % (name, ', '.join( - difference))) + '%s is missing columns "%s"' % (name, ", ".join(difference)) + ) to_review = set(restrictions.keys()) & set(categories) for key in to_review: @@ -1813,7 +1839,7 @@ def validate_restrictions(self): success = False message.append( '%s has invalid values: "%s", valid values are: ' - '"%s"' % (name, ', '.join(msg), - ', '.join(restrictions[key]))) + '"%s"' % (name, ", ".join(msg), ", ".join(restrictions[key])) + ) - return success, '\n'.join(message) + return success, "\n".join(message) diff --git a/qiita_db/metadata_template/constants.py b/qiita_db/metadata_template/constants.py index 9fbeb7ddf..f447e388d 100644 --- a/qiita_db/metadata_template/constants.py +++ b/qiita_db/metadata_template/constants.py @@ -9,75 +9,91 @@ from collections import namedtuple from datetime import datetime -Restriction = namedtuple('Restriction', ['columns', 'error_msg']) +Restriction = namedtuple("Restriction", ["columns", "error_msg"]) # A dict containing the restrictions that apply to the sample templates SAMPLE_TEMPLATE_COLUMNS = { # The following columns are required by EBI for submission - 'EBI': Restriction(columns={'collection_timestamp': datetime, - 'physical_specimen_location': str, - 'taxon_id': int, - 
'description': str, - 'scientific_name': str}, - error_msg="EBI submission disabled"), + "EBI": Restriction( + columns={ + "collection_timestamp": datetime, + "physical_specimen_location": str, + "taxon_id": int, + "description": str, + "scientific_name": str, + }, + error_msg="EBI submission disabled", + ), # The following columns are required for the official main QIITA site - 'qiita_main': Restriction(columns={'sample_type': str, - 'description': str, - 'physical_specimen_remaining': bool, - 'dna_extracted': bool, - 'latitude': float, - 'longitude': float, - 'host_subject_id': str}, - error_msg="Processed data approval disabled") + "qiita_main": Restriction( + columns={ + "sample_type": str, + "description": str, + "physical_specimen_remaining": bool, + "dna_extracted": bool, + "latitude": float, + "longitude": float, + "host_subject_id": str, + }, + error_msg="Processed data approval disabled", + ), } # A dict containing the restrictions that apply to the prep templates PREP_TEMPLATE_COLUMNS = { # The following columns are required by EBI for submission - 'EBI': Restriction( - columns={'center_name': str, - 'platform': str, - 'instrument_model': str, - 'library_construction_protocol': str, - 'experiment_design_description': str}, - error_msg="EBI submission disabled") + "EBI": Restriction( + columns={ + "center_name": str, + "platform": str, + "instrument_model": str, + "library_construction_protocol": str, + "experiment_design_description": str, + }, + error_msg="EBI submission disabled", + ) } # Different prep templates have different requirements depending on the data # type. We create a dictionary for each of these special datatypes -TARGET_GENE_DATA_TYPES = ['16S', '18S', 'ITS'] +TARGET_GENE_DATA_TYPES = ["16S", "18S", "ITS"] PREP_TEMPLATE_COLUMNS_TARGET_GENE = { # The following columns are required by QIIME to execute split libraries - 'demultiplex': Restriction( - columns={'barcode': str}, - error_msg="Demultiplexing disabled."), + "demultiplex": Restriction( + columns={"barcode": str}, error_msg="Demultiplexing disabled." 
+ ), # The following columns are required by Qiita to know how to execute split # libraries using QIIME over a study with multiple illumina lanes - 'demultiplex_multiple': Restriction( - columns={'barcode': str, - 'primer': str, - 'run_prefix': str}, - error_msg="Demultiplexing with multiple input files disabled.") + "demultiplex_multiple": Restriction( + columns={"barcode": str, "primer": str, "run_prefix": str}, + error_msg="Demultiplexing with multiple input files disabled.", + ), } # This list is useful to have if we want to loop through all the restrictions # in a template-independent manner -ALL_RESTRICTIONS = [SAMPLE_TEMPLATE_COLUMNS, PREP_TEMPLATE_COLUMNS, - PREP_TEMPLATE_COLUMNS_TARGET_GENE] +ALL_RESTRICTIONS = [ + SAMPLE_TEMPLATE_COLUMNS, + PREP_TEMPLATE_COLUMNS, + PREP_TEMPLATE_COLUMNS_TARGET_GENE, +] # This is what we consider as "NaN" cell values on metadata import # from http://www.ebi.ac.uk/ena/about/missing-values-reporting -EBI_NULL_VALUES = ['Not applicable', 'Missing: Not collected', - 'Missing: Not provided', 'Missing: Restricted access'] +EBI_NULL_VALUES = [ + "Not applicable", + "Missing: Not collected", + "Missing: Not provided", + "Missing: Restricted access", +] # These are what will be considered 'True' bool values on metadata import -TRUE_VALUES = ['Yes', 'yes', 'YES', 'Y', 'y', 'True', 'true', 'TRUE', 't', 'T'] +TRUE_VALUES = ["Yes", "yes", "YES", "Y", "y", "True", "true", "TRUE", "t", "T"] # These are what will be considered 'False' bool values on metadata import -FALSE_VALUES = ['No', 'no', 'NO', 'N', 'n', 'False', 'false', 'FALSE', - 'f', 'F'] +FALSE_VALUES = ["No", "no", "NO", "N", "n", "False", "false", "FALSE", "f", "F"] # A set holding all the controlled columns, useful to avoid recalculating it diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py index 059ccb55f..3808c4efd 100644 --- a/qiita_db/metadata_template/prep_template.py +++ b/qiita_db/metadata_template/prep_template.py @@ -5,17 +5,21 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +from copy import deepcopy from itertools import chain from os.path import join -from copy import deepcopy + from iteration_utilities import duplicates -from qiita_core.exceptions import IncompetentQiitaDeveloperError import qiita_db as qdb -from .constants import (PREP_TEMPLATE_COLUMNS, TARGET_GENE_DATA_TYPES, - PREP_TEMPLATE_COLUMNS_TARGET_GENE) -from .base_metadata_template import ( - BaseSample, MetadataTemplate, QIITA_COLUMN_NAME) +from qiita_core.exceptions import IncompetentQiitaDeveloperError + +from .base_metadata_template import QIITA_COLUMN_NAME, BaseSample, MetadataTemplate +from .constants import ( + PREP_TEMPLATE_COLUMNS, + PREP_TEMPLATE_COLUMNS_TARGET_GENE, + TARGET_GENE_DATA_TYPES, +) def _check_duplicated_columns(prep_cols, sample_cols): @@ -37,9 +41,9 @@ def _check_duplicated_columns(prep_cols, sample_cols): dups = set(duplicates(prep_cols)) if dups: raise qdb.exceptions.QiitaDBColumnError( - 'Duplicated column names in the sample and prep info ' - 'files: %s. You need to delete that duplicated field' % - ','.join(dups)) + "Duplicated column names in the sample and prep info " + "files: %s. 
You need to delete that duplicated field" % ",".join(dups) + ) class PrepSample(BaseSample): @@ -50,6 +54,7 @@ class PrepSample(BaseSample): BaseSample Sample """ + _table = "prep_template_sample" _table_prefix = "prep_" _id_column = "prep_template_id" @@ -80,20 +85,29 @@ class PrepTemplate(MetadataTemplate): MetadataTemplate SampleTemplate """ + _table = "prep_template_sample" _table_prefix = "prep_" _id_column = "prep_template_id" _sample_cls = PrepSample - _filepath_table = 'prep_template_filepath' + _filepath_table = "prep_template_filepath" _forbidden_words = { - 'sampleid', - 'qiita_study_id', - 'qiita_prep_id', - QIITA_COLUMN_NAME} + "sampleid", + "qiita_study_id", + "qiita_prep_id", + QIITA_COLUMN_NAME, + } @classmethod - def create(cls, md_template, study, data_type, investigation_type=None, - name=None, creation_job_id=None): + def create( + cls, + md_template, + study, + data_type, + investigation_type=None, + name=None, + creation_job_id=None, + ): r"""Creates the metadata template in the database Parameters @@ -125,8 +139,7 @@ def create(cls, md_template, study, data_type, investigation_type=None, # Check if the data_type is the id or the string if isinstance(data_type, int): data_type_id = data_type - data_type_str = qdb.util.convert_from_id(data_type, - "data_type") + data_type_str = qdb.util.convert_from_id(data_type, "data_type") else: data_type_id = qdb.util.convert_to_id(data_type, "data_type") data_type_str = data_type @@ -135,11 +148,11 @@ def create(cls, md_template, study, data_type, investigation_type=None, # data_type being created - if possible if investigation_type is None: if data_type_str in TARGET_GENE_DATA_TYPES: - investigation_type = 'AMPLICON' - elif data_type_str == 'Metagenomic': - investigation_type = 'WGS' - elif data_type_str == 'Metatranscriptomic': - investigation_type = 'RNA-Seq' + investigation_type = "AMPLICON" + elif data_type_str == "Metagenomic": + investigation_type = "WGS" + elif data_type_str == "Metatranscriptomic": + investigation_type = "RNA-Seq" if investigation_type is not None: cls.validate_investigation_type(investigation_type) @@ -149,15 +162,17 @@ def create(cls, md_template, study, data_type, investigation_type=None, pt_cols.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE) md_template = cls._clean_validate_template(md_template, study.id) - _check_duplicated_columns(list(md_template.columns), - study.sample_template.categories) + _check_duplicated_columns( + list(md_template.columns), study.sample_template.categories + ) # check that we are within the limit of number of samples ms = cls.max_samples() nsamples = md_template.shape[0] if ms is not None and nsamples > ms: - raise ValueError(f"{nsamples} exceeds the max allowed number " - f"of samples: {ms}") + raise ValueError( + f"{nsamples} exceeds the max allowed number of samples: {ms}" + ) # Insert the metadata template if creation_job_id: @@ -166,14 +181,14 @@ def create(cls, md_template, study, data_type, investigation_type=None, VALUES (%s, %s, %s) RETURNING prep_template_id""" qdb.sql_connection.TRN.add( - sql, [data_type_id, investigation_type, creation_job_id]) + sql, [data_type_id, investigation_type, creation_job_id] + ) else: sql = """INSERT INTO qiita.prep_template (data_type_id, investigation_type) VALUES (%s, %s) RETURNING prep_template_id""" - qdb.sql_connection.TRN.add( - sql, [data_type_id, investigation_type]) + qdb.sql_connection.TRN.add(sql, [data_type_id, investigation_type]) prep_id = qdb.sql_connection.TRN.execute_fetchlast() try: @@ -187,11 +202,13 @@ def 
create(cls, md_template, study, data_type, investigation_type=None, qdb.sql_connection.TRN.add(sql, [study.id]) prep_samples = set(md_template.index.values) unknown_samples = prep_samples.difference( - qdb.sql_connection.TRN.execute_fetchflatten()) + qdb.sql_connection.TRN.execute_fetchflatten() + ) if unknown_samples: raise qdb.exceptions.QiitaDBExecutionError( - 'Samples found in prep template but not sample ' - 'template: %s' % ', '.join(unknown_samples)) + "Samples found in prep template but not sample " + "template: %s" % ", ".join(unknown_samples) + ) # some other error we haven't seen before so raise it raise @@ -209,8 +226,7 @@ def create(cls, md_template, study, data_type, investigation_type=None, pt.generate_files() # Add the name to the prep information - pt.name = (name if name is not None - else "Prep information %s" % pt.id) + pt.name = name if name is not None else "Prep information %s" % pt.id return pt @@ -229,13 +245,13 @@ def validate_investigation_type(self, investigation_type): The investigation type is not in the ENA ontology """ with qdb.sql_connection.TRN: - ontology = qdb.ontology.Ontology( - qdb.util.convert_to_id('ENA', 'ontology')) + ontology = qdb.ontology.Ontology(qdb.util.convert_to_id("ENA", "ontology")) terms = ontology.terms + ontology.user_defined_terms if investigation_type not in terms: raise qdb.exceptions.QiitaDBColumnError( "'%s' is Not a valid investigation_type. Choose from: %s" - % (investigation_type, ', '.join(terms))) + % (investigation_type, ", ".join(terms)) + ) @classmethod def delete(cls, id_): @@ -270,7 +286,8 @@ def delete(cls, id_): if artifact_attached: raise qdb.exceptions.QiitaDBExecutionError( "Cannot remove prep template %d because it has an artifact" - " associated with it" % id_) + " associated with it" % id_ + ) # artifacts that are archived are not returned as part of the code # above and we need to clean them before moving forward @@ -278,8 +295,7 @@ def delete(cls, id_): FROM qiita.preparation_artifact WHERE prep_template_id = %s""" qdb.sql_connection.TRN.add(sql, args) - archived_artifacts = set( - qdb.sql_connection.TRN.execute_fetchflatten()) + archived_artifacts = set(qdb.sql_connection.TRN.execute_fetchflatten()) ANALYSIS = qdb.analysis.Analysis if archived_artifacts: for aid in archived_artifacts: @@ -292,8 +308,7 @@ def delete(cls, id_): FROM qiita.analysis_sample WHERE artifact_id IN %s)""" qdb.sql_connection.TRN.add(sql, [tuple([aid])]) - analyses = set( - qdb.sql_connection.TRN.execute_fetchflatten()) + analyses = set(qdb.sql_connection.TRN.execute_fetchflatten()) for _id in analyses: ANALYSIS.delete_analysis_artifacts(_id) qdb.artifact.Artifact.delete(aid) @@ -309,7 +324,8 @@ def delete(cls, id_): # Remove the rows from prep_template_samples sql = "DELETE FROM qiita.{0} WHERE {1} = %s".format( - cls._table, cls._id_column) + cls._table, cls._id_column + ) qdb.sql_connection.TRN.add(sql, args) # Remove the row from study_prep_template @@ -319,7 +335,8 @@ def delete(cls, id_): # Remove the row from prep_template sql = "DELETE FROM qiita.prep_template WHERE {0} = %s".format( - cls._id_column) + cls._id_column + ) qdb.sql_connection.TRN.add(sql, args) qdb.sql_connection.TRN.execute() @@ -398,9 +415,11 @@ def can_be_updated(self, columns): if not qdb.sql_connection.TRN.execute_fetchlast(): return True - tg_columns = set(chain.from_iterable( - [v.columns for v in - PREP_TEMPLATE_COLUMNS_TARGET_GENE.values()])) + tg_columns = set( + chain.from_iterable( + [v.columns for v in PREP_TEMPLATE_COLUMNS_TARGET_GENE.values()] + ) 
+ ) if not columns & tg_columns: return True @@ -438,13 +457,16 @@ def can_be_extended(self, new_samples, new_columns): WHERE parent_id = %s)""" qdb.sql_connection.TRN.add(sql, [artifact.id]) if qdb.sql_connection.TRN.execute_fetchlast(): - return False, ("The artifact attached to the prep " - "template has already been processed. " - "No new samples can be added to the " - "prep template") + return False, ( + "The artifact attached to the prep " + "template has already been processed. " + "No new samples can be added to the " + "prep template" + ) - _check_duplicated_columns(list(new_columns), qdb.study.Study( - self.study_id).sample_template.categories) + _check_duplicated_columns( + list(new_columns), qdb.study.Study(self.study_id).sample_template.categories + ) return True, "" @@ -470,8 +492,8 @@ def artifact(self, artifact): qdb.sql_connection.TRN.add(sql, [self.id]) if qdb.sql_connection.TRN.execute_fetchlast(): raise qdb.exceptions.QiitaDBError( - "Prep template %d already has an artifact associated" - % self.id) + "Prep template %d already has an artifact associated" % self.id + ) sql = """UPDATE qiita.prep_template SET artifact_id = %s WHERE prep_template_id = %s""" @@ -505,8 +527,7 @@ def investigation_type(self, investigation_type): sql = """UPDATE qiita.prep_template SET investigation_type = %s WHERE {0} = %s""".format(self._id_column) - qdb.sql_connection.perform_as_transaction( - sql, [investigation_type, self.id]) + qdb.sql_connection.perform_as_transaction(sql, [investigation_type, self.id]) @property def study_id(self): @@ -556,15 +577,20 @@ def generate_files(self, samples=None, columns=None): """ with qdb.sql_connection.TRN: # figuring out the filepath of the prep template - _id, fp = qdb.util.get_mountpoint('templates')[0] + _id, fp = qdb.util.get_mountpoint("templates")[0] # update timestamp in the DB first qdb.sql_connection.TRN.add( """UPDATE qiita.prep_template SET modification_timestamp = CURRENT_TIMESTAMP - WHERE prep_template_id = %s""", [self._id]) + WHERE prep_template_id = %s""", + [self._id], + ) ctime = self.modification_timestamp - fp = join(fp, '%d_prep_%d_%s.txt' % (self.study_id, self._id, - ctime.strftime("%Y%m%d-%H%M%S"))) + fp = join( + fp, + "%d_prep_%d_%s.txt" + % (self.study_id, self._id, ctime.strftime("%Y%m%d-%H%M%S")), + ) # storing the template self.to_file(fp) # adding the fp to the object @@ -594,10 +620,10 @@ def status(self): JOIN qiita.visibility USING (visibility_id) WHERE prep_template_id = %s and visibility_id NOT IN %s""" qdb.sql_connection.TRN.add( - sql, [self._id, qdb.util.artifact_visibilities_to_skip()]) + sql, [self._id, qdb.util.artifact_visibilities_to_skip()] + ) - return qdb.util.infer_status( - qdb.sql_connection.TRN.execute_fetchindex()) + return qdb.util.infer_status(qdb.sql_connection.TRN.execute_fetchindex()) @property def qiime_map_fp(self): @@ -609,10 +635,10 @@ def qiime_map_fp(self): The filepath of the QIIME mapping file """ for x in qdb.util.retrieve_filepaths( - self._filepath_table, self._id_column, self.id, - sort='descending'): - if x['fp_type'] == 'qiime_map': - return x['fp'] + self._filepath_table, self._id_column, self.id, sort="descending" + ): + if x["fp_type"] == "qiime_map": + return x["fp"] @property def ebi_experiment_accessions(self): @@ -623,7 +649,7 @@ def ebi_experiment_accessions(self): dict of {str: str} The EBI experiment accessions numbers keyed by sample id """ - return self._get_accession_numbers('ebi_experiment_accession') + return self._get_accession_numbers("ebi_experiment_accession") 
@ebi_experiment_accessions.setter def ebi_experiment_accessions(self, value): @@ -639,7 +665,7 @@ def ebi_experiment_accessions(self, value): QiitaDBError If a sample in `value` already has an accession number """ - self._update_accession_numbers('ebi_experiment_accession', value) + self._update_accession_numbers("ebi_experiment_accession", value) @property def is_submitted_to_ebi(self): @@ -678,7 +704,8 @@ def delete_samples(self, sample_names): if self.artifact: raise qdb.exceptions.QiitaDBOperationNotPermittedError( "Prep info file '%d' has files attached, you cannot delete " - "samples." % (self._id)) + "samples." % (self._id) + ) self._common_delete_sample_steps(sample_names) @@ -718,8 +745,9 @@ def to_dataframe(self, add_ebi_accessions=False): if add_ebi_accessions: accessions = self.ebi_experiment_accessions - df['qiita_ebi_experiment_accessions'] = df.index.map( - lambda sid: accessions[sid]) + df["qiita_ebi_experiment_accessions"] = df.index.map( + lambda sid: accessions[sid] + ) return df @@ -812,12 +840,18 @@ def _get_node_info(workflow, node): if pcmd is not None: parent_cmd_name = pcmd.name parent_merging_scheme = pcmd.merging_scheme - if not parent_merging_scheme['ignore_parent_command']: + if not parent_merging_scheme["ignore_parent_command"]: phms = _get_node_info(workflow, parent) hms = qdb.util.human_merging_scheme( - ccmd.name, ccmd.merging_scheme, parent_cmd_name, - parent_merging_scheme, cparams, [], pparams) + ccmd.name, + ccmd.merging_scheme, + parent_cmd_name, + parent_merging_scheme, + cparams, + [], + pparams, + ) # if the parent should not ignore its parent command, then we need # to merge the previous result with the new one @@ -833,9 +867,12 @@ def _get_predecessors(workflow, node): parents = list(workflow.graph.predecessors(node)) for pnode in parents: pred = _get_predecessors(workflow, pnode) - cxns = {x[0]: x[2] - for x in workflow.graph.get_edge_data( - pnode, node)['connections'].connections} + cxns = { + x[0]: x[2] + for x in workflow.graph.get_edge_data(pnode, node)[ + "connections" + ].connections + } data = [pnode, node, cxns] if pred is None: pred = [] @@ -875,19 +912,23 @@ def _get_predecessors(workflow, node): if workflow is not None: prep_jobs = [] else: - prep_jobs = [j for c in self.artifact.descendants.nodes() - for j in c.jobs(show_hidden=True) - if j.command.software.type == - 'artifact transformation'] + prep_jobs = [ + j + for c in self.artifact.descendants.nodes() + for j in c.jobs(show_hidden=True) + if j.command.software.type == "artifact transformation" + ] merging_schemes = { qdb.archive.Archive.get_merging_scheme_from_job(j): { - x: str(y.id) for x, y in j.outputs.items()} + x: str(y.id) for x, y in j.outputs.items() + } # we are going to select only the jobs that were a 'success', that # are not 'hidden' and that have an output - jobs that are not # hidden and a successs but that do not have outputs are jobs which # resulting artifacts (outputs) were deleted - for j in prep_jobs if j.status == 'success' and not j.hidden - and j.outputs} + for j in prep_jobs + if j.status == "success" and not j.hidden and j.outputs + } # 2. 
pt_dt = self.data_type() @@ -895,7 +936,7 @@ def _get_predecessors(workflow, node): # the job if workflow is not None: starting_job = list(workflow.graph.nodes())[0] - pt_artifact = starting_job.parameters.values['artifact_type'] + pt_artifact = starting_job.parameters.values["artifact_type"] else: starting_job = None pt_artifact = self.artifact.artifact_type @@ -910,20 +951,22 @@ def _get_predecessors(workflow, node): reqs_satisfied = True total_conditions_satisfied = 0 - if wk_params['sample']: + if wk_params["sample"]: df = ST(self.study_id).to_dataframe(samples=list(self)) - for k, v in wk_params['sample'].items(): - if k not in df.columns or (v != '*' and v not in - df[k].unique()): + for k, v in wk_params["sample"].items(): + if k not in df.columns or ( + v != "*" and v not in df[k].unique() + ): reqs_satisfied = False else: total_conditions_satisfied += 1 - if wk_params['prep']: + if wk_params["prep"]: df = self.to_dataframe() - for k, v in wk_params['prep'].items(): - if k not in df.columns or (v != '*' and v not in - df[k].unique()): + for k, v in wk_params["prep"].items(): + if k not in df.columns or ( + v != "*" and v not in df[k].unique() + ): reqs_satisfied = False else: total_conditions_satisfied += 1 @@ -933,10 +976,12 @@ def _get_predecessors(workflow, node): if not workflows: # raises option a. - msg = (f'This preparation data type: "{pt_dt}" and/or artifact ' - f'type "{pt_artifact}" does not have valid workflows; this ' - 'could be due to required parameters, please check the ' - 'available workflows.') + msg = ( + f'This preparation data type: "{pt_dt}" and/or artifact ' + f'type "{pt_artifact}" does not have valid workflows; this ' + "could be due to required parameters, please check the " + "available workflows." + ) raise ValueError(msg) # let's just keep one, let's give it preference to the one with the @@ -952,7 +997,7 @@ def _get_predecessors(workflow, node): missing_artifacts[mscheme] = node if not missing_artifacts: # raises option b. - raise ValueError('This preparation is complete') + raise ValueError("This preparation is complete") # 3. 
previous_jobs = dict() @@ -967,8 +1012,9 @@ def _get_predecessors(workflow, node): params = cdp.values.copy() icxns = {y: x for x, y in cxns.items()} - reqp = {x: icxns[y[1][0]] - for x, y in cdp_cmd.required_parameters.items()} + reqp = { + x: icxns[y[1][0]] for x, y in cdp_cmd.required_parameters.items() + } cmds_to_create.append([cdp, cdp_cmd, params, reqp]) info = _get_node_info(wk, pnode) @@ -988,16 +1034,17 @@ def _get_predecessors(workflow, node): reqp = dict() for x, y in pdp_cmd.required_parameters.items(): if wkartifact_type not in y[1]: - raise ValueError(f'{wkartifact_type} is not part ' - 'of this preparation and cannot ' - 'be applied') + raise ValueError( + f"{wkartifact_type} is not part " + "of this preparation and cannot " + "be applied" + ) reqp[x] = wkartifact_type cmds_to_create.append([pdp, pdp_cmd, params, reqp]) if starting_job is not None: - init_artifacts = { - wkartifact_type: f'{starting_job.id}:'} + init_artifacts = {wkartifact_type: f"{starting_job.id}:"} else: init_artifacts = {wkartifact_type: str(self.artifact.id)} @@ -1016,45 +1063,52 @@ def _get_predecessors(workflow, node): if previous_job is None: for iname, dname in rp.items(): if dname not in init_artifacts: - msg = (f'Missing Artifact type: "{dname}" in ' - 'this preparation; this might be due ' - 'to missing steps or not having the ' - 'correct raw data.') + msg = ( + f'Missing Artifact type: "{dname}" in ' + "this preparation; this might be due " + "to missing steps or not having the " + "correct raw data." + ) # raises option c. raise ValueError(msg) req_params[iname] = init_artifacts[dname] if len(dp.command.required_parameters) > 1: for pn in GH.predecessors(node): info = _get_node_info(wk, pn) - n, cnx, _ = GH.get_edge_data( - pn, node)['connections'].connections[0] - if info not in merging_schemes or \ - n not in merging_schemes[info]: - msg = ('This workflow contains a step with ' - 'multiple inputs so it cannot be ' - 'completed automatically, please add ' - 'the commands by hand.') + n, cnx, _ = GH.get_edge_data(pn, node)[ + "connections" + ].connections[0] + if ( + info not in merging_schemes + or n not in merging_schemes[info] + ): + msg = ( + "This workflow contains a step with " + "multiple inputs so it cannot be " + "completed automatically, please add " + "the commands by hand." 
+ ) raise ValueError(msg) req_params[cnx] = merging_schemes[info][n] else: if len(dp.command.required_parameters) == 1: cxns = dict() for iname, dname in rp.items(): - req_params[iname] = f'{previous_job.id}{dname}' + req_params[iname] = f"{previous_job.id}{dname}" cxns[dname] = iname connections = {previous_job: cxns} else: connections = dict() for pn in GH.predecessors(node): pndp = pn.default_parameter - n, cnx, _ = GH.get_edge_data( - pn, node)['connections'].connections[0] + n, cnx, _ = GH.get_edge_data(pn, node)[ + "connections" + ].connections[0] _job = previous_dps[pndp.id] - req_params[cnx] = f'{_job.id}{n}' + req_params[cnx] = f"{_job.id}{n}" connections[_job] = {n: cnx} params.update(req_params) - job_params = qdb.software.Parameters.load( - cmd, values_dict=params) + job_params = qdb.software.Parameters.load(cmd, values_dict=params) if params in previous_jobs.values(): for x, y in previous_jobs.items(): @@ -1064,16 +1118,18 @@ def _get_predecessors(workflow, node): if workflow is None: PW = qdb.processing_job.ProcessingWorkflow workflow = PW.from_scratch(user, job_params) - current_job = [ - j for j in workflow.graph.nodes()][0] + current_job = [j for j in workflow.graph.nodes()][0] else: if previous_job is None: current_job = workflow.add( - job_params, req_params=req_params) + job_params, req_params=req_params + ) else: current_job = workflow.add( - job_params, req_params=req_params, - connections=connections) + job_params, + req_params=req_params, + connections=connections, + ) previous_jobs[current_job] = params previous_dps[dp.id] = current_job @@ -1089,15 +1145,17 @@ def archived_artifacts(self): The list of archivde Artifacts """ with qdb.sql_connection.TRN: - sql = """SELECT artifact_id FROM qiita.preparation_artifact LEFT JOIN qiita.artifact USING (artifact_id) WHERE prep_template_id = %s AND visibility_id IN %s""" qdb.sql_connection.TRN.add( - sql, [self.id, qdb.util.artifact_visibilities_to_skip()]) - return [qdb.artifact.Artifact(ai) - for ai in qdb.sql_connection.TRN.execute_fetchflatten()] + sql, [self.id, qdb.util.artifact_visibilities_to_skip()] + ) + return [ + qdb.artifact.Artifact(ai) + for ai in qdb.sql_connection.TRN.execute_fetchflatten() + ] @property def creation_job_id(self): diff --git a/qiita_db/metadata_template/sample_template.py b/qiita_db/metadata_template/sample_template.py index 78422c3d4..e8bac7b25 100644 --- a/qiita_db/metadata_template/sample_template.py +++ b/qiita_db/metadata_template/sample_template.py @@ -8,11 +8,10 @@ from os.path import join from time import strftime +import qiita_db as qdb from qiita_core.exceptions import IncompetentQiitaDeveloperError -import qiita_db as qdb -from .base_metadata_template import ( - BaseSample, MetadataTemplate, QIITA_COLUMN_NAME) +from .base_metadata_template import QIITA_COLUMN_NAME, BaseSample, MetadataTemplate class Sample(BaseSample): @@ -23,6 +22,7 @@ class Sample(BaseSample): BaseSample PrepSample """ + _table = "study_sample" _table_prefix = "sample_" _id_column = "study_id" @@ -53,22 +53,24 @@ class SampleTemplate(MetadataTemplate): MetadataTemplate PrepTemplate """ + _table = "study_sample" _table_prefix = "sample_" _id_column = "study_id" _sample_cls = Sample - _filepath_table = 'sample_template_filepath' + _filepath_table = "sample_template_filepath" _forbidden_words = { - 'barcodesequence', - 'linkerprimersequence', - 'barcode', - 'linker', - 'primer', - 'run_prefix', - 'sampleid', - 'qiita_study_id', - 'qiita_prep_id', - QIITA_COLUMN_NAME} + "barcodesequence", + "linkerprimersequence", + 
"barcode", + "linker", + "primer", + "run_prefix", + "sampleid", + "qiita_study_id", + "qiita_prep_id", + QIITA_COLUMN_NAME, + } @classmethod def create(cls, md_template, study): @@ -87,7 +89,8 @@ def create(cls, md_template, study): # Check that we don't have a MetadataTemplate for study if cls.exists(study.id): raise qdb.exceptions.QiitaDBDuplicateError( - cls.__name__, 'id: %d' % study.id) + cls.__name__, "id: %d" % study.id + ) # Clean and validate the metadata template given md_template = cls._clean_validate_template(md_template, study.id) @@ -95,8 +98,7 @@ def create(cls, md_template, study): cls._common_creation_steps(md_template, study.id) st = cls(study.id) - st.validate( - qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS) + st.validate(qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS) st.generate_files() return st @@ -131,7 +133,8 @@ def delete(cls, id_): if has_prep_templates: raise qdb.exceptions.QiitaDBError( "Sample template cannot be erased because there are prep " - "templates associated.") + "templates associated." + ) table_name = cls._table_name(id_) @@ -145,7 +148,8 @@ def delete(cls, id_): qdb.sql_connection.TRN.add(sql) sql = "DELETE FROM qiita.{0} WHERE {1} = %s".format( - cls._table, cls._id_column) + cls._table, cls._id_column + ) qdb.sql_connection.TRN.add(sql, args) qdb.sql_connection.TRN.execute() @@ -185,14 +189,17 @@ def delete_samples(self, sample_names): QiitaDBOperationNotPermittedError If the `sample_name` has been used in a prep info file """ - pts = {pt.id: [sn for sn in sample_names if pt.get(sn) is not None] - for pt in qdb.study.Study(self.study_id).prep_templates()} + pts = { + pt.id: [sn for sn in sample_names if pt.get(sn) is not None] + for pt in qdb.study.Study(self.study_id).prep_templates() + } if any(pts.values()): - sids = ', '.join({vv for v in pts.values() for vv in v}) - pts = ', '.join(map(str, pts.keys())) + sids = ", ".join({vv for v in pts.values() for vv in v}) + pts = ", ".join(map(str, pts.keys())) raise qdb.exceptions.QiitaDBOperationNotPermittedError( "'%s' cannot be deleted as they have been found in a prep " - "information file: '%s'" % (sids, pts)) + "information file: '%s'" % (sids, pts) + ) self._common_delete_sample_steps(sample_names) @@ -259,8 +266,8 @@ def generate_files(self, samples=None, columns=None): """ with qdb.sql_connection.TRN: # figuring out the filepath of the sample template - _id, fp = qdb.util.get_mountpoint('templates')[0] - fp = join(fp, '%d_%s.txt' % (self.id, strftime("%Y%m%d-%H%M%S"))) + _id, fp = qdb.util.get_mountpoint("templates")[0] + fp = join(fp, "%d_%s.txt" % (self.id, strftime("%Y%m%d-%H%M%S"))) # storing the sample template self.to_file(fp) @@ -277,7 +284,7 @@ def ebi_sample_accessions(self): dict of {str: str} The EBI sample accession numbers keyed by sample id """ - return self._get_accession_numbers('ebi_sample_accession') + return self._get_accession_numbers("ebi_sample_accession") @ebi_sample_accessions.setter def ebi_sample_accessions(self, value): @@ -293,7 +300,7 @@ def ebi_sample_accessions(self, value): QiitaDBError If a sample in `value` already has an accession number """ - self._update_accession_numbers('ebi_sample_accession', value) + self._update_accession_numbers("ebi_sample_accession", value) @property def biosample_accessions(self): @@ -304,7 +311,7 @@ def biosample_accessions(self): dict of {str: str} The biosample accession numbers keyed by sample id """ - return self._get_accession_numbers('biosample_accession') + return 
self._get_accession_numbers("biosample_accession") @biosample_accessions.setter def biosample_accessions(self, value): @@ -320,7 +327,7 @@ def biosample_accessions(self, value): QiitaDBError If a sample in `value` already has an accession number """ - self._update_accession_numbers('biosample_accession', value) + self._update_accession_numbers("biosample_accession", value) def to_dataframe(self, add_ebi_accessions=False, samples=None): """Returns the metadata template as a dataframe @@ -336,8 +343,9 @@ def to_dataframe(self, add_ebi_accessions=False, samples=None): if add_ebi_accessions: accessions = self.ebi_sample_accessions - df['qiita_ebi_sample_accessions'] = df.index.map( - lambda sid: accessions[sid]) + df["qiita_ebi_sample_accessions"] = df.index.map( + lambda sid: accessions[sid] + ) return df diff --git a/qiita_db/metadata_template/test/test_base_metadata_template.py b/qiita_db/metadata_template/test/test_base_metadata_template.py index 7b83350c8..d2142231b 100644 --- a/qiita_db/metadata_template/test/test_base_metadata_template.py +++ b/qiita_db/metadata_template/test/test_base_metadata_template.py @@ -8,8 +8,8 @@ from unittest import TestCase, main -from qiita_core.exceptions import IncompetentQiitaDeveloperError import qiita_db as qdb +from qiita_core.exceptions import IncompetentQiitaDeveloperError class TestBaseSample(TestCase): @@ -19,19 +19,20 @@ def test_init(self): """BaseSample init should raise an error (it's a base class)""" with self.assertRaises(IncompetentQiitaDeveloperError): qdb.metadata_template.base_metadata_template.BaseSample( - 'SKM7.640188', - qdb.metadata_template.sample_template.SampleTemplate(1)) + "SKM7.640188", qdb.metadata_template.sample_template.SampleTemplate(1) + ) def test_exists(self): """exists should raise an error if called from the base class""" with self.assertRaises(IncompetentQiitaDeveloperError): qdb.metadata_template.base_metadata_template.BaseSample.exists( - 'SKM7.640188', - qdb.metadata_template.sample_template.SampleTemplate(1)) + "SKM7.640188", qdb.metadata_template.sample_template.SampleTemplate(1) + ) class TestMetadataTemplateReadOnly(TestCase): """Tests the MetadataTemplate base class""" + def setUp(self): self.study = qdb.study.Study(1) @@ -48,15 +49,13 @@ def test_exist(self): MT.exists(self.study) def test_table_name(self): - """table name raises an error because it's not called from a subclass - """ + """table name raises an error because it's not called from a subclass""" MT = qdb.metadata_template.base_metadata_template.MetadataTemplate with self.assertRaises(IncompetentQiitaDeveloperError): MT._table_name(self.study) def test_common_creation_steps(self): - """common_creation_steps raises an error from base class - """ + """common_creation_steps raises an error from base class""" MT = qdb.metadata_template.base_metadata_template.MetadataTemplate with self.assertRaises(IncompetentQiitaDeveloperError): MT._common_creation_steps(None, 1) @@ -69,80 +68,131 @@ def test_clean_validate_template(self): def test_identify_pgsql_reserved_words(self): MT = qdb.metadata_template.base_metadata_template.MetadataTemplate - results = MT._identify_pgsql_reserved_words_in_column_names([ - 'select', - 'column', - 'just_fine1']) - self.assertCountEqual(set(results), {'column', 'select'}) + results = MT._identify_pgsql_reserved_words_in_column_names( + ["select", "column", "just_fine1"] + ) + self.assertCountEqual(set(results), {"column", "select"}) def test_identify_qiime2_reserved_words(self): MT = 
qdb.metadata_template.base_metadata_template.MetadataTemplate - results = MT._identify_qiime2_reserved_words_in_column_names([ - 'feature id', - 'feature-id', - 'featureid', - 'id', - 'sample id', - 'sample-id', - 'sampleid']) - self.assertCountEqual(set(results), {'feature id', 'feature-id', - 'featureid', 'id', 'sample id', - 'sample-id', 'sampleid'}) + results = MT._identify_qiime2_reserved_words_in_column_names( + [ + "feature id", + "feature-id", + "featureid", + "id", + "sample id", + "sample-id", + "sampleid", + ] + ) + self.assertCountEqual( + set(results), + { + "feature id", + "feature-id", + "featureid", + "id", + "sample id", + "sample-id", + "sampleid", + }, + ) def test_identify_invalid_characters(self): MT = qdb.metadata_template.base_metadata_template.MetadataTemplate - results = MT._identify_column_names_with_invalid_characters([ - 'tax on', - 'bla.', - '.', - 'sampleid', - 'sample_id', - '{', - 'bla:1', - 'bla|2', - 'bla1:2|3', - 'this&is', - '4column', - 'just_fine2']) - self.assertCountEqual(set(results), {'tax on', - 'bla.', - '.', - '{', - 'this&is', - '4column'}) + results = MT._identify_column_names_with_invalid_characters( + [ + "tax on", + "bla.", + ".", + "sampleid", + "sample_id", + "{", + "bla:1", + "bla|2", + "bla1:2|3", + "this&is", + "4column", + "just_fine2", + ] + ) + self.assertCountEqual( + set(results), {"tax on", "bla.", ".", "{", "this&is", "4column"} + ) def test_restrictions(self): MT = qdb.metadata_template obs = MT.sample_template.SampleTemplate(1).restrictions exp = { - 'env_package': [ - 'air', 'built environment', 'host-associated', - 'human-associated', 'human-skin', 'human-oral', - 'human-gut', 'human-vaginal', 'microbial mat/biofilm', - 'misc environment', 'plant-associated', 'sediment', 'soil', - 'wastewater/sludge', 'water']} + "env_package": [ + "air", + "built environment", + "host-associated", + "human-associated", + "human-skin", + "human-oral", + "human-gut", + "human-vaginal", + "microbial mat/biofilm", + "misc environment", + "plant-associated", + "sediment", + "soil", + "wastewater/sludge", + "water", + ] + } self.assertEqual(obs, exp) obs = MT.prep_template.PrepTemplate(1).restrictions exp = { - 'target_gene': ['16S rRNA', '18S rRNA', 'ITS1/2', 'LSU'], - 'platform': ['DNBSEQ', 'FASTA', 'Illumina', 'Ion_Torrent', 'LS454', - 'Oxford Nanopore'], - 'target_subfragment': ['V3', 'V4', 'V6', 'V9', 'ITS1/2'], - 'instrument_model': [ - '454 GS', '454 GS 20', '454 GS FLX', '454 GS FLX+', - '454 GS FLX Titanium', '454 GS Junior', 'DNBSEQ-G400', - 'DNBSEQ-T7', 'DNBSEQ-G800', - 'Illumina Genome Analyzer', 'Illumina Genome Analyzer II', - 'Illumina Genome Analyzer IIx', 'Illumina HiScanSQ', - 'Illumina HiSeq 1000', 'Illumina HiSeq 1500', - 'Illumina HiSeq 2000', 'Illumina HiSeq 2500', - 'Illumina HiSeq 3000', 'Illumina HiSeq 4000', 'Illumina MiSeq', - 'Illumina MiniSeq', 'Illumina NovaSeq 6000', 'NextSeq 500', - 'NextSeq 550', 'Ion Torrent PGM', 'Ion Torrent Proton', - 'Ion Torrent S5', 'Ion Torrent S5 XL', 'MinION', 'GridION', - 'PromethION', 'unspecified']} + "target_gene": ["16S rRNA", "18S rRNA", "ITS1/2", "LSU"], + "platform": [ + "DNBSEQ", + "FASTA", + "Illumina", + "Ion_Torrent", + "LS454", + "Oxford Nanopore", + ], + "target_subfragment": ["V3", "V4", "V6", "V9", "ITS1/2"], + "instrument_model": [ + "454 GS", + "454 GS 20", + "454 GS FLX", + "454 GS FLX+", + "454 GS FLX Titanium", + "454 GS Junior", + "DNBSEQ-G400", + "DNBSEQ-T7", + "DNBSEQ-G800", + "Illumina Genome Analyzer", + "Illumina Genome Analyzer II", + "Illumina Genome Analyzer 
IIx", + "Illumina HiScanSQ", + "Illumina HiSeq 1000", + "Illumina HiSeq 1500", + "Illumina HiSeq 2000", + "Illumina HiSeq 2500", + "Illumina HiSeq 3000", + "Illumina HiSeq 4000", + "Illumina MiSeq", + "Illumina MiniSeq", + "Illumina NovaSeq 6000", + "NextSeq 500", + "NextSeq 550", + "Ion Torrent PGM", + "Ion Torrent Proton", + "Ion Torrent S5", + "Ion Torrent S5 XL", + "MinION", + "GridION", + "PromethION", + "unspecified", + ], + } self.assertEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py index c4978f47b..2e61229b6 100644 --- a/qiita_db/metadata_template/test/test_prep_template.py +++ b/qiita_db/metadata_template/test/test_prep_template.py @@ -5,61 +5,77 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main -from tempfile import mkstemp -from os import close, remove -from os.path import join, exists from collections import Iterable from copy import deepcopy from datetime import datetime +from os import close, remove +from os.path import exists, join +from tempfile import mkstemp +from unittest import TestCase, main import numpy.testing as npt import pandas as pd from pandas.testing import assert_frame_equal -from qiita_core.util import qiita_test_checker -from qiita_core.exceptions import IncompetentQiitaDeveloperError import qiita_db as qdb +from qiita_core.exceptions import IncompetentQiitaDeveloperError +from qiita_core.util import qiita_test_checker @qiita_test_checker() class TestPrepSample(TestCase): def setUp(self): - self.prep_template = \ - qdb.metadata_template.prep_template.PrepTemplate(1) - self.sample_id = '1.SKB8.640193' + self.prep_template = qdb.metadata_template.prep_template.PrepTemplate(1) + self.sample_id = "1.SKB8.640193" self.tester = qdb.metadata_template.prep_template.PrepSample( - self.sample_id, self.prep_template) - self.exp_categories = {'center_name', 'center_project_name', - 'emp_status', 'barcode', 'instrument_model', - 'library_construction_protocol', - 'primer', 'target_subfragment', - 'target_gene', 'run_center', 'run_prefix', - 'run_date', 'experiment_center', - 'experiment_design_description', - 'experiment_title', 'platform', 'samp_size', - 'sequencing_meth', 'illumina_technology', - 'sample_center', 'pcr_primers', 'study_center'} + self.sample_id, self.prep_template + ) + self.exp_categories = { + "center_name", + "center_project_name", + "emp_status", + "barcode", + "instrument_model", + "library_construction_protocol", + "primer", + "target_subfragment", + "target_gene", + "run_center", + "run_prefix", + "run_date", + "experiment_center", + "experiment_design_description", + "experiment_title", + "platform", + "samp_size", + "sequencing_meth", + "illumina_technology", + "sample_center", + "pcr_primers", + "study_center", + } def test_init_unknown_error(self): """Init errors if the PrepSample id is not found in the template""" with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): qdb.metadata_template.prep_template.PrepSample( - 'Not_a_Sample', self.prep_template) + "Not_a_Sample", self.prep_template + ) def test_init_wrong_template(self): """Raises an error if using a SampleTemplate instead of PrepTemplate""" with self.assertRaises(IncompetentQiitaDeveloperError): qdb.metadata_template.prep_template.PrepSample( - '1.SKB8.640193', - 
qdb.metadata_template.sample_template.SampleTemplate(1)) + "1.SKB8.640193", qdb.metadata_template.sample_template.SampleTemplate(1) + ) def test_init(self): """Init correctly initializes the PrepSample object""" sample = qdb.metadata_template.prep_template.PrepSample( - self.sample_id, self.prep_template) + self.sample_id, self.prep_template + ) # Check that the internal id have been correctly set - self.assertEqual(sample._id, '1.SKB8.640193') + self.assertEqual(sample._id, "1.SKB8.640193") # Check that the internal template have been correctly set self.assertEqual(sample._md_template, self.prep_template) # Check that the internal dynamic table name have been correctly set @@ -68,31 +84,39 @@ def test_init(self): def test_eq_true(self): """Equality correctly returns true""" other = qdb.metadata_template.prep_template.PrepSample( - self.sample_id, self.prep_template) + self.sample_id, self.prep_template + ) self.assertTrue(self.tester == other) def test_eq_false_type(self): """Equality returns false if types are not equal""" other = qdb.metadata_template.sample_template.Sample( - self.sample_id, - qdb.metadata_template.sample_template.SampleTemplate(1)) + self.sample_id, qdb.metadata_template.sample_template.SampleTemplate(1) + ) self.assertFalse(self.tester == other) def test_eq_false_id(self): """Equality returns false if ids are different""" other = qdb.metadata_template.prep_template.PrepSample( - '1.SKD8.640184', self.prep_template) + "1.SKD8.640184", self.prep_template + ) self.assertFalse(self.tester == other) def test_exists_true(self): """Exists returns true if the PrepSample exists""" - self.assertTrue(qdb.metadata_template.prep_template.PrepSample.exists( - self.sample_id, self.prep_template)) + self.assertTrue( + qdb.metadata_template.prep_template.PrepSample.exists( + self.sample_id, self.prep_template + ) + ) def test_exists_false(self): """Exists returns false if the PrepSample does not exists""" - self.assertFalse(qdb.metadata_template.prep_template.PrepSample.exists( - 'Not_a_Sample', self.prep_template)) + self.assertFalse( + qdb.metadata_template.prep_template.PrepSample.exists( + "Not_a_Sample", self.prep_template + ) + ) def test_get_categories(self): """Correctly returns the set of category headers""" @@ -104,27 +128,26 @@ def test_len(self): self.assertEqual(len(self.tester), 22) def test_getitem_required(self): - """Get item returns the correct metadata value from the required table - """ - self.assertEqual(self.tester['center_name'], 'ANL') - self.assertTrue(self.tester['center_project_name'] is None) + """Get item returns the correct metadata value from the required table""" + self.assertEqual(self.tester["center_name"], "ANL") + self.assertTrue(self.tester["center_project_name"] is None) def test_getitem_dynamic(self): - """Get item returns the correct metadata value from the dynamic table - """ - self.assertEqual(self.tester['pcr_primers'], - 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT') - self.assertEqual(self.tester['barcode'], 'AGCGCTCACATC') + """Get item returns the correct metadata value from the dynamic table""" + self.assertEqual( + self.tester["pcr_primers"], + "FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT", + ) + self.assertEqual(self.tester["barcode"], "AGCGCTCACATC") def test_getitem_id_column(self): - """Get item returns the correct metadata value from the changed column - """ - self.assertEqual(self.tester['emp_status'], 'EMP') + """Get item returns the correct metadata value from the changed column""" + 
self.assertEqual(self.tester["emp_status"], "EMP") def test_getitem_error(self): """Get item raises an error if category does not exists""" with self.assertRaises(KeyError): - self.tester['Not_a_Category'] + self.tester["Not_a_Category"] def test_iter(self): """iter returns an iterator over the category headers""" @@ -134,12 +157,12 @@ def test_iter(self): def test_contains_true(self): """contains returns true if the category header exists""" - self.assertTrue('Barcode' in self.tester) - self.assertTrue('barcode' in self.tester) + self.assertTrue("Barcode" in self.tester) + self.assertTrue("barcode" in self.tester) def test_contains_false(self): """contains returns false if the category header does not exists""" - self.assertFalse('Not_a_Category' in self.tester) + self.assertFalse("Not_a_Category" in self.tester) def test_keys(self): """keys returns an iterator over the metadata headers""" @@ -151,81 +174,111 @@ def test_values(self): """values returns an iterator over the values""" obs = self.tester.values() self.assertTrue(isinstance(obs, Iterable)) - exp = {'ANL', None, None, None, 'EMP', 'AGCGCTCACATC', - 'This analysis was done as in Caporaso et al 2011 Genome ' - 'research. The PCR primers (F515/R806) were developed against ' - 'the V4 region of the 16S rRNA (both bacteria and archaea), ' - 'which we determined would yield optimal community clustering ' - 'with reads of this length using a procedure similar to that of' - ' ref. 15. [For reference, this primer pair amplifies the ' - 'region 533_786 in the Escherichia coli strain 83972 sequence ' - '(greengenes accession no. prokMSA_id:470367).] The reverse PCR' - ' primer is barcoded with a 12-base error-correcting Golay code' - ' to facilitate multiplexing of up to 1,500 samples per lane, ' - 'and both PCR primers contain sequencer adapter regions.', - 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', - 's_G1_L001_sequences', '8/1/12', 'ANL', - 'micro biome of soil and rhizosphere of cannabis plants from ' - 'CA', 'Cannabis Soil Microbiome', 'Illumina', 'Illumina MiSeq', - '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', - 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'} + exp = { + "ANL", + None, + None, + None, + "EMP", + "AGCGCTCACATC", + "This analysis was done as in Caporaso et al 2011 Genome " + "research. The PCR primers (F515/R806) were developed against " + "the V4 region of the 16S rRNA (both bacteria and archaea), " + "which we determined would yield optimal community clustering " + "with reads of this length using a procedure similar to that of" + " ref. 15. [For reference, this primer pair amplifies the " + "region 533_786 in the Escherichia coli strain 83972 sequence " + "(greengenes accession no. prokMSA_id:470367).] 
The reverse PCR" + " primer is barcoded with a 12-base error-correcting Golay code" + " to facilitate multiplexing of up to 1,500 samples per lane, " + "and both PCR primers contain sequencer adapter regions.", + "GTGCCAGCMGCCGCGGTAA", + "V4", + "16S rRNA", + "ANL", + "s_G1_L001_sequences", + "8/1/12", + "ANL", + "micro biome of soil and rhizosphere of cannabis plants from CA", + "Cannabis Soil Microbiome", + "Illumina", + "Illumina MiSeq", + ".25,g", + "Sequencing by synthesis", + "MiSeq", + "ANL", + "FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT", + "CCME", + } self.assertEqual(set(obs), exp) def test_items(self): """items returns an iterator over the (key, value) tuples""" obs = self.tester.items() self.assertTrue(isinstance(obs, Iterable)) - exp = {('center_name', 'ANL'), ('center_project_name', None), - ('emp_status', 'EMP'), ('barcode', 'AGCGCTCACATC'), - ('library_construction_protocol', - 'This analysis was done as in Caporaso et al 2011 Genome ' - 'research. The PCR primers (F515/R806) were developed against ' - 'the V4 region of the 16S rRNA (both bacteria and archaea), ' - 'which we determined would yield optimal community clustering ' - 'with reads of this length using a procedure similar to that ' - 'of ref. 15. [For reference, this primer pair amplifies the ' - 'region 533_786 in the Escherichia coli strain 83972 sequence ' - '(greengenes accession no. prokMSA_id:470367).] The reverse ' - 'PCR primer is barcoded with a 12-base error-correcting Golay ' - 'code to facilitate multiplexing of up to 1,500 samples per ' - 'lane, and both PCR primers contain sequencer adapter ' - 'regions.'), ('primer', 'GTGCCAGCMGCCGCGGTAA'), - ('target_subfragment', 'V4'), ('target_gene', '16S rRNA'), - ('run_center', 'ANL'), ('run_prefix', 's_G1_L001_sequences'), - ('run_date', '8/1/12'), ('experiment_center', 'ANL'), - ('experiment_design_description', - 'micro biome of soil and rhizosphere of cannabis plants ' - 'from CA'), ('experiment_title', 'Cannabis Soil Microbiome'), - ('platform', 'Illumina'), - ('instrument_model', 'Illumina MiSeq'), ('samp_size', '.25,g'), - ('sequencing_meth', 'Sequencing by synthesis'), - ('illumina_technology', 'MiSeq'), ('sample_center', 'ANL'), - ('pcr_primers', - 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT'), - ('study_center', 'CCME')} + exp = { + ("center_name", "ANL"), + ("center_project_name", None), + ("emp_status", "EMP"), + ("barcode", "AGCGCTCACATC"), + ( + "library_construction_protocol", + "This analysis was done as in Caporaso et al 2011 Genome " + "research. The PCR primers (F515/R806) were developed against " + "the V4 region of the 16S rRNA (both bacteria and archaea), " + "which we determined would yield optimal community clustering " + "with reads of this length using a procedure similar to that " + "of ref. 15. [For reference, this primer pair amplifies the " + "region 533_786 in the Escherichia coli strain 83972 sequence " + "(greengenes accession no. prokMSA_id:470367).] 
The reverse " + "PCR primer is barcoded with a 12-base error-correcting Golay " + "code to facilitate multiplexing of up to 1,500 samples per " + "lane, and both PCR primers contain sequencer adapter " + "regions.", + ), + ("primer", "GTGCCAGCMGCCGCGGTAA"), + ("target_subfragment", "V4"), + ("target_gene", "16S rRNA"), + ("run_center", "ANL"), + ("run_prefix", "s_G1_L001_sequences"), + ("run_date", "8/1/12"), + ("experiment_center", "ANL"), + ( + "experiment_design_description", + "micro biome of soil and rhizosphere of cannabis plants from CA", + ), + ("experiment_title", "Cannabis Soil Microbiome"), + ("platform", "Illumina"), + ("instrument_model", "Illumina MiSeq"), + ("samp_size", ".25,g"), + ("sequencing_meth", "Sequencing by synthesis"), + ("illumina_technology", "MiSeq"), + ("sample_center", "ANL"), + ("pcr_primers", "FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT"), + ("study_center", "CCME"), + } self.assertEqual(set(obs), exp) def test_get(self): """get returns the correct sample object""" - self.assertEqual(self.tester.get('barcode'), 'AGCGCTCACATC') + self.assertEqual(self.tester.get("barcode"), "AGCGCTCACATC") def test_get_none(self): """get returns none if the sample id is not present""" - self.assertTrue(self.tester.get('Not_a_Category') is None) + self.assertTrue(self.tester.get("Not_a_Category") is None) def test_columns_restrictions(self): """that it returns SAMPLE_TEMPLATE_COLUMNS""" exp = deepcopy(qdb.metadata_template.constants.PREP_TEMPLATE_COLUMNS) - exp.update( - qdb.metadata_template.constants.PREP_TEMPLATE_COLUMNS_TARGET_GENE) + exp.update(qdb.metadata_template.constants.PREP_TEMPLATE_COLUMNS_TARGET_GENE) self.assertEqual(self.prep_template.columns_restrictions, exp) def test_can_be_updated(self): """test if the template can be updated""" # you can't update restricted colums in a pt with data - self.assertFalse(self.prep_template.can_be_updated({'barcode'})) + self.assertFalse(self.prep_template.can_be_updated({"barcode"})) # but you can if not restricted - self.assertTrue(self.prep_template.can_be_updated({'center_name'})) + self.assertTrue(self.prep_template.can_be_updated({"center_name"})) def test_can_be_extended(self): """test if the template can be extended""" @@ -234,12 +287,13 @@ def test_can_be_extended(self): self.assertTrue(obs_bool) self.assertEqual(obs_msg, "") # You can't add samples if there are preprocessed data generated - obs_bool, obs_msg = self.prep_template.can_be_extended( - ["NEW_SAMPLE"], []) + obs_bool, obs_msg = self.prep_template.can_be_extended(["NEW_SAMPLE"], []) self.assertFalse(obs_bool) - exp_msg = ("The artifact attached to the prep template has already " - "been processed. No new samples can be added to the prep " - "template") + exp_msg = ( + "The artifact attached to the prep template has already " + "been processed. 
No new samples can be added to the prep " + "template" + ) self.assertEqual(obs_msg, exp_msg) def test_can_be_extended_duplicated_column(self): @@ -250,30 +304,48 @@ def test_can_be_extended_duplicated_column(self): def test_metadata_headers(self): PT = qdb.metadata_template.prep_template.PrepTemplate obs = PT.metadata_headers() - exp = ['barcode', 'center_name', 'center_project_name', 'emp_status', - 'experiment_center', 'experiment_design_description', - 'experiment_title', 'illumina_technology', 'instrument_model', - 'library_construction_protocol', 'pcr_primers', 'platform', - 'primer', 'run_center', 'run_date', 'run_prefix', 'samp_size', - 'sample_center', 'sequencing_meth', 'study_center', - 'target_gene', 'target_subfragment'] + exp = [ + "barcode", + "center_name", + "center_project_name", + "emp_status", + "experiment_center", + "experiment_design_description", + "experiment_title", + "illumina_technology", + "instrument_model", + "library_construction_protocol", + "pcr_primers", + "platform", + "primer", + "run_center", + "run_date", + "run_prefix", + "samp_size", + "sample_center", + "sequencing_meth", + "study_center", + "target_gene", + "target_subfragment", + ] self.assertCountEqual(obs, exp) def test_setitem(self): with self.assertRaises(qdb.exceptions.QiitaDBColumnError): - self.tester['column that does not exist'] = 0.3 + self.tester["column that does not exist"] = 0.3 tester = qdb.metadata_template.prep_template.PrepSample( - '1.SKD8.640184', self.prep_template) + "1.SKD8.640184", self.prep_template + ) - self.assertEqual(tester['center_name'], 'ANL') - tester['center_name'] = "FOO" - self.assertEqual(tester['center_name'], "FOO") + self.assertEqual(tester["center_name"], "ANL") + tester["center_name"] = "FOO" + self.assertEqual(tester["center_name"], "FOO") def test_delitem(self): """delitem raises an error (currently not allowed)""" with self.assertRaises(qdb.exceptions.QiitaDBNotImplementedError): - del self.tester['pcr_primers'] + del self.tester["pcr_primers"] @qiita_test_checker() @@ -283,101 +355,114 @@ def setUp(self): # the name of the sample where we store all columns for a sample/prep # information and in this tests we want to avoid having to import it # in different places so adding to the setUp - self.QCN = \ - qdb.metadata_template.base_metadata_template.QIITA_COLUMN_NAME + self.QCN = qdb.metadata_template.base_metadata_template.QIITA_COLUMN_NAME self.metadata_dict = { - 'SKB8.640193': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'EMP_status': 'EMP', - 'str_column': 'Value for sample 1', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'qiita_prep_id': 1000, - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'insdc_nulls': '3.6', - 'experiment_design_description': 'BBBB'}, - 'SKD8.640184': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'EMP_status': 'EMP', - 'str_column': 'Value for sample 2', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTC', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'qiita_prep_id': 1000, - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'insdc_nulls': 'NoT applicable', - 'experiment_design_description': 'BBBB'}, - 'SKB7.640196': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'EMP_status': 
'EMP', - 'str_column': 'Value for sample 3', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CCTCTGAGAGCT', - 'run_prefix': "s_G1_L002_sequences", - 'platform': 'Illumina', - 'qiita_prep_id': 1000, - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'insdc_nulls': 'unspecified', - 'experiment_design_description': 'BBBB'} - } - self.metadata = pd.DataFrame.from_dict(self.metadata_dict, - orient='index', dtype=str) + "SKB8.640193": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "EMP_status": "EMP", + "str_column": "Value for sample 1", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "qiita_prep_id": 1000, + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "insdc_nulls": "3.6", + "experiment_design_description": "BBBB", + }, + "SKD8.640184": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "EMP_status": "EMP", + "str_column": "Value for sample 2", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTC", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "qiita_prep_id": 1000, + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "insdc_nulls": "NoT applicable", + "experiment_design_description": "BBBB", + }, + "SKB7.640196": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "EMP_status": "EMP", + "str_column": "Value for sample 3", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CCTCTGAGAGCT", + "run_prefix": "s_G1_L002_sequences", + "platform": "Illumina", + "qiita_prep_id": 1000, + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "insdc_nulls": "unspecified", + "experiment_design_description": "BBBB", + }, + } + self.metadata = pd.DataFrame.from_dict( + self.metadata_dict, orient="index", dtype=str + ) metadata_prefixed_dict = { - '1.SKB8.640193': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'EMP_status': 'EMP', - 'str_column': 'Value for sample 1', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'qiita_prep_id': 1000, - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'insdc_nulls': '3.6', - 'experiment_design_description': 'BBBB'}, - '1.SKD8.640184': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'EMP_status': 'EMP', - 'str_column': 'Value for sample 2', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTC', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'qiita_prep_id': 1000, - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'insdc_nulls': 'not applicable', - 'experiment_design_description': 'BBBB'}, - '1.SKB7.640196': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'EMP_status': 'EMP', - 'str_column': 'Value for sample 3', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CCTCTGAGAGCT', - 'run_prefix': "s_G1_L002_sequences", - 'platform': 'Illumina', - 'qiita_prep_id': 1000, - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'insdc_nulls': 'not applicable', - 'experiment_design_description': 'BBBB'} - } - 
self.metadata_prefixed = pd.DataFrame.from_dict(metadata_prefixed_dict, - orient='index') + "1.SKB8.640193": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "EMP_status": "EMP", + "str_column": "Value for sample 1", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "qiita_prep_id": 1000, + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "insdc_nulls": "3.6", + "experiment_design_description": "BBBB", + }, + "1.SKD8.640184": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "EMP_status": "EMP", + "str_column": "Value for sample 2", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTC", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "qiita_prep_id": 1000, + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "insdc_nulls": "not applicable", + "experiment_design_description": "BBBB", + }, + "1.SKB7.640196": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "EMP_status": "EMP", + "str_column": "Value for sample 3", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CCTCTGAGAGCT", + "run_prefix": "s_G1_L002_sequences", + "platform": "Illumina", + "qiita_prep_id": 1000, + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "insdc_nulls": "not applicable", + "experiment_design_description": "BBBB", + }, + } + self.metadata_prefixed = pd.DataFrame.from_dict( + metadata_prefixed_dict, orient="index" + ) self.test_study = qdb.study.Study(1) self.data_type = "18S" @@ -385,29 +470,54 @@ def setUp(self): self.tester = qdb.metadata_template.prep_template.PrepTemplate(1) self.exp_sample_ids = { - '1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195', '1.SKB4.640189', - '1.SKB5.640181', '1.SKB6.640176', '1.SKB7.640196', '1.SKB8.640193', - '1.SKB9.640200', '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198', - '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190', '1.SKD7.640191', - '1.SKD8.640184', '1.SKD9.640182', '1.SKM1.640183', '1.SKM2.640199', - '1.SKM3.640197', '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187', - '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192'} + "1.SKB1.640202", + "1.SKB2.640194", + "1.SKB3.640195", + "1.SKB4.640189", + "1.SKB5.640181", + "1.SKB6.640176", + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKB9.640200", + "1.SKD1.640179", + "1.SKD2.640178", + "1.SKD3.640198", + "1.SKD4.640185", + "1.SKD5.640186", + "1.SKD6.640190", + "1.SKD7.640191", + "1.SKD8.640184", + "1.SKD9.640182", + "1.SKM1.640183", + "1.SKM2.640199", + "1.SKM3.640197", + "1.SKM4.640180", + "1.SKM5.640177", + "1.SKM6.640187", + "1.SKM7.640188", + "1.SKM8.640201", + "1.SKM9.640192", + } # Generate some files for new artifact - fd, fp1 = mkstemp(suffix='_seqs.fastq') + fd, fp1 = mkstemp(suffix="_seqs.fastq") close(fd) - with open(fp1, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" - "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" - "+\n" - "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n") - fd, fp2 = mkstemp(suffix='_barcodes.fastq') + with open(fp1, "w") as f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" + "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" + "+\n" + "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n" + ) + fd, fp2 = mkstemp(suffix="_barcodes.fastq") close(fd) - with open(fp2, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n" - 
"NNNCNNNNNNNNN\n" - "+\n" - "#############\n") + with open(fp2, "w") as f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n" + "NNNCNNNNNNNNN\n" + "+\n" + "#############\n" + ) self.filepaths = [(fp1, 1), (fp2, 3)] self._clean_up_files = [fp1, fp2] @@ -437,13 +547,11 @@ def test_table_name(self): def test_exists_true(self): """Exists returns true when the PrepTemplate already exists""" - self.assertTrue( - qdb.metadata_template.prep_template.PrepTemplate.exists(1)) + self.assertTrue(qdb.metadata_template.prep_template.PrepTemplate.exists(1)) def test_exists_false(self): """Exists returns false when the PrepTemplate does not exists""" - self.assertFalse( - qdb.metadata_template.prep_template.PrepTemplate.exists(30000)) + self.assertFalse(qdb.metadata_template.prep_template.PrepTemplate.exists(30000)) def test_get_sample_ids(self): """get_sample_ids returns the correct set of sample ids""" @@ -456,15 +564,16 @@ def test_len(self): def test_getitem(self): """Get item returns the correct sample object""" - obs = self.tester['1.SKM7.640188'] + obs = self.tester["1.SKM7.640188"] exp = qdb.metadata_template.prep_template.PrepSample( - '1.SKM7.640188', self.tester) + "1.SKM7.640188", self.tester + ) self.assertEqual(obs, exp) def test_getitem_error(self): """Get item raises an error if key does not exists""" with self.assertRaises(KeyError): - self.tester['Not_a_Sample'] + self.tester["Not_a_Sample"] def test_iter(self): """iter returns an iterator over the sample ids""" @@ -474,11 +583,11 @@ def test_iter(self): def test_contains_true(self): """contains returns true if the sample id exists""" - self.assertTrue('1.SKM7.640188' in self.tester) + self.assertTrue("1.SKM7.640188" in self.tester) def test_contains_false(self): """contains returns false if the sample id does not exists""" - self.assertFalse('Not_a_Sample' in self.tester) + self.assertFalse("Not_a_Sample" in self.tester) def test_keys(self): """keys returns an iterator over the sample ids""" @@ -490,151 +599,264 @@ def test_values(self): """values returns an iterator over the values""" obs = self.tester.values() self.assertTrue(isinstance(obs, Iterable)) - exp = {qdb.metadata_template.prep_template.PrepSample('1.SKB1.640202', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKB2.640194', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKB3.640195', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKB4.640189', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKB5.640181', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKB6.640176', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKB7.640196', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKB8.640193', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKB9.640200', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKD1.640179', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKD2.640178', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKD3.640198', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKD4.640185', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKD5.640186', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKD6.640190', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKD7.640191', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKD8.640184', - 
self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKD9.640182', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKM1.640183', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKM2.640199', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKM3.640197', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKM4.640180', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKM5.640177', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKM6.640187', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKM7.640188', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKM8.640201', - self.tester), - qdb.metadata_template.prep_template.PrepSample('1.SKM9.640192', - self.tester)} + exp = { + qdb.metadata_template.prep_template.PrepSample( + "1.SKB1.640202", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKB2.640194", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKB3.640195", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKB4.640189", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKB5.640181", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKB6.640176", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKB7.640196", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKB8.640193", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKB9.640200", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKD1.640179", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKD2.640178", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKD3.640198", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKD4.640185", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKD5.640186", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKD6.640190", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKD7.640191", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKD8.640184", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKD9.640182", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKM1.640183", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKM2.640199", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKM3.640197", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKM4.640180", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKM5.640177", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKM6.640187", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKM7.640188", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKM8.640201", self.tester + ), + qdb.metadata_template.prep_template.PrepSample( + "1.SKM9.640192", self.tester + ), + } # Creating a list and looping over it since unittest does not call # the __eq__ function on the objects - for o, e in zip(sorted(list(obs), key=lambda x: x.id), - sorted(exp, key=lambda x: x.id)): + for o, e in zip( + sorted(list(obs), key=lambda x: x.id), sorted(exp, key=lambda x: x.id) + ): self.assertEqual(o, e) def test_items(self): """items returns an iterator over the (key, 
value) tuples""" obs = self.tester.items() self.assertTrue(isinstance(obs, Iterable)) - exp = [('1.SKB1.640202', - qdb.metadata_template.prep_template.PrepSample('1.SKB1.640202', - self.tester)), - ('1.SKB2.640194', - qdb.metadata_template.prep_template.PrepSample('1.SKB2.640194', - self.tester)), - ('1.SKB3.640195', - qdb.metadata_template.prep_template.PrepSample('1.SKB3.640195', - self.tester)), - ('1.SKB4.640189', - qdb.metadata_template.prep_template.PrepSample('1.SKB4.640189', - self.tester)), - ('1.SKB5.640181', - qdb.metadata_template.prep_template.PrepSample('1.SKB5.640181', - self.tester)), - ('1.SKB6.640176', - qdb.metadata_template.prep_template.PrepSample('1.SKB6.640176', - self.tester)), - ('1.SKB7.640196', - qdb.metadata_template.prep_template.PrepSample('1.SKB7.640196', - self.tester)), - ('1.SKB8.640193', - qdb.metadata_template.prep_template.PrepSample('1.SKB8.640193', - self.tester)), - ('1.SKB9.640200', - qdb.metadata_template.prep_template.PrepSample('1.SKB9.640200', - self.tester)), - ('1.SKD1.640179', - qdb.metadata_template.prep_template.PrepSample('1.SKD1.640179', - self.tester)), - ('1.SKD2.640178', - qdb.metadata_template.prep_template.PrepSample('1.SKD2.640178', - self.tester)), - ('1.SKD3.640198', - qdb.metadata_template.prep_template.PrepSample('1.SKD3.640198', - self.tester)), - ('1.SKD4.640185', - qdb.metadata_template.prep_template.PrepSample('1.SKD4.640185', - self.tester)), - ('1.SKD5.640186', - qdb.metadata_template.prep_template.PrepSample('1.SKD5.640186', - self.tester)), - ('1.SKD6.640190', - qdb.metadata_template.prep_template.PrepSample('1.SKD6.640190', - self.tester)), - ('1.SKD7.640191', - qdb.metadata_template.prep_template.PrepSample('1.SKD7.640191', - self.tester)), - ('1.SKD8.640184', - qdb.metadata_template.prep_template.PrepSample('1.SKD8.640184', - self.tester)), - ('1.SKD9.640182', - qdb.metadata_template.prep_template.PrepSample('1.SKD9.640182', - self.tester)), - ('1.SKM1.640183', - qdb.metadata_template.prep_template.PrepSample('1.SKM1.640183', - self.tester)), - ('1.SKM2.640199', - qdb.metadata_template.prep_template.PrepSample('1.SKM2.640199', - self.tester)), - ('1.SKM3.640197', - qdb.metadata_template.prep_template.PrepSample('1.SKM3.640197', - self.tester)), - ('1.SKM4.640180', - qdb.metadata_template.prep_template.PrepSample('1.SKM4.640180', - self.tester)), - ('1.SKM5.640177', - qdb.metadata_template.prep_template.PrepSample('1.SKM5.640177', - self.tester)), - ('1.SKM6.640187', - qdb.metadata_template.prep_template.PrepSample('1.SKM6.640187', - self.tester)), - ('1.SKM7.640188', - qdb.metadata_template.prep_template.PrepSample('1.SKM7.640188', - self.tester)), - ('1.SKM8.640201', - qdb.metadata_template.prep_template.PrepSample('1.SKM8.640201', - self.tester)), - ('1.SKM9.640192', - qdb.metadata_template.prep_template.PrepSample('1.SKM9.640192', - self.tester))] + exp = [ + ( + "1.SKB1.640202", + qdb.metadata_template.prep_template.PrepSample( + "1.SKB1.640202", self.tester + ), + ), + ( + "1.SKB2.640194", + qdb.metadata_template.prep_template.PrepSample( + "1.SKB2.640194", self.tester + ), + ), + ( + "1.SKB3.640195", + qdb.metadata_template.prep_template.PrepSample( + "1.SKB3.640195", self.tester + ), + ), + ( + "1.SKB4.640189", + qdb.metadata_template.prep_template.PrepSample( + "1.SKB4.640189", self.tester + ), + ), + ( + "1.SKB5.640181", + qdb.metadata_template.prep_template.PrepSample( + "1.SKB5.640181", self.tester + ), + ), + ( + "1.SKB6.640176", + qdb.metadata_template.prep_template.PrepSample( + "1.SKB6.640176", 
self.tester + ), + ), + ( + "1.SKB7.640196", + qdb.metadata_template.prep_template.PrepSample( + "1.SKB7.640196", self.tester + ), + ), + ( + "1.SKB8.640193", + qdb.metadata_template.prep_template.PrepSample( + "1.SKB8.640193", self.tester + ), + ), + ( + "1.SKB9.640200", + qdb.metadata_template.prep_template.PrepSample( + "1.SKB9.640200", self.tester + ), + ), + ( + "1.SKD1.640179", + qdb.metadata_template.prep_template.PrepSample( + "1.SKD1.640179", self.tester + ), + ), + ( + "1.SKD2.640178", + qdb.metadata_template.prep_template.PrepSample( + "1.SKD2.640178", self.tester + ), + ), + ( + "1.SKD3.640198", + qdb.metadata_template.prep_template.PrepSample( + "1.SKD3.640198", self.tester + ), + ), + ( + "1.SKD4.640185", + qdb.metadata_template.prep_template.PrepSample( + "1.SKD4.640185", self.tester + ), + ), + ( + "1.SKD5.640186", + qdb.metadata_template.prep_template.PrepSample( + "1.SKD5.640186", self.tester + ), + ), + ( + "1.SKD6.640190", + qdb.metadata_template.prep_template.PrepSample( + "1.SKD6.640190", self.tester + ), + ), + ( + "1.SKD7.640191", + qdb.metadata_template.prep_template.PrepSample( + "1.SKD7.640191", self.tester + ), + ), + ( + "1.SKD8.640184", + qdb.metadata_template.prep_template.PrepSample( + "1.SKD8.640184", self.tester + ), + ), + ( + "1.SKD9.640182", + qdb.metadata_template.prep_template.PrepSample( + "1.SKD9.640182", self.tester + ), + ), + ( + "1.SKM1.640183", + qdb.metadata_template.prep_template.PrepSample( + "1.SKM1.640183", self.tester + ), + ), + ( + "1.SKM2.640199", + qdb.metadata_template.prep_template.PrepSample( + "1.SKM2.640199", self.tester + ), + ), + ( + "1.SKM3.640197", + qdb.metadata_template.prep_template.PrepSample( + "1.SKM3.640197", self.tester + ), + ), + ( + "1.SKM4.640180", + qdb.metadata_template.prep_template.PrepSample( + "1.SKM4.640180", self.tester + ), + ), + ( + "1.SKM5.640177", + qdb.metadata_template.prep_template.PrepSample( + "1.SKM5.640177", self.tester + ), + ), + ( + "1.SKM6.640187", + qdb.metadata_template.prep_template.PrepSample( + "1.SKM6.640187", self.tester + ), + ), + ( + "1.SKM7.640188", + qdb.metadata_template.prep_template.PrepSample( + "1.SKM7.640188", self.tester + ), + ), + ( + "1.SKM8.640201", + qdb.metadata_template.prep_template.PrepSample( + "1.SKM8.640201", self.tester + ), + ), + ( + "1.SKM9.640192", + qdb.metadata_template.prep_template.PrepSample( + "1.SKM9.640192", self.tester + ), + ), + ] # Creating a list and looping over it since unittest does not call # the __eq__ function on the objects for o, e in zip(sorted(list(obs)), sorted(exp)): @@ -642,14 +864,15 @@ def test_items(self): def test_get(self): """get returns the correct PrepSample object""" - obs = self.tester.get('1.SKM7.640188') + obs = self.tester.get("1.SKM7.640188") exp = qdb.metadata_template.prep_template.PrepSample( - '1.SKM7.640188', self.tester) + "1.SKM7.640188", self.tester + ) self.assertEqual(obs, exp) def test_get_none(self): """get returns none if the sample id is not present""" - self.assertTrue(self.tester.get('Not_a_Sample') is None) + self.assertTrue(self.tester.get("Not_a_Sample") is None) def test_data_type(self): """data_type returns the string with the data_type""" @@ -670,53 +893,92 @@ def test_to_dataframe(self): # 27 samples self.assertEqual(len(obs), 27) - self.assertEqual(set(obs.index), { - u'1.SKB1.640202', u'1.SKB2.640194', u'1.SKB3.640195', - u'1.SKB4.640189', u'1.SKB5.640181', u'1.SKB6.640176', - u'1.SKB7.640196', u'1.SKB8.640193', u'1.SKB9.640200', - u'1.SKD1.640179', u'1.SKD2.640178', u'1.SKD3.640198', - 
u'1.SKD4.640185', u'1.SKD5.640186', u'1.SKD6.640190', - u'1.SKD7.640191', u'1.SKD8.640184', u'1.SKD9.640182', - u'1.SKM1.640183', u'1.SKM2.640199', u'1.SKM3.640197', - u'1.SKM4.640180', u'1.SKM5.640177', u'1.SKM6.640187', - u'1.SKM7.640188', u'1.SKM8.640201', u'1.SKM9.640192'}) - - self.assertEqual(set(obs.columns), { - u'center_name', u'center_project_name', - u'emp_status', u'barcode', - u'library_construction_protocol', u'primer', - u'target_subfragment', u'target_gene', u'run_center', - u'run_prefix', u'run_date', u'experiment_center', - u'experiment_design_description', u'experiment_title', u'platform', - u'instrument_model', u'samp_size', u'sequencing_meth', - u'illumina_technology', u'sample_center', u'pcr_primers', - u'study_center', 'qiita_prep_id'}) + self.assertEqual( + set(obs.index), + { + "1.SKB1.640202", + "1.SKB2.640194", + "1.SKB3.640195", + "1.SKB4.640189", + "1.SKB5.640181", + "1.SKB6.640176", + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKB9.640200", + "1.SKD1.640179", + "1.SKD2.640178", + "1.SKD3.640198", + "1.SKD4.640185", + "1.SKD5.640186", + "1.SKD6.640190", + "1.SKD7.640191", + "1.SKD8.640184", + "1.SKD9.640182", + "1.SKM1.640183", + "1.SKM2.640199", + "1.SKM3.640197", + "1.SKM4.640180", + "1.SKM5.640177", + "1.SKM6.640187", + "1.SKM7.640188", + "1.SKM8.640201", + "1.SKM9.640192", + }, + ) + + self.assertEqual( + set(obs.columns), + { + "center_name", + "center_project_name", + "emp_status", + "barcode", + "library_construction_protocol", + "primer", + "target_subfragment", + "target_gene", + "run_center", + "run_prefix", + "run_date", + "experiment_center", + "experiment_design_description", + "experiment_title", + "platform", + "instrument_model", + "samp_size", + "sequencing_meth", + "illumina_technology", + "sample_center", + "pcr_primers", + "study_center", + "qiita_prep_id", + }, + ) # test with add_ebi_accessions as True obs = self.tester.to_dataframe(True) self.assertEqual( self.tester.ebi_experiment_accessions, - obs.qiita_ebi_experiment_accessions.to_dict()) + obs.qiita_ebi_experiment_accessions.to_dict(), + ) def test_clean_validate_template_error_bad_chars(self): - """Raises an error if there are invalid characters in the sample names - """ - self.metadata.index = ['o()xxxx[{::::::::>', 'sample.1', 'sample.3'] + """Raises an error if there are invalid characters in the sample names""" + self.metadata.index = ["o()xxxx[{::::::::>", "sample.1", "sample.3"] PT = qdb.metadata_template.prep_template.PrepTemplate with self.assertRaises(qdb.exceptions.QiitaDBColumnError): PT._clean_validate_template(self.metadata, 2) def test_clean_validate_template_error_duplicate_cols(self): """Raises an error if there are duplicated columns in the template""" - self.metadata['STR_COLUMN'] = pd.Series(['', '', ''], - index=self.metadata.index) + self.metadata["STR_COLUMN"] = pd.Series(["", "", ""], index=self.metadata.index) PT = qdb.metadata_template.prep_template.PrepTemplate with self.assertRaises(qdb.exceptions.QiitaDBDuplicateHeaderError): PT._clean_validate_template(self.metadata, 2) def test_clean_validate_template_error_duplicate_samples(self): """Raises an error if there are duplicated samples in the templates""" - self.metadata.index = ['sample.1', 'sample.1', 'sample.3'] + self.metadata.index = ["sample.1", "sample.1", "sample.3"] PT = qdb.metadata_template.prep_template.PrepTemplate with self.assertRaises(qdb.exceptions.QiitaDBDuplicateSamplesError): PT._clean_validate_template(self.metadata, 2) @@ -725,50 +987,56 @@ def test_clean_validate_template(self): PT = 
qdb.metadata_template.prep_template.PrepTemplate # modify input to make sure we hit all cases md = self.metadata.copy() - md.loc['SKB7.640196']['str_column'] = 'UnSpeciFied' + md.loc["SKB7.640196"]["str_column"] = "UnSpeciFied" obs = PT._clean_validate_template(md, 2) metadata_dict = { - '2.SKB8.640193': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'emp_status': 'EMP', - 'str_column': 'Value for sample 1', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'insdc_nulls': '3.6', - 'experiment_design_description': 'BBBB'}, - '2.SKD8.640184': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'emp_status': 'EMP', - 'str_column': 'Value for sample 2', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTC', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'insdc_nulls': 'not applicable', - 'experiment_design_description': 'BBBB'}, - '2.SKB7.640196': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'emp_status': 'EMP', - 'str_column': 'not applicable', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CCTCTGAGAGCT', - 'run_prefix': "s_G1_L002_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'insdc_nulls': 'not applicable', - 'experiment_design_description': 'BBBB'} - } - exp = pd.DataFrame.from_dict(metadata_dict, orient='index', dtype=str) + "2.SKB8.640193": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "emp_status": "EMP", + "str_column": "Value for sample 1", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "insdc_nulls": "3.6", + "experiment_design_description": "BBBB", + }, + "2.SKD8.640184": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "emp_status": "EMP", + "str_column": "Value for sample 2", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTC", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "insdc_nulls": "not applicable", + "experiment_design_description": "BBBB", + }, + "2.SKB7.640196": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "emp_status": "EMP", + "str_column": "not applicable", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CCTCTGAGAGCT", + "run_prefix": "s_G1_L002_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "insdc_nulls": "not applicable", + "experiment_design_description": "BBBB", + }, + } + exp = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) obs.sort_index(axis=0, inplace=True) obs.sort_index(axis=1, inplace=True) @@ -779,14 +1047,15 @@ def test_clean_validate_template(self): def test_clean_validate_template_no_forbidden_words1(self): PT = qdb.metadata_template.prep_template.PrepTemplate - self.metadata.rename(columns={'center_name': 
'sampleid'}, inplace=True) + self.metadata.rename(columns={"center_name": "sampleid"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): PT._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_forbidden_words2(self): PT = qdb.metadata_template.prep_template.PrepTemplate - self.metadata.rename(columns={'center_name': 'linkerprimersequence'}, - inplace=True) + self.metadata.rename( + columns={"center_name": "linkerprimersequence"}, inplace=True + ) raised = False try: PT._clean_validate_template(self.metadata, 2) @@ -796,104 +1065,109 @@ def test_clean_validate_template_no_forbidden_words2(self): def test_clean_validate_template_no_pgsql_reserved_words(self): PT = qdb.metadata_template.prep_template.PrepTemplate - self.metadata.rename(columns={'center_name': 'select'}, inplace=True) + self.metadata.rename(columns={"center_name": "select"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): PT._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_qiime2_reserved_words(self): PT = qdb.metadata_template.prep_template.PrepTemplate - self.metadata.rename(columns={'center_name': 'featureid'}, - inplace=True) + self.metadata.rename(columns={"center_name": "featureid"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): PT._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_invalid_chars(self): PT = qdb.metadata_template.prep_template.PrepTemplate - self.metadata.rename(columns={'center_name': 'taxon id'}, inplace=True) + self.metadata.rename(columns={"center_name": "taxon id"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): PT._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_invalid_chars2(self): PT = qdb.metadata_template.prep_template.PrepTemplate - self.metadata.rename(columns={'center_name': 'bla.'}, inplace=True) + self.metadata.rename(columns={"center_name": "bla."}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): PT._clean_validate_template(self.metadata, 2) def test_get_category(self): pt = qdb.metadata_template.prep_template.PrepTemplate(1) - obs = pt.get_category('primer') + obs = pt.get_category("primer") exp = { - '1.SKB2.640194': 'GTGCCAGCMGCCGCGGTAA', - '1.SKM4.640180': 'GTGCCAGCMGCCGCGGTAA', - '1.SKB3.640195': 'GTGCCAGCMGCCGCGGTAA', - '1.SKB6.640176': 'GTGCCAGCMGCCGCGGTAA', - '1.SKD6.640190': 'GTGCCAGCMGCCGCGGTAA', - '1.SKM6.640187': 'GTGCCAGCMGCCGCGGTAA', - '1.SKD9.640182': 'GTGCCAGCMGCCGCGGTAA', - '1.SKM8.640201': 'GTGCCAGCMGCCGCGGTAA', - '1.SKM2.640199': 'GTGCCAGCMGCCGCGGTAA', - '1.SKD2.640178': 'GTGCCAGCMGCCGCGGTAA', - '1.SKB7.640196': 'GTGCCAGCMGCCGCGGTAA', - '1.SKD4.640185': 'GTGCCAGCMGCCGCGGTAA', - '1.SKB8.640193': 'GTGCCAGCMGCCGCGGTAA', - '1.SKM3.640197': 'GTGCCAGCMGCCGCGGTAA', - '1.SKD5.640186': 'GTGCCAGCMGCCGCGGTAA', - '1.SKB1.640202': 'GTGCCAGCMGCCGCGGTAA', - '1.SKM1.640183': 'GTGCCAGCMGCCGCGGTAA', - '1.SKD1.640179': 'GTGCCAGCMGCCGCGGTAA', - '1.SKD3.640198': 'GTGCCAGCMGCCGCGGTAA', - '1.SKB5.640181': 'GTGCCAGCMGCCGCGGTAA', - '1.SKB4.640189': 'GTGCCAGCMGCCGCGGTAA', - '1.SKB9.640200': 'GTGCCAGCMGCCGCGGTAA', - '1.SKM9.640192': 'GTGCCAGCMGCCGCGGTAA', - '1.SKD8.640184': 'GTGCCAGCMGCCGCGGTAA', - '1.SKM5.640177': 'GTGCCAGCMGCCGCGGTAA', - '1.SKM7.640188': 'GTGCCAGCMGCCGCGGTAA', - '1.SKD7.640191': 'GTGCCAGCMGCCGCGGTAA'} + "1.SKB2.640194": "GTGCCAGCMGCCGCGGTAA", + "1.SKM4.640180": "GTGCCAGCMGCCGCGGTAA", + "1.SKB3.640195": "GTGCCAGCMGCCGCGGTAA", + 
"1.SKB6.640176": "GTGCCAGCMGCCGCGGTAA", + "1.SKD6.640190": "GTGCCAGCMGCCGCGGTAA", + "1.SKM6.640187": "GTGCCAGCMGCCGCGGTAA", + "1.SKD9.640182": "GTGCCAGCMGCCGCGGTAA", + "1.SKM8.640201": "GTGCCAGCMGCCGCGGTAA", + "1.SKM2.640199": "GTGCCAGCMGCCGCGGTAA", + "1.SKD2.640178": "GTGCCAGCMGCCGCGGTAA", + "1.SKB7.640196": "GTGCCAGCMGCCGCGGTAA", + "1.SKD4.640185": "GTGCCAGCMGCCGCGGTAA", + "1.SKB8.640193": "GTGCCAGCMGCCGCGGTAA", + "1.SKM3.640197": "GTGCCAGCMGCCGCGGTAA", + "1.SKD5.640186": "GTGCCAGCMGCCGCGGTAA", + "1.SKB1.640202": "GTGCCAGCMGCCGCGGTAA", + "1.SKM1.640183": "GTGCCAGCMGCCGCGGTAA", + "1.SKD1.640179": "GTGCCAGCMGCCGCGGTAA", + "1.SKD3.640198": "GTGCCAGCMGCCGCGGTAA", + "1.SKB5.640181": "GTGCCAGCMGCCGCGGTAA", + "1.SKB4.640189": "GTGCCAGCMGCCGCGGTAA", + "1.SKB9.640200": "GTGCCAGCMGCCGCGGTAA", + "1.SKM9.640192": "GTGCCAGCMGCCGCGGTAA", + "1.SKD8.640184": "GTGCCAGCMGCCGCGGTAA", + "1.SKM5.640177": "GTGCCAGCMGCCGCGGTAA", + "1.SKM7.640188": "GTGCCAGCMGCCGCGGTAA", + "1.SKD7.640191": "GTGCCAGCMGCCGCGGTAA", + } self.assertEqual(obs, exp) def test_get_category_no_exists(self): pt = qdb.metadata_template.prep_template.PrepTemplate(1) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): - pt.get_category('DOESNOTEXIST') + pt.get_category("DOESNOTEXIST") def test_create_duplicate_header(self): """Create raises an error when duplicate headers are present""" - self.metadata['STR_COLUMN'] = pd.Series(['', '', ''], - index=self.metadata.index) + self.metadata["STR_COLUMN"] = pd.Series(["", "", ""], index=self.metadata.index) with self.assertRaises(qdb.exceptions.QiitaDBDuplicateHeaderError): qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) + self.metadata, self.test_study, self.data_type + ) def test_create_bad_sample_names(self): # set a horrible list of sample names - self.metadata.index = ['o()xxxx[{::::::::>', 'sample.1', 'sample.3'] + self.metadata.index = ["o()xxxx[{::::::::>", "sample.1", "sample.3"] with self.assertRaises(qdb.exceptions.QiitaDBColumnError): qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) + self.metadata, self.test_study, self.data_type + ) def test_create_unknown_sample_names(self): # set two real and one fake sample name - self.metadata_dict['NOTREAL'] = self.metadata_dict['SKB7.640196'] - del self.metadata_dict['SKB7.640196'] - self.metadata = pd.DataFrame.from_dict(self.metadata_dict, - orient='index', dtype=str) + self.metadata_dict["NOTREAL"] = self.metadata_dict["SKB7.640196"] + del self.metadata_dict["SKB7.640196"] + self.metadata = pd.DataFrame.from_dict( + self.metadata_dict, orient="index", dtype=str + ) # Test error raised and correct error given with self.assertRaises(qdb.exceptions.QiitaDBExecutionError) as err: qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) + self.metadata, self.test_study, self.data_type + ) self.assertEqual( str(err.exception), - 'Samples found in prep template but not sample template: 1.NOTREAL' - ) + "Samples found in prep template but not sample template: 1.NOTREAL", + ) def test_create_shorter_prep_template(self): # remove one sample so not all samples in the prep template - del self.metadata_dict['SKB7.640196'] - self.metadata = pd.DataFrame.from_dict(self.metadata_dict, - orient='index', dtype=str) + del self.metadata_dict["SKB7.640196"] + self.metadata = pd.DataFrame.from_dict( + self.metadata_dict, orient="index", dtype=str + ) pt = 
qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) + self.metadata, self.test_study, self.data_type + ) with qdb.sql_connection.TRN: sql = """SELECT sample_id @@ -901,7 +1175,7 @@ def test_create_shorter_prep_template(self): WHERE sample_id != '%s'""" % (pt.id, self.QCN) qdb.sql_connection.TRN.add(sql) obs = qdb.sql_connection.TRN.execute_fetchindex() - exp = [['1.SKB8.640193'], ['1.SKD8.640184']] + exp = [["1.SKB8.640193"], ["1.SKD8.640184"]] self.assertEqual(obs, exp) # cleaning qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) @@ -911,63 +1185,78 @@ def _common_creation_checks(self, pt, fp_count, name): self.assertEqual(pt.data_type(), self.data_type) self.assertEqual(pt.data_type(ret_id=True), self.data_type_id) self.assertEqual(pt.artifact, None) - self.assertEqual(pt.investigation_type, 'AMPLICON') + self.assertEqual(pt.investigation_type, "AMPLICON") self.assertEqual(pt.study_id, self.test_study.id) self.assertEqual(pt.status, "sandbox") - exp_sample_ids = {'%s.SKB8.640193' % self.test_study.id, - '%s.SKD8.640184' % self.test_study.id, - '%s.SKB7.640196' % self.test_study.id} + exp_sample_ids = { + "%s.SKB8.640193" % self.test_study.id, + "%s.SKD8.640184" % self.test_study.id, + "%s.SKB7.640196" % self.test_study.id, + } self.assertEqual(pt._get_sample_ids(), exp_sample_ids) self.assertEqual(len(pt), 3) - exp_categories = {'str_column', 'ebi_submission_accession', - 'run_prefix', 'barcode', 'primer', 'platform', - 'instrument_model', 'experiment_design_description', - 'library_construction_protocol', 'center_name', - 'center_project_name', 'insdc_nulls', 'emp_status'} + exp_categories = { + "str_column", + "ebi_submission_accession", + "run_prefix", + "barcode", + "primer", + "platform", + "instrument_model", + "experiment_design_description", + "library_construction_protocol", + "center_name", + "center_project_name", + "insdc_nulls", + "emp_status", + } self.assertCountEqual(pt.categories, exp_categories) exp_dict = { - '%s.SKB7.640196' % self.test_study.id: { - 'barcode': 'CCTCTGAGAGCT', - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L002_sequences', - 'str_column': 'Value for sample 3', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'insdc_nulls': 'not applicable', - 'emp_status': 'EMP'}, - '%s.SKB8.640193' % self.test_study.id: { - 'barcode': 'GTCCGCAAGTTA', - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L001_sequences', - 'str_column': 'Value for sample 1', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'insdc_nulls': '3.6', - 'emp_status': 'EMP'}, - '%s.SKD8.640184' % self.test_study.id: { - 'barcode': 'CGTAGAGCTCTC', - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L001_sequences', - 'str_column': 'Value for sample 2', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'insdc_nulls': 'not applicable', - 'emp_status': 'EMP'} + "%s.SKB7.640196" % self.test_study.id: { + 
"barcode": "CCTCTGAGAGCT", + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L002_sequences", + "str_column": "Value for sample 3", + "center_name": "ANL", + "center_project_name": "Test Project", + "insdc_nulls": "not applicable", + "emp_status": "EMP", + }, + "%s.SKB8.640193" % self.test_study.id: { + "barcode": "GTCCGCAAGTTA", + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L001_sequences", + "str_column": "Value for sample 1", + "center_name": "ANL", + "center_project_name": "Test Project", + "insdc_nulls": "3.6", + "emp_status": "EMP", + }, + "%s.SKD8.640184" % self.test_study.id: { + "barcode": "CGTAGAGCTCTC", + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L001_sequences", + "str_column": "Value for sample 2", + "center_name": "ANL", + "center_project_name": "Test Project", + "insdc_nulls": "not applicable", + "emp_status": "EMP", + }, } for s_id in exp_sample_ids: self.assertEqual(pt[s_id]._to_dict(), exp_dict[s_id]) @@ -978,24 +1267,30 @@ def _common_creation_checks(self, pt, fp_count, name): def test_validate_restrictions(self): PT = qdb.metadata_template.prep_template.PrepTemplate - pt = PT.create(self.metadata, self.test_study, self.data_type, - name='New Prep For Test') + pt = PT.create( + self.metadata, self.test_study, self.data_type, name="New Prep For Test" + ) success, message = pt.validate_restrictions() - self.assertEqual(message, 'prep %d is missing columns "target_gene, ' - 'target_subfragment"' % pt.id) + self.assertEqual( + message, + 'prep %d is missing columns "target_gene, target_subfragment"' % pt.id, + ) self.assertFalse(success) metadata = self.metadata.copy() - metadata['target_gene'] = 'Should Warn' - metadata['target_subfragment'] = 'V4' + metadata["target_gene"] = "Should Warn" + metadata["target_subfragment"] = "V4" pt.extend(metadata) success, message = pt.validate_restrictions() - self.assertEqual(message, 'prep %d has invalid values: "Should ' - 'Warn", valid values are: "16S rRNA, 18S rRNA, ' - 'ITS1/2, LSU"' % pt.id) + self.assertEqual( + message, + 'prep %d has invalid values: "Should ' + 'Warn", valid values are: "16S rRNA, 18S rRNA, ' + 'ITS1/2, LSU"' % pt.id, + ) self.assertFalse(success) - metadata['target_gene'] = '16S rRNA' + metadata["target_gene"] = "16S rRNA" # as we are testing the update functionality of a prep info file, we # can also test that the timestamps are working correctly current_ct = pt.creation_timestamp @@ -1006,7 +1301,7 @@ def test_validate_restrictions(self): self.assertTrue(current_mt < pt.modification_timestamp) success, message = pt.validate_restrictions() success, message = pt.validate_restrictions() - self.assertEqual(message, '') + self.assertEqual(message, "") self.assertTrue(success) # cleaning @@ -1014,10 +1309,10 @@ def test_validate_restrictions(self): def test_create(self): """Creates a new PrepTemplate""" - fp_count = qdb.util.get_count('qiita.filepath') + fp_count = qdb.util.get_count("qiita.filepath") pt = 
qdb.metadata_template.prep_template.PrepTemplate.create(
-            self.metadata, self.test_study, self.data_type,
-            name='New Prep For Test')
+            self.metadata, self.test_study, self.data_type, name="New Prep For Test"
+        )
         self._common_creation_checks(pt, fp_count, "New Prep For Test")
         # checking that the creation and modification timestamps are within
         # 2 seconds of current time
@@ -1030,21 +1325,23 @@ def test_create(self):
 
     def test_create_already_prefixed_samples(self):
         """Creates a new PrepTemplate"""
-        fp_count = qdb.util.get_count('qiita.filepath')
+        fp_count = qdb.util.get_count("qiita.filepath")
         pt = qdb.metadata_template.prep_template.PrepTemplate.create(
-            self.metadata_prefixed, self.test_study, self.data_type)
-        self._common_creation_checks(pt, fp_count,
-                                     "Prep information %s" % pt.id)
+            self.metadata_prefixed, self.test_study, self.data_type
+        )
+        self._common_creation_checks(pt, fp_count, "Prep information %s" % pt.id)
         # cleaning
         qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id)
 
     def test_empty_prep(self):
         """Creates a new PrepTemplate"""
         metadata = pd.DataFrame.from_dict(
-            {'SKB8.640193': {}, 'SKD8.640184': {}}, orient='index', dtype=str)
+            {"SKB8.640193": {}, "SKD8.640184": {}}, orient="index", dtype=str
+        )
         with self.assertRaises(ValueError):
             qdb.metadata_template.prep_template.PrepTemplate.create(
-                metadata, self.test_study, self.data_type)
+                metadata, self.test_study, self.data_type
+            )
 
     def test_generate_files(self):
         fp_count = qdb.util.get_count("qiita.filepath")
@@ -1056,80 +1353,96 @@ def test_generate_files(self):
 
     def test_create_data_type_id(self):
         """Creates a new PrepTemplate passing the data_type_id"""
-        fp_count = qdb.util.get_count('qiita.filepath')
+        fp_count = qdb.util.get_count("qiita.filepath")
         pt = qdb.metadata_template.prep_template.PrepTemplate.create(
-            self.metadata, self.test_study, self.data_type_id)
-        self._common_creation_checks(pt, fp_count,
-                                     "Prep information %s" % pt.id)
+            self.metadata, self.test_study, self.data_type_id
+        )
+        self._common_creation_checks(pt, fp_count, "Prep information %s" % pt.id)
         # cleaning
         qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id)
 
     def test_create_warning(self):
-        """Warns if a required columns is missing for a given functionality
-        """
-        del self.metadata['barcode']
+        """Warns if a required column is missing for a given functionality"""
+        del self.metadata["barcode"]
         pt = npt.assert_warns(
             qdb.exceptions.QiitaDBWarning,
             qdb.metadata_template.prep_template.PrepTemplate.create,
-            self.metadata, self.test_study, self.data_type)
+            self.metadata,
+            self.test_study,
+            self.data_type,
+        )
         self.assertEqual(pt.data_type(), self.data_type)
         self.assertEqual(pt.data_type(ret_id=True), self.data_type_id)
         self.assertEqual(pt.artifact, None)
-        self.assertEqual(pt.investigation_type, 'AMPLICON')
+        self.assertEqual(pt.investigation_type, "AMPLICON")
         self.assertEqual(pt.study_id, self.test_study.id)
-        self.assertEqual(pt.status, 'sandbox')
-        exp_sample_ids = {'%s.SKB8.640193' % self.test_study.id,
-                          '%s.SKD8.640184' % self.test_study.id,
-                          '%s.SKB7.640196' % self.test_study.id}
+        self.assertEqual(pt.status, "sandbox")
+        exp_sample_ids = {
+            "%s.SKB8.640193" % self.test_study.id,
+            "%s.SKD8.640184" % self.test_study.id,
+            "%s.SKB7.640196" % self.test_study.id,
+        }
         self.assertEqual(pt._get_sample_ids(), exp_sample_ids)
         self.assertEqual(len(pt), 3)
-        exp_categories = {'str_column', 'ebi_submission_accession',
-                          'run_prefix', 'primer', 'platform', 'insdc_nulls',
-                          'instrument_model', 'experiment_design_description',
- 'library_construction_protocol', 'center_name', - 'center_project_name', 'emp_status'} + exp_categories = { + "str_column", + "ebi_submission_accession", + "run_prefix", + "primer", + "platform", + "insdc_nulls", + "instrument_model", + "experiment_design_description", + "library_construction_protocol", + "center_name", + "center_project_name", + "emp_status", + } self.assertCountEqual(pt.categories, exp_categories) exp_dict = { - '%s.SKB7.640196' % self.test_study.id: { - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L002_sequences', - 'str_column': 'Value for sample 3', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'insdc_nulls': 'not applicable', - 'emp_status': 'EMP'}, - '%s.SKB8.640193' % self.test_study.id: { - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L001_sequences', - 'str_column': 'Value for sample 1', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'insdc_nulls': '3.6', - 'emp_status': 'EMP'}, - '%s.SKD8.640184' % self.test_study.id: { - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L001_sequences', - 'str_column': 'Value for sample 2', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'insdc_nulls': 'not applicable', - 'emp_status': 'EMP'} + "%s.SKB7.640196" % self.test_study.id: { + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L002_sequences", + "str_column": "Value for sample 3", + "center_name": "ANL", + "center_project_name": "Test Project", + "insdc_nulls": "not applicable", + "emp_status": "EMP", + }, + "%s.SKB8.640193" % self.test_study.id: { + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L001_sequences", + "str_column": "Value for sample 1", + "center_name": "ANL", + "center_project_name": "Test Project", + "insdc_nulls": "3.6", + "emp_status": "EMP", + }, + "%s.SKD8.640184" % self.test_study.id: { + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L001_sequences", + "str_column": "Value for sample 2", + "center_name": "ANL", + "center_project_name": "Test Project", + "insdc_nulls": "not applicable", + "emp_status": "EMP", + }, } for s_id in exp_sample_ids: self.assertEqual(pt[s_id]._to_dict(), exp_dict[s_id]) @@ -1145,15 +1458,16 @@ def test_create_investigation_type_error(self): """Create raises an error if the investigation_type does not exists""" with self.assertRaises(qdb.exceptions.QiitaDBColumnError): 
qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type_id, - 'Not a term') + self.metadata, self.test_study, self.data_type_id, "Not a term" + ) def test_create_duplicated_column_error(self): """Create raises an error if the prep has a duplicated column name""" - self.metadata['season_environment'] = self.metadata['primer'] + self.metadata["season_environment"] = self.metadata["primer"] with self.assertRaises(qdb.exceptions.QiitaDBColumnError): qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type_id) + self.metadata, self.test_study, self.data_type_id + ) def test_delete_error(self): """Try to delete a prep template that already has preprocessed data""" @@ -1168,9 +1482,11 @@ def test_delete_unkonwn_id_error(self): def test_delete_error_raw_data(self): """Try to delete a prep template with a raw data attached to id""" pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type_id) + self.metadata, self.test_study, self.data_type_id + ) artifact = qdb.artifact.Artifact.create( - self.filepaths, "FASTQ", prep_template=pt) + self.filepaths, "FASTQ", prep_template=pt + ) with self.assertRaises(qdb.exceptions.QiitaDBExecutionError): qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) @@ -1181,7 +1497,8 @@ def test_delete_error_raw_data(self): def test_delete(self): """Deletes prep template 2""" pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type_id) + self.metadata, self.test_study, self.data_type_id + ) qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) exp = [] @@ -1211,28 +1528,34 @@ def test_delete(self): with self.assertRaises(ValueError): with qdb.sql_connection.TRN: - sql = """SELECT * - FROM qiita.prep_%d""" % pt.id + sql = ( + """SELECT * + FROM qiita.prep_%d""" + % pt.id + ) qdb.sql_connection.TRN.add(sql) def test_setitem(self): """setitem raises an error (currently not allowed)""" with self.assertRaises(qdb.exceptions.QiitaDBNotImplementedError): - self.tester['1.SKM7.640188'] = \ - qdb.metadata_template.prep_template.PrepSample('1.SKM7.640188', - self.tester) + self.tester["1.SKM7.640188"] = ( + qdb.metadata_template.prep_template.PrepSample( + "1.SKM7.640188", self.tester + ) + ) def test_delitem(self): """delitem raises an error (currently not allowed)""" with self.assertRaises(qdb.exceptions.QiitaDBNotImplementedError): - del self.tester['1.SKM7.640188'] + del self.tester["1.SKM7.640188"] def test_to_file(self): """to file writes a tab delimited file with all the metadata""" fd, fp = mkstemp() close(fd) pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) + self.metadata, self.test_study, self.data_type + ) pt.to_file(fp) self._clean_up_files.append(fp) with open(fp, newline=None) as f: @@ -1246,10 +1569,11 @@ def test_to_file(self): def test_investigation_type_setter(self): """Able to update the investigation type""" pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type_id) - self.assertEqual(pt.investigation_type, 'AMPLICON') + self.metadata, self.test_study, self.data_type_id + ) + self.assertEqual(pt.investigation_type, "AMPLICON") pt.investigation_type = "Other" - self.assertEqual(pt.investigation_type, 'Other') + self.assertEqual(pt.investigation_type, "Other") with self.assertRaises(qdb.exceptions.QiitaDBColumnError): 
pt.investigation_type = "should fail" @@ -1258,8 +1582,8 @@ def test_investigation_type_setter(self): def test_investigation_type_instance_setter(self): pt = qdb.metadata_template.prep_template.PrepTemplate(1) - pt.investigation_type = 'RNA-Seq' - self.assertEqual(pt.investigation_type, 'RNA-Seq') + pt.investigation_type = "RNA-Seq" + self.assertEqual(pt.investigation_type, "RNA-Seq") def test_deprecated_setter(self): pt = qdb.metadata_template.prep_template.PrepTemplate(1) @@ -1271,75 +1595,80 @@ def test_deprecated_setter(self): def test_status(self): pt = qdb.metadata_template.prep_template.PrepTemplate(1) - self.assertEqual(pt.status, 'private') + self.assertEqual(pt.status, "private") # Check that changing the status of the processed data, the status # of the prep template changes a = qdb.artifact.Artifact(1) - a.visibility = 'public' - self.assertEqual(pt.status, 'public') + a.visibility = "public" + self.assertEqual(pt.status, "public") # New prep templates have the status to sandbox because there is no # processed data associated with them pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type_id) - self.assertEqual(pt.status, 'sandbox') + self.metadata, self.test_study, self.data_type_id + ) + self.assertEqual(pt.status, "sandbox") # cleaning qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) def test_update_category(self): with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): - self.tester.update_category('barcode', {"foo": "bar"}) + self.tester.update_category("barcode", {"foo": "bar"}) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): - self.tester.update_category('missing column', - {'1.SKB7.640196': 'bar'}) + self.tester.update_category("missing column", {"1.SKB7.640196": "bar"}) - neg_test = self.tester['1.SKB7.640196']['barcode'] - mapping = {'1.SKB8.640193': 'AAAAAAAAAAAA', - '1.SKD8.640184': 'CCCCCCCCCCCC'} + neg_test = self.tester["1.SKB7.640196"]["barcode"] + mapping = {"1.SKB8.640193": "AAAAAAAAAAAA", "1.SKD8.640184": "CCCCCCCCCCCC"} - self.tester.update_category('barcode', mapping) + self.tester.update_category("barcode", mapping) - self.assertEqual(self.tester['1.SKB7.640196']['barcode'], - neg_test) - self.assertEqual(self.tester['1.SKB8.640193']['barcode'], - 'AAAAAAAAAAAA') - self.assertEqual(self.tester['1.SKD8.640184']['barcode'], - 'CCCCCCCCCCCC') + self.assertEqual(self.tester["1.SKB7.640196"]["barcode"], neg_test) + self.assertEqual(self.tester["1.SKB8.640193"]["barcode"], "AAAAAAAAAAAA") + self.assertEqual(self.tester["1.SKD8.640184"]["barcode"], "CCCCCCCCCCCC") - neg_test = self.tester['1.SKB7.640196']['center_name'] - mapping = {'1.SKB8.640193': 'FOO', - '1.SKD8.640184': 'BAR'} + neg_test = self.tester["1.SKB7.640196"]["center_name"] + mapping = {"1.SKB8.640193": "FOO", "1.SKD8.640184": "BAR"} - self.tester.update_category('center_name', mapping) + self.tester.update_category("center_name", mapping) - self.assertEqual(self.tester['1.SKB7.640196']['center_name'], neg_test) - self.assertEqual(self.tester['1.SKB8.640193']['center_name'], 'FOO') - self.assertEqual(self.tester['1.SKD8.640184']['center_name'], 'BAR') + self.assertEqual(self.tester["1.SKB7.640196"]["center_name"], neg_test) + self.assertEqual(self.tester["1.SKB8.640193"]["center_name"], "FOO") + self.assertEqual(self.tester["1.SKD8.640184"]["center_name"], "BAR") def test_qiime_map_fp(self): pt = qdb.metadata_template.prep_template.PrepTemplate(1) - exp = join(qdb.util.get_mountpoint('templates')[0][1], - 
'1_prep_1_qiime_[0-9]*-[0-9]*.txt') + exp = join( + qdb.util.get_mountpoint("templates")[0][1], + "1_prep_1_qiime_[0-9]*-[0-9]*.txt", + ) self.assertRegex(pt.qiime_map_fp, exp) def test_check_restrictions(self): obs = self.tester.check_restrictions( - [qdb.metadata_template.constants.PREP_TEMPLATE_COLUMNS['EBI']]) + [qdb.metadata_template.constants.PREP_TEMPLATE_COLUMNS["EBI"]] + ) self.assertEqual(obs, set()) - del self.metadata['primer'] + del self.metadata["primer"] pt = npt.assert_warns( qdb.exceptions.QiitaDBWarning, qdb.metadata_template.prep_template.PrepTemplate.create, - self.metadata, self.test_study, self.data_type) + self.metadata, + self.test_study, + self.data_type, + ) obs = pt.check_restrictions( - [qdb.metadata_template.constants.PREP_TEMPLATE_COLUMNS['EBI'], - qdb.metadata_template.constants.PREP_TEMPLATE_COLUMNS_TARGET_GENE[ - 'demultiplex']]) + [ + qdb.metadata_template.constants.PREP_TEMPLATE_COLUMNS["EBI"], + qdb.metadata_template.constants.PREP_TEMPLATE_COLUMNS_TARGET_GENE[ + "demultiplex" + ], + ] + ) self.assertEqual(obs, set()) # cleaning qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) @@ -1349,7 +1678,8 @@ def test_artifact(self): self.assertEqual(self.tester.artifact, qdb.artifact.Artifact(1)) pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type_id) + self.metadata, self.test_study, self.data_type_id + ) self.assertEqual(pt.artifact, None) # cleaning qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) @@ -1361,10 +1691,12 @@ def test_artifact_setter_error(self): def test_artifact_setter(self): pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, '16S') + self.metadata, self.test_study, "16S" + ) self.assertEqual(pt.artifact, None) artifact = qdb.artifact.Artifact.create( - self.filepaths, "FASTQ", prep_template=pt) + self.filepaths, "FASTQ", prep_template=pt + ) self.assertEqual(pt.artifact, artifact) # here we can test that we can properly create a workflow but we are @@ -1426,45 +1758,59 @@ def test_artifact_setter(self): # let's check that nothing is found due to the parameters, in specific # "prep": {"center_name": "ANL - 1"} - with self.assertRaisesRegex(ValueError, 'This preparation data type: ' - '"16S" and/or artifact type "FASTQ" does ' - 'not have valid workflows; this could be ' - 'due to required parameters, please check ' - 'the available workflows.'): - pt.add_default_workflow(qdb.user.User('test@foo.bar')) + with self.assertRaisesRegex( + ValueError, + "This preparation data type: " + '"16S" and/or artifact type "FASTQ" does ' + "not have valid workflows; this could be " + "due to required parameters, please check " + "the available workflows.", + ): + pt.add_default_workflow(qdb.user.User("test@foo.bar")) # now, let's replace the parameters for something fine qdb.software.DefaultWorkflow(1).parameters = { "sample": {"scientific_name": "1118232"}, - "prep": {"center_name": "ANL"}} - wk = pt.add_default_workflow(qdb.user.User('test@foo.bar')) + "prep": {"center_name": "ANL"}, + } + wk = pt.add_default_workflow(qdb.user.User("test@foo.bar")) self.assertEqual(len(wk.graph.nodes), 5) self.assertEqual(len(wk.graph.edges), 3) self.assertCountEqual( [x.command.name for x in wk.graph.nodes], # we should have 2 split libraries and 3 close reference - ['Split libraries FASTQ', 'Split libraries FASTQ', - 'Pick closed-reference OTUs', 'Pick closed-reference OTUs', - 'Pick closed-reference OTUs']) + [ + "Split libraries FASTQ", + 
"Split libraries FASTQ", + "Pick closed-reference OTUs", + "Pick closed-reference OTUs", + "Pick closed-reference OTUs", + ], + ) # at this point we can error all the previous steps, add a new smaller # workflow and make sure you get the same one as before because it will # have a higher match than the new one for pj in wk.graph.nodes: - pj._set_error('Killed') + pj._set_error("Killed") sql = """UPDATE qiita.default_workflow_data_type SET data_type_id = 1 WHERE default_workflow_id = 2""" qdb.sql_connection.perform_as_transaction(sql) - wk = pt.add_default_workflow(qdb.user.User('test@foo.bar')) + wk = pt.add_default_workflow(qdb.user.User("test@foo.bar")) self.assertEqual(len(wk.graph.nodes), 5) self.assertEqual(len(wk.graph.edges), 3) self.assertCountEqual( [x.command.name for x in wk.graph.nodes], # we should have 2 split libraries and 3 close reference - ['Split libraries FASTQ', 'Split libraries FASTQ', - 'Pick closed-reference OTUs', 'Pick closed-reference OTUs', - 'Pick closed-reference OTUs']) + [ + "Split libraries FASTQ", + "Split libraries FASTQ", + "Pick closed-reference OTUs", + "Pick closed-reference OTUs", + "Pick closed-reference OTUs", + ], + ) # let's return it back sql = """UPDATE qiita.default_workflow_data_type SET data_type_id = 2 @@ -1473,19 +1819,25 @@ def test_artifact_setter(self): # now let's try to generate again and it should fail cause the jobs # are already created - with self.assertRaisesRegex(ValueError, "Cannot create job because " - "the parameters are the same as jobs"): - pt.add_default_workflow(qdb.user.User('test@foo.bar')) + with self.assertRaisesRegex( + ValueError, "Cannot create job because the parameters are the same as jobs" + ): + pt.add_default_workflow(qdb.user.User("test@foo.bar")) # Then, let's clean up again and add a new command/step with 2 # BIOM input artifacts for pj in wk.graph.nodes: - pj._set_error('Killed') + pj._set_error("Killed") cmd = qdb.software.Command.create( - qdb.software.Software(1), "Multiple BIOM as inputs", "", { - 'req_artifact_1': ['artifact:["BIOM"]', None], - 'req_artifact_2': ['artifact:["BIOM"]', None], - }, outputs={'MB-output': 'BIOM'}) + qdb.software.Software(1), + "Multiple BIOM as inputs", + "", + { + "req_artifact_1": ['artifact:["BIOM"]', None], + "req_artifact_2": ['artifact:["BIOM"]', None], + }, + outputs={"MB-output": "BIOM"}, + ) cmd_dp = qdb.software.DefaultParameters.create("", cmd) # creating the new node for the cmd and linking it's two inputs with # two inputs @@ -1507,26 +1859,36 @@ def test_artifact_setter(self): VALUES (7, 3, 100) """ qdb.sql_connection.perform_as_transaction(sql) - wk = pt.add_default_workflow(qdb.user.User('test@foo.bar')) + wk = pt.add_default_workflow(qdb.user.User("test@foo.bar")) self.assertEqual(len(wk.graph.nodes), 6) self.assertEqual(len(wk.graph.edges), 5) self.assertCountEqual( [x.command.name for x in wk.graph.nodes], # we should have 2 split libraries and 3 close reference - ['Split libraries FASTQ', 'Split libraries FASTQ', - 'Pick closed-reference OTUs', 'Pick closed-reference OTUs', - 'Pick closed-reference OTUs', 'Multiple BIOM as inputs']) + [ + "Split libraries FASTQ", + "Split libraries FASTQ", + "Pick closed-reference OTUs", + "Pick closed-reference OTUs", + "Pick closed-reference OTUs", + "Multiple BIOM as inputs", + ], + ) # now let's test that an error is raised when there is no valid initial # input data; this moves the data type from FASTQ to taxa_summary for # the default_workflow_id = 1 qdb.sql_connection.perform_as_transaction( - 'UPDATE 
qiita.default_workflow SET artifact_type_id = 10 WHERE ' - 'default_workflow_id = 1') - with self.assertRaisesRegex(ValueError, 'This preparation data type: ' - '"16S" and/or artifact type "FASTQ" does ' - 'not have valid workflows'): - pt.add_default_workflow(qdb.user.User('test@foo.bar')) + "UPDATE qiita.default_workflow SET artifact_type_id = 10 WHERE " + "default_workflow_id = 1" + ) + with self.assertRaisesRegex( + ValueError, + "This preparation data type: " + '"16S" and/or artifact type "FASTQ" does ' + "not have valid workflows", + ): + pt.add_default_workflow(qdb.user.User("test@foo.bar")) # cleaning qdb.artifact.Artifact.delete(artifact.id) @@ -1536,41 +1898,46 @@ def test_can_be_updated_on_new(self): """test if the template can be updated""" # you can update a newly created pt pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) - self.assertTrue(pt.can_be_updated({'barcode'})) + self.metadata, self.test_study, self.data_type + ) + self.assertTrue(pt.can_be_updated({"barcode"})) # cleaning qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) def test_extend_add_samples(self): """extend correctly works adding new samples""" - md_2_samples = self.metadata.loc[('SKB8.640193', 'SKD8.640184'), :] + md_2_samples = self.metadata.loc[("SKB8.640193", "SKD8.640184"), :] pt = qdb.metadata_template.prep_template.PrepTemplate.create( - md_2_samples, self.test_study, self.data_type) + md_2_samples, self.test_study, self.data_type + ) - npt.assert_warns( - qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata) + npt.assert_warns(qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata) - exp_sample_ids = {'%s.SKB8.640193' % self.test_study.id, - '%s.SKD8.640184' % self.test_study.id, - '%s.SKB7.640196' % self.test_study.id} + exp_sample_ids = { + "%s.SKB8.640193" % self.test_study.id, + "%s.SKD8.640184" % self.test_study.id, + "%s.SKB7.640196" % self.test_study.id, + } self.assertEqual(pt._get_sample_ids(), exp_sample_ids) # test error due to max number of samples during extend cmax = qdb.util.max_preparation_samples() - sql = 'UPDATE settings SET max_preparation_samples = %s' + sql = "UPDATE settings SET max_preparation_samples = %s" qdb.sql_connection.perform_as_transaction(sql, [3]) df = pd.DataFrame.from_dict( - {'SKB1.640202': {'barcode': 'CCTCTGAGAGCT'}}, - orient='index', dtype=str) - with self.assertRaisesRegex(ValueError, "4 exceeds the max allowed " - "number of samples: 3"): + {"SKB1.640202": {"barcode": "CCTCTGAGAGCT"}}, orient="index", dtype=str + ) + with self.assertRaisesRegex( + ValueError, "4 exceeds the max allowed number of samples: 3" + ): pt.extend(df) # now test creation PT = qdb.metadata_template.prep_template.PrepTemplate qdb.sql_connection.perform_as_transaction(sql, [2]) - with self.assertRaisesRegex(ValueError, "3 exceeds the max allowed " - "number of samples: 2"): + with self.assertRaisesRegex( + ValueError, "3 exceeds the max allowed number of samples: 2" + ): PT.create(self.metadata, self.test_study, self.data_type) # cleaning @@ -1580,20 +1947,21 @@ def test_extend_add_samples(self): def test_extend_add_samples_error(self): """extend fails adding samples to an already preprocessed template""" df = pd.DataFrame.from_dict( - {'new_sample': {'barcode': 'CCTCTGAGAGCT'}}, - orient='index', dtype=str) + {"new_sample": {"barcode": "CCTCTGAGAGCT"}}, orient="index", dtype=str + ) with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.metadata_template.prep_template.PrepTemplate(1).extend(df) def 
test_extend_add_cols(self): """extend correctly adds a new columns""" pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) - self.metadata['new_col'] = pd.Series(['val1', 'val2', 'val3'], - index=self.metadata.index) + self.metadata, self.test_study, self.data_type + ) + self.metadata["new_col"] = pd.Series( + ["val1", "val2", "val3"], index=self.metadata.index + ) - npt.assert_warns( - qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata) + npt.assert_warns(qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata) with qdb.sql_connection.TRN: sql = """SELECT * @@ -1601,61 +1969,69 @@ def test_extend_add_cols(self): WHERE sample_id != '{1}'""".format(pt.id, self.QCN) qdb.sql_connection.TRN.add(sql) obs = dict(qdb.sql_connection.TRN.execute_fetchindex()) - exp = {'1.SKB7.640196': { - 'barcode': 'CCTCTGAGAGCT', - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L002_sequences', - 'str_column': 'Value for sample 3', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'emp_status': 'EMP', - 'new_col': 'val1'}, - '1.SKB8.640193': { - 'barcode': 'GTCCGCAAGTTA', - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L001_sequences', - 'str_column': 'Value for sample 1', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'emp_status': 'EMP', - 'new_col': 'val2'}, - '1.SKD8.640184': { - 'barcode': 'CGTAGAGCTCTC', - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L001_sequences', - 'str_column': 'Value for sample 2', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'emp_status': 'EMP', - 'new_col': 'val3'}} + exp = { + "1.SKB7.640196": { + "barcode": "CCTCTGAGAGCT", + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L002_sequences", + "str_column": "Value for sample 3", + "center_name": "ANL", + "center_project_name": "Test Project", + "emp_status": "EMP", + "new_col": "val1", + }, + "1.SKB8.640193": { + "barcode": "GTCCGCAAGTTA", + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L001_sequences", + "str_column": "Value for sample 1", + "center_name": "ANL", + "center_project_name": "Test Project", + "emp_status": "EMP", + "new_col": "val2", + }, + "1.SKD8.640184": { + "barcode": "CGTAGAGCTCTC", + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L001_sequences", + "str_column": "Value for sample 2", + "center_name": "ANL", + "center_project_name": "Test 
Project", + "emp_status": "EMP", + "new_col": "val3", + }, + } self.assertCountEqual(obs, exp) # cleaning qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) def test_extend_update(self): pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) - self.metadata['new_col'] = pd.Series(['val1', 'val2', 'val3'], - index=self.metadata.index) - self.metadata['str_column']['SKB7.640196'] = 'NEW VAL' + self.metadata, self.test_study, self.data_type + ) + self.metadata["new_col"] = pd.Series( + ["val1", "val2", "val3"], index=self.metadata.index + ) + self.metadata["str_column"]["SKB7.640196"] = "NEW VAL" npt.assert_warns( - qdb.exceptions.QiitaDBWarning, pt.extend_and_update, self.metadata) + qdb.exceptions.QiitaDBWarning, pt.extend_and_update, self.metadata + ) with qdb.sql_connection.TRN: sql = """SELECT * @@ -1663,89 +2039,98 @@ def test_extend_update(self): WHERE sample_id != '{1}'""".format(pt.id, self.QCN) qdb.sql_connection.TRN.add(sql) obs = dict(qdb.sql_connection.TRN.execute_fetchindex()) - exp = {'1.SKB7.640196': { - 'barcode': 'CCTCTGAGAGCT', - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L002_sequences', - 'str_column': 'NEW VAL', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'emp_status': 'EMP', - 'new_col': 'val1'}, - '1.SKB8.640193': { - 'barcode': 'GTCCGCAAGTTA', - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L001_sequences', - 'str_column': 'Value for sample 1', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'emp_status': 'EMP', - 'new_col': 'val2'}, - '1.SKD8.640184': { - 'barcode': 'CGTAGAGCTCTC', - 'ebi_submission_accession': None, - 'experiment_design_description': 'BBBB', - 'library_construction_protocol': 'AAAA', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'run_prefix': 's_G1_L001_sequences', - 'str_column': 'Value for sample 2', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'emp_status': 'EMP', - 'new_col': 'val3'}} + exp = { + "1.SKB7.640196": { + "barcode": "CCTCTGAGAGCT", + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L002_sequences", + "str_column": "NEW VAL", + "center_name": "ANL", + "center_project_name": "Test Project", + "emp_status": "EMP", + "new_col": "val1", + }, + "1.SKB8.640193": { + "barcode": "GTCCGCAAGTTA", + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + "primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L001_sequences", + "str_column": "Value for sample 1", + "center_name": "ANL", + "center_project_name": "Test Project", + "emp_status": "EMP", + "new_col": "val2", + }, + "1.SKD8.640184": { + "barcode": "CGTAGAGCTCTC", + "ebi_submission_accession": None, + "experiment_design_description": "BBBB", + "library_construction_protocol": "AAAA", + 
"primer": "GTGCCAGCMGCCGCGGTAA", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "run_prefix": "s_G1_L001_sequences", + "str_column": "Value for sample 2", + "center_name": "ANL", + "center_project_name": "Test Project", + "emp_status": "EMP", + "new_col": "val3", + }, + } self.assertCountEqual(obs, exp) def test_ebi_experiment_accessions(self): obs = self.tester.ebi_experiment_accessions - exp = {'1.SKB8.640193': 'ERX0000000', - '1.SKD8.640184': 'ERX0000001', - '1.SKB7.640196': 'ERX0000002', - '1.SKM9.640192': 'ERX0000003', - '1.SKM4.640180': 'ERX0000004', - '1.SKM5.640177': 'ERX0000005', - '1.SKB5.640181': 'ERX0000006', - '1.SKD6.640190': 'ERX0000007', - '1.SKB2.640194': 'ERX0000008', - '1.SKD2.640178': 'ERX0000009', - '1.SKM7.640188': 'ERX0000010', - '1.SKB1.640202': 'ERX0000011', - '1.SKD1.640179': 'ERX0000012', - '1.SKD3.640198': 'ERX0000013', - '1.SKM8.640201': 'ERX0000014', - '1.SKM2.640199': 'ERX0000015', - '1.SKB9.640200': 'ERX0000016', - '1.SKD5.640186': 'ERX0000017', - '1.SKM3.640197': 'ERX0000018', - '1.SKD9.640182': 'ERX0000019', - '1.SKB4.640189': 'ERX0000020', - '1.SKD7.640191': 'ERX0000021', - '1.SKM6.640187': 'ERX0000022', - '1.SKD4.640185': 'ERX0000023', - '1.SKB3.640195': 'ERX0000024', - '1.SKB6.640176': 'ERX0000025', - '1.SKM1.640183': 'ERX0000026'} + exp = { + "1.SKB8.640193": "ERX0000000", + "1.SKD8.640184": "ERX0000001", + "1.SKB7.640196": "ERX0000002", + "1.SKM9.640192": "ERX0000003", + "1.SKM4.640180": "ERX0000004", + "1.SKM5.640177": "ERX0000005", + "1.SKB5.640181": "ERX0000006", + "1.SKD6.640190": "ERX0000007", + "1.SKB2.640194": "ERX0000008", + "1.SKD2.640178": "ERX0000009", + "1.SKM7.640188": "ERX0000010", + "1.SKB1.640202": "ERX0000011", + "1.SKD1.640179": "ERX0000012", + "1.SKD3.640198": "ERX0000013", + "1.SKM8.640201": "ERX0000014", + "1.SKM2.640199": "ERX0000015", + "1.SKB9.640200": "ERX0000016", + "1.SKD5.640186": "ERX0000017", + "1.SKM3.640197": "ERX0000018", + "1.SKD9.640182": "ERX0000019", + "1.SKB4.640189": "ERX0000020", + "1.SKD7.640191": "ERX0000021", + "1.SKM6.640187": "ERX0000022", + "1.SKD4.640185": "ERX0000023", + "1.SKB3.640195": "ERX0000024", + "1.SKB6.640176": "ERX0000025", + "1.SKM1.640183": "ERX0000026", + } self.assertEqual(obs, exp) pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, - self.data_type) + self.metadata, self.test_study, self.data_type + ) obs = pt.ebi_experiment_accessions - exp = {'%s.SKB8.640193' % self.test_study.id: None, - '%s.SKD8.640184' % self.test_study.id: None, - '%s.SKB7.640196' % self.test_study.id: None} + exp = { + "%s.SKB8.640193" % self.test_study.id: None, + "%s.SKD8.640184" % self.test_study.id: None, + "%s.SKB7.640196" % self.test_study.id: None, + } self.assertEqual(obs, exp) # cleaning qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) @@ -1753,16 +2138,21 @@ def test_ebi_experiment_accessions(self): def test_ebi_experiment_accessions_setter(self): with self.assertRaises(qdb.exceptions.QiitaDBError): self.tester.ebi_experiment_accessions = { - '1.SKB8.640193': 'ERX1000000', '1.SKD8.640184': 'ERX1000001'} + "1.SKB8.640193": "ERX1000000", + "1.SKD8.640184": "ERX1000001", + } pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) - exp_acc = {'%s.SKB8.640193' % self.test_study.id: 'ERX0000126', - '%s.SKD8.640184' % self.test_study.id: 'ERX0000127'} + self.metadata, self.test_study, self.data_type + ) + exp_acc = { + "%s.SKB8.640193" % self.test_study.id: "ERX0000126", + 
"%s.SKD8.640184" % self.test_study.id: "ERX0000127", + } pt.ebi_experiment_accessions = exp_acc - exp_acc['%s.SKB7.640196' % self.test_study.id] = None + exp_acc["%s.SKB7.640196" % self.test_study.id] = None self.assertEqual(pt.ebi_experiment_accessions, exp_acc) - exp_acc['%s.SKB7.640196' % self.test_study.id] = 'ERX0000128' + exp_acc["%s.SKB7.640196" % self.test_study.id] = "ERX0000128" pt.ebi_experiment_accessions = exp_acc self.assertEqual(pt.ebi_experiment_accessions, exp_acc) @@ -1770,6 +2160,7 @@ def test_ebi_experiment_accessions_setter(self): # npt.assert_warns def f(): pt.ebi_experiment_accessions = exp_acc + npt.assert_warns(qdb.exceptions.QiitaDBWarning, f) # cleaning qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) @@ -1779,13 +2170,17 @@ def test_ebi_experiment_accessions_setter_common_samples(self): # ebi_experiment_accession should affect only the prep template # that it was called to, not both prep templates pt1 = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) + self.metadata, self.test_study, self.data_type + ) pt2 = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) - exp_acc1 = {'%s.SKB8.640193' % self.test_study.id: 'ERX0000126', - '%s.SKD8.640184' % self.test_study.id: 'ERX0000127'} + self.metadata, self.test_study, self.data_type + ) + exp_acc1 = { + "%s.SKB8.640193" % self.test_study.id: "ERX0000126", + "%s.SKD8.640184" % self.test_study.id: "ERX0000127", + } pt1.ebi_experiment_accessions = exp_acc1 - exp_acc1['%s.SKB7.640196' % self.test_study.id] = None + exp_acc1["%s.SKB7.640196" % self.test_study.id] = None self.assertEqual(pt1.ebi_experiment_accessions, exp_acc1) exp_acc2 = {k: None for k in exp_acc1.keys()} self.assertEqual(pt2.ebi_experiment_accessions, exp_acc2) @@ -1796,7 +2191,8 @@ def test_ebi_experiment_accessions_setter_common_samples(self): def test_is_submitted_to_ebi(self): self.assertTrue(self.tester.is_submitted_to_ebi) pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) + self.metadata, self.test_study, self.data_type + ) self.assertFalse(pt.is_submitted_to_ebi) # cleaning qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) @@ -1804,36 +2200,38 @@ def test_is_submitted_to_ebi(self): def test_validate_template_warning_missing(self): """Raises an error if the template is missing a required column""" metadata_dict = { - 'SKB8.640193': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA', - 'barcodesequence': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'} + "SKB8.640193": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "linkerprimersequence": "GTGCCAGCMGCCGCGGTAA", + "barcodesequence": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "experiment_design_description": "BBBB", } - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) PT = qdb.metadata_template.prep_template.PrepTemplate obs = PT._clean_validate_template(metadata, 2) 
metadata_dict = { - '2.SKB8.640193': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA', - 'barcodesequence': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'} + "2.SKB8.640193": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "linkerprimersequence": "GTGCCAGCMGCCGCGGTAA", + "barcodesequence": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "experiment_design_description": "BBBB", } - exp = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + } + exp = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) obs.sort_index(axis=0, inplace=True) obs.sort_index(axis=1, inplace=True) exp.sort_index(axis=0, inplace=True) @@ -1843,36 +2241,40 @@ def test_validate_template_warning_missing(self): def test_delete_column(self): QE = qdb.exceptions pt = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) - pt.delete_column('str_column') - self.assertNotIn('str_column', pt.categories) + self.metadata, self.test_study, self.data_type + ) + pt.delete_column("str_column") + self.assertNotIn("str_column", pt.categories) # testing errors pt = qdb.metadata_template.prep_template.PrepTemplate(1) with self.assertRaises(QE.QiitaDBOperationNotPermittedError): - pt.delete_column('barcode') + pt.delete_column("barcode") with self.assertRaises(QE.QiitaDBColumnError): - pt.delete_column('ph') + pt.delete_column("ph") def test_delete_samples(self): QE = qdb.exceptions sid = self.test_study.id ptA = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) + self.metadata, self.test_study, self.data_type + ) ptB = qdb.metadata_template.prep_template.PrepTemplate.create( - self.metadata, self.test_study, self.data_type) + self.metadata, self.test_study, self.data_type + ) # first let's test that we cannot delete all samples from one of the # preps - with self.assertRaisesRegex(ValueError, "You cannot delete all " - "samples from an information file"): + with self.assertRaisesRegex( + ValueError, "You cannot delete all samples from an information file" + ): ptA.delete_samples(list(ptA.keys())) # then continue with the regular testing - sample1 = '%s.SKB8.640193' % sid - sample2 = '%s.SKD8.640184' % sid - sample3 = '%s.SKB7.640196' % sid + sample1 = "%s.SKB8.640193" % sid + sample2 = "%s.SKD8.640184" % sid + sample3 = "%s.SKB7.640196" % sid ptA.delete_samples([sample1]) self.assertNotIn(sample1, ptA) self.assertIn(sample2, ptA) @@ -1887,11 +2289,11 @@ def test_delete_samples(self): # testing errors with self.assertRaises(QE.QiitaDBUnknownIDError): - ptA.delete_samples(['not.existing.sample']) + ptA.delete_samples(["not.existing.sample"]) pt = qdb.metadata_template.prep_template.PrepTemplate(2) with self.assertRaises(QE.QiitaDBOperationNotPermittedError): - pt.delete_samples(['1.SKM5.640177']) + pt.delete_samples(["1.SKM5.640177"]) # cleaning qdb.metadata_template.prep_template.PrepTemplate.delete(ptA.id) @@ -1899,11 +2301,11 @@ def test_delete_samples(self): def test_name_setter(self): pt = qdb.metadata_template.prep_template.PrepTemplate(1) - self.assertEqual(pt.name, 
'Prep information 1') - pt.name = 'New Name' - self.assertEqual(pt.name, 'New Name') - pt.name = 'Prep information 1' - self.assertEqual(pt.name, 'Prep information 1') + self.assertEqual(pt.name, "Prep information 1") + pt.name = "New Name" + self.assertEqual(pt.name, "New Name") + pt.name = "Prep information 1" + self.assertEqual(pt.name, "Prep information 1") def test_current_human_filtering(self): pt = qdb.metadata_template.prep_template.PrepTemplate(1) @@ -1920,9 +2322,9 @@ def test_reprocess_job_id(self): self.assertIsNone(pt.reprocess_job_id) # it should not accept an external_job_id with self.assertRaises(ValueError): - pt.reprocess_job_id = '124567' + pt.reprocess_job_id = "124567" # but it should work fine with an uuid - jid = '6d368e16-2242-4cf8-87b4-a5dc40bb890b' + jid = "6d368e16-2242-4cf8-87b4-a5dc40bb890b" pt.reprocess_job_id = jid self.assertEqual(pt.reprocess_job_id, jid) # and it should be fine to return to its default value @@ -1931,20 +2333,21 @@ def test_reprocess_job_id(self): EXP_PREP_TEMPLATE = ( - 'sample_name\tbarcode\tcenter_name\tcenter_project_name\t' - 'ebi_submission_accession\temp_status\texperiment_design_description\t' - 'insdc_nulls\tinstrument_model\tlibrary_construction_protocol\tplatform\t' - 'primer\tqiita_prep_id\trun_prefix\tstr_column\n' - '1.SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\t\tEMP\tBBBB\t' - 'not applicable\tIllumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\t' - '{0}\ts_G1_L002_sequences\tValue for sample 3\n' - '1.SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\t\tEMP\tBBBB\t' - '3.6\tIllumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\t' - '{0}\ts_G1_L001_sequences\tValue for sample 1\n' - '1.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\t\tEMP\tBBBB\t' - 'not applicable\tIllumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\t' - '{0}\ts_G1_L001_sequences\tValue for sample 2\n') - - -if __name__ == '__main__': + "sample_name\tbarcode\tcenter_name\tcenter_project_name\t" + "ebi_submission_accession\temp_status\texperiment_design_description\t" + "insdc_nulls\tinstrument_model\tlibrary_construction_protocol\tplatform\t" + "primer\tqiita_prep_id\trun_prefix\tstr_column\n" + "1.SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\t\tEMP\tBBBB\t" + "not applicable\tIllumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\t" + "{0}\ts_G1_L002_sequences\tValue for sample 3\n" + "1.SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\t\tEMP\tBBBB\t" + "3.6\tIllumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\t" + "{0}\ts_G1_L001_sequences\tValue for sample 1\n" + "1.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\t\tEMP\tBBBB\t" + "not applicable\tIllumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\t" + "{0}\ts_G1_L001_sequences\tValue for sample 2\n" +) + + +if __name__ == "__main__": main() diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index 7a91e26e4..06281a095 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -5,21 +5,20 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main -from tempfile import mkstemp -from os import close, remove from collections import Iterable -from warnings import catch_warnings +from os import close, remove +from tempfile import mkstemp from time import time +from unittest import TestCase, main +from warnings import catch_warnings import numpy.testing as npt import pandas as pd from pandas.testing import assert_frame_equal -from qiita_core.util import qiita_test_checker -from qiita_core.exceptions import IncompetentQiitaDeveloperError import qiita_db as qdb - +from qiita_core.exceptions import IncompetentQiitaDeveloperError +from qiita_core.util import qiita_test_checker STC = qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS @@ -27,45 +26,66 @@ @qiita_test_checker() class TestSample(TestCase): def setUp(self): - self.sample_template = \ - qdb.metadata_template.sample_template.SampleTemplate(1) - self.sample_id = '1.SKB8.640193' + self.sample_template = qdb.metadata_template.sample_template.SampleTemplate(1) + self.sample_id = "1.SKB8.640193" self.tester = qdb.metadata_template.sample_template.Sample( - self.sample_id, self.sample_template) - self.exp_categories = {'physical_specimen_location', - 'physical_specimen_remaining', - 'dna_extracted', 'sample_type', 'env_package', - 'collection_timestamp', 'host_subject_id', - 'description', 'season_environment', - 'assigned_from_geo', 'texture', 'taxon_id', - 'depth', 'host_taxid', 'common_name', - 'water_content_soil', 'elevation', 'temp', - 'tot_nitro', 'samp_salinity', 'altitude', - 'env_biome', 'country', 'ph', 'anonymized_name', - 'tot_org_carb', 'description_duplicate', - 'env_feature', 'latitude', 'longitude', - 'scientific_name'} + self.sample_id, self.sample_template + ) + self.exp_categories = { + "physical_specimen_location", + "physical_specimen_remaining", + "dna_extracted", + "sample_type", + "env_package", + "collection_timestamp", + "host_subject_id", + "description", + "season_environment", + "assigned_from_geo", + "texture", + "taxon_id", + "depth", + "host_taxid", + "common_name", + "water_content_soil", + "elevation", + "temp", + "tot_nitro", + "samp_salinity", + "altitude", + "env_biome", + "country", + "ph", + "anonymized_name", + "tot_org_carb", + "description_duplicate", + "env_feature", + "latitude", + "longitude", + "scientific_name", + } def test_init_unknown_error(self): - """Init raises an error if the sample id is not found in the template - """ + """Init raises an error if the sample id is not found in the template""" with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): qdb.metadata_template.sample_template.Sample( - 'Not_a_Sample', self.sample_template) + "Not_a_Sample", self.sample_template + ) def test_init_wrong_template(self): """Raises an error if using a PrepTemplate instead of SampleTemplate""" with self.assertRaises(IncompetentQiitaDeveloperError): qdb.metadata_template.sample_template.Sample( - 'SKB8.640193', - qdb.metadata_template.prep_template.PrepTemplate(1)) + "SKB8.640193", qdb.metadata_template.prep_template.PrepTemplate(1) + ) def test_init(self): """Init correctly initializes the sample object""" sample = qdb.metadata_template.sample_template.Sample( - self.sample_id, self.sample_template) + self.sample_id, self.sample_template + ) # Check that the internal id have been correctly set - self.assertEqual(sample._id, '1.SKB8.640193') + self.assertEqual(sample._id, "1.SKB8.640193") # Check that the internal template 
have been correctly set self.assertEqual(sample._md_template, self.sample_template) # Check that the internal dynamic table name have been correctly set @@ -74,31 +94,39 @@ def test_init(self): def test_eq_true(self): """Equality correctly returns true""" other = qdb.metadata_template.sample_template.Sample( - self.sample_id, self.sample_template) + self.sample_id, self.sample_template + ) self.assertTrue(self.tester == other) def test_eq_false_type(self): """Equality returns false if types are not equal""" other = qdb.metadata_template.prep_template.PrepSample( - self.sample_id, - qdb.metadata_template.prep_template.PrepTemplate(1)) + self.sample_id, qdb.metadata_template.prep_template.PrepTemplate(1) + ) self.assertFalse(self.tester == other) def test_eq_false_id(self): """Equality returns false if ids are different""" other = qdb.metadata_template.sample_template.Sample( - '1.SKD8.640184', self.sample_template) + "1.SKD8.640184", self.sample_template + ) self.assertFalse(self.tester == other) def test_exists_true(self): """Exists returns true if the sample exists""" - self.assertTrue(qdb.metadata_template.sample_template.Sample.exists( - self.sample_id, self.sample_template)) + self.assertTrue( + qdb.metadata_template.sample_template.Sample.exists( + self.sample_id, self.sample_template + ) + ) def test_exists_false(self): """Exists returns false if the sample does not exists""" - self.assertFalse(qdb.metadata_template.sample_template.Sample.exists( - 'Not_a_Sample', self.sample_template)) + self.assertFalse( + qdb.metadata_template.sample_template.Sample.exists( + "Not_a_Sample", self.sample_template + ) + ) def test_get_categories(self): """Correctly returns the set of category headers""" @@ -110,23 +138,20 @@ def test_len(self): self.assertEqual(len(self.tester), 31) def test_getitem_required(self): - """Get item returns the correct metadata value from the required table - """ - self.assertEqual(self.tester['physical_specimen_location'], 'ANL') - self.assertEqual(self.tester['collection_timestamp'], - '2011-11-11 13:00:00') - self.assertTrue(self.tester['dna_extracted']) + """Get item returns the correct metadata value from the required table""" + self.assertEqual(self.tester["physical_specimen_location"], "ANL") + self.assertEqual(self.tester["collection_timestamp"], "2011-11-11 13:00:00") + self.assertTrue(self.tester["dna_extracted"]) def test_getitem_dynamic(self): - """Get item returns the correct metadata value from the dynamic table - """ - self.assertEqual(self.tester['SEASON_ENVIRONMENT'], 'winter') - self.assertEqual(self.tester['depth'], '0.15') + """Get item returns the correct metadata value from the dynamic table""" + self.assertEqual(self.tester["SEASON_ENVIRONMENT"], "winter") + self.assertEqual(self.tester["depth"], "0.15") def test_getitem_error(self): """Get item raises an error if category does not exists""" with self.assertRaises(KeyError): - self.tester['Not_a_Category'] + self.tester["Not_a_Category"] def test_iter(self): """iter returns an iterator over the category headers""" @@ -136,12 +161,12 @@ def test_iter(self): def test_contains_true(self): """contains returns true if the category header exists""" - self.assertTrue('DEPTH' in self.tester) - self.assertTrue('depth' in self.tester) + self.assertTrue("DEPTH" in self.tester) + self.assertTrue("depth" in self.tester) def test_contains_false(self): """contains returns false if the category header does not exists""" - self.assertFalse('Not_a_Category' in self.tester) + self.assertFalse("Not_a_Category" in 
self.tester) def test_keys(self): """keys returns an iterator over the metadata headers""" @@ -153,60 +178,92 @@ def test_values(self): """values returns an iterator over the values""" obs = self.tester.values() self.assertTrue(isinstance(obs, Iterable)) - exp = {'ANL', 'true', 'true', 'ENVO:soil', '2011-11-11 13:00:00', - '1001:M7', 'Cannabis Soil Microbiome', 'winter', 'n', - '64.6 sand, 17.6 silt, 17.8 clay', '1118232', '0.15', '3483', - 'root metagenome', '0.164', '114', '15', '1.41', '7.15', '0', - 'ENVO:Temperate grasslands, savannas, and shrubland biome', - 'GAZ:United States of America', '6.94', 'SKB8', '5', - 'Burmese root', 'ENVO:plant-associated habitat', - '74.0894932572', '65.3283470202', '1118232', 'soil'} + exp = { + "ANL", + "true", + "true", + "ENVO:soil", + "2011-11-11 13:00:00", + "1001:M7", + "Cannabis Soil Microbiome", + "winter", + "n", + "64.6 sand, 17.6 silt, 17.8 clay", + "1118232", + "0.15", + "3483", + "root metagenome", + "0.164", + "114", + "15", + "1.41", + "7.15", + "0", + "ENVO:Temperate grasslands, savannas, and shrubland biome", + "GAZ:United States of America", + "6.94", + "SKB8", + "5", + "Burmese root", + "ENVO:plant-associated habitat", + "74.0894932572", + "65.3283470202", + "1118232", + "soil", + } self.assertCountEqual(set(obs), exp) def test_items(self): """items returns an iterator over the (key, value) tuples""" obs = self.tester.items() self.assertTrue(isinstance(obs, Iterable)) - exp = {('physical_specimen_location', 'ANL'), - ('physical_specimen_remaining', 'true'), - ('dna_extracted', 'true'), - ('sample_type', 'ENVO:soil'), - ('collection_timestamp', '2011-11-11 13:00:00'), - ('host_subject_id', '1001:M7'), - ('description', 'Cannabis Soil Microbiome'), - ('env_package', 'soil'), - ('season_environment', 'winter'), ('assigned_from_geo', 'n'), - ('texture', '64.6 sand, 17.6 silt, 17.8 clay'), - ('taxon_id', '1118232'), ('depth', '0.15'), - ('host_taxid', '3483'), ('common_name', 'root metagenome'), - ('water_content_soil', '0.164'), ('elevation', '114'), - ('temp', '15'), ('tot_nitro', '1.41'), - ('samp_salinity', '7.15'), ('altitude', '0'), - ('env_biome', - 'ENVO:Temperate grasslands, savannas, and shrubland biome'), - ('country', 'GAZ:United States of America'), ('ph', '6.94'), - ('anonymized_name', 'SKB8'), ('tot_org_carb', '5'), - ('description_duplicate', 'Burmese root'), - ('env_feature', 'ENVO:plant-associated habitat'), - ('latitude', '74.0894932572'), - ('longitude', '65.3283470202'), - ('scientific_name', '1118232')} + exp = { + ("physical_specimen_location", "ANL"), + ("physical_specimen_remaining", "true"), + ("dna_extracted", "true"), + ("sample_type", "ENVO:soil"), + ("collection_timestamp", "2011-11-11 13:00:00"), + ("host_subject_id", "1001:M7"), + ("description", "Cannabis Soil Microbiome"), + ("env_package", "soil"), + ("season_environment", "winter"), + ("assigned_from_geo", "n"), + ("texture", "64.6 sand, 17.6 silt, 17.8 clay"), + ("taxon_id", "1118232"), + ("depth", "0.15"), + ("host_taxid", "3483"), + ("common_name", "root metagenome"), + ("water_content_soil", "0.164"), + ("elevation", "114"), + ("temp", "15"), + ("tot_nitro", "1.41"), + ("samp_salinity", "7.15"), + ("altitude", "0"), + ("env_biome", "ENVO:Temperate grasslands, savannas, and shrubland biome"), + ("country", "GAZ:United States of America"), + ("ph", "6.94"), + ("anonymized_name", "SKB8"), + ("tot_org_carb", "5"), + ("description_duplicate", "Burmese root"), + ("env_feature", "ENVO:plant-associated habitat"), + ("latitude", "74.0894932572"), + 
("longitude", "65.3283470202"), + ("scientific_name", "1118232"), + } self.assertEqual(set(obs), exp) def test_get(self): """get returns the correct sample object""" - self.assertEqual(self.tester.get('SEASON_ENVIRONMENT'), 'winter') - self.assertEqual(self.tester.get('depth'), '0.15') + self.assertEqual(self.tester.get("SEASON_ENVIRONMENT"), "winter") + self.assertEqual(self.tester.get("depth"), "0.15") def test_get_none(self): """get returns none if the sample id is not present""" - self.assertTrue(self.tester.get('Not_a_Category') is None) + self.assertTrue(self.tester.get("Not_a_Category") is None) def test_columns_restrictions(self): """that it returns STC""" - self.assertEqual( - self.sample_template.columns_restrictions, - STC) + self.assertEqual(self.sample_template.columns_restrictions, STC) def test_can_be_updated(self): """test if the template can be updated""" @@ -220,251 +277,296 @@ def test_can_be_extended(self): def test_setitem(self): with self.assertRaises(qdb.exceptions.QiitaDBColumnError): - self.tester['column that does not exist'] = 0.30 + self.tester["column that does not exist"] = 0.30 tester = qdb.metadata_template.sample_template.Sample( - '1.SKB1.640202', self.sample_template) + "1.SKB1.640202", self.sample_template + ) - self.assertEqual(tester['tot_nitro'], '1.41') - tester['tot_nitro'] = '1234.5' - self.assertEqual(tester['tot_nitro'], '1234.5') + self.assertEqual(tester["tot_nitro"], "1.41") + tester["tot_nitro"] = "1234.5" + self.assertEqual(tester["tot_nitro"], "1234.5") def test_delitem(self): """delitem raises an error (currently not allowed)""" with self.assertRaises(qdb.exceptions.QiitaDBNotImplementedError): - del self.tester['DEPTH'] + del self.tester["DEPTH"] @qiita_test_checker() class TestSampleTemplate(TestCase): def setUp(self): info = { - "timeseries_type_id": '1', - "metadata_complete": 'true', - "mixs_compliant": 'true', + "timeseries_type_id": "1", + "metadata_complete": "true", + "mixs_compliant": "true", "study_alias": "FCM", "study_description": "Microbiome of people who eat nothing but " - "fried chicken", + "fried chicken", "study_abstract": "Exploring how a high fat diet changes the " - "gut microbiome", + "gut microbiome", "principal_investigator_id": qdb.study.StudyPerson(3), - "lab_person_id": qdb.study.StudyPerson(1) + "lab_person_id": qdb.study.StudyPerson(1), } self.new_study = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - "Fried Chicken Microbiome %s" % time(), info) + qdb.user.User("test@foo.bar"), "Fried Chicken Microbiome %s" % time(), info + ) self.metadata_dict = { - 'Sample1': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 1', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - 'Sample2': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 2', - 'latitude': '4.2', - 'longitude': '1.1', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - 'Sample3': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 
'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 3', - 'latitude': '4.8', - 'longitude': '4.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - } - self.metadata = pd.DataFrame.from_dict(self.metadata_dict, - orient='index', dtype=str) + "Sample1": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 1", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "Sample2": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 2", + "latitude": "4.2", + "longitude": "1.1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "Sample3": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 3", + "latitude": "4.8", + "longitude": "4.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } + self.metadata = pd.DataFrame.from_dict( + self.metadata_dict, orient="index", dtype=str + ) metadata_str_prefix_dict = { - 'foo.Sample1': self.metadata_dict['Sample1'], - 'bar.Sample2': self.metadata_dict['Sample2'], - 'foo.Sample3': self.metadata_dict['Sample3'], + "foo.Sample1": self.metadata_dict["Sample1"], + "bar.Sample2": self.metadata_dict["Sample2"], + "foo.Sample3": self.metadata_dict["Sample3"], } self.metadata_str_prefix = pd.DataFrame.from_dict( - metadata_str_prefix_dict, orient='index', dtype=str) + metadata_str_prefix_dict, orient="index", dtype=str + ) metadata_int_prefix_dict = { - '12.Sample1': self.metadata_dict['Sample1'], - '12.Sample2': self.metadata_dict['Sample2'], - '12.Sample3': self.metadata_dict['Sample3'] + "12.Sample1": self.metadata_dict["Sample1"], + "12.Sample2": self.metadata_dict["Sample2"], + "12.Sample3": self.metadata_dict["Sample3"], } self.metadata_int_pref = pd.DataFrame.from_dict( - metadata_int_prefix_dict, orient='index', dtype=str) + metadata_int_prefix_dict, orient="index", dtype=str + ) metadata_prefixed_dict = { - '%d.Sample1' % self.new_study.id: self.metadata_dict['Sample1'], - '%d.Sample2' % self.new_study.id: self.metadata_dict['Sample2'], - '%d.Sample3' % self.new_study.id: self.metadata_dict['Sample3'] + "%d.Sample1" % self.new_study.id: self.metadata_dict["Sample1"], + "%d.Sample2" % self.new_study.id: self.metadata_dict["Sample2"], + "%d.Sample3" % self.new_study.id: self.metadata_dict["Sample3"], } self.metadata_prefixed = pd.DataFrame.from_dict( - metadata_prefixed_dict, orient='index', dtype=str) + metadata_prefixed_dict, orient="index", dtype=str + ) self.test_study = qdb.study.Study(1) self.tester = qdb.metadata_template.sample_template.SampleTemplate(1) self.exp_sample_ids = { - '1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195', '1.SKB4.640189', - '1.SKB5.640181', '1.SKB6.640176', '1.SKB7.640196', '1.SKB8.640193', - '1.SKB9.640200', '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198', - '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190', '1.SKD7.640191', - '1.SKD8.640184', '1.SKD9.640182', '1.SKM1.640183', '1.SKM2.640199', - 
'1.SKM3.640197', '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187', - '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192'} + "1.SKB1.640202", + "1.SKB2.640194", + "1.SKB3.640195", + "1.SKB4.640189", + "1.SKB5.640181", + "1.SKB6.640176", + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKB9.640200", + "1.SKD1.640179", + "1.SKD2.640178", + "1.SKD3.640198", + "1.SKD4.640185", + "1.SKD5.640186", + "1.SKD6.640190", + "1.SKD7.640191", + "1.SKD8.640184", + "1.SKD9.640182", + "1.SKM1.640183", + "1.SKM2.640199", + "1.SKM3.640197", + "1.SKM4.640180", + "1.SKM5.640177", + "1.SKM6.640187", + "1.SKM7.640188", + "1.SKM8.640201", + "1.SKM9.640192", + } self._clean_up_files = [] self.metadata_dict_updated_dict = { - 'Sample1': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': '6', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 1', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - 'Sample2': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': '5', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'the only one', - 'Description': 'Test Sample 2', - 'latitude': '4.2', - 'longitude': '1.1', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - 'Sample3': {'physical_specimen_location': 'new location', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': '10', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 3', - 'latitude': '4.8', - 'longitude': '4.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - } + "Sample1": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "6", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 1", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "Sample2": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "5", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "the only one", + "Description": "Test Sample 2", + "latitude": "4.2", + "longitude": "1.1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "Sample3": { + "physical_specimen_location": "new location", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "10", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 3", + "latitude": "4.8", + "longitude": "4.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } self.metadata_dict_updated = pd.DataFrame.from_dict( - self.metadata_dict_updated_dict, orient='index', dtype=str) + self.metadata_dict_updated_dict, orient="index", dtype=str + ) metadata_dict_updated_sample_error = { - 'Sample1': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': '6', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 1', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 
'scientific_name': 'homo sapiens'}, - 'Sample2': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': '5', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'the only one', - 'Description': 'Test Sample 2', - 'latitude': '4.2', - 'longitude': '1.1', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - 'Sample3': {'physical_specimen_location': 'new location', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': '10', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 3', - 'latitude': '4.8', - 'longitude': '4.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - 'Sample4': {'physical_specimen_location': 'new location', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': '10', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 4', - 'latitude': '4.8', - 'longitude': '4.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'} - } + "Sample1": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "6", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 1", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "Sample2": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "5", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "the only one", + "Description": "Test Sample 2", + "latitude": "4.2", + "longitude": "1.1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "Sample3": { + "physical_specimen_location": "new location", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "10", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 3", + "latitude": "4.8", + "longitude": "4.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "Sample4": { + "physical_specimen_location": "new location", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "10", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 4", + "latitude": "4.8", + "longitude": "4.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } self.metadata_dict_updated_sample_error = pd.DataFrame.from_dict( - metadata_dict_updated_sample_error, orient='index', dtype=str) + metadata_dict_updated_sample_error, orient="index", dtype=str + ) metadata_dict_updated_column_error = { - 'Sample1': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': '6', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 1', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'extra_col': True}, - 'Sample2': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': '5', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 
'the only one', - 'Description': 'Test Sample 2', - 'latitude': '4.2', - 'longitude': '1.1', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'extra_col': True}, - 'Sample3': {'physical_specimen_location': 'new location', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': '10', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 3', - 'latitude': '4.8', - 'longitude': '4.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'extra_col': True}, - } + "Sample1": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "6", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 1", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "extra_col": True, + }, + "Sample2": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "5", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "the only one", + "Description": "Test Sample 2", + "latitude": "4.2", + "longitude": "1.1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "extra_col": True, + }, + "Sample3": { + "physical_specimen_location": "new location", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "10", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 3", + "latitude": "4.8", + "longitude": "4.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "extra_col": True, + }, + } self.metadata_dict_updated_column_error = pd.DataFrame.from_dict( - metadata_dict_updated_column_error, orient='index', dtype=str) + metadata_dict_updated_column_error, orient="index", dtype=str + ) def tearDown(self): for f in self._clean_up_files: @@ -473,15 +575,39 @@ def tearDown(self): def test_metadata_headers(self): ST = qdb.metadata_template.sample_template.SampleTemplate obs = ST.metadata_headers() - exp = ['altitude', 'anonymized_name', 'assigned_from_geo', - 'collection_timestamp', 'common_name', 'country', 'depth', - 'description', 'description_duplicate', 'dna_extracted', - 'elevation', 'env_biome', 'env_feature', 'host_subject_id', - 'host_taxid', 'latitude', 'longitude', 'ph', 'env_package', - 'physical_specimen_location', 'physical_specimen_remaining', - 'samp_salinity', 'sample_type', 'scientific_name', - 'season_environment', 'taxon_id', 'temp', 'texture', - 'tot_nitro', 'tot_org_carb', 'water_content_soil'] + exp = [ + "altitude", + "anonymized_name", + "assigned_from_geo", + "collection_timestamp", + "common_name", + "country", + "depth", + "description", + "description_duplicate", + "dna_extracted", + "elevation", + "env_biome", + "env_feature", + "host_subject_id", + "host_taxid", + "latitude", + "longitude", + "ph", + "env_package", + "physical_specimen_location", + "physical_specimen_remaining", + "samp_salinity", + "sample_type", + "scientific_name", + "season_environment", + "taxon_id", + "temp", + "texture", + "tot_nitro", + "tot_org_carb", + "water_content_soil", + ] self.assertCountEqual(obs, exp) def test_study_id(self): @@ -501,14 +627,17 @@ def test_init(self): def test_table_name(self): """Table name return the correct string""" obs = qdb.metadata_template.sample_template.SampleTemplate._table_name( - 
self.test_study.id) + self.test_study.id + ) self.assertEqual(obs, "sample_1") def test_exists_true(self): """Exists returns true when the SampleTemplate already exists""" self.assertTrue( qdb.metadata_template.sample_template.SampleTemplate.exists( - self.test_study.id)) + self.test_study.id + ) + ) def test_get_sample_ids(self): """get_sample_ids returns the correct set of sample ids""" @@ -521,27 +650,49 @@ def test_len(self): def test_getitem(self): """Get item returns the correct sample object""" - obs = self.tester['1.SKM7.640188'] - exp = qdb.metadata_template.sample_template.Sample( - '1.SKM7.640188', self.tester) + obs = self.tester["1.SKM7.640188"] + exp = qdb.metadata_template.sample_template.Sample("1.SKM7.640188", self.tester) self.assertEqual(obs, exp) def test_getitem_error(self): """Get item raises an error if key does not exists""" with self.assertRaises(KeyError): - self.tester['Not_a_Sample'] + self.tester["Not_a_Sample"] def test_categories(self): - exp = {'season_environment', 'assigned_from_geo', - 'texture', 'taxon_id', 'depth', 'host_taxid', - 'common_name', 'water_content_soil', 'elevation', - 'temp', 'tot_nitro', 'samp_salinity', 'altitude', - 'env_biome', 'country', 'ph', 'anonymized_name', - 'tot_org_carb', 'description_duplicate', 'env_feature', - 'physical_specimen_location', 'env_package', - 'physical_specimen_remaining', 'dna_extracted', - 'sample_type', 'collection_timestamp', 'host_subject_id', - 'description', 'latitude', 'longitude', 'scientific_name'} + exp = { + "season_environment", + "assigned_from_geo", + "texture", + "taxon_id", + "depth", + "host_taxid", + "common_name", + "water_content_soil", + "elevation", + "temp", + "tot_nitro", + "samp_salinity", + "altitude", + "env_biome", + "country", + "ph", + "anonymized_name", + "tot_org_carb", + "description_duplicate", + "env_feature", + "physical_specimen_location", + "env_package", + "physical_specimen_remaining", + "dna_extracted", + "sample_type", + "collection_timestamp", + "host_subject_id", + "description", + "latitude", + "longitude", + "scientific_name", + } obs = set(self.tester.categories) self.assertCountEqual(obs, exp) @@ -553,11 +704,11 @@ def test_iter(self): def test_contains_true(self): """contains returns true if the sample id exists""" - self.assertTrue('1.SKM7.640188' in self.tester) + self.assertTrue("1.SKM7.640188" in self.tester) def test_contains_false(self): """contains returns false if the sample id does not exists""" - self.assertFalse('Not_a_Sample' in self.tester) + self.assertFalse("Not_a_Sample" in self.tester) def test_keys(self): """keys returns an iterator over the sample ids""" @@ -569,64 +720,40 @@ def test_values(self): """values returns an iterator over the values""" obs = self.tester.values() self.assertTrue(isinstance(obs, Iterable)) - exp = {qdb.metadata_template.sample_template.Sample('1.SKB1.640202', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKB2.640194', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKB3.640195', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKB4.640189', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKB5.640181', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKB6.640176', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKB7.640196', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKB8.640193', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKB9.640200', - self.tester), - 
qdb.metadata_template.sample_template.Sample('1.SKD1.640179', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKD2.640178', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKD3.640198', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKD4.640185', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKD5.640186', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKD6.640190', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKD7.640191', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKD8.640184', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKD9.640182', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKM1.640183', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKM2.640199', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKM3.640197', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKM4.640180', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKM5.640177', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKM6.640187', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKM7.640188', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKM8.640201', - self.tester), - qdb.metadata_template.sample_template.Sample('1.SKM9.640192', - self.tester)} + exp = { + qdb.metadata_template.sample_template.Sample("1.SKB1.640202", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKB2.640194", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKB3.640195", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKB4.640189", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKB5.640181", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKB6.640176", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKB7.640196", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKB8.640193", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKB9.640200", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKD1.640179", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKD2.640178", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKD3.640198", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKD4.640185", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKD5.640186", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKD6.640190", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKD7.640191", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKD8.640184", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKD9.640182", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKM1.640183", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKM2.640199", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKM3.640197", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKM4.640180", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKM5.640177", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKM6.640187", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKM7.640188", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKM8.640201", self.tester), + qdb.metadata_template.sample_template.Sample("1.SKM9.640192", self.tester), 
+ } # Creating a list and looping over it since unittest does not call # the __eq__ function on the objects - for o, e in zip(sorted(list(obs), key=lambda x: x.id), - sorted(exp, key=lambda x: x.id)): + for o, e in zip( + sorted(list(obs), key=lambda x: x.id), sorted(exp, key=lambda x: x.id) + ): self.assertEqual(o, e) def test_items(self): @@ -634,60 +761,169 @@ def test_items(self): obs = self.tester.items() self.assertTrue(isinstance(obs, Iterable)) exp = [ - ('1.SKB1.640202', qdb.metadata_template.sample_template.Sample( - '1.SKB1.640202', self.tester)), - ('1.SKB2.640194', qdb.metadata_template.sample_template.Sample( - '1.SKB2.640194', self.tester)), - ('1.SKB3.640195', qdb.metadata_template.sample_template.Sample( - '1.SKB3.640195', self.tester)), - ('1.SKB4.640189', qdb.metadata_template.sample_template.Sample( - '1.SKB4.640189', self.tester)), - ('1.SKB5.640181', qdb.metadata_template.sample_template.Sample( - '1.SKB5.640181', self.tester)), - ('1.SKB6.640176', qdb.metadata_template.sample_template.Sample( - '1.SKB6.640176', self.tester)), - ('1.SKB7.640196', qdb.metadata_template.sample_template.Sample( - '1.SKB7.640196', self.tester)), - ('1.SKB8.640193', qdb.metadata_template.sample_template.Sample( - '1.SKB8.640193', self.tester)), - ('1.SKB9.640200', qdb.metadata_template.sample_template.Sample( - '1.SKB9.640200', self.tester)), - ('1.SKD1.640179', qdb.metadata_template.sample_template.Sample( - '1.SKD1.640179', self.tester)), - ('1.SKD2.640178', qdb.metadata_template.sample_template.Sample( - '1.SKD2.640178', self.tester)), - ('1.SKD3.640198', qdb.metadata_template.sample_template.Sample( - '1.SKD3.640198', self.tester)), - ('1.SKD4.640185', qdb.metadata_template.sample_template.Sample( - '1.SKD4.640185', self.tester)), - ('1.SKD5.640186', qdb.metadata_template.sample_template.Sample( - '1.SKD5.640186', self.tester)), - ('1.SKD6.640190', qdb.metadata_template.sample_template.Sample( - '1.SKD6.640190', self.tester)), - ('1.SKD7.640191', qdb.metadata_template.sample_template.Sample( - '1.SKD7.640191', self.tester)), - ('1.SKD8.640184', qdb.metadata_template.sample_template.Sample( - '1.SKD8.640184', self.tester)), - ('1.SKD9.640182', qdb.metadata_template.sample_template.Sample( - '1.SKD9.640182', self.tester)), - ('1.SKM1.640183', qdb.metadata_template.sample_template.Sample( - '1.SKM1.640183', self.tester)), - ('1.SKM2.640199', qdb.metadata_template.sample_template.Sample( - '1.SKM2.640199', self.tester)), - ('1.SKM3.640197', qdb.metadata_template.sample_template.Sample( - '1.SKM3.640197', self.tester)), - ('1.SKM4.640180', qdb.metadata_template.sample_template.Sample( - '1.SKM4.640180', self.tester)), - ('1.SKM5.640177', qdb.metadata_template.sample_template.Sample( - '1.SKM5.640177', self.tester)), - ('1.SKM6.640187', qdb.metadata_template.sample_template.Sample( - '1.SKM6.640187', self.tester)), - ('1.SKM7.640188', qdb.metadata_template.sample_template.Sample( - '1.SKM7.640188', self.tester)), - ('1.SKM8.640201', qdb.metadata_template.sample_template.Sample( - '1.SKM8.640201', self.tester)), - ('1.SKM9.640192', qdb.metadata_template.sample_template.Sample( - '1.SKM9.640192', self.tester))] + ( + "1.SKB1.640202", + qdb.metadata_template.sample_template.Sample( + "1.SKB1.640202", self.tester + ), + ), + ( + "1.SKB2.640194", + qdb.metadata_template.sample_template.Sample( + "1.SKB2.640194", self.tester + ), + ), + ( + "1.SKB3.640195", + qdb.metadata_template.sample_template.Sample( + "1.SKB3.640195", self.tester + ), + ), + ( + "1.SKB4.640189", + 
qdb.metadata_template.sample_template.Sample( + "1.SKB4.640189", self.tester + ), + ), + ( + "1.SKB5.640181", + qdb.metadata_template.sample_template.Sample( + "1.SKB5.640181", self.tester + ), + ), + ( + "1.SKB6.640176", + qdb.metadata_template.sample_template.Sample( + "1.SKB6.640176", self.tester + ), + ), + ( + "1.SKB7.640196", + qdb.metadata_template.sample_template.Sample( + "1.SKB7.640196", self.tester + ), + ), + ( + "1.SKB8.640193", + qdb.metadata_template.sample_template.Sample( + "1.SKB8.640193", self.tester + ), + ), + ( + "1.SKB9.640200", + qdb.metadata_template.sample_template.Sample( + "1.SKB9.640200", self.tester + ), + ), + ( + "1.SKD1.640179", + qdb.metadata_template.sample_template.Sample( + "1.SKD1.640179", self.tester + ), + ), + ( + "1.SKD2.640178", + qdb.metadata_template.sample_template.Sample( + "1.SKD2.640178", self.tester + ), + ), + ( + "1.SKD3.640198", + qdb.metadata_template.sample_template.Sample( + "1.SKD3.640198", self.tester + ), + ), + ( + "1.SKD4.640185", + qdb.metadata_template.sample_template.Sample( + "1.SKD4.640185", self.tester + ), + ), + ( + "1.SKD5.640186", + qdb.metadata_template.sample_template.Sample( + "1.SKD5.640186", self.tester + ), + ), + ( + "1.SKD6.640190", + qdb.metadata_template.sample_template.Sample( + "1.SKD6.640190", self.tester + ), + ), + ( + "1.SKD7.640191", + qdb.metadata_template.sample_template.Sample( + "1.SKD7.640191", self.tester + ), + ), + ( + "1.SKD8.640184", + qdb.metadata_template.sample_template.Sample( + "1.SKD8.640184", self.tester + ), + ), + ( + "1.SKD9.640182", + qdb.metadata_template.sample_template.Sample( + "1.SKD9.640182", self.tester + ), + ), + ( + "1.SKM1.640183", + qdb.metadata_template.sample_template.Sample( + "1.SKM1.640183", self.tester + ), + ), + ( + "1.SKM2.640199", + qdb.metadata_template.sample_template.Sample( + "1.SKM2.640199", self.tester + ), + ), + ( + "1.SKM3.640197", + qdb.metadata_template.sample_template.Sample( + "1.SKM3.640197", self.tester + ), + ), + ( + "1.SKM4.640180", + qdb.metadata_template.sample_template.Sample( + "1.SKM4.640180", self.tester + ), + ), + ( + "1.SKM5.640177", + qdb.metadata_template.sample_template.Sample( + "1.SKM5.640177", self.tester + ), + ), + ( + "1.SKM6.640187", + qdb.metadata_template.sample_template.Sample( + "1.SKM6.640187", self.tester + ), + ), + ( + "1.SKM7.640188", + qdb.metadata_template.sample_template.Sample( + "1.SKM7.640188", self.tester + ), + ), + ( + "1.SKM8.640201", + qdb.metadata_template.sample_template.Sample( + "1.SKM8.640201", self.tester + ), + ), + ( + "1.SKM9.640192", + qdb.metadata_template.sample_template.Sample( + "1.SKM9.640192", self.tester + ), + ), + ] # Creating a list and looping over it since unittest does not call # the __eq__ function on the objects for o, e in zip(sorted(list(obs)), sorted(exp)): @@ -695,27 +931,26 @@ def test_items(self): def test_get(self): """get returns the correct sample object""" - obs = self.tester.get('1.SKM7.640188') - exp = qdb.metadata_template.sample_template.Sample( - '1.SKM7.640188', self.tester) + obs = self.tester.get("1.SKM7.640188") + exp = qdb.metadata_template.sample_template.Sample("1.SKM7.640188", self.tester) self.assertEqual(obs, exp) def test_get_none(self): """get returns none if the sample id is not present""" - self.assertTrue(self.tester.get('Not_a_Sample') is None) + self.assertTrue(self.tester.get("Not_a_Sample") is None) def test_clean_validate_template_error_bad_chars(self): - """Raises an error if there are invalid characters in the sample names - """ - 
self.metadata.index = ['o()xxxx[{::::::::>', 'sample.1', 'sample.3'] + """Raises an error if there are invalid characters in the sample names""" + self.metadata.index = ["o()xxxx[{::::::::>", "sample.1", "sample.3"] ST = qdb.metadata_template.sample_template.SampleTemplate with self.assertRaises(qdb.exceptions.QiitaDBColumnError): ST._clean_validate_template(self.metadata, 2) def test_clean_validate_template_error_duplicate_cols(self): """Raises an error if there are duplicated columns in the template""" - self.metadata['SAMPLE_TYPE'] = pd.Series(['foo', 'bar', 'foobar'], - index=self.metadata.index) + self.metadata["SAMPLE_TYPE"] = pd.Series( + ["foo", "bar", "foobar"], index=self.metadata.index + ) ST = qdb.metadata_template.sample_template.SampleTemplate with self.assertRaises(qdb.exceptions.QiitaDBDuplicateHeaderError): @@ -723,40 +958,40 @@ def test_clean_validate_template_error_duplicate_cols(self): def test_clean_validate_template_error_duplicate_samples(self): """Raises an error if there are duplicated samples in the template""" - self.metadata.index = ['sample.1', 'sample.1', 'sample.3'] + self.metadata.index = ["sample.1", "sample.1", "sample.3"] ST = qdb.metadata_template.sample_template.SampleTemplate with self.assertRaises(qdb.exceptions.QiitaDBDuplicateSamplesError): ST._clean_validate_template(self.metadata, 2) def test_clean_validate_template_columns(self): metadata_dict = { - 'Sample1': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 1', - 'latitude': '42.42', - 'longitude': '41.41'} + "Sample1": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 1", + "latitude": "42.42", + "longitude": "41.41", } - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) ST = qdb.metadata_template.sample_template.SampleTemplate - obs = ST._clean_validate_template( - metadata, - 2, - current_columns=STC) + obs = ST._clean_validate_template(metadata, 2, current_columns=STC) metadata_dict = { - '2.Sample1': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 1', - 'latitude': '42.42', - 'longitude': '41.41'} + "2.Sample1": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "host_subject_id": "NotIdentified", + "description": "Test Sample 1", + "latitude": "42.42", + "longitude": "41.41", } - exp = pd.DataFrame.from_dict(metadata_dict, orient='index', dtype=str) + } + exp = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) obs.sort_index(axis=0, inplace=True) obs.sort_index(axis=1, inplace=True) exp.sort_index(axis=0, inplace=True) @@ -765,49 +1000,49 @@ def test_clean_validate_template_columns(self): def test_clean_validate_template(self): ST = qdb.metadata_template.sample_template.SampleTemplate - obs = ST._clean_validate_template( - self.metadata, - 2, - current_columns=STC) + obs = ST._clean_validate_template(self.metadata, 2, current_columns=STC) metadata_dict = { - '2.Sample1': {'physical_specimen_location': 
'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 1', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - '2.Sample2': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 2', - 'latitude': '4.2', - 'longitude': '1.1', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - '2.Sample3': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': - '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 3', - 'latitude': '4.8', - 'longitude': '4.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - } - exp = pd.DataFrame.from_dict(metadata_dict, orient='index', dtype=str) + "2.Sample1": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 1", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "2.Sample2": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 2", + "latitude": "4.2", + "longitude": "1.1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "2.Sample3": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 3", + "latitude": "4.8", + "longitude": "4.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } + exp = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) obs.sort_index(axis=0, inplace=True) obs.sort_index(axis=1, inplace=True) exp.sort_index(axis=0, inplace=True) @@ -816,52 +1051,51 @@ def test_clean_validate_template(self): def test_clean_validate_template_no_pgsql_reserved_words(self): ST = qdb.metadata_template.sample_template.SampleTemplate - self.metadata.rename(columns={'taxon_id': 'select'}, inplace=True) + self.metadata.rename(columns={"taxon_id": "select"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): ST._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_qiime2_reserved_words(self): ST = qdb.metadata_template.sample_template.SampleTemplate - self.metadata.rename(columns={'taxon_id': 'featureid'}, inplace=True) + self.metadata.rename(columns={"taxon_id": "featureid"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): ST._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_invalid_chars(self): ST = qdb.metadata_template.sample_template.SampleTemplate - self.metadata.rename(columns={'taxon_id': 'taxon id'}, inplace=True) + self.metadata.rename(columns={"taxon_id": "taxon 
id"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): ST._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_invalid_chars2(self): ST = qdb.metadata_template.sample_template.SampleTemplate - self.metadata.rename(columns={'taxon_id': 'bla.'}, inplace=True) + self.metadata.rename(columns={"taxon_id": "bla."}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): ST._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_invalid_chars3(self): ST = qdb.metadata_template.sample_template.SampleTemplate - self.metadata.rename(columns={'taxon_id': 'this&is'}, inplace=True) + self.metadata.rename(columns={"taxon_id": "this&is"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): ST._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_forbidden_words(self): ST = qdb.metadata_template.sample_template.SampleTemplate - self.metadata.rename(columns={'taxon_id': 'sampleid'}, inplace=True) + self.metadata.rename(columns={"taxon_id": "sampleid"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): ST._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_forbidden_words2(self): ST = qdb.metadata_template.sample_template.SampleTemplate # A word forbidden only in SampleTemplate - self.metadata.rename(columns={'taxon_id': 'linkerprimersequence'}, - inplace=True) + self.metadata.rename(columns={"taxon_id": "linkerprimersequence"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): ST._clean_validate_template(self.metadata, 2) def test_clean_validate_template_no_forbidden_words3(self): ST = qdb.metadata_template.sample_template.SampleTemplate # A word forbidden only in SampleTemplate - self.metadata.rename(columns={'taxon_id': 'barcode'}, inplace=True) + self.metadata.rename(columns={"taxon_id": "barcode"}, inplace=True) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): ST._clean_validate_template(self.metadata, 2) @@ -874,354 +1108,444 @@ def test_identify_forbidden_words_in_column_names(self): # verifies all forbidden elements for base class are returned # verifies a forbidden word in sub-class will not be returned # verifies normal column names are not returned - results = ST._identify_forbidden_words_in_column_names([ - 'just_fine3', - 'sampleid', - 'alice', - 'linkerprimersequence', - 'bob', - 'qiita_study_id', - 'qiita_prep_id', - 'eve']) - self.assertEqual(set(results), - {'qiita_prep_id', - 'qiita_study_id', - 'linkerprimersequence', - 'sampleid'}) + results = ST._identify_forbidden_words_in_column_names( + [ + "just_fine3", + "sampleid", + "alice", + "linkerprimersequence", + "bob", + "qiita_study_id", + "qiita_prep_id", + "eve", + ] + ) + self.assertEqual( + set(results), + {"qiita_prep_id", "qiita_study_id", "linkerprimersequence", "sampleid"}, + ) def test_silent_drop(self): ST = qdb.metadata_template.sample_template.SampleTemplate - self.assertNotIn('qiitq_prep_id', - (ST._clean_validate_template(self.metadata, - 2)).columns.tolist()) + self.assertNotIn( + "qiitq_prep_id", + (ST._clean_validate_template(self.metadata, 2)).columns.tolist(), + ) def test_get_category(self): pt = qdb.metadata_template.sample_template.SampleTemplate(1) - obs = pt.get_category('latitude') - exp = {'1.SKB2.640194': '35.2374368957', - '1.SKM4.640180': 'Not applicable', - '1.SKB3.640195': '95.2060749748', - '1.SKB6.640176': '78.3634273709', - '1.SKD6.640190': '29.1499460692', - '1.SKM6.640187': 
'0.291867635913', - '1.SKD9.640182': '23.1218032799', - '1.SKM8.640201': '3.21190859967', - '1.SKM2.640199': '82.8302905615', - '1.SKD2.640178': '53.5050692395', - '1.SKB7.640196': '13.089194595', - '1.SKD4.640185': '40.8623799474', - '1.SKB8.640193': '74.0894932572', - '1.SKM3.640197': 'Not applicable', - '1.SKD5.640186': '85.4121476399', - '1.SKB1.640202': '4.59216095574', - '1.SKM1.640183': '38.2627021402', - '1.SKD1.640179': '68.0991287718', - '1.SKD3.640198': '84.0030227585', - '1.SKB5.640181': '10.6655599093', - '1.SKB4.640189': '43.9614715197', - '1.SKB9.640200': '12.6245524972', - '1.SKM9.640192': '12.7065957714', - '1.SKD8.640184': '57.571893782', - '1.SKM5.640177': '44.9725384282', - '1.SKM7.640188': '60.1102854322', - '1.SKD7.640191': '68.51099627'} + obs = pt.get_category("latitude") + exp = { + "1.SKB2.640194": "35.2374368957", + "1.SKM4.640180": "Not applicable", + "1.SKB3.640195": "95.2060749748", + "1.SKB6.640176": "78.3634273709", + "1.SKD6.640190": "29.1499460692", + "1.SKM6.640187": "0.291867635913", + "1.SKD9.640182": "23.1218032799", + "1.SKM8.640201": "3.21190859967", + "1.SKM2.640199": "82.8302905615", + "1.SKD2.640178": "53.5050692395", + "1.SKB7.640196": "13.089194595", + "1.SKD4.640185": "40.8623799474", + "1.SKB8.640193": "74.0894932572", + "1.SKM3.640197": "Not applicable", + "1.SKD5.640186": "85.4121476399", + "1.SKB1.640202": "4.59216095574", + "1.SKM1.640183": "38.2627021402", + "1.SKD1.640179": "68.0991287718", + "1.SKD3.640198": "84.0030227585", + "1.SKB5.640181": "10.6655599093", + "1.SKB4.640189": "43.9614715197", + "1.SKB9.640200": "12.6245524972", + "1.SKM9.640192": "12.7065957714", + "1.SKD8.640184": "57.571893782", + "1.SKM5.640177": "44.9725384282", + "1.SKM7.640188": "60.1102854322", + "1.SKD7.640191": "68.51099627", + } self.assertEqual(obs, exp) def test_get_category_no_exists(self): pt = qdb.metadata_template.sample_template.SampleTemplate(1) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): - pt.get_category('DOESNOTEXIST') + pt.get_category("DOESNOTEXIST") def test_create_duplicate(self): """Create raises an error when creating a duplicated SampleTemplate""" with self.assertRaises(qdb.exceptions.QiitaDBDuplicateError): qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.test_study) + self.metadata, self.test_study + ) def test_create_duplicate_header(self): """Create raises an error when duplicate headers are present""" - self.metadata['SAMPLE_TYPE'] = pd.Series(['', '', ''], - index=self.metadata.index) + self.metadata["SAMPLE_TYPE"] = pd.Series( + ["", "", ""], index=self.metadata.index + ) with self.assertRaises(qdb.exceptions.QiitaDBDuplicateHeaderError): qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) def test_create_bad_sample_names(self): """Create raises an error when duplicate headers are present""" # set a horrible list of sample names - self.metadata.index = ['o()xxxx[{::::::::>', 'sample.1', 'sample.3'] + self.metadata.index = ["o()xxxx[{::::::::>", "sample.1", "sample.3"] with self.assertRaises(qdb.exceptions.QiitaDBColumnError): qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) def test_create(self): """Creates a new SampleTemplate""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) new_id = self.new_study.id # The returned object has the correct 
id self.assertEqual(st.id, new_id) self.assertEqual(st.study_id, self.new_study.id) self.assertTrue( qdb.metadata_template.sample_template.SampleTemplate.exists( - self.new_study.id)) - exp_sample_ids = {"%s.Sample1" % new_id, "%s.Sample2" % new_id, - "%s.Sample3" % new_id} + self.new_study.id + ) + ) + exp_sample_ids = { + "%s.Sample1" % new_id, + "%s.Sample2" % new_id, + "%s.Sample3" % new_id, + } self.assertEqual(st._get_sample_ids(), exp_sample_ids) self.assertEqual(len(st), 3) - exp_categories = {'collection_timestamp', 'description', - 'dna_extracted', 'host_subject_id', 'latitude', - 'longitude', 'physical_specimen_location', - 'physical_specimen_remaining', 'sample_type', - 'scientific_name', 'taxon_id'} + exp_categories = { + "collection_timestamp", + "description", + "dna_extracted", + "host_subject_id", + "latitude", + "longitude", + "physical_specimen_location", + "physical_specimen_remaining", + "sample_type", + "scientific_name", + "taxon_id", + } self.assertCountEqual(st.categories, exp_categories) exp_dict = { "%s.Sample1" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 1", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '42.42', - 'longitude': '41.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 1", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "42.42", + "longitude": "41.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.Sample2" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 2", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.2', - 'longitude': '1.1', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 2", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.2", + "longitude": "1.1", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.Sample3" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 3", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.8', - 'longitude': '4.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}} + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 3", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.8", + "longitude": "4.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } for s_id in exp_sample_ids: self.assertDictEqual(st[s_id]._to_dict(), exp_dict[s_id]) - exp = {"%s.Sample1" % new_id: None, - "%s.Sample2" % new_id: None, - "%s.Sample3" % new_id: None} + exp = { + "%s.Sample1" % new_id: None, + 
"%s.Sample2" % new_id: None, + "%s.Sample3" % new_id: None, + } self.assertEqual(st.ebi_sample_accessions, exp) self.assertEqual(st.biosample_accessions, exp) def test_create_int_prefix(self): """Creates a new SampleTemplate with sample names int prefixed""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata_int_pref, self.new_study) + self.metadata_int_pref, self.new_study + ) new_id = self.new_study.id # The returned object has the correct id self.assertEqual(st.id, new_id) self.assertEqual(st.study_id, self.new_study.id) self.assertTrue( qdb.metadata_template.sample_template.SampleTemplate.exists( - self.new_study.id)) - exp_sample_ids = {"%s.12.Sample1" % new_id, "%s.12.Sample2" % new_id, - "%s.12.Sample3" % new_id} + self.new_study.id + ) + ) + exp_sample_ids = { + "%s.12.Sample1" % new_id, + "%s.12.Sample2" % new_id, + "%s.12.Sample3" % new_id, + } self.assertEqual(st._get_sample_ids(), exp_sample_ids) self.assertEqual(len(st), 3) - exp_categories = {'collection_timestamp', 'description', - 'dna_extracted', 'host_subject_id', 'latitude', - 'longitude', 'physical_specimen_location', - 'physical_specimen_remaining', 'sample_type', - 'scientific_name', 'taxon_id'} + exp_categories = { + "collection_timestamp", + "description", + "dna_extracted", + "host_subject_id", + "latitude", + "longitude", + "physical_specimen_location", + "physical_specimen_remaining", + "sample_type", + "scientific_name", + "taxon_id", + } self.assertCountEqual(st.categories, exp_categories) exp_dict = { "%s.12.Sample1" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 1", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '42.42', - 'longitude': '41.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 1", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "42.42", + "longitude": "41.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.12.Sample2" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 2", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.2', - 'longitude': '1.1', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 2", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.2", + "longitude": "1.1", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.12.Sample3" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 3", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.8', - 'longitude': '4.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}} + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 3", + "dna_extracted": 
"true", + "host_subject_id": "NotIdentified", + "latitude": "4.8", + "longitude": "4.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } for s_id in exp_sample_ids: self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) - exp = {"%s.12.Sample1" % new_id: None, - "%s.12.Sample2" % new_id: None, - "%s.12.Sample3" % new_id: None} + exp = { + "%s.12.Sample1" % new_id: None, + "%s.12.Sample2" % new_id: None, + "%s.12.Sample3" % new_id: None, + } self.assertEqual(st.ebi_sample_accessions, exp) self.assertEqual(st.biosample_accessions, exp) def test_create_str_prefixes(self): """Creates a new SampleTemplate with sample names string prefixed""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata_str_prefix, self.new_study) + self.metadata_str_prefix, self.new_study + ) new_id = self.new_study.id # The returned object has the correct id self.assertEqual(st.id, new_id) self.assertEqual(st.study_id, self.new_study.id) self.assertTrue( qdb.metadata_template.sample_template.SampleTemplate.exists( - self.new_study.id)) - exp_sample_ids = {"%s.foo.Sample1" % new_id, "%s.bar.Sample2" % new_id, - "%s.foo.Sample3" % new_id} + self.new_study.id + ) + ) + exp_sample_ids = { + "%s.foo.Sample1" % new_id, + "%s.bar.Sample2" % new_id, + "%s.foo.Sample3" % new_id, + } self.assertEqual(st._get_sample_ids(), exp_sample_ids) self.assertEqual(len(st), 3) - exp_categories = {'collection_timestamp', 'description', - 'dna_extracted', 'host_subject_id', 'latitude', - 'longitude', 'physical_specimen_location', - 'physical_specimen_remaining', 'sample_type', - 'scientific_name', 'taxon_id'} + exp_categories = { + "collection_timestamp", + "description", + "dna_extracted", + "host_subject_id", + "latitude", + "longitude", + "physical_specimen_location", + "physical_specimen_remaining", + "sample_type", + "scientific_name", + "taxon_id", + } self.assertCountEqual(st.categories, exp_categories) exp_dict = { "%s.foo.Sample1" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 1", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '42.42', - 'longitude': '41.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 1", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "42.42", + "longitude": "41.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.bar.Sample2" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 2", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.2', - 'longitude': '1.1', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 2", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.2", + "longitude": "1.1", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + 
"scientific_name": "homo sapiens", + }, "%s.foo.Sample3" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 3", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.8', - 'longitude': '4.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}} + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 3", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.8", + "longitude": "4.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } for s_id in exp_sample_ids: self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) - exp = {"%s.foo.Sample1" % new_id: None, - "%s.bar.Sample2" % new_id: None, - "%s.foo.Sample3" % new_id: None} + exp = { + "%s.foo.Sample1" % new_id: None, + "%s.bar.Sample2" % new_id: None, + "%s.foo.Sample3" % new_id: None, + } self.assertEqual(st.ebi_sample_accessions, exp) self.assertEqual(st.biosample_accessions, exp) def test_create_already_prefixed_samples(self): """Creates a new SampleTemplate with the samples already prefixed""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata_prefixed, self.new_study) + self.metadata_prefixed, self.new_study + ) new_id = self.new_study.id # The returned object has the correct id self.assertEqual(st.id, new_id) self.assertEqual(st.study_id, self.new_study.id) self.assertTrue( qdb.metadata_template.sample_template.SampleTemplate.exists( - self.new_study.id)) - exp_sample_ids = {"%s.Sample1" % new_id, "%s.Sample2" % new_id, - "%s.Sample3" % new_id} + self.new_study.id + ) + ) + exp_sample_ids = { + "%s.Sample1" % new_id, + "%s.Sample2" % new_id, + "%s.Sample3" % new_id, + } self.assertEqual(st._get_sample_ids(), exp_sample_ids) self.assertEqual(len(st), 3) - exp_categories = {'collection_timestamp', 'description', - 'dna_extracted', 'host_subject_id', 'latitude', - 'longitude', 'physical_specimen_location', - 'physical_specimen_remaining', 'sample_type', - 'scientific_name', 'taxon_id'} + exp_categories = { + "collection_timestamp", + "description", + "dna_extracted", + "host_subject_id", + "latitude", + "longitude", + "physical_specimen_location", + "physical_specimen_remaining", + "sample_type", + "scientific_name", + "taxon_id", + } self.assertCountEqual(st.categories, exp_categories) exp_dict = { "%s.Sample1" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 1", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '42.42', - 'longitude': '41.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 1", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "42.42", + "longitude": "41.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.Sample2" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 2", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.2', - 
'longitude': '1.1', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 2", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.2", + "longitude": "1.1", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.Sample3" % new_id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 3", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.8', - 'longitude': '4.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}} + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 3", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.8", + "longitude": "4.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } for s_id in exp_sample_ids: self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) - exp = {"%s.Sample1" % new_id: None, - "%s.Sample2" % new_id: None, - "%s.Sample3" % new_id: None} + exp = { + "%s.Sample1" % new_id: None, + "%s.Sample2" % new_id: None, + "%s.Sample3" % new_id: None, + } self.assertEqual(st.ebi_sample_accessions, exp) self.assertEqual(st.biosample_accessions, exp) def test_delete(self): """Deletes Sample template 1""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) qdb.metadata_template.sample_template.SampleTemplate.delete(st.id) exp = [] @@ -1233,8 +1557,11 @@ def test_delete(self): with self.assertRaises(ValueError): with qdb.sql_connection.TRN: - sql = """SELECT * - FROM qiita.sample_%d""" % st.id + sql = ( + """SELECT * + FROM qiita.sample_%d""" + % st.id + ) qdb.sql_connection.TRN.add(sql) with self.assertRaises(qdb.exceptions.QiitaDBError): @@ -1249,38 +1576,37 @@ def test_exists_false(self): """Exists returns false when the SampleTemplate does not exists""" self.assertFalse( qdb.metadata_template.sample_template.SampleTemplate.exists( - self.new_study.id)) + self.new_study.id + ) + ) def test_update_category(self): with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): - self.tester.update_category('country', {"foo": "bar"}) + self.tester.update_category("country", {"foo": "bar"}) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): - self.tester.update_category('missing column', - {'1.SKM7.640188': 'stuff'}) + self.tester.update_category("missing column", {"1.SKM7.640188": "stuff"}) - negtest = self.tester['1.SKM7.640188']['country'] + negtest = self.tester["1.SKM7.640188"]["country"] - mapping = {'1.SKB1.640202': "1", - '1.SKB5.640181': "2", - '1.SKD6.640190': "3"} + mapping = {"1.SKB1.640202": "1", "1.SKB5.640181": "2", "1.SKD6.640190": "3"} - self.tester.update_category('country', mapping) + self.tester.update_category("country", mapping) - self.assertEqual(self.tester['1.SKB1.640202']['country'], "1") - self.assertEqual(self.tester['1.SKB5.640181']['country'], "2") - self.assertEqual(self.tester['1.SKD6.640190']['country'], "3") - 
self.assertEqual(self.tester['1.SKM7.640188']['country'], negtest) + self.assertEqual(self.tester["1.SKB1.640202"]["country"], "1") + self.assertEqual(self.tester["1.SKB5.640181"]["country"], "2") + self.assertEqual(self.tester["1.SKD6.640190"]["country"], "3") + self.assertEqual(self.tester["1.SKM7.640188"]["country"], negtest) def test_update_equal(self): """It doesn't fail with the exact same template""" # Create a new sample tempalte st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) exp = {s_id: st[s_id]._to_dict() for s_id in st} # Try to update the sample template with the same values - npt.assert_warns( - qdb.exceptions.QiitaDBWarning, st.update, self.metadata) + npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.update, self.metadata) # Check that no values have been changed obs = {s_id: st[s_id]._to_dict() for s_id in st} self.assertEqual(obs, exp) @@ -1289,21 +1615,22 @@ def test_update(self): """Updates values in existing mapping file""" # creating a new sample template st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) # updating the sample template st.update(self.metadata_dict_updated) # validating values - exp = self.metadata_dict_updated_dict['Sample1'].values() - obs = st.get('%s.Sample1' % self.new_study.id).values() + exp = self.metadata_dict_updated_dict["Sample1"].values() + obs = st.get("%s.Sample1" % self.new_study.id).values() self.assertCountEqual(obs, exp) - exp = self.metadata_dict_updated_dict['Sample2'].values() - obs = st.get('%s.Sample2' % self.new_study.id).values() + exp = self.metadata_dict_updated_dict["Sample2"].values() + obs = st.get("%s.Sample2" % self.new_study.id).values() self.assertCountEqual(obs, exp) - exp = self.metadata_dict_updated_dict['Sample3'].values() - obs = st.get('%s.Sample3' % self.new_study.id).values() + exp = self.metadata_dict_updated_dict["Sample3"].values() + obs = st.get("%s.Sample3" % self.new_study.id).values() self.assertCountEqual(obs, exp) # checking errors @@ -1315,13 +1642,16 @@ def test_update(self): def test_update_fewer_samples(self): """Updates using a dataframe with less samples that in the DB""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) new_metadata = pd.DataFrame.from_dict( - {'Sample1': {'physical_specimen_location': 'CHANGE'}}, - orient='index', dtype=str) + {"Sample1": {"physical_specimen_location": "CHANGE"}}, + orient="index", + dtype=str, + ) exp = {s_id: st[s_id]._to_dict() for s_id in st} - s_id = '%d.Sample1' % self.new_study.id - exp[s_id]['physical_specimen_location'] = 'CHANGE' + s_id = "%d.Sample1" % self.new_study.id + exp[s_id]["physical_specimen_location"] = "CHANGE" st.update(new_metadata) obs = {s_id: st[s_id]._to_dict() for s_id in st} self.assertEqual(obs, exp) @@ -1330,20 +1660,17 @@ def test_update_numpy(self): """Update values in existing mapping file with numpy values""" ST = qdb.metadata_template.sample_template.SampleTemplate metadata_dict = { - 'Sample1': {'bool_col': 'true', - 'date_col': '2015-09-01 00:00:00'}, - 'Sample2': {'bool_col': 'true', - 'date_col': '2015-09-01 00:00:00'} - } - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) - st = npt.assert_warns(qdb.exceptions.QiitaDBWarning, ST.create, - metadata, self.new_study) - - metadata_dict['Sample2']['date_col'] = '2015-09-01 00:00:00' - 
metadata_dict['Sample1']['bool_col'] = 'false' - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + "Sample1": {"bool_col": "true", "date_col": "2015-09-01 00:00:00"}, + "Sample2": {"bool_col": "true", "date_col": "2015-09-01 00:00:00"}, + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) + st = npt.assert_warns( + qdb.exceptions.QiitaDBWarning, ST.create, metadata, self.new_study + ) + + metadata_dict["Sample2"]["date_col"] = "2015-09-01 00:00:00" + metadata_dict["Sample1"]["bool_col"] = "false" + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.update, metadata) with qdb.sql_connection.TRN: @@ -1351,14 +1678,21 @@ def test_update_numpy(self): qdb.sql_connection.TRN.add(sql) obs = qdb.sql_connection.TRN.execute_fetchindex() exp = [ - ['%s.Sample2' % self.new_study.id, { - 'bool_col': 'true', 'date_col': '2015-09-01 00:00:00'}], - ['%s.Sample1' % self.new_study.id, { - 'bool_col': 'false', 'date_col': '2015-09-01 00:00:00'}], - ['qiita_sample_column_names', { - 'columns': sorted(['bool_col', 'date_col'])}]] + [ + "%s.Sample2" % self.new_study.id, + {"bool_col": "true", "date_col": "2015-09-01 00:00:00"}, + ], + [ + "%s.Sample1" % self.new_study.id, + {"bool_col": "false", "date_col": "2015-09-01 00:00:00"}, + ], + [ + "qiita_sample_column_names", + {"columns": sorted(["bool_col", "date_col"])}, + ], + ] # making sure they are always in the same order - obs[2][1]['columns'] = sorted(obs[2][1]['columns']) + obs[2][1]["columns"] = sorted(obs[2][1]["columns"]) self.assertEqual(sorted(obs), sorted(exp)) def test_generate_files(self): @@ -1374,7 +1708,8 @@ def test_to_file(self): fd, fp = mkstemp() close(fd) st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) st.to_file(fp) self._clean_up_files.append(fp) with open(fp, newline=None) as f: @@ -1383,14 +1718,16 @@ def test_to_file(self): fd, fp = mkstemp() close(fd) - st.to_file(fp, {'%s.Sample1' % self.new_study.id, - '%s.Sample3' % self.new_study.id}) + st.to_file( + fp, {"%s.Sample1" % self.new_study.id, "%s.Sample3" % self.new_study.id} + ) self._clean_up_files.append(fp) with open(fp, newline=None) as f: obs = f.read() self.assertEqual( - obs, EXP_SAMPLE_TEMPLATE_FEWER_SAMPLES.format(self.new_study.id)) + obs, EXP_SAMPLE_TEMPLATE_FEWER_SAMPLES.format(self.new_study.id) + ) def test_get_filepath(self): # we will check that there is a new id only because the path will @@ -1402,7 +1739,8 @@ def test_get_filepath(self): qdb.sql_connection.TRN.add(sql) exp_id = qdb.sql_connection.TRN.execute_fetchflatten()[0] + 1 st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) self.assertEqual(st.get_filepaths()[0][0], exp_id) # testing current functionaly, to add a new sample template @@ -1410,41 +1748,48 @@ def test_get_filepath(self): qdb.metadata_template.sample_template.SampleTemplate.delete(st.id) exp_id += 1 st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) self.assertEqual(st.get_filepaths()[0][0], exp_id) def test_extend_add_samples(self): """extend correctly works adding new samples""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) # we just created the sample info file so we should 
only have one # filepath self.assertEqual(len(st.get_filepaths()), 1) md_dict = { - 'Sample4': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - 'Sample5': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 5', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}} - md_ext = pd.DataFrame.from_dict(md_dict, orient='index', dtype=str) + "Sample4": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "Sample5": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 5", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } + md_ext = pd.DataFrame.from_dict(md_dict, orient="index", dtype=str) npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext) # we just updated so we should have 2 files: self.assertEqual(len(st.get_filepaths()), 2) @@ -1453,494 +1798,590 @@ def test_extend_add_samples(self): self.assertEqual(len(st.get_filepaths()), 2) # Test samples have been added correctly - exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id, - "%s.Sample3" % st.id, "%s.Sample4" % st.id, - "%s.Sample5" % st.id} + exp_sample_ids = { + "%s.Sample1" % st.id, + "%s.Sample2" % st.id, + "%s.Sample3" % st.id, + "%s.Sample4" % st.id, + "%s.Sample5" % st.id, + } self.assertEqual(st._get_sample_ids(), exp_sample_ids) self.assertEqual(len(st), 5) - exp_categories = {'collection_timestamp', 'description', - 'dna_extracted', 'host_subject_id', 'latitude', - 'longitude', 'physical_specimen_location', - 'physical_specimen_remaining', 'sample_type', - 'scientific_name', 'taxon_id'} + exp_categories = { + "collection_timestamp", + "description", + "dna_extracted", + "host_subject_id", + "latitude", + "longitude", + "physical_specimen_location", + "physical_specimen_remaining", + "sample_type", + "scientific_name", + "taxon_id", + } self.assertCountEqual(st.categories, exp_categories) exp_dict = { "%s.Sample1" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 1", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '42.42', - 'longitude': '41.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 1", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "42.42", + "longitude": 
"41.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.Sample2" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 2", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.2', - 'longitude': '1.1', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 2", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.2", + "longitude": "1.1", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.Sample3" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 3", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.8', - 'longitude': '4.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - '%s.Sample4' % st.id: { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - '%s.Sample5' % st.id: { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 5', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}} + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 3", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.8", + "longitude": "4.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "%s.Sample4" % st.id: { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "%s.Sample5" % st.id: { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 5", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } for s_id in exp_sample_ids: self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) def test_extend_add_duplicate_samples(self): - """extend correctly works adding new samples and warns for duplicates - """ + """extend correctly works adding new samples and warns for 
duplicates""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) - - self.metadata_dict['Sample4'] = { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'} + self.metadata, self.new_study + ) + + self.metadata_dict["Sample4"] = { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + } # Change a couple of values on the existent samples to test that # they remain unchanged - self.metadata_dict['Sample1']['Description'] = 'Changed' - self.metadata_dict['Sample2']['scientific_name'] = 'Changed dynamic' + self.metadata_dict["Sample1"]["Description"] = "Changed" + self.metadata_dict["Sample2"]["scientific_name"] = "Changed dynamic" - md_ext = pd.DataFrame.from_dict(self.metadata_dict, orient='index', - dtype=str) + md_ext = pd.DataFrame.from_dict(self.metadata_dict, orient="index", dtype=str) # Make sure adding duplicate samples raises warning npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext) # Make sure the new sample has been added and the values for the # existent samples did not change - exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id, - "%s.Sample3" % st.id, "%s.Sample4" % st.id} + exp_sample_ids = { + "%s.Sample1" % st.id, + "%s.Sample2" % st.id, + "%s.Sample3" % st.id, + "%s.Sample4" % st.id, + } self.assertEqual(st._get_sample_ids(), exp_sample_ids) self.assertEqual(len(st), 4) - exp_categories = {'collection_timestamp', 'description', - 'dna_extracted', 'host_subject_id', 'latitude', - 'longitude', 'physical_specimen_location', - 'physical_specimen_remaining', 'sample_type', - 'scientific_name', 'taxon_id'} + exp_categories = { + "collection_timestamp", + "description", + "dna_extracted", + "host_subject_id", + "latitude", + "longitude", + "physical_specimen_location", + "physical_specimen_remaining", + "sample_type", + "scientific_name", + "taxon_id", + } self.assertCountEqual(st.categories, exp_categories) exp_dict = { "%s.Sample1" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 1", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '42.42', - 'longitude': '41.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 1", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "42.42", + "longitude": "41.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.Sample2" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 2", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.2', - 'longitude': '1.1', - 
'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 2", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.2", + "longitude": "1.1", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, "%s.Sample3" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 3", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.8', - 'longitude': '4.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - '%s.Sample4' % st.id: { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}} + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 3", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.8", + "longitude": "4.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "%s.Sample4" % st.id: { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } for s_id in exp_sample_ids: self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) def test_extend_new_columns(self): """extend correctly adds a new column""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) - self.metadata['texture'] = pd.Series(['val1', 'val2', 'val3'], - index=self.metadata.index) - self.metadata['TOT_NITRO'] = pd.Series(['val_1', 'val_2', 'val_3'], - index=self.metadata.index) + self.metadata["texture"] = pd.Series( + ["val1", "val2", "val3"], index=self.metadata.index + ) + self.metadata["TOT_NITRO"] = pd.Series( + ["val_1", "val_2", "val_3"], index=self.metadata.index + ) # Change some values to make sure that they do not change on extend - self.metadata_dict['Sample1']['Description'] = 'Changed' - self.metadata_dict['Sample2']['scientific_name'] = 'Changed dynamic' + self.metadata_dict["Sample1"]["Description"] = "Changed" + self.metadata_dict["Sample2"]["scientific_name"] = "Changed dynamic" # Make sure it raises a warning indicating that the new columns will # be added for the existing samples - npt.assert_warns( - qdb.exceptions.QiitaDBWarning, st.extend, self.metadata) + npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, self.metadata) - exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id, - "%s.Sample3" % st.id} + exp_sample_ids = { + "%s.Sample1" % st.id, + "%s.Sample2" % st.id, + "%s.Sample3" % st.id, + } 
self.assertEqual(st._get_sample_ids(), exp_sample_ids) self.assertEqual(len(st), 3) - exp_categories = {'collection_timestamp', 'description', - 'dna_extracted', 'host_subject_id', 'latitude', - 'longitude', 'physical_specimen_location', - 'physical_specimen_remaining', 'sample_type', - 'scientific_name', 'taxon_id', - 'texture', 'tot_nitro'} + exp_categories = { + "collection_timestamp", + "description", + "dna_extracted", + "host_subject_id", + "latitude", + "longitude", + "physical_specimen_location", + "physical_specimen_remaining", + "sample_type", + "scientific_name", + "taxon_id", + "texture", + "tot_nitro", + } self.assertCountEqual(st.categories, exp_categories) exp_dict = { "%s.Sample1" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 1", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '42.42', - 'longitude': '41.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'texture': 'val1', - 'tot_nitro': 'val_1'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 1", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "42.42", + "longitude": "41.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "texture": "val1", + "tot_nitro": "val_1", + }, "%s.Sample2" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 2", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.2', - 'longitude': '1.1', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'texture': 'val2', - 'tot_nitro': 'val_2'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 2", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.2", + "longitude": "1.1", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "texture": "val2", + "tot_nitro": "val_2", + }, "%s.Sample3" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 3", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.8', - 'longitude': '4.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'texture': 'val3', - 'tot_nitro': 'val_3'}} + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 3", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.8", + "longitude": "4.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "texture": "val3", + "tot_nitro": "val_3", + }, + } for s_id in exp_sample_ids: self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) def test_extend_new_samples_and_columns(self): """extend correctly adds new samples and columns at the same time""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) - - 
self.metadata_dict['Sample4'] = { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'} + self.metadata, self.new_study + ) + + self.metadata_dict["Sample4"] = { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + } # Change a couple of values on the existent samples to test that # they remain unchanged - self.metadata_dict['Sample1']['Description'] = 'Changed' - self.metadata_dict['Sample2']['dna_extracted'] = 'Changed dynamic' + self.metadata_dict["Sample1"]["Description"] = "Changed" + self.metadata_dict["Sample2"]["dna_extracted"] = "Changed dynamic" - md_ext = pd.DataFrame.from_dict(self.metadata_dict, orient='index', - dtype=str) + md_ext = pd.DataFrame.from_dict(self.metadata_dict, orient="index", dtype=str) - md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'], - index=md_ext.index) + md_ext["TOT_NITRO"] = pd.Series( + ["val1", "val2", "val3", "val4"], index=md_ext.index + ) # Make sure adding duplicate samples raises warning npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext) - exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id, - "%s.Sample3" % st.id, "%s.Sample4" % st.id} + exp_sample_ids = { + "%s.Sample1" % st.id, + "%s.Sample2" % st.id, + "%s.Sample3" % st.id, + "%s.Sample4" % st.id, + } self.assertEqual(st._get_sample_ids(), exp_sample_ids) self.assertEqual(len(st), 4) - exp_categories = {'collection_timestamp', 'description', - 'dna_extracted', 'host_subject_id', 'latitude', - 'longitude', 'physical_specimen_location', - 'physical_specimen_remaining', 'sample_type', - 'scientific_name', 'taxon_id', 'tot_nitro'} + exp_categories = { + "collection_timestamp", + "description", + "dna_extracted", + "host_subject_id", + "latitude", + "longitude", + "physical_specimen_location", + "physical_specimen_remaining", + "sample_type", + "scientific_name", + "taxon_id", + "tot_nitro", + } self.assertCountEqual(st.categories, exp_categories) exp_dict = { "%s.Sample1" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 1", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '42.42', - 'longitude': '41.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'tot_nitro': 'val1'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 1", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "42.42", + "longitude": "41.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "tot_nitro": "val1", + }, "%s.Sample2" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 2", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.2', 
- 'longitude': '1.1', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'tot_nitro': 'val2'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 2", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.2", + "longitude": "1.1", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "tot_nitro": "val2", + }, "%s.Sample3" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 3", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.8', - 'longitude': '4.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'tot_nitro': 'val3'}, - '%s.Sample4' % st.id: { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'tot_nitro': 'val4'}} + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 3", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.8", + "longitude": "4.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "tot_nitro": "val3", + }, + "%s.Sample4" % st.id: { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "tot_nitro": "val4", + }, + } for s_id in exp_sample_ids: self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) def test_extend_update(self): """extend correctly adds new samples and columns at the same time""" st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) # test updating with same data, none of the rest of the code/tests # should change st.extend_and_update(self.metadata) - self.metadata_dict['Sample4'] = { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'} - - self.metadata_dict['Sample1']['Description'] = 'Changed' - self.metadata_dict['Sample2']['scientific_name'] = 'Changed dynamic' - - md_ext = pd.DataFrame.from_dict(self.metadata_dict, orient='index', - dtype=str) - - md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'], - index=md_ext.index) - - npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend_and_update, - md_ext) - exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id, - 
"%s.Sample3" % st.id, "%s.Sample4" % st.id} + self.metadata_dict["Sample4"] = { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + } + + self.metadata_dict["Sample1"]["Description"] = "Changed" + self.metadata_dict["Sample2"]["scientific_name"] = "Changed dynamic" + + md_ext = pd.DataFrame.from_dict(self.metadata_dict, orient="index", dtype=str) + + md_ext["TOT_NITRO"] = pd.Series( + ["val1", "val2", "val3", "val4"], index=md_ext.index + ) + + npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend_and_update, md_ext) + exp_sample_ids = { + "%s.Sample1" % st.id, + "%s.Sample2" % st.id, + "%s.Sample3" % st.id, + "%s.Sample4" % st.id, + } self.assertEqual(st._get_sample_ids(), exp_sample_ids) self.assertEqual(len(st), 4) - exp_categories = {'collection_timestamp', 'description', - 'dna_extracted', 'host_subject_id', 'latitude', - 'longitude', 'physical_specimen_location', - 'physical_specimen_remaining', 'sample_type', - 'scientific_name', 'taxon_id', 'tot_nitro'} + exp_categories = { + "collection_timestamp", + "description", + "dna_extracted", + "host_subject_id", + "latitude", + "longitude", + "physical_specimen_location", + "physical_specimen_remaining", + "sample_type", + "scientific_name", + "taxon_id", + "tot_nitro", + } self.assertCountEqual(st.categories, exp_categories) exp_dict = { "%s.Sample1" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Changed", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '42.42', - 'longitude': '41.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'tot_nitro': 'val1'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Changed", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "42.42", + "longitude": "41.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "tot_nitro": "val1", + }, "%s.Sample2" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 2", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.2', - 'longitude': '1.1', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': "type1", - 'taxon_id': '9606', - 'scientific_name': 'Changed dynamic', - 'tot_nitro': 'val2'}, + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 2", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.2", + "longitude": "1.1", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "Changed dynamic", + "tot_nitro": "val2", + }, "%s.Sample3" % st.id: { - 'collection_timestamp': '2014-05-29 12:24:15', - 'description': "Test Sample 3", - 'dna_extracted': 'true', - 'host_subject_id': "NotIdentified", - 'latitude': '4.8', - 'longitude': '4.41', - 'physical_specimen_location': "location1", - 'physical_specimen_remaining': 'true', - 'sample_type': 
"type1", - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'tot_nitro': 'val3'}, - '%s.Sample4' % st.id: { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens', - 'tot_nitro': 'val4'}} + "collection_timestamp": "2014-05-29 12:24:15", + "description": "Test Sample 3", + "dna_extracted": "true", + "host_subject_id": "NotIdentified", + "latitude": "4.8", + "longitude": "4.41", + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "sample_type": "type1", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "tot_nitro": "val3", + }, + "%s.Sample4" % st.id: { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + "tot_nitro": "val4", + }, + } for s_id in exp_sample_ids: self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) def test_to_dataframe(self): st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) obs = st.to_dataframe() new_id = self.new_study.id exp_dict = { - '%s.Sample1' % new_id: { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 1', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'qiita_study_id': str(new_id), - 'scientific_name': 'homo sapiens'}, - '%s.Sample2' % new_id: { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 2', - 'latitude': '4.2', - 'longitude': '1.1', - 'taxon_id': '9606', - 'qiita_study_id': str(new_id), - 'scientific_name': 'homo sapiens'}, - '%s.Sample3' % new_id: { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 3', - 'latitude': '4.8', - 'longitude': '4.41', - 'taxon_id': '9606', - 'qiita_study_id': str(new_id), - 'scientific_name': 'homo sapiens'}, - } - exp = pd.DataFrame.from_dict(exp_dict, orient='index', dtype=str) - exp.index.name = 'sample_name' + "%s.Sample1" % new_id: { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 1", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "qiita_study_id": str(new_id), + "scientific_name": "homo sapiens", + }, + "%s.Sample2" % new_id: { + "physical_specimen_location": "location1", + "physical_specimen_remaining": 
"true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 2", + "latitude": "4.2", + "longitude": "1.1", + "taxon_id": "9606", + "qiita_study_id": str(new_id), + "scientific_name": "homo sapiens", + }, + "%s.Sample3" % new_id: { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "description": "Test Sample 3", + "latitude": "4.8", + "longitude": "4.41", + "taxon_id": "9606", + "qiita_study_id": str(new_id), + "scientific_name": "homo sapiens", + }, + } + exp = pd.DataFrame.from_dict(exp_dict, orient="index", dtype=str) + exp.index.name = "sample_name" obs.sort_index(axis=0, inplace=True) obs.sort_index(axis=1, inplace=True) exp.sort_index(axis=0, inplace=True) @@ -1953,30 +2394,74 @@ def test_to_dataframe(self): # 27 samples self.assertEqual(len(obs), 27) - exp = {'1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195', - '1.SKB4.640189', '1.SKB5.640181', '1.SKB6.640176', - '1.SKB7.640196', '1.SKB8.640193', '1.SKB9.640200', - '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198', - '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190', - '1.SKD7.640191', '1.SKD8.640184', '1.SKD9.640182', - '1.SKM1.640183', '1.SKM2.640199', '1.SKM3.640197', - '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187', - '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192'} + exp = { + "1.SKB1.640202", + "1.SKB2.640194", + "1.SKB3.640195", + "1.SKB4.640189", + "1.SKB5.640181", + "1.SKB6.640176", + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKB9.640200", + "1.SKD1.640179", + "1.SKD2.640178", + "1.SKD3.640198", + "1.SKD4.640185", + "1.SKD5.640186", + "1.SKD6.640190", + "1.SKD7.640191", + "1.SKD8.640184", + "1.SKD9.640182", + "1.SKM1.640183", + "1.SKM2.640199", + "1.SKM3.640197", + "1.SKM4.640180", + "1.SKM5.640177", + "1.SKM6.640187", + "1.SKM7.640188", + "1.SKM8.640201", + "1.SKM9.640192", + } self.assertEqual(set(obs.index), exp) exp_columns = { - 'physical_specimen_location', 'physical_specimen_remaining', - 'dna_extracted', 'sample_type', 'collection_timestamp', - 'host_subject_id', 'description', 'latitude', 'longitude', - 'season_environment', 'assigned_from_geo', 'texture', - 'taxon_id', 'depth', 'host_taxid', 'common_name', 'env_package', - 'water_content_soil', 'elevation', 'temp', 'tot_nitro', - 'samp_salinity', 'altitude', 'env_biome', 'country', 'ph', - 'anonymized_name', 'tot_org_carb', 'description_duplicate', - 'env_feature', 'scientific_name', 'qiita_study_id'} + "physical_specimen_location", + "physical_specimen_remaining", + "dna_extracted", + "sample_type", + "collection_timestamp", + "host_subject_id", + "description", + "latitude", + "longitude", + "season_environment", + "assigned_from_geo", + "texture", + "taxon_id", + "depth", + "host_taxid", + "common_name", + "env_package", + "water_content_soil", + "elevation", + "temp", + "tot_nitro", + "samp_salinity", + "altitude", + "env_biome", + "country", + "ph", + "anonymized_name", + "tot_org_carb", + "description_duplicate", + "env_feature", + "scientific_name", + "qiita_study_id", + } self.assertEqual(set(obs.columns), exp_columns) # test limiting samples produced - exp_samples = set(['1.SKD4.640185', '1.SKD5.640186']) + exp_samples = set(["1.SKD4.640185", "1.SKD5.640186"]) obs = self.tester.to_dataframe(samples=exp_samples) self.assertEqual(len(obs), 2) 
self.assertEqual(set(obs.index), exp_samples) @@ -1985,65 +2470,74 @@ def test_to_dataframe(self): # test with add_ebi_accessions as True obs = self.tester.to_dataframe(True) self.assertEqual( - self.tester.ebi_sample_accessions, - obs.qiita_ebi_sample_accessions.to_dict()) + self.tester.ebi_sample_accessions, obs.qiita_ebi_sample_accessions.to_dict() + ) def test_check_restrictions(self): - obs = self.tester.check_restrictions( - [STC['EBI']]) + obs = self.tester.check_restrictions([STC["EBI"]]) self.assertEqual(obs, set([])) def test_ebi_sample_accessions(self): obs = self.tester.ebi_sample_accessions - exp = {'1.SKB8.640193': 'ERS000000', - '1.SKD8.640184': 'ERS000001', - '1.SKB7.640196': 'ERS000002', - '1.SKM9.640192': 'ERS000003', - '1.SKM4.640180': 'ERS000004', - '1.SKM5.640177': 'ERS000005', - '1.SKB5.640181': 'ERS000006', - '1.SKD6.640190': 'ERS000007', - '1.SKB2.640194': 'ERS000008', - '1.SKD2.640178': 'ERS000009', - '1.SKM7.640188': 'ERS000010', - '1.SKB1.640202': 'ERS000011', - '1.SKD1.640179': 'ERS000012', - '1.SKD3.640198': 'ERS000013', - '1.SKM8.640201': 'ERS000014', - '1.SKM2.640199': 'ERS000015', - '1.SKB9.640200': 'ERS000016', - '1.SKD5.640186': 'ERS000017', - '1.SKM3.640197': 'ERS000018', - '1.SKD9.640182': 'ERS000019', - '1.SKB4.640189': 'ERS000020', - '1.SKD7.640191': 'ERS000021', - '1.SKM6.640187': 'ERS000022', - '1.SKD4.640185': 'ERS000023', - '1.SKB3.640195': 'ERS000024', - '1.SKB6.640176': 'ERS000025', - '1.SKM1.640183': 'ERS000025'} + exp = { + "1.SKB8.640193": "ERS000000", + "1.SKD8.640184": "ERS000001", + "1.SKB7.640196": "ERS000002", + "1.SKM9.640192": "ERS000003", + "1.SKM4.640180": "ERS000004", + "1.SKM5.640177": "ERS000005", + "1.SKB5.640181": "ERS000006", + "1.SKD6.640190": "ERS000007", + "1.SKB2.640194": "ERS000008", + "1.SKD2.640178": "ERS000009", + "1.SKM7.640188": "ERS000010", + "1.SKB1.640202": "ERS000011", + "1.SKD1.640179": "ERS000012", + "1.SKD3.640198": "ERS000013", + "1.SKM8.640201": "ERS000014", + "1.SKM2.640199": "ERS000015", + "1.SKB9.640200": "ERS000016", + "1.SKD5.640186": "ERS000017", + "1.SKM3.640197": "ERS000018", + "1.SKD9.640182": "ERS000019", + "1.SKB4.640189": "ERS000020", + "1.SKD7.640191": "ERS000021", + "1.SKM6.640187": "ERS000022", + "1.SKD4.640185": "ERS000023", + "1.SKB3.640195": "ERS000024", + "1.SKB6.640176": "ERS000025", + "1.SKM1.640183": "ERS000025", + } self.assertEqual(obs, exp) obs = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study).ebi_sample_accessions - exp = {"%s.Sample1" % self.new_study.id: None, - "%s.Sample2" % self.new_study.id: None, - "%s.Sample3" % self.new_study.id: None} + self.metadata, self.new_study + ).ebi_sample_accessions + exp = { + "%s.Sample1" % self.new_study.id: None, + "%s.Sample2" % self.new_study.id: None, + "%s.Sample3" % self.new_study.id: None, + } self.assertEqual(obs, exp) def test_ebi_sample_accessions_setter(self): with self.assertRaises(qdb.exceptions.QiitaDBError): - self.tester.ebi_sample_accessions = {'1.SKB8.640193': 'ERS000010', - '1.SKD8.640184': 'ERS000001'} + self.tester.ebi_sample_accessions = { + "1.SKB8.640193": "ERS000010", + "1.SKD8.640184": "ERS000001", + } st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) - exp_acc = {"%s.Sample1" % self.new_study.id: 'ERS000100', - "%s.Sample2" % self.new_study.id: 'ERS000110'} + self.metadata, self.new_study + ) + exp_acc = { + "%s.Sample1" % self.new_study.id: "ERS000100", + "%s.Sample2" % self.new_study.id: "ERS000110", + } 
st.ebi_sample_accessions = exp_acc exp_acc["%s.Sample3" % self.new_study.id] = None self.assertEqual(st.ebi_sample_accessions, exp_acc) - exp_acc["%s.Sample3" % self.new_study.id] = 'ERS0000120' + exp_acc["%s.Sample3" % self.new_study.id] = "ERS0000120" st.ebi_sample_accessions = exp_acc self.assertEqual(st.ebi_sample_accessions, exp_acc) @@ -2051,59 +2545,70 @@ def test_ebi_sample_accessions_setter(self): # npt.assert_warns def f(): st.ebi_sample_accessions = exp_acc + npt.assert_warns(qdb.exceptions.QiitaDBWarning, f) def test_biosample_accessions(self): obs = self.tester.biosample_accessions - exp = {'1.SKB8.640193': 'SAMEA0000000', - '1.SKD8.640184': 'SAMEA0000001', - '1.SKB7.640196': 'SAMEA0000002', - '1.SKM9.640192': 'SAMEA0000003', - '1.SKM4.640180': 'SAMEA0000004', - '1.SKM5.640177': 'SAMEA0000005', - '1.SKB5.640181': 'SAMEA0000006', - '1.SKD6.640190': 'SAMEA0000007', - '1.SKB2.640194': 'SAMEA0000008', - '1.SKD2.640178': 'SAMEA0000009', - '1.SKM7.640188': 'SAMEA0000010', - '1.SKB1.640202': 'SAMEA0000011', - '1.SKD1.640179': 'SAMEA0000012', - '1.SKD3.640198': 'SAMEA0000013', - '1.SKM8.640201': 'SAMEA0000014', - '1.SKM2.640199': 'SAMEA0000015', - '1.SKB9.640200': 'SAMEA0000016', - '1.SKD5.640186': 'SAMEA0000017', - '1.SKM3.640197': 'SAMEA0000018', - '1.SKD9.640182': 'SAMEA0000019', - '1.SKB4.640189': 'SAMEA0000020', - '1.SKD7.640191': 'SAMEA0000021', - '1.SKM6.640187': 'SAMEA0000022', - '1.SKD4.640185': 'SAMEA0000023', - '1.SKB3.640195': 'SAMEA0000024', - '1.SKB6.640176': 'SAMEA0000025', - '1.SKM1.640183': 'SAMEA0000026'} + exp = { + "1.SKB8.640193": "SAMEA0000000", + "1.SKD8.640184": "SAMEA0000001", + "1.SKB7.640196": "SAMEA0000002", + "1.SKM9.640192": "SAMEA0000003", + "1.SKM4.640180": "SAMEA0000004", + "1.SKM5.640177": "SAMEA0000005", + "1.SKB5.640181": "SAMEA0000006", + "1.SKD6.640190": "SAMEA0000007", + "1.SKB2.640194": "SAMEA0000008", + "1.SKD2.640178": "SAMEA0000009", + "1.SKM7.640188": "SAMEA0000010", + "1.SKB1.640202": "SAMEA0000011", + "1.SKD1.640179": "SAMEA0000012", + "1.SKD3.640198": "SAMEA0000013", + "1.SKM8.640201": "SAMEA0000014", + "1.SKM2.640199": "SAMEA0000015", + "1.SKB9.640200": "SAMEA0000016", + "1.SKD5.640186": "SAMEA0000017", + "1.SKM3.640197": "SAMEA0000018", + "1.SKD9.640182": "SAMEA0000019", + "1.SKB4.640189": "SAMEA0000020", + "1.SKD7.640191": "SAMEA0000021", + "1.SKM6.640187": "SAMEA0000022", + "1.SKD4.640185": "SAMEA0000023", + "1.SKB3.640195": "SAMEA0000024", + "1.SKB6.640176": "SAMEA0000025", + "1.SKM1.640183": "SAMEA0000026", + } self.assertEqual(obs, exp) obs = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study).biosample_accessions - exp = {"%s.Sample1" % self.new_study.id: None, - "%s.Sample2" % self.new_study.id: None, - "%s.Sample3" % self.new_study.id: None} + self.metadata, self.new_study + ).biosample_accessions + exp = { + "%s.Sample1" % self.new_study.id: None, + "%s.Sample2" % self.new_study.id: None, + "%s.Sample3" % self.new_study.id: None, + } self.assertEqual(obs, exp) def test_biosample_accessions_setter(self): with self.assertRaises(qdb.exceptions.QiitaDBError): - self.tester.biosample_accessions = {'1.SKB8.640193': 'SAMEA110000', - '1.SKD8.640184': 'SAMEA110000'} + self.tester.biosample_accessions = { + "1.SKB8.640193": "SAMEA110000", + "1.SKD8.640184": "SAMEA110000", + } st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) - exp_acc = {"%s.Sample1" % self.new_study.id: 'SAMEA110000', - "%s.Sample2" % self.new_study.id: 'SAMEA120000'} + 
self.metadata, self.new_study + ) + exp_acc = { + "%s.Sample1" % self.new_study.id: "SAMEA110000", + "%s.Sample2" % self.new_study.id: "SAMEA120000", + } st.biosample_accessions = exp_acc exp_acc["%s.Sample3" % self.new_study.id] = None self.assertEqual(st.biosample_accessions, exp_acc) - exp_acc["%s.Sample3" % self.new_study.id] = 'SAMEA130000' + exp_acc["%s.Sample3" % self.new_study.id] = "SAMEA130000" st.biosample_accessions = exp_acc self.assertEqual(st.biosample_accessions, exp_acc) @@ -2111,35 +2616,39 @@ def test_biosample_accessions_setter(self): # npt.assert_warns def f(): st.biosample_accessions = exp_acc + npt.assert_warns(qdb.exceptions.QiitaDBWarning, f) def test_validate_template_warning_missing(self): """Warns if the template is missing a required column""" metadata_dict = { - 'Sample1': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 1', - 'latitude': '42.42', - 'longitude': '41.41'} + "Sample1": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 1", + "latitude": "42.42", + "longitude": "41.41", } - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) ST = qdb.metadata_template.sample_template.SampleTemplate obs = ST._clean_validate_template(metadata, 2) metadata_dict = { - '2.Sample1': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'host_subject_id': 'NotIdentified', - 'description': 'Test Sample 1', - 'latitude': '42.42', - 'longitude': '41.41'} + "2.Sample1": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "host_subject_id": "NotIdentified", + "description": "Test Sample 1", + "latitude": "42.42", + "longitude": "41.41", } - exp = pd.DataFrame.from_dict(metadata_dict, orient='index', dtype=str) + } + exp = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) obs.sort_index(axis=0, inplace=True) obs.sort_index(axis=1, inplace=True) exp.sort_index(axis=0, inplace=True) @@ -2147,23 +2656,25 @@ def test_validate_template_warning_missing(self): assert_frame_equal(obs, exp, check_like=True) def test_validate_template_warning_missing_restrictions(self): - del self.metadata['collection_timestamp'] + del self.metadata["collection_timestamp"] st = npt.assert_warns( qdb.exceptions.QiitaDBWarning, qdb.metadata_template.sample_template.SampleTemplate.create, - self.metadata, self.new_study) - obs = st.check_restrictions( - [STC['EBI']]) - self.assertEqual(obs, {'collection_timestamp'}) + self.metadata, + self.new_study, + ) + obs = st.check_restrictions([STC["EBI"]]) + self.assertEqual(obs, {"collection_timestamp"}) def test_validate_errors(self): - self.metadata.at['Sample1', 'collection_timestamp'] = 'wrong date' - self.metadata.at['Sample2', 'latitude'] = 'wrong latitude' - self.metadata.at['Sample3', 'latitude'] = None + self.metadata.at["Sample1", "collection_timestamp"] = "wrong date" + self.metadata.at["Sample2", "latitude"] = "wrong latitude" + self.metadata.at["Sample3", "latitude"] = None with catch_warnings(record=True) as warn: 
qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) # it should only return one warning self.assertEqual(len(warn), 1) @@ -2172,143 +2683,164 @@ def test_validate_errors(self): self.assertEqual(warn.category, qdb.exceptions.QiitaDBWarning) # it should contain this text message = str(warn.message) - exp_error = ('Sample "%s.Sample2", column "latitude", wrong value ' - '"wrong latitude"' % self.new_study.id) + exp_error = ( + 'Sample "%s.Sample2", column "latitude", wrong value ' + '"wrong latitude"' % self.new_study.id + ) self.assertIn(exp_error, message) - exp_error = ('Sample "%s.Sample1", column "collection_timestamp", ' - 'wrong value "wrong date"' % self.new_study.id) + exp_error = ( + 'Sample "%s.Sample1", column "collection_timestamp", ' + 'wrong value "wrong date"' % self.new_study.id + ) self.assertIn(exp_error, message) - exp_error = ('Sample "%s.Sample3", column "latitude", ' - 'wrong value "None"' % self.new_study.id) + exp_error = ( + 'Sample "%s.Sample3", column "latitude", ' + 'wrong value "None"' % self.new_study.id + ) self.assertIn(exp_error, message) def test_validate_errors_timestampA_year4digits(self): - column = 'collection_timestamp' - self.metadata.at['Sample1', column] = '2016-09-20 12:00' - self.metadata.at['Sample2', column] = '2016-09-20 12' - self.metadata.at['Sample3', column] = '2016-09-20' + column = "collection_timestamp" + self.metadata.at["Sample1", column] = "2016-09-20 12:00" + self.metadata.at["Sample2", column] = "2016-09-20 12" + self.metadata.at["Sample3", column] = "2016-09-20" with catch_warnings(record=True) as warn: qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) # the warnings should be empty self.assertEqual(warn, []) def test_validate_errors_timestampA_year2digits(self): - column = 'collection_timestamp' - self.metadata.at['Sample1', column] = '16-09-20 12:00' - self.metadata.at['Sample2', column] = '9/20/16 12' - self.metadata.at['Sample3', column] = '09-20-16' + column = "collection_timestamp" + self.metadata.at["Sample1", column] = "16-09-20 12:00" + self.metadata.at["Sample2", column] = "9/20/16 12" + self.metadata.at["Sample3", column] = "09-20-16" with catch_warnings(record=True) as warn: st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) exp_message = ( - 'Some functionality will be disabled due to missing ' - 'columns:\n\t' + "Some functionality will be disabled due to missing " + "columns:\n\t" 'Sample "{0}.Sample1", column "collection_timestamp", ' 'wrong value "16-09-20 12:00";\n\t' 'Sample "{0}.Sample2", column "collection_timestamp", ' 'wrong value "9/20/16 12";\n\t' 'Sample "{0}.Sample3", column "collection_timestamp", ' 'wrong value "09-20-16".\n' - 'See the Templates tutorial ' - 'for a description of these fields.'.format(st.id)) + "See the Templates tutorial " + "for a description of these fields.".format(st.id) + ) # warnings is a list of 1 element self.assertEqual(len(warn), 1) # the order might change so testing by elements - self.assertCountEqual(str(warn[0].message).split('\n'), - exp_message.split('\n')) + self.assertCountEqual( + str(warn[0].message).split("\n"), exp_message.split("\n") + ) def test_validate_errors_timestampB_year4digits(self): - column = 'collection_timestamp' - self.metadata.at['Sample1', column] = '2016-12' - self.metadata.at['Sample2', column] = '2016' + 
column = "collection_timestamp" + self.metadata.at["Sample1", column] = "2016-12" + self.metadata.at["Sample2", column] = "2016" with catch_warnings(record=True) as warn: qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) # the warnings should be empty self.assertEqual(warn, []) def test_validate_errors_timestampB_year2digits(self): - column = 'collection_timestamp' - self.metadata.at['Sample1', column] = '16-12' - self.metadata.at['Sample2', column] = '16' + column = "collection_timestamp" + self.metadata.at["Sample1", column] = "16-12" + self.metadata.at["Sample2", column] = "16" with catch_warnings(record=True) as warn: st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) + self.metadata, self.new_study + ) exp_message = ( - 'Some functionality will be disabled due to missing ' - 'columns:\n\t' + "Some functionality will be disabled due to missing " + "columns:\n\t" 'Sample "{0}.Sample1", column "collection_timestamp", wrong ' 'value "16-12";\n\t' 'Sample "{0}.Sample2", column "collection_timestamp", wrong ' 'value "16".\n' - 'See the Templates tutorial for a description ' - 'of these fields.'.format(st.id)) + "See the Templates tutorial for a description " + "of these fields.".format(st.id) + ) # warnings is a list of 1 element self.assertEqual(len(warn), 1) self.assertEqual(str(warn[0].message), exp_message) def test_delete_column(self): st = qdb.metadata_template.sample_template.SampleTemplate.create( - self.metadata, self.new_study) - st.delete_column('dna_extracted') - self.assertNotIn('dna_extracted', st.categories) + self.metadata, self.new_study + ) + st.delete_column("dna_extracted") + self.assertNotIn("dna_extracted", st.categories) def test_delete_samples(self): QE = qdb.exceptions st = qdb.metadata_template.sample_template.SampleTemplate(1) md_dict = { - 'Sample4': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - 'Sample5': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}, - 'Sample6': {'physical_specimen_location': 'location1', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:15', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 4', - 'latitude': '42.42', - 'longitude': '41.41', - 'taxon_id': '9606', - 'scientific_name': 'homo sapiens'}} - md_ext = pd.DataFrame.from_dict(md_dict, orient='index', dtype=str) + "Sample4": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "Sample5": { + 
"physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + "Sample6": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:15", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 4", + "latitude": "42.42", + "longitude": "41.41", + "taxon_id": "9606", + "scientific_name": "homo sapiens", + }, + } + md_ext = pd.DataFrame.from_dict(md_dict, orient="index", dtype=str) npt.assert_warns(QE.QiitaDBWarning, st.extend, md_ext) - st.delete_samples(['1.Sample4']) - self.assertNotIn('1.Sample4', st.keys()) - self.assertIn('1.Sample5', st.keys()) - self.assertIn('1.Sample6', st.keys()) - st.delete_samples(['1.Sample5', '1.Sample6']) - self.assertNotIn('1.Sample5', st.keys()) - self.assertNotIn('1.Sample6', st.keys()) + st.delete_samples(["1.Sample4"]) + self.assertNotIn("1.Sample4", st.keys()) + self.assertIn("1.Sample5", st.keys()) + self.assertIn("1.Sample6", st.keys()) + st.delete_samples(["1.Sample5", "1.Sample6"]) + self.assertNotIn("1.Sample5", st.keys()) + self.assertNotIn("1.Sample6", st.keys()) # testing errors with self.assertRaises(QE.QiitaDBUnknownIDError): - st.delete_samples(['not.existing.sample']) + st.delete_samples(["not.existing.sample"]) with self.assertRaises(QE.QiitaDBOperationNotPermittedError): - st.delete_samples(['1.SKM5.640177']) + st.delete_samples(["1.SKM5.640177"]) EXP_SAMPLE_TEMPLATE = ( @@ -2321,7 +2853,8 @@ def test_delete_samples(self): "{0}.Sample2\t2014-05-29 12:24:15\tTest Sample 2\ttrue\tNotIdentified\t" "4.2\t1.1\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n" "{0}.Sample3\t2014-05-29 12:24:15\tTest Sample 3\ttrue\tNotIdentified\t" - "4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n") + "4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n" +) EXP_SAMPLE_TEMPLATE_FEWER_SAMPLES = ( "sample_name\tcollection_timestamp\tdescription\tdna_extracted\t" @@ -2331,8 +2864,9 @@ def test_delete_samples(self): "{0}.Sample1\t2014-05-29 12:24:15\tTest Sample 1\ttrue\tNotIdentified\t" "42.42\t41.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n" "{0}.Sample3\t2014-05-29 12:24:15\tTest Sample 3\ttrue\tNotIdentified\t" - "4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n") + "4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n" +) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py index 8d2d47eac..1c6eb8f5f 100644 --- a/qiita_db/metadata_template/test/test_util.py +++ b/qiita_db/metadata_template/test/test_util.py @@ -6,45 +6,44 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from six import StringIO +import warnings from inspect import currentframe, getfile -from os.path import dirname, abspath, join +from os.path import abspath, dirname, join from unittest import TestCase, main -import warnings import numpy.testing as npt import pandas as pd from pandas.testing import assert_frame_equal +from six import StringIO import qiita_db as qdb class TestUtil(TestCase): """Tests some utility functions on the metadata_template module""" + def setUp(self): metadata_dict = { - 'Sample1': {'int_col': 1, 'float_col': 2.1, 'str_col': 'str1'}, - 'Sample2': {'int_col': 2, 'float_col': 3.1, 'str_col': '200'}, - 'Sample3': {'int_col': 3, 'float_col': 3, 'str_col': 'string30'}, + "Sample1": {"int_col": 1, "float_col": 2.1, "str_col": "str1"}, + "Sample2": {"int_col": 2, "float_col": 3.1, "str_col": "200"}, + "Sample3": {"int_col": 3, "float_col": 3, "str_col": "string30"}, } - self.metadata_map = pd.DataFrame.from_dict(metadata_dict, - orient='index', dtype=str) - self.headers = ['float_col', 'str_col', 'int_col'] + self.metadata_map = pd.DataFrame.from_dict( + metadata_dict, orient="index", dtype=str + ) + self.headers = ["float_col", "str_col", "int_col"] - self.mfp = join( - dirname(abspath(getfile(currentframe()))), 'support_files') + self.mfp = join(dirname(abspath(getfile(currentframe()))), "support_files") def test_prefix_sample_names_with_id(self): exp_metadata_dict = { - '1.Sample1': {'int_col': 1, 'float_col': 2.1, 'str_col': 'str1'}, - '1.Sample2': {'int_col': 2, 'float_col': 3.1, 'str_col': '200'}, - '1.Sample3': {'int_col': 3, 'float_col': 3, 'str_col': 'string30'}, + "1.Sample1": {"int_col": 1, "float_col": 2.1, "str_col": "str1"}, + "1.Sample2": {"int_col": 2, "float_col": 3.1, "str_col": "200"}, + "1.Sample3": {"int_col": 3, "float_col": 3, "str_col": "string30"}, } - exp_df = pd.DataFrame.from_dict(exp_metadata_dict, orient='index', - dtype=str) + exp_df = pd.DataFrame.from_dict(exp_metadata_dict, orient="index", dtype=str) with warnings.catch_warnings(record=True) as warn: - qdb.metadata_template.util.prefix_sample_names_with_id( - self.metadata_map, 1) + qdb.metadata_template.util.prefix_sample_names_with_id(self.metadata_map, 1) self.assertEqual(len(warn), 0) self.metadata_map.sort_index(inplace=True) exp_df.sort_index(inplace=True) @@ -52,74 +51,76 @@ def test_prefix_sample_names_with_id(self): # test that it only prefixes the samples that are needed metadata_dict = { - 'Sample1': {'int_col': 1, 'float_col': 2.1, 'str_col': 'str1'}, - '1.Sample2': {'int_col': 2, 'float_col': 3.1, 'str_col': '200'}, - 'Sample3': {'int_col': 3, 'float_col': 3, 'str_col': 'string30'}, + "Sample1": {"int_col": 1, "float_col": 2.1, "str_col": "str1"}, + "1.Sample2": {"int_col": 2, "float_col": 3.1, "str_col": "200"}, + "Sample3": {"int_col": 3, "float_col": 3, "str_col": "string30"}, } - metadata_map = pd.DataFrame.from_dict( - metadata_dict, orient='index', dtype=str) + metadata_map = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) with warnings.catch_warnings(record=True) as warn: - qdb.metadata_template.util.prefix_sample_names_with_id( - metadata_map, 1) + qdb.metadata_template.util.prefix_sample_names_with_id(metadata_map, 1) self.assertEqual(len(warn), 1) - self.assertEqual(str(warn[0].message), 'Some of the samples were ' - 'already prefixed with the study id.') + self.assertEqual( + str(warn[0].message), + "Some of the samples were already prefixed with the study id.", + ) 
metadata_map.sort_index(inplace=True) assert_frame_equal(metadata_map, exp_df) # making sure that samples with the same sample name than the study are # actually prepended metadata_dict = { - '1': {'int_col': 1, 'float_col': 2.1, 'str_col': 'str1'}, - '2': {'int_col': 2, 'float_col': 3.1, 'str_col': '200'}, + "1": {"int_col": 1, "float_col": 2.1, "str_col": "str1"}, + "2": {"int_col": 2, "float_col": 3.1, "str_col": "200"}, } - metadata_map = pd.DataFrame.from_dict( - metadata_dict, orient='index', dtype=str) + metadata_map = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) qdb.metadata_template.util.prefix_sample_names_with_id(metadata_map, 1) - self.assertCountEqual(metadata_map.index, ['1.1', '1.2']) + self.assertCountEqual(metadata_map.index, ["1.1", "1.2"]) def test_load_template_to_dataframe(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(EXP_SAMPLE_TEMPLATE)) + StringIO(EXP_SAMPLE_TEMPLATE) + ) exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_sample_id(self): obs = npt.assert_warns( qdb.exceptions.QiitaDBWarning, qdb.metadata_template.util.load_template_to_dataframe, - StringIO(EXP_SAMPLE_TEMPLATE_WITH_SAMPLE_ID)) + StringIO(EXP_SAMPLE_TEMPLATE_WITH_SAMPLE_ID), + ) exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_xlsx(self): # test loading a qiimp file - fp = join(self.mfp, 'a_qiimp_wb.xlsx') + fp = join(self.mfp, "a_qiimp_wb.xlsx") obs = qdb.metadata_template.util.load_template_to_dataframe(fp) exp = pd.DataFrame.from_dict(EXP_QIIMP, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) # test loading an empty qiimp file - fp = join(self.mfp, 'empty_qiimp_wb.xlsx') + fp = join(self.mfp, "empty_qiimp_wb.xlsx") with self.assertRaises(ValueError) as error: qdb.metadata_template.util.load_template_to_dataframe(fp) self.assertEqual(str(error.exception), "The template is empty") # test loading non qiimp file - fp = join(self.mfp, 'not_a_qiimp_wb.xlsx') + fp = join(self.mfp, "not_a_qiimp_wb.xlsx") obs = qdb.metadata_template.util.load_template_to_dataframe(fp) exp = pd.DataFrame.from_dict(EXP_NOT_QIIMP, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_qiime_map(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(QIIME_TUTORIAL_MAP_SUBSET), index='#SampleID') + StringIO(QIIME_TUTORIAL_MAP_SUBSET), index="#SampleID" + ) exp = pd.DataFrame.from_dict(QIIME_TUTORIAL_MAP_DICT_FORM, dtype=str) - exp.index.name = 'SampleID' + exp.index.name = "SampleID" obs.sort_index(axis=0, inplace=True) obs.sort_index(axis=1, inplace=True) exp.sort_index(axis=0, inplace=True) @@ -136,7 +137,8 @@ def test_load_template_to_dataframe_duplicate_cols(self): test = ( "sample_name\tdescription\t \t \t\t \t\n" "sample1\tsample1\t \t \t\t\n" - "sample2\tsample2\t\t\t\t \t") + "sample2\tsample2\t\t\t\t \t" + ) with self.assertRaises(ValueError): LTTD(StringIO(test)) @@ -144,196 +146,251 @@ def test_load_template_to_dataframe_duplicate_cols(self): test = ( "sample_name\tdescription\tcol1\ttcol2\n" "sample1\tsample1\t \t \n" - "sample2\tsample2\t 
\t") + "sample2\tsample2\t \t" + ) df = LTTD(StringIO(test)) - self.assertEqual(df.columns.values, ['description']) + self.assertEqual(df.columns.values, ["description"]) def test_load_template_to_dataframe_scrubbing(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(EXP_SAMPLE_TEMPLATE_SPACES)) + StringIO(EXP_SAMPLE_TEMPLATE_SPACES) + ) exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_empty_columns(self): obs = npt.assert_warns( qdb.exceptions.QiitaDBWarning, qdb.metadata_template.util.load_template_to_dataframe, - StringIO(EXP_ST_SPACES_EMPTY_COLUMN)) + StringIO(EXP_ST_SPACES_EMPTY_COLUMN), + ) exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_empty_rows(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(EXP_SAMPLE_TEMPLATE_SPACES_EMPTY_ROW)) + StringIO(EXP_SAMPLE_TEMPLATE_SPACES_EMPTY_ROW) + ) exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_no_sample_name_cast(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(EXP_SAMPLE_TEMPLATE_NUMBER_SAMPLE_NAMES)) + StringIO(EXP_SAMPLE_TEMPLATE_NUMBER_SAMPLE_NAMES) + ) exp = pd.DataFrame.from_dict( - SAMPLE_TEMPLATE_NUMBER_SAMPLE_NAMES_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + SAMPLE_TEMPLATE_NUMBER_SAMPLE_NAMES_DICT_FORM, dtype=str + ) + exp.index.name = "sample_name" obs.sort_index(inplace=True) exp.sort_index(inplace=True) assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_empty_sample_names(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(SAMPLE_TEMPLATE_NO_SAMPLE_NAMES)) + StringIO(SAMPLE_TEMPLATE_NO_SAMPLE_NAMES) + ) exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(SAMPLE_TEMPLATE_NO_SAMPLE_NAMES_SOME_SPACES)) + StringIO(SAMPLE_TEMPLATE_NO_SAMPLE_NAMES_SOME_SPACES) + ) exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_empty_column(self): obs = npt.assert_warns( qdb.exceptions.QiitaDBWarning, qdb.metadata_template.util.load_template_to_dataframe, - StringIO(SAMPLE_TEMPLATE_EMPTY_COLUMN)) + StringIO(SAMPLE_TEMPLATE_EMPTY_COLUMN), + ) exp = pd.DataFrame.from_dict(ST_EMPTY_COLUMN_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_column_with_nas(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(SAMPLE_TEMPLATE_COLUMN_WITH_NAS)) + StringIO(SAMPLE_TEMPLATE_COLUMN_WITH_NAS) + ) exp = pd.DataFrame.from_dict(ST_COLUMN_WITH_NAS_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def 
test_load_template_to_dataframe_exception(self): with self.assertRaises(qdb.exceptions.QiitaDBColumnError): qdb.metadata_template.util.load_template_to_dataframe( - StringIO(SAMPLE_TEMPLATE_NO_SAMPLE_NAME)) + StringIO(SAMPLE_TEMPLATE_NO_SAMPLE_NAME) + ) def test_load_template_to_dataframe_whitespace(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(EXP_SAMPLE_TEMPLATE_WHITESPACE)) + StringIO(EXP_SAMPLE_TEMPLATE_WHITESPACE) + ) exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_lowercase(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(EXP_SAMPLE_TEMPLATE_MULTICASE)) + StringIO(EXP_SAMPLE_TEMPLATE_MULTICASE) + ) exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_DICT_FORM, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" exp.rename(columns={"str_column": "str_CoLumn"}, inplace=True) assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_non_utf8(self): - replace = EXP_SAMPLE_TEMPLATE.replace( - 'Test Sample 2', u'Test Sample\x962') - qdb.metadata_template.util.load_template_to_dataframe( - StringIO(replace)) + replace = EXP_SAMPLE_TEMPLATE.replace("Test Sample 2", "Test Sample\x962") + qdb.metadata_template.util.load_template_to_dataframe(StringIO(replace)) # setting back - replace = EXP_SAMPLE_TEMPLATE.replace( - u'Test Sample\x962', 'Test Sample 2') - qdb.metadata_template.util.load_template_to_dataframe( - StringIO(replace)) + replace = EXP_SAMPLE_TEMPLATE.replace("Test Sample\x962", "Test Sample 2") + qdb.metadata_template.util.load_template_to_dataframe(StringIO(replace)) def test_load_template_to_dataframe_typechecking(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(EXP_SAMPLE_TEMPLATE_LAT_ALL_INT)) + StringIO(EXP_SAMPLE_TEMPLATE_LAT_ALL_INT) + ) - exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_LAT_ALL_INT_DICT, - dtype=str) - exp.index.name = 'sample_name' + exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_LAT_ALL_INT_DICT, dtype=str) + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(EXP_SAMPLE_TEMPLATE_LAT_MIXED_FLOAT_INT)) + StringIO(EXP_SAMPLE_TEMPLATE_LAT_MIXED_FLOAT_INT) + ) - exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_MIXED_FLOAT_INT_DICT, - dtype=str) - exp.index.name = 'sample_name' + exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_MIXED_FLOAT_INT_DICT, dtype=str) + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_with_nulls(self): obs = qdb.metadata_template.util.load_template_to_dataframe( - StringIO(EXP_SAMPLE_TEMPLATE_NULLS)) + StringIO(EXP_SAMPLE_TEMPLATE_NULLS) + ) exp = pd.DataFrame.from_dict(SAMPLE_TEMPLATE_NULLS_DICT, dtype=str) - exp.index.name = 'sample_name' + exp.index.name = "sample_name" assert_frame_equal(obs, exp, check_like=True) def test_load_template_to_dataframe_better_tokenizing_error_msg(self): - with self.assertRaisesRegex(RuntimeError, 'Your file has more columns ' - 'with values than headers'): + with self.assertRaisesRegex( + RuntimeError, "Your file has more columns with values than headers" + ): qdb.metadata_template.util.load_template_to_dataframe( - StringIO('sample_name\tcollection_timestamp\n' - '2.Sample1\t2014-05-29 12:24:51\t\n' - '2.Sample2\taaa\n' - 'xxx\tadfa\t\t\n')) + StringIO( + 
"sample_name\tcollection_timestamp\n" + "2.Sample1\t2014-05-29 12:24:51\t\n" + "2.Sample2\taaa\n" + "xxx\tadfa\t\t\n" + ) + ) def test_get_invalid_sample_names(self): - all_valid = ['2.sample.1', 'foo.bar.baz', 'roses', 'are', 'red', - 'v10l3t5', '4r3', '81u3'] + all_valid = [ + "2.sample.1", + "foo.bar.baz", + "roses", + "are", + "red", + "v10l3t5", + "4r3", + "81u3", + ] obs = qdb.metadata_template.util.get_invalid_sample_names(all_valid) self.assertEqual(obs, []) - all_valid = ['sample.1', 'sample.2', 'SAMPLE.1', 'BOOOM'] + all_valid = ["sample.1", "sample.2", "SAMPLE.1", "BOOOM"] obs = qdb.metadata_template.util.get_invalid_sample_names(all_valid) self.assertEqual(obs, []) def test_get_invalid_sample_names_str(self): - one_invalid = ['2.sample.1', 'foo.bar.baz', 'roses', 'are', 'red', - 'I am the chosen one', 'v10l3t5', '4r3', '81u3'] + one_invalid = [ + "2.sample.1", + "foo.bar.baz", + "roses", + "are", + "red", + "I am the chosen one", + "v10l3t5", + "4r3", + "81u3", + ] obs = qdb.metadata_template.util.get_invalid_sample_names(one_invalid) - self.assertCountEqual(obs, ['I am the chosen one']) - - one_invalid = ['2.sample.1', 'foo.bar.baz', 'roses', 'are', 'red', - ':L{=<', ':L}=<', '4r3', '81u3'] + self.assertCountEqual(obs, ["I am the chosen one"]) + + one_invalid = [ + "2.sample.1", + "foo.bar.baz", + "roses", + "are", + "red", + ":L{=<", + ":L}=<", + "4r3", + "81u3", + ] obs = qdb.metadata_template.util.get_invalid_sample_names(one_invalid) - self.assertCountEqual(obs, [':L{=<', ':L}=<']) + self.assertCountEqual(obs, [":L{=<", ":L}=<"]) def test_get_get_invalid_sample_names_mixed(self): - one_invalid = ['.', '1', '2'] + one_invalid = [".", "1", "2"] obs = qdb.metadata_template.util.get_invalid_sample_names(one_invalid) self.assertCountEqual(obs, []) - one_invalid = [' ', ' ', ' '] + one_invalid = [" ", " ", " "] obs = qdb.metadata_template.util.get_invalid_sample_names(one_invalid) - self.assertCountEqual(obs, [' ', ' ', ' ']) + self.assertCountEqual(obs, [" ", " ", " "]) def test_looks_like_qiime_mapping_file(self): obs = qdb.metadata_template.util.looks_like_qiime_mapping_file( - StringIO(EXP_SAMPLE_TEMPLATE)) + StringIO(EXP_SAMPLE_TEMPLATE) + ) self.assertFalse(obs) obs = qdb.metadata_template.util.looks_like_qiime_mapping_file( - StringIO(QIIME_TUTORIAL_MAP_SUBSET)) + StringIO(QIIME_TUTORIAL_MAP_SUBSET) + ) self.assertTrue(obs) - mf = join(self.mfp, 'qiita_map_unicode.tsv') + mf = join(self.mfp, "qiita_map_unicode.tsv") obs = qdb.metadata_template.util.looks_like_qiime_mapping_file(mf) self.assertTrue(obs) - obs = qdb.metadata_template.util.looks_like_qiime_mapping_file( - StringIO()) + obs = qdb.metadata_template.util.looks_like_qiime_mapping_file(StringIO()) self.assertFalse(obs) def test_parse_mapping_file(self): # Tests ported over from QIIME - s1 = ['#sample\ta\tb', '#comment line to skip', - 'x \t y \t z ', ' ', '#more skip', 'i\tj\tk'] - exp = ([['x', 'y', 'z'], ['i', 'j', 'k']], - ['sample', 'a', 'b'], - ['comment line to skip', 'more skip']) + s1 = [ + "#sample\ta\tb", + "#comment line to skip", + "x \t y \t z ", + " ", + "#more skip", + "i\tj\tk", + ] + exp = ( + [["x", "y", "z"], ["i", "j", "k"]], + ["sample", "a", "b"], + ["comment line to skip", "more skip"], + ) obs = qdb.metadata_template.util._parse_mapping_file(s1) self.assertEqual(obs, exp) # check that we strip double quotes by default - s2 = ['#sample\ta\tb', '#comment line to skip', - '"x "\t" y "\t z ', ' ', '"#more skip"', 'i\t"j"\tk'] + s2 = [ + "#sample\ta\tb", + "#comment line to skip", + '"x "\t" y 
"\t z ', + " ", + '"#more skip"', + 'i\t"j"\tk', + ] obs = qdb.metadata_template.util._parse_mapping_file(s2) self.assertEqual(obs, exp) @@ -341,13 +398,13 @@ def test_get_pgsql_reserved_words(self): # simply testing that at least one of the well know reserved words is # in the list obs = qdb.metadata_template.util.get_pgsql_reserved_words() - self.assertIn('select', obs) + self.assertIn("select", obs) def test_get_qiime2_reserved_words(self): # simply testing that at least one of the well know reserved words is # in the list obs = qdb.metadata_template.util.get_qiime2_reserved_words() - self.assertIn('featureid', obs) + self.assertIn("featureid", obs) QIIME_TUTORIAL_MAP_SUBSET = ( @@ -368,7 +425,8 @@ def test_get_qiime2_reserved_words(self): "2.Sample2\t2014-05-29 12:24:51\tTest Sample 2\tTrue\tTrue\tNotIdentified" "\t2\t4.2\t1.1\tlocation1\treceived\ttype1\tValue for sample 2\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\tTrue\tNotIdentified" - "\t3\t4.8\t4.41\tlocation1\treceived\ttype1\tValue for sample 3\n") + "\t3\t4.8\t4.41\tlocation1\treceived\ttype1\tValue for sample 3\n" +) EXP_SAMPLE_TEMPLATE_MULTICASE = ( "sAmPle_Name\tcollection_timestamp\tDescription\thas_extracted_data\t" @@ -379,7 +437,8 @@ def test_get_qiime2_reserved_words(self): "2.Sample2\t2014-05-29 12:24:51\tTest Sample 2\tTrue\tTrue\tNotIdentified" "\t2\t4.2\t1.1\tlocation1\treceived\ttype1\tValue for sample 2\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\tTrue\tNotIdentified" - "\t3\t4.8\t4.41\tlocation1\treceived\ttype1\tValue for sample 3\n") + "\t3\t4.8\t4.41\tlocation1\treceived\ttype1\tValue for sample 3\n" +) EXP_SAMPLE_TEMPLATE_LAT_ALL_INT = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -390,7 +449,8 @@ def test_get_qiime2_reserved_words(self): "2.Sample2\t2014-05-29 12:24:51\tTest Sample 2\tTrue\tTrue\tNotIdentified" "\t2\t4\t1.1\tlocation1\treceived\ttype1\tValue for sample 2\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\tTrue\tNotIdentified" - "\t3\t4\t4.41\tlocation1\treceived\ttype1\tValue for sample 3\n") + "\t3\t4\t4.41\tlocation1\treceived\ttype1\tValue for sample 3\n" +) EXP_SAMPLE_TEMPLATE_LAT_MIXED_FLOAT_INT = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -401,7 +461,8 @@ def test_get_qiime2_reserved_words(self): "2.Sample2\t2014-05-29 12:24:51\tTest Sample 2\tTrue\tTrue\tNotIdentified" "\t2\t4\t1.1\tlocation1\treceived\ttype1\tValue for sample 2\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\tTrue\tNotIdentified" - "\t3\t4.8\t4.41\tlocation1\treceived\ttype1\tValue for sample 3\n") + "\t3\t4.8\t4.41\tlocation1\treceived\ttype1\tValue for sample 3\n" +) EXP_SAMPLE_TEMPLATE_DUPE_COLS = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -416,7 +477,8 @@ def test_get_qiime2_reserved_words(self): "type1\tValue for sample 2\tValue for sample 2\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t" "True\tNotIdentified\t4.8\t4.41\tlocation1\treceived\ttype1\t" - "Value for sample 3\tValue for sample 3\n") + "Value for sample 3\tValue for sample 3\n" +) EXP_SAMPLE_TEMPLATE_SPACES = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -432,7 +494,8 @@ def test_get_qiime2_reserved_words(self): "received\ttype1\tValue for sample 2\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t" "True\tNotIdentified\t3\t4.8\t4.41\tlocation1\treceived\ttype1\t" - "Value for sample 3\n") + "Value for sample 3\n" +) EXP_SAMPLE_TEMPLATE_WHITESPACE = ( "sample_name 
\tcollection_timestamp\t description \thas_extracted_data\t" @@ -447,7 +510,8 @@ def test_get_qiime2_reserved_words(self): "received\ttype1\t Value for sample 2\n" "2.Sample3\t2014-05-29 12:24:51\t Test Sample 3 \tTrue\t" "True\tNotIdentified\t3\t4.8\t4.41\tlocation1\treceived\ttype1\t" - "Value for sample 3\n") + "Value for sample 3\n" +) EXP_SAMPLE_TEMPLATE_SPACES_EMPTY_ROW = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -464,7 +528,8 @@ def test_get_qiime2_reserved_words(self): "True\tNotIdentified\t3\t4.8\t4.41\tlocation1\treceived\ttype1\t" "Value for sample 3\n" "\t\t\t\t\t\t\t\t\t\t\t\t\n" - "\t\t\t\t\t\t\t\t\t\t \t\t\n") + "\t\t\t\t\t\t\t\t\t\t \t\t\n" +) EXP_SAMPLE_TEMPLATE_WITH_SAMPLE_ID = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -476,7 +541,8 @@ def test_get_qiime2_reserved_words(self): "2.Sample2\t2014-05-29 12:24:51\tTest Sample 2\tTrue\tTrue\tNotIdentified" "\t2\t4.2\t1.1\tlocation1\treceived\ttype1\tValue for sample 2\tB\tb\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\tTrue\tNotIdentified" - "\t3\t4.8\t4.41\tlocation1\treceived\ttype1\tValue for sample 3\tC\tc\n") + "\t3\t4.8\t4.41\tlocation1\treceived\ttype1\tValue for sample 3\tC\tc\n" +) EXP_ST_SPACES_EMPTY_COLUMN = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -491,7 +557,8 @@ def test_get_qiime2_reserved_words(self): "received\ttype1\tValue for sample 2\t\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t" "True\tNotIdentified\t3\t4.8\t4.41\tlocation1\treceived\ttype1\t" - "Value for sample 3\t\n") + "Value for sample 3\t\n" +) EXP_SAMPLE_TEMPLATE_NUMBER_SAMPLE_NAMES = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -506,7 +573,8 @@ def test_get_qiime2_reserved_words(self): "type1\tValue for sample 2\n" "0.12121\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t" "True\tNotIdentified\t4.8\t4.41\tlocation1\treceived\ttype1\t" - "Value for sample 3\n") + "Value for sample 3\n" +) SAMPLE_TEMPLATE_NO_SAMPLE_NAMES = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -526,7 +594,7 @@ def test_get_qiime2_reserved_words(self): "True\tNotIdentified\t4.8\t4.41\tlocation1\treceived\ttype1\t" "Value for sample 3\n" "\t\t\t\t\t\t\t\t\t\t\t\n" - ) +) SAMPLE_TEMPLATE_NO_SAMPLE_NAMES_SOME_SPACES = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -543,7 +611,7 @@ def test_get_qiime2_reserved_words(self): "True\tNotIdentified\t3\t4.8\t4.41\tlocation1\treceived\ttype1\t" "Value for sample 3\n" "\t\t\t\t\t \t\t\t\t\t \t\t\n" - ) +) SAMPLE_TEMPLATE_EMPTY_COLUMN = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -558,7 +626,8 @@ def test_get_qiime2_reserved_words(self): "type1\t\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t" "True\tNotIdentified\t4.8\t4.41\tlocation1\treceived\ttype1\t" - "\n") + "\n" +) SAMPLE_TEMPLATE_COLUMN_WITH_NAS = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -573,7 +642,8 @@ def test_get_qiime2_reserved_words(self): "type1\tNA\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t" "True\tNotIdentified\t4.8\t4.41\tlocation1\treceived\ttype1\t" - "NA\n") + "NA\n" +) SAMPLE_TEMPLATE_NO_SAMPLE_NAME = ( ":L}={\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -588,7 +658,8 @@ def test_get_qiime2_reserved_words(self): "type1\tNA\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t" "True\tNotIdentified\t4.8\t4.41\tlocation1\treceived\ttype1\t" - "NA\n") 
+ "NA\n" +) SAMPLE_TEMPLATE_INVALID_LONGITUDE_COLUMNS = ( "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t" @@ -603,7 +674,8 @@ def test_get_qiime2_reserved_words(self): "type1\tValue for sample 2\n" "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t" "True\1\t4.8\t4.XXXXX41\tlocation1\treceived\ttype1\t" - "Value for sample 3\n") + "Value for sample 3\n" +) EXP_SAMPLE_TEMPLATE_NULLS = ( "sample_name\tmy_bool_col\tmy_bool_col_w_nulls\n" @@ -612,279 +684,332 @@ def test_get_qiime2_reserved_words(self): "sample.3\tTrue\tTrue\n" "sample.4\tFalse\t\n" "sample.5\tTrue\tTrue\n" - "sample.6\tFalse\tTrue\n") + "sample.6\tFalse\tTrue\n" +) SAMPLE_TEMPLATE_NULLS_DICT = { - 'my_bool_col': {"sample.1": 'True', - "sample.2": 'False', - "sample.3": 'True', - "sample.4": 'False', - "sample.5": 'True', - "sample.6": 'False'}, - 'my_bool_col_w_nulls': {"sample.1": 'False', - "sample.2": 'Unknown', - "sample.3": 'True', - "sample.4": '', - "sample.5": 'True', - "sample.6": 'True'} + "my_bool_col": { + "sample.1": "True", + "sample.2": "False", + "sample.3": "True", + "sample.4": "False", + "sample.5": "True", + "sample.6": "False", + }, + "my_bool_col_w_nulls": { + "sample.1": "False", + "sample.2": "Unknown", + "sample.3": "True", + "sample.4": "", + "sample.5": "True", + "sample.6": "True", + }, } SAMPLE_TEMPLATE_DICT_FORM = { - 'collection_timestamp': {'2.Sample1': '2014-05-29 12:24:51', - '2.Sample2': '2014-05-29 12:24:51', - '2.Sample3': '2014-05-29 12:24:51'}, - 'description': {'2.Sample1': 'Test Sample 1', - '2.Sample2': 'Test Sample 2', - '2.Sample3': 'Test Sample 3'}, - 'has_extracted_data': {'2.Sample1': 'True', - '2.Sample2': 'True', - '2.Sample3': 'True'}, - 'has_physical_specimen': {'2.Sample1': 'True', - '2.Sample2': 'True', - '2.Sample3': 'True'}, - 'host_subject_id': {'2.Sample1': 'NotIdentified', - '2.Sample2': 'NotIdentified', - '2.Sample3': 'NotIdentified'}, - 'latitude': {'2.Sample1': '42.42', - '2.Sample2': '4.2', - '2.Sample3': '4.8'}, - 'longitude': {'2.Sample1': '41.41', - '2.Sample2': '1.1', - '2.Sample3': '4.41'}, - 'physical_location': {'2.Sample1': 'location1', - '2.Sample2': 'location1', - '2.Sample3': 'location1'}, - 'required_sample_info_status': {'2.Sample1': 'received', - '2.Sample2': 'received', - '2.Sample3': 'received'}, - 'sample_type': {'2.Sample1': 'type1', - '2.Sample2': 'type1', - '2.Sample3': 'type1'}, - 'str_column': {'2.Sample1': 'Value for sample 1', - '2.Sample2': 'Value for sample 2', - '2.Sample3': 'Value for sample 3'}, - 'int_column': {'2.Sample1': '1', - '2.Sample2': '2', - '2.Sample3': '3'} - } + "collection_timestamp": { + "2.Sample1": "2014-05-29 12:24:51", + "2.Sample2": "2014-05-29 12:24:51", + "2.Sample3": "2014-05-29 12:24:51", + }, + "description": { + "2.Sample1": "Test Sample 1", + "2.Sample2": "Test Sample 2", + "2.Sample3": "Test Sample 3", + }, + "has_extracted_data": { + "2.Sample1": "True", + "2.Sample2": "True", + "2.Sample3": "True", + }, + "has_physical_specimen": { + "2.Sample1": "True", + "2.Sample2": "True", + "2.Sample3": "True", + }, + "host_subject_id": { + "2.Sample1": "NotIdentified", + "2.Sample2": "NotIdentified", + "2.Sample3": "NotIdentified", + }, + "latitude": {"2.Sample1": "42.42", "2.Sample2": "4.2", "2.Sample3": "4.8"}, + "longitude": {"2.Sample1": "41.41", "2.Sample2": "1.1", "2.Sample3": "4.41"}, + "physical_location": { + "2.Sample1": "location1", + "2.Sample2": "location1", + "2.Sample3": "location1", + }, + "required_sample_info_status": { + "2.Sample1": "received", + "2.Sample2": "received", 
+ "2.Sample3": "received", + }, + "sample_type": {"2.Sample1": "type1", "2.Sample2": "type1", "2.Sample3": "type1"}, + "str_column": { + "2.Sample1": "Value for sample 1", + "2.Sample2": "Value for sample 2", + "2.Sample3": "Value for sample 3", + }, + "int_column": {"2.Sample1": "1", "2.Sample2": "2", "2.Sample3": "3"}, +} SAMPLE_TEMPLATE_LAT_ALL_INT_DICT = { - 'collection_timestamp': {'2.Sample1': '2014-05-29 12:24:51', - '2.Sample2': '2014-05-29 12:24:51', - '2.Sample3': '2014-05-29 12:24:51'}, - 'description': {'2.Sample1': 'Test Sample 1', - '2.Sample2': 'Test Sample 2', - '2.Sample3': 'Test Sample 3'}, - 'has_extracted_data': {'2.Sample1': 'True', - '2.Sample2': 'True', - '2.Sample3': 'True'}, - 'has_physical_specimen': {'2.Sample1': 'True', - '2.Sample2': 'True', - '2.Sample3': 'True'}, - 'host_subject_id': {'2.Sample1': 'NotIdentified', - '2.Sample2': 'NotIdentified', - '2.Sample3': 'NotIdentified'}, - 'latitude': {'2.Sample1': '42', - '2.Sample2': '4', - '2.Sample3': '4'}, - 'longitude': {'2.Sample1': '41.41', - '2.Sample2': '1.1', - '2.Sample3': '4.41'}, - 'physical_location': {'2.Sample1': 'location1', - '2.Sample2': 'location1', - '2.Sample3': 'location1'}, - 'required_sample_info_status': {'2.Sample1': 'received', - '2.Sample2': 'received', - '2.Sample3': 'received'}, - 'sample_type': {'2.Sample1': 'type1', - '2.Sample2': 'type1', - '2.Sample3': 'type1'}, - 'str_column': {'2.Sample1': 'Value for sample 1', - '2.Sample2': 'Value for sample 2', - '2.Sample3': 'Value for sample 3'}, - 'int_column': {'2.Sample1': '1', - '2.Sample2': '2', - '2.Sample3': '3'} - } + "collection_timestamp": { + "2.Sample1": "2014-05-29 12:24:51", + "2.Sample2": "2014-05-29 12:24:51", + "2.Sample3": "2014-05-29 12:24:51", + }, + "description": { + "2.Sample1": "Test Sample 1", + "2.Sample2": "Test Sample 2", + "2.Sample3": "Test Sample 3", + }, + "has_extracted_data": { + "2.Sample1": "True", + "2.Sample2": "True", + "2.Sample3": "True", + }, + "has_physical_specimen": { + "2.Sample1": "True", + "2.Sample2": "True", + "2.Sample3": "True", + }, + "host_subject_id": { + "2.Sample1": "NotIdentified", + "2.Sample2": "NotIdentified", + "2.Sample3": "NotIdentified", + }, + "latitude": {"2.Sample1": "42", "2.Sample2": "4", "2.Sample3": "4"}, + "longitude": {"2.Sample1": "41.41", "2.Sample2": "1.1", "2.Sample3": "4.41"}, + "physical_location": { + "2.Sample1": "location1", + "2.Sample2": "location1", + "2.Sample3": "location1", + }, + "required_sample_info_status": { + "2.Sample1": "received", + "2.Sample2": "received", + "2.Sample3": "received", + }, + "sample_type": {"2.Sample1": "type1", "2.Sample2": "type1", "2.Sample3": "type1"}, + "str_column": { + "2.Sample1": "Value for sample 1", + "2.Sample2": "Value for sample 2", + "2.Sample3": "Value for sample 3", + }, + "int_column": {"2.Sample1": "1", "2.Sample2": "2", "2.Sample3": "3"}, +} SAMPLE_TEMPLATE_MIXED_FLOAT_INT_DICT = { - 'collection_timestamp': {'2.Sample1': '2014-05-29 12:24:51', - '2.Sample2': '2014-05-29 12:24:51', - '2.Sample3': '2014-05-29 12:24:51'}, - 'description': {'2.Sample1': 'Test Sample 1', - '2.Sample2': 'Test Sample 2', - '2.Sample3': 'Test Sample 3'}, - 'has_extracted_data': {'2.Sample1': 'True', - '2.Sample2': 'True', - '2.Sample3': 'True'}, - 'has_physical_specimen': {'2.Sample1': 'True', - '2.Sample2': 'True', - '2.Sample3': 'True'}, - 'host_subject_id': {'2.Sample1': 'NotIdentified', - '2.Sample2': 'NotIdentified', - '2.Sample3': 'NotIdentified'}, - 'latitude': {'2.Sample1': '42', - '2.Sample2': '4', - '2.Sample3': '4.8'}, - 
'longitude': {'2.Sample1': '41.41', - '2.Sample2': '1.1', - '2.Sample3': '4.41'}, - 'physical_location': {'2.Sample1': 'location1', - '2.Sample2': 'location1', - '2.Sample3': 'location1'}, - 'required_sample_info_status': {'2.Sample1': 'received', - '2.Sample2': 'received', - '2.Sample3': 'received'}, - 'sample_type': {'2.Sample1': 'type1', - '2.Sample2': 'type1', - '2.Sample3': 'type1'}, - 'str_column': {'2.Sample1': 'Value for sample 1', - '2.Sample2': 'Value for sample 2', - '2.Sample3': 'Value for sample 3'}, - 'int_column': {'2.Sample1': '1', - '2.Sample2': '2', - '2.Sample3': '3'} - } + "collection_timestamp": { + "2.Sample1": "2014-05-29 12:24:51", + "2.Sample2": "2014-05-29 12:24:51", + "2.Sample3": "2014-05-29 12:24:51", + }, + "description": { + "2.Sample1": "Test Sample 1", + "2.Sample2": "Test Sample 2", + "2.Sample3": "Test Sample 3", + }, + "has_extracted_data": { + "2.Sample1": "True", + "2.Sample2": "True", + "2.Sample3": "True", + }, + "has_physical_specimen": { + "2.Sample1": "True", + "2.Sample2": "True", + "2.Sample3": "True", + }, + "host_subject_id": { + "2.Sample1": "NotIdentified", + "2.Sample2": "NotIdentified", + "2.Sample3": "NotIdentified", + }, + "latitude": {"2.Sample1": "42", "2.Sample2": "4", "2.Sample3": "4.8"}, + "longitude": {"2.Sample1": "41.41", "2.Sample2": "1.1", "2.Sample3": "4.41"}, + "physical_location": { + "2.Sample1": "location1", + "2.Sample2": "location1", + "2.Sample3": "location1", + }, + "required_sample_info_status": { + "2.Sample1": "received", + "2.Sample2": "received", + "2.Sample3": "received", + }, + "sample_type": {"2.Sample1": "type1", "2.Sample2": "type1", "2.Sample3": "type1"}, + "str_column": { + "2.Sample1": "Value for sample 1", + "2.Sample2": "Value for sample 2", + "2.Sample3": "Value for sample 3", + }, + "int_column": {"2.Sample1": "1", "2.Sample2": "2", "2.Sample3": "3"}, +} SAMPLE_TEMPLATE_NUMBER_SAMPLE_NAMES_DICT_FORM = { - 'collection_timestamp': {'002.000': '2014-05-29 12:24:51', - '1.11111': '2014-05-29 12:24:51', - '0.12121': '2014-05-29 12:24:51'}, - 'description': {'002.000': 'Test Sample 1', - '1.11111': 'Test Sample 2', - '0.12121': 'Test Sample 3'}, - 'has_extracted_data': {'002.000': 'True', - '1.11111': 'True', - '0.12121': 'True'}, - 'has_physical_specimen': {'002.000': 'True', - '1.11111': 'True', - '0.12121': 'True'}, - 'host_subject_id': {'002.000': 'NotIdentified', - '1.11111': 'NotIdentified', - '0.12121': 'NotIdentified'}, - 'latitude': {'002.000': '42.42', - '1.11111': '4.2', - '0.12121': '4.8'}, - 'longitude': {'002.000': '41.41', - '1.11111': '1.1', - '0.12121': '4.41'}, - 'physical_location': {'002.000': 'location1', - '1.11111': 'location1', - '0.12121': 'location1'}, - 'required_sample_info_status': {'002.000': 'received', - '1.11111': 'received', - '0.12121': 'received'}, - 'sample_type': {'002.000': 'type1', - '1.11111': 'type1', - '0.12121': 'type1'}, - 'str_column': {'002.000': 'Value for sample 1', - '1.11111': 'Value for sample 2', - '0.12121': 'Value for sample 3'}} - -ST_EMPTY_COLUMN_DICT_FORM = \ - {'collection_timestamp': {'2.Sample1': '2014-05-29 12:24:51', - '2.Sample2': '2014-05-29 12:24:51', - '2.Sample3': '2014-05-29 12:24:51'}, - 'description': {'2.Sample1': 'Test Sample 1', - '2.Sample2': 'Test Sample 2', - '2.Sample3': 'Test Sample 3'}, - 'has_extracted_data': {'2.Sample1': 'True', - '2.Sample2': 'True', - '2.Sample3': 'True'}, - 'has_physical_specimen': {'2.Sample1': 'True', - '2.Sample2': 'True', - '2.Sample3': 'True'}, - 'host_subject_id': {'2.Sample1': 'NotIdentified', - 
'2.Sample2': 'NotIdentified', - '2.Sample3': 'NotIdentified'}, - 'latitude': {'2.Sample1': '42.42', - '2.Sample2': '4.2', - '2.Sample3': '4.8'}, - 'longitude': {'2.Sample1': '41.41', - '2.Sample2': '1.1', - '2.Sample3': '4.41'}, - 'physical_location': {'2.Sample1': 'location1', - '2.Sample2': 'location1', - '2.Sample3': 'location1'}, - 'required_sample_info_status': {'2.Sample1': 'received', - '2.Sample2': 'received', - '2.Sample3': 'received'}, - 'sample_type': {'2.Sample1': 'type1', - '2.Sample2': 'type1', - '2.Sample3': 'type1'}} - -ST_COLUMN_WITH_NAS_DICT_FORM = \ - {'collection_timestamp': {'2.Sample1': '2014-05-29 12:24:51', - '2.Sample2': '2014-05-29 12:24:51', - '2.Sample3': '2014-05-29 12:24:51'}, - 'description': {'2.Sample1': 'Test Sample 1', - '2.Sample2': 'Test Sample 2', - '2.Sample3': 'Test Sample 3'}, - 'has_extracted_data': {'2.Sample1': 'True', - '2.Sample2': 'True', - '2.Sample3': 'True'}, - 'has_physical_specimen': {'2.Sample1': 'True', - '2.Sample2': 'True', - '2.Sample3': 'True'}, - 'host_subject_id': {'2.Sample1': 'NotIdentified', - '2.Sample2': 'NotIdentified', - '2.Sample3': 'NotIdentified'}, - 'latitude': {'2.Sample1': '42.42', - '2.Sample2': '4.2', - '2.Sample3': '4.8'}, - 'longitude': {'2.Sample1': '41.41', - '2.Sample2': '1.1', - '2.Sample3': '4.41'}, - 'physical_location': {'2.Sample1': 'location1', - '2.Sample2': 'location1', - '2.Sample3': 'location1'}, - 'required_sample_info_status': {'2.Sample1': 'received', - '2.Sample2': 'received', - '2.Sample3': 'received'}, - 'sample_type': {'2.Sample1': 'type1', - '2.Sample2': 'type1', - '2.Sample3': 'type1'}, - 'str_column': {'2.Sample1': 'NA', '2.Sample2': 'NA', '2.Sample3': 'NA'}} + "collection_timestamp": { + "002.000": "2014-05-29 12:24:51", + "1.11111": "2014-05-29 12:24:51", + "0.12121": "2014-05-29 12:24:51", + }, + "description": { + "002.000": "Test Sample 1", + "1.11111": "Test Sample 2", + "0.12121": "Test Sample 3", + }, + "has_extracted_data": {"002.000": "True", "1.11111": "True", "0.12121": "True"}, + "has_physical_specimen": {"002.000": "True", "1.11111": "True", "0.12121": "True"}, + "host_subject_id": { + "002.000": "NotIdentified", + "1.11111": "NotIdentified", + "0.12121": "NotIdentified", + }, + "latitude": {"002.000": "42.42", "1.11111": "4.2", "0.12121": "4.8"}, + "longitude": {"002.000": "41.41", "1.11111": "1.1", "0.12121": "4.41"}, + "physical_location": { + "002.000": "location1", + "1.11111": "location1", + "0.12121": "location1", + }, + "required_sample_info_status": { + "002.000": "received", + "1.11111": "received", + "0.12121": "received", + }, + "sample_type": {"002.000": "type1", "1.11111": "type1", "0.12121": "type1"}, + "str_column": { + "002.000": "Value for sample 1", + "1.11111": "Value for sample 2", + "0.12121": "Value for sample 3", + }, +} + +ST_EMPTY_COLUMN_DICT_FORM = { + "collection_timestamp": { + "2.Sample1": "2014-05-29 12:24:51", + "2.Sample2": "2014-05-29 12:24:51", + "2.Sample3": "2014-05-29 12:24:51", + }, + "description": { + "2.Sample1": "Test Sample 1", + "2.Sample2": "Test Sample 2", + "2.Sample3": "Test Sample 3", + }, + "has_extracted_data": { + "2.Sample1": "True", + "2.Sample2": "True", + "2.Sample3": "True", + }, + "has_physical_specimen": { + "2.Sample1": "True", + "2.Sample2": "True", + "2.Sample3": "True", + }, + "host_subject_id": { + "2.Sample1": "NotIdentified", + "2.Sample2": "NotIdentified", + "2.Sample3": "NotIdentified", + }, + "latitude": {"2.Sample1": "42.42", "2.Sample2": "4.2", "2.Sample3": "4.8"}, + "longitude": {"2.Sample1": "41.41", 
"2.Sample2": "1.1", "2.Sample3": "4.41"}, + "physical_location": { + "2.Sample1": "location1", + "2.Sample2": "location1", + "2.Sample3": "location1", + }, + "required_sample_info_status": { + "2.Sample1": "received", + "2.Sample2": "received", + "2.Sample3": "received", + }, + "sample_type": {"2.Sample1": "type1", "2.Sample2": "type1", "2.Sample3": "type1"}, +} + +ST_COLUMN_WITH_NAS_DICT_FORM = { + "collection_timestamp": { + "2.Sample1": "2014-05-29 12:24:51", + "2.Sample2": "2014-05-29 12:24:51", + "2.Sample3": "2014-05-29 12:24:51", + }, + "description": { + "2.Sample1": "Test Sample 1", + "2.Sample2": "Test Sample 2", + "2.Sample3": "Test Sample 3", + }, + "has_extracted_data": { + "2.Sample1": "True", + "2.Sample2": "True", + "2.Sample3": "True", + }, + "has_physical_specimen": { + "2.Sample1": "True", + "2.Sample2": "True", + "2.Sample3": "True", + }, + "host_subject_id": { + "2.Sample1": "NotIdentified", + "2.Sample2": "NotIdentified", + "2.Sample3": "NotIdentified", + }, + "latitude": {"2.Sample1": "42.42", "2.Sample2": "4.2", "2.Sample3": "4.8"}, + "longitude": {"2.Sample1": "41.41", "2.Sample2": "1.1", "2.Sample3": "4.41"}, + "physical_location": { + "2.Sample1": "location1", + "2.Sample2": "location1", + "2.Sample3": "location1", + }, + "required_sample_info_status": { + "2.Sample1": "received", + "2.Sample2": "received", + "2.Sample3": "received", + }, + "sample_type": {"2.Sample1": "type1", "2.Sample2": "type1", "2.Sample3": "type1"}, + "str_column": {"2.Sample1": "NA", "2.Sample2": "NA", "2.Sample3": "NA"}, +} QIIME_TUTORIAL_MAP_DICT_FORM = { - 'BarcodeSequence': {'PC.354': 'AGCACGAGCCTA', - 'PC.607': 'AACTGTGCGTAC'}, - 'LinkerPrimerSequence': {'PC.354': 'YATGCTGCCTCCCGTAGGAGT', - 'PC.607': 'YATGCTGCCTCCCGTAGGAGT'}, - 'Treatment': {'PC.354': 'Control', - 'PC.607': 'Fast'}, - 'DOB': {'PC.354': '20061218', - 'PC.607': '20071112'}, - 'Description': {'PC.354': 'Control_mouse_I.D._354', - 'PC.607': 'Fasting_mouse_I.D._607'} + "BarcodeSequence": {"PC.354": "AGCACGAGCCTA", "PC.607": "AACTGTGCGTAC"}, + "LinkerPrimerSequence": { + "PC.354": "YATGCTGCCTCCCGTAGGAGT", + "PC.607": "YATGCTGCCTCCCGTAGGAGT", + }, + "Treatment": {"PC.354": "Control", "PC.607": "Fast"}, + "DOB": {"PC.354": "20061218", "PC.607": "20071112"}, + "Description": { + "PC.354": "Control_mouse_I.D._354", + "PC.607": "Fasting_mouse_I.D._607", + }, } EXP_PREP_TEMPLATE = ( - 'sample_name\tbarcodesequence\tcenter_name\tcenter_project_name\t' - 'ebi_submission_accession\temp_status\texperiment_design_description\t' - 'library_construction_protocol\tlinkerprimersequence\tplatform\t' - 'run_prefix\tstr_column\n' - '1.SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\tNone\tEMP\tBBBB\tAAAA\t' - 'GTGCCAGCMGCCGCGGTAA\tIllumina\ts_G1_L002_sequences\tValue for sample 3\n' - '1.SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\tNone\tEMP\tBBBB\tAAAA\t' - 'GTGCCAGCMGCCGCGGTAA\tIllumina\ts_G1_L001_sequences\tValue for sample 1\n' - '1.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\tNone\tEMP\tBBBB\tAAAA\t' - 'GTGCCAGCMGCCGCGGTAA\tIllumina\ts_G1_L001_sequences\tValue for sample 2\n') + "sample_name\tbarcodesequence\tcenter_name\tcenter_project_name\t" + "ebi_submission_accession\temp_status\texperiment_design_description\t" + "library_construction_protocol\tlinkerprimersequence\tplatform\t" + "run_prefix\tstr_column\n" + "1.SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\tNone\tEMP\tBBBB\tAAAA\t" + "GTGCCAGCMGCCGCGGTAA\tIllumina\ts_G1_L002_sequences\tValue for sample 3\n" + "1.SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\tNone\tEMP\tBBBB\tAAAA\t" 
+ "GTGCCAGCMGCCGCGGTAA\tIllumina\ts_G1_L001_sequences\tValue for sample 1\n" + "1.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\tNone\tEMP\tBBBB\tAAAA\t" + "GTGCCAGCMGCCGCGGTAA\tIllumina\ts_G1_L001_sequences\tValue for sample 2\n" +) EXP_QIIMP = { - 'asfaewf': {'sample': 'f', 'oijnmk': 'f'}, - 'pheno': {'sample': 'med', 'oijnmk': 'missing: not provided'}, - 'bawer': {'sample': 'a', 'oijnmk': 'b'}, - 'aelrjg': {'sample': 'asfe', 'oijnmk': 'asfs'} + "asfaewf": {"sample": "f", "oijnmk": "f"}, + "pheno": {"sample": "med", "oijnmk": "missing: not provided"}, + "bawer": {"sample": "a", "oijnmk": "b"}, + "aelrjg": {"sample": "asfe", "oijnmk": "asfs"}, } EXP_NOT_QIIMP = { - 'myownidea': { - 'sample5': 'I skipped some', - 'sample1': 'sampleoneinfo', - 'sample2': 'sampletwoinfo'} + "myownidea": { + "sample5": "I skipped some", + "sample1": "sampleoneinfo", + "sample2": "sampletwoinfo", + } } -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py index 1bd619c71..6dcd17d0e 100644 --- a/qiita_db/metadata_template/util.py +++ b/qiita_db/metadata_template/util.py @@ -5,17 +5,16 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from six import StringIO +import warnings +from string import ascii_letters, digits -import pandas as pd import numpy as np -import warnings +import pandas as pd from iteration_utilities import duplicates +from six import StringIO import qiita_db as qdb -from string import ascii_letters, digits - def prefix_sample_names_with_id(md_template, study_id): r"""prefix the sample_names in md_template with the study id @@ -29,27 +28,34 @@ def prefix_sample_names_with_id(md_template, study_id): """ # loop over the samples and prefix those that aren't prefixed sid = str(study_id) - md_template['qiita_sample_name_with_id'] = pd.Series( - [idx if idx.split('.', 1)[0] == sid and idx != sid - else '%d.%s' % (study_id, idx) - for idx in md_template.index], index=md_template.index) + md_template["qiita_sample_name_with_id"] = pd.Series( + [ + idx + if idx.split(".", 1)[0] == sid and idx != sid + else "%d.%s" % (study_id, idx) + for idx in md_template.index + ], + index=md_template.index, + ) # get the rows that are going to change - changes = len(md_template.index[ - md_template['qiita_sample_name_with_id'] != md_template.index]) + changes = len( + md_template.index[md_template["qiita_sample_name_with_id"] != md_template.index] + ) if changes != 0 and changes != len(md_template.index): warnings.warn( "Some of the samples were already prefixed with the study id.", - qdb.exceptions.QiitaDBWarning) + qdb.exceptions.QiitaDBWarning, + ) md_template.index = md_template.qiita_sample_name_with_id - del md_template['qiita_sample_name_with_id'] + del md_template["qiita_sample_name_with_id"] # The original metadata template had the index column unnamed -> remove # the name of the index for consistency md_template.index.name = None -def load_template_to_dataframe(fn, index='sample_name'): +def load_template_to_dataframe(fn, index="sample_name"): """Load a sample/prep template or a QIIME mapping file into a data frame Parameters @@ -95,50 +101,49 @@ def load_template_to_dataframe(fn, index='sample_name'): """ # Load in file lines holdfile = None - with qdb.util.open_file(fn, newline=None, - encoding="utf8", errors='ignore') as f: + with qdb.util.open_file(fn, newline=None, encoding="utf8", errors="ignore") as 
f: holdfile = f.readlines() if not holdfile: - raise ValueError('Empty file passed!') + raise ValueError("Empty file passed!") if index == "#SampleID": # We're going to parse a QIIME mapping file. We are going to first # parse it with the QIIME function so we can remove the comments # easily and make sure that QIIME will accept this as a mapping file data, headers, comments = _parse_mapping_file(holdfile) - holdfile = ["%s\n" % '\t'.join(d) for d in data] - holdfile.insert(0, "%s\n" % '\t'.join(headers)) + holdfile = ["%s\n" % "\t".join(d) for d in data] + holdfile.insert(0, "%s\n" % "\t".join(headers)) # The QIIME parser fixes the index and removes the # - index = 'SampleID' + index = "SampleID" # Strip all values in the cells in the input file for pos, line in enumerate(holdfile): - cols = line.split('\t') - if pos == 0 and index != 'SampleID': + cols = line.split("\t") + if pos == 0 and index != "SampleID": # get and clean the controlled columns - ccols = {'sample_name'} + ccols = {"sample_name"} ccols.update(qdb.metadata_template.constants.CONTROLLED_COLS) newcols = [ - c.lower().strip() if c.lower().strip() in ccols - else c.strip() - for c in cols] + c.lower().strip() if c.lower().strip() in ccols else c.strip() + for c in cols + ] # while we are here, let's check for duplicate columns headers ncols = set(newcols) if len(ncols) != len(newcols): - if '' in ncols: - raise ValueError( - 'Your file has empty columns headers.') + if "" in ncols: + raise ValueError("Your file has empty columns headers.") raise qdb.exceptions.QiitaDBDuplicateHeaderError( - set(duplicates(newcols))) + set(duplicates(newcols)) + ) else: # .strip will remove odd chars, newlines, tabs and multiple # spaces but we need to read a new line at the end of the # line(+'\n') newcols = [d.strip(" \r\n") for d in cols] - holdfile[pos] = '\t'.join(newcols) + '\n' + holdfile[pos] = "\t".join(newcols) + "\n" # index_col: # is set as False, otherwise it is cast as a float and we want a string @@ -150,28 +155,30 @@ def load_template_to_dataframe(fn, index='sample_name'): # constituted only by delimiters i. e. empty rows. try: template = pd.read_csv( - StringIO(''.join(holdfile)), - sep='\t', + StringIO("".join(holdfile)), + sep="\t", dtype=str, - encoding='utf-8', + encoding="utf-8", keep_default_na=False, index_col=False, - comment='\t', - converters={index: lambda x: str(x).strip()}) + comment="\t", + converters={index: lambda x: str(x).strip()}, + ) except pd.errors.ParserError as e: - if 'tokenizing' in str(e): - msg = ('Your file has more columns with values than headers. To ' - 'fix, make sure to delete any extra rows or columns; they ' - 'might look empty because they have spaces. Then upload ' - 'and try again.') + if "tokenizing" in str(e): + msg = ( + "Your file has more columns with values than headers. To " + "fix, make sure to delete any extra rows or columns; they " + "might look empty because they have spaces. Then upload " + "and try again." 
+ ) raise RuntimeError(msg) else: raise e # remove newlines and tabs from fields - template.replace(to_replace='[\t\n\r\x0b\x0c]+', value='', - regex=True, inplace=True) + template.replace(to_replace="[\t\n\r\x0b\x0c]+", value="", regex=True, inplace=True) # removing columns with empty values - template.dropna(axis='columns', how='all', inplace=True) + template.dropna(axis="columns", how="all", inplace=True) if template.empty: raise ValueError("The template is empty") @@ -180,40 +187,43 @@ def load_template_to_dataframe(fn, index='sample_name'): if index not in template.columns: raise qdb.exceptions.QiitaDBColumnError( "The '%s' column is missing from your template, this file cannot " - "be parsed." % index) + "be parsed." % index + ) # remove rows that have no sample identifier but that may have other data # in the rest of the columns - template.dropna(subset=[index], how='all', inplace=True) + template.dropna(subset=[index], how="all", inplace=True) # set the sample name as the index template.set_index(index, inplace=True) # it is not uncommon to find templates that have empty columns so let's # find the columns that are all '' - columns = np.where(np.all(template.applymap(lambda x: x == ''), axis=0)) + columns = np.where(np.all(template.applymap(lambda x: x == ""), axis=0)) template.drop(template.columns[columns], axis=1, inplace=True) initial_columns.remove(index) dropped_cols = initial_columns - set(template.columns) if dropped_cols: warnings.warn( - 'The following column(s) were removed from the template because ' - 'all their values are empty: %s' - % ', '.join(dropped_cols), qdb.exceptions.QiitaDBWarning) + "The following column(s) were removed from the template because " + "all their values are empty: %s" % ", ".join(dropped_cols), + qdb.exceptions.QiitaDBWarning, + ) # removing 'sample-id' and 'sample_id' as per issue #2906 sdrop = [] - if 'sample-id' in template.columns: - sdrop.append('sample-id') - if 'sample_id' in template.columns: - sdrop.append('sample_id') + if "sample-id" in template.columns: + sdrop.append("sample-id") + if "sample_id" in template.columns: + sdrop.append("sample_id") if sdrop: template.drop(columns=sdrop, inplace=True) warnings.warn( - 'The following column(s) were removed from the template because ' - 'they will cause conflicts with sample_name: %s' - % ', '.join(sdrop), qdb.exceptions.QiitaDBWarning) + "The following column(s) were removed from the template because " + "they will cause conflicts with sample_name: %s" % ", ".join(sdrop), + qdb.exceptions.QiitaDBWarning, + ) # Pandas represents data with np.nan rather than Nones, change it to None # because psycopg2 knows that a None is a Null in SQL, while it doesn't @@ -243,7 +253,7 @@ def get_invalid_sample_names(sample_names): """ # from the QIIME mapping file documentation - valid = set(ascii_letters+digits+'.') + valid = set(ascii_letters + digits + ".") inv = [] for s in sample_names: @@ -275,13 +285,13 @@ def looks_like_qiime_mapping_file(fp): some other different column. 
""" first_line = None - with qdb.util.open_file(fp, newline=None, errors='replace') as f: + with qdb.util.open_file(fp, newline=None, errors="replace") as f: first_line = f.readline() if not first_line: return False first_col = first_line.split()[0] - return first_col == '#SampleID' + return first_col == "#SampleID" def _parse_mapping_file(lines, strip_quotes=True, suppress_stripping=False): @@ -319,12 +329,12 @@ def _parse_mapping_file(lines, strip_quotes=True, suppress_stripping=False): # remove quotes but not spaces def strip_f(x): - return x.replace('"', '') + return x.replace('"', "") else: # remove quotes and spaces def strip_f(x): - return x.replace('"', '').strip() + return x.replace('"', "").strip() else: if suppress_stripping: # don't remove quotes or spaces @@ -349,24 +359,22 @@ def strip_f(x): # skip blank lines when not stripping lines continue - if line.startswith('#'): + if line.startswith("#"): line = line[1:] if not header: - header = line.strip().split('\t') + header = line.strip().split("\t") else: comments.append(line) else: # Will add empty string to empty fields - tmp_line = list(map(strip_f, line.split('\t'))) + tmp_line = list(map(strip_f, line.split("\t"))) if len(tmp_line) < len(header): - tmp_line.extend([''] * (len(header) - len(tmp_line))) + tmp_line.extend([""] * (len(header) - len(tmp_line))) mapping_data.append(tmp_line) if not header: - raise qdb.exceptions.QiitaDBError( - "No header line was found in mapping file.") + raise qdb.exceptions.QiitaDBError("No header line was found in mapping file.") if not mapping_data: - raise qdb.exceptions.QiitaDBError( - "No data found in mapping file.") + raise qdb.exceptions.QiitaDBError("No data found in mapping file.") return mapping_data, header, comments @@ -393,7 +401,14 @@ def get_qiime2_reserved_words(): set: str The reserved words """ - qiime2_reserved_column_names = ['feature id', 'feature-id', 'featureid', - 'id', 'sample id', 'sample-id', 'sampleid'] + qiime2_reserved_column_names = [ + "feature id", + "feature-id", + "featureid", + "id", + "sample id", + "sample-id", + "sampleid", + ] return set(qiime2_reserved_column_names) diff --git a/qiita_db/ontology.py b/qiita_db/ontology.py index 0f5010efe..88f323c9a 100644 --- a/qiita_db/ontology.py +++ b/qiita_db/ontology.py @@ -23,6 +23,7 @@ Ontology """ + import qiita_db as qdb @@ -34,7 +35,8 @@ class Ontology(qdb.base.QiitaObject): terms shortname """ - _table = 'ontology' + + _table = "ontology" def __contains__(self, value): with qdb.sql_connection.TRN: @@ -67,7 +69,7 @@ def user_defined_terms(self): @property def shortname(self): - return qdb.util.convert_from_id(self.id, 'ontology') + return qdb.util.convert_from_id(self.id, "ontology") def add_user_defined_term(self, term): """Add a user defined term to the ontology @@ -110,8 +112,8 @@ def term_type(self, term): result = qdb.sql_connection.TRN.execute_fetchindex() if not result: - return 'not_ontology' + return "not_ontology" elif result[0][0]: - return 'user_defined' + return "user_defined" elif not result[0][0]: - return 'ontology' + return "ontology" diff --git a/qiita_db/portal.py b/qiita_db/portal.py index a5585aa69..bfded647e 100644 --- a/qiita_db/portal.py +++ b/qiita_db/portal.py @@ -26,12 +26,13 @@ class Portal(qdb.base.QiitaObject): add_analyses remove_analyses """ - _table = 'portal_type' + + _table = "portal_type" def __init__(self, portal): with qdb.sql_connection.TRN: self.portal = portal - portal_id = qdb.util.convert_to_id(portal, 'portal_type', 'portal') + portal_id = 
qdb.util.convert_to_id(portal, "portal_type", "portal") super(Portal, self).__init__(portal_id) @staticmethod @@ -119,7 +120,7 @@ def delete(portal): """ with qdb.sql_connection.TRN: # Check if attached to any studies - portal_id = qdb.util.convert_to_id(portal, 'portal_type', 'portal') + portal_id = qdb.util.convert_to_id(portal, "portal_type", "portal") sql = """SELECT study_id FROM qiita.study_portal WHERE portal_type_id = %s""" @@ -127,8 +128,9 @@ def delete(portal): studies = qdb.sql_connection.TRN.execute_fetchflatten() if studies: raise qdb.exceptions.QiitaDBError( - " Cannot delete portal '%s', studies still attached: %s" % - (portal, ', '.join(map(str, studies)))) + " Cannot delete portal '%s', studies still attached: %s" + % (portal, ", ".join(map(str, studies))) + ) # Check if attached to any analyses sql = """SELECT analysis_id @@ -139,8 +141,9 @@ def delete(portal): analyses = qdb.sql_connection.TRN.execute_fetchflatten() if analyses: raise qdb.exceptions.QiitaDBError( - " Cannot delete portal '%s', analyses still attached: %s" % - (portal, ', '.join(map(str, analyses)))) + " Cannot delete portal '%s', analyses still attached: %s" + % (portal, ", ".join(map(str, analyses))) + ) # Remove portal and default analyses for all users sql = """DO $do$ @@ -182,7 +185,7 @@ def exists(portal): Whether the portal exists or not """ try: - qdb.util.convert_to_id(portal, 'portal_type', 'portal') + qdb.util.convert_to_id(portal, "portal_type", "portal") except qdb.exceptions.QiitaDBLookupError: return False else: @@ -203,7 +206,8 @@ def get_studies(self): qdb.sql_connection.TRN.add(sql, [self._id]) return set( qdb.study.Study(sid) - for sid in qdb.sql_connection.TRN.execute_fetchflatten()) + for sid in qdb.sql_connection.TRN.execute_fetchflatten() + ) def _check_studies(self, studies): with qdb.sql_connection.TRN: @@ -214,7 +218,8 @@ def _check_studies(self, studies): if len(existing) != len(list(studies)): bad = map(str, set(studies).difference(existing)) raise qdb.exceptions.QiitaDBError( - "The following studies do not exist: %s" % ", ".join(bad)) + "The following studies do not exist: %s" % ", ".join(bad) + ) def add_studies(self, studies): """Adds studies to given portal @@ -245,8 +250,9 @@ def add_studies(self, studies): if len(duplicates) > 0: warnings.warn( "The following studies are already part of %s: %s" - % (self.portal, ', '.join(map(str, duplicates))), - qdb.exceptions.QiitaDBWarning) + % (self.portal, ", ".join(map(str, duplicates))), + qdb.exceptions.QiitaDBWarning, + ) # Add cleaned list to the portal clean_studies = set(studies).difference(duplicates) @@ -254,7 +260,8 @@ def add_studies(self, studies): VALUES (%s, %s)""" if len(clean_studies) != 0: qdb.sql_connection.TRN.add( - sql, [[s, self._id] for s in clean_studies], many=True) + sql, [[s, self._id] for s in clean_studies], many=True + ) qdb.sql_connection.TRN.execute() def remove_studies(self, studies): @@ -276,7 +283,7 @@ def remove_studies(self, studies): Some studies already do not exist in the given portal """ if self.portal == "QIITA": - raise ValueError('Can not remove from main QIITA portal!') + raise ValueError("Can not remove from main QIITA portal!") with qdb.sql_connection.TRN: self._check_studies(studies) @@ -293,7 +300,8 @@ def remove_studies(self, studies): raise qdb.exceptions.QiitaDBError( "The following studies are used in an analysis on portal " "%s and can't be removed: %s" - % (self.portal, ", ".join(map(str, analysed)))) + % (self.portal, ", ".join(map(str, analysed))) + ) # Clean list of studies 
down to ones associated with portal already sql = """SELECT study_id @@ -306,8 +314,9 @@ def remove_studies(self, studies): rem = map(str, set(studies).difference(clean_studies)) warnings.warn( "The following studies are not part of %s: %s" - % (self.portal, ', '.join(rem)), - qdb.exceptions.QiitaDBWarning) + % (self.portal, ", ".join(rem)), + qdb.exceptions.QiitaDBWarning, + ) sql = """DELETE FROM qiita.study_portal WHERE study_id IN %s AND portal_type_id = %s""" @@ -330,7 +339,8 @@ def get_analyses(self): qdb.sql_connection.TRN.add(sql, [self._id]) return set( qdb.analysis.Analysis(aid) - for aid in qdb.sql_connection.TRN.execute_fetchflatten()) + for aid in qdb.sql_connection.TRN.execute_fetchflatten() + ) def _check_analyses(self, analyses): with qdb.sql_connection.TRN: @@ -343,7 +353,8 @@ def _check_analyses(self, analyses): if len(existing) != len(analyses): bad = map(str, set(analyses).difference(existing)) raise qdb.exceptions.QiitaDBError( - "The following analyses do not exist: %s" % ", ".join(bad)) + "The following analyses do not exist: %s" % ", ".join(bad) + ) # Check if any analyses given are default sql = """SELECT analysis_id @@ -355,7 +366,8 @@ def _check_analyses(self, analyses): bad = map(str, set(analyses).difference(default)) raise qdb.exceptions.QiitaDBError( "The following analyses are default and can't be deleted " - "or assigned to another portal: %s" % ", ".join(bad)) + "or assigned to another portal: %s" % ", ".join(bad) + ) def add_analyses(self, analyses): """Adds analyses to given portal @@ -395,7 +407,8 @@ def add_analyses(self, analyses): raise qdb.exceptions.QiitaDBError( "Portal %s is mising studies used in the following " "analyses: %s" - % (self.portal, ", ".join(map(str, missing_info)))) + % (self.portal, ", ".join(map(str, missing_info))) + ) # Clean list of analyses to ones not already associated with portal sql = """SELECT analysis_id @@ -409,8 +422,9 @@ def add_analyses(self, analyses): if len(duplicates) > 0: warnings.warn( "The following analyses are already part of %s: %s" - % (self.portal, ', '.join(map(str, duplicates))), - qdb.exceptions.QiitaDBWarning) + % (self.portal, ", ".join(map(str, duplicates))), + qdb.exceptions.QiitaDBWarning, + ) sql = """INSERT INTO qiita.analysis_portal (analysis_id, portal_type_id) @@ -418,7 +432,8 @@ def add_analyses(self, analyses): clean_analyses = set(analyses).difference(duplicates) if len(clean_analyses) != 0: qdb.sql_connection.TRN.add( - sql, [[a, self._id] for a in clean_analyses], many=True) + sql, [[a, self._id] for a in clean_analyses], many=True + ) qdb.sql_connection.TRN.execute() def remove_analyses(self, analyses): @@ -439,7 +454,7 @@ def remove_analyses(self, analyses): with qdb.sql_connection.TRN: self._check_analyses(analyses) if self.portal == "QIITA": - raise ValueError('Can not remove from main QIITA portal!') + raise ValueError("Can not remove from main QIITA portal!") # Clean list of analyses to ones already associated with portal sql = """SELECT analysis_id @@ -454,12 +469,12 @@ def remove_analyses(self, analyses): rem = map(str, set(analyses).difference(clean_analyses)) warnings.warn( "The following analyses are not part of %s: %s" - % (self.portal, ', '.join(rem)), - qdb.exceptions.QiitaDBWarning) + % (self.portal, ", ".join(rem)), + qdb.exceptions.QiitaDBWarning, + ) sql = """DELETE FROM qiita.analysis_portal WHERE analysis_id IN %s AND portal_type_id = %s""" if len(clean_analyses) != 0: - qdb.sql_connection.TRN.add( - sql, [tuple(clean_analyses), self._id]) + 
qdb.sql_connection.TRN.add(sql, [tuple(clean_analyses), self._id]) qdb.sql_connection.TRN.execute() diff --git a/qiita_db/processing_job.py b/qiita_db/processing_job.py index 0d11853dd..837c7e24b 100644 --- a/qiita_db/processing_job.py +++ b/qiita_db/processing_job.py @@ -6,24 +6,24 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -import networkx as nx -import qiita_db as qdb -import pandas as pd -from numpy import log as nlog # noqa - -from collections import defaultdict, Iterable +from collections import Iterable, defaultdict from datetime import datetime, timedelta from itertools import chain from json import dumps, loads -from multiprocessing import Process, Queue, Event -from re import search, findall -from subprocess import Popen, PIPE +from multiprocessing import Event, Process, Queue +from os import environ +from os.path import join +from re import findall, search +from subprocess import PIPE, Popen from time import sleep from uuid import UUID -from os.path import join + +import networkx as nx +import pandas as pd from humanize import naturalsize -from os import environ +from numpy import log as nlog # noqa +import qiita_db as qdb from qiita_core.qiita_settings import qiita_config from qiita_db.util import create_nested_path @@ -45,23 +45,32 @@ class Watcher(Process): # 'success', 'error', 'in_construction', 'waiting'} # TODO: what to map in_construction to? - job_state_map = {'C': 'completed', 'E': 'exiting', 'H': 'held', - 'Q': 'queued', 'R': 'running', 'T': 'moving', - 'W': 'waiting', 'S': 'suspended'} + job_state_map = { + "C": "completed", + "E": "exiting", + "H": "held", + "Q": "queued", + "R": "running", + "T": "moving", + "W": "waiting", + "S": "suspended", + } # TODO: moving, waiting, and suspended have been mapped to # 'running' in Qiita, as 'waiting' in Qiita connotes that the # main job itself has completed, and is waiting on validator # jobs to finish, etc. 
Revisit - job_scheduler_to_qiita_state_map = {'completed': 'completed', - 'held': 'queued', - 'queued': 'queued', - 'exiting': 'running', - 'running': 'running', - 'moving': 'running', - 'waiting': 'running', - 'suspended': 'running', - 'DROPPED': 'error'} + job_scheduler_to_qiita_state_map = { + "completed": "completed", + "held": "queued", + "queued": "queued", + "exiting": "running", + "running": "running", + "moving": "running", + "waiting": "running", + "suspended": "running", + "DROPPED": "error", + } def __init__(self): super(Watcher, self).__init__() @@ -86,24 +95,25 @@ def __init__(self): # the cross-process sentinel value to shutdown Watcher self.event = Event() - def _element_extract(self, snippet, list_of_elements, - list_of_optional_elements): + def _element_extract(self, snippet, list_of_elements, list_of_optional_elements): results = {} missing_elements = [] for element in list_of_elements: - value = search('<%s>(.*?)</%s>' % (element, element), snippet) + value = search("<%s>(.*?)</%s>" % (element, element), snippet) if value: results[element] = value.group(1) else: missing_elements.append(element) if missing_elements: - raise AssertionError("The following elements were not found: %s" - % ', '.join(missing_elements)) + raise AssertionError( + "The following elements were not found: %s" + % ", ".join(missing_elements) + ) for element in list_of_optional_elements: - value = search('<%s>(.*?)</%s>' % (element, element), snippet) + value = search("<%s>(.*?)</%s>" % (element, element), snippet) if value: results[element] = value.group(1) @@ -115,21 +125,21 @@ def _process_dependent_jobs(self, results): # 'held' on this job's successful completion. If we are maintaining # state on any of these jobs, mark them as 'DROPPED', because they will # no longer appear in qstat output. - if results['job_state'] == 'completed': - if results['exit_status'] == '0': return - if 'depend' in results: - tmp = results['depend'].split(':') - if tmp[0] == 'beforeok': + if results["job_state"] == "completed": + if results["exit_status"] == "0": return + if "depend" in results: + tmp = results["depend"].split(":") + if tmp[0] == "beforeok": tmp.pop(0) for child_job_id in tmp: # jobs in 'beforeok' are labeled with the complete # job id and what looks to be the server name doing # the work. For now, simply remove the # '@host.domain.org' (server) component. - child_job_id = child_job_id.split('@')[0] - self.processes[child_job_id]['job_state'] = 'DROPPED' + child_job_id = child_job_id.split("@")[0] + self.processes[child_job_id]["job_state"] = "DROPPED" self.queue.put(self.processes[child_job_id]) def run(self): @@ -138,7 +148,7 @@ def run(self): proc.wait() if proc.returncode != 0: # inform any process expecting data from Watcher - self.queue.put('QUIT') + self.queue.put("QUIT") self.event.set() while not self.event.is_set(): @@ -148,38 +158,35 @@ def run(self): # qstat returned successfully with metadata on processes # break up metadata into individual elements # for processing. - m = findall('<Job>(.*?)</Job>', stdout.decode('ascii')) + m = findall("<Job>(.*?)</Job>", stdout.decode("ascii")) for item in m: # filter out jobs that don't belong to owner - if search('%s' % self.owner, item): + if search("%s" % self.owner, item): # extract the metadata we want. # if a job has completed, an exit_status element will # be present. We also want that.
- results = self._element_extract(item, ['Job_Id', - 'Job_Name', - 'job_state'], - ['depend']) - tmp = Watcher.job_state_map[results['job_state']] - results['job_state'] = tmp - if results['job_state'] == 'completed': - results2 = self._element_extract(item, - ['exit_status'], - []) - results['exit_status'] = results2['exit_status'] + results = self._element_extract( + item, ["Job_Id", "Job_Name", "job_state"], ["depend"] + ) + tmp = Watcher.job_state_map[results["job_state"]] + results["job_state"] = tmp + if results["job_state"] == "completed": + results2 = self._element_extract(item, ["exit_status"], []) + results["exit_status"] = results2["exit_status"] # determine if anything has changed since last poll - if results['Job_Id'] in self.processes: - if self.processes[results['Job_Id']] != results: + if results["Job_Id"] in self.processes: + if self.processes[results["Job_Id"]] != results: # metadata for existing job has changed - self.processes[results['Job_Id']] = results + self.processes[results["Job_Id"]] = results self.queue.put(results) self._process_dependent_jobs(results) else: # metadata for new job inserted - self.processes[results['Job_Id']] = results + self.processes[results["Job_Id"]] = results self.queue.put(results) else: - self.queue.put('QUIT') + self.queue.put("QUIT") self.event.set() # don't join(), since we are exiting from the main loop @@ -187,7 +194,7 @@ def run(self): def stop(self): # 'poison pill' to thread/process - self.queue.put('QUIT') + self.queue.put("QUIT") # setting self.event is a safe way of communicating a boolean # value across processes and threads. # when this event is 'set' by the main line of execution in Qiita, @@ -201,7 +208,6 @@ def stop(self): def launch_local(env_script, start_script, url, job_id, job_dir): - # launch_local() differs from launch_job_scheduler(), as no Watcher() is # used. 
# each launch_local() process will execute the cmd as a child process, @@ -219,7 +225,7 @@ def launch_local(env_script, start_script, url, job_id, job_dir): # When Popen() executes, the shell is not in interactive mode, # so it is not sourcing any of the bash configuration files # We need to source it so the env_script are available - cmd = "bash -c '%s; %s'" % (env_script, ' '.join(cmd)) + cmd = "bash -c '%s; %s'" % (env_script, " ".join(cmd)) print("CMD STRING: %s" % cmd) # Popen() may also need universal_newlines=True @@ -247,35 +253,36 @@ def launch_local(env_script, start_script, url, job_id, job_dir): ProcessingJob(job_id).complete(False, error=error) -def launch_job_scheduler(env_script, start_script, url, job_id, job_dir, - dependent_job_id, resource_params): - +def launch_job_scheduler( + env_script, start_script, url, job_id, job_dir, dependent_job_id, resource_params +): # note that job_id is Qiita's UUID, not a job_scheduler job ID cmd = [start_script, url, job_id, job_dir] lines = [ - '#!/bin/bash', - f'#SBATCH --error {job_dir}/slurm-error.txt', - f'#SBATCH --output {job_dir}/slurm-output.txt'] + "#!/bin/bash", + f"#SBATCH --error {job_dir}/slurm-error.txt", + f"#SBATCH --output {job_dir}/slurm-output.txt", + ] lines.append("echo $SLURM_JOBID") lines.append("source ~/.bash_profile") lines.append(env_script) - epilogue = environ.get('QIITA_JOB_SCHEDULER_EPILOGUE', '') + epilogue = environ.get("QIITA_JOB_SCHEDULER_EPILOGUE", "") if epilogue: lines.append(f"#SBATCH --epilog {epilogue}") - lines.append(' '.join(cmd)) + lines.append(" ".join(cmd)) # writing the script file create_nested_path(job_dir) - fp = join(job_dir, '%s.txt' % job_id) + fp = join(job_dir, "%s.txt" % job_id) - with open(fp, 'w') as job_file: + with open(fp, "w") as job_file: job_file.write("\n".join(lines)) - sbatch_cmd = ['sbatch'] + sbatch_cmd = ["sbatch"] if dependent_job_id: # note that a dependent job should be submitted before the @@ -286,12 +293,12 @@ def launch_job_scheduler(env_script, start_script, url, job_id, job_dir, sbatch_cmd.append(resource_params) sbatch_cmd.append(fp) - stdout, stderr, return_value = _system_call(' '.join(sbatch_cmd)) + stdout, stderr, return_value = _system_call(" ".join(sbatch_cmd)) if return_value != 0: - raise AssertionError(f'Error submitting job: {sbatch_cmd} :: {stderr}') + raise AssertionError(f"Error submitting job: {sbatch_cmd} :: {stderr}") - job_id = stdout.strip('\n').split(" ")[-1] + job_id = stdout.strip("\n").split(" ")[-1] return job_id @@ -317,8 +324,7 @@ def _system_call(cmd): the authors of this function to port it to Qiita and keep it under BSD license. 
""" - proc = Popen(cmd, universal_newlines=True, shell=True, stdout=PIPE, - stderr=PIPE) + proc = Popen(cmd, universal_newlines=True, shell=True, stdout=PIPE, stderr=PIPE) # Communicate pulls all stdout/stderr from the PIPEs # This call blocks until the command is done stdout, stderr = proc.communicate() @@ -344,13 +350,18 @@ class ProcessingJob(qdb.base.QiitaObject): exists create """ - _table = 'processing_job' - _launch_map = {'qiita-plugin-launcher': - {'function': launch_local, - 'execute_in_process': False}, - 'qiita-plugin-launcher-slurm': - {'function': launch_job_scheduler, - 'execute_in_process': True}} + + _table = "processing_job" + _launch_map = { + "qiita-plugin-launcher": { + "function": launch_local, + "execute_in_process": False, + }, + "qiita-plugin-launcher-slurm": { + "function": launch_job_scheduler, + "execute_in_process": True, + }, + } @classmethod def exists(cls, job_id): @@ -410,50 +421,50 @@ def resource_allocation_info(self): """ with qdb.sql_connection.TRN: analysis = None - if self.command.name == 'complete_job': - jtype = 'COMPLETE_JOBS_RESOURCE_PARAM' + if self.command.name == "complete_job": + jtype = "COMPLETE_JOBS_RESOURCE_PARAM" params = self.parameters.values - v = loads(params['payload']) + v = loads(params["payload"]) # assume an empty string for name is preferable to None - name = '' - if v['artifacts'] is not None: - an_element = list(v['artifacts'].keys())[0] - name = v['artifacts'][an_element]['artifact_type'] + name = "" + if v["artifacts"] is not None: + an_element = list(v["artifacts"].keys())[0] + name = v["artifacts"][an_element]["artifact_type"] # for analysis we have two options, either use the # input_artifacts or use the parameter 'analysis' of the job # to complete - job = ProcessingJob(params['job_id']) + job = ProcessingJob(params["job_id"]) params = job.parameters.values ia = job.input_artifacts - if 'analysis' in params and params['analysis'] is not None: - analysis = qdb.analysis.Analysis(params['analysis']) + if "analysis" in params and params["analysis"] is not None: + analysis = qdb.analysis.Analysis(params["analysis"]) elif ia: analysis = ia[0].analysis - elif self.command.name == 'release_validators': - jtype = 'RELEASE_VALIDATORS_RESOURCE_PARAM' - tmp = ProcessingJob(self.parameters.values['job']) + elif self.command.name == "release_validators": + jtype = "RELEASE_VALIDATORS_RESOURCE_PARAM" + tmp = ProcessingJob(self.parameters.values["job"]) name = tmp.parameters.command.name if tmp.input_artifacts: analysis = tmp.input_artifacts[0].analysis - elif self.command.name == 'Validate': - jtype = 'VALIDATOR' + elif self.command.name == "Validate": + jtype = "VALIDATOR" vals = self.parameters.values - name = vals['artifact_type'] - if vals['analysis'] is not None: - analysis = qdb.analysis.Analysis(vals['analysis']) - elif self.id == 'register': - jtype = 'REGISTER' - name = 'REGISTER' + name = vals["artifact_type"] + if vals["analysis"] is not None: + analysis = qdb.analysis.Analysis(vals["analysis"]) + elif self.id == "register": + jtype = "REGISTER" + name = "REGISTER" else: # assume anything else is a command - jtype = 'RESOURCE_PARAMS_COMMAND' + jtype = "RESOURCE_PARAMS_COMMAND" name = self.command.name # for analysis we have two options, either use the # input_artifacts or use the parameter 'analysis' of self params = self.parameters.values ia = self.input_artifacts - if 'analysis' in params and params['analysis'] is not None: - analysis = qdb.analysis.Analysis(params['analysis']) + if "analysis" in params and 
params["analysis"] is not None: + analysis = qdb.analysis.Analysis(params["analysis"]) elif ia: analysis = ia[0].analysis @@ -472,87 +483,100 @@ def resource_allocation_info(self): sql = """SELECT allocation FROM qiita.processing_job_resource_allocation WHERE name = %s and job_type = %s""" - qdb.sql_connection.TRN.add(sql, ['default', jtype]) + qdb.sql_connection.TRN.add(sql, ["default", jtype]) result = qdb.sql_connection.TRN.execute_fetchflatten() if not result: AssertionError( - "Could not match %s to a resource allocation!" % name) + "Could not match %s to a resource allocation!" % name + ) allocation = result[0] # adding user_level extra parameters - allocation = f'{allocation} {self.user.slurm_parameters}'.strip() + allocation = f"{allocation} {self.user.slurm_parameters}".strip() # adding analysis reservation if analysis is not None: sr = analysis.slurm_reservation if sr is not None: - allocation = f'{allocation} --reservation {sr}' + allocation = f"{allocation} --reservation {sr}" - if ('{samples}' in allocation or '{columns}' in allocation or - '{input_size}' in allocation): + if ( + "{samples}" in allocation + or "{columns}" in allocation + or "{input_size}" in allocation + ): samples, columns, input_size = self.shape parts = [] - error_msg = ('Obvious incorrect allocation. Please ' - 'contact %s' % qiita_config.help_email) - for part in allocation.split('--'): - param = '' - if part.startswith('time '): - param = 'time ' - elif part.startswith('mem '): - param = 'mem ' + error_msg = ( + "Obvious incorrect allocation. Please " + "contact %s" % qiita_config.help_email + ) + for part in allocation.split("--"): + param = "" + if part.startswith("time "): + param = "time " + elif part.startswith("mem "): + param = "mem " else: # if parts is empty, this is the first part so no -- if parts: - parts.append(f'--{part.strip()}') + parts.append(f"--{part.strip()}") else: parts.append(part.strip()) continue - part = part[len(param):] - if ('{samples}' in part or '{columns}' in part or - '{input_size}' in part): + part = part[len(param) :] + if ( + "{samples}" in part + or "{columns}" in part + or "{input_size}" in part + ): # to make sure that the formula is correct and avoid # possible issues with conversions, we will check that # all the variables {samples}/{columns}/{input_size} # present in the formula are not None, if any is None # we will set the job's error (will stop it) and the # message is gonna be shown to the user within the job - if (('{samples}' in part and samples is None) or - ('{columns}' in part and columns is None) or - ('{input_size}' in part and input_size is - None)): + if ( + ("{samples}" in part and samples is None) + or ("{columns}" in part and columns is None) + or ("{input_size}" in part and input_size is None) + ): self._set_error(error_msg) - return 'Not valid' + return "Not valid" try: # if eval has something that can't be processed # it will raise a NameError - value = eval(part.format( - samples=samples, columns=columns, - input_size=input_size)) + value = eval( + part.format( + samples=samples, + columns=columns, + input_size=input_size, + ) + ) except NameError: self._set_error(error_msg) - return 'Not valid' + return "Not valid" else: if value <= 0: self._set_error(error_msg) - return 'Not valid' + return "Not valid" - if param == 'time ': + if param == "time ": td = timedelta(seconds=value) if td.days > 0: days = td.days td = td - timedelta(days=days) - part = f'{days}-{str(td)}' + part = f"{days}-{str(td)}" else: part = str(td) - part = 
part.split('.')[0] + part = part.split(".")[0] else: - part = naturalsize( - value, gnu=True, format='%.0f') - parts.append(f'--{param}{part}'.strip()) + part = naturalsize(value, gnu=True, format="%.0f") + parts.append(f"--{param}{part}".strip()) - allocation = ' '.join(parts) + allocation = " ".join(parts) return allocation @@ -610,9 +634,13 @@ def create(cls, user, parameters, force=False): if params: # divided by 2 as we have key-value pairs - len_params = int(len(params)/2) - sql = sql.format(' AND ' + ' AND '.join( - ["command_parameters->>%s ILIKE %s"] * len_params)) + len_params = int(len(params) / 2) + sql = sql.format( + " AND " + + " AND ".join( + ["command_parameters->>%s ILIKE %s"] * len_params + ) + ) params = [command.id] + params TTRN.add(sql, params) else: @@ -623,25 +651,31 @@ def create(cls, user, parameters, force=False): # checking that if the job status is success, it has children # [2] status, [3] children count - existing_jobs = [r for r in TTRN.execute_fetchindex() - if r[2] != 'success' or r[3] > 0] + existing_jobs = [ + r + for r in TTRN.execute_fetchindex() + if r[2] != "success" or r[3] > 0 + ] if existing_jobs: raise ValueError( - 'Cannot create job because the parameters are the ' - 'same as jobs that are queued, running or already ' - 'have succeeded:\n%s' % '\n'.join( - ["%s: %s" % (jid, status) - for jid, _, status, _ in existing_jobs])) + "Cannot create job because the parameters are the " + "same as jobs that are queued, running or already " + "have succeeded:\n%s" + % "\n".join( + [ + "%s: %s" % (jid, status) + for jid, _, status, _ in existing_jobs + ] + ) + ) sql = """INSERT INTO qiita.processing_job (email, command_id, command_parameters, processing_job_status_id) VALUES (%s, %s, %s, %s) RETURNING processing_job_id""" - status = qdb.util.convert_to_id( - "in_construction", "processing_job_status") - sql_args = [user.id, command.id, - parameters.dump(), status] + status = qdb.util.convert_to_id("in_construction", "processing_job_status") + sql_args = [user.id, command.id, parameters.dump(), status] TTRN.add(sql, sql_args) job_id = TTRN.execute_fetchlast() @@ -651,7 +685,7 @@ def create(cls, user, parameters, force=False): VALUES (%s, %s)""" pending = defaultdict(dict) for pname, vals in command.parameters.items(): - if vals[0] == 'artifact': + if vals[0] == "artifact": artifact_info = parameters.values[pname] # If the artifact_info is a list, then the artifact # still doesn't exist because the current job is part @@ -660,7 +694,7 @@ def create(cls, user, parameters, force=False): TTRN.add(sql, [artifact_info, job_id]) else: pending[artifact_info[0]][pname] = artifact_info[1] - elif pname == 'artifact': + elif pname == "artifact": TTRN.add(sql, [parameters.values[pname], job_id]) if pending: @@ -723,7 +757,8 @@ def parameters(self): qdb.sql_connection.TRN.add(sql, [self.id]) res = qdb.sql_connection.TRN.execute_fetchindex()[0] return qdb.software.Parameters.load( - qdb.software.Command(res[0]), values_dict=res[1]) + qdb.software.Command(res[0]), values_dict=res[1] + ) @property def input_artifacts(self): @@ -740,8 +775,10 @@ def input_artifacts(self): WHERE processing_job_id = %s ORDER BY artifact_id""" qdb.sql_connection.TRN.add(sql, [self.id]) - return [qdb.artifact.Artifact(aid) - for aid in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + qdb.artifact.Artifact(aid) + for aid in qdb.sql_connection.TRN.execute_fetchflatten() + ] @property def status(self): @@ -764,19 +801,19 @@ def status(self): return 
qdb.sql_connection.TRN.execute_fetchlast() def _generate_notification_message(self, value, error_msg): - ignored_software = ('artifact definition',) - ignored_commands = ('Validate', 'complete_job', 'release_validators') + ignored_software = ("artifact definition",) + ignored_commands = ("Validate", "complete_job", "release_validators") # abort early conditions (don't send an email notification) # tentatively accept the overhead of a function-call, even when a # notification isn't sent, just to keep the logic clean and # centralized. - if value == 'waiting': + if value == "waiting": # notification not needed. return None - if not self.user.info['receive_processing_job_emails']: + if not self.user.info["receive_processing_job_emails"]: # notification not needed. return None @@ -789,17 +826,15 @@ def _generate_notification_message(self, value, error_msg): return None # generate subject line - subject = (f'{self.command.name}: {value}, {self.id} ' - f'[{self.external_id}]') + subject = f"{self.command.name}: {value}, {self.id} [{self.external_id}]" # generate message line - message = '' + message = "" input_artifacts = self.input_artifacts if input_artifacts is None: # this is an admin job. display command name and parameters - message = (f'Admin Job {self.command.name} ' - f'{self.command.parameters}') + message = f"Admin Job {self.command.name} {self.command.parameters}" else: for artifact in input_artifacts: if artifact.prep_templates: @@ -807,61 +842,62 @@ def _generate_notification_message(self, value, error_msg): # prep ids, data_type, and command name. study_ids = [x.study_id for x in artifact.prep_templates] prep_ids = [x.id for x in artifact.prep_templates] - data_types = [x.data_type() for x in - artifact.prep_templates] + data_types = [x.data_type() for x in artifact.prep_templates] # there should only be one study id study_ids = set(study_ids) if len(study_ids) > 1: - raise qdb.exceptions.QiitaError("More than one Study " - "ID was found: " - f"{study_ids}") + raise qdb.exceptions.QiitaError( + f"More than one Study ID was found: {study_ids}" + ) study_id = study_ids.pop() # there should be at least one prep_id and probably more. prep_ids = list(set(prep_ids)) if len(prep_ids) == 0: - raise qdb.exceptions.QiitaError("No Prep IDs were " - "found") + raise qdb.exceptions.QiitaError("No Prep IDs were found") if len(prep_ids) == 1: - study_url = (f'{qiita_config.base_url}/study/' - f'description/{study_id}?prep_id=' - f'{prep_ids[0]}') + study_url = ( + f"{qiita_config.base_url}/study/" + f"description/{study_id}?prep_id=" + f"{prep_ids[0]}" + ) else: - study_url = (f'{qiita_config.base_url}/study/' - f'description/{study_id}') + study_url = ( + f"{qiita_config.base_url}/study/description/{study_id}" + ) # convert into a string for presentation. prep_ids = [str(x) for x in prep_ids] - prep_ids = ', '.join(prep_ids) + prep_ids = ", ".join(prep_ids) # there should be only one data type. 
data_types = set(data_types) if len(data_types) > 1: - raise qdb.exceptions.QiitaError("More than one data " - "type was found: " - f"{data_types}") + raise qdb.exceptions.QiitaError( + f"More than one data type was found: {data_types}" + ) data_type = data_types.pop() - message = f'{self.command.name}\n' - message += f'Prep IDs: {prep_ids}\n' - message += f'{study_url}\n' - message += f'Data Type: {data_type}\n' + message = f"{self.command.name}\n" + message += f"Prep IDs: {prep_ids}\n" + message += f"{study_url}\n" + message += f"Data Type: {data_type}\n" elif artifact.analysis: # this is an analysis job. display analysis id as link and # the command name. - message = f'Analysis Job {self.command.name}\n' - message += f'{qiita_config.base_url}/analysis/' - message += f'description/{artifact.analysis.id}/\n' + message = f"Analysis Job {self.command.name}\n" + message += f"{qiita_config.base_url}/analysis/" + message += f"description/{artifact.analysis.id}/\n" else: raise qdb.exceptions.QiitaError("Unknown Condition") # append legacy message line - message += 'New status: %s' % (value) + message += "New status: %s" % (value) - if value == 'error' and error_msg is not None: - message += f'\n\nError:\n{error_msg}' + if value == "error" and error_msg is not None: + message += f"\n\nError:\n{error_msg}" - return {'subject': subject, 'message': message} + return {"subject": subject, "message": message} def _set_status(self, value, error_msg=None): """Sets the status of the job @@ -884,27 +920,27 @@ def _set_status(self, value, error_msg=None): """ with qdb.sql_connection.TRN: current_status = self.status - if current_status == 'success': + if current_status == "success": raise qdb.exceptions.QiitaDBStatusError( - "Cannot change the status of a 'success' job") - elif current_status == 'running' and value == 'queued': + "Cannot change the status of a 'success' job" + ) + elif current_status == "running" and value == "queued": raise qdb.exceptions.QiitaDBStatusError( - "Cannot revert the status of a 'running' job to 'queued'") + "Cannot revert the status of a 'running' job to 'queued'" + ) - new_status = qdb.util.convert_to_id( - value, "processing_job_status") + new_status = qdb.util.convert_to_id(value, "processing_job_status") msg = self._generate_notification_message(value, error_msg) if msg is not None: # send email - qdb.util.send_email(self.user.email, msg['subject'], - msg['message']) + qdb.util.send_email(self.user.email, msg["subject"], msg["message"]) # send email to our sys-admin if error from admin - if self.user.level in {'admin', 'wet-lab admin'}: - if value == 'error': + if self.user.level in {"admin", "wet-lab admin"}: + if value == "error": qdb.util.send_email( - qiita_config.sysadmin_email, msg['subject'], - msg['message']) + qiita_config.sysadmin_email, msg["subject"], msg["message"] + ) sql = """UPDATE qiita.processing_job SET processing_job_status_id = %s @@ -922,7 +958,7 @@ def external_id(self): qdb.sql_connection.TRN.add(sql, [self.id]) result = qdb.sql_connection.TRN.execute_fetchlast() if result is None: - result = 'Not Available' + result = "Not Available" return result @external_id.setter @@ -989,11 +1025,12 @@ def submit(self, parent_job_id=None, dependent_jobs_list=None): """ with qdb.sql_connection.TRN: status = self.status - if status not in {'in_construction', 'waiting'}: + if status not in {"in_construction", "waiting"}: raise qdb.exceptions.QiitaDBOperationNotPermittedError( "Can't submit job, not in 'in_construction' or " - "'waiting' status. 
Current status: %s" % status) - self._set_status('queued') + "'waiting' status. Current status: %s" % status + ) + self._set_status("queued") # At this point we are going to involve other processes. We need # to commit the changes to the DB or the other processes will not # see these changes @@ -1021,19 +1058,20 @@ def submit(self, parent_job_id=None, dependent_jobs_list=None): # names to know if it should be executed differently and the # plugin should let Qiita know that a specific command should be ran # as job array or not - cnames_to_skip = {'Calculate Cell Counts', 'Calculate RNA Copy Counts'} - if 'ENVIRONMENT' in plugin_env_script and cname not in cnames_to_skip: + cnames_to_skip = {"Calculate Cell Counts", "Calculate RNA Copy Counts"} + if "ENVIRONMENT" in plugin_env_script and cname not in cnames_to_skip: # the job has to be in running state so the plugin can change its` # status with qdb.sql_connection.TRN: - self._set_status('running') + self._set_status("running") qdb.sql_connection.TRN.commit() create_nested_path(job_dir) - cmd = (f'{plugin_env_script}; {plugin_start_script} ' - f'{url} {self.id} {job_dir}') + cmd = ( + f"{plugin_env_script}; {plugin_start_script} {url} {self.id} {job_dir}" + ) stdout, stderr, return_value = _system_call(cmd) - if return_value != 0 or stderr != '': + if return_value != 0 or stderr != "": self._set_error(stderr) job_id = stdout # note that dependent jobs, such as m validator jobs marshalled into @@ -1043,7 +1081,7 @@ def submit(self, parent_job_id=None, dependent_jobs_list=None): # requires metadata from a late-defined and time-sensitive source. elif qiita_config.plugin_launcher in ProcessingJob._launch_map: launcher = ProcessingJob._launch_map[qiita_config.plugin_launcher] - if launcher['execute_in_process']: + if launcher["execute_in_process"]: # run this launcher function within this process. # usually this is done if the launcher spawns other processes # before returning immediately, usually with a job ID that can @@ -1063,12 +1101,15 @@ def submit(self, parent_job_id=None, dependent_jobs_list=None): # handled by this code. Out of proc launches will need to # handle exceptions by catching them and returning an error # code. - job_id = launcher['function'](plugin_env_script, - plugin_start_script, - url, - self.id, - job_dir, - parent_job_id, resource_params) + job_id = launcher["function"]( + plugin_env_script, + plugin_start_script, + url, + self.id, + job_dir, + parent_job_id, + resource_params, + ) if dependent_jobs_list: # a dependent_jobs_list will always have at least one @@ -1086,20 +1127,25 @@ def submit(self, parent_job_id=None, dependent_jobs_list=None): # launches a job, pulls the next job from the list, and # submits it. The remainder of the list is also passed to # continue the process. - next_job.submit(parent_job_id=job_id, - dependent_jobs_list=dependent_jobs_list) + next_job.submit( + parent_job_id=job_id, dependent_jobs_list=dependent_jobs_list + ) - elif not launcher['execute_in_process']: + elif not launcher["execute_in_process"]: # run this launcher function as a new process. # usually this is done if the launcher performs work that takes # an especially long time, or waits for children who perform # such work. 
- p = Process(target=launcher['function'], - args=(plugin_env_script, - plugin_start_script, - url, - self.id, - job_dir)) + p = Process( + target=launcher["function"], + args=( + plugin_env_script, + plugin_start_script, + url, + self.id, + job_dir, + ), + ) p.start() @@ -1113,24 +1159,29 @@ def submit(self, parent_job_id=None, dependent_jobs_list=None): for dependent in dependent_jobs_list: # register dependent job as queued to make qiita # aware of this child process - dependent._set_status('queued') + dependent._set_status("queued") dep_software = dependent.command.software - dep_job_dir = join(qdb.util.get_work_base_dir(), - dependent.id) - p = Process(target=launcher['function'], - args=(dep_software.environment_script, - dep_software.start_script, - url, - dependent.id, - dep_job_dir)) + dep_job_dir = join(qdb.util.get_work_base_dir(), dependent.id) + p = Process( + target=launcher["function"], + args=( + dep_software.environment_script, + dep_software.start_script, + url, + dependent.id, + dep_job_dir, + ), + ) p.start() # assign the child process ID as external id to # the dependent dependent.external_id = p.pid else: - error = ("execute_in_process must be defined", - "as either true or false") + error = ( + "execute_in_process must be defined", + "as either true or false", + ) raise AssertionError(error) else: error = "plugin_launcher should be one of two values for now" @@ -1152,9 +1203,10 @@ def release(self): The mapping between the job output and the artifact """ with qdb.sql_connection.TRN: - if self.command.software.type != 'artifact definition': + if self.command.software.type != "artifact definition": raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Only artifact definition jobs can be released") + "Only artifact definition jobs can be released" + ) # Retrieve the artifact information from the DB sql = """SELECT artifact_info @@ -1163,41 +1215,45 @@ def release(self): qdb.sql_connection.TRN.add(sql, [self.id]) a_info = qdb.sql_connection.TRN.execute_fetchlast() - provenance = loads(self.parameters.values['provenance']) - job = ProcessingJob(provenance['job']) - if 'data_type' in a_info: + provenance = loads(self.parameters.values["provenance"]) + job = ProcessingJob(provenance["job"]) + if "data_type" in a_info: # This job is resulting from a private job parents = None params = None name = None - data_type = a_info['data_type'] + data_type = a_info["data_type"] pvals = job.parameters.values - if 'analysis' in pvals: + if "analysis" in pvals: cmd_out_id = None - analysis = qdb.analysis.Analysis( - job.parameters.values['analysis']) + analysis = qdb.analysis.Analysis(job.parameters.values["analysis"]) else: - cmd_out_id = provenance['cmd_out_id'] + cmd_out_id = provenance["cmd_out_id"] analysis = None - a_info = a_info['artifact_data'] + a_info = a_info["artifact_data"] else: # This job is resulting from a plugin job parents = job.input_artifacts params = job.parameters - cmd_out_id = provenance['cmd_out_id'] - name = provenance['name'] + cmd_out_id = provenance["cmd_out_id"] + name = provenance["name"] analysis = None data_type = None # Create the artifact - atype = a_info['artifact_type'] - filepaths = a_info['filepaths'] + atype = a_info["artifact_type"] + filepaths = a_info["filepaths"] a = qdb.artifact.Artifact.create( - filepaths, atype, parents=parents, + filepaths, + atype, + parents=parents, processing_parameters=params, - analysis=analysis, data_type=data_type, name=name) + analysis=analysis, + data_type=data_type, + name=name, + ) - 
self._set_status('success') + self._set_status("success") mapping = {} if cmd_out_id is not None: @@ -1207,53 +1263,59 @@ def release(self): def release_validators(self): """Allows all the validator job spawned by this job to complete""" - if self.command.software.type not in ('artifact transformation', - 'private'): + if self.command.software.type not in ("artifact transformation", "private"): raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Only artifact transformation and private jobs can " - "release validators") + "Only artifact transformation and private jobs can release validators" + ) # Check if all the validators are completed. Validator jobs can be # in two states when completed: 'waiting' in case of success # or 'error' otherwise - validator_ids = ['%s [%s]' % (j.id, j.external_id) - for j in self.validator_jobs - if j.status not in ['waiting', 'error']] + validator_ids = [ + "%s [%s]" % (j.id, j.external_id) + for j in self.validator_jobs + if j.status not in ["waiting", "error"] + ] # Active polling - wait until all validator jobs are completed # TODO: As soon as we see one errored validator, we should kill # the other jobs and exit early. Don't wait for all of the jobs # to complete. while validator_ids: - jids = ', '.join(validator_ids) - self.step = ("Validating outputs (%d remaining) via " - "job(s) %s" % (len(validator_ids), jids)) + jids = ", ".join(validator_ids) + self.step = "Validating outputs (%d remaining) via job(s) %s" % ( + len(validator_ids), + jids, + ) sleep(10) - validator_ids = ['%s [%s]' % (j.id, j.external_id) - for j in self.validator_jobs - if j.status not in ['waiting', 'error']] + validator_ids = [ + "%s [%s]" % (j.id, j.external_id) + for j in self.validator_jobs + if j.status not in ["waiting", "error"] + ] # Check if any of the validators errored - errored = [j for j in self.validator_jobs - if j.status == 'error'] + errored = [j for j in self.validator_jobs if j.status == "error"] if errored: # At least one of the validators failed, Set the rest of the # validators and the current job as failed - waiting = [j.id for j in self.validator_jobs - if j.status == 'waiting'] + waiting = [j.id for j in self.validator_jobs if j.status == "waiting"] common_error = "\n".join( - ["Validator %s error message: %s" % (j.id, j.log.msg) - for j in errored]) + ["Validator %s error message: %s" % (j.id, j.log.msg) for j in errored] + ) val_error = "%d sister validator jobs failed: %s" % ( - len(errored), common_error) + len(errored), + common_error, + ) for j in waiting: ProcessingJob(j)._set_error(val_error) - self._set_error('%d validator jobs failed: %s' - % (len(errored), common_error)) + self._set_error( + "%d validator jobs failed: %s" % (len(errored), common_error) + ) else: mapping = {} # Loop through all validator jobs and release them, allowing @@ -1269,15 +1331,14 @@ def release_validators(self): (artifact_id, processing_job_id, command_output_id) VALUES (%s, %s, %s)""" - sql_args = [[aid, self.id, outid] - for outid, aid in mapping.items()] + sql_args = [[aid, self.id, outid] for outid, aid in mapping.items()] with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql, sql_args, many=True) self._update_and_launch_children(mapping) - self._set_status('success') + self._set_status("success") def _complete_artifact_definition(self, artifact_data): - """"Performs the needed steps to complete an artifact definition job + """ "Performs the needed steps to complete an artifact definition job In order to complete an artifact definition job we need to 
create the artifact, and then start all the jobs that were waiting for this @@ -1300,22 +1361,24 @@ def _complete_artifact_definition(self, artifact_data): regular processing """ with qdb.sql_connection.TRN: - atype = artifact_data['artifact_type'] - filepaths = artifact_data['filepaths'] + atype = artifact_data["artifact_type"] + filepaths = artifact_data["filepaths"] # We need to differentiate if this artifact is the # result of a previous job or uploading job_params = self.parameters.values - if job_params['provenance'] is not None: + if job_params["provenance"] is not None: # The artifact is a result from a previous job - provenance = loads(job_params['provenance']) - if provenance.get('direct_creation', False): - original_job = ProcessingJob(provenance['job']) + provenance = loads(job_params["provenance"]) + if provenance.get("direct_creation", False): + original_job = ProcessingJob(provenance["job"]) artifact = qdb.artifact.Artifact.create( - filepaths, atype, + filepaths, + atype, parents=original_job.input_artifacts, processing_parameters=original_job.parameters, - analysis=job_params['analysis'], - name=job_params['name']) + analysis=job_params["analysis"], + name=job_params["name"], + ) sql = """ INSERT INTO qiita.artifact_output_processing_job @@ -1323,53 +1386,59 @@ def _complete_artifact_definition(self, artifact_data): command_output_id) VALUES (%s, %s, %s)""" qdb.sql_connection.TRN.add( - sql, [artifact.id, original_job.id, - provenance['cmd_out_id']]) + sql, [artifact.id, original_job.id, provenance["cmd_out_id"]] + ) qdb.sql_connection.TRN.execute() - self._set_status('success') + self._set_status("success") else: - if provenance.get('data_type') is not None: - artifact_data = {'data_type': provenance['data_type'], - 'artifact_data': artifact_data} + if provenance.get("data_type") is not None: + artifact_data = { + "data_type": provenance["data_type"], + "artifact_data": artifact_data, + } sql = """UPDATE qiita.processing_job_validator SET artifact_info = %s WHERE validator_id = %s""" - qdb.sql_connection.TRN.add( - sql, [dumps(artifact_data), self.id]) + qdb.sql_connection.TRN.add(sql, [dumps(artifact_data), self.id]) qdb.sql_connection.TRN.execute() # Can't create the artifact until all validators # are completed - self._set_status('waiting') + self._set_status("waiting") else: # The artifact is uploaded by the user or is the initial # artifact of an analysis - if ('analysis' in job_params and - job_params['analysis'] is not None): + if "analysis" in job_params and job_params["analysis"] is not None: pt = None - an = qdb.analysis.Analysis(job_params['analysis']) + an = qdb.analysis.Analysis(job_params["analysis"]) sql = """SELECT data_type FROM qiita.analysis_processing_job WHERE analysis_id = %s AND processing_job_id = %s""" qdb.sql_connection.TRN.add(sql, [an.id, self.id]) data_type = qdb.sql_connection.TRN.execute_fetchlast() - elif job_params['template'] is not None: + elif job_params["template"] is not None: pt = qdb.metadata_template.prep_template.PrepTemplate( - job_params['template']) + job_params["template"] + ) an = None data_type = None else: pt = None an = None - data_type = 'Job Output Folder' + data_type = "Job Output Folder" artifact = qdb.artifact.Artifact.create( - filepaths, atype, prep_template=pt, analysis=an, - data_type=data_type, name=job_params['name']) - self._set_status('success') + filepaths, + atype, + prep_template=pt, + analysis=an, + data_type=data_type, + name=job_params["name"], + ) + self._set_status("success") # we need to update the 
children jobs to replace the input # for the newly created artifact via the validator @@ -1405,9 +1474,9 @@ def _complete_artifact_transformation(self, artifacts_data): # Correct the format of the filepaths parameter so we can # create a validate job filepaths = defaultdict(list) - for fp, fptype in a_data['filepaths']: + for fp, fptype in a_data["filepaths"]: filepaths[fptype].append(fp) - atype = a_data['artifact_type'] + atype = a_data["artifact_type"] # The validate job needs a prep information file. In theory, # a job can be generated from more that one prep information @@ -1426,7 +1495,8 @@ def _complete_artifact_transformation(self, artifacts_data): if len(templates) > 1: raise qdb.exceptions.QiitaDBError( "Currently only single prep template " - "is allowed, found %d" % len(templates)) + "is allowed, found %d" % len(templates) + ) elif len(templates) == 1: template = templates.pop() elif self.input_artifacts: @@ -1453,38 +1523,49 @@ def _complete_artifact_transformation(self, artifacts_data): if naming_params: params = self.parameters.values art_name = "%s %s" % ( - out_name, ' '.join([str(params[p]).split('/')[-1] - for p in naming_params])) + out_name, + " ".join( + [str(params[p]).split("/")[-1] for p in naming_params] + ), + ) else: art_name = out_name - provenance = {'job': self.id, - 'cmd_out_id': cmd_out_id, - 'name': art_name} + provenance = { + "job": self.id, + "cmd_out_id": cmd_out_id, + "name": art_name, + } - if self.command.software.type == 'private': - provenance['data_type'] = 'Job Output Folder' + if self.command.software.type == "private": + provenance["data_type"] = "Job Output Folder" # Get the validator command for the current artifact type and # create a new job # see also release_validators() cmd = qdb.software.Command.get_validator(atype) values_dict = { - 'files': dumps(filepaths), 'artifact_type': atype, - 'template': template, 'provenance': dumps(provenance), - 'analysis': None} + "files": dumps(filepaths), + "artifact_type": atype, + "template": template, + "provenance": dumps(provenance), + "analysis": None, + } if analysis is not None: - values_dict['analysis'] = analysis + values_dict["analysis"] = analysis validate_params = qdb.software.Parameters.load( - cmd, values_dict=values_dict) + cmd, values_dict=values_dict + ) validator_jobs.append( - ProcessingJob.create(self.user, validate_params, True)) + ProcessingJob.create(self.user, validate_params, True) + ) # Change the current step of the job self.step = "Validating outputs (%d remaining) via job(s) %s" % ( - len(validator_jobs), ', '.join(['%s [%s]' % ( - j.id, j.external_id) for j in validator_jobs])) + len(validator_jobs), + ", ".join(["%s [%s]" % (j.id, j.external_id) for j in validator_jobs]), + ) # Link all the validator jobs with the current job self._set_validator_jobs(validator_jobs) @@ -1496,8 +1577,10 @@ def _complete_artifact_transformation(self, artifacts_data): # taken from: # https://www.geeksforgeeks.org/break-list-chunks-size-n-python/ - lists = [validator_jobs[i * n:(i + 1) * n] - for i in range((len(validator_jobs) + n - 1) // n)] + lists = [ + validator_jobs[i * n : (i + 1) * n] + for i in range((len(validator_jobs) + n - 1) // n) + ] for sub_list in lists: # each sub_list will always have at least a lead_job @@ -1508,11 +1591,9 @@ def _complete_artifact_transformation(self, artifacts_data): lead_job.submit(dependent_jobs_list=sub_list) # Submit the job that will release all the validators - plugin = qdb.software.Software.from_name_and_version( - 'Qiita', 'alpha') - cmd = 
plugin.get_command('release_validators') - params = qdb.software.Parameters.load( - cmd, values_dict={'job': self.id}) + plugin = qdb.software.Software.from_name_and_version("Qiita", "alpha") + cmd = plugin.get_command("release_validators") + params = qdb.software.Parameters.load(cmd, values_dict={"job": self.id}) job = ProcessingJob.create(self.user, params) # Doing the submission outside of the transaction @@ -1557,20 +1638,21 @@ def complete(self, success, artifacts_data=None, error=None): """ with qdb.sql_connection.TRN: if success: - if self.status != 'running': + if self.status != "running": # If the job is not running, we only allow to complete it # if it did not succeed raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Can't complete job: not in a running state") + "Can't complete job: not in a running state" + ) if artifacts_data: - if self.command.software.type == 'artifact definition': + if self.command.software.type == "artifact definition": # There is only one artifact created _, a_data = artifacts_data.popitem() self._complete_artifact_definition(a_data) else: self._complete_artifact_transformation(artifacts_data) else: - self._set_status('success') + self._set_status("success") else: self._set_error(error) @@ -1586,7 +1668,7 @@ def log(self): """ with qdb.sql_connection.TRN: res = None - if self.status == 'error': + if self.status == "error": sql = """SELECT logging_id FROM qiita.processing_job WHERE processing_job_id = %s""" @@ -1609,11 +1691,12 @@ def _set_error(self, error): If the status of the job is 'success' """ with qdb.sql_connection.TRN: - if self.status == 'success': + if self.status == "success": raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Can only set up the log for jobs whose status is 'error'") + "Can only set up the log for jobs whose status is 'error'" + ) - log = qdb.logger.LogEntry.create('Runtime', error) + log = qdb.logger.LogEntry.create("Runtime", error) sql = """UPDATE qiita.processing_job SET logging_id = %s @@ -1626,7 +1709,7 @@ def _set_error(self, error): c.complete(False, error="Parent job '%s' failed." 
% self.id) # set as error after everything is in place - self._set_status('error', error_msg=error) + self._set_status("error", error_msg=error) @property def heartbeat(self): @@ -1657,11 +1740,12 @@ def update_heartbeat_state(self): """ with qdb.sql_connection.TRN: status = self.status - if status == 'queued': - self._set_status('running') - elif status != 'running': + if status == "queued": + self._set_status("running") + elif status != "running": raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Can't execute heartbeat on job: already completed") + "Can't execute heartbeat on job: already completed" + ) sql = """UPDATE qiita.processing_job SET heartbeat = %s WHERE processing_job_id = %s""" @@ -1698,10 +1782,10 @@ def step(self, value): qiita_db.exceptions.QiitaDBOperationNotPermittedError If the status of the job is not 'running' """ - if self.status != 'running': + if self.status != "running": raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Cannot change the step of a job whose status is not " - "'running'") + "Cannot change the step of a job whose status is not 'running'" + ) sql = """UPDATE qiita.processing_job SET step = %s WHERE processing_job_id = %s""" @@ -1770,8 +1854,7 @@ def _helper_update_children(self, new_map): # Force to insert a NULL in the DB if pending is empty pending = pending if pending else None - qdb.sql_connection.TRN.add(sql_update, - [dumps(params), pending, c.id]) + qdb.sql_connection.TRN.add(sql_update, [dumps(params), pending, c.id]) qdb.sql_connection.TRN.execute() if pending is None: @@ -1816,7 +1899,7 @@ def _update_and_launch_children(self, mapping): ready = self._update_children(mapping) # Submit all the children that already have all the input parameters for c in ready: - if c.status in {'in_construction', 'waiting'}: + if c.status in {"in_construction", "waiting"}: c.submit() # some jobs create several children jobs/validators and this # can clog the submission process; giving it a second to @@ -1833,9 +1916,10 @@ def outputs(self): The outputs of the job keyed by output name """ with qdb.sql_connection.TRN: - if self.status != 'success': + if self.status != "success": raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Can't return the outputs of a non-success job") + "Can't return the outputs of a non-success job" + ) sql = """SELECT artifact_id, name FROM qiita.artifact_output_processing_job @@ -1844,7 +1928,8 @@ def outputs(self): qdb.sql_connection.TRN.add(sql, [self.id]) return { name: qdb.artifact.Artifact(aid) - for aid, name in qdb.sql_connection.TRN.execute_fetchindex()} + for aid, name in qdb.sql_connection.TRN.execute_fetchindex() + } @property def processing_job_workflow(self): @@ -1867,8 +1952,7 @@ def processing_job_workflow(self): WHERE processing_job_id = %s""" qdb.sql_connection.TRN.add(sql, [res[0][0]]) r = qdb.sql_connection.TRN.execute_fetchindex() - return (qdb.processing_job.ProcessingWorkflow(r[0][0]) if r - else None) + return qdb.processing_job.ProcessingWorkflow(r[0][0]) if r else None else: return None @@ -1914,10 +1998,11 @@ def hide(self): """ with qdb.sql_connection.TRN: status = self.status - if status != 'error': + if status != "error": raise qdb.exceptions.QiitaDBOperationNotPermittedError( - 'Only jobs in error status can be hidden. Current status: ' - '%s' % status) + "Only jobs in error status can be hidden. 
Current status: " + "%s" % status + ) sql = """UPDATE qiita.processing_job SET hidden = %s WHERE processing_job_id = %s""" @@ -1945,38 +2030,43 @@ def shape(self): parameters = self.parameters.values QUIDError = qdb.exceptions.QiitaDBUnknownIDError - if self.command.name == 'Validate': + if self.command.name == "Validate": # Validate only has two options to calculate it's size: template (a # job that has a preparation linked) or analysis (is from an # analysis). However, 'template' can be present and be None - if 'template' in parameters and parameters['template'] is not None: + if "template" in parameters and parameters["template"] is not None: try: PT = qdb.metadata_template.prep_template.PrepTemplate - prep_info = PT(parameters['template']) + prep_info = PT(parameters["template"]) except QUIDError: pass else: study_id = prep_info.study_id - elif 'analysis' in parameters: - analysis_id = parameters['analysis'] - elif self.command.name == 'build_analysis_files': + elif "analysis" in parameters: + analysis_id = parameters["analysis"] + elif self.command.name == "build_analysis_files": # build analysis is a special case because the analysis doesn't # exist yet - sanalysis = qdb.analysis.Analysis(parameters['analysis']).samples + sanalysis = qdb.analysis.Analysis(parameters["analysis"]).samples samples = sum([len(sams) for sams in sanalysis.values()]) # only count the biom files - input_size = sum([fp['fp_size'] for aid in sanalysis - for fp in qdb.artifact.Artifact(aid).filepaths - if fp['fp_type'] == 'biom']) - columns = self.parameters.values['categories'] + input_size = sum( + [ + fp["fp_size"] + for aid in sanalysis + for fp in qdb.artifact.Artifact(aid).filepaths + if fp["fp_type"] == "biom" + ] + ) + columns = self.parameters.values["categories"] if columns is not None: columns = len(columns) - elif self.command.software.name == 'Qiita': - if self.command.name == 'delete_sample_or_column': + elif self.command.software.name == "Qiita": + if self.command.name == "delete_sample_or_column": MT = qdb.metadata_template - _id = parameters['obj_id'] + _id = parameters["obj_id"] try: - if parameters['obj_class'] == 'SampleTemplate': + if parameters["obj_class"] == "SampleTemplate": obj = MT.sample_template.SampleTemplate(_id) else: obj = MT.prep_template.PrepTemplate(_id) @@ -1984,35 +2074,40 @@ def shape(self): except QUIDError: pass else: - if 'study' in parameters: - study_id = parameters['study'] - elif 'study_id' in parameters: - study_id = parameters['study_id'] - elif 'analysis' in parameters: - analysis_id = parameters['analysis'] - elif 'analysis_id' in parameters: - analysis_id = parameters['analysis_id'] - elif 'artifact' in parameters: + if "study" in parameters: + study_id = parameters["study"] + elif "study_id" in parameters: + study_id = parameters["study_id"] + elif "analysis" in parameters: + analysis_id = parameters["analysis"] + elif "analysis_id" in parameters: + analysis_id = parameters["analysis_id"] + elif "artifact" in parameters: try: - artifact = qdb.artifact.Artifact( - parameters['artifact']) + artifact = qdb.artifact.Artifact(parameters["artifact"]) except QUIDError: pass - elif self.command.name == 'Sequence Processing Pipeline': - body = self.parameters.values['sample_sheet']['body'] - samples = body.count('\r') - stemp = body.count('\n') + elif self.command.name == "Sequence Processing Pipeline": + body = self.parameters.values["sample_sheet"]["body"] + samples = body.count("\r") + stemp = body.count("\n") if stemp > samples: samples = stemp elif 
self.input_artifacts: artifact = self.input_artifacts[0] - if artifact.artifact_type == 'BIOM': - input_size = sum([fp['fp_size'] for a in self.input_artifacts - for fp in a.filepaths - if fp['fp_type'] == 'biom']) + if artifact.artifact_type == "BIOM": + input_size = sum( + [ + fp["fp_size"] + for a in self.input_artifacts + for fp in a.filepaths + if fp["fp_type"] == "biom" + ] + ) else: - input_size = sum([fp['fp_size'] for a in self.input_artifacts - for fp in a.filepaths]) + input_size = sum( + [fp["fp_size"] for a in self.input_artifacts for fp in a.filepaths] + ) # if there is an artifact, then we need to get the study_id/analysis_id if artifact is not None: @@ -2042,12 +2137,17 @@ def shape(self): except qdb.exceptions.QiitaDBUnknownIDError: pass else: - mfp = qdb.util.get_filepath_information( - analysis.mapping_file)['fullpath'] - samples, columns = pd.read_csv( - mfp, sep='\t', dtype=str).shape - input_size = sum([fp['fp_size'] for aid in analysis.samples for - fp in qdb.artifact.Artifact(aid).filepaths]) + mfp = qdb.util.get_filepath_information(analysis.mapping_file)[ + "fullpath" + ] + samples, columns = pd.read_csv(mfp, sep="\t", dtype=str).shape + input_size = sum( + [ + fp["fp_size"] + for aid in analysis.samples + for fp in qdb.artifact.Artifact(aid).filepaths + ] + ) return samples, columns, input_size @@ -2067,27 +2167,37 @@ def complete_processing_job(self): @property def trace(self): - """ Returns as a text array the full trace of the job, from itself - to validators and complete jobs""" - lines = [f'{self.id} [{self.external_id}] ({self.status}): ' - f'{self.command.name} | {self.resource_allocation_info}'] + """Returns as a text array the full trace of the job, from itself + to validators and complete jobs""" + lines = [ + f"{self.id} [{self.external_id}] ({self.status}): " + f"{self.command.name} | {self.resource_allocation_info}" + ] cjob = self.complete_processing_job if cjob is not None: - lines.append(f' {cjob.id} [{cjob.external_id}] ({cjob.status})| ' - f'{cjob.resource_allocation_info}') + lines.append( + f" {cjob.id} [{cjob.external_id}] ({cjob.status})| " + f"{cjob.resource_allocation_info}" + ) vjob = self.release_validator_job if vjob is not None: - lines.append(f' {vjob.id} [{vjob.external_id}] ' - f' ({vjob.status}) | ' - f'{vjob.resource_allocation_info}') + lines.append( + f" {vjob.id} [{vjob.external_id}] " + f" ({vjob.status}) | " + f"{vjob.resource_allocation_info}" + ) for v in self.validator_jobs: - lines.append(f' {v.id} [{v.external_id}] ({v.status}): ' - f'{v.command.name} | {v.resource_allocation_info}') + lines.append( + f" {v.id} [{v.external_id}] ({v.status}): " + f"{v.command.name} | {v.resource_allocation_info}" + ) cjob = v.complete_processing_job if cjob is not None: - lines.append(f' {cjob.id} [{cjob.external_id}] ' - f'({cjob.status}) | ' - f'{cjob.resource_allocation_info}') + lines.append( + f" {cjob.id} [{cjob.external_id}] " + f"({cjob.status}) | " + f"{cjob.resource_allocation_info}" + ) return lines @@ -2101,6 +2211,7 @@ class ProcessingWorkflow(qdb.base.QiitaObject): root : list of qiita_db.processing_job.ProcessingJob The first job in the workflow """ + _table = "processing_job_workflow" @classmethod @@ -2118,7 +2229,7 @@ def _common_creation_steps(cls, user, root_jobs, name=None): """ with qdb.sql_connection.TRN: # Insert the workflow in the processing_job_workflow table - name = name if name else "%s's workflow" % user.info['name'] + name = name if name else "%s's workflow" % user.info["name"] sql = """INSERT INTO 
qiita.processing_job_workflow (email, name) VALUES (%s, %s) RETURNING processing_job_workflow_id""" @@ -2135,8 +2246,7 @@ def _common_creation_steps(cls, user, root_jobs, name=None): return cls(w_id) @classmethod - def from_default_workflow(cls, user, dflt_wf, req_params, name=None, - force=False): + def from_default_workflow(cls, user, dflt_wf, req_params, name=None, force=False): """Creates a new processing workflow from a default workflow Parameters @@ -2182,28 +2292,34 @@ def from_default_workflow(cls, user, dflt_wf, req_params, name=None, # Check that we have all the required parameters root_cmds = set(c for c, _ in roots.values()) if root_cmds != set(req_params): - error_msg = ['Provided required parameters do not match the ' - 'initial set of commands for the workflow.'] + error_msg = [ + "Provided required parameters do not match the " + "initial set of commands for the workflow." + ] missing = [c.name for c in root_cmds - set(req_params)] if missing: error_msg.append( ' Command(s) "%s" are missing the required parameter ' - 'set.' % ', '.join(missing)) + "set." % ", ".join(missing) + ) extra = [c.name for c in set(req_params) - root_cmds] if extra: error_msg.append( ' Paramters for command(s) "%s" have been provided, ' - 'but they are not the initial commands for the ' - 'workflow.' % ', '.join(extra)) - raise qdb.exceptions.QiitaDBError(''.join(error_msg)) + "but they are not the initial commands for the " + "workflow." % ", ".join(extra) + ) + raise qdb.exceptions.QiitaDBError("".join(error_msg)) # Start creating the root jobs node_to_job = { n: ProcessingJob.create( user, - qdb.software.Parameters.from_default_params( - p, req_params[c]), force) - for n, (c, p) in roots.items()} + qdb.software.Parameters.from_default_params(p, req_params[c]), + force, + ) + for n, (c, p) in roots.items() + } root_jobs = node_to_job.values() # SQL used to create the edges between jobs @@ -2232,7 +2348,7 @@ def from_default_workflow(cls, user, dflt_wf, req_params, name=None, source_id = node_to_job[source].id parent_ids.append(source_id) # Get the connections between the job and the source - connections = data['connections'].connections + connections = data["connections"].connections for out, in_param, _ in connections: # We take advantage of the fact the parameters are # stored in JSON to encode the name of the output @@ -2242,8 +2358,12 @@ def from_default_workflow(cls, user, dflt_wf, req_params, name=None, # At this point we should have all the requried parameters for # the current job, so create it new_job = ProcessingJob.create( - user, qdb.software.Parameters.from_default_params( - dflt_params, job_req_params), force) + user, + qdb.software.Parameters.from_default_params( + dflt_params, job_req_params + ), + force, + ) node_to_job[n] = new_job # Create the parent-child links in the DB @@ -2277,7 +2397,7 @@ def from_scratch(cls, user, parameters, name=None, force=False): @property def name(self): - """"The name of the workflow + """ "The name of the workflow Returns ------- @@ -2326,8 +2446,9 @@ def graph(self): edges = qdb.sql_connection.TRN.execute_fetchindex() nodes = {} if edges: - nodes = {jid: ProcessingJob(jid) - for jid in set(chain.from_iterable(edges))} + nodes = { + jid: ProcessingJob(jid) for jid in set(chain.from_iterable(edges)) + } edges = [(nodes[s], nodes[d]) for s, d in edges] g.add_edges_from(edges) # It is possible that there are root jobs that doesn't have any @@ -2342,7 +2463,8 @@ def graph(self): qdb.sql_connection.TRN.add(sql, sql_args) nodes = [ ProcessingJob(jid) - 
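from_default_workflow() above rejects parameter sets whose commands are not exactly the workflow's root commands. The set arithmetic behind that error message, with invented command names standing in for Command objects:

    root_cmds = {"Split libraries FASTQ", "Validate"}
    req_params = {"Split libraries FASTQ": {}, "Trimming": {}}

    missing = [c for c in root_cmds - set(req_params)]   # roots with no parameters supplied
    extra = [c for c in set(req_params) - root_cmds]     # parameters for non-root commands
    assert missing == ["Validate"] and extra == ["Trimming"]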
for jid in qdb.sql_connection.TRN.execute_fetchflatten()] + for jid in qdb.sql_connection.TRN.execute_fetchflatten() + ] g.add_nodes_from(nodes) return g @@ -2375,13 +2497,20 @@ def _raise_if_not_in_construction(self): # for processing. Note that if the above query doesn't retrun any # value, it means that no jobs are in the workflow and that means # that the workflow is in construction. - if (len(res) == 1 and res[0] != 'in_construction') or len(res) > 1: + if (len(res) == 1 and res[0] != "in_construction") or len(res) > 1: # The workflow is no longer in construction, raise an error raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Workflow not in construction") - - def add(self, dflt_params, connections=None, req_params=None, - opt_params=None, force=False): + "Workflow not in construction" + ) + + def add( + self, + dflt_params, + connections=None, + req_params=None, + opt_params=None, + force=False, + ): """Adds a new job to the workflow Parameters @@ -2414,7 +2543,8 @@ def add(self, dflt_params, connections=None, req_params=None, # checking that the new number of artifacts is not above # max_artifacts_in_workflow current_artifacts = sum( - [len(j.command.outputs) for j in self.graph.nodes()]) + [len(j.command.outputs) for j in self.graph.nodes()] + ) to_add_artifacts = len(dflt_params.command.outputs) total_artifacts = current_artifacts + to_add_artifacts max_artifacts = qdb.util.max_artifacts_in_workflow() @@ -2423,7 +2553,8 @@ def add(self, dflt_params, connections=None, req_params=None, "Cannot add new job because it will create more " f"artifacts (current: {current_artifacts} + new: " f"{to_add_artifacts} = {total_artifacts}) that what is " - f"allowed in a single workflow ({max_artifacts})") + f"allowed in a single workflow ({max_artifacts})" + ) if connections: # The new Job depends on previous jobs in the workflow @@ -2436,8 +2567,12 @@ def add(self, dflt_params, connections=None, req_params=None, req_params[in_param] = [source_id, out] new_job = ProcessingJob.create( - self.user, qdb.software.Parameters.from_default_params( - dflt_params, req_params, opt_params=opt_params), force) + self.user, + qdb.software.Parameters.from_default_params( + dflt_params, req_params, opt_params=opt_params + ), + force, + ) # SQL used to create the edges between jobs sql = """INSERT INTO qiita.parent_processing_job @@ -2450,8 +2585,12 @@ def add(self, dflt_params, connections=None, req_params=None, # The new job doesn't depend on any previous job in the # workflow, so it is a new root job new_job = ProcessingJob.create( - self.user, qdb.software.Parameters.from_default_params( - dflt_params, req_params, opt_params=opt_params), force) + self.user, + qdb.software.Parameters.from_default_params( + dflt_params, req_params, opt_params=opt_params + ), + force, + ) sql = """INSERT INTO qiita.processing_job_workflow_root (processing_job_workflow_id, processing_job_id) VALUES (%s, %s)""" @@ -2485,7 +2624,8 @@ def remove(self, job, cascade=False): if children: if not cascade: raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Can't remove job '%s': it has children" % job.id) + "Can't remove job '%s': it has children" % job.id + ) else: # We need to remove all job's children, remove them first # and then remove the current job @@ -2535,7 +2675,7 @@ def submit(self): if degree == 0: roots.append(job) else: - job._set_status('waiting') + job._set_status("waiting") for job in roots: job.submit() diff --git a/qiita_db/reference.py b/qiita_db/reference.py index 5cb107aa7..5f2a4cc1d 100644 --- 
a/qiita_db/reference.py +++ b/qiita_db/reference.py @@ -28,6 +28,7 @@ class Reference(qdb.base.QiitaObject): -------- QiitaObject """ + _table = "reference" @classmethod @@ -60,31 +61,33 @@ def create(cls, name, version, seqs_fp, tax_fp=None, tree_fp=None): with qdb.sql_connection.TRN: if cls.exists(name, version): raise qdb.exceptions.QiitaDBDuplicateError( - "Reference", "Name: %s, Version: %s" % (name, version)) + "Reference", "Name: %s, Version: %s" % (name, version) + ) - fps = [(seqs_fp, - qdb.util.convert_to_id("reference_seqs", "filepath_type"))] + fps = [(seqs_fp, qdb.util.convert_to_id("reference_seqs", "filepath_type"))] seq_id = qdb.util.insert_filepaths( - fps, "%s_%s" % (name, version), "reference")[0] + fps, "%s_%s" % (name, version), "reference" + )[0] # Check if the database has taxonomy file tax_id = None if tax_fp: fps = [ - (tax_fp, - qdb.util.convert_to_id("reference_tax", "filepath_type"))] + (tax_fp, qdb.util.convert_to_id("reference_tax", "filepath_type")) + ] tax_id = qdb.util.insert_filepaths( - fps, "%s_%s" % (name, version), "reference")[0] + fps, "%s_%s" % (name, version), "reference" + )[0] # Check if the database has tree file tree_id = None if tree_fp: fps = [ - (tree_fp, - qdb.util.convert_to_id("reference_tree", "filepath_type")) - ] + (tree_fp, qdb.util.convert_to_id("reference_tree", "filepath_type")) + ] tree_id = qdb.util.insert_filepaths( - fps, "%s_%s" % (name, version), "reference")[0] + fps, "%s_%s" % (name, version), "reference" + )[0] # Insert the actual object to the db sql = """INSERT INTO qiita.{0} @@ -92,8 +95,7 @@ def create(cls, name, version, seqs_fp, tax_fp=None, tree_fp=None): taxonomy_filepath, tree_filepath) VALUES (%s, %s, %s, %s, %s) RETURNING reference_id""".format(cls._table) - qdb.sql_connection.TRN.add( - sql, [name, version, seq_id, tax_id, tree_id]) + qdb.sql_connection.TRN.add(sql, [name, version, seq_id, tax_id, tree_id]) id_ = qdb.sql_connection.TRN.execute_fetchlast() return cls(id_) @@ -161,16 +163,16 @@ def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id): db_dir = qdb.util.get_db_files_base_dir() return path_builder(db_dir, fp, mountpoint, subdir, self._id) else: - return '' + return "" @property def sequence_fp(self): - return self._retrieve_filepath('sequence_filepath') + return self._retrieve_filepath("sequence_filepath") @property def taxonomy_fp(self): - return self._retrieve_filepath('taxonomy_filepath') + return self._retrieve_filepath("taxonomy_filepath") @property def tree_fp(self): - return self._retrieve_filepath('tree_filepath') + return self._retrieve_filepath("tree_filepath") diff --git a/qiita_db/software.py b/qiita_db/software.py index 3119b118a..84dae6b34 100644 --- a/qiita_db/software.py +++ b/qiita_db/software.py @@ -6,17 +6,16 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from json import dumps, loads -from copy import deepcopy import inspect import warnings +from configparser import ConfigParser +from copy import deepcopy +from json import dumps, loads import networkx as nx -from qiita_core.qiita_settings import qiita_config import qiita_db as qdb - -from configparser import ConfigParser +from qiita_core.qiita_settings import qiita_config class Command(qdb.base.QiitaObject): @@ -58,11 +57,13 @@ class Command(qdb.base.QiitaObject): -------- qiita_db.software.Software """ + _table = "software_command" @classmethod - def get_commands_by_input_type(cls, artifact_types, active_only=True, - exclude_analysis=True, prep_type=None): + def get_commands_by_input_type( + cls, artifact_types, active_only=True, exclude_analysis=True, prep_type=None + ): """Returns the commands that can process the given artifact types Parameters @@ -96,11 +97,17 @@ def get_commands_by_input_type(cls, artifact_types, active_only=True, cids = set(qdb.sql_connection.TRN.execute_fetchflatten()) if prep_type is not None: - dws = [w for w in qdb.software.DefaultWorkflow.iter() - if prep_type in w.data_type] + dws = [ + w + for w in qdb.software.DefaultWorkflow.iter() + if prep_type in w.data_type + ] if dws: - cmds = {n.default_parameter.command.id - for w in dws for n in w.graph.nodes} + cmds = { + n.default_parameter.command.id + for w in dws + for n in w.graph.nodes + } cids = cmds & cids return [cls(cid) for cid in cids] @@ -138,7 +145,8 @@ def get_html_generator(cls, artifact_type): except IndexError: raise qdb.exceptions.QiitaDBError( "There is no command to generate the HTML summary for " - "artifact type '%s'" % artifact_type) + "artifact type '%s'" % artifact_type + ) return cls(res) @@ -174,7 +182,8 @@ def get_validator(cls, artifact_type): except IndexError: raise qdb.exceptions.QiitaDBError( "There is no command to generate the Validate for " - "artifact type '%s'" % artifact_type) + "artifact type '%s'" % artifact_type + ) return cls(res) @@ -224,8 +233,9 @@ def exists(cls, software, name): return qdb.sql_connection.TRN.execute_fetchlast() @classmethod - def create(cls, software, name, description, parameters, outputs=None, - analysis_only=False): + def create( + cls, software, name, description, parameters, outputs=None, analysis_only=False + ): r"""Creates a new command in the system The supported types for the parameters are: @@ -294,14 +304,15 @@ def create(cls, software, name, description, parameters, outputs=None, if not parameters: raise qdb.exceptions.QiitaDBError( "Error creating command %s. At least one parameter should " - "be provided." % name) + "be provided." % name + ) sql_param_values = [] sql_artifact_params = [] for pname, vals in parameters.items(): qiita_optional_parameter = False - if 'qiita_optional_parameter' in vals: + if "qiita_optional_parameter" in vals: qiita_optional_parameter = True - vals.remove('qiita_optional_parameter') + vals.remove("qiita_optional_parameter") lenvals = len(vals) if lenvals == 2: ptype, dflt = vals @@ -315,23 +326,33 @@ def create(cls, software, name, description, parameters, outputs=None, "either {param_name: [parameter_type, default]} or " "{parameter_name: (parameter_type, default, name_order, " "check_biom_merge)}. 
Found: %s for parameter name %s" - % (vals, pname)) + % (vals, pname) + ) # Check that the type is one of the supported types - supported_types = ['string', 'integer', 'float', 'reference', - 'boolean', 'prep_template', 'analysis'] + supported_types = [ + "string", + "integer", + "float", + "reference", + "boolean", + "prep_template", + "analysis", + ] if ptype not in supported_types and not ptype.startswith( - ('choice', 'mchoice', 'artifact')): - supported_types.extend(['choice', 'mchoice', 'artifact']) + ("choice", "mchoice", "artifact") + ): + supported_types.extend(["choice", "mchoice", "artifact"]) raise qdb.exceptions.QiitaDBError( "Unsupported parameters type '%s' for parameter %s. " "Supported types are: %s" - % (ptype, pname, ', '.join(supported_types))) + % (ptype, pname, ", ".join(supported_types)) + ) - if ptype.startswith(('choice', 'mchoice')) and dflt is not None: - choices = set(loads(ptype.split(':')[1])) + if ptype.startswith(("choice", "mchoice")) and dflt is not None: + choices = set(loads(ptype.split(":")[1])) dflt_val = dflt - if ptype.startswith('choice'): + if ptype.startswith("choice"): # In the choice case, the dflt value is a single string, # create a list with it the string on it to use the # issuperset call below @@ -343,25 +364,26 @@ def create(cls, software, name, description, parameters, outputs=None, raise qdb.exceptions.QiitaDBError( "The default value '%s' for the parameter %s is not " "listed in the available choices: %s" - % (dflt, pname, ', '.join(choices))) + % (dflt, pname, ", ".join(choices)) + ) - if ptype.startswith('artifact'): - atypes = loads(ptype.split(':')[1]) - sql_artifact_params.append( - [pname, 'artifact', atypes]) + if ptype.startswith("artifact"): + atypes = loads(ptype.split(":")[1]) + sql_artifact_params.append([pname, "artifact", atypes]) else: # a parameter will be required (not optional) if # qiita_optional_parameter is false and there is the default # value (dflt) is None required = not qiita_optional_parameter and dflt is None - sql_param_values.append([pname, ptype, required, dflt, - name_order, check_biom_merge]) + sql_param_values.append( + [pname, ptype, required, dflt, name_order, check_biom_merge] + ) with qdb.sql_connection.TRN: if cls.exists(software, name): raise qdb.exceptions.QiitaDBDuplicateError( - "command", "software: %d, name: %s" - % (software.id, name)) + "command", "software: %d, name: %s" % (software.id, name) + ) # Add the command to the DB sql = """INSERT INTO qiita.software_command (name, software_id, description, is_analysis) @@ -379,7 +401,8 @@ def create(cls, software, name, description, parameters, outputs=None, RETURNING command_parameter_id""" sql_params = [ [c_id, pname, p_type, reqd, default, no, chm] - for pname, p_type, reqd, default, no, chm in sql_param_values] + for pname, p_type, reqd, default, no, chm in sql_param_values + ] qdb.sql_connection.TRN.add(sql, sql_params, many=True) qdb.sql_connection.TRN.execute() @@ -393,27 +416,26 @@ def create(cls, software, name, description, parameters, outputs=None, qdb.sql_connection.TRN.add(sql, sql_params) pid = qdb.sql_connection.TRN.execute_fetchlast() sql_params = [ - [pid, qdb.util.convert_to_id(at, 'artifact_type')] - for at in atypes] + [pid, qdb.util.convert_to_id(at, "artifact_type")] for at in atypes + ] qdb.sql_connection.TRN.add(sql_type, sql_params, many=True) supported_types.extend([atid for _, atid in sql_params]) # If the software type is 'artifact definition', there are a couple # of extra steps - if software.type == 'artifact 
definition': + if software.type == "artifact definition": # If supported types is not empty, link the software with these # types if supported_types: sql = """INSERT INTO qiita.software_artifact_type (software_id, artifact_type_id) VALUES (%s, %s)""" - sql_params = [[software.id, atid] - for atid in supported_types] + sql_params = [[software.id, atid] for atid in supported_types] qdb.sql_connection.TRN.add(sql, sql_params, many=True) # If this is the validate command, we need to add the # provenance and name parameters. These are used internally, # that's why we are adding them here - if name == 'Validate': + if name == "Validate": sql = """INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required, default_value) @@ -429,16 +451,22 @@ def create(cls, software, name, description, parameters, outputs=None, for pname, at in outputs.items(): if isinstance(at, tuple): sql_args.append( - [pname, c_id, - qdb.util.convert_to_id(at[0], 'artifact_type'), - at[1]]) + [ + pname, + c_id, + qdb.util.convert_to_id(at[0], "artifact_type"), + at[1], + ] + ) else: try: - at_id = qdb.util.convert_to_id(at, 'artifact_type') + at_id = qdb.util.convert_to_id(at, "artifact_type") except qdb.exceptions.QiitaDBLookupError: - msg = (f'Error creating {software.name}, {name}, ' - f'{description} - Unknown artifact_type: ' - f'{at}') + msg = ( + f"Error creating {software.name}, {name}, " + f"{description} - Unknown artifact_type: " + f"{at}" + ) raise ValueError(msg) sql_args.append([pname, c_id, at_id, False]) @@ -583,12 +611,11 @@ def optional_parameters(self): # if ptype is multiple choice. When I added it to the for loop as # a one liner if, made the code a bit hard to read def dflt_fmt(dflt, ptype): - if ptype.startswith('mchoice'): + if ptype.startswith("mchoice"): return loads(dflt) return dflt - return {pname: [ptype, dflt_fmt(dflt, ptype)] - for pname, ptype, dflt in res} + return {pname: [ptype, dflt_fmt(dflt, ptype)] for pname, ptype, dflt in res} @property def default_parameter_sets(self): @@ -646,7 +673,7 @@ def active(self): """ with qdb.sql_connection.TRN: cmd_type = self.software.type - if self.analysis_only or cmd_type == 'artifact definition': + if self.analysis_only or cmd_type == "artifact definition": sql = """SELECT active FROM qiita.software_command WHERE command_id = %s""" @@ -728,9 +755,11 @@ def merging_scheme(self): qdb.sql_connection.TRN.add(sql, [self.id]) ipc = qdb.sql_connection.TRN.execute_fetchlast() - return {'parameters': params, - 'outputs': outputs, - 'ignore_parent_command': ipc} + return { + "parameters": params, + "outputs": outputs, + "ignore_parent_command": ipc, + } @property def resource_allocation(self): @@ -757,12 +786,13 @@ def resource_allocation(self): sql = """SELECT allocation FROM qiita.processing_job_resource_allocation WHERE name = %s and job_type = 'RESOURCE_PARAMS_COMMAND'""" - qdb.sql_connection.TRN.add(sql, ['default']) + qdb.sql_connection.TRN.add(sql, ["default"]) result = qdb.sql_connection.TRN.execute_fetchflatten() if not result: - raise ValueError("Could not match '%s' to a resource " - "allocation!" % self.name) + raise ValueError( + "Could not match '%s' to a resource allocation!" 
% self.name + ) return result[0] @@ -809,6 +839,7 @@ class Software(qdb.base.QiitaObject): -------- qiita_db.software.Command """ + _table = "software" @classmethod @@ -827,8 +858,7 @@ def iter(cls, active=True): """ sql = """SELECT software_id FROM qiita.software {0} - ORDER BY software_id""".format( - 'WHERE active = True' if active else '') + ORDER BY software_id""".format("WHERE active = True" if active else "") with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) for s_id in qdb.sql_connection.TRN.execute_fetchflatten(): @@ -873,16 +903,16 @@ def from_file(cls, fp, update=False): with open(fp, newline=None) as conf_file: config.read_file(conf_file) - name = config.get('main', 'NAME') - version = config.get('main', 'VERSION') - description = config.get('main', 'DESCRIPTION') - env_script = config.get('main', 'ENVIRONMENT_SCRIPT') - start_script = config.get('main', 'START_SCRIPT') - software_type = config.get('main', 'PLUGIN_TYPE') - publications = config.get('main', 'PUBLICATIONS') + name = config.get("main", "NAME") + version = config.get("main", "VERSION") + description = config.get("main", "DESCRIPTION") + env_script = config.get("main", "ENVIRONMENT_SCRIPT") + start_script = config.get("main", "START_SCRIPT") + software_type = config.get("main", "PLUGIN_TYPE") + publications = config.get("main", "PUBLICATIONS") publications = loads(publications) if publications else [] - client_id = config.get('oauth2', 'CLIENT_ID') - client_secret = config.get('oauth2', 'CLIENT_SECRET') + client_id = config.get("oauth2", "CLIENT_ID") + client_secret = config.get("oauth2", "CLIENT_SECRET") if cls.exists(name, version): # This plugin already exists, check that all the values are the @@ -900,12 +930,13 @@ def from_file(cls, fp, update=False): WHERE software_id = %s""" values = [description, env_script, start_script] - attrs = ['description', 'environment_script', 'start_script'] + attrs = ["description", "environment_script", "start_script"] for value, attr in zip(values, attrs): if value != instance.__getattribute__(attr): if update: qdb.sql_connection.TRN.add( - sql_update.format(attr), [value, instance.id]) + sql_update.format(attr), [value, instance.id] + ) else: warning_values.append(attr) @@ -914,16 +945,19 @@ def from_file(cls, fp, update=False): if software_type != instance.type: raise qdb.exceptions.QiitaDBOperationNotPermittedError( 'The plugin type of the plugin "%s" version %s does ' - 'not match the one in the system' % (name, version)) + "not match the one in the system" % (name, version) + ) if publications != instance.publications: if update: instance.add_publications(publications) else: - warning_values.append('publications') + warning_values.append("publications") - if (client_id != instance.client_id or - client_secret != instance.client_secret): + if ( + client_id != instance.client_id + or client_secret != instance.client_secret + ): if update: sql = """INSERT INTO qiita.oauth_identifiers (client_id, client_secret) @@ -933,34 +967,42 @@ def from_file(cls, fp, update=False): WHERE client_id = %s AND client_secret = %s)""" qdb.sql_connection.TRN.add( - sql, [client_id, client_secret, - client_id, client_secret]) + sql, [client_id, client_secret, client_id, client_secret] + ) sql = """UPDATE qiita.oauth_software SET client_id = %s WHERE software_id = %s""" - qdb.sql_connection.TRN.add( - sql, [client_id, instance.id]) + qdb.sql_connection.TRN.add(sql, [client_id, instance.id]) else: raise qdb.exceptions.QiitaDBOperationNotPermittedError( - 'The (client_id, client_secret) 
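Software.from_file() above reads the plugin description from an INI-style file with [main] and [oauth2] sections. A hedged sketch of such a file and how ConfigParser reads it; all field values here are invented, only the option names come from the hunk.

    from configparser import ConfigParser
    from io import StringIO

    sample = """\
    [main]
    NAME = Example Plugin
    VERSION = 0.1.0
    DESCRIPTION = Example plugin used for illustration
    ENVIRONMENT_SCRIPT = source activate example-plugin
    START_SCRIPT = start_example_plugin
    PLUGIN_TYPE = artifact transformation
    PUBLICATIONS =

    [oauth2]
    CLIENT_ID = abc
    CLIENT_SECRET = xyz
    """
    config = ConfigParser()
    config.read_file(StringIO(sample))
    assert config.get("main", "PLUGIN_TYPE") == "artifact transformation"
    assert config.get("main", "PUBLICATIONS") == ""   # falls back to [] in from_file()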
pair of the ' + "The (client_id, client_secret) pair of the " 'plugin "%s" version "%s" does not match the one ' - 'in the system' % (name, version)) + "in the system" % (name, version) + ) if warning_values: warnings.warn( 'Plugin "%s" version "%s" config file does not match ' - 'with stored information. Check the config file or ' + "with stored information. Check the config file or " 'run "qiita plugin update" to update the plugin ' - 'information. Offending values: %s' + "information. Offending values: %s" % (name, version, ", ".join(sorted(warning_values))), - qdb.exceptions.QiitaDBWarning) + qdb.exceptions.QiitaDBWarning, + ) qdb.sql_connection.TRN.execute() else: # This is a new plugin, create it instance = cls.create( - name, version, description, env_script, start_script, - software_type, publications=publications, client_id=client_id, - client_secret=client_secret) + name, + version, + description, + env_script, + start_script, + software_type, + publications=publications, + client_id=client_id, + client_secret=client_secret, + ) return instance @@ -983,9 +1025,18 @@ def exists(cls, name, version): return qdb.sql_connection.TRN.execute_fetchlast() @classmethod - def create(cls, name, version, description, environment_script, - start_script, software_type, publications=None, - client_id=None, client_secret=None): + def create( + cls, + name, + version, + description, + environment_script, + start_script, + software_type, + publications=None, + client_id=None, + client_secret=None, + ): r"""Creates a new software in the system Parameters @@ -1022,8 +1073,14 @@ def create(cls, name, version, description, environment_script, VALUES (%s, %s, %s, %s, %s, %s) RETURNING software_id""" type_id = qdb.util.convert_to_id(software_type, "software_type") - sql_params = [name, version, description, environment_script, - start_script, type_id] + sql_params = [ + name, + version, + description, + environment_script, + start_script, + type_id, + ] qdb.sql_connection.TRN.add(sql, sql_params) s_id = qdb.sql_connection.TRN.execute_fetchlast() @@ -1043,8 +1100,9 @@ def create(cls, name, version, description, environment_script, # One has been provided but not the other, raise an error raise qdb.exceptions.QiitaDBError( 'Plugin "%s" version "%s" cannot be created, please ' - 'provide both client_id and client_secret or none of them' - % (name, version)) + "provide both client_id and client_secret or none of them" + % (name, version) + ) # At this point both client_id and client_secret are defined sql = """INSERT INTO qiita.oauth_identifiers @@ -1055,7 +1113,8 @@ def create(cls, name, version, description, environment_script, WHERE client_id = %s AND client_secret = %s)""" qdb.sql_connection.TRN.add( - sql, [client_id, client_secret, client_id, client_secret]) + sql, [client_id, client_secret, client_id, client_secret] + ) sql = """INSERT INTO qiita.oauth_software (software_id, client_id) VALUES (%s, %s)""" qdb.sql_connection.TRN.add(sql, [s_id, client_id]) @@ -1091,7 +1150,8 @@ def from_name_and_version(cls, name, version): res = qdb.sql_connection.TRN.execute_fetchindex() if not res: raise qdb.exceptions.QiitaDBUnknownIDError( - "%s %s" % (name, version), cls._table) + "%s %s" % (name, version), cls._table + ) return cls(res[0][0]) @property @@ -1152,8 +1212,9 @@ def commands(self): FROM qiita.software_command WHERE software_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) - return [Command(cid) - for cid in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + Command(cid) for cid in 
qdb.sql_connection.TRN.execute_fetchflatten() + ] def get_command(self, cmd_name): """Returns the command with the given name in the software @@ -1175,8 +1236,7 @@ def get_command(self, cmd_name): qdb.sql_connection.TRN.add(sql, [self.id, cmd_name]) res = qdb.sql_connection.TRN.execute_fetchindex() if not res: - raise qdb.exceptions.QiitaDBUnknownIDError( - cmd_name, "software_command") + raise qdb.exceptions.QiitaDBUnknownIDError(cmd_name, "software_command") return Command(res[0][0]) @property @@ -1227,8 +1287,7 @@ def add_publications(self, publications): FROM qiita.software_publication WHERE software_id = %s AND publication_doi = %s)""" - sql_params = [[self.id, doi, self.id, doi] - for doi, _ in publications] + sql_params = [[self.id, doi, self.id, doi] for doi, _ in publications] qdb.sql_connection.TRN.add(sql, sql_params, many=True) qdb.sql_connection.TRN.execute() @@ -1368,14 +1427,17 @@ def register_commands(self): """Registers the software commands""" url = "%s%s" % (qiita_config.base_url, qiita_config.portal_dir) cmd = '%s; %s "%s" "register" "ignored"' % ( - self.environment_script, self.start_script, url) + self.environment_script, + self.start_script, + url, + ) # it can be assumed that any command beginning with 'source' # is calling 'source', an internal command of 'bash' and hence # should be executed from bash, instead of sh. # TODO: confirm that exit_code propagates from bash to sh to # rv. - if cmd.startswith('source'): + if cmd.startswith("source"): cmd = "bash -c '%s'" % cmd p_out, p_err, rv = qdb.processing_job._system_call(cmd) @@ -1407,7 +1469,8 @@ class DefaultParameters(qdb.base.QiitaObject): -------- qiita_db.software.Command """ - _table = 'default_parameter_set' + + _table = "default_parameter_set" @classmethod def exists(cls, command, **kwargs): @@ -1444,7 +1507,8 @@ def exists(cls, command, **kwargs): "The given set of parameters do not match the ones for " "the command.\nMissing parameters: %s\n" "Extra parameters: %s\n" - % (', '.join(missing_in_user), ', '.join(extra_in_user))) + % (", ".join(missing_in_user), ", ".join(extra_in_user)) + ) sql = """SELECT parameter_set FROM qiita.default_parameter_set @@ -1486,8 +1550,9 @@ def create(cls, param_set_name, command, **kwargs): with qdb.sql_connection.TRN: # setting to default values all parameters not in the user_params cmd_params = command.optional_parameters - missing_in_user = {k: cmd_params[k][1] - for k in (set(cmd_params) - set(kwargs))} + missing_in_user = { + k: cmd_params[k][1] for k in (set(cmd_params) - set(kwargs)) + } if missing_in_user: kwargs.update(missing_in_user) @@ -1495,7 +1560,8 @@ def create(cls, param_set_name, command, **kwargs): # will raise the error for us if cls.exists(command, **kwargs): raise qdb.exceptions.QiitaDBDuplicateError( - cls._table, "Values: %s" % kwargs) + cls._table, "Values: %s" % kwargs + ) sql = """INSERT INTO qiita.default_parameter_set (command_id, parameter_set_name, parameter_set) @@ -1610,31 +1676,36 @@ def load(cls, command, json_str=None, values_dict=None): """ if json_str is None and values_dict is None: raise qdb.exceptions.QiitaDBError( - "Either `json_str` or `values_dict` should be provided.") + "Either `json_str` or `values_dict` should be provided." 
+ ) elif json_str is not None and values_dict is not None: raise qdb.exceptions.QiitaDBError( - "Either `json_str` or `values_dict` should be provided, " - "but not both") + "Either `json_str` or `values_dict` should be provided, but not both" + ) elif json_str is not None: parameters = loads(json_str) - error_msg = ("The provided JSON string doesn't encode a " - "parameter set for command '%s (ID: %s)'" % ( - command.name, command.id)) + error_msg = ( + "The provided JSON string doesn't encode a " + "parameter set for command '%s (ID: %s)'" % (command.name, command.id) + ) else: if not isinstance(values_dict, dict): raise qdb.exceptions.QiitaDBError( "The provided value_dict is %s (i.e. not None) but also " - "not a dictionary for command %s" % ( - values_dict, command.id)) + "not a dictionary for command %s" % (values_dict, command.id) + ) parameters = deepcopy(values_dict) - error_msg = ("The provided values dictionary doesn't encode a " - "parameter set for command %s" % command.id) + error_msg = ( + "The provided values dictionary doesn't encode a " + "parameter set for command %s" % command.id + ) # setting to default values all parameters not in the user_params cmd_params = command.optional_parameters - missing_in_user = {k: cmd_params[k][1] - for k in (set(cmd_params) - set(parameters))} + missing_in_user = { + k: cmd_params[k][1] for k in (set(cmd_params) - set(parameters)) + } if missing_in_user: parameters.update(missing_in_user) @@ -1648,22 +1719,23 @@ def load(cls, command, json_str=None, values_dict=None): values[key] = parameters.pop(key) except KeyError: raise qdb.exceptions.QiitaDBError( - "%s. Missing required parameter: %s" - % (error_msg, key)) + "%s. Missing required parameter: %s" % (error_msg, key) + ) for key in cmd_opt_params: try: values[key] = parameters.pop(key) except KeyError: raise qdb.exceptions.QiitaDBError( - "%s. Missing optional parameter: %s" - % (error_msg, key)) + "%s. Missing optional parameter: %s" % (error_msg, key) + ) if parameters: - error_msg += f'--- {cmd_reqd_params} --- {cmd_opt_params}' + error_msg += f"--- {cmd_reqd_params} --- {cmd_opt_params}" raise qdb.exceptions.QiitaDBError( "%s. Extra parameters: %s" - % (error_msg, ', '.join(parameters.keys()))) + % (error_msg, ", ".join(parameters.keys())) + ) return cls(values, command) @@ -1699,14 +1771,15 @@ def from_default_params(cls, dflt_params, req_params, opt_params=None): "Provided required parameters not expected.\n" "Missing required parameters: %s\n" "Extra required parameters: %s\n" - % (', '.join(missing_reqd), ', '.join(extra_reqd))) + % (", ".join(missing_reqd), ", ".join(extra_reqd)) + ) if opt_params: extra_opts = set(opt_params) - set(cmd_opt_params) if extra_opts: raise qdb.exceptions.QiitaDBError( - "Extra optional parameters provded: %s" - % ', '.join(extra_opts)) + "Extra optional parameters provded: %s" % ", ".join(extra_opts) + ) values = dflt_params.values values.update(req_params) @@ -1732,12 +1805,15 @@ def __init__(self, values, command): current_file = current_frame.f_code.co_filename caller_file = caller_frame.f_code.co_filename caller_name = caller_frame.f_code.co_name - if current_file != caller_file or \ - caller_name not in ['load', 'from_default_params']: + if current_file != caller_file or caller_name not in [ + "load", + "from_default_params", + ]: raise qdb.exceptions.QiitaDBOperationNotPermittedError( "qiita_db.software.Parameters can't be instantiated directly. 
" "Please use one of the classmethods: `load` or " - "`from_default_params`") + "`from_default_params`" + ) self._values = values self._command = command @@ -1783,6 +1859,7 @@ class DefaultWorkflowNode(qdb.base.QiitaObject): command parameters """ + _table = "default_workflow_node" @property @@ -1809,6 +1886,7 @@ class DefaultWorkflowEdge(qdb.base.QiitaObject): ---------- connections """ + _table = "default_workflow_edge" @property @@ -1844,6 +1922,7 @@ class DefaultWorkflow(qdb.base.QiitaObject): which outputs of the source command are provided as input to the destination command. """ + _table = "default_workflow" @classmethod @@ -1863,7 +1942,8 @@ def iter(cls, active=True): sql = """SELECT default_workflow_id FROM qiita.default_workflow {0} ORDER BY default_workflow_id""".format( - 'WHERE active = True' if active else '') + "WHERE active = True" if active else "" + ) with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) for s_id in qdb.sql_connection.TRN.execute_fetchflatten(): @@ -2051,13 +2131,14 @@ def parameters(self, values): ValueError if the passed parameter is not a properly formated dict """ - if not isinstance(values, dict) or \ - set(values.keys()) != set(['prep', 'sample']): - raise ValueError("Improper format for values, should be " - "{'sample': dict, 'prep': dict} ") + if not isinstance(values, dict) or set(values.keys()) != set( + ["prep", "sample"] + ): + raise ValueError( + "Improper format for values, should be {'sample': dict, 'prep': dict} " + ) with qdb.sql_connection.TRN: sql = """UPDATE qiita.default_workflow SET parameters = %s WHERE default_workflow_id = %s""" - qdb.sql_connection.perform_as_transaction( - sql, [dumps(values), self._id]) + qdb.sql_connection.perform_as_transaction(sql, [dumps(values), self._id]) diff --git a/qiita_db/sql_connection.py b/qiita_db/sql_connection.py index f60a01a4c..21126f4df 100644 --- a/qiita_db/sql_connection.py +++ b/qiita_db/sql_connection.py @@ -19,6 +19,7 @@ Transaction """ + # ----------------------------------------------------------------------------- # Copyright (c) 2014--, The Qiita Development Team. # @@ -27,26 +28,29 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- from contextlib import contextmanager -from itertools import chain from functools import wraps +from itertools import chain -from psycopg2 import (connect, ProgrammingError, Error as PostgresError, - OperationalError, errorcodes) -from psycopg2.extras import DictCursor +from psycopg2 import Error as PostgresError +from psycopg2 import OperationalError, ProgrammingError, connect, errorcodes from psycopg2.extensions import TRANSACTION_STATUS_IDLE +from psycopg2.extras import DictCursor from qiita_core.qiita_settings import qiita_config def _checker(func): """Decorator to check that methods are executed inside the context""" + @wraps(func) def wrapper(self, *args, **kwargs): if self._contexts_entered == 0: raise RuntimeError( "Operation not permitted. Transaction methods can only be " - "invoked within the context manager.") + "invoked within the context manager." + ) return func(self, *args, **kwargs) + return wrapper @@ -66,6 +70,7 @@ class Transaction(object): When the execution leaves the context manager, any remaining queries in the transaction will be executed and committed. 
""" + def __init__(self, admin=False): self._queries = [] self._results = [] @@ -86,40 +91,50 @@ def _open_connection(self): user=qiita_config.admin_user, password=qiita_config.admin_password, host=qiita_config.host, - port=qiita_config.port) + port=qiita_config.port, + ) self._connection.autocommit = True else: - self._connection = connect(user=qiita_config.user, - password=qiita_config.password, - database=qiita_config.database, - host=qiita_config.host, - port=qiita_config.port) + self._connection = connect( + user=qiita_config.user, + password=qiita_config.password, + database=qiita_config.database, + host=qiita_config.host, + port=qiita_config.port, + ) except OperationalError as e: # catch three known common exceptions and raise runtime errors try: - etype = str(e).split(':')[1].split()[0] + etype = str(e).split(":")[1].split()[0] except IndexError: # we recieved a really unanticipated error without a colon - etype = '' - if etype == 'database': - etext = ('This is likely because the database `%s` has not ' - 'been created or has been dropped.' % - qiita_config.database) - elif etype == 'role': - etext = ('This is likely because the user string `%s` ' - 'supplied in your configuration file `%s` is ' - 'incorrect or not an authorized postgres user.' % - (qiita_config.user, qiita_config.conf_fp)) - elif etype == 'Connection': - etext = ('This is likely because postgres isn\'t ' - 'running. Check that postgres is correctly ' - 'installed and is running.') + etype = "" + if etype == "database": + etext = ( + "This is likely because the database `%s` has not " + "been created or has been dropped." % qiita_config.database + ) + elif etype == "role": + etext = ( + "This is likely because the user string `%s` " + "supplied in your configuration file `%s` is " + "incorrect or not an authorized postgres user." + % (qiita_config.user, qiita_config.conf_fp) + ) + elif etype == "Connection": + etext = ( + "This is likely because postgres isn't " + "running. Check that postgres is correctly " + "installed and is running." + ) else: # we recieved a really unanticipated error with a colon - etext = '' - ebase = ('An OperationalError with the following message occured' - '\n\n\t%s\n%s For more information, review `INSTALL.md`' - ' in the Qiita installation base directory.') + etext = "" + ebase = ( + "An OperationalError with the following message occured" + "\n\n\t%s\n%s For more information, review `INSTALL.md`" + " in the Qiita installation base directory." + ) raise RuntimeError(ebase % (str(e), etext)) def close(self): @@ -164,8 +179,7 @@ def _clean_up(self, exc_type): # wrapped in a try/except and rollbacks in case of failure self.execute() self.commit() - elif self._connection.get_transaction_status() != \ - TRANSACTION_STATUS_IDLE: + elif self._connection.get_transaction_status() != TRANSACTION_STATUS_IDLE: # There are no queries to be executed, however, the transaction # is still not committed. Commit it so the changes are not lost self.commit() @@ -196,8 +210,7 @@ def _raise_execution_error(self, sql, sql_args, error): try: ec_lu = errorcodes.lookup(error.pgcode) - raise ValueError( - "Error running SQL: %s. MSG: %s\n" % (ec_lu, str(error))) + raise ValueError("Error running SQL: %s. 
MSG: %s\n" % (ec_lu, str(error))) # the order of except statements is important, do not change except (KeyError, AttributeError, TypeError) as error: raise ValueError("Error running SQL query: %s" % str(error)) @@ -239,8 +252,10 @@ def add(self, sql, sql_args=None, many=False): for args in sql_args: if args: if not isinstance(args, (list, tuple, dict)): - raise TypeError("sql_args should be a list, tuple or dict." - " Found %s" % type(args)) + raise TypeError( + "sql_args should be a list, tuple or dict." + " Found %s" % type(args) + ) self._queries.append((sql, args)) def _execute(self): @@ -402,7 +417,8 @@ def _funcs_executor(self, funcs, func_str): if error_msg: raise RuntimeError( "An error occurred during the post %s commands:\n%s" - % (func_str, "\n".join(error_msg))) + % (func_str, "\n".join(error_msg)) + ) @_checker def commit(self): diff --git a/qiita_db/study.py b/qiita_db/study.py index 98ddc1ed6..aa5652d86 100644 --- a/qiita_db/study.py +++ b/qiita_db/study.py @@ -31,14 +31,14 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +import warnings from collections import defaultdict from copy import deepcopy from itertools import chain -import warnings +import qiita_db as qdb from qiita_core.exceptions import IncompetentQiitaDeveloperError from qiita_core.qiita_settings import qiita_config -import qiita_db as qdb class Study(qdb.base.QiitaObject): @@ -74,17 +74,18 @@ class Study(qdb.base.QiitaObject): All setters raise QiitaDBStatusError if trying to change a public study. You should not be doing that. """ + _table = "study" _portal_table = "study_portal" # The following columns are considered not part of the study info - _non_info = frozenset(["email", "study_title", "ebi_study_accession", - "autoloaded"]) + _non_info = frozenset(["email", "study_title", "ebi_study_accession", "autoloaded"]) def _lock_non_sandbox(self): """Raises QiitaDBStatusError if study is non-sandboxed""" - if self.status != 'sandbox': + if self.status != "sandbox": raise qdb.exceptions.QiitaDBStatusError( - "Illegal operation on non-sandbox study!") + "Illegal operation on non-sandbox study!" 
+ ) @classmethod def from_title(cls, title): @@ -115,7 +116,8 @@ def from_title(cls, title): if not sid: raise qdb.exceptions.QiitaDBUnknownIDError( - cls._table, f'"{title}" does not exist') + cls._table, f'"{title}" does not exist' + ) return qdb.study.Study(sid[0]) @@ -150,9 +152,9 @@ def status(self): JOIN qiita.study_artifact USING (artifact_id) WHERE study_id = %s and visibility_id NOT IN %s""" qdb.sql_connection.TRN.add( - sql, [self._id, qdb.util.artifact_visibilities_to_skip()]) - return qdb.util.infer_status( - qdb.sql_connection.TRN.execute_fetchindex()) + sql, [self._id, qdb.util.artifact_visibilities_to_skip()] + ) + return qdb.util.infer_status(qdb.sql_connection.TRN.execute_fetchindex()) @staticmethod def all_data_types(): @@ -194,7 +196,7 @@ def get_ids_by_status(cls, status): sids = set(qdb.sql_connection.TRN.execute_fetchflatten()) # If status is sandbox, all the studies that are not present in the # study_artifact table are also sandbox - if status == 'sandbox': + if status == "sandbox": sql = """SELECT study_id FROM qiita.study JOIN qiita.study_portal USING (study_id) @@ -203,8 +205,7 @@ def get_ids_by_status(cls, status): SELECT study_id FROM qiita.study_artifact)""" qdb.sql_connection.TRN.add(sql, [qiita_config.portal]) - sids = sids.union( - qdb.sql_connection.TRN.execute_fetchflatten()) + sids = sids.union(qdb.sql_connection.TRN.execute_fetchflatten()) return sids @@ -242,18 +243,22 @@ def get_info(cls, study_ids=None, info_cols=None): accessed as a list of dictionaries, keyed on column name. """ # The following tables are considered part of info - _info_cols = frozenset(chain( - qdb.util.get_table_cols('study'), - qdb.util.get_table_cols('study_status'), - qdb.util.get_table_cols('timeseries_type'), - # placeholder for table study_publication - ['publications'])) + _info_cols = frozenset( + chain( + qdb.util.get_table_cols("study"), + qdb.util.get_table_cols("study_status"), + qdb.util.get_table_cols("timeseries_type"), + # placeholder for table study_publication + ["publications"], + ) + ) if info_cols is None: info_cols = _info_cols elif not _info_cols.issuperset(info_cols): - warnings.warn("Non-info columns passed: %s" % ", ".join( - set(info_cols) - _info_cols)) + warnings.warn( + "Non-info columns passed: %s" % ", ".join(set(info_cols) - _info_cols) + ) search_cols = ",".join(sorted(_info_cols.intersection(info_cols))) @@ -281,14 +286,16 @@ def get_info(cls, study_ids=None, info_cols=None): rows = qdb.sql_connection.TRN.execute_fetchindex() if study_ids is not None and len(rows) != len(study_ids): raise qdb.exceptions.QiitaDBError( - 'Non-portal-accessible studies asked for!') + "Non-portal-accessible studies asked for!" 
+ ) res = [] for r in rows: r = dict(r) - if 'ebi_study_accession' in info_cols: - r['ebi_submission_status'] = cls( - r['study_id']).ebi_submission_status + if "ebi_study_accession" in info_cols: + r["ebi_submission_status"] = cls( + r["study_id"] + ).ebi_submission_status res.append(r) return res @@ -348,27 +355,26 @@ def create(cls, owner, title, info, investigation=None): # make sure not passing non-info columns in the info dict if cls._non_info.intersection(info): raise qdb.exceptions.QiitaDBColumnError( - "non info keys passed: %s" % cls._non_info.intersection(info)) + "non info keys passed: %s" % cls._non_info.intersection(info) + ) # cleaning up title, this is also done in JS for the GUI but rather # be safe than sorry - title = ' '.join(title.split()).strip() + title = " ".join(title.split()).strip() with qdb.sql_connection.TRN: if cls.exists(title): - raise qdb.exceptions.QiitaDBDuplicateError( - "Study", "title: %s" % title) + raise qdb.exceptions.QiitaDBDuplicateError("Study", "title: %s" % title) # add default values to info insertdict = deepcopy(info) - insertdict['email'] = owner.id - insertdict['study_title'] = title + insertdict["email"] = owner.id + insertdict["study_title"] = title if "reprocess" not in insertdict: - insertdict['reprocess'] = False + insertdict["reprocess"] = False # No nuns allowed - insertdict = {k: v for k, v in insertdict.items() - if v is not None} + insertdict = {k: v for k, v in insertdict.items() if v is not None} # make sure dictionary only has keys for available columns in db qdb.util.check_table_cols(insertdict, cls._table) @@ -378,8 +384,8 @@ def create(cls, owner, title, info, investigation=None): # Insert study into database sql = """INSERT INTO qiita.{0} ({1}) VALUES ({2}) RETURNING study_id""".format( - cls._table, ','.join(insertdict), - ','.join(['%s'] * len(insertdict))) + cls._table, ",".join(insertdict), ",".join(["%s"] * len(insertdict)) + ) # make sure data in same order as sql column names, # and ids are used @@ -395,13 +401,13 @@ def create(cls, owner, title, info, investigation=None): # Add to both QIITA and given portal (if not QIITA) portal_id = qdb.util.convert_to_id( - qiita_config.portal, 'portal_type', 'portal') + qiita_config.portal, "portal_type", "portal" + ) sql = """INSERT INTO qiita.study_portal (study_id, portal_type_id) VALUES (%s, %s)""" args = [[study_id, portal_id]] - if qiita_config.portal != 'QIITA': - qp_id = qdb.util.convert_to_id( - 'QIITA', 'portal_type', 'portal') + if qiita_config.portal != "QIITA": + qp_id = qdb.util.convert_to_id("QIITA", "portal_type", "portal") args.append([study_id, qp_id]) qdb.sql_connection.TRN.add(sql, args, many=True) qdb.sql_connection.TRN.execute() @@ -435,10 +441,11 @@ def delete(cls, id_): # checking that the id_ exists cls(id_) - if qdb.util.exists_table('sample_%d' % id_): + if qdb.util.exists_table("sample_%d" % id_): raise qdb.exceptions.QiitaDBError( 'Study "%s" cannot be erased because it has a ' - 'sample template' % cls(id_).title) + "sample template" % cls(id_).title + ) args = [id_] @@ -488,10 +495,10 @@ def get_tags(cls): results = dict(qdb.sql_connection.TRN.execute_fetchindex()) # when the system is empty, # it's possible to get an empty dict, fixing - if 'admin' not in results: - results['admin'] = [] - if 'user' not in results: - results['user'] = [] + if "admin" not in results: + results["admin"] = [] + if "user" not in results: + results["user"] = [] return results @@ -515,7 +522,7 @@ def insert_tags(cls, user, tags): qdb.sql_connection.TRN.add(sql, 
sql_args, many=True) qdb.sql_connection.TRN.execute() -# --- Attributes --- + # --- Attributes --- @property def autoloaded(self): """Returns if the study was autoloaded @@ -626,8 +633,7 @@ def public_raw_download(self, public_raw_download): """ sql = """UPDATE qiita.{0} SET public_raw_download = %s WHERE study_id = %s""".format(self._table) - qdb.sql_connection.perform_as_transaction( - sql, [public_raw_download, self._id]) + qdb.sql_connection.perform_as_transaction(sql, [public_raw_download, self._id]) @property def info(self): @@ -639,29 +645,28 @@ def info(self): info of study keyed to column names """ with qdb.sql_connection.TRN: - sql = "SELECT * FROM qiita.{0} WHERE study_id = %s".format( - self._table) + sql = "SELECT * FROM qiita.{0} WHERE study_id = %s".format(self._table) qdb.sql_connection.TRN.add(sql, [self._id]) info = dict(qdb.sql_connection.TRN.execute_fetchindex()[0]) # remove non-info items from info for item in self._non_info: info.pop(item) # removed because redundant to the id already stored in the object - info.pop('study_id') + info.pop("study_id") - if info['principal_investigator_id']: - info['principal_investigator'] = qdb.study.StudyPerson( - info["principal_investigator_id"]) + if info["principal_investigator_id"]: + info["principal_investigator"] = qdb.study.StudyPerson( + info["principal_investigator_id"] + ) else: - info['principal_investigator'] = None - del info['principal_investigator_id'] + info["principal_investigator"] = None + del info["principal_investigator_id"] - if info['lab_person_id']: - info['lab_person'] = qdb.study.StudyPerson( - info["lab_person_id"]) + if info["lab_person_id"]: + info["lab_person"] = qdb.study.StudyPerson(info["lab_person_id"]) else: - info['lab_person'] = None - del info['lab_person_id'] + info["lab_person"] = None + del info["lab_person_id"] return info @@ -684,15 +689,16 @@ def info(self, info): if not info: raise IncompetentQiitaDeveloperError("Need entries in info dict!") - if 'study_id' in info: + if "study_id" in info: raise qdb.exceptions.QiitaDBColumnError("Cannot set study_id!") if self._non_info.intersection(info): raise qdb.exceptions.QiitaDBColumnError( - "non info keys passed: %s" % self._non_info.intersection(info)) + "non info keys passed: %s" % self._non_info.intersection(info) + ) with qdb.sql_connection.TRN: - if 'timeseries_type_id' in info: + if "timeseries_type_id" in info: # We only lock if the timeseries type changes self._lock_non_sandbox() @@ -711,7 +717,8 @@ def info(self, info): data.append(self._id) sql = "UPDATE qiita.{0} SET {1} WHERE study_id = %s".format( - self._table, ','.join(sql_vals)) + self._table, ",".join(sql_vals) + ) qdb.sql_connection.TRN.add(sql, data) qdb.sql_connection.TRN.execute() @@ -728,12 +735,14 @@ def shared_with(self): sql = """SELECT email FROM qiita.{0}_users WHERE study_id = %s""".format(self._table) qdb.sql_connection.TRN.add(sql, [self._id]) - return [qdb.user.User(uid) - for uid in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + qdb.user.User(uid) + for uid in qdb.sql_connection.TRN.execute_fetchflatten() + ] @property def publications(self): - """ Returns list of publications from this study + """Returns list of publications from this study Returns ------- @@ -763,7 +772,7 @@ def publications(self, values): """ # Check that a list is actually passed if not isinstance(values, list): - raise TypeError('publications should be a list') + raise TypeError("publications should be a list") with qdb.sql_connection.TRN: # Delete the previous pmids associated 
with the study @@ -780,7 +789,7 @@ def publications(self, values): @property def investigation(self): - """ Returns Investigation this study is part of + """Returns Investigation this study is part of If the study doesn't have an investigation associated with it, it will return None @@ -815,8 +824,11 @@ def sample_template(self): WHERE study_id = %s)""" qdb.sql_connection.TRN.add(sql, [self.id]) exists = qdb.sql_connection.TRN.execute_fetchlast() - return (qdb.metadata_template.sample_template.SampleTemplate(self._id) - if exists else None) + return ( + qdb.metadata_template.sample_template.SampleTemplate(self._id) + if exists + else None + ) @property def data_types(self): @@ -846,7 +858,8 @@ def owner(self): """ with qdb.sql_connection.TRN: sql = """SELECT email FROM qiita.{} WHERE study_id = %s""".format( - self._table) + self._table + ) qdb.sql_connection.TRN.add(sql, [self._id]) return qdb.user.User(qdb.sql_connection.TRN.execute_fetchlast()) @@ -889,17 +902,17 @@ def environmental_packages(self, values): # Check that a list is actually passed if not isinstance(values, list): - raise TypeError('Environmental packages should be a list') + raise TypeError("Environmental packages should be a list") # Get all the environmental packages - env_pkgs = [pkg[0] - for pkg in qdb.util.get_environmental_packages()] + env_pkgs = [pkg[0] for pkg in qdb.util.get_environmental_packages()] # Check that all the passed values are valid environmental packages missing = set(values).difference(env_pkgs) if missing: - raise ValueError('Environmetal package(s) not recognized: %s' - % ', '.join(missing)) + raise ValueError( + "Environmetal package(s) not recognized: %s" % ", ".join(missing) + ) # Delete the previous environmental packages associated with # the study @@ -965,8 +978,8 @@ def ebi_study_accession(self, value): """ if self.ebi_study_accession is not None: raise qdb.exceptions.QiitaDBError( - "Study %s already has an EBI study accession" - % self.id) + "Study %s already has an EBI study accession" % self.id + ) sql = """UPDATE qiita.{} SET ebi_study_accession = %s WHERE study_id = %s""".format(self._table) @@ -974,9 +987,8 @@ def ebi_study_accession(self, value): def _ebi_submission_jobs(self): """Helper code to avoid duplication""" - plugin = qdb.software.Software.from_name_and_version( - 'Qiita', 'alpha') - cmd = plugin.get_command('submit_to_EBI') + plugin = qdb.software.Software.from_name_and_version("Qiita", "alpha") + cmd = plugin.get_command("submit_to_EBI") sql = """SELECT processing_job_id, pj.command_parameters->>'artifact' as aid, @@ -1018,10 +1030,10 @@ def ebi_submission_status(self): artifacts, & 'failed' if there are artifacts with failed jobs without successful ones. 
""" - status = 'not submitted' + status = "not submitted" with qdb.sql_connection.TRN: if self.ebi_study_accession: - status = 'submitted' + status = "submitted" jobs = defaultdict(dict) for info in self._ebi_submission_jobs(): @@ -1030,21 +1042,22 @@ def ebi_submission_status(self): continue jobs[js][aid] = jid - if 'queued' in jobs or 'running' in jobs: - status = 'submitting' - elif 'error' in jobs: + if "queued" in jobs or "running" in jobs: + status = "submitting" + elif "error" in jobs: aids_error = [] aids_other = [] for s, aids in jobs.items(): for aid in aids.keys(): - if s == 'error': + if s == "error": aids_error.append(aid) else: aids_other.append(aid) difference = set(aids_error) - set(aids_other) if difference: - status = ('Some artifact submissions failed: %s' % - ', '.join(map(str, list(difference)))) + status = "Some artifact submissions failed: %s" % ", ".join( + map(str, list(difference)) + ) return status @@ -1066,7 +1079,7 @@ def tags(self): qdb.sql_connection.TRN.add(sql) return [t[0] for t in qdb.sql_connection.TRN.execute_fetchindex()] -# --- methods --- + # --- methods --- def artifacts(self, dtype=None, artifact_type=None): """Returns the list of artifacts associated with the study @@ -1104,8 +1117,10 @@ def artifacts(self, dtype=None, artifact_type=None): sql_args.append(qdb.util.artifact_visibilities_to_skip()) qdb.sql_connection.TRN.add(sql, sql_args) - return [qdb.artifact.Artifact(aid) - for aid in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + qdb.artifact.Artifact(aid) + for aid in qdb.sql_connection.TRN.execute_fetchflatten() + ] def prep_templates(self, data_type=None): """Return list of prep template ids @@ -1133,8 +1148,10 @@ def prep_templates(self, data_type=None): WHERE study_id = %s{0} ORDER BY prep_template_id""".format(spec_data) qdb.sql_connection.TRN.add(sql, args) - return [qdb.metadata_template.prep_template.PrepTemplate(ptid) - for ptid in qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + qdb.metadata_template.prep_template.PrepTemplate(ptid) + for ptid in qdb.sql_connection.TRN.execute_fetchflatten() + ] def analyses(self): """Get all analyses where samples from this study have been used @@ -1152,10 +1169,13 @@ def analyses(self): WHERE sample_id IN %s ORDER BY analysis_id""" qdb.sql_connection.TRN.add( - sql, [tuple(self.sample_template.keys())]) + sql, [tuple(self.sample_template.keys())] + ) - return [qdb.analysis.Analysis(_id) for _id in - qdb.sql_connection.TRN.execute_fetchflatten()] + return [ + qdb.analysis.Analysis(_id) + for _id in qdb.sql_connection.TRN.execute_fetchflatten() + ] return [] def has_access(self, user, no_public=False): @@ -1176,12 +1196,12 @@ def has_access(self, user, no_public=False): """ with qdb.sql_connection.TRN: # return True if the user is one of the admins - if user.level in {'superuser', 'admin'}: + if user.level in {"superuser", "admin"}: return True # if no_public is False then just check if the study is public # and return True - if not no_public and self.status == 'public': + if not no_public and self.status == "public": return True # let's check if the study belongs to this user or has been @@ -1201,8 +1221,16 @@ def has_access(self, user, no_public=False): ) """ qdb.sql_connection.TRN.add( - sql, [user.email, qiita_config.portal, self.id, - user.email, qiita_config.portal, self.id]) + sql, + [ + user.email, + qiita_config.portal, + self.id, + user.email, + qiita_config.portal, + self.id, + ], + ) result = qdb.sql_connection.TRN.execute_fetchlast() return result @@ -1222,8 
+1250,11 @@ def can_edit(self, user): """ # The study is editable only if the user is the owner, is in the shared # list or the user is an admin - return (user.level in {'superuser', 'admin'} or self.owner == user or - user in self.shared_with) + return ( + user.level in {"superuser", "admin"} + or self.owner == user + or user in self.shared_with + ) def share(self, user): """Share the study with another user @@ -1271,9 +1302,9 @@ def update_tags(self, user, tags): str Warnings during insertion """ - message = '' + message = "" # converting to set just to facilitate operations - system_tags_admin = set(self.get_tags()['admin']) + system_tags_admin = set(self.get_tags()["admin"]) user_level = user.level current_tags = set(self.tags) to_delete = current_tags - set(tags) @@ -1282,25 +1313,24 @@ def update_tags(self, user, tags): if to_delete or to_add: with qdb.sql_connection.TRN: if to_delete: - if user_level != 'admin': + if user_level != "admin": admin_tags = to_delete & system_tags_admin if admin_tags: - message += 'You cannot remove: %s' % ', '.join( - admin_tags) + message += "You cannot remove: %s" % ", ".join(admin_tags) to_delete = to_delete - admin_tags if to_delete: sql = """DELETE FROM qiita.per_study_tags WHERE study_id = %s AND study_tag IN %s""" - qdb.sql_connection.TRN.add( - sql, [self._id, tuple(to_delete)]) + qdb.sql_connection.TRN.add(sql, [self._id, tuple(to_delete)]) if to_add: - if user_level != 'admin': + if user_level != "admin": admin_tags = to_add & system_tags_admin if admin_tags: - message += ('Only admins can assign: ' - '%s' % ', '.join(admin_tags)) + message += "Only admins can assign: %s" % ", ".join( + admin_tags + ) to_add = to_add - admin_tags if to_add: @@ -1321,7 +1351,7 @@ def update_tags(self, user, tags): qdb.sql_connection.TRN.execute() else: - message = 'No changes in the tags.' + message = "No changes in the tags." 
return message @@ -1342,6 +1372,7 @@ class StudyPerson(qdb.base.QiitaObject): phone : str or None phone number of the person """ + _table = "study_person" @classmethod @@ -1404,8 +1435,7 @@ def from_name_and_affiliation(cls, name, affiliation): """ with qdb.sql_connection.TRN: if not cls.exists(name, affiliation): - raise qdb.exceptions.QiitaDBLookupError( - 'Study person does not exist') + raise qdb.exceptions.QiitaDBLookupError("Study person does not exist") sql = """SELECT study_person_id FROM qiita.{0} WHERE name = %s @@ -1481,14 +1511,15 @@ def delete(cls, id_): sql = """SELECT study_id FROM qiita.study WHERE {} = %s""" - cols = ['lab_person_id', 'principal_investigator_id'] + cols = ["lab_person_id", "principal_investigator_id"] rel = {} for c in cols: qdb.sql_connection.TRN.add(sql.format(c), [id_]) rel[c] = qdb.sql_connection.TRN.execute_fetchindex() raise qdb.exceptions.QiitaDBError( 'StudyPerson "%s" cannot be deleted because there are ' - 'studies referencing it: %s' % (id_, str(rel))) + "studies referencing it: %s" % (id_, str(rel)) + ) sql = "DELETE FROM qiita.study_person WHERE study_person_id = %s" qdb.sql_connection.TRN.add(sql, [id_]) diff --git a/qiita_db/test/support_files/worker.py b/qiita_db/test/support_files/worker.py index e2d4a77bd..ebfd4f681 100755 --- a/qiita_db/test/support_files/worker.py +++ b/qiita_db/test/support_files/worker.py @@ -1,15 +1,16 @@ #!/usr/bin/env python -import click -from json import dumps import sys -from os import utime, mkdir +from json import dumps +from os import mkdir, utime from os.path import exists +import click + @click.command() -@click.option('--fp_archive', required=False, type=str) -@click.option('--fp_biom', required=False, type=str) -@click.option('--output_dir', required=False, type=str) +@click.option("--fp_archive", required=False, type=str) +@click.option("--fp_biom", required=False, type=str) +@click.option("--output_dir", required=False, type=str) # The above parameters are actually required. However, # for testing purposes, they are optional here. Specifically, they # are optional to test use cases where one or both are missing. @@ -21,31 +22,33 @@ # --env_report is a worker.py specific flag to report the python environment # version that this script is currently running in. Useful for testing # environment switching. -@click.option('--env_report', is_flag=True, default=False) +@click.option("--env_report", is_flag=True, default=False) # execute needed to support click def execute(fp_archive, fp_biom, output_dir, env_report): """worker.py implements an example interface to directly communicate - with plugins, or other external programs. + with plugins, or other external programs. 
""" if env_report: - d = {'version_major': '%d' % sys.version_info.major, - 'version_minor': '%d' % sys.version_info.minor, - 'version_micro': '%d' % sys.version_info.micro} + d = { + "version_major": "%d" % sys.version_info.major, + "version_minor": "%d" % sys.version_info.minor, + "version_micro": "%d" % sys.version_info.micro, + } click.echo("%s" % dumps(d)) else: - fp_archive = fp_archive.replace('.json', '.tre') + fp_archive = fp_archive.replace(".json", ".tre") # creating blank files if not exists(output_dir): mkdir(output_dir) for fname in [fp_archive, fp_biom]: - with open(fname, 'a'): + with open(fname, "a"): utime(fname, None) - d = {'archive': fp_archive, 'biom': fp_biom, 'output_dir': output_dir} + d = {"archive": fp_archive, "biom": fp_biom, "output_dir": output_dir} click.echo("%s" % dumps(d)) -if __name__ == '__main__': +if __name__ == "__main__": execute() diff --git a/qiita_db/test/test_analysis.py b/qiita_db/test/test_analysis.py index d428ca8fd..1e80c3789 100644 --- a/qiita_db/test/test_analysis.py +++ b/qiita_db/test/test_analysis.py @@ -1,17 +1,17 @@ -from unittest import TestCase, main +from functools import partial +from json import dumps from os import remove -from os.path import exists, join, basename +from os.path import basename, exists, join from shutil import move +from unittest import TestCase, main from biom import load_table from pandas.testing import assert_frame_equal -from functools import partial -from qiita_core.util import qiita_test_checker -from qiita_core.testing import wait_for_processing_job -from qiita_core.qiita_settings import qiita_config import qiita_db as qdb -from json import dumps +from qiita_core.qiita_settings import qiita_config +from qiita_core.testing import wait_for_processing_job +from qiita_core.util import qiita_test_checker # ----------------------------------------------------------------------------- # Copyright (c) 2014--, The Qiita Development Team. 
@@ -26,9 +26,13 @@ class TestAnalysisIter(TestCase): def test_iter(self): obs = list(qdb.analysis.Analysis.iter()) exp = [ - qdb.analysis.Analysis(1), qdb.analysis.Analysis(2), - qdb.analysis.Analysis(3), qdb.analysis.Analysis(4), - qdb.analysis.Analysis(5), qdb.analysis.Analysis(6)] + qdb.analysis.Analysis(1), + qdb.analysis.Analysis(2), + qdb.analysis.Analysis(3), + qdb.analysis.Analysis(4), + qdb.analysis.Analysis(5), + qdb.analysis.Analysis(6), + ] self.assertCountEqual(obs, exp) @@ -45,42 +49,43 @@ def setUp(self): self.table_fp = None # fullpaths for testing - self.duplicated_samples_not_merged = self.get_fp( - "not_merged_samples.txt") + self.duplicated_samples_not_merged = self.get_fp("not_merged_samples.txt") self.map_exp_fp = self.get_fp("1_analysis_mapping_exp.txt") from glob import glob + conf_files = glob(join(qiita_config.plugin_dir, "BIOM*.conf")) for i, fp in enumerate(conf_files): qdb.software.Software.from_file(fp, update=True) def tearDown(self): - self.analysis.artifacts[0].visibility = 'private' + self.analysis.artifacts[0].visibility = "private" qiita_config.portal = self.portal - with open(self.biom_fp, 'w') as f: + with open(self.biom_fp, "w") as f: f.write("") - fp = self.get_fp('testfile.txt') + fp = self.get_fp("testfile.txt") if exists(fp): remove(fp) if self.table_fp: mp = qdb.util.get_mountpoint("processed_data")[0][1] if exists(self.table_fp): - move(self.table_fp, - join(mp, "2_study_1001_closed_reference_otu_table.biom")) + move( + self.table_fp, + join(mp, "2_study_1001_closed_reference_otu_table.biom"), + ) qiita_config.portal = self._old_portal def _wait_for_jobs(self, analysis): for j in analysis.jobs: wait_for_processing_job(j.id) - if j.status == 'error': + if j.status == "error": print(j.log.msg) - def _create_analyses_with_samples(self, user='demo@microbio.me', - merge=False): + def _create_analyses_with_samples(self, user="demo@microbio.me", merge=False): """Aux function to create an analysis with samples Parameters @@ -103,19 +108,31 @@ def _create_analyses_with_samples(self, user='demo@microbio.me', dflt_analysis = user.default_analysis dflt_analysis.add_samples( - {4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196', - '1.SKM9.640192', '1.SKM4.640180']}) + { + 4: [ + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + ] + } + ) new = qdb.analysis.Analysis.create( - user, "newAnalysis", "A New Analysis", from_default=True, - merge_duplicated_sample_ids=merge) + user, + "newAnalysis", + "A New Analysis", + from_default=True, + merge_duplicated_sample_ids=merge, + ) self._wait_for_jobs(new) return new def test_lock_samples(self): - dflt = qdb.user.User('demo@microbio.me').default_analysis + dflt = qdb.user.User("demo@microbio.me").default_analysis # The default analysis can have samples added/removed dflt._lock_samples() @@ -124,31 +141,27 @@ def test_lock_samples(self): qdb.analysis.Analysis(1)._lock_samples() def test_get_by_status(self): - qiita_config.portal = 'QIITA' - self.assertEqual( - qdb.analysis.Analysis.get_by_status('public'), set([])) - qiita_config.portal = 'EMP' - self.assertEqual( - qdb.analysis.Analysis.get_by_status('public'), set([])) - - qiita_config.portal = 'QIITA' - self.analysis.artifacts[0].visibility = 'public' - - self.assertEqual(qdb.analysis.Analysis.get_by_status('public'), - {self.analysis}) - qiita_config.portal = 'EMP' - self.assertEqual( - qdb.analysis.Analysis.get_by_status('public'), set([])) + qiita_config.portal = "QIITA" + 
self.assertEqual(qdb.analysis.Analysis.get_by_status("public"), set([])) + qiita_config.portal = "EMP" + self.assertEqual(qdb.analysis.Analysis.get_by_status("public"), set([])) + + qiita_config.portal = "QIITA" + self.analysis.artifacts[0].visibility = "public" + + self.assertEqual(qdb.analysis.Analysis.get_by_status("public"), {self.analysis}) + qiita_config.portal = "EMP" + self.assertEqual(qdb.analysis.Analysis.get_by_status("public"), set([])) def test_can_be_publicized(self): analysis = qdb.analysis.Analysis(1) self.assertEqual(analysis.can_be_publicized, (False, [4, 5, 6])) a4 = qdb.artifact.Artifact(4) - a4.visibility = 'public' + a4.visibility = "public" self.assertEqual(analysis.can_be_publicized, (True, [])) - a4.visibility = 'private' + a4.visibility = "private" self.assertEqual(analysis.can_be_publicized, (False, [4, 5, 6])) def test_add_artifact(self): @@ -159,46 +172,41 @@ def test_add_artifact(self): def test_has_access_public(self): analysis = self._create_analyses_with_samples("admin@foo.bar") - analysis.artifacts[0].visibility = 'public' + analysis.artifacts[0].visibility = "public" - qiita_config.portal = 'QIITA' - self.assertTrue( - analysis.has_access(qdb.user.User("demo@microbio.me"))) - qiita_config.portal = 'EMP' - self.assertFalse( - analysis.has_access(qdb.user.User("demo@microbio.me"))) + qiita_config.portal = "QIITA" + self.assertTrue(analysis.has_access(qdb.user.User("demo@microbio.me"))) + qiita_config.portal = "EMP" + self.assertFalse(analysis.has_access(qdb.user.User("demo@microbio.me"))) def test_has_access_shared(self): - self.assertTrue( - self.analysis.has_access(qdb.user.User("shared@foo.bar"))) + self.assertTrue(self.analysis.has_access(qdb.user.User("shared@foo.bar"))) def test_has_access_private(self): - self.assertTrue( - self.analysis.has_access(qdb.user.User("test@foo.bar"))) + self.assertTrue(self.analysis.has_access(qdb.user.User("test@foo.bar"))) def test_has_access_admin(self): - qiita_config.portal = 'QIITA' - self.assertTrue( - self.analysis.has_access(qdb.user.User("admin@foo.bar"))) - qiita_config.portal = 'EMP' + qiita_config.portal = "QIITA" + self.assertTrue(self.analysis.has_access(qdb.user.User("admin@foo.bar"))) + qiita_config.portal = "EMP" with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.analysis.Analysis(1).has_access(qdb.user.User("admin@foo.bar")) def test_has_access_no_access(self): - self.assertFalse( - self.analysis.has_access(qdb.user.User("demo@microbio.me"))) + self.assertFalse(self.analysis.has_access(qdb.user.User("demo@microbio.me"))) def test_can_edit(self): a = qdb.analysis.Analysis(1) - self.assertTrue(a.can_edit(qdb.user.User('test@foo.bar'))) - self.assertTrue(a.can_edit(qdb.user.User('shared@foo.bar'))) - self.assertTrue(a.can_edit(qdb.user.User('admin@foo.bar'))) - self.assertFalse(a.can_edit(qdb.user.User('demo@microbio.me'))) + self.assertTrue(a.can_edit(qdb.user.User("test@foo.bar"))) + self.assertTrue(a.can_edit(qdb.user.User("shared@foo.bar"))) + self.assertTrue(a.can_edit(qdb.user.User("admin@foo.bar"))) + self.assertFalse(a.can_edit(qdb.user.User("demo@microbio.me"))) def test_create_nonqiita_portal(self): qiita_config.portal = "EMP" obs = qdb.analysis.Analysis.create( - qdb.user.User("admin@foo.bar"), "newAnalysis", "A New Analysis") + qdb.user.User("admin@foo.bar"), "newAnalysis", "A New Analysis" + ) # make sure portal is associated self.assertCountEqual(obs._portals, ["QIITA", "EMP"]) @@ -211,7 +219,8 @@ def test_create_from_default(self): owner = qdb.user.User("test@foo.bar") obs = 
qdb.analysis.Analysis.create( - owner, "newAnalysis", "A New Analysis", from_default=True) + owner, "newAnalysis", "A New Analysis", from_default=True + ) self.assertEqual(obs.owner, owner) self.assertEqual(obs.name, "newAnalysis") @@ -220,9 +229,10 @@ def test_create_from_default(self): self.assertEqual(obs.description, "A New Analysis") self.assertCountEqual(obs.samples, [4]) self.assertCountEqual( - obs.samples[4], ['1.SKD8.640184', '1.SKB7.640196', - '1.SKM9.640192', '1.SKM4.640180']) - self.assertEqual(obs.data_types, ['18S']) + obs.samples[4], + ["1.SKD8.640184", "1.SKB7.640196", "1.SKM9.640192", "1.SKM4.640180"], + ) + self.assertEqual(obs.data_types, ["18S"]) self.assertEqual(obs.shared_with, []) self.assertEqual(obs.mapping_file, None) self.assertEqual(obs.tgz, None) @@ -230,18 +240,18 @@ def test_create_from_default(self): self.assertEqual(obs.pmid, None) def test_exists(self): - qiita_config.portal = 'QIITA' + qiita_config.portal = "QIITA" self.assertTrue(qdb.analysis.Analysis.exists(1)) self.assertFalse(qdb.analysis.Analysis.exists(1000)) - qiita_config.portal = 'EMP' + qiita_config.portal = "EMP" self.assertFalse(qdb.analysis.Analysis.exists(1)) self.assertFalse(qdb.analysis.Analysis.exists(1000)) def test_delete(self): # successful delete new = qdb.analysis.Analysis.create( - qdb.user.User('demo@microbio.me'), "newAnalysis", - "A New Analysis") + qdb.user.User("demo@microbio.me"), "newAnalysis", "A New Analysis" + ) self.assertTrue(qdb.analysis.Analysis.exists(new.id)) qdb.analysis.Analysis.delete(new.id) self.assertFalse(qdb.analysis.Analysis.exists(new.id)) @@ -269,24 +279,40 @@ def test_set_description(self): self.assertEqual(self.analysis.description, "New description") def test_retrieve_samples(self): - exp = {4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196', - '1.SKM9.640192', '1.SKM4.640180'], - 5: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196', - '1.SKM9.640192', '1.SKM4.640180'], - 6: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196', - '1.SKM9.640192', '1.SKM4.640180']} + exp = { + 4: [ + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + ], + 5: [ + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + ], + 6: [ + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + ], + } self.assertCountEqual(self.analysis.samples, exp) def test_retrieve_portal(self): self.assertEqual(self.analysis._portals, ["QIITA"]) def test_retrieve_data_types(self): - exp = ['18S', '16S'] + exp = ["18S", "16S"] self.assertCountEqual(self.analysis.data_types, exp) def test_retrieve_shared_with(self): - self.assertEqual(self.analysis.shared_with, - [qdb.user.User("shared@foo.bar")]) + self.assertEqual(self.analysis.shared_with, [qdb.user.User("shared@foo.bar")]) def test_retrieve_jobs(self): self.assertEqual(self.analysis.jobs, []) @@ -304,31 +330,71 @@ def test_retrieve_mapping_file(self): exp = join(self.fp, "1_analysis_mapping.txt") obs = self.analysis.mapping_file self.assertIsNotNone(obs) - self.assertEqual( - qdb.util.get_filepath_information(obs)['fullpath'], exp) + self.assertEqual(qdb.util.get_filepath_information(obs)["fullpath"], exp) self.assertTrue(exists(exp)) def test_metadata_categories(self): - exp = {1: { - 'sample': { - 'env_package', 'water_content_soil', 'collection_timestamp', - 'anonymized_name', 'sample_type', 'env_biome', 'host_taxid', - 'ph', 'env_feature', 'temp', 'country', 'scientific_name', - 'assigned_from_geo', 
'physical_specimen_location', - 'common_name', 'longitude', 'depth', 'season_environment', - 'description', 'tot_org_carb', 'tot_nitro', 'dna_extracted', - 'texture', 'samp_salinity', 'taxon_id', 'host_subject_id', - 'description_duplicate', 'latitude', - 'physical_specimen_remaining', 'altitude', 'elevation'}, - 'prep': { - 'run_prefix', 'platform', 'study_center', - 'library_construction_protocol', 'emp_status', - 'target_subfragment', 'target_gene', 'center_project_name', - 'illumina_technology', 'experiment_title', 'instrument_model', - 'run_date', 'run_center', 'pcr_primers', 'sequencing_meth', - 'experiment_center', 'experiment_design_description', - 'barcode', 'samp_size', 'sample_center', 'primer', - 'center_name'}}} + exp = { + 1: { + "sample": { + "env_package", + "water_content_soil", + "collection_timestamp", + "anonymized_name", + "sample_type", + "env_biome", + "host_taxid", + "ph", + "env_feature", + "temp", + "country", + "scientific_name", + "assigned_from_geo", + "physical_specimen_location", + "common_name", + "longitude", + "depth", + "season_environment", + "description", + "tot_org_carb", + "tot_nitro", + "dna_extracted", + "texture", + "samp_salinity", + "taxon_id", + "host_subject_id", + "description_duplicate", + "latitude", + "physical_specimen_remaining", + "altitude", + "elevation", + }, + "prep": { + "run_prefix", + "platform", + "study_center", + "library_construction_protocol", + "emp_status", + "target_subfragment", + "target_gene", + "center_project_name", + "illumina_technology", + "experiment_title", + "instrument_model", + "run_date", + "run_center", + "pcr_primers", + "sequencing_meth", + "experiment_center", + "experiment_design_description", + "barcode", + "samp_size", + "sample_center", + "primer", + "center_name", + }, + } + } obs = self.analysis.metadata_categories self.assertDictEqual(obs, exp) @@ -336,10 +402,10 @@ def test_retrieve_tgz(self): # generating here as the tgz is only generated once the analysis runs # to completion (un)successfully analysis = self._create_analyses_with_samples("admin@foo.bar") - fp = self.get_fp('test.tgz') - with open(fp, 'w') as f: - f.write('') - analysis._add_file(fp, 'tgz') + fp = self.get_fp("test.tgz") + with open(fp, "w") as f: + f.write("") + analysis._add_file(fp, "tgz") self.assertEqual(analysis.tgz, fp) def test_retrieve_tgz_none(self): @@ -347,50 +413,77 @@ def test_retrieve_tgz_none(self): def test_summary_data(self): obs = self.analysis.summary_data() - exp = {'studies': 1, - 'artifacts': 3, - 'samples': 5} + exp = {"studies": 1, "artifacts": 3, "samples": 5} self.assertEqual(obs, exp) def test_add_remove_samples(self): - analysis = qdb.user.User('shared@foo.bar').default_analysis - exp = {4: ['1.SKD8.640184', '1.SKB7.640196', '1.SKM9.640192', - '1.SKM4.640180', '1.SKB8.640193'], - 5: ['1.SKD8.640184', '1.SKB7.640196', '1.SKM9.640192', - '1.SKM4.640180', '1.SKB8.640193'], - 6: ['1.SKD8.640184', '1.SKB7.640196', '1.SKM9.640192', - '1.SKM4.640180', '1.SKB8.640193']} + analysis = qdb.user.User("shared@foo.bar").default_analysis + exp = { + 4: [ + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + "1.SKB8.640193", + ], + 5: [ + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + "1.SKB8.640193", + ], + 6: [ + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + "1.SKB8.640193", + ], + } analysis.add_samples(exp) obs = analysis.samples self.assertCountEqual(list(obs.keys()), exp.keys()) for k in obs: self.assertCountEqual(obs[k], 
exp[k]) - analysis.remove_samples(artifacts=(qdb.artifact.Artifact(4), ), - samples=('1.SKB8.640193', )) - exp = {4: ['1.SKD8.640184', '1.SKB7.640196', '1.SKM9.640192', - '1.SKM4.640180'], - 5: ['1.SKD8.640184', '1.SKB7.640196', '1.SKM9.640192', - '1.SKM4.640180', '1.SKB8.640193'], - 6: ['1.SKD8.640184', '1.SKB7.640196', '1.SKM9.640192', - '1.SKM4.640180', '1.SKB8.640193']} + analysis.remove_samples( + artifacts=(qdb.artifact.Artifact(4),), samples=("1.SKB8.640193",) + ) + exp = { + 4: ["1.SKD8.640184", "1.SKB7.640196", "1.SKM9.640192", "1.SKM4.640180"], + 5: [ + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + "1.SKB8.640193", + ], + 6: [ + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + "1.SKB8.640193", + ], + } obs = analysis.samples self.assertCountEqual(list(obs.keys()), exp.keys()) for k in obs: self.assertCountEqual(obs[k], exp[k]) - analysis.remove_samples(samples=('1.SKD8.640184', )) - exp = {4: ['1.SKB7.640196', '1.SKM9.640192', '1.SKM4.640180'], - 5: ['1.SKB8.640193', '1.SKB7.640196', '1.SKM9.640192', - '1.SKM4.640180'], - 6: ['1.SKB8.640193', '1.SKB7.640196', '1.SKM9.640192', - '1.SKM4.640180']} + analysis.remove_samples(samples=("1.SKD8.640184",)) + exp = { + 4: ["1.SKB7.640196", "1.SKM9.640192", "1.SKM4.640180"], + 5: ["1.SKB8.640193", "1.SKB7.640196", "1.SKM9.640192", "1.SKM4.640180"], + 6: ["1.SKB8.640193", "1.SKB7.640196", "1.SKM9.640192", "1.SKM4.640180"], + } self.assertCountEqual(analysis.samples, exp) analysis.remove_samples( - artifacts=(qdb.artifact.Artifact(4), qdb.artifact.Artifact(5))) - exp = {6: {'1.SKB7.640196', '1.SKB8.640193', - '1.SKM4.640180', '1.SKM9.640192'}} + artifacts=(qdb.artifact.Artifact(4), qdb.artifact.Artifact(5)) + ) + exp = {6: {"1.SKB7.640196", "1.SKB8.640193", "1.SKM4.640180", "1.SKM9.640192"}} self.assertCountEqual(analysis.samples, exp) def test_share_unshare(self): @@ -405,19 +498,20 @@ def test_share_unshare(self): def test_build_mapping_file(self): analysis = self._create_analyses_with_samples() - samples = {4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']} + samples = {4: ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"]} analysis._build_mapping_file(samples) - obs = qdb.util.get_filepath_information( - analysis.mapping_file)['fullpath'] + obs = qdb.util.get_filepath_information(analysis.mapping_file)["fullpath"] exp = self.get_fp("%s_analysis_mapping.txt" % analysis.id) self.assertEqual(obs, exp) obs = qdb.metadata_template.util.load_template_to_dataframe( - obs, index='#SampleID') + obs, index="#SampleID" + ) exp = qdb.metadata_template.util.load_template_to_dataframe( - self.map_exp_fp, index='#SampleID') + self.map_exp_fp, index="#SampleID" + ) # assert_frame_equal assumes same order on the rows, thus sorting # frames by index @@ -431,30 +525,41 @@ def test_build_mapping_file(self): # testing categories analysis._build_mapping_file( - samples, categories=set( - ['env_package', 'experiment_design_description'])) - obs = qdb.util.get_filepath_information( - analysis.mapping_file)['fullpath'] + samples, categories=set(["env_package", "experiment_design_description"]) + ) + obs = qdb.util.get_filepath_information(analysis.mapping_file)["fullpath"] obs = qdb.metadata_template.util.load_template_to_dataframe( - obs, index='#SampleID').columns - exp = ['experiment_design_description', 'env_package', - 'qiita_artifact_id', 'qiita_prep_deprecated', - 'qiita_study_title', 'qiita_study_alias', 'qiita_owner', - 'qiita_principal_investigator'] + obs, index="#SampleID" + ).columns + 
exp = [ + "experiment_design_description", + "env_package", + "qiita_artifact_id", + "qiita_prep_deprecated", + "qiita_study_title", + "qiita_study_alias", + "qiita_owner", + "qiita_principal_investigator", + ] self.assertCountEqual(obs, exp) def test_build_mapping_file_duplicated_samples_no_merge(self): analysis = self._create_analyses_with_samples() - samples = {4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'], - 3: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']} + samples = { + 4: ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"], + 3: ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"], + } analysis._build_mapping_file(samples, True) - mapping_fp = qdb.util.get_filepath_information( - analysis.mapping_file)['fullpath'] + mapping_fp = qdb.util.get_filepath_information(analysis.mapping_file)[ + "fullpath" + ] obs = qdb.metadata_template.util.load_template_to_dataframe( - mapping_fp, index='#SampleID') + mapping_fp, index="#SampleID" + ) exp = qdb.metadata_template.util.load_template_to_dataframe( - self.duplicated_samples_not_merged, index='#SampleID') + self.duplicated_samples_not_merged, index="#SampleID" + ) # assert_frame_equal assumes same order on the rows, thus sorting # frames by index @@ -468,15 +573,20 @@ def test_build_mapping_file_duplicated_samples_no_merge(self): def test_build_mapping_file_duplicated_samples_merge(self): analysis = self._create_analyses_with_samples() - samples = {4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'], - 3: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']} + samples = { + 4: ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"], + 3: ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"], + } analysis._build_mapping_file(samples) - mapping_fp = qdb.util.get_filepath_information( - analysis.mapping_file)['fullpath'] + mapping_fp = qdb.util.get_filepath_information(analysis.mapping_file)[ + "fullpath" + ] obs = qdb.metadata_template.util.load_template_to_dataframe( - mapping_fp, index='#SampleID') + mapping_fp, index="#SampleID" + ) exp = qdb.metadata_template.util.load_template_to_dataframe( - self.map_exp_fp, index='#SampleID') + self.map_exp_fp, index="#SampleID" + ) # assert_frame_equal assumes same order on the rows, thus sorting # frames by index @@ -491,74 +601,98 @@ def test_build_mapping_file_duplicated_samples_merge(self): def test_build_biom_tables(self): analysis = self._create_analyses_with_samples() grouped_samples = { - '18S || algorithm': [ - (4, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'])]} + "18S || algorithm": [ + (4, ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"]) + ] + } obs_bioms = analysis._build_biom_tables(grouped_samples) - biom_fp = self.get_fp( - "%s_analysis_18S_algorithm.biom" % analysis.id) + biom_fp = self.get_fp("%s_analysis_18S_algorithm.biom" % analysis.id) obs = [(a, basename(b)) for a, b, _ in obs_bioms] - self.assertEqual(obs, [('18S', basename(biom_fp))]) + self.assertEqual(obs, [("18S", basename(biom_fp))]) table = load_table(obs_bioms[0][1]) - obs = set(table.ids(axis='sample')) - exp = {'1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'} + obs = set(table.ids(axis="sample")) + exp = {"1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"} self.assertEqual(obs, exp) def test_build_biom_tables_with_references(self): analysis = self._create_analyses_with_samples() analysis_id = analysis.id grouped_samples = { - ('18S || Pick closed-reference OTUs (reference: 1) | ' - 'Split libraries FASTQ'): [ - (4, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']), - (5, ['1.SKB8.640193', 
'1.SKD8.640184', '1.SKB7.640196'])], - ('18S || Pick closed-reference OTUs (reference: 1) | ' - 'Trim (lenght: 150)'): [ - (4, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']), - (5, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'])], - ('16S || Pick closed-reference OTUs (reference: 2) | ' - 'Trim (lenght: 100)'): [ - (4, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']), - (5, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'])]} + ( + "18S || Pick closed-reference OTUs (reference: 1) | " + "Split libraries FASTQ" + ): [ + (4, ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"]), + (5, ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"]), + ], + ("18S || Pick closed-reference OTUs (reference: 1) | Trim (lenght: 150)"): [ + (4, ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"]), + (5, ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"]), + ], + ("16S || Pick closed-reference OTUs (reference: 2) | Trim (lenght: 100)"): [ + (4, ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"]), + (5, ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"]), + ], + } obs_bioms = analysis._build_biom_tables(grouped_samples) obs = [(a, basename(b)) for a, b, _ in obs_bioms] exp = [ - ('16S', '%s_analysis_16S_PickclosedreferenceOTUsreference2' - 'Trimlenght100.biom' % analysis_id), - ('18S', '%s_analysis_18S_PickclosedreferenceOTUsreference1' - 'SplitlibrariesFASTQ.biom' % analysis_id), - ('18S', '%s_analysis_18S_PickclosedreferenceOTUsreference1' - 'Trimlenght150.biom' % analysis_id)] + ( + "16S", + "%s_analysis_16S_PickclosedreferenceOTUsreference2" + "Trimlenght100.biom" % analysis_id, + ), + ( + "18S", + "%s_analysis_18S_PickclosedreferenceOTUsreference1" + "SplitlibrariesFASTQ.biom" % analysis_id, + ), + ( + "18S", + "%s_analysis_18S_PickclosedreferenceOTUsreference1" + "Trimlenght150.biom" % analysis_id, + ), + ] self.assertCountEqual(obs, exp) - exp = {'1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'} + exp = {"1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"} for dt, fp, _ in obs_bioms: table = load_table(fp) - obs = set(table.ids(axis='sample')) + obs = set(table.ids(axis="sample")) self.assertEqual(obs, exp) def test_build_biom_tables_duplicated_samples_not_merge(self): analysis = self._create_analyses_with_samples() grouped_samples = { - '18S || algorithm': [ - (4, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']), - (5, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'])]} + "18S || algorithm": [ + (4, ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"]), + (5, ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"]), + ] + } obs_bioms = analysis._build_biom_tables(grouped_samples, True) obs = [(a, basename(b)) for a, b, _ in obs_bioms] - biom_fp = ( - "%s_analysis_18S_algorithm.biom" % analysis.id) - self.assertEqual(obs, [('18S', biom_fp)]) + biom_fp = "%s_analysis_18S_algorithm.biom" % analysis.id + self.assertEqual(obs, [("18S", biom_fp)]) table = load_table(obs_bioms[0][1]) - obs = set(table.ids(axis='sample')) - exp = {'4.1.SKD8.640184', '4.1.SKB7.640196', '4.1.SKB8.640193', - '5.1.SKB8.640193', '5.1.SKB7.640196', '5.1.SKD8.640184'} + obs = set(table.ids(axis="sample")) + exp = { + "4.1.SKD8.640184", + "4.1.SKB7.640196", + "4.1.SKB8.640193", + "5.1.SKB8.640193", + "5.1.SKB7.640196", + "5.1.SKD8.640184", + } self.assertCountEqual(obs, exp) def test_build_biom_tables_raise_error_due_to_sample_selection(self): grouped_samples = { - '18S || algorithm': [ - (4, ['sample_name_1', 'sample_name_2', 'sample_name_3'])]} + "18S || algorithm": [ + (4, ["sample_name_1", "sample_name_2", 
"sample_name_3"]) + ] + } with self.assertRaises(RuntimeError): self.analysis._build_biom_tables(grouped_samples) @@ -568,17 +702,24 @@ def test_build_files(self): # testing that the generated files have the same sample ids biom_fp = biom_tables[0][1] - biom_ids = load_table(biom_fp).ids(axis='sample') - mapping_fp = qdb.util.get_filepath_information( - analysis.mapping_file)['fullpath'] + biom_ids = load_table(biom_fp).ids(axis="sample") + mapping_fp = qdb.util.get_filepath_information(analysis.mapping_file)[ + "fullpath" + ] mf_ids = qdb.metadata_template.util.load_template_to_dataframe( - mapping_fp, index='#SampleID').index + mapping_fp, index="#SampleID" + ).index self.assertCountEqual(biom_ids, mf_ids) # now that the samples have been prefixed - exp = ['1.SKM9.640192', '1.SKM4.640180', '1.SKD8.640184', - '1.SKB8.640193', '1.SKB7.640196'] + exp = [ + "1.SKM9.640192", + "1.SKM4.640180", + "1.SKD8.640184", + "1.SKB8.640193", + "1.SKB7.640196", + ] self.assertCountEqual(biom_ids, exp) def test_build_files_post_processing_cmd(self): @@ -589,11 +730,11 @@ def test_build_files_post_processing_cmd(self): # to a known value. Then test for it. # qiita_db/test/support_files/worker.py will work w/py2.7 & 3.6 envs. results = {} - results['script_env'] = 'source deactivate; source activate qiita;' - results['script_path'] = 'qiita_db/test/support_files/worker.py' + results["script_env"] = "source deactivate; source activate qiita;" + results["script_path"] = "qiita_db/test/support_files/worker.py" # no additional parameters are needed for worker.py # fp_biom and fp_archive will be generated by build_files() - results['script_params'] = {} + results["script_params"] = {} # convert to json representation and store in PostgreSQL results = dumps(results) @@ -612,13 +753,20 @@ def test_build_files_post_processing_cmd(self): # output archive-artifact data. 
self.assertEqual(2, len(biom_files)) aid = analysis.id - exp = [('%d_analysis_18S_PickclosedreferenceOTUsSplitlibraries' - 'FASTQ.biom' % aid, None), - ('%d_analysis_18S_PickclosedreferenceOTUsSplitlibraries' - 'FASTQ.biom' % aid, 'archive_%d.tre' % aid)] - obs = [(basename(fp1), - basename(fp2) if fp2 is not None else None) - for _, fp1, fp2 in biom_files] + exp = [ + ( + "%d_analysis_18S_PickclosedreferenceOTUsSplitlibrariesFASTQ.biom" % aid, + None, + ), + ( + "%d_analysis_18S_PickclosedreferenceOTUsSplitlibrariesFASTQ.biom" % aid, + "archive_%d.tre" % aid, + ), + ] + obs = [ + (basename(fp1), basename(fp2) if fp2 is not None else None) + for _, fp1, fp2 in biom_files + ] self.assertEqual(obs, exp) # cleanup (assume command was NULL previously) @@ -631,15 +779,37 @@ def test_build_files_merge_duplicated_sample_ids(self): user = qdb.user.User("demo@microbio.me") dflt_analysis = user.default_analysis dflt_analysis.add_samples( - {4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196', - '1.SKM9.640192', '1.SKM4.640180'], - 5: ['1.SKB8.640193', '1.SKB7.640196', '1.SKM9.640192', - '1.SKM4.640180', '1.SKD8.640184'], - 6: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196', - '1.SKM9.640192', '1.SKM4.640180']}) + { + 4: [ + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + ], + 5: [ + "1.SKB8.640193", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + "1.SKD8.640184", + ], + 6: [ + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + ], + } + ) new = qdb.analysis.Analysis.create( - user, "newAnalysis", "A New Analysis", from_default=True, - merge_duplicated_sample_ids=True) + user, + "newAnalysis", + "A New Analysis", + from_default=True, + merge_duplicated_sample_ids=True, + ) self._wait_for_jobs(new) @@ -648,22 +818,33 @@ def test_build_files_merge_duplicated_sample_ids(self): # testing that the generated files have the same sample ids biom_ids = [] for _, fp, _ in biom_tables: - biom_ids.extend(load_table(fp).ids(axis='sample')) + biom_ids.extend(load_table(fp).ids(axis="sample")) - mapping_fp = qdb.util.get_filepath_information( - new.mapping_file)['fullpath'] + mapping_fp = qdb.util.get_filepath_information(new.mapping_file)["fullpath"] mf_ids = qdb.metadata_template.util.load_template_to_dataframe( - mapping_fp, index='#SampleID').index + mapping_fp, index="#SampleID" + ).index self.assertCountEqual(biom_ids, mf_ids) # now that the samples have been prefixed - exp = ['4.1.SKM9.640192', '4.1.SKM4.640180', '4.1.SKD8.640184', - '4.1.SKB8.640193', '4.1.SKB7.640196', - '5.1.SKM9.640192', '5.1.SKM4.640180', '5.1.SKD8.640184', - '5.1.SKB8.640193', '5.1.SKB7.640196', - '6.1.SKM9.640192', '6.1.SKM4.640180', '6.1.SKD8.640184', - '6.1.SKB8.640193', '6.1.SKB7.640196'] + exp = [ + "4.1.SKM9.640192", + "4.1.SKM4.640180", + "4.1.SKD8.640184", + "4.1.SKB8.640193", + "4.1.SKB7.640196", + "5.1.SKM9.640192", + "5.1.SKM4.640180", + "5.1.SKD8.640184", + "5.1.SKB8.640193", + "5.1.SKB7.640196", + "6.1.SKM9.640192", + "6.1.SKM4.640180", + "6.1.SKD8.640184", + "6.1.SKB8.640193", + "6.1.SKB7.640196", + ] self.assertCountEqual(biom_ids, exp) def test_add_file(self): @@ -680,7 +861,7 @@ def test_is_public_make_public(self): # testing successfully making public # 4 is the only artifact being used in _create_analyses_with_samples - qdb.artifact.Artifact(4).visibility = 'public' + qdb.artifact.Artifact(4).visibility = "public" analysis.make_public() self.assertTrue(analysis.is_public) @@ -688,7 +869,7 @@ def 
test_is_public_make_public(self): def test_slurm_reservation(self): analysis = qdb.analysis.Analysis(1) self.assertIsNone(analysis.slurm_reservation) - text = 'thisisatest' + text = "thisisatest" analysis.slurm_reservation = text self.assertEqual(analysis._slurm_reservation(), [text]) self.assertIsNone(analysis.slurm_reservation) diff --git a/qiita_db/test/test_archive.py b/qiita_db/test/test_archive.py index f1015897e..cca1f948d 100644 --- a/qiita_db/test/test_archive.py +++ b/qiita_db/test/test_archive.py @@ -6,11 +6,11 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main from json import dumps +from unittest import TestCase, main -from qiita_core.util import qiita_test_checker import qiita_db as qdb +from qiita_core.util import qiita_test_checker @qiita_test_checker() @@ -21,68 +21,76 @@ def test_insert_from_biom_and_retrieve_feature_values(self): # 1 - to test error as it's FASTQ with self.assertRaises(ValueError) as err: - qdb.archive.Archive.insert_from_artifact( - qdb.artifact.Artifact(1), {}) + qdb.archive.Archive.insert_from_artifact(qdb.artifact.Artifact(1), {}) self.assertEqual( - str(err.exception), 'To archive artifact must be BIOM but FASTQ') + str(err.exception), "To archive artifact must be BIOM but FASTQ" + ) # 7 - to test error due to not filepath biom aid = 7 qdb.sql_connection.perform_as_transaction( - "DELETE FROM qiita.artifact_filepath " - "WHERE artifact_id = %d" % aid) + "DELETE FROM qiita.artifact_filepath WHERE artifact_id = %d" % aid + ) with self.assertRaises(ValueError) as err: - qdb.archive.Archive.insert_from_artifact( - qdb.artifact.Artifact(aid), {}) - self.assertEqual( - str(err.exception), 'The artifact has no biom files') + qdb.archive.Archive.insert_from_artifact(qdb.artifact.Artifact(aid), {}) + self.assertEqual(str(err.exception), "The artifact has no biom files") # testing specific artifacts and parameters for i in [4, 5, 8, 9]: qdb.archive.Archive.insert_from_artifact( - qdb.artifact.Artifact(i), { - 'featureA%d' % i: dumps({'valuesA': 'vA', 'int': 1}), - 'featureB%d' % i: dumps({'valuesB': 'vB', 'float': 1.1})}) + qdb.artifact.Artifact(i), + { + "featureA%d" % i: dumps({"valuesA": "vA", "int": 1}), + "featureB%d" % i: dumps({"valuesB": "vB", "float": 1.1}), + }, + ) # now let's tests that all the inserts happen as expected exp = { - 'featureA4': dumps({'valuesA': 'vA', 'int': 1}), - 'featureA5': dumps({'valuesA': 'vA', 'int': 1}), - 'featureB9': dumps({'valuesB': 'vB', 'float': 1.1}), - 'featureB8': dumps({'valuesB': 'vB', 'float': 1.1}), - 'featureB5': dumps({'valuesB': 'vB', 'float': 1.1}), - 'featureB4': dumps({'valuesB': 'vB', 'float': 1.1}), - 'featureA8': dumps({'valuesA': 'vA', 'int': 1}), - 'featureA9': dumps({'valuesA': 'vA', 'int': 1})} + "featureA4": dumps({"valuesA": "vA", "int": 1}), + "featureA5": dumps({"valuesA": "vA", "int": 1}), + "featureB9": dumps({"valuesB": "vB", "float": 1.1}), + "featureB8": dumps({"valuesB": "vB", "float": 1.1}), + "featureB5": dumps({"valuesB": "vB", "float": 1.1}), + "featureB4": dumps({"valuesB": "vB", "float": 1.1}), + "featureA8": dumps({"valuesA": "vA", "int": 1}), + "featureA9": dumps({"valuesA": "vA", "int": 1}), + } obs = qdb.archive.Archive.retrieve_feature_values() self.assertEqual(obs, exp) # that we retrieve only one kind - exp = dumps({ - 'featureA9': dumps({'valuesA': 'vA', 'int': 1}), - 'featureB9': dumps({'valuesB': 'vB', 'float': 1.1}), - }) - obs = 
qdb.archive.Archive.retrieve_feature_values( - 'Single Rarefaction | N/A') + exp = dumps( + { + "featureA9": dumps({"valuesA": "vA", "int": 1}), + "featureB9": dumps({"valuesB": "vB", "float": 1.1}), + } + ) + obs = qdb.archive.Archive.retrieve_feature_values("Single Rarefaction | N/A") self.assertEqual(dumps(obs), exp) # and nothing exp = {} - obs = qdb.archive.Archive.retrieve_feature_values('Nothing') + obs = qdb.archive.Archive.retrieve_feature_values("Nothing") self.assertEqual(obs, exp) # now merging_schemes should have 3 elements; note that 2 is empty # string because we are inserting an artifact [8] that was a direct # upload - self.assertDictEqual(qdb.archive.Archive.merging_schemes(), { - 1: 'Pick closed-reference OTUs | Split libraries FASTQ', - 2: '', 3: 'Single Rarefaction | N/A'}) + self.assertDictEqual( + qdb.archive.Archive.merging_schemes(), + { + 1: "Pick closed-reference OTUs | Split libraries FASTQ", + 2: "", + 3: "Single Rarefaction | N/A", + }, + ) def test_get_merging_scheme_from_job(self): - exp = 'Split libraries FASTQ | N/A' + exp = "Split libraries FASTQ | N/A" obs = qdb.archive.Archive.get_merging_scheme_from_job( - qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b')) + qdb.processing_job.ProcessingJob("6d368e16-2242-4cf8-87b4-a5dc40bb890b") + ) self.assertEqual(obs, exp) with qdb.sql_connection.TRN: @@ -91,10 +99,10 @@ def test_get_merging_scheme_from_job(self): qdb.sql_connection.TRN.add(sql) qdb.sql_connection.TRN.execute() - exp = 'Split libraries FASTQ' + exp = "Split libraries FASTQ" obs = qdb.archive.Archive.get_merging_scheme_from_job( - qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b')) + qdb.processing_job.ProcessingJob("6d368e16-2242-4cf8-87b4-a5dc40bb890b") + ) self.assertEqual(obs, exp) # returning to previous state @@ -104,5 +112,5 @@ def test_get_merging_scheme_from_job(self): qdb.sql_connection.TRN.execute() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_artifact.py b/qiita_db/test/test_artifact.py index 2319acfaa..4b47db89a 100644 --- a/qiita_db/test/test_artifact.py +++ b/qiita_db/test/test_artifact.py @@ -6,38 +6,40 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main -from tempfile import mkstemp, mkdtemp from datetime import datetime -from os import close, remove -from os.path import exists, join, basename, dirname, abspath -from shutil import copyfile from functools import partial from json import dumps +from os import close, remove +from os.path import abspath, basename, dirname, exists, join +from shutil import copyfile +from tempfile import mkdtemp, mkstemp +from unittest import TestCase, main -import pandas as pd import networkx as nx +import pandas as pd from biom import example_table as et from biom.util import biom_open -from qiita_core.util import qiita_test_checker -from qiita_core.testing import wait_for_processing_job import qiita_db as qdb +from qiita_core.testing import wait_for_processing_job +from qiita_core.util import qiita_test_checker class ArtifactTestsReadOnly(TestCase): def test_iter(self): - obs = list(qdb.artifact.Artifact.iter_by_visibility('public')) + obs = list(qdb.artifact.Artifact.iter_by_visibility("public")) self.assertEqual(obs, []) - obs = list(qdb.artifact.Artifact.iter_by_visibility('private')) - exp = [qdb.artifact.Artifact(1), - qdb.artifact.Artifact(2), - qdb.artifact.Artifact(3), - qdb.artifact.Artifact(4), - qdb.artifact.Artifact(5), - qdb.artifact.Artifact(6), - qdb.artifact.Artifact(7)] + obs = list(qdb.artifact.Artifact.iter_by_visibility("private")) + exp = [ + qdb.artifact.Artifact(1), + qdb.artifact.Artifact(2), + qdb.artifact.Artifact(3), + qdb.artifact.Artifact(4), + qdb.artifact.Artifact(5), + qdb.artifact.Artifact(6), + qdb.artifact.Artifact(7), + ] self.assertEqual(obs, exp) exp.extend([qdb.artifact.Artifact(8), qdb.artifact.Artifact(9)]) @@ -45,47 +47,55 @@ def test_iter(self): def test_create_type(self): obs = qdb.artifact.Artifact.types() - exp = [['BIOM', 'BIOM table', False, False, True], - ['Demultiplexed', 'Demultiplexed and QC sequences', True, True, - False], - ['FASTA', None, False, False, False], - ['FASTA_Sanger', None, False, False, False], - ['FASTQ', None, False, False, True], - ['SFF', None, False, False, False], - ['per_sample_FASTQ', None, True, False, True], - ['beta_div_plots', 'Qiime 1 beta diversity results', False, - False, False], - ['rarefaction_curves', 'Rarefaction curves', False, False, - False], - ['taxa_summary', 'Taxa summary plots', False, False, False]] + exp = [ + ["BIOM", "BIOM table", False, False, True], + ["Demultiplexed", "Demultiplexed and QC sequences", True, True, False], + ["FASTA", None, False, False, False], + ["FASTA_Sanger", None, False, False, False], + ["FASTQ", None, False, False, True], + ["SFF", None, False, False, False], + ["per_sample_FASTQ", None, True, False, True], + ["beta_div_plots", "Qiime 1 beta diversity results", False, False, False], + ["rarefaction_curves", "Rarefaction curves", False, False, False], + ["taxa_summary", "Taxa summary plots", False, False, False], + ] self.assertCountEqual(obs, exp) qdb.artifact.Artifact.create_type( - "NewType", "NewTypeDesc", False, False, False, - [("log", False), ("raw_forward_seqs", True)]) + "NewType", + "NewTypeDesc", + False, + False, + False, + [("log", False), ("raw_forward_seqs", True)], + ) obs = qdb.artifact.Artifact.types() - exp = [['BIOM', 'BIOM table', False, False, True], - ['Demultiplexed', 'Demultiplexed and QC sequences', True, True, - False], - ['FASTA', None, False, False, False], - ['FASTA_Sanger', None, False, False, False], - ['FASTQ', None, False, False, True], 
- ['SFF', None, False, False, False], - ['per_sample_FASTQ', None, True, False, True], - ['beta_div_plots', 'Qiime 1 beta diversity results', False, - False, False], - ['rarefaction_curves', 'Rarefaction curves', False, False, - False], - ['taxa_summary', 'Taxa summary plots', False, False, False], - ['NewType', 'NewTypeDesc', False, False, False]] + exp = [ + ["BIOM", "BIOM table", False, False, True], + ["Demultiplexed", "Demultiplexed and QC sequences", True, True, False], + ["FASTA", None, False, False, False], + ["FASTA_Sanger", None, False, False, False], + ["FASTQ", None, False, False, True], + ["SFF", None, False, False, False], + ["per_sample_FASTQ", None, True, False, True], + ["beta_div_plots", "Qiime 1 beta diversity results", False, False, False], + ["rarefaction_curves", "Rarefaction curves", False, False, False], + ["taxa_summary", "Taxa summary plots", False, False, False], + ["NewType", "NewTypeDesc", False, False, False], + ] self.assertCountEqual(obs, exp) - self.assertTrue(exists(qdb.util.get_mountpoint('NewType')[0][1])) + self.assertTrue(exists(qdb.util.get_mountpoint("NewType")[0][1])) with self.assertRaises(qdb.exceptions.QiitaDBDuplicateError): qdb.artifact.Artifact.create_type( - "NewType", "NewTypeDesc", False, False, False, - [("log", False), ("raw_forward_seqs", True)]) + "NewType", + "NewTypeDesc", + False, + False, + False, + [("log", False), ("raw_forward_seqs", True)], + ) def test_name(self): self.assertEqual(qdb.artifact.Artifact(1).name, "Raw data 1") @@ -94,40 +104,56 @@ def test_name(self): self.assertEqual(qdb.artifact.Artifact(4).name, "BIOM") def test_timestamp(self): - self.assertEqual(qdb.artifact.Artifact(1).timestamp, - datetime(2012, 10, 1, 9, 30, 27)) - self.assertEqual(qdb.artifact.Artifact(2).timestamp, - datetime(2012, 10, 1, 10, 30, 27)) - self.assertEqual(qdb.artifact.Artifact(3).timestamp, - datetime(2012, 10, 1, 11, 30, 27)) - self.assertEqual(qdb.artifact.Artifact(4).timestamp, - datetime(2012, 10, 2, 17, 30, 00)) + self.assertEqual( + qdb.artifact.Artifact(1).timestamp, datetime(2012, 10, 1, 9, 30, 27) + ) + self.assertEqual( + qdb.artifact.Artifact(2).timestamp, datetime(2012, 10, 1, 10, 30, 27) + ) + self.assertEqual( + qdb.artifact.Artifact(3).timestamp, datetime(2012, 10, 1, 11, 30, 27) + ) + self.assertEqual( + qdb.artifact.Artifact(4).timestamp, datetime(2012, 10, 2, 17, 30, 00) + ) def test_processing_parameters(self): self.assertIsNone(qdb.artifact.Artifact(1).processing_parameters) obs = qdb.artifact.Artifact(2).processing_parameters exp = qdb.software.Parameters.load( qdb.software.Command(1), - values_dict={'max_barcode_errors': '1.5', 'sequence_max_n': '0', - 'max_bad_run_length': '3', 'rev_comp': 'False', - 'phred_quality_threshold': '3', 'input_data': '1', - 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'False', - 'min_per_read_length_fraction': '0.75', - 'barcode_type': 'golay_12', - 'phred_offset': 'auto'}) + values_dict={ + "max_barcode_errors": "1.5", + "sequence_max_n": "0", + "max_bad_run_length": "3", + "rev_comp": "False", + "phred_quality_threshold": "3", + "input_data": "1", + "rev_comp_barcode": "False", + "rev_comp_mapping_barcodes": "False", + "min_per_read_length_fraction": "0.75", + "barcode_type": "golay_12", + "phred_offset": "auto", + }, + ) self.assertEqual(obs, exp) obs = qdb.artifact.Artifact(3).processing_parameters exp = qdb.software.Parameters.load( qdb.software.Command(1), - values_dict={'max_barcode_errors': '1.5', 'sequence_max_n': '0', - 'max_bad_run_length': '3', 'rev_comp': 
'False', - 'phred_quality_threshold': '3', 'input_data': '1', - 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'True', - 'min_per_read_length_fraction': '0.75', - 'barcode_type': 'golay_12', - 'phred_offset': 'auto'}) + values_dict={ + "max_barcode_errors": "1.5", + "sequence_max_n": "0", + "max_bad_run_length": "3", + "rev_comp": "False", + "phred_quality_threshold": "3", + "input_data": "1", + "rev_comp_barcode": "False", + "rev_comp_mapping_barcodes": "True", + "min_per_read_length_fraction": "0.75", + "barcode_type": "golay_12", + "phred_offset": "auto", + }, + ) self.assertEqual(obs, exp) def test_visibility(self): @@ -135,10 +161,8 @@ def test_visibility(self): def test_artifact_type(self): self.assertEqual(qdb.artifact.Artifact(1).artifact_type, "FASTQ") - self.assertEqual(qdb.artifact.Artifact(2).artifact_type, - "Demultiplexed") - self.assertEqual(qdb.artifact.Artifact(3).artifact_type, - "Demultiplexed") + self.assertEqual(qdb.artifact.Artifact(2).artifact_type, "Demultiplexed") + self.assertEqual(qdb.artifact.Artifact(3).artifact_type, "Demultiplexed") self.assertEqual(qdb.artifact.Artifact(4).artifact_type, "BIOM") def test_data_type(self): @@ -157,50 +181,48 @@ def test_is_submitted_to_ebi(self): self.assertTrue(qdb.artifact.Artifact(2).is_submitted_to_ebi) self.assertFalse(qdb.artifact.Artifact(3).is_submitted_to_ebi) - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): qdb.artifact.Artifact(1).is_submitted_to_ebi - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): qdb.artifact.Artifact(4).is_submitted_to_ebi def test_ebi_run_accessions(self): - exp = {'1.SKB1.640202': 'ERR0000001', - '1.SKB2.640194': 'ERR0000002', - '1.SKB3.640195': 'ERR0000003', - '1.SKB4.640189': 'ERR0000004', - '1.SKB5.640181': 'ERR0000005', - '1.SKB6.640176': 'ERR0000006', - '1.SKB7.640196': 'ERR0000007', - '1.SKB8.640193': 'ERR0000008', - '1.SKB9.640200': 'ERR0000009', - '1.SKD1.640179': 'ERR0000010', - '1.SKD2.640178': 'ERR0000011', - '1.SKD3.640198': 'ERR0000012', - '1.SKD4.640185': 'ERR0000013', - '1.SKD5.640186': 'ERR0000014', - '1.SKD6.640190': 'ERR0000015', - '1.SKD7.640191': 'ERR0000016', - '1.SKD8.640184': 'ERR0000017', - '1.SKD9.640182': 'ERR0000018', - '1.SKM1.640183': 'ERR0000019', - '1.SKM2.640199': 'ERR0000020', - '1.SKM3.640197': 'ERR0000021', - '1.SKM4.640180': 'ERR0000022', - '1.SKM5.640177': 'ERR0000023', - '1.SKM6.640187': 'ERR0000024', - '1.SKM7.640188': 'ERR0000025', - '1.SKM8.640201': 'ERR0000026', - '1.SKM9.640192': 'ERR0000027'} + exp = { + "1.SKB1.640202": "ERR0000001", + "1.SKB2.640194": "ERR0000002", + "1.SKB3.640195": "ERR0000003", + "1.SKB4.640189": "ERR0000004", + "1.SKB5.640181": "ERR0000005", + "1.SKB6.640176": "ERR0000006", + "1.SKB7.640196": "ERR0000007", + "1.SKB8.640193": "ERR0000008", + "1.SKB9.640200": "ERR0000009", + "1.SKD1.640179": "ERR0000010", + "1.SKD2.640178": "ERR0000011", + "1.SKD3.640198": "ERR0000012", + "1.SKD4.640185": "ERR0000013", + "1.SKD5.640186": "ERR0000014", + "1.SKD6.640190": "ERR0000015", + "1.SKD7.640191": "ERR0000016", + "1.SKD8.640184": "ERR0000017", + "1.SKD9.640182": "ERR0000018", + "1.SKM1.640183": "ERR0000019", + "1.SKM2.640199": "ERR0000020", + "1.SKM3.640197": "ERR0000021", + "1.SKM4.640180": "ERR0000022", + "1.SKM5.640177": "ERR0000023", + "1.SKM6.640187": "ERR0000024", + "1.SKM7.640188": "ERR0000025", + "1.SKM8.640201": 
"ERR0000026", + "1.SKM9.640192": "ERR0000027", + } self.assertEqual(qdb.artifact.Artifact(2).ebi_run_accessions, exp) self.assertEqual(qdb.artifact.Artifact(3).ebi_run_accessions, dict()) - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): qdb.artifact.Artifact(1).ebi_run_accessions - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): qdb.artifact.Artifact(4).ebi_run_accessions def test_can_be_submitted_to_vamps(self): @@ -210,29 +232,32 @@ def test_can_be_submitted_to_vamps(self): self.assertFalse(qdb.artifact.Artifact(4).can_be_submitted_to_vamps) def test_is_submitted_to_vamps(self): - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): self.assertFalse(qdb.artifact.Artifact(1).is_submitted_to_vamps) self.assertFalse(qdb.artifact.Artifact(2).is_submitted_to_vamps) self.assertFalse(qdb.artifact.Artifact(3).is_submitted_to_vamps) - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): self.assertFalse(qdb.artifact.Artifact(4).is_submitted_to_vamps) def test_filepaths(self): - db_test_raw_dir = qdb.util.get_mountpoint('raw_data')[0][1] + db_test_raw_dir = qdb.util.get_mountpoint("raw_data")[0][1] path_builder = partial(join, db_test_raw_dir) - exp_fps = [{'fp_id': 1, - 'fp': path_builder("1_s_G1_L001_sequences.fastq.gz"), - 'fp_type': "raw_forward_seqs", - 'checksum': '2125826711', - 'fp_size': 58}, - {'fp_id': 2, - 'fp': path_builder( - "1_s_G1_L001_sequences_barcodes.fastq.gz"), - 'fp_type': "raw_barcodes", - 'checksum': '2125826711', - 'fp_size': 58}] + exp_fps = [ + { + "fp_id": 1, + "fp": path_builder("1_s_G1_L001_sequences.fastq.gz"), + "fp_type": "raw_forward_seqs", + "checksum": "2125826711", + "fp_size": 58, + }, + { + "fp_id": 2, + "fp": path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), + "fp_type": "raw_barcodes", + "checksum": "2125826711", + "fp_size": 58, + }, + ] self.assertEqual(qdb.artifact.Artifact(1).filepaths, exp_fps) def test_parents(self): @@ -255,15 +280,22 @@ def test_create_lineage_graph_from_edge_list_empty(self): def test_create_lineage_graph_from_edge_list(self): tester = qdb.artifact.Artifact(1) obs = tester._create_lineage_graph_from_edge_list( - [(1, 2), (2, 4), (1, 3), (3, 4)]) + [(1, 2), (2, 4), (1, 3), (3, 4)] + ) self.assertTrue(isinstance(obs, nx.DiGraph)) - exp = [qdb.artifact.Artifact(1), qdb.artifact.Artifact(2), - qdb.artifact.Artifact(3), qdb.artifact.Artifact(4)] + exp = [ + qdb.artifact.Artifact(1), + qdb.artifact.Artifact(2), + qdb.artifact.Artifact(3), + qdb.artifact.Artifact(4), + ] self.assertCountEqual(obs.nodes(), exp) - exp = [(qdb.artifact.Artifact(1), qdb.artifact.Artifact(2)), - (qdb.artifact.Artifact(2), qdb.artifact.Artifact(4)), - (qdb.artifact.Artifact(1), qdb.artifact.Artifact(3)), - (qdb.artifact.Artifact(3), qdb.artifact.Artifact(4))] + exp = [ + (qdb.artifact.Artifact(1), qdb.artifact.Artifact(2)), + (qdb.artifact.Artifact(2), qdb.artifact.Artifact(4)), + (qdb.artifact.Artifact(1), qdb.artifact.Artifact(3)), + (qdb.artifact.Artifact(3), qdb.artifact.Artifact(4)), + ] self.assertCountEqual(obs.edges(), exp) def test_ancestors(self): @@ -295,40 +327,58 @@ def test_ancestors(self): obs = qdb.artifact.Artifact(4).ancestors 
self.assertTrue(isinstance(obs, nx.DiGraph)) obs_nodes = obs.nodes() - exp_nodes = [qdb.artifact.Artifact(1), qdb.artifact.Artifact(2), - qdb.artifact.Artifact(4)] + exp_nodes = [ + qdb.artifact.Artifact(1), + qdb.artifact.Artifact(2), + qdb.artifact.Artifact(4), + ] self.assertCountEqual(obs_nodes, exp_nodes) obs_edges = obs.edges() - exp_edges = [(qdb.artifact.Artifact(1), qdb.artifact.Artifact(2)), - (qdb.artifact.Artifact(2), qdb.artifact.Artifact(4))] + exp_edges = [ + (qdb.artifact.Artifact(1), qdb.artifact.Artifact(2)), + (qdb.artifact.Artifact(2), qdb.artifact.Artifact(4)), + ] self.assertCountEqual(obs_edges, exp_edges) def test_descendants(self): obs = qdb.artifact.Artifact(1).descendants self.assertTrue(isinstance(obs, nx.DiGraph)) obs_nodes = obs.nodes() - exp_nodes = [qdb.artifact.Artifact(1), qdb.artifact.Artifact(2), - qdb.artifact.Artifact(3), qdb.artifact.Artifact(4), - qdb.artifact.Artifact(5), qdb.artifact.Artifact(6)] + exp_nodes = [ + qdb.artifact.Artifact(1), + qdb.artifact.Artifact(2), + qdb.artifact.Artifact(3), + qdb.artifact.Artifact(4), + qdb.artifact.Artifact(5), + qdb.artifact.Artifact(6), + ] self.assertCountEqual(obs_nodes, exp_nodes) obs_edges = obs.edges() - exp_edges = [(qdb.artifact.Artifact(1), qdb.artifact.Artifact(2)), - (qdb.artifact.Artifact(1), qdb.artifact.Artifact(3)), - (qdb.artifact.Artifact(2), qdb.artifact.Artifact(4)), - (qdb.artifact.Artifact(2), qdb.artifact.Artifact(5)), - (qdb.artifact.Artifact(2), qdb.artifact.Artifact(6))] + exp_edges = [ + (qdb.artifact.Artifact(1), qdb.artifact.Artifact(2)), + (qdb.artifact.Artifact(1), qdb.artifact.Artifact(3)), + (qdb.artifact.Artifact(2), qdb.artifact.Artifact(4)), + (qdb.artifact.Artifact(2), qdb.artifact.Artifact(5)), + (qdb.artifact.Artifact(2), qdb.artifact.Artifact(6)), + ] self.assertCountEqual(obs_edges, exp_edges) obs = qdb.artifact.Artifact(2).descendants self.assertTrue(isinstance(obs, nx.DiGraph)) obs_nodes = obs.nodes() - exp_nodes = [qdb.artifact.Artifact(2), qdb.artifact.Artifact(4), - qdb.artifact.Artifact(5), qdb.artifact.Artifact(6)] + exp_nodes = [ + qdb.artifact.Artifact(2), + qdb.artifact.Artifact(4), + qdb.artifact.Artifact(5), + qdb.artifact.Artifact(6), + ] self.assertCountEqual(obs_nodes, exp_nodes) obs_edges = obs.edges() - exp_edges = [(qdb.artifact.Artifact(2), qdb.artifact.Artifact(4)), - (qdb.artifact.Artifact(2), qdb.artifact.Artifact(5)), - (qdb.artifact.Artifact(2), qdb.artifact.Artifact(6))] + exp_edges = [ + (qdb.artifact.Artifact(2), qdb.artifact.Artifact(4)), + (qdb.artifact.Artifact(2), qdb.artifact.Artifact(5)), + (qdb.artifact.Artifact(2), qdb.artifact.Artifact(6)), + ] self.assertCountEqual(obs_edges, exp_edges) obs = qdb.artifact.Artifact(3).descendants @@ -354,37 +404,51 @@ def test_descendants_with_jobs(self): # Add an HTML summary job in one artifact in a non-success statuts, to # make sure that it doesn't get returned in the graph html_job = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), + qdb.user.User("test@foo.bar"), qdb.software.Parameters.load( qdb.software.Command.get_html_generator(A(6).artifact_type), - values_dict={'input_data': 6})) - html_job._set_status('running') + values_dict={"input_data": 6}, + ), + ) + html_job._set_status("running") # as jobs are created at random we will only check that the artifacts # are there and that the number of jobs matches - exp_nodes = [('artifact', A(1)), ('artifact', A(2)), - ('artifact', A(3)), ('artifact', A(4)), - ('artifact', A(5)), ('artifact', A(6))] + exp_nodes = [ + 
("artifact", A(1)), + ("artifact", A(2)), + ("artifact", A(3)), + ("artifact", A(4)), + ("artifact", A(5)), + ("artifact", A(6)), + ] for e in exp_nodes: self.assertIn(e, obs_nodes) - self.assertEqual(5, len([e for dt, e in obs_nodes if dt == 'job'])) + self.assertEqual(5, len([e for dt, e in obs_nodes if dt == "job"])) obs_edges = obs.edges() # as jobs are created at random we will only check the number of pairs # matches and they are instances of what we expect self.assertEqual(10, len(obs_edges)) - self.assertEqual(2, len([x for x, y in obs_edges - if x[1] == A(1) and y[0] == 'job'])) - self.assertEqual(3, len([x for x, y in obs_edges - if x[1] == A(2) and y[0] == 'job'])) - self.assertEqual(1, len([y for x, y in obs_edges - if y[1] == A(2) and x[0] == 'job'])) - self.assertEqual(1, len([y for x, y in obs_edges - if y[1] == A(3) and x[0] == 'job'])) - self.assertEqual(1, len([y for x, y in obs_edges - if y[1] == A(4) and x[0] == 'job'])) - self.assertEqual(1, len([y for x, y in obs_edges - if y[1] == A(5) and x[0] == 'job'])) - self.assertEqual(1, len([y for x, y in obs_edges - if y[1] == A(6) and x[0] == 'job'])) + self.assertEqual( + 2, len([x for x, y in obs_edges if x[1] == A(1) and y[0] == "job"]) + ) + self.assertEqual( + 3, len([x for x, y in obs_edges if x[1] == A(2) and y[0] == "job"]) + ) + self.assertEqual( + 1, len([y for x, y in obs_edges if y[1] == A(2) and x[0] == "job"]) + ) + self.assertEqual( + 1, len([y for x, y in obs_edges if y[1] == A(3) and x[0] == "job"]) + ) + self.assertEqual( + 1, len([y for x, y in obs_edges if y[1] == A(4) and x[0] == "job"]) + ) + self.assertEqual( + 1, len([y for x, y in obs_edges if y[1] == A(5) and x[0] == "job"]) + ) + self.assertEqual( + 1, len([y for x, y in obs_edges if y[1] == A(6) and x[0] == "job"]) + ) obs = A(3).descendants self.assertTrue(isinstance(obs, nx.DiGraph)) @@ -401,14 +465,19 @@ def test_descendants_with_jobs(self): '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0, ' - '"phred_offset": "auto"}') - params = qdb.software.Parameters.load(qdb.software.Command(1), - json_str=json_str) + '"phred_offset": "auto"}' + ) + params = qdb.software.Parameters.load( + qdb.software.Command(1), json_str=json_str + ) wf = qdb.processing_job.ProcessingWorkflow.from_scratch( - qdb.user.User('test@foo.bar'), params, name='Test WF') + qdb.user.User("test@foo.bar"), params, name="Test WF" + ) parent = list(wf.graph.nodes())[0] - wf.add(qdb.software.DefaultParameters(10), - connections={parent: {'demultiplexed': 'input_data'}}) + wf.add( + qdb.software.DefaultParameters(10), + connections={parent: {"demultiplexed": "input_data"}}, + ) obs = A(1).descendants_with_jobs obs_edges = obs.edges() # We have 4 more edges than before. From artifact 1 to parent job, @@ -417,14 +486,17 @@ def test_descendants_with_jobs(self): self.assertEqual(len(obs_edges), 14) # We will check that the edges related with the "type" nodes (i.e. 
# the outputs of the jobs in construction) are present - self.assertEqual(1, len([y for x, y in obs_edges if x[0] == 'type'])) - self.assertEqual(2, len([y for x, y in obs_edges if y[0] == 'type'])) + self.assertEqual(1, len([y for x, y in obs_edges if x[0] == "type"])) + self.assertEqual(2, len([y for x, y in obs_edges if y[0] == "type"])) def test_children(self): exp = [qdb.artifact.Artifact(2), qdb.artifact.Artifact(3)] self.assertEqual(qdb.artifact.Artifact(1).children, exp) - exp = [qdb.artifact.Artifact(4), qdb.artifact.Artifact(5), - qdb.artifact.Artifact(6)] + exp = [ + qdb.artifact.Artifact(4), + qdb.artifact.Artifact(5), + qdb.artifact.Artifact(6), + ] self.assertEqual(qdb.artifact.Artifact(2).children, exp) self.assertEqual(qdb.artifact.Artifact(3).children, []) self.assertEqual(qdb.artifact.Artifact(4).children, []) @@ -433,60 +505,66 @@ def test_youngest_artifact(self): exp = qdb.artifact.Artifact(6) self.assertEqual(qdb.artifact.Artifact(1).youngest_artifact, exp) self.assertEqual(qdb.artifact.Artifact(2).youngest_artifact, exp) - self.assertEqual(qdb.artifact.Artifact(3).youngest_artifact, - qdb.artifact.Artifact(3)) + self.assertEqual( + qdb.artifact.Artifact(3).youngest_artifact, qdb.artifact.Artifact(3) + ) self.assertEqual(qdb.artifact.Artifact(6).youngest_artifact, exp) def test_prep_templates(self): self.assertEqual( qdb.artifact.Artifact(1).prep_templates, - [qdb.metadata_template.prep_template.PrepTemplate(1)]) + [qdb.metadata_template.prep_template.PrepTemplate(1)], + ) self.assertEqual( qdb.artifact.Artifact(2).prep_templates, - [qdb.metadata_template.prep_template.PrepTemplate(1)]) + [qdb.metadata_template.prep_template.PrepTemplate(1)], + ) self.assertEqual( qdb.artifact.Artifact(3).prep_templates, - [qdb.metadata_template.prep_template.PrepTemplate(1)]) + [qdb.metadata_template.prep_template.PrepTemplate(1)], + ) self.assertEqual( qdb.artifact.Artifact(4).prep_templates, - [qdb.metadata_template.prep_template.PrepTemplate(1)]) + [qdb.metadata_template.prep_template.PrepTemplate(1)], + ) def test_study(self): self.assertEqual(qdb.artifact.Artifact(1).study, qdb.study.Study(1)) self.assertIsNone(qdb.artifact.Artifact(9).study) def test_analysis(self): - self.assertEqual(qdb.artifact.Artifact(9).analysis, - qdb.analysis.Analysis(1)) + self.assertEqual(qdb.artifact.Artifact(9).analysis, qdb.analysis.Analysis(1)) self.assertIsNone(qdb.artifact.Artifact(1).analysis) def test_merging_scheme(self): - self.assertEqual(qdb.artifact.Artifact(1).merging_scheme, ('', '')) - self.assertEqual(qdb.artifact.Artifact(2).merging_scheme, - ('Split libraries FASTQ | N/A', 'N/A')) - self.assertEqual(qdb.artifact.Artifact(3).merging_scheme, - ('Split libraries FASTQ | N/A', 'N/A')) - self.assertEqual(qdb.artifact.Artifact(4).merging_scheme, - ('Pick closed-reference OTUs | Split libraries FASTQ', - 'QIIMEq2 v1.9.1')) - self.assertEqual(qdb.artifact.Artifact(5).merging_scheme, - ('Pick closed-reference OTUs | Split libraries FASTQ', - 'QIIMEq2 v1.9.1')) + self.assertEqual(qdb.artifact.Artifact(1).merging_scheme, ("", "")) + self.assertEqual( + qdb.artifact.Artifact(2).merging_scheme, + ("Split libraries FASTQ | N/A", "N/A"), + ) + self.assertEqual( + qdb.artifact.Artifact(3).merging_scheme, + ("Split libraries FASTQ | N/A", "N/A"), + ) + self.assertEqual( + qdb.artifact.Artifact(4).merging_scheme, + ("Pick closed-reference OTUs | Split libraries FASTQ", "QIIMEq2 v1.9.1"), + ) + self.assertEqual( + qdb.artifact.Artifact(5).merging_scheme, + ("Pick closed-reference OTUs | Split libraries 
FASTQ", "QIIMEq2 v1.9.1"), + ) def test_jobs(self): # Returning all jobs obs = qdb.artifact.Artifact(1).jobs(show_hidden=True) exp = [ - qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b'), - qdb.processing_job.ProcessingJob( - '4c7115e8-4c8e-424c-bf25-96c292ca1931'), - qdb.processing_job.ProcessingJob( - '063e553b-327c-4818-ab4a-adfe58e49860'), - qdb.processing_job.ProcessingJob( - 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d'), - qdb.processing_job.ProcessingJob( - 'b72369f9-a886-4193-8d3d-f7b504168e75')] + qdb.processing_job.ProcessingJob("6d368e16-2242-4cf8-87b4-a5dc40bb890b"), + qdb.processing_job.ProcessingJob("4c7115e8-4c8e-424c-bf25-96c292ca1931"), + qdb.processing_job.ProcessingJob("063e553b-327c-4818-ab4a-adfe58e49860"), + qdb.processing_job.ProcessingJob("bcc7ebcd-39c1-43e4-af2d-822e3589f14d"), + qdb.processing_job.ProcessingJob("b72369f9-a886-4193-8d3d-f7b504168e75"), + ] # there are some extra jobs randomly generated, not testing those for e in exp: @@ -495,12 +573,10 @@ def test_jobs(self): # Returning only jobs visible by the user obs = qdb.artifact.Artifact(1).jobs() exp = [ - qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b'), - qdb.processing_job.ProcessingJob( - '4c7115e8-4c8e-424c-bf25-96c292ca1931'), - qdb.processing_job.ProcessingJob( - 'b72369f9-a886-4193-8d3d-f7b504168e75')] + qdb.processing_job.ProcessingJob("6d368e16-2242-4cf8-87b4-a5dc40bb890b"), + qdb.processing_job.ProcessingJob("4c7115e8-4c8e-424c-bf25-96c292ca1931"), + qdb.processing_job.ProcessingJob("b72369f9-a886-4193-8d3d-f7b504168e75"), + ] for e in exp: self.assertIn(e, obs) @@ -509,94 +585,76 @@ def test_jobs_cmd(self): cmd = qdb.software.Command(1) obs = qdb.artifact.Artifact(1).jobs(cmd=cmd, show_hidden=True) exp = [ - qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b'), - qdb.processing_job.ProcessingJob( - '4c7115e8-4c8e-424c-bf25-96c292ca1931'), - qdb.processing_job.ProcessingJob( - '063e553b-327c-4818-ab4a-adfe58e49860'), - qdb.processing_job.ProcessingJob( - 'b72369f9-a886-4193-8d3d-f7b504168e75') - ] + qdb.processing_job.ProcessingJob("6d368e16-2242-4cf8-87b4-a5dc40bb890b"), + qdb.processing_job.ProcessingJob("4c7115e8-4c8e-424c-bf25-96c292ca1931"), + qdb.processing_job.ProcessingJob("063e553b-327c-4818-ab4a-adfe58e49860"), + qdb.processing_job.ProcessingJob("b72369f9-a886-4193-8d3d-f7b504168e75"), + ] # there are some extra jobs randomly generated, not testing those for e in exp: self.assertIn(e, obs) obs = qdb.artifact.Artifact(1).jobs(cmd=cmd) exp = [ - qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b'), - qdb.processing_job.ProcessingJob( - '4c7115e8-4c8e-424c-bf25-96c292ca1931'), - qdb.processing_job.ProcessingJob( - 'b72369f9-a886-4193-8d3d-f7b504168e75') - ] + qdb.processing_job.ProcessingJob("6d368e16-2242-4cf8-87b4-a5dc40bb890b"), + qdb.processing_job.ProcessingJob("4c7115e8-4c8e-424c-bf25-96c292ca1931"), + qdb.processing_job.ProcessingJob("b72369f9-a886-4193-8d3d-f7b504168e75"), + ] cmd = qdb.software.Command(2) obs = qdb.artifact.Artifact(1).jobs(cmd=cmd, show_hidden=True) - exp = [qdb.processing_job.ProcessingJob( - 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d')] + exp = [qdb.processing_job.ProcessingJob("bcc7ebcd-39c1-43e4-af2d-822e3589f14d")] self.assertEqual(obs, exp) obs = qdb.artifact.Artifact(1).jobs(cmd=cmd) self.assertEqual(obs, []) def test_jobs_status(self): - obs = qdb.artifact.Artifact(1).jobs(status='success') + obs = qdb.artifact.Artifact(1).jobs(status="success") exp = [ - 
qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b'), - qdb.processing_job.ProcessingJob( - '4c7115e8-4c8e-424c-bf25-96c292ca1931'), - qdb.processing_job.ProcessingJob( - 'b72369f9-a886-4193-8d3d-f7b504168e75') - ] + qdb.processing_job.ProcessingJob("6d368e16-2242-4cf8-87b4-a5dc40bb890b"), + qdb.processing_job.ProcessingJob("4c7115e8-4c8e-424c-bf25-96c292ca1931"), + qdb.processing_job.ProcessingJob("b72369f9-a886-4193-8d3d-f7b504168e75"), + ] # there are some extra jobs randomly generated, not testing those for e in exp: self.assertIn(e, obs) - obs = qdb.artifact.Artifact(1).jobs(status='running', show_hidden=True) - exp = [qdb.processing_job.ProcessingJob( - 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d')] + obs = qdb.artifact.Artifact(1).jobs(status="running", show_hidden=True) + exp = [qdb.processing_job.ProcessingJob("bcc7ebcd-39c1-43e4-af2d-822e3589f14d")] self.assertEqual(obs, exp) - obs = qdb.artifact.Artifact(1).jobs(status='running') + obs = qdb.artifact.Artifact(1).jobs(status="running") self.assertEqual(obs, []) - obs = qdb.artifact.Artifact(1).jobs(status='queued', show_hidden=True) - exp = [qdb.processing_job.ProcessingJob( - '063e553b-327c-4818-ab4a-adfe58e49860')] + obs = qdb.artifact.Artifact(1).jobs(status="queued", show_hidden=True) + exp = [qdb.processing_job.ProcessingJob("063e553b-327c-4818-ab4a-adfe58e49860")] self.assertEqual(obs, exp) - obs = qdb.artifact.Artifact(1).jobs(status='queued') + obs = qdb.artifact.Artifact(1).jobs(status="queued") self.assertEqual(obs, []) def test_jobs_cmd_and_status(self): cmd = qdb.software.Command(1) - obs = qdb.artifact.Artifact(1).jobs(cmd=cmd, status='success') + obs = qdb.artifact.Artifact(1).jobs(cmd=cmd, status="success") exp = [ - qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b'), - qdb.processing_job.ProcessingJob( - '4c7115e8-4c8e-424c-bf25-96c292ca1931'), - qdb.processing_job.ProcessingJob( - 'b72369f9-a886-4193-8d3d-f7b504168e75') - ] + qdb.processing_job.ProcessingJob("6d368e16-2242-4cf8-87b4-a5dc40bb890b"), + qdb.processing_job.ProcessingJob("4c7115e8-4c8e-424c-bf25-96c292ca1931"), + qdb.processing_job.ProcessingJob("b72369f9-a886-4193-8d3d-f7b504168e75"), + ] # there are some extra jobs randomly generated, not testing those for e in exp: self.assertIn(e, obs) - obs = qdb.artifact.Artifact(1).jobs(cmd=cmd, status='queued', - show_hidden=True) - exp = [qdb.processing_job.ProcessingJob( - '063e553b-327c-4818-ab4a-adfe58e49860')] + obs = qdb.artifact.Artifact(1).jobs(cmd=cmd, status="queued", show_hidden=True) + exp = [qdb.processing_job.ProcessingJob("063e553b-327c-4818-ab4a-adfe58e49860")] self.assertEqual(obs, exp) - obs = qdb.artifact.Artifact(1).jobs(cmd=cmd, status='queued') + obs = qdb.artifact.Artifact(1).jobs(cmd=cmd, status="queued") self.assertEqual(obs, []) cmd = qdb.software.Command(2) - obs = qdb.artifact.Artifact(1).jobs(cmd=cmd, status='queued') + obs = qdb.artifact.Artifact(1).jobs(cmd=cmd, status="queued") exp = [] self.assertEqual(obs, exp) @@ -621,84 +679,98 @@ def test_get_commands(self): class ArtifactTests(TestCase): def setUp(self): # Generate some files for a root artifact - fd, self.fp1 = mkstemp(suffix='_seqs.fastq') + fd, self.fp1 = mkstemp(suffix="_seqs.fastq") close(fd) - with open(self.fp1, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" - "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" - "+\n" - "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n") + with open(self.fp1, "w") as f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" + 
"NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" + "+\n" + "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n" + ) - fd, self.fp2 = mkstemp(suffix='_barcodes.fastq') + fd, self.fp2 = mkstemp(suffix="_barcodes.fastq") close(fd) - with open(self.fp2, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n" - "NNNCNNNNNNNNN\n" - "+\n" - "#############\n") + with open(self.fp2, "w") as f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n" + "NNNCNNNNNNNNN\n" + "+\n" + "#############\n" + ) self.filepaths_root = [(self.fp1, 1), (self.fp2, 3)] # Generate some files for a processed artifact - fd, self.fp3 = mkstemp(suffix='_seqs.fna') + fd, self.fp3 = mkstemp(suffix="_seqs.fna") close(fd) - with open(self.fp3, 'w') as f: - f.write(">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 " - "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n" - "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n") + with open(self.fp3, "w") as f: + f.write( + ">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 " + "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n" + "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n" + ) self.filepaths_processed = [(self.fp3, 4)] # Generate some file for a BIOM - fd, self.fp4 = mkstemp(suffix='_table.biom') - with biom_open(self.fp4, 'w') as f: + fd, self.fp4 = mkstemp(suffix="_table.biom") + with biom_open(self.fp4, "w") as f: et.to_hdf5(f, "test") self.filepaths_biom = [(self.fp4, 7)] # Create a new prep template metadata_dict = { - 'SKB8.640193': {'center_name': 'ANL', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'target_subfragment': 'V4', - 'target_gene': '16S rRNA', - 'experiment_design_description': 'BBBB'}} - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) - self.prep_template = \ - qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(1), "16S") - self.prep_template_2 = \ - qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(1), "16S") + "SKB8.640193": { + "center_name": "ANL", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "target_subfragment": "V4", + "target_gene": "16S rRNA", + "experiment_design_description": "BBBB", + } + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) + self.prep_template = qdb.metadata_template.prep_template.PrepTemplate.create( + metadata, qdb.study.Study(1), "16S" + ) + self.prep_template_2 = qdb.metadata_template.prep_template.PrepTemplate.create( + metadata, qdb.study.Study(1), "16S" + ) self._clean_up_files = [self.fp1, self.fp2, self.fp3, self.fp4] # per_sample_FASTQ Metagenomic example - self.prep_template_per_sample_fastq = \ + self.prep_template_per_sample_fastq = ( qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(1), "Metagenomic") - fd, self.fwd = mkstemp(prefix='SKB8.640193', suffix='_R1.fastq') + metadata, qdb.study.Study(1), "Metagenomic" + ) + ) + fd, self.fwd = mkstemp(prefix="SKB8.640193", suffix="_R1.fastq") close(fd) - with open(self.fwd, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" - "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" - "+\n" - "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n") - fd, self.rev = mkstemp(prefix='SKB8.640193', suffix='_R2.fastq') + 
with open(self.fwd, "w") as f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" + "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" + "+\n" + "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n" + ) + fd, self.rev = mkstemp(prefix="SKB8.640193", suffix="_R2.fastq") close(fd) - with open(self.rev, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" - "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" - "+\n" - "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n") + with open(self.rev, "w") as f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" + "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" + "+\n" + "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n" + ) self._clean_up_files.extend([self.fwd, self.rev]) - self.user = qdb.user.User('test@foo.bar') + self.user = qdb.user.User("test@foo.bar") def tearDown(self): for f in self._clean_up_files: @@ -707,30 +779,28 @@ def tearDown(self): def test_copy(self): src = qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) before = datetime.now() obs = qdb.artifact.Artifact.copy(src, self.prep_template_2) self.assertTrue(before < obs.timestamp < datetime.now()) self.assertIsNone(obs.processing_parameters) - self.assertEqual(obs.visibility, 'sandbox') + self.assertEqual(obs.visibility, "sandbox") self.assertEqual(obs.artifact_type, src.artifact_type) self.assertEqual(obs.data_type, self.prep_template.data_type()) - self.assertEqual(obs.can_be_submitted_to_ebi, - src.can_be_submitted_to_ebi) - self.assertEqual(obs.can_be_submitted_to_vamps, - src.can_be_submitted_to_vamps) + self.assertEqual(obs.can_be_submitted_to_ebi, src.can_be_submitted_to_ebi) + self.assertEqual(obs.can_be_submitted_to_vamps, src.can_be_submitted_to_vamps) db_dir = qdb.util.get_mountpoint(src.artifact_type)[0][1] path_builder = partial(join, db_dir, str(obs.id)) exp_fps = [] for x in src.filepaths: - new_fp = path_builder(basename(x['fp'])) - exp_fps.append((new_fp, x['fp_type'])) + new_fp = path_builder(basename(x["fp"])) + exp_fps.append((new_fp, x["fp_type"])) self._clean_up_files.append(new_fp) - self.assertEqual([(x['fp'], x['fp_type']) - for x in obs.filepaths], exp_fps) + self.assertEqual([(x["fp"], x["fp_type"]) for x in obs.filepaths], exp_fps) self.assertEqual(obs.parents, []) self.assertEqual(obs.prep_templates, [self.prep_template_2]) @@ -739,27 +809,34 @@ def test_copy(self): def test_create_error(self): # no filepaths with self.assertRaises(qdb.exceptions.QiitaDBArtifactCreationError): - qdb.artifact.Artifact.create( - [], "FASTQ", prep_template=self.prep_template) + qdb.artifact.Artifact.create([], "FASTQ", prep_template=self.prep_template) # prep template and parents with self.assertRaises(qdb.exceptions.QiitaDBArtifactCreationError): qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template, - parents=[qdb.artifact.Artifact(1)]) + self.filepaths_root, + "FASTQ", + prep_template=self.prep_template, + parents=[qdb.artifact.Artifact(1)], + ) # analysis and prep_template with self.assertRaises(qdb.exceptions.QiitaDBArtifactCreationError): qdb.artifact.Artifact.create( - self.filepaths_root, "BIOM", prep_template=self.prep_template, - analysis=qdb.analysis.Analysis(1)) + self.filepaths_root, + "BIOM", + prep_template=self.prep_template, + analysis=qdb.analysis.Analysis(1), + ) # Analysis and parents with self.assertRaises(qdb.exceptions.QiitaDBArtifactCreationError): qdb.artifact.Artifact.create( - self.filepaths_root, "BIOM", + self.filepaths_root, + "BIOM", 
parents=[qdb.artifact.Artifact(1)], - analysis=qdb.analysis.Analysis(1)) + analysis=qdb.analysis.Analysis(1), + ) # no prep template no parents no analysis with self.assertRaises(qdb.exceptions.QiitaDBArtifactCreationError): @@ -768,69 +845,82 @@ def test_create_error(self): # parents no processing parameters with self.assertRaises(qdb.exceptions.QiitaDBArtifactCreationError): qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", - parents=[qdb.artifact.Artifact(1)]) + self.filepaths_root, "FASTQ", parents=[qdb.artifact.Artifact(1)] + ) # analysis no data type with self.assertRaises(qdb.exceptions.QiitaDBArtifactCreationError): qdb.artifact.Artifact.create( - self.filepaths_root, "BIOM", analysis=qdb.analysis.Analysis(1)) + self.filepaths_root, "BIOM", analysis=qdb.analysis.Analysis(1) + ) # prep template and processing parameters parameters = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}) + qdb.software.DefaultParameters(1), {"input_data": 1} + ) with self.assertRaises(qdb.exceptions.QiitaDBArtifactCreationError): qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template, - processing_parameters=parameters) + self.filepaths_root, + "FASTQ", + prep_template=self.prep_template, + processing_parameters=parameters, + ) # prep template and data type with self.assertRaises(qdb.exceptions.QiitaDBArtifactCreationError): qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template, - data_type="Multiomic") + self.filepaths_root, + "FASTQ", + prep_template=self.prep_template, + data_type="Multiomic", + ) # different data types new = qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) parameters = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}) + qdb.software.DefaultParameters(1), {"input_data": 1} + ) with self.assertRaises(qdb.exceptions.QiitaDBArtifactCreationError): qdb.artifact.Artifact.create( - self.filepaths_processed, "Demultiplexed", + self.filepaths_processed, + "Demultiplexed", parents=[qdb.artifact.Artifact(1), new], - processing_parameters=parameters) + processing_parameters=parameters, + ) def test_create_root(self): before = datetime.now() obs = qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template, - name='Test artifact') - self.assertEqual(obs.name, 'Test artifact') + self.filepaths_root, + "FASTQ", + prep_template=self.prep_template, + name="Test artifact", + ) + self.assertEqual(obs.name, "Test artifact") self.assertTrue(before < obs.timestamp < datetime.now()) self.assertIsNone(obs.processing_parameters) - self.assertEqual(obs.visibility, 'sandbox') + self.assertEqual(obs.visibility, "sandbox") self.assertEqual(obs.artifact_type, "FASTQ") self.assertEqual(obs.data_type, self.prep_template.data_type()) self.assertFalse(obs.can_be_submitted_to_ebi) self.assertFalse(obs.can_be_submitted_to_vamps) - db_fastq_dir = qdb.util.get_mountpoint('FASTQ')[0][1] + db_fastq_dir = qdb.util.get_mountpoint("FASTQ")[0][1] path_builder = partial(join, db_fastq_dir, str(obs.id)) exp_fps = [ (path_builder(basename(self.fp1)), "raw_forward_seqs"), - (path_builder(basename(self.fp2)), "raw_barcodes")] - self.assertEqual([(x['fp'], x['fp_type']) - for x in obs.filepaths], exp_fps) + (path_builder(basename(self.fp2)), "raw_barcodes"), + ] + self.assertEqual([(x["fp"], 
x["fp_type"]) for x in obs.filepaths], exp_fps) self.assertEqual(obs.parents, []) self.assertEqual(obs.prep_templates, [self.prep_template]) - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): obs.ebi_run_accessions - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): obs.is_submitted_to_vamps self.assertEqual(obs.study, qdb.study.Study(1)) @@ -839,31 +929,32 @@ def test_create_root(self): def test_create_root_analysis(self): before = datetime.now() obs = qdb.artifact.Artifact.create( - self.filepaths_biom, "BIOM", name='Test artifact analysis', - analysis=qdb.analysis.Analysis(1), data_type="16S") - self.assertEqual(obs.name, 'Test artifact analysis') + self.filepaths_biom, + "BIOM", + name="Test artifact analysis", + analysis=qdb.analysis.Analysis(1), + data_type="16S", + ) + self.assertEqual(obs.name, "Test artifact analysis") self.assertTrue(before < obs.timestamp < datetime.now()) self.assertIsNone(obs.processing_parameters) - self.assertEqual(obs.visibility, 'sandbox') + self.assertEqual(obs.visibility, "sandbox") self.assertEqual(obs.artifact_type, "BIOM") self.assertEqual(obs.data_type, "16S") self.assertFalse(obs.can_be_submitted_to_ebi) self.assertFalse(obs.can_be_submitted_to_vamps) - db_fastq_dir = qdb.util.get_mountpoint('BIOM')[0][1] + db_fastq_dir = qdb.util.get_mountpoint("BIOM")[0][1] path_builder = partial(join, db_fastq_dir, str(obs.id)) exp_fps = [(path_builder(basename(self.fp4)), "biom")] - self.assertEqual([(x['fp'], x['fp_type']) - for x in obs.filepaths], exp_fps) + self.assertEqual([(x["fp"], x["fp_type"]) for x in obs.filepaths], exp_fps) self.assertEqual(obs.parents, []) self.assertEqual(obs.prep_templates, []) - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): obs.ebi_run_accessions - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): obs.is_submitted_to_vamps self.assertIsNone(obs.study) @@ -874,37 +965,38 @@ def test_create_root_analysis(self): def test_create_processed(self): # make a copy of files for the can_be_submitted_to_ebi tests - lcopy = self.fp3 + '.fna' + lcopy = self.fp3 + ".fna" self._clean_up_files.append(lcopy) copyfile(self.fp3, lcopy) exp_params = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}) + qdb.software.DefaultParameters(1), {"input_data": 1} + ) before = datetime.now() obs = qdb.artifact.Artifact.create( - self.filepaths_processed, "Demultiplexed", + self.filepaths_processed, + "Demultiplexed", parents=[qdb.artifact.Artifact(1)], - processing_parameters=exp_params) - self.assertEqual(obs.name, 'noname') + processing_parameters=exp_params, + ) + self.assertEqual(obs.name, "noname") self.assertTrue(before < obs.timestamp < datetime.now()) self.assertEqual(obs.processing_parameters, exp_params) - self.assertEqual(obs.visibility, 'private') + self.assertEqual(obs.visibility, "private") self.assertEqual(obs.artifact_type, "Demultiplexed") self.assertEqual(obs.data_type, qdb.artifact.Artifact(1).data_type) self.assertTrue(obs.can_be_submitted_to_ebi) self.assertTrue(obs.can_be_submitted_to_vamps) self.assertFalse(obs.is_submitted_to_vamps) - db_demultiplexed_dir = 
qdb.util.get_mountpoint('Demultiplexed')[0][1] + db_demultiplexed_dir = qdb.util.get_mountpoint("Demultiplexed")[0][1] path_builder = partial(join, db_demultiplexed_dir, str(obs.id)) - exp_fps = [(path_builder(basename(self.fp3)), - "preprocessed_fasta")] - self.assertEqual([(x['fp'], x['fp_type']) - for x in obs.filepaths], exp_fps) + exp_fps = [(path_builder(basename(self.fp3)), "preprocessed_fasta")] + self.assertEqual([(x["fp"], x["fp_type"]) for x in obs.filepaths], exp_fps) self.assertEqual(obs.parents, [qdb.artifact.Artifact(1)]) self.assertEqual( - obs.prep_templates, - [qdb.metadata_template.prep_template.PrepTemplate(1)]) + obs.prep_templates, [qdb.metadata_template.prep_template.PrepTemplate(1)] + ) self.assertEqual(obs.ebi_run_accessions, dict()) self.assertEqual(obs.study, qdb.study.Study(1)) self.assertFalse(exists(self.filepaths_processed[0][0])) @@ -913,40 +1005,46 @@ def test_create_processed(self): # let's create another demultiplexed on top of the previous one to # test can_be_submitted_to_ebi exp_params = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': obs.id}) + qdb.software.DefaultParameters(1), {"input_data": obs.id} + ) new = qdb.artifact.Artifact.create( - [(lcopy, 4)], "Demultiplexed", parents=[obs], - processing_parameters=exp_params) + [(lcopy, 4)], + "Demultiplexed", + parents=[obs], + processing_parameters=exp_params, + ) self.assertFalse(new.can_be_submitted_to_ebi) def test_create_copy_files(self): exp_params = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}) + qdb.software.DefaultParameters(1), {"input_data": 1} + ) before = datetime.now() obs = qdb.artifact.Artifact.create( - self.filepaths_processed, "Demultiplexed", + self.filepaths_processed, + "Demultiplexed", parents=[qdb.artifact.Artifact(1)], - processing_parameters=exp_params, move_files=False) - self.assertEqual(obs.name, 'noname') + processing_parameters=exp_params, + move_files=False, + ) + self.assertEqual(obs.name, "noname") self.assertTrue(before < obs.timestamp < datetime.now()) self.assertEqual(obs.processing_parameters, exp_params) - self.assertEqual(obs.visibility, 'private') + self.assertEqual(obs.visibility, "private") self.assertEqual(obs.artifact_type, "Demultiplexed") self.assertEqual(obs.data_type, qdb.artifact.Artifact(1).data_type) self.assertTrue(obs.can_be_submitted_to_ebi) self.assertTrue(obs.can_be_submitted_to_vamps) self.assertFalse(obs.is_submitted_to_vamps) - db_demultiplexed_dir = qdb.util.get_mountpoint('Demultiplexed')[0][1] + db_demultiplexed_dir = qdb.util.get_mountpoint("Demultiplexed")[0][1] path_builder = partial(join, db_demultiplexed_dir, str(obs.id)) - exp_fps = [(path_builder(basename(self.fp3)), - "preprocessed_fasta")] - self.assertEqual([(x['fp'], x['fp_type']) - for x in obs.filepaths], exp_fps) + exp_fps = [(path_builder(basename(self.fp3)), "preprocessed_fasta")] + self.assertEqual([(x["fp"], x["fp_type"]) for x in obs.filepaths], exp_fps) self.assertEqual(obs.parents, [qdb.artifact.Artifact(1)]) self.assertEqual( - obs.prep_templates, - [qdb.metadata_template.prep_template.PrepTemplate(1)]) + obs.prep_templates, [qdb.metadata_template.prep_template.PrepTemplate(1)] + ) self.assertEqual(obs.ebi_run_accessions, dict()) self.assertEqual(obs.study, qdb.study.Study(1)) self.assertTrue(exists(self.filepaths_processed[0][0])) @@ -956,42 +1054,45 @@ def test_create_biom(self): before = datetime.now() cmd = qdb.software.Command(3) exp_params = 
qdb.software.Parameters.from_default_params( - next(cmd.default_parameter_sets), {'input_data': 1}) + next(cmd.default_parameter_sets), {"input_data": 1} + ) obs = qdb.artifact.Artifact.create( - self.filepaths_biom, "BIOM", parents=[qdb.artifact.Artifact(2)], - processing_parameters=exp_params) - self.assertEqual(obs.name, 'noname') + self.filepaths_biom, + "BIOM", + parents=[qdb.artifact.Artifact(2)], + processing_parameters=exp_params, + ) + self.assertEqual(obs.name, "noname") self.assertTrue(before < obs.timestamp < datetime.now()) self.assertEqual(obs.processing_parameters, exp_params) - self.assertEqual(obs.visibility, 'private') - self.assertEqual(obs.artifact_type, 'BIOM') + self.assertEqual(obs.visibility, "private") + self.assertEqual(obs.artifact_type, "BIOM") self.assertEqual(obs.data_type, qdb.artifact.Artifact(2).data_type) self.assertFalse(obs.can_be_submitted_to_ebi) self.assertFalse(obs.can_be_submitted_to_vamps) - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): obs.ebi_run_accessions - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): obs.is_submitted_to_vamps - db_biom_dir = qdb.util.get_mountpoint('BIOM')[0][1] + db_biom_dir = qdb.util.get_mountpoint("BIOM")[0][1] path_builder = partial(join, db_biom_dir, str(obs.id)) - exp_fps = [(path_builder(basename(self.fp4)), 'biom')] - self.assertEqual([(x['fp'], x['fp_type']) - for x in obs.filepaths], exp_fps) + exp_fps = [(path_builder(basename(self.fp4)), "biom")] + self.assertEqual([(x["fp"], x["fp_type"]) for x in obs.filepaths], exp_fps) self.assertEqual(obs.parents, [qdb.artifact.Artifact(2)]) - self.assertEqual(obs.prep_templates, - [qdb.metadata_template.prep_template.PrepTemplate(1)]) + self.assertEqual( + obs.prep_templates, [qdb.metadata_template.prep_template.PrepTemplate(1)] + ) self.assertEqual(obs.study, qdb.study.Study(1)) self.assertIsNone(obs.analysis) def test_delete_error_public(self): test = qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) test.visibility = "public" - self._clean_up_files.extend([x['fp'] for x in test.filepaths]) + self._clean_up_files.extend([x["fp"] for x in test.filepaths]) with self.assertRaises(qdb.exceptions.QiitaDBArtifactDeletionError): qdb.artifact.Artifact.delete(test.id) @@ -1005,48 +1106,58 @@ def test_delete_error_analyzed(self): def test_delete_error_ebi(self): parameters = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}) + qdb.software.DefaultParameters(1), {"input_data": 1} + ) obs = qdb.artifact.Artifact.create( - self.filepaths_processed, "Demultiplexed", + self.filepaths_processed, + "Demultiplexed", parents=[qdb.artifact.Artifact(1)], - processing_parameters=parameters) - obs.ebi_run_accessions = {'1.SKB1.640202': 'ERR1000001', - '1.SKB2.640194': 'ERR1000002'} - self._clean_up_files.extend([x['fp'] for x in obs.filepaths]) + processing_parameters=parameters, + ) + obs.ebi_run_accessions = { + "1.SKB1.640202": "ERR1000001", + "1.SKB2.640194": "ERR1000002", + } + self._clean_up_files.extend([x["fp"] for x in obs.filepaths]) with self.assertRaises(qdb.exceptions.QiitaDBArtifactDeletionError): qdb.artifact.Artifact.delete(obs.id) def test_delete_error_vamps(self): parameters = 
qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}) + qdb.software.DefaultParameters(1), {"input_data": 1} + ) obs = qdb.artifact.Artifact.create( - self.filepaths_processed, "Demultiplexed", + self.filepaths_processed, + "Demultiplexed", parents=[qdb.artifact.Artifact(1)], - processing_parameters=parameters) + processing_parameters=parameters, + ) obs.is_submitted_to_vamps = True - self._clean_up_files.extend([x['fp'] for x in obs.filepaths]) + self._clean_up_files.extend([x["fp"] for x in obs.filepaths]) with self.assertRaises(qdb.exceptions.QiitaDBArtifactDeletionError): qdb.artifact.Artifact.delete(obs.id) def test_delete_in_construction_job(self): test = qdb.artifact.Artifact.create( - self.filepaths_root, 'FASTQ', prep_template=self.prep_template) - self._clean_up_files.extend([x['fp'] for x in test.filepaths]) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) + self._clean_up_files.extend([x["fp"] for x in test.filepaths]) json_str = ( '{"input_data": %d, "max_barcode_errors": 1.5, ' '"barcode_type": "golay_12", "max_bad_run_length": 3, ' '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0, ' - '"phred_offset": ""}' % test.id) + '"phred_offset": ""}' % test.id + ) qdb.processing_job.ProcessingJob.create( self.user, - qdb.software.Parameters.load(qdb.software.Command(1), - json_str=json_str)) - uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], - str(test.study.id)) + qdb.software.Parameters.load(qdb.software.Command(1), json_str=json_str), + ) + uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], str(test.study.id)) self._clean_up_files.extend( - [join(uploads_fp, basename(x['fp'])) for x in test.filepaths]) + [join(uploads_fp, basename(x["fp"])) for x in test.filepaths] + ) qdb.artifact.Artifact.delete(test.id) @@ -1055,31 +1166,34 @@ def test_delete_in_construction_job(self): def test_delete_error_running_job(self): test = qdb.artifact.Artifact.create( - self.filepaths_root, 'FASTQ', prep_template=self.prep_template) - self._clean_up_files.extend([x['fp'] for x in test.filepaths]) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) + self._clean_up_files.extend([x["fp"] for x in test.filepaths]) json_str = ( '{"input_data": %d, "max_barcode_errors": 1.5, ' '"barcode_type": "golay_12", "max_bad_run_length": 3, ' '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0, ' - '"phred_offset": ""}' % test.id) + '"phred_offset": ""}' % test.id + ) job = qdb.processing_job.ProcessingJob.create( self.user, - qdb.software.Parameters.load(qdb.software.Command(1), - json_str=json_str)) - job._set_status('running') + qdb.software.Parameters.load(qdb.software.Command(1), json_str=json_str), + ) + job._set_status("running") with self.assertRaises(qdb.exceptions.QiitaDBArtifactDeletionError): qdb.artifact.Artifact.delete(test.id) def test_delete(self): test = qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) - uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], - str(test.study.id)) + uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], str(test.study.id)) self._clean_up_files.extend( - [join(uploads_fp, basename(x['fp'])) for 
x in test.filepaths]) + [join(uploads_fp, basename(x["fp"])) for x in test.filepaths] + ) qdb.artifact.Artifact.delete(test.id) @@ -1088,41 +1202,45 @@ def test_delete(self): # Analysis artifact parameters = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}) + qdb.software.DefaultParameters(1), {"input_data": 1} + ) test = qdb.artifact.Artifact.create( - self.filepaths_processed, "Demultiplexed", + self.filepaths_processed, + "Demultiplexed", parents=[qdb.artifact.Artifact(9)], - processing_parameters=parameters) + processing_parameters=parameters, + ) self._clean_up_files.extend( - [join(uploads_fp, basename(x['fp'])) for x in test.filepaths]) + [join(uploads_fp, basename(x["fp"])) for x in test.filepaths] + ) qdb.artifact.Artifact.delete(test.id) with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): qdb.artifact.Artifact(test.id) def test_delete_with_html(self): - # creating a single file html_summary fd, html_fp = mkstemp(suffix=".html") close(fd) - self.filepaths_root.append((html_fp, 'html_summary')) + self.filepaths_root.append((html_fp, "html_summary")) self._clean_up_files.append(html_fp) # creating a folder with a file for html_summary_dir summary_dir = mkdtemp() - open(join(summary_dir, 'index.html'), 'w').write('this is a test') - self.filepaths_root.append((summary_dir, 'html_summary_dir')) + open(join(summary_dir, "index.html"), "w").write("this is a test") + self.filepaths_root.append((summary_dir, "html_summary_dir")) self._clean_up_files.append(summary_dir) test = qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) - uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], - str(test.study.id)) + uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], str(test.study.id)) self._clean_up_files.extend( - [join(uploads_fp, basename(x['fp'])) for x in test.filepaths]) + [join(uploads_fp, basename(x["fp"])) for x in test.filepaths] + ) qdb.artifact.Artifact.delete(test.id) @@ -1134,11 +1252,12 @@ def test_delete_with_html(self): def test_delete_with_jobs(self): test = qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template) - uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], - str(test.study.id)) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) + uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], str(test.study.id)) self._clean_up_files.extend( - [join(uploads_fp, basename(x['fp'])) for x in test.filepaths]) + [join(uploads_fp, basename(x["fp"])) for x in test.filepaths] + ) json_str = ( '{"input_data": %d, "max_barcode_errors": 1.5, ' @@ -1146,12 +1265,13 @@ def test_delete_with_jobs(self): '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0, ' - '"phred_offset": ""}' % test.id) + '"phred_offset": ""}' % test.id + ) job = qdb.processing_job.ProcessingJob.create( self.user, - qdb.software.Parameters.load(qdb.software.Command(1), - json_str=json_str)) - job._set_status('success') + qdb.software.Parameters.load(qdb.software.Command(1), json_str=json_str), + ) + job._set_status("success") qdb.artifact.Artifact.delete(test.id) @@ -1163,29 +1283,28 @@ def test_delete_with_jobs(self): def test_being_deleted_by(self): test = qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", 
prep_template=self.prep_template) - uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], - str(test.study.id)) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) + uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], str(test.study.id)) self._clean_up_files.extend( - [join(uploads_fp, basename(x['fp'])) for x in test.filepaths]) + [join(uploads_fp, basename(x["fp"])) for x in test.filepaths] + ) # verifying that there are no jobs in the list self.assertIsNone(test.being_deleted_by) # creating new deleting job - qiita_plugin = qdb.software.Software.from_name_and_version( - 'Qiita', 'alpha') - cmd = qiita_plugin.get_command('delete_artifact') - params = qdb.software.Parameters.load( - cmd, values_dict={'artifact': test.id}) + qiita_plugin = qdb.software.Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("delete_artifact") + params = qdb.software.Parameters.load(cmd, values_dict={"artifact": test.id}) job = qdb.processing_job.ProcessingJob.create(self.user, params, True) - job._set_status('running') + job._set_status("running") # verifying that there is a job and is the same than above self.assertEqual(job, test.being_deleted_by) # let's set it as error and now we should not have it anymore - job._set_error('Killed by admin') + job._set_error("Killed by admin") self.assertIsNone(test.being_deleted_by) # now, let's actually remove @@ -1198,35 +1317,40 @@ def test_being_deleted_by(self): qdb.artifact.Artifact(test.id) def test_delete_as_output_job(self): - fd, fp = mkstemp(suffix='_table.biom') + fd, fp = mkstemp(suffix="_table.biom") self._clean_up_files.append(fp) close(fd) - with open(fp, 'w') as f: - f.write('\n') - data = {'OTU table': {'filepaths': [(fp, 'biom')], - 'artifact_type': 'BIOM'}} + with open(fp, "w") as f: + f.write("\n") + data = {"OTU table": {"filepaths": [(fp, "biom")], "artifact_type": "BIOM"}} job = qdb.processing_job.ProcessingJob.create( self.user, qdb.software.Parameters.load( - qdb.software.Command.get_validator('BIOM'), - values_dict={'files': dumps({'biom': [fp]}), - 'artifact_type': 'BIOM', - 'template': 1, - 'provenance': dumps( - {'job': "bcc7ebcd-39c1-43e4-af2d-822e3589f14d", - 'cmd_out_id': 3, 'name': 'test-delete'})} - ) + qdb.software.Command.get_validator("BIOM"), + values_dict={ + "files": dumps({"biom": [fp]}), + "artifact_type": "BIOM", + "template": 1, + "provenance": dumps( + { + "job": "bcc7ebcd-39c1-43e4-af2d-822e3589f14d", + "cmd_out_id": 3, + "name": "test-delete", + } + ), + }, + ), ) parent = qdb.processing_job.ProcessingJob( - "bcc7ebcd-39c1-43e4-af2d-822e3589f14d") + "bcc7ebcd-39c1-43e4-af2d-822e3589f14d" + ) parent._set_validator_jobs([job]) - job._set_status('running') + job._set_status("running") job.complete(True, artifacts_data=data) - job = qdb.processing_job.ProcessingJob( - "bcc7ebcd-39c1-43e4-af2d-822e3589f14d") + job = qdb.processing_job.ProcessingJob("bcc7ebcd-39c1-43e4-af2d-822e3589f14d") job.release_validators() - artifact = job.outputs['OTU table'] - self._clean_up_files.extend([x['fp'] for x in artifact.filepaths]) + artifact = job.outputs["OTU table"] + self._clean_up_files.extend([x["fp"] for x in artifact.filepaths]) qdb.artifact.Artifact.delete(artifact.id) @@ -1241,7 +1365,8 @@ def test_name_setter(self): def test_visibility_setter(self): a = qdb.artifact.Artifact.create( - self.filepaths_root, "FASTQ", prep_template=self.prep_template) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) self.assertEqual(a.visibility, "sandbox") a.visibility = 
"awaiting_approval" @@ -1266,7 +1391,7 @@ def test_visibility_setter(self): a5 = qdb.artifact.Artifact(5) a6 = qdb.artifact.Artifact(6) - a4.visibility = 'public' + a4.visibility = "public" self.assertEqual(a1.visibility, "public") self.assertEqual(a2.visibility, "public") @@ -1276,7 +1401,7 @@ def test_visibility_setter(self): self.assertEqual(a6.visibility, "public") # Same if we go back - a4.visibility = 'private' + a4.visibility = "private" self.assertEqual(a1.visibility, "private") self.assertEqual(a2.visibility, "private") @@ -1289,15 +1414,16 @@ def test_visibility_setter(self): # want to check that this property is inherited as visibility is; # however, for the time being we don't need to do that and there is # no downside on adding it here. - mtd = 'The greatest human filtering method' + mtd = "The greatest human filtering method" self.assertEqual(mtd, a1.human_reads_filter_method) self.assertIsNone(a2.human_reads_filter_method) self.assertIsNone(a3.human_reads_filter_method) # let's change some values - with self.assertRaisesRegex(ValueError, '"This should fail" is not a ' - 'valid human_reads_filter_method'): - a2.human_reads_filter_method = 'This should fail' + with self.assertRaisesRegex( + ValueError, '"This should fail" is not a valid human_reads_filter_method' + ): + a2.human_reads_filter_method = "This should fail" self.assertIsNone(a2.human_reads_filter_method) a2.human_reads_filter_method = mtd self.assertEqual(mtd, a2.human_reads_filter_method) @@ -1307,33 +1433,34 @@ def test_ebi_run_accessions_setter(self): a = qdb.artifact.Artifact(3) self.assertEqual(a.ebi_run_accessions, dict()) new_vals = { - '1.SKB1.640202': 'ERR1000001', - '1.SKB2.640194': 'ERR1000002', - '1.SKB3.640195': 'ERR1000003', - '1.SKB4.640189': 'ERR1000004', - '1.SKB5.640181': 'ERR1000005', - '1.SKB6.640176': 'ERR1000006', - '1.SKB7.640196': 'ERR1000007', - '1.SKB8.640193': 'ERR1000008', - '1.SKB9.640200': 'ERR1000009', - '1.SKD1.640179': 'ERR1000010', - '1.SKD2.640178': 'ERR1000011', - '1.SKD3.640198': 'ERR1000012', - '1.SKD4.640185': 'ERR1000013', - '1.SKD5.640186': 'ERR1000014', - '1.SKD6.640190': 'ERR1000015', - '1.SKD7.640191': 'ERR1000016', - '1.SKD8.640184': 'ERR1000017', - '1.SKD9.640182': 'ERR1000018', - '1.SKM1.640183': 'ERR1000019', - '1.SKM2.640199': 'ERR1000020', - '1.SKM3.640197': 'ERR1000021', - '1.SKM4.640180': 'ERR1000022', - '1.SKM5.640177': 'ERR1000023', - '1.SKM6.640187': 'ERR1000024', - '1.SKM7.640188': 'ERR1000025', - '1.SKM8.640201': 'ERR1000026', - '1.SKM9.640192': 'ERR1000027'} + "1.SKB1.640202": "ERR1000001", + "1.SKB2.640194": "ERR1000002", + "1.SKB3.640195": "ERR1000003", + "1.SKB4.640189": "ERR1000004", + "1.SKB5.640181": "ERR1000005", + "1.SKB6.640176": "ERR1000006", + "1.SKB7.640196": "ERR1000007", + "1.SKB8.640193": "ERR1000008", + "1.SKB9.640200": "ERR1000009", + "1.SKD1.640179": "ERR1000010", + "1.SKD2.640178": "ERR1000011", + "1.SKD3.640198": "ERR1000012", + "1.SKD4.640185": "ERR1000013", + "1.SKD5.640186": "ERR1000014", + "1.SKD6.640190": "ERR1000015", + "1.SKD7.640191": "ERR1000016", + "1.SKD8.640184": "ERR1000017", + "1.SKD9.640182": "ERR1000018", + "1.SKM1.640183": "ERR1000019", + "1.SKM2.640199": "ERR1000020", + "1.SKM3.640197": "ERR1000021", + "1.SKM4.640180": "ERR1000022", + "1.SKM5.640177": "ERR1000023", + "1.SKM6.640187": "ERR1000024", + "1.SKM7.640188": "ERR1000025", + "1.SKM8.640201": "ERR1000026", + "1.SKM9.640192": "ERR1000027", + } a.ebi_run_accessions = new_vals self.assertEqual(a.ebi_run_accessions, new_vals) @@ -1353,7 +1480,7 @@ def 
test_html_summary_setter(self): close(fd) self._clean_up_files.append(fp) - db_fastq_dir = qdb.util.get_mountpoint('FASTQ')[0][1] + db_fastq_dir = qdb.util.get_mountpoint("FASTQ")[0][1] path_builder = partial(join, db_fastq_dir, str(a.id)) # Check the setter works when the artifact does not have the summary @@ -1378,9 +1505,10 @@ def test_html_summary_setter(self): # Check that the setter correctly removes the directory if a new # summary is added. Magic number 0. There is only one html_summary_dir # added on the previous test - old_dir_fp = [x['fp'] for x in a.filepaths - if x['fp_type'] == 'html_summary_dir'][0] - fd, fp = mkstemp(suffix='.html') + old_dir_fp = [ + x["fp"] for x in a.filepaths if x["fp_type"] == "html_summary_dir" + ][0] + fd, fp = mkstemp(suffix=".html") close(fd) self._clean_up_files.append(fp) a.set_html_summary(fp) @@ -1388,8 +1516,9 @@ def test_html_summary_setter(self): self.assertEqual(a.html_summary_fp[1], exp3) self.assertFalse(exists(exp2)) self.assertFalse(exists(old_dir_fp)) - summary_dir = [x['fp'] for x in a.filepaths - if x['fp_type'] == 'html_summary_dir'] + summary_dir = [ + x["fp"] for x in a.filepaths if x["fp_type"] == "html_summary_dir" + ] self.assertEqual(summary_dir, []) # let's check if we update, we do _not_ remove the files @@ -1398,10 +1527,11 @@ def test_html_summary_setter(self): def test_descendants_with_jobs_one_element(self): artifact = qdb.artifact.Artifact.create( - self.filepaths_root, 'FASTQ', prep_template=self.prep_template) + self.filepaths_root, "FASTQ", prep_template=self.prep_template + ) obs = self.prep_template.artifact.descendants_with_jobs.nodes() - exp = [('artifact', artifact)] + exp = [("artifact", artifact)] self.assertCountEqual(obs, exp) def test_has_human(self): @@ -1410,16 +1540,18 @@ def test_has_human(self): # create a per_sample_FASTQ artifact = qdb.artifact.Artifact.create( - [(self.fwd, 1), (self.rev, 2)], "per_sample_FASTQ", - prep_template=self.prep_template_per_sample_fastq) + [(self.fwd, 1), (self.rev, 2)], + "per_sample_FASTQ", + prep_template=self.prep_template_per_sample_fastq, + ) # this should be False as there are no human samples self.assertFalse(artifact.has_human) # let's make it True by making the samle human-* df = pd.DataFrame.from_dict( - {'1.SKB8.640193': {'env_package': 'human-oral'}}, - orient='index', dtype=str) + {"1.SKB8.640193": {"env_package": "human-oral"}}, orient="index", dtype=str + ) artifact.study.sample_template.update(df) self.assertTrue(artifact.has_human) @@ -1430,7 +1562,8 @@ def test_has_human(self): qdb.sql_connection.TRN.add( f"""UPDATE qiita.prep_template SET data_type_id = 1 - WHERE prep_template_id = {pt.id}""") + WHERE prep_template_id = {pt.id}""" + ) qdb.sql_connection.TRN.execute() self.assertFalse(artifact.has_human) @@ -1443,8 +1576,8 @@ def test_descendants_with_jobs(self): self.assertEqual(len(a.analysis.artifacts), 2) # 2. add a new root and make sure we see it c = qdb.artifact.Artifact.create( - self.filepaths_root, "BIOM", analysis=a.analysis, - data_type="16S") + self.filepaths_root, "BIOM", analysis=a.analysis, data_type="16S" + ) self.assertEqual(len(a.analysis.artifacts), 3) # 3. 
add jobs conencting the new artifact to the other root # - currently: @@ -1458,20 +1591,26 @@ def test_descendants_with_jobs(self): # c ------------| cmd = qdb.software.Command.create( qdb.software.Software(1), - "CommandWithMultipleInputs", "", { - 'input_x': ['artifact:["BIOM"]', None], - 'input_y': ['artifact:["BIOM"]', None]}, {'out': 'BIOM'}) + "CommandWithMultipleInputs", + "", + { + "input_x": ['artifact:["BIOM"]', None], + "input_y": ['artifact:["BIOM"]', None], + }, + {"out": "BIOM"}, + ) params = qdb.software.Parameters.load( - cmd, values_dict={'input_x': a.children[0].id, 'input_y': c.id}) + cmd, values_dict={"input_x": a.children[0].id, "input_y": c.id} + ) wf = qdb.processing_job.ProcessingWorkflow.from_scratch( - self.user, params, name='Test WF') + self.user, params, name="Test WF" + ) job1 = list(wf.graph.nodes())[0] cmd_dp = qdb.software.DefaultParameters.create("", cmd) - wf.add(cmd_dp, req_params={'input_x': a.id, 'input_y': c.id}) + wf.add(cmd_dp, req_params={"input_x": a.id, "input_y": c.id}) job2 = list(wf.graph.nodes())[1] - jobs = [j[1] for e in a.descendants_with_jobs.edges - for j in e if j[0] == 'job'] + jobs = [j[1] for e in a.descendants_with_jobs.edges for j in e if j[0] == "job"] self.assertIn(job1, jobs) self.assertIn(job2, jobs) @@ -1484,11 +1623,9 @@ def test_descendants_with_jobs(self): # | | # |-----|---> job1 -> out # c ------------| - wf.add(cmd_dp, connections={ - job1: {'out': 'input_x'}, job2: {'out': 'input_y'}}) + wf.add(cmd_dp, connections={job1: {"out": "input_x"}, job2: {"out": "input_y"}}) job3 = list(wf.graph.nodes())[2] - jobs = [j[1] for e in a.descendants_with_jobs.edges - for j in e if j[0] == 'job'] + jobs = [j[1] for e in a.descendants_with_jobs.edges for j in e if j[0] == "job"] self.assertIn(job3, jobs) @@ -1505,17 +1642,15 @@ def test_archive(self): self.assertEqual(4, obs_artifacts) # check errors - with self.assertRaisesRegex(QE, 'Only public artifacts can be ' - 'archived'): + with self.assertRaisesRegex(QE, "Only public artifacts can be archived"): A.archive(1) - A(1).visibility = 'public' + A(1).visibility = "public" - with self.assertRaisesRegex(QE, 'Only BIOM artifacts can be archived'): + with self.assertRaisesRegex(QE, "Only BIOM artifacts can be archived"): A.archive(1) - A(8).visibility = 'public' - with self.assertRaisesRegex(QE, 'Only non analysis artifacts can ' - 'be archived'): + A(8).visibility = "public" + with self.assertRaisesRegex(QE, "Only non analysis artifacts can be archived"): A.archive(8) for aid in range(5, 7): @@ -1534,22 +1669,22 @@ def test_archive(self): # actually remove files, which we will need for other tests so lets # make a copy and then restore them mfolder = dirname(dirname(abspath(__file__))) - mpath = join(mfolder, 'support_files', 'test_data') + mpath = join(mfolder, "support_files", "test_data") mp = partial(join, mpath) fps = [ - mp('processed_data/1_study_1001_closed_reference_otu_table.biom'), - mp('processed_data/' - '1_study_1001_closed_reference_otu_table_Silva.biom'), - mp('raw_data/1_s_G1_L001_sequences.fastq.gz'), - mp('raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz')] + mp("processed_data/1_study_1001_closed_reference_otu_table.biom"), + mp("processed_data/1_study_1001_closed_reference_otu_table_Silva.biom"), + mp("raw_data/1_s_G1_L001_sequences.fastq.gz"), + mp("raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz"), + ] for fp in fps: - copyfile(fp, f'{fp}.bk') + copyfile(fp, f"{fp}.bk") PT = qdb.metadata_template.prep_template.PrepTemplate QEE = 
qdb.exceptions.QiitaDBExecutionError pt = A(1).prep_templates[0] # it should fail as this prep is public and have been submitted to ENA - with self.assertRaisesRegex(QEE, 'Cannot remove prep template 1'): + with self.assertRaisesRegex(QEE, "Cannot remove prep template 1"): PT.delete(pt.id) # now, remove those restrictions + analysis + linked artifacts sql = "DELETE FROM qiita.artifact_processing_job" @@ -1568,8 +1703,8 @@ def test_archive(self): # bringing back the filepaths for fp in fps: - copyfile(f'{fp}.bk', fp) + copyfile(f"{fp}.bk", fp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_base.py b/qiita_db/test/test_base.py index db606013f..b0d83a5c7 100644 --- a/qiita_db/test/test_base.py +++ b/qiita_db/test/test_base.py @@ -8,10 +8,10 @@ from unittest import TestCase, main +import qiita_db as qdb from qiita_core.exceptions import IncompetentQiitaDeveloperError -from qiita_core.util import qiita_test_checker from qiita_core.qiita_settings import qiita_config -import qiita_db as qdb +from qiita_core.util import qiita_test_checker @qiita_test_checker() @@ -53,10 +53,10 @@ def test_check_id(self): def test_check_portal(self): """Correctly checks if object is accessable in portal given""" - qiita_config.portal = 'QIITA' + qiita_config.portal = "QIITA" tester = qdb.analysis.Analysis(1) self.assertTrue(tester._check_portal(1)) - qiita_config.portal = 'EMP' + qiita_config.portal = "EMP" self.assertFalse(tester._check_portal(1)) self.assertTrue(self.tester._check_portal(1)) @@ -82,5 +82,5 @@ def test_not_equal_type(self): self.assertNotEqual(self.tester, new) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py index 36cf21f7c..688df83ab 100644 --- a/qiita_db/test/test_commands.py +++ b/qiita_db/test/test_commands.py @@ -6,55 +6,53 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from os import remove, close, mkdir +import configparser +from functools import partial +from os import close, mkdir, remove from os.path import exists, join -from tempfile import mkstemp, mkdtemp from shutil import rmtree +from tempfile import mkdtemp, mkstemp from unittest import TestCase, main -from six import StringIO -from functools import partial import pandas as pd - -from qiita_core.util import qiita_test_checker +from six import StringIO import qiita_db as qdb - -import configparser +from qiita_core.util import qiita_test_checker @qiita_test_checker() class TestMakeStudyFromCmd(TestCase): def setUp(self): qdb.study.StudyPerson.create( - 'SomeDude', 'somedude@foo.bar', 'some', - '111 fake street', '111-121-1313') - qdb.user.User.create('test@test.com', 'password') + "SomeDude", "somedude@foo.bar", "some", "111 fake street", "111-121-1313" + ) + qdb.user.User.create("test@test.com", "password") self.config1 = CONFIG_1 self.config2 = CONFIG_2 def test_make_study_from_cmd(self): fh = StringIO(self.config1) - qdb.commands.load_study_from_cmd('test@test.com', 'newstudy', fh) + qdb.commands.load_study_from_cmd("test@test.com", "newstudy", fh) with qdb.sql_connection.TRN: sql = """SELECT study_id FROM qiita.study WHERE email = %s AND study_title = %s""" - qdb.sql_connection.TRN.add(sql, ['test@test.com', 'newstudy']) + qdb.sql_connection.TRN.add(sql, ["test@test.com", "newstudy"]) study_id = qdb.sql_connection.TRN.execute_fetchflatten() self.assertEqual(study_id, [2]) fh2 = StringIO(self.config2) with self.assertRaises(configparser.NoOptionError): - qdb.commands.load_study_from_cmd('test@test.com', 'newstudy2', fh2) + qdb.commands.load_study_from_cmd("test@test.com", "newstudy2", fh2) @qiita_test_checker() class TestLoadArtifactFromCmd(TestCase): def setUp(self): - self.artifact_count = qdb.util.get_count('qiita.artifact') - self.fp_count = qdb.util.get_count('qiita.filepath') + self.artifact_count = qdb.util.get_count("qiita.artifact") + self.fp_count = qdb.util.get_count("qiita.filepath") self.files_to_remove = [] def tearDown(self): @@ -65,48 +63,63 @@ def tearDown(self): def test_load_artifact_from_cmd_error(self): with self.assertRaises(ValueError): qdb.commands.load_artifact_from_cmd( - ["fp1", "fp2"], ["preprocessed_fasta"], "Demultiplexed", - parents=[1], dflt_params_id=10, - required_params='{"input_data": 1}') + ["fp1", "fp2"], + ["preprocessed_fasta"], + "Demultiplexed", + parents=[1], + dflt_params_id=10, + required_params='{"input_data": 1}', + ) with self.assertRaises(ValueError): qdb.commands.load_artifact_from_cmd( - ["fp1"], ["preprocessed_fasta"], "Demultiplexed", - parents=[1, 2], dflt_params_id=10) + ["fp1"], + ["preprocessed_fasta"], + "Demultiplexed", + parents=[1, 2], + dflt_params_id=10, + ) def test_load_artifact_from_cmd_root(self): - fd, forward_fp = mkstemp(suffix='_forward.fastq.gz') + fd, forward_fp = mkstemp(suffix="_forward.fastq.gz") close(fd) self.files_to_remove.append(forward_fp) - fd, reverse_fp = mkstemp(suffix='_reverse.fastq.gz') + fd, reverse_fp = mkstemp(suffix="_reverse.fastq.gz") close(fd) self.files_to_remove.append(reverse_fp) - fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq.gz') + fd, barcodes_fp = mkstemp(suffix="_barcodes.fastq.gz") close(fd) self.files_to_remove.append(barcodes_fp) fps = [forward_fp, reverse_fp, barcodes_fp] for fp in fps: - with open(fp, 'w') as f: - f.write('\n') - ftypes = ['raw_forward_seqs', 'raw_reverse_seqs', 'raw_barcodes'] + with 
open(fp, "w") as f: + f.write("\n") + ftypes = ["raw_forward_seqs", "raw_reverse_seqs", "raw_barcodes"] metadata = pd.DataFrame.from_dict( - {'SKB8.640193': {'center_name': 'ANL', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'}}, - orient='index', dtype=str) + { + "SKB8.640193": { + "center_name": "ANL", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "experiment_design_description": "BBBB", + } + }, + orient="index", + dtype=str, + ) pt = qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(1), "16S") + metadata, qdb.study.Study(1), "16S" + ) obs = qdb.commands.load_artifact_from_cmd( - fps, ftypes, 'FASTQ', prep_template=pt.id) - self.files_to_remove.extend([x['fp'] for x in obs.filepaths]) + fps, ftypes, "FASTQ", prep_template=pt.id + ) + self.files_to_remove.extend([x["fp"] for x in obs.filepaths]) self.assertEqual(obs.id, self.artifact_count + 1) - self.assertTrue( - qdb.util.check_count('qiita.filepath', self.fp_count + 4)) + self.assertTrue(qdb.util.check_count("qiita.filepath", self.fp_count + 4)) def test_load_artifact_from_cmd_processed(self): fd, file1 = mkstemp() @@ -116,35 +129,43 @@ def test_load_artifact_from_cmd_processed(self): close(fd) self.files_to_remove.append(file2) fps = [file1, file2] - ftypes = ['preprocessed_fasta', 'preprocessed_fastq'] + ftypes = ["preprocessed_fasta", "preprocessed_fastq"] for fp in fps: - with open(fp, 'w') as f: + with open(fp, "w") as f: f.write("\n") obs = qdb.commands.load_artifact_from_cmd( - fps, ftypes, 'Demultiplexed', parents=[1], dflt_params_id=1, + fps, + ftypes, + "Demultiplexed", + parents=[1], + dflt_params_id=1, required_params='{"input_data": 1}', - optional_params='{"min_per_read_length_fraction": 0.80}') - self.files_to_remove.extend([x['fp'] for x in obs.filepaths]) + optional_params='{"min_per_read_length_fraction": 0.80}', + ) + self.files_to_remove.extend([x["fp"] for x in obs.filepaths]) self.assertEqual(obs.id, self.artifact_count + 1) - self.assertTrue( - qdb.util.check_count('qiita.filepath', self.fp_count + 2)) + self.assertTrue(qdb.util.check_count("qiita.filepath", self.fp_count + 2)) def test_load_artifact_from_cmd_biom(self): - fd, otu_table_fp = mkstemp(suffix='_otu_table.biom') + fd, otu_table_fp = mkstemp(suffix="_otu_table.biom") close(fd) self.files_to_remove.append(otu_table_fp) fps = [otu_table_fp] - ftypes = ['biom'] + ftypes = ["biom"] for fp in fps: - with open(fp, 'w') as f: + with open(fp, "w") as f: f.write("\n") obs = qdb.commands.load_artifact_from_cmd( - fps, ftypes, 'BIOM', parents=[3], dflt_params_id=10, - required_params='{"input_data": 3}') - self.files_to_remove.extend([x['fp'] for x in obs.filepaths]) + fps, + ftypes, + "BIOM", + parents=[3], + dflt_params_id=10, + required_params='{"input_data": 3}', + ) + self.files_to_remove.extend([x["fp"] for x in obs.filepaths]) self.assertEqual(obs.id, self.artifact_count + 1) - self.assertTrue( - qdb.util.check_count('qiita.filepath', self.fp_count + 1)) + self.assertTrue(qdb.util.check_count("qiita.filepath", self.fp_count + 1)) @qiita_test_checker() @@ -162,10 +183,11 @@ def setUp(self): "study_description": "Description of a test study", 
"study_abstract": "No abstract right now...", "principal_investigator_id": qdb.study.StudyPerson(3), - "lab_person_id": qdb.study.StudyPerson(1) + "lab_person_id": qdb.study.StudyPerson(1), } self.study = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Test study", info) + qdb.user.User("test@foo.bar"), "Test study", info + ) def test_load_sample_template_from_cmd(self): """Correctly adds a sample template to the DB""" @@ -182,23 +204,23 @@ def setUp(self): def test_load_prep_template_from_cmd(self): """Correctly adds a prep template to the DB""" fh = StringIO(self.pt_contents) - st = qdb.commands.load_prep_template_from_cmd(fh, 1, '18S') + st = qdb.commands.load_prep_template_from_cmd(fh, 1, "18S") self.assertEqual(st.id, 3) @qiita_test_checker() class TestLoadParametersFromCmd(TestCase): def setUp(self): - fd, self.fp = mkstemp(suffix='_params.txt') + fd, self.fp = mkstemp(suffix="_params.txt") close(fd) - fd, self.fp_wrong = mkstemp(suffix='_params.txt') + fd, self.fp_wrong = mkstemp(suffix="_params.txt") close(fd) - with open(self.fp, 'w') as f: + with open(self.fp, "w") as f: f.write(PARAMETERS) - with open(self.fp_wrong, 'w') as f: + with open(self.fp_wrong, "w") as f: f.write(PARAMETERS_ERROR) self.files_to_remove = [self.fp, self.fp_wrong] @@ -213,17 +235,17 @@ def tearDown(self): class TestPatch(TestCase): def setUp(self): self.patches_dir = mkdtemp() - self.py_patches_dir = join(self.patches_dir, 'python_patches') + self.py_patches_dir = join(self.patches_dir, "python_patches") mkdir(self.py_patches_dir) - patch2_fp = join(self.patches_dir, '2.sql') - patch10_fp = join(self.patches_dir, '10.sql') + patch2_fp = join(self.patches_dir, "2.sql") + patch10_fp = join(self.patches_dir, "10.sql") - with open(patch2_fp, 'w') as f: + with open(patch2_fp, "w") as f: f.write("CREATE TABLE qiita.patchtest2 (testing integer);\n") f.write("INSERT INTO qiita.patchtest2 VALUES (1);\n") f.write("INSERT INTO qiita.patchtest2 VALUES (9);\n") - with open(patch10_fp, 'w') as f: + with open(patch10_fp, "w") as f: f.write("CREATE TABLE qiita.patchtest10 (testing integer);\n") def tearDown(self): @@ -249,7 +271,8 @@ def _check_patchtest2(self, exists=True): exp = [[1], [9]] with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add( - """SELECT * FROM qiita.patchtest2 ORDER BY testing""") + """SELECT * FROM qiita.patchtest2 ORDER BY testing""" + ) obs = qdb.sql_connection.TRN.execute_fetchindex() self.assertEqual(obs, exp) @@ -257,7 +280,8 @@ def _check_patchtest10(self): with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add( """SELECT EXISTS(SELECT * FROM information_schema.tables - WHERE table_name = 'patchtest10')""") + WHERE table_name = 'patchtest10')""" + ) obs = qdb.sql_connection.TRN.execute_fetchflatten()[0] self.assertTrue(obs) @@ -277,24 +301,26 @@ def test_unpatched(self): """Test patching from unpatched state""" # Reset the settings table to the unpatched state qdb.sql_connection.perform_as_transaction( - "UPDATE settings SET current_patch = 'unpatched'") + "UPDATE settings SET current_patch = 'unpatched'" + ) - self._assert_current_patch('unpatched') + self._assert_current_patch("unpatched") qdb.environment_manager.patch(self.patches_dir) self._check_patchtest2() self._check_patchtest10() - self._assert_current_patch('10.sql') + self._assert_current_patch("10.sql") def test_skip_patch(self): """Test patching from a patched state""" qdb.sql_connection.perform_as_transaction( - "UPDATE settings SET current_patch = '2.sql'") - self._assert_current_patch('2.sql') + "UPDATE settings SET 
current_patch = '2.sql'" + ) + self._assert_current_patch("2.sql") # If it tried to apply patch 2.sql again, this will error qdb.environment_manager.patch(self.patches_dir) - self._assert_current_patch('10.sql') + self._assert_current_patch("10.sql") self._check_patchtest10() # Since we "tricked" the system, patchtest2 should not exist @@ -303,23 +329,25 @@ def test_skip_patch(self): def test_nonexistent_patch(self): """Test case where current patch does not exist""" qdb.sql_connection.perform_as_transaction( - "UPDATE settings SET current_patch = 'nope.sql'") - self._assert_current_patch('nope.sql') + "UPDATE settings SET current_patch = 'nope.sql'" + ) + self._assert_current_patch("nope.sql") with self.assertRaises(RuntimeError): qdb.environment_manager.patch(self.patches_dir) def test_python_patch(self): # Write a test python patch - patch10_py_fp = join(self.py_patches_dir, '10.py') - with open(patch10_py_fp, 'w') as f: + patch10_py_fp = join(self.py_patches_dir, "10.py") + with open(patch10_py_fp, "w") as f: f.write(PY_PATCH) # Reset the settings table to the unpatched state qdb.sql_connection.perform_as_transaction( - "UPDATE settings SET current_patch = 'unpatched'") + "UPDATE settings SET current_patch = 'unpatched'" + ) - self._assert_current_patch('unpatched') + self._assert_current_patch("unpatched") qdb.environment_manager.patch(self.patches_dir) @@ -329,20 +357,20 @@ def test_python_patch(self): exp = [[1], [100]] self.assertEqual(obs, exp) - self._assert_current_patch('10.sql') + self._assert_current_patch("10.sql") @qiita_test_checker() class TestUpdateArtifactFromCmd(TestCase): def setUp(self): - fd, seqs_fp = mkstemp(suffix='_seqs.fastq') + fd, seqs_fp = mkstemp(suffix="_seqs.fastq") close(fd) - fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq') + fd, barcodes_fp = mkstemp(suffix="_barcodes.fastq") close(fd) self.filepaths = [seqs_fp, barcodes_fp] self.checksums = [] for fp in sorted(self.filepaths): - with open(fp, 'w') as f: + with open(fp, "w") as f: f.write("%s\n" % fp) self.checksums.append(qdb.util.compute_checksum(fp)) self.filepaths_types = ["raw_forward_seqs", "raw_barcodes"] @@ -352,10 +380,8 @@ def setUp(self): def tearDown(self): new_uploaded_files = qdb.util.get_files_from_uploads_folders("1") new_files = set(new_uploaded_files).difference(self.uploaded_files) - path_builder = partial( - join, qdb.util.get_mountpoint("uploads")[0][1], '1') - self._clean_up_files.extend( - [path_builder(fp) for _, fp, _ in new_files]) + path_builder = partial(join, qdb.util.get_mountpoint("uploads")[0][1], "1") + self._clean_up_files.extend([path_builder(fp) for _, fp, _ in new_files]) for f in self._clean_up_files: if exists(f): remove(f) @@ -363,58 +389,69 @@ def tearDown(self): def test_update_artifact_from_cmd_error(self): with self.assertRaises(ValueError): qdb.commands.update_artifact_from_cmd( - self.filepaths[1:], self.filepaths_types, 1) + self.filepaths[1:], self.filepaths_types, 1 + ) with self.assertRaises(ValueError): qdb.commands.update_artifact_from_cmd( - self.filepaths, self.filepaths_types[1:], 1) + self.filepaths, self.filepaths_types[1:], 1 + ) def test_update_artifact_from_cmd(self): # Generate some files for an artifact - fd, fp1 = mkstemp(suffix='_seqs.fastq') + fd, fp1 = mkstemp(suffix="_seqs.fastq") close(fd) - with open(fp1, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" - "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" - "+\n" - "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n") - - fd, fp2 = mkstemp(suffix='_barcodes.fastq') + with open(fp1, "w") as 
f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n" + "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n" + "+\n" + "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n" + ) + + fd, fp2 = mkstemp(suffix="_barcodes.fastq") close(fd) - with open(fp2, 'w') as f: - f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n" - "NNNCNNNNNNNNN\n" - "+\n" - "#############\n") + with open(fp2, "w") as f: + f.write( + "@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n" + "NNNCNNNNNNNNN\n" + "+\n" + "#############\n" + ) filepaths = [(fp1, 1), (fp2, 3)] # Create a new prep template metadata_dict = { - 'SKB8.640193': {'center_name': 'ANL', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'}} - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) - self.prep_template = \ - qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(1), "16S") + "SKB8.640193": { + "center_name": "ANL", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "experiment_design_description": "BBBB", + } + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) + self.prep_template = qdb.metadata_template.prep_template.PrepTemplate.create( + metadata, qdb.study.Study(1), "16S" + ) artifact = qdb.artifact.Artifact.create( - filepaths, "FASTQ", prep_template=self.prep_template) + filepaths, "FASTQ", prep_template=self.prep_template + ) for x in artifact.filepaths: - self._clean_up_files.append(x['fp']) + self._clean_up_files.append(x["fp"]) new_artifact = qdb.commands.update_artifact_from_cmd( - self.filepaths, self.filepaths_types, artifact.id) + self.filepaths, self.filepaths_types, artifact.id + ) for x in new_artifact.filepaths: - self._clean_up_files.append(x['fp']) + self._clean_up_files.append(x["fp"]) - for obs, exp in zip(sorted(artifact.filepaths, key=lambda x: x['fp']), - self.checksums): - self.assertEqual(qdb.util.compute_checksum(obs['fp']), exp) + for obs, exp in zip( + sorted(artifact.filepaths, key=lambda x: x["fp"]), self.checksums + ): + self.assertEqual(qdb.util.compute_checksum(obs["fp"]), exp) CONFIG_1 = """[required] @@ -464,22 +501,24 @@ def test_update_artifact_from_cmd(self): "9606\thomo sapiens\tFasting_mouse_I.D._607\n" "PC.636\treceived\t06/18/14 16:44:00\ttype_1\tTrue\tLocation_1\tTrue\t" "HS_ID_PC.636\tFast\t20080116\t31.0856060708\t4.16781143893\t" - "9606\thomo sapiens\tFasting_mouse_I.D._636") + "9606\thomo sapiens\tFasting_mouse_I.D._636" +) PREP_TEMPLATE = ( - 'sample_name\tbarcode\tcenter_name\tcenter_project_name\t' - 'description_prep\tebi_submission_accession\temp_status\tprimer\t' - 'run_prefix\tstr_column\tplatform\tlibrary_construction_protocol\t' - 'experiment_design_description\tinstrument_model\n' - 'SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\tskb7\tNone\tEMP\t' - 'GTGCCAGCMGCCGCGGTAA\tts_G1_L001_sequences\tValue for sample 3\tA\tB\tC\t' - 'Illumina MiSeq\n' - 'SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\tskb8\tNone\tEMP\t' - 'GTGCCAGCMGCCGCGGTAA\tts_G1_L001_sequences\tValue for sample 1\tA\tB\tC\t' - 'Illumina MiSeq\n' - 'SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\tskd8\tNone\tEMP\t' - 'GTGCCAGCMGCCGCGGTAA\tts_G1_L001_sequences\tValue for sample 2\tA\tB\tC\t' - 
'Illumina MiSeq\n') + "sample_name\tbarcode\tcenter_name\tcenter_project_name\t" + "description_prep\tebi_submission_accession\temp_status\tprimer\t" + "run_prefix\tstr_column\tplatform\tlibrary_construction_protocol\t" + "experiment_design_description\tinstrument_model\n" + "SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\tskb7\tNone\tEMP\t" + "GTGCCAGCMGCCGCGGTAA\tts_G1_L001_sequences\tValue for sample 3\tA\tB\tC\t" + "Illumina MiSeq\n" + "SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\tskb8\tNone\tEMP\t" + "GTGCCAGCMGCCGCGGTAA\tts_G1_L001_sequences\tValue for sample 1\tA\tB\tC\t" + "Illumina MiSeq\n" + "SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\tskd8\tNone\tEMP\t" + "GTGCCAGCMGCCGCGGTAA\tts_G1_L001_sequences\tValue for sample 2\tA\tB\tC\t" + "Illumina MiSeq\n" +) PY_PATCH = """ from qiita_db.study import Study diff --git a/qiita_db/test/test_logger.py b/qiita_db/test/test_logger.py index b78f08052..ea4b65d78 100644 --- a/qiita_db/test/test_logger.py +++ b/qiita_db/test/test_logger.py @@ -8,26 +8,24 @@ from unittest import TestCase, main -from qiita_core.util import qiita_test_checker import qiita_db as qdb +from qiita_core.util import qiita_test_checker @qiita_test_checker() class LoggerTests(TestCase): def test_create_log_entry(self): """""" - qdb.logger.LogEntry.create('Runtime', 'runtime message') - qdb.logger.LogEntry.create('Fatal', 'fatal message', info={1: 2}) - qdb.logger.LogEntry.create('Warning', 'warning message', info={9: 0}) + qdb.logger.LogEntry.create("Runtime", "runtime message") + qdb.logger.LogEntry.create("Fatal", "fatal message", info={1: 2}) + qdb.logger.LogEntry.create("Warning", "warning message", info={9: 0}) with self.assertRaises(qdb.exceptions.QiitaDBLookupError): # This severity level does not exist in the test schema - qdb.logger.LogEntry.create('Chicken', 'warning message', - info={9: 0}) + qdb.logger.LogEntry.create("Chicken", "warning message", info={9: 0}) def test_severity_property(self): """""" - log_entry = qdb.logger.LogEntry.create('Warning', 'warning test', - info=None) + log_entry = qdb.logger.LogEntry.create("Warning", "warning test", info=None) self.assertEqual(log_entry.severity, 1) def test_time_property(self): @@ -36,8 +34,7 @@ def test_time_property(self): with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) before = qdb.sql_connection.TRN.execute_fetchflatten()[0] - log_entry = qdb.logger.LogEntry.create( - 'Warning', 'warning test', info=None) + log_entry = qdb.logger.LogEntry.create("Warning", "warning test", info=None) with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) after = qdb.sql_connection.TRN.execute_fetchflatten()[0] @@ -46,31 +43,34 @@ def test_time_property(self): def test_info_property(self): """""" log_entry = qdb.logger.LogEntry.create( - 'Warning', 'warning test', info={1: 2, 'test': 'yeah'}) - self.assertEqual(log_entry.info, [{'1': 2, 'test': 'yeah'}]) + "Warning", "warning test", info={1: 2, "test": "yeah"} + ) + self.assertEqual(log_entry.info, [{"1": 2, "test": "yeah"}]) def test_message_property(self): """""" - log_entry = qdb.logger.LogEntry.create( - 'Warning', 'warning test', info=None) - self.assertEqual(log_entry.msg, 'warning test') + log_entry = qdb.logger.LogEntry.create("Warning", "warning test", info=None) + self.assertEqual(log_entry.msg, "warning test") def test_add_info(self): """""" log_entry = qdb.logger.LogEntry.create( - 'Warning', 'warning test', info={1: 2, 'test': 'yeah'}) - log_entry.add_info({'another': 'set', 'of': 'entries', 'test': 3}) - self.assertEqual(log_entry.info, [{'1': 
2, 'test': 'yeah'}, - {'another': 'set', 'of': 'entries', - 'test': 3}]) + "Warning", "warning test", info={1: 2, "test": "yeah"} + ) + log_entry.add_info({"another": "set", "of": "entries", "test": 3}) + self.assertEqual( + log_entry.info, + [{"1": 2, "test": "yeah"}, {"another": "set", "of": "entries", "test": 3}], + ) def test_clear_info(self): """""" log_entry = qdb.logger.LogEntry.create( - 'Warning', 'warning test', info={1: 2, 'test': 'yeah'}) + "Warning", "warning test", info={1: 2, "test": "yeah"} + ) log_entry.clear_info() self.assertEqual(log_entry.info, []) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_meta_util.py b/qiita_db/test/test_meta_util.py index fdf55d101..a8989dbdb 100644 --- a/qiita_db/test/test_meta_util.py +++ b/qiita_db/test/test_meta_util.py @@ -6,19 +6,18 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main -import numpy.testing as npt -from tarfile import open as topen from os import remove from os.path import exists, join +from tarfile import open as topen +from unittest import TestCase, main +import numpy.testing as npt import pandas as pd +import qiita_db as qdb from qiita_core.qiita_settings import qiita_config, r_client from qiita_core.util import qiita_test_checker -import qiita_db as qdb - @qiita_test_checker() class MetaUtilTests(TestCase): @@ -33,55 +32,52 @@ def tearDown(self): remove(fp) def _set_artifact_private(self): - id_status = qdb.util.convert_to_id('private', 'visibility') + id_status = qdb.util.convert_to_id("private", "visibility") qdb.sql_connection.perform_as_transaction( - "UPDATE qiita.artifact SET visibility_id = %d" % id_status) + "UPDATE qiita.artifact SET visibility_id = %d" % id_status + ) def _set_artifact_public(self): - id_status = qdb.util.convert_to_id('public', 'visibility') + id_status = qdb.util.convert_to_id("public", "visibility") qdb.sql_connection.perform_as_transaction( - "UPDATE qiita.artifact SET visibility_id = %d" % id_status) + "UPDATE qiita.artifact SET visibility_id = %d" % id_status + ) def test_validate_filepath_access_by_user(self): self._set_artifact_private() # shared has access to all study files and analysis files - user = qdb.user.User('shared@foo.bar') + user = qdb.user.User("shared@foo.bar") for i in [1, 2, 3, 4, 5, 9, 12, 15, 16, 17, 18, 19, 20, 21]: - self.assertTrue(qdb.meta_util.validate_filepath_access_by_user( - user, i)) + self.assertTrue(qdb.meta_util.validate_filepath_access_by_user(user, i)) # Now shared should not have access to the study files qdb.study.Study(1).unshare(user) for i in [1, 2, 3, 4, 5, 9, 12, 17, 18, 19, 20, 21]: - self.assertFalse(qdb.meta_util.validate_filepath_access_by_user( - user, i)) + self.assertFalse(qdb.meta_util.validate_filepath_access_by_user(user, i)) # Note that 15 is the biom from the analysis and 16 is the # analysis mapping file and here we are testing access for i in [15, 16]: - self.assertTrue(qdb.meta_util.validate_filepath_access_by_user( - user, i)) + self.assertTrue(qdb.meta_util.validate_filepath_access_by_user(user, i)) # Now shared should not have access to any files qdb.analysis.Analysis(1).unshare(user) for i in [1, 2, 3, 4, 5, 9, 12, 15, 16, 17, 18, 19, 20, 21]: - self.assertFalse(qdb.meta_util.validate_filepath_access_by_user( - user, i)) + self.assertFalse(qdb.meta_util.validate_filepath_access_by_user(user, i)) # Now the Analysis is public so the user 
should have access again. Note # that we are not using the internal Analysis methods to skip # validation; thus simplifying the test code for a in qdb.analysis.Analysis(1).artifacts: - a.visibility = 'public' + a.visibility = "public" # Note that 15 is the biom from the analysis and 16 is the # analysis mapping file and here we are testing access for i in [15, 16]: - self.assertTrue(qdb.meta_util.validate_filepath_access_by_user( - user, i)) + self.assertTrue(qdb.meta_util.validate_filepath_access_by_user(user, i)) # returning to private for a in qdb.analysis.Analysis(1).artifacts: - a.visibility = 'private' + a.visibility = "private" # Now shared has access to public study files self._set_artifact_public() @@ -108,10 +104,11 @@ def test_validate_filepath_access_by_user(self): "study_description": "Description of a test study", "study_abstract": "No abstract right now...", "principal_investigator_id": 1, - "lab_person_id": 1 + "lab_person_id": 1, } study = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Test study", info) + qdb.user.User("test@foo.bar"), "Test study", info + ) for i in [1, 2, 3, 4, 5, 9, 12, 17, 18, 19, 20, 21]: obs = qdb.meta_util.validate_filepath_access_by_user(user, i) if i < 3: @@ -121,7 +118,8 @@ def test_validate_filepath_access_by_user(self): # test in case there is a prep template that failed qdb.sql_connection.perform_as_transaction( - "INSERT INTO qiita.prep_template (data_type_id) VALUES (2)") + "INSERT INTO qiita.prep_template (data_type_id) VALUES (2)" + ) for i in [1, 2, 3, 4, 5, 9, 12, 17, 18, 19, 20, 21]: obs = qdb.meta_util.validate_filepath_access_by_user(user, i) if i < 3: @@ -130,38 +128,38 @@ def test_validate_filepath_access_by_user(self): self.assertTrue(obs) # admin should have access to everything - admin = qdb.user.User('admin@foo.bar') + admin = qdb.user.User("admin@foo.bar") with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add( - "SELECT filepath_id FROM qiita.filepath") + qdb.sql_connection.TRN.add("SELECT filepath_id FROM qiita.filepath") fids = qdb.sql_connection.TRN.execute_fetchflatten() for i in fids: - self.assertTrue(qdb.meta_util.validate_filepath_access_by_user( - admin, i)) + self.assertTrue(qdb.meta_util.validate_filepath_access_by_user(admin, i)) # testing access to a prep info file without artifacts # returning artifacts to private self._set_artifact_private() PT = qdb.metadata_template.prep_template.PrepTemplate md_dict = { - 'SKB8.640193': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA', - 'barcodesequence': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'} + "SKB8.640193": { + "center_name": "ANL", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "linkerprimersequence": "GTGCCAGCMGCCGCGGTAA", + "barcodesequence": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "experiment_design_description": "BBBB", } - md = pd.DataFrame.from_dict(md_dict, orient='index', dtype=str) + } + md = pd.DataFrame.from_dict(md_dict, orient="index", dtype=str) # creating prep info on Study(1), which is our default Study - pt = npt.assert_warns(qdb.exceptions.QiitaDBWarning, PT.create, md, - qdb.study.Study(1), "18S") + pt = 
npt.assert_warns( + qdb.exceptions.QiitaDBWarning, PT.create, md, qdb.study.Study(1), "18S" + ) for idx, _ in pt.get_filepaths(): - self.assertFalse(qdb.meta_util.validate_filepath_access_by_user( - user, idx)) + self.assertFalse(qdb.meta_util.validate_filepath_access_by_user(user, idx)) # returning to original sharing PT.delete(pt.id) @@ -177,7 +175,7 @@ def test_get_lat_longs(self): old_visibility = {} for pt in qdb.study.Study(1).prep_templates(): old_visibility[pt] = pt.artifact.visibility - pt.artifact.visibility = 'public' + pt.artifact.visibility = "public" exp = [ [1, 74.0894932572, 65.3283470202], [1, 57.571893782, 32.5563076447], @@ -203,7 +201,8 @@ def test_get_lat_longs(self): [1, 40.8623799474, 6.66444220187], [1, 95.2060749748, 27.3592668624], [1, 78.3634273709, 74.423907894], - [1, 38.2627021402, 3.48274264219]] + [1, 38.2627021402, 3.48274264219], + ] obs = qdb.meta_util.get_lat_longs() self.assertCountEqual(obs, exp) @@ -212,41 +211,43 @@ def test_get_lat_longs(self): def test_get_lat_longs_EMP_portal(self): info = { - 'timeseries_type_id': 1, - 'lab_person_id': None, - 'principal_investigator_id': 3, - 'metadata_complete': False, - 'mixs_compliant': True, - 'study_description': 'desc', - 'study_alias': 'alias', - 'study_abstract': 'abstract'} + "timeseries_type_id": 1, + "lab_person_id": None, + "principal_investigator_id": 3, + "metadata_complete": False, + "mixs_compliant": True, + "study_description": "desc", + "study_alias": "alias", + "study_abstract": "abstract", + } study = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), 'test_study_1', info=info) - qdb.portal.Portal('EMP').add_studies([study.id]) + qdb.user.User("test@foo.bar"), "test_study_1", info=info + ) + qdb.portal.Portal("EMP").add_studies([study.id]) md = { - 'my.sample': { - 'physical_specimen_location': 'location1', - 'physical_specimen_remaining': True, - 'dna_extracted': True, - 'sample_type': 'type1', - 'collection_timestamp': '2014-05-29 12:24:51', - 'host_subject_id': 'NotIdentified', - 'Description': 'Test Sample 4', - 'str_column': 'Value for sample 4', - 'int_column': 4, - 'latitude': 42.42, - 'longitude': 41.41, - 'taxon_id': 9606, - 'scientific_name': 'homo sapiens'} + "my.sample": { + "physical_specimen_location": "location1", + "physical_specimen_remaining": True, + "dna_extracted": True, + "sample_type": "type1", + "collection_timestamp": "2014-05-29 12:24:51", + "host_subject_id": "NotIdentified", + "Description": "Test Sample 4", + "str_column": "Value for sample 4", + "int_column": 4, + "latitude": 42.42, + "longitude": 41.41, + "taxon_id": 9606, + "scientific_name": "homo sapiens", + } } - md_ext = pd.DataFrame.from_dict(md, orient='index', dtype=str) - st = qdb.metadata_template.sample_template.SampleTemplate.create( - md_ext, study) + md_ext = pd.DataFrame.from_dict(md, orient="index", dtype=str) + st = qdb.metadata_template.sample_template.SampleTemplate.create(md_ext, study) - qiita_config.portal = 'EMP' + qiita_config.portal = "EMP" obs = qdb.meta_util.get_lat_longs() exp = [] @@ -259,7 +260,7 @@ def test_update_redis_stats(self): # helper function to get the values in the stats_daily table def _get_daily_stats(): with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add('SELECT * FROM qiita.stats_daily') + qdb.sql_connection.TRN.add("SELECT * FROM qiita.stats_daily") return qdb.sql_connection.TRN.execute_fetchindex() # checking empty status of stats in DB @@ -271,22 +272,29 @@ def _get_daily_stats(): portal = qiita_config.portal # let's first test the dictionaries vals = [ 
- ('number_studies', {b'sandbox': b'0', b'public': b'0', - b'private': b'1'}, r_client.hgetall), - ('number_of_samples', {b'sandbox': b'0', b'public': b'0', - b'private': b'27'}, r_client.hgetall), - ('per_data_type_stats', {b'No data': b'0'}, r_client.hgetall)] + ( + "number_studies", + {b"sandbox": b"0", b"public": b"0", b"private": b"1"}, + r_client.hgetall, + ), + ( + "number_of_samples", + {b"sandbox": b"0", b"public": b"0", b"private": b"27"}, + r_client.hgetall, + ), + ("per_data_type_stats", {b"No data": b"0"}, r_client.hgetall), + ] for k, exp, f in vals: - redis_key = '%s:stats:%s' % (portal, k) + redis_key = "%s:stats:%s" % (portal, k) self.assertDictEqual(f(redis_key), exp) # then the unique values vals = [ - ('num_users', b'7', r_client.get), - ('lat_longs', b'[]', r_client.get), - ('num_studies_ebi', b'1', r_client.get), - ('num_samples_ebi', b'27', r_client.get), - ('number_samples_ebi_prep', b'54', r_client.get), - ('num_processing_jobs', b'474', r_client.get) + ("num_users", b"7", r_client.get), + ("lat_longs", b"[]", r_client.get), + ("num_studies_ebi", b"1", r_client.get), + ("num_samples_ebi", b"27", r_client.get), + ("number_samples_ebi_prep", b"54", r_client.get), + ("num_processing_jobs", b"474", r_client.get), # not testing img/time for simplicity # ('img', r_client.get), # ('time', r_client.get) @@ -298,13 +306,12 @@ def _get_daily_stats(): db_stats = dict(db_stats[0]) for k, exp, f in vals: - redis_key = '%s:stats:%s' % (portal, k) + redis_key = "%s:stats:%s" % (portal, k) # checking redis values self.assertEqual(f(redis_key), exp) # checking DB values; note that redis stores all values as bytes, # thus we have to convert what's in the DB to bytes - self.assertEqual( - f(redis_key), str.encode(str(db_stats['stats'][k]))) + self.assertEqual(f(redis_key), str.encode(str(db_stats["stats"][k]))) # regenerating stats to make sure that we have 2 rows in the DB qdb.meta_util.update_redis_stats() @@ -314,19 +321,20 @@ def _get_daily_stats(): self.assertEqual(2, len(db_stats)) def test_generate_biom_and_metadata_release(self): - level = 'private' + level = "private" qdb.meta_util.generate_biom_and_metadata_release(level) portal = qiita_config.portal working_dir = qiita_config.working_dir vals = [ - ('filepath', r_client.get), - ('md5sum', r_client.get), - ('time', r_client.get)] + ("filepath", r_client.get), + ("md5sum", r_client.get), + ("time", r_client.get), + ] # we are storing the [0] filepath, [1] md5sum and [2] time but we are # only going to check the filepath contents so ignoring the others - tgz = vals[0][1]('%s:release:%s:%s' % (portal, level, vals[0][0])) - tgz = join(working_dir, tgz.decode('ascii')) + tgz = vals[0][1]("%s:release:%s:%s" % (portal, level, vals[0][0])) + tgz = join(working_dir, tgz.decode("ascii")) self.files_to_remove.extend([tgz]) @@ -335,21 +343,19 @@ def test_generate_biom_and_metadata_release(self): tmp.close() # files names might change due to updates and patches so just check # that the prefix exists. - fn = 'processed_data/1_study_1001_closed_reference_otu_table.biom' + fn = "processed_data/1_study_1001_closed_reference_otu_table.biom" self.assertTrue(fn in tgz_obs) tgz_obs.remove(fn) # yes, this file is there twice self.assertTrue(fn in tgz_obs) tgz_obs.remove(fn) # let's check the next biom - fn = ('processed_data/1_study_1001_closed_reference_otu_table_Silva.' 
- 'biom') + fn = "processed_data/1_study_1001_closed_reference_otu_table_Silva.biom" self.assertTrue(fn in tgz_obs) tgz_obs.remove(fn) # now let's check prep info files based on their suffix, just take # the first one and check/rm the occurances of that file - fn_prep = [f for f in tgz_obs - if f.startswith('templates/1_prep_1_')][0] + fn_prep = [f for f in tgz_obs if f.startswith("templates/1_prep_1_")][0] # 3 times self.assertTrue(fn_prep in tgz_obs) tgz_obs.remove(fn_prep) @@ -357,7 +363,7 @@ def test_generate_biom_and_metadata_release(self): tgz_obs.remove(fn_prep) self.assertTrue(fn_prep in tgz_obs) tgz_obs.remove(fn_prep) - fn_sample = [f for f in tgz_obs if f.startswith('templates/1_')][0] + fn_sample = [f for f in tgz_obs if f.startswith("templates/1_")][0] # 3 times self.assertTrue(fn_sample in tgz_obs) tgz_obs.remove(fn_sample) @@ -372,67 +378,64 @@ def test_generate_biom_and_metadata_release(self): tmp = topen(tgz, "r:gz") fhd = tmp.extractfile(txt) - txt_obs = [line.decode('ascii') for line in fhd.readlines()] + txt_obs = [line.decode("ascii") for line in fhd.readlines()] tmp.close() txt_exp = [ - 'biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t' - 'target gene\tmerging scheme\tartifact software\t' - 'parent software\n', - 'processed_data/1_study_1001_closed_reference_otu_table.biom\t' - '%s\t%s\t4\tIllumina\t16S rRNA\t' - 'Pick closed-reference OTUs | Split libraries FASTQ\t' - 'QIIMEq2 v1.9.1\tQIIMEq2 v1.9.1\n' % (fn_sample, fn_prep), - 'processed_data/1_study_1001_closed_reference_otu_table.biom\t' - '%s\t%s\t5\tIllumina\t16S rRNA\t' - 'Pick closed-reference OTUs | Split libraries FASTQ\t' - 'QIIMEq2 v1.9.1\tQIIMEq2 v1.9.1\n' % (fn_sample, fn_prep), - 'processed_data/1_study_1001_closed_reference_otu_table_Silva.bio' - 'm\t%s\t%s\t6\tIllumina\t16S rRNA\t' - 'Pick closed-reference OTUs | Split libraries FASTQ\t' - 'QIIMEq2 v1.9.1\tQIIMEq2 v1.9.1' % (fn_sample, fn_prep)] + "biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t" + "target gene\tmerging scheme\tartifact software\t" + "parent software\n", + "processed_data/1_study_1001_closed_reference_otu_table.biom\t" + "%s\t%s\t4\tIllumina\t16S rRNA\t" + "Pick closed-reference OTUs | Split libraries FASTQ\t" + "QIIMEq2 v1.9.1\tQIIMEq2 v1.9.1\n" % (fn_sample, fn_prep), + "processed_data/1_study_1001_closed_reference_otu_table.biom\t" + "%s\t%s\t5\tIllumina\t16S rRNA\t" + "Pick closed-reference OTUs | Split libraries FASTQ\t" + "QIIMEq2 v1.9.1\tQIIMEq2 v1.9.1\n" % (fn_sample, fn_prep), + "processed_data/1_study_1001_closed_reference_otu_table_Silva.bio" + "m\t%s\t%s\t6\tIllumina\t16S rRNA\t" + "Pick closed-reference OTUs | Split libraries FASTQ\t" + "QIIMEq2 v1.9.1\tQIIMEq2 v1.9.1" % (fn_sample, fn_prep), + ] self.assertEqual(txt_obs, txt_exp) # whatever the configuration was, we will change to settings so we can # test the other option when dealing with the end '/' with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add( - "SELECT base_data_dir FROM settings") + qdb.sql_connection.TRN.add("SELECT base_data_dir FROM settings") obdr = qdb.sql_connection.TRN.execute_fetchlast() - if obdr[-1] == '/': + if obdr[-1] == "/": bdr = obdr[:-1] else: - bdr = obdr + '/' + bdr = obdr + "/" - qdb.sql_connection.TRN.add( - "UPDATE settings SET base_data_dir = '%s'" % bdr) + qdb.sql_connection.TRN.add("UPDATE settings SET base_data_dir = '%s'" % bdr) bdr = qdb.sql_connection.TRN.execute() qdb.meta_util.generate_biom_and_metadata_release(level) # we are storing the [0] filepath, [1] md5sum and [2] time but we are # only going 
to check the filepath contents so ignoring the others - tgz = vals[0][1]('%s:release:%s:%s' % (portal, level, vals[0][0])) - tgz = join(working_dir, tgz.decode('ascii')) + tgz = vals[0][1]("%s:release:%s:%s" % (portal, level, vals[0][0])) + tgz = join(working_dir, tgz.decode("ascii")) tmp = topen(tgz, "r:gz") tgz_obs = [ti.name for ti in tmp] tmp.close() # files names might change due to updates and patches so just check # that the prefix exists. - fn = 'processed_data/1_study_1001_closed_reference_otu_table.biom' + fn = "processed_data/1_study_1001_closed_reference_otu_table.biom" self.assertTrue(fn in tgz_obs) tgz_obs.remove(fn) # yes, this file is there twice self.assertTrue(fn in tgz_obs) tgz_obs.remove(fn) # let's check the next biom - fn = ('processed_data/1_study_1001_closed_reference_otu_table_Silva.' - 'biom') + fn = "processed_data/1_study_1001_closed_reference_otu_table_Silva.biom" self.assertTrue(fn in tgz_obs) tgz_obs.remove(fn) # now let's check prep info files based on their suffix, just take # the first one and check/rm the occurances of that file - fn_prep = [f for f in tgz_obs - if f.startswith('templates/1_prep_1_')][0] + fn_prep = [f for f in tgz_obs if f.startswith("templates/1_prep_1_")][0] # 3 times self.assertTrue(fn_prep in tgz_obs) tgz_obs.remove(fn_prep) @@ -440,7 +443,7 @@ def test_generate_biom_and_metadata_release(self): tgz_obs.remove(fn_prep) self.assertTrue(fn_prep in tgz_obs) tgz_obs.remove(fn_prep) - fn_sample = [f for f in tgz_obs if f.startswith('templates/1_')][0] + fn_sample = [f for f in tgz_obs if f.startswith("templates/1_")][0] # 3 times self.assertTrue(fn_sample in tgz_obs) tgz_obs.remove(fn_sample) @@ -455,37 +458,39 @@ def test_generate_biom_and_metadata_release(self): tmp = topen(tgz, "r:gz") fhd = tmp.extractfile(txt) - txt_obs = [line.decode('ascii') for line in fhd.readlines()] + txt_obs = [line.decode("ascii") for line in fhd.readlines()] tmp.close() txt_exp = [ - 'biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t' - 'target gene\tmerging scheme\tartifact software\t' - 'parent software\n', - 'processed_data/1_study_1001_closed_reference_otu_table.biom\t' - '%s\t%s\t4\tIllumina\t16S rRNA\t' - 'Pick closed-reference OTUs | Split libraries FASTQ\t' - 'QIIMEq2 v1.9.1\tQIIMEq2 v1.9.1\n' % (fn_sample, fn_prep), - 'processed_data/1_study_1001_closed_reference_otu_table.biom\t' - '%s\t%s\t5\tIllumina\t16S rRNA\t' - 'Pick closed-reference OTUs | Split libraries FASTQ\t' - 'QIIMEq2 v1.9.1\tQIIMEq2 v1.9.1\n' % (fn_sample, fn_prep), - 'processed_data/1_study_1001_closed_reference_otu_table_Silva.bio' - 'm\t%s\t%s\t6\tIllumina\t16S rRNA\t' - 'Pick closed-reference OTUs | Split libraries FASTQ' - '\tQIIMEq2 v1.9.1\tQIIMEq2 v1.9.1' % (fn_sample, fn_prep)] + "biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t" + "target gene\tmerging scheme\tartifact software\t" + "parent software\n", + "processed_data/1_study_1001_closed_reference_otu_table.biom\t" + "%s\t%s\t4\tIllumina\t16S rRNA\t" + "Pick closed-reference OTUs | Split libraries FASTQ\t" + "QIIMEq2 v1.9.1\tQIIMEq2 v1.9.1\n" % (fn_sample, fn_prep), + "processed_data/1_study_1001_closed_reference_otu_table.biom\t" + "%s\t%s\t5\tIllumina\t16S rRNA\t" + "Pick closed-reference OTUs | Split libraries FASTQ\t" + "QIIMEq2 v1.9.1\tQIIMEq2 v1.9.1\n" % (fn_sample, fn_prep), + "processed_data/1_study_1001_closed_reference_otu_table_Silva.bio" + "m\t%s\t%s\t6\tIllumina\t16S rRNA\t" + "Pick closed-reference OTUs | Split libraries FASTQ" + "\tQIIMEq2 v1.9.1\tQIIMEq2 v1.9.1" % (fn_sample, fn_prep), + 
] self.assertEqual(txt_obs, txt_exp) # returning configuration qdb.sql_connection.perform_as_transaction( - "UPDATE settings SET base_data_dir = '%s'" % obdr) + "UPDATE settings SET base_data_dir = '%s'" % obdr + ) # testing public/default release qdb.meta_util.generate_biom_and_metadata_release() # we are storing the [0] filepath, [1] md5sum and [2] time but we are # only going to check the filepath contents so ignoring the others - tgz = vals[0][1]('%s:release:%s:%s' % (portal, 'public', vals[0][0])) - tgz = join(working_dir, tgz.decode('ascii')) + tgz = vals[0][1]("%s:release:%s:%s" % (portal, "public", vals[0][0])) + tgz = join(working_dir, tgz.decode("ascii")) tmp = topen(tgz, "r:gz") tgz_obs = [ti.name for ti in tmp] @@ -496,27 +501,33 @@ def test_generate_biom_and_metadata_release(self): tmp = topen(tgz, "r:gz") fhd = tmp.extractfile(txt) - txt_obs = [line.decode('ascii') for line in fhd.readlines()] + txt_obs = [line.decode("ascii") for line in fhd.readlines()] tmp.close() # we should only get the header txt_exp = [ - 'biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t' - 'target gene\tmerging scheme\tartifact software\t' - 'parent software'] + "biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t" + "target gene\tmerging scheme\tartifact software\t" + "parent software" + ] self.assertEqual(txt_obs, txt_exp) def test_generate_plugin_releases(self): qdb.meta_util.generate_plugin_releases() working_dir = qiita_config.working_dir - tgz = r_client.get('release-archive:filepath') - with topen(join(working_dir, tgz.decode('ascii')), "r:gz") as tmp: + tgz = r_client.get("release-archive:filepath") + with topen(join(working_dir, tgz.decode("ascii")), "r:gz") as tmp: tgz_obs = [ti.name for ti in tmp] # the expected folder/file in the tgz should be named as the time # when it was created so let's test that - time = r_client.get('release-archive:time').decode('ascii').replace( - '-', '').replace(':', '').replace(' ', '-') + time = ( + r_client.get("release-archive:time") + .decode("ascii") + .replace("-", "") + .replace(":", "") + .replace(" ", "-") + ) self.assertEqual(tgz_obs, [time]) def test_update_resource_allocation_redis(self): @@ -525,8 +536,13 @@ def test_update_resource_allocation_redis(self): col_name = "samples * columns" version = "1.9.1" qdb.meta_util.update_resource_allocation_redis(False) - title_mem_str = 'resources$#%s$#%s$#%s$#%s:%s' % ( - cname, sname, version, col_name, 'title_mem') + title_mem_str = "resources$#%s$#%s$#%s$#%s:%s" % ( + cname, + sname, + version, + col_name, + "title_mem", + ) title_mem = str(r_client.get(title_mem_str)) self.assertTrue( "model: (k * (np.log(x))) + " @@ -534,8 +550,13 @@ def test_update_resource_allocation_redis(self): "(a * ((np.log(x))**2.5))" in title_mem ) - title_time_str = 'resources$#%s$#%s$#%s$#%s:%s' % ( - cname, sname, version, col_name, 'title_time') + title_time_str = "resources$#%s$#%s$#%s$#%s:%s" % ( + cname, + sname, + version, + col_name, + "title_time", + ) title_time = str(r_client.get(title_time_str)) self.assertTrue( "model: (a * ((np.log(x))**3)) + " @@ -544,5 +565,5 @@ def test_update_resource_allocation_redis(self): ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_ontology.py b/qiita_db/test/test_ontology.py index 483c0dc06..f3f4f204b 100644 --- a/qiita_db/test/test_ontology.py +++ b/qiita_db/test/test_ontology.py @@ -8,8 +8,8 @@ from unittest import TestCase, main -from qiita_core.util import qiita_test_checker import qiita_db as qdb +from 
qiita_core.util import qiita_test_checker @qiita_test_checker() @@ -19,56 +19,53 @@ def setUp(self): def _remove_term(self, term): sql = "DELETE FROM qiita.term WHERE ontology_id = %s AND term = %s" - qdb.sql_connection.perform_as_transaction( - sql, [self.ontology.id, term]) + qdb.sql_connection.perform_as_transaction(sql, [self.ontology.id, term]) def testConvertToID(self): - self.assertEqual(qdb.util.convert_to_id('ENA', 'ontology'), 999999999) + self.assertEqual(qdb.util.convert_to_id("ENA", "ontology"), 999999999) def testConvertFromID(self): - self.assertEqual( - qdb.util.convert_from_id(999999999, 'ontology'), 'ENA') + self.assertEqual(qdb.util.convert_from_id(999999999, "ontology"), "ENA") def testShortNameProperty(self): - self.assertEqual(self.ontology.shortname, 'ENA') + self.assertEqual(self.ontology.shortname, "ENA") def testTerms(self): obs = self.ontology.terms - self.assertEqual( - obs, ['WGS', 'Metagenomics', 'AMPLICON', 'RNA-Seq', 'Other']) + self.assertEqual(obs, ["WGS", "Metagenomics", "AMPLICON", "RNA-Seq", "Other"]) def test_user_defined_terms(self): obs = self.ontology.user_defined_terms self.assertEqual(obs, []) def test_term_type(self): - obs = self.ontology.term_type('RNA-Seq') - self.assertEqual('ontology', obs) + obs = self.ontology.term_type("RNA-Seq") + self.assertEqual("ontology", obs) - obs = self.ontology.term_type('Sasquatch') - self.assertEqual('not_ontology', obs) + obs = self.ontology.term_type("Sasquatch") + self.assertEqual("not_ontology", obs) - self.ontology.add_user_defined_term('Test Term') - obs = self.ontology.term_type('Test Term') - self.assertEqual('user_defined', obs) + self.ontology.add_user_defined_term("Test Term") + obs = self.ontology.term_type("Test Term") + self.assertEqual("user_defined", obs) - self._remove_term('Test Term') + self._remove_term("Test Term") def test_add_user_defined_term(self): - self.assertFalse('Test Term' in self.ontology.user_defined_terms) + self.assertFalse("Test Term" in self.ontology.user_defined_terms) pre = len(self.ontology.user_defined_terms) - self.ontology.add_user_defined_term('Test Term') + self.ontology.add_user_defined_term("Test Term") post = len(self.ontology.user_defined_terms) - self.assertTrue('Test Term' in self.ontology.user_defined_terms) - self.assertEqual(post-pre, 1) + self.assertTrue("Test Term" in self.ontology.user_defined_terms) + self.assertEqual(post - pre, 1) # Clean up the previously added term to avoid test failures - self._remove_term('Test Term') + self._remove_term("Test Term") def testContains(self): - self.assertTrue('Metagenomics' in self.ontology) - self.assertFalse('NotATerm' in self.ontology) + self.assertTrue("Metagenomics" in self.ontology) + self.assertFalse("NotATerm" in self.ontology) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_portal.py b/qiita_db/test/test_portal.py index 59b277a42..5eaf6b4cc 100644 --- a/qiita_db/test/test_portal.py +++ b/qiita_db/test/test_portal.py @@ -10,9 +10,9 @@ import numpy.testing as npt -from qiita_core.util import qiita_test_checker -from qiita_core.qiita_settings import qiita_config import qiita_db as qdb +from qiita_core.qiita_settings import qiita_config +from qiita_core.util import qiita_test_checker @qiita_test_checker() @@ -21,15 +21,15 @@ def setUp(self): self.portal = qiita_config.portal self.study = qdb.study.Study(1) self.analysis = qdb.analysis.Analysis(1) - self.qiita_portal = qdb.portal.Portal('QIITA') - self.emp_portal = qdb.portal.Portal('EMP') + self.qiita_portal = 
qdb.portal.Portal("QIITA") + self.emp_portal = qdb.portal.Portal("EMP") def tearDown(self): qiita_config.portal = self.portal def test_list_portals(self): obs = qdb.portal.Portal.list_portals() - exp = ['EMP'] + exp = ["EMP"] self.assertEqual(obs, exp) def test_add_portal(self): @@ -37,46 +37,74 @@ def test_add_portal(self): with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add("SELECT * FROM qiita.portal_type") obs = qdb.sql_connection.TRN.execute_fetchindex() - exp = [[1, 'QIITA', 'QIITA portal. Access to all data stored ' - 'in database.'], - [2, 'EMP', 'EMP portal'], - [4, 'NEWPORTAL', 'SOMEDESC']] + exp = [ + [1, "QIITA", "QIITA portal. Access to all data stored in database."], + [2, "EMP", "EMP portal"], + [4, "NEWPORTAL", "SOMEDESC"], + ] self.assertCountEqual(obs, exp) with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add("SELECT * FROM qiita.analysis_portal") obs = qdb.sql_connection.TRN.execute_fetchindex() - exp = [[1, 1], [2, 1], [3, 1], [4, 1], [5, 1], [6, 1], [7, 2], [8, 2], - [9, 2], [10, 2], [11, 4], [12, 4], [13, 4], [14, 4], - [15, 4], [16, 4], [17, 4]] + exp = [ + [1, 1], + [2, 1], + [3, 1], + [4, 1], + [5, 1], + [6, 1], + [7, 2], + [8, 2], + [9, 2], + [10, 2], + [11, 4], + [12, 4], + [13, 4], + [14, 4], + [15, 4], + [16, 4], + [17, 4], + ] self.assertCountEqual(obs, exp) with self.assertRaises(qdb.exceptions.QiitaDBDuplicateError): qdb.portal.Portal.create("EMP", "DOESNTMATTERFORDESC") - qdb.portal.Portal.delete('NEWPORTAL') + qdb.portal.Portal.delete("NEWPORTAL") def test_remove_portal(self): qdb.portal.Portal.create("NEWPORTAL", "SOMEDESC") # Select some samples on a default analysis qiita_config.portal = "NEWPORTAL" a = qdb.user.User("test@foo.bar").default_analysis - a.add_samples({1: ['1.SKB8.640193', '1.SKD5.640186']}) + a.add_samples({1: ["1.SKB8.640193", "1.SKD5.640186"]}) qdb.portal.Portal.delete("NEWPORTAL") with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add("SELECT * FROM qiita.portal_type") obs = qdb.sql_connection.TRN.execute_fetchindex() - exp = [[1, 'QIITA', 'QIITA portal. Access to all data stored ' - 'in database.'], - [2, 'EMP', 'EMP portal']] + exp = [ + [1, "QIITA", "QIITA portal. 
Access to all data stored in database."], + [2, "EMP", "EMP portal"], + ] self.assertCountEqual(obs, exp) with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add("SELECT * FROM qiita.analysis_portal") obs = qdb.sql_connection.TRN.execute_fetchindex() - exp = [[1, 1], [2, 1], [3, 1], [4, 1], [5, 1], [6, 1], [7, 2], [8, 2], - [9, 2], [10, 2]] + exp = [ + [1, 1], + [2, 1], + [3, 1], + [4, 1], + [5, 1], + [6, 1], + [7, 2], + [8, 2], + [9, 2], + [10, 2], + ] self.assertCountEqual(obs, exp) with self.assertRaises(qdb.exceptions.QiitaDBLookupError): @@ -93,16 +121,17 @@ def test_remove_portal(self): "mixs_compliant": True, "study_alias": "FCM", "study_description": "Microbiome of people who eat nothing but " - "fried chicken", + "fried chicken", "study_abstract": "Exploring how a high fat diet changes the " - "gut microbiome", + "gut microbiome", "principal_investigator_id": qdb.study.StudyPerson(3), - "lab_person_id": qdb.study.StudyPerson(1) + "lab_person_id": qdb.study.StudyPerson(1), } qdb.portal.Portal.create("NEWPORTAL3", "SOMEDESC") qiita_config.portal = "NEWPORTAL3" qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Fried chicken microbiome", info) + qdb.user.User("test@foo.bar"), "Fried chicken microbiome", info + ) qiita_config.portal = "QIITA" with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.portal.Portal.delete("NEWPORTAL3") @@ -128,10 +157,11 @@ def test_get_studies_by_portal(self): def test_add_study_portals(self): obs = qdb.portal.Portal.create("NEWPORTAL4", "SOMEDESC") obs.add_studies([self.study.id]) - self.assertCountEqual(self.study._portals, ['NEWPORTAL4', 'QIITA']) + self.assertCountEqual(self.study._portals, ["NEWPORTAL4", "QIITA"]) - npt.assert_warns(qdb.exceptions.QiitaDBWarning, obs.add_studies, - [self.study.id]) + npt.assert_warns( + qdb.exceptions.QiitaDBWarning, obs.add_studies, [self.study.id] + ) obs.remove_studies([self.study.id]) qdb.portal.Portal.delete("NEWPORTAL4") @@ -144,54 +174,67 @@ def test_remove_study_portals(self): # Set up the analysis in EMP portal self.emp_portal.add_analyses([self.analysis.id]) obs = self.analysis._portals - self.assertCountEqual(obs, ['QIITA', 'EMP']) + self.assertCountEqual(obs, ["QIITA", "EMP"]) # Test study removal failure with self.assertRaises(qdb.exceptions.QiitaDBError): self.emp_portal.remove_studies([self.study.id]) obs = self.study._portals - self.assertCountEqual(obs, ['QIITA', 'EMP']) + self.assertCountEqual(obs, ["QIITA", "EMP"]) # Test study removal self.emp_portal.remove_analyses([self.analysis.id]) self.emp_portal.remove_studies([self.study.id]) obs = self.study._portals - self.assertEqual(obs, ['QIITA']) + self.assertEqual(obs, ["QIITA"]) obs = npt.assert_warns( - qdb.exceptions.QiitaDBWarning, self.emp_portal.remove_studies, - [self.study.id]) + qdb.exceptions.QiitaDBWarning, + self.emp_portal.remove_studies, + [self.study.id], + ) def test_get_analyses_by_portal(self): - qiita_config.portal = 'EMP' - exp = {qdb.analysis.Analysis(7), qdb.analysis.Analysis(8), - qdb.analysis.Analysis(9), qdb.analysis.Analysis(10)} + qiita_config.portal = "EMP" + exp = { + qdb.analysis.Analysis(7), + qdb.analysis.Analysis(8), + qdb.analysis.Analysis(9), + qdb.analysis.Analysis(10), + } obs = self.emp_portal.get_analyses() self.assertEqual(obs, exp) - qiita_config.portal = 'QIITA' - exp = {qdb.analysis.Analysis(1), qdb.analysis.Analysis(2), - qdb.analysis.Analysis(3), qdb.analysis.Analysis(4), - qdb.analysis.Analysis(5), qdb.analysis.Analysis(6)} + qiita_config.portal = "QIITA" + exp = { + qdb.analysis.Analysis(1), + 
qdb.analysis.Analysis(2), + qdb.analysis.Analysis(3), + qdb.analysis.Analysis(4), + qdb.analysis.Analysis(5), + qdb.analysis.Analysis(6), + } obs = self.qiita_portal.get_analyses() self.assertEqual(obs, exp) def test_add_analysis_portals(self): obs = self.analysis._portals - self.assertEqual(obs, ['QIITA']) + self.assertEqual(obs, ["QIITA"]) with self.assertRaises(qdb.exceptions.QiitaDBError): self.emp_portal.add_analyses([self.analysis.id]) obs = self.analysis._portals - self.assertEqual(obs, ['QIITA']) + self.assertEqual(obs, ["QIITA"]) self.emp_portal.add_studies([1]) self.emp_portal.add_analyses([self.analysis.id]) obs = self.analysis._portals - self.assertEqual(obs, ['EMP', 'QIITA']) + self.assertEqual(obs, ["EMP", "QIITA"]) npt.assert_warns( - qdb.exceptions.QiitaDBWarning, self.emp_portal.add_analyses, - [self.analysis.id]) + qdb.exceptions.QiitaDBWarning, + self.emp_portal.add_analyses, + [self.analysis.id], + ) self.emp_portal.remove_analyses([self.analysis.id]) self.emp_portal.remove_studies([1]) @@ -204,18 +247,20 @@ def test_remove_analysis_portals(self): self.emp_portal.add_studies([1]) self.emp_portal.add_analyses([self.analysis.id]) obs = self.analysis._portals - self.assertCountEqual(obs, ['QIITA', 'EMP']) + self.assertCountEqual(obs, ["QIITA", "EMP"]) # Test removal self.emp_portal.remove_analyses([self.analysis.id]) obs = self.analysis._portals - self.assertEqual(obs, ['QIITA']) + self.assertEqual(obs, ["QIITA"]) obs = npt.assert_warns( - qdb.exceptions.QiitaDBWarning, self.emp_portal.remove_analyses, - [self.analysis.id]) + qdb.exceptions.QiitaDBWarning, + self.emp_portal.remove_analyses, + [self.analysis.id], + ) self.emp_portal.remove_studies([1]) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_processing_job.py b/qiita_db/test/test_processing_job.py index 86b396663..8fb5ca8dc 100644 --- a/qiita_db/test/test_processing_job.py +++ b/qiita_db/test/test_processing_job.py @@ -6,38 +6,47 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main from datetime import datetime +from json import dumps, loads from os import close from tempfile import mkstemp -from json import dumps, loads from time import sleep +from unittest import TestCase, main import networkx as nx import pandas as pd import qiita_db as qdb -from qiita_core.util import qiita_test_checker from qiita_core.qiita_settings import qiita_config +from qiita_core.util import qiita_test_checker def _create_job(force=True): job = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), + qdb.user.User("test@foo.bar"), qdb.software.Parameters.load( qdb.software.Command(2), - values_dict={"min_seq_len": 100, "max_seq_len": 1000, - "trim_seq_length": False, "min_qual_score": 25, - "max_ambig": 6, "max_homopolymer": 6, - "max_primer_mismatch": 0, - "barcode_type": "golay_12", - "max_barcode_errors": 1.5, - "disable_bc_correction": False, - "qual_score_window": 0, "disable_primers": False, - "reverse_primers": "disable", - "reverse_primer_mismatches": 0, - "truncate_ambi_bases": False, "input_data": 1}), - force) + values_dict={ + "min_seq_len": 100, + "max_seq_len": 1000, + "trim_seq_length": False, + "min_qual_score": 25, + "max_ambig": 6, + "max_homopolymer": 6, + "max_primer_mismatch": 0, + "barcode_type": "golay_12", + "max_barcode_errors": 1.5, + "disable_bc_correction": False, + "qual_score_window": 0, + "disable_primers": False, + "reverse_primers": "disable", + "reverse_primer_mismatches": 0, + "truncate_ambi_bases": False, + "input_data": 1, + }, + ), + force, + ) return job @@ -45,7 +54,8 @@ def _create_job(force=True): class ProcessingJobUtilTest(TestCase): def test_system_call(self): obs_out, obs_err, obs_status = qdb.processing_job._system_call( - 'echo "Test system call stdout"') + 'echo "Test system call stdout"' + ) self.assertEqual(obs_out, "Test system call stdout\n") self.assertEqual(obs_err, "") @@ -53,7 +63,8 @@ def test_system_call(self): def test_system_call_error(self): obs_out, obs_err, obs_status = qdb.processing_job._system_call( - '>&2 echo "Test system call stderr"; exit 1') + '>&2 echo "Test system call stderr"; exit 1' + ) self.assertEqual(obs_out, "") self.assertEqual(obs_err, "Test system call stderr\n") self.assertEqual(obs_status, 1) @@ -63,13 +74,17 @@ def test_system_call_error(self): class ProcessingJobTest(TestCase): def setUp(self): self.tester1 = qdb.processing_job.ProcessingJob( - "063e553b-327c-4818-ab4a-adfe58e49860") + "063e553b-327c-4818-ab4a-adfe58e49860" + ) self.tester2 = qdb.processing_job.ProcessingJob( - "bcc7ebcd-39c1-43e4-af2d-822e3589f14d") + "bcc7ebcd-39c1-43e4-af2d-822e3589f14d" + ) self.tester3 = qdb.processing_job.ProcessingJob( - "b72369f9-a886-4193-8d3d-f7b504168e75") + "b72369f9-a886-4193-8d3d-f7b504168e75" + ) self.tester4 = qdb.processing_job.ProcessingJob( - "d19f76ee-274e-4c1b-b3a2-a12d73507c55") + "d19f76ee-274e-4c1b-b3a2-a12d73507c55" + ) self._clean_up_files = [] @@ -80,28 +95,42 @@ def _get_all_job_ids(self): return qdb.sql_connection.TRN.execute_fetchflatten() def _wait_for_job(self, job): - while job.status not in ('error', 'success'): + while job.status not in ("error", "success"): sleep(0.5) def test_exists(self): - self.assertTrue(qdb.processing_job.ProcessingJob.exists( - "063e553b-327c-4818-ab4a-adfe58e49860")) - self.assertTrue(qdb.processing_job.ProcessingJob.exists( - "bcc7ebcd-39c1-43e4-af2d-822e3589f14d")) - self.assertTrue(qdb.processing_job.ProcessingJob.exists( - 
"b72369f9-a886-4193-8d3d-f7b504168e75")) - self.assertTrue(qdb.processing_job.ProcessingJob.exists( - "d19f76ee-274e-4c1b-b3a2-a12d73507c55")) - self.assertFalse(qdb.processing_job.ProcessingJob.exists( - "d19f76ee-274e-4c1b-b3a2-b12d73507c55")) - self.assertFalse(qdb.processing_job.ProcessingJob.exists( - "some-other-string")) + self.assertTrue( + qdb.processing_job.ProcessingJob.exists( + "063e553b-327c-4818-ab4a-adfe58e49860" + ) + ) + self.assertTrue( + qdb.processing_job.ProcessingJob.exists( + "bcc7ebcd-39c1-43e4-af2d-822e3589f14d" + ) + ) + self.assertTrue( + qdb.processing_job.ProcessingJob.exists( + "b72369f9-a886-4193-8d3d-f7b504168e75" + ) + ) + self.assertTrue( + qdb.processing_job.ProcessingJob.exists( + "d19f76ee-274e-4c1b-b3a2-a12d73507c55" + ) + ) + self.assertFalse( + qdb.processing_job.ProcessingJob.exists( + "d19f76ee-274e-4c1b-b3a2-b12d73507c55" + ) + ) + self.assertFalse(qdb.processing_job.ProcessingJob.exists("some-other-string")) def test_user(self): - exp_user = qdb.user.User('test@foo.bar') + exp_user = qdb.user.User("test@foo.bar") self.assertEqual(self.tester1.user, exp_user) self.assertEqual(self.tester2.user, exp_user) - exp_user = qdb.user.User('shared@foo.bar') + exp_user = qdb.user.User("shared@foo.bar") self.assertEqual(self.tester3.user, exp_user) self.assertEqual(self.tester4.user, exp_user) @@ -120,9 +149,11 @@ def test_parameters(self): '"sequence_max_n":0,"rev_comp_barcode":false,' '"rev_comp_mapping_barcodes":false,"rev_comp":false,' '"phred_quality_threshold":3,"barcode_type":"golay_12",' - '"max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}') - exp_params = qdb.software.Parameters.load(qdb.software.Command(1), - json_str=json_str) + '"max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}' + ) + exp_params = qdb.software.Parameters.load( + qdb.software.Command(1), json_str=json_str + ) self.assertEqual(self.tester1.parameters, exp_params) json_str = ( @@ -132,9 +163,11 @@ def test_parameters(self): '"max_barcode_errors":1.5,"disable_bc_correction":false,' '"qual_score_window":0,"disable_primers":false,' '"reverse_primers":"disable","reverse_primer_mismatches":0,' - '"truncate_ambi_bases":false,"input_data":1}') - exp_params = qdb.software.Parameters.load(qdb.software.Command(2), - json_str=json_str) + '"truncate_ambi_bases":false,"input_data":1}' + ) + exp_params = qdb.software.Parameters.load( + qdb.software.Command(2), json_str=json_str + ) self.assertEqual(self.tester2.parameters, exp_params) json_str = ( @@ -142,17 +175,21 @@ def test_parameters(self): '"sequence_max_n":0,"rev_comp_barcode":false,' '"rev_comp_mapping_barcodes":true,"rev_comp":false,' '"phred_quality_threshold":3,"barcode_type":"golay_12",' - '"max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}') - exp_params = qdb.software.Parameters.load(qdb.software.Command(1), - json_str=json_str) + '"max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}' + ) + exp_params = qdb.software.Parameters.load( + qdb.software.Command(1), json_str=json_str + ) self.assertEqual(self.tester3.parameters, exp_params) json_str = ( '{"reference":1,"sortmerna_e_value":1,"sortmerna_max_pos":10000,' '"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,' - '"input_data":2}') - exp_params = qdb.software.Parameters.load(qdb.software.Command(3), - json_str=json_str) + '"input_data":2}' + ) + exp_params = qdb.software.Parameters.load( + qdb.software.Command(3), json_str=json_str + ) self.assertEqual(self.tester4.parameters, exp_params) def test_input_artifacts(self): @@ 
-164,10 +201,10 @@ def test_input_artifacts(self): self.assertEqual(self.tester4.input_artifacts, exp) def test_status(self): - self.assertEqual(self.tester1.status, 'queued') - self.assertEqual(self.tester2.status, 'running') - self.assertEqual(self.tester3.status, 'success') - self.assertEqual(self.tester4.status, 'error') + self.assertEqual(self.tester1.status, "queued") + self.assertEqual(self.tester2.status, "running") + self.assertEqual(self.tester3.status, "success") + self.assertEqual(self.tester4.status, "error") def test_submit(self): # In order to test a success, we need to actually run the job, which @@ -183,18 +220,15 @@ def test_log(self): def test_heartbeat(self): self.assertIsNone(self.tester1.heartbeat) - self.assertEqual(self.tester2.heartbeat, - datetime(2015, 11, 22, 21, 00, 00)) - self.assertEqual(self.tester3.heartbeat, - datetime(2015, 11, 22, 21, 15, 00)) - self.assertEqual(self.tester4.heartbeat, - datetime(2015, 11, 22, 21, 30, 00)) + self.assertEqual(self.tester2.heartbeat, datetime(2015, 11, 22, 21, 00, 00)) + self.assertEqual(self.tester3.heartbeat, datetime(2015, 11, 22, 21, 15, 00)) + self.assertEqual(self.tester4.heartbeat, datetime(2015, 11, 22, 21, 30, 00)) def test_step(self): self.assertIsNone(self.tester1.step) - self.assertEqual(self.tester2.step, 'demultiplexing') + self.assertEqual(self.tester2.step, "demultiplexing") self.assertIsNone(self.tester3.step) - self.assertEqual(self.tester4.step, 'generating demux file') + self.assertEqual(self.tester4.step, "generating demux file") def test_children(self): self.assertEqual(list(self.tester1.children), []) @@ -213,16 +247,15 @@ def test_create(self): '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0, ' - '"phred_offset": "auto"}') - exp_params = qdb.software.Parameters.load(exp_command, - json_str=json_str) - exp_user = qdb.user.User('test@foo.bar') - obs = qdb.processing_job.ProcessingJob.create( - exp_user, exp_params, True) + '"phred_offset": "auto"}' + ) + exp_params = qdb.software.Parameters.load(exp_command, json_str=json_str) + exp_user = qdb.user.User("test@foo.bar") + obs = qdb.processing_job.ProcessingJob.create(exp_user, exp_params, True) self.assertEqual(obs.user, exp_user) self.assertEqual(obs.command, exp_command) self.assertEqual(obs.parameters, exp_params) - self.assertEqual(obs.status, 'in_construction') + self.assertEqual(obs.status, "in_construction") self.assertEqual(obs.log, None) self.assertEqual(obs.heartbeat, None) self.assertEqual(obs.step, None) @@ -232,11 +265,10 @@ def test_create(self): exp_command = qdb.software.Command(1) exp_params.values["a tests with '"] = 'this is a tests with "' exp_params.values['a tests with "'] = "this is a tests with '" - obs = qdb.processing_job.ProcessingJob.create( - exp_user, exp_params) + obs = qdb.processing_job.ProcessingJob.create(exp_user, exp_params) self.assertEqual(obs.user, exp_user) self.assertEqual(obs.command, exp_command) - self.assertEqual(obs.status, 'in_construction') + self.assertEqual(obs.status, "in_construction") self.assertEqual(obs.log, None) self.assertEqual(obs.heartbeat, None) self.assertEqual(obs.step, None) @@ -244,27 +276,26 @@ def test_create(self): def test_set_status(self): job = _create_job() - self.assertEqual(job.status, 'in_construction') - job._set_status('queued') - self.assertEqual(job.status, 'queued') - job._set_status('running') - self.assertEqual(job.status, 'running') + 
self.assertEqual(job.status, "in_construction") + job._set_status("queued") + self.assertEqual(job.status, "queued") + job._set_status("running") + self.assertEqual(job.status, "running") with self.assertRaises(qdb.exceptions.QiitaDBStatusError): - job._set_status('queued') - job._set_status('error') - self.assertEqual(job.status, 'error') - job._set_status('running') - self.assertEqual(job.status, 'running') - job._set_status('success') - self.assertEqual(job.status, 'success') + job._set_status("queued") + job._set_status("error") + self.assertEqual(job.status, "error") + job._set_status("running") + self.assertEqual(job.status, "running") + job._set_status("success") + self.assertEqual(job.status, "success") with self.assertRaises(qdb.exceptions.QiitaDBStatusError): - job._set_status('running') + job._set_status("running") def test_submit_error(self): job = _create_job() - job._set_status('queued') - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + job._set_status("queued") + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): job.submit() def test_submit_environment(self): @@ -276,12 +307,11 @@ def test_submit_environment(self): with qdb.sql_connection.TRN: sql = """UPDATE qiita.software SET environment_script = %s WHERE software_id = %s""" - qdb.sql_connection.TRN.add(sql, [ - f'{current} ENVIRONMENT', software.id]) + qdb.sql_connection.TRN.add(sql, [f"{current} ENVIRONMENT", software.id]) job.submit() - self.assertEqual(job.status, 'error') + self.assertEqual(job.status, "error") qdb.sql_connection.TRN.rollback() @@ -296,27 +326,28 @@ def test_complete_multiple_outputs(self): # in the test DB there is no command with such characteristics cmd = qdb.software.Command.create( qdb.software.Software(1), - "TestCommand", "Test command", - {'input': ['artifact:["Demultiplexed"]', None]}, - {'out1': 'BIOM', 'out2': 'BIOM'}) + "TestCommand", + "Test command", + {"input": ['artifact:["Demultiplexed"]', None]}, + {"out1": "BIOM", "out2": "BIOM"}, + ) job = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), - qdb.software.Parameters.load( - cmd, - values_dict={"input": 1})) + qdb.user.User("test@foo.bar"), + qdb.software.Parameters.load(cmd, values_dict={"input": 1}), + ) job._set_status("running") fd, fp1 = mkstemp(suffix="_table.biom") self._clean_up_files.append(fp1) close(fd) - with open(fp1, 'w') as f: - f.write('\n') + with open(fp1, "w") as f: + f.write("\n") fd, fp2 = mkstemp(suffix="_table.biom") self._clean_up_files.append(fp2) close(fd) - with open(fp2, 'w') as f: - f.write('\n') + with open(fp2, "w") as f: + f.write("\n") # `job` has 2 output artifacts. Each of these artifacts needs to be # validated by 2 different validation jobs. We are creating those jobs @@ -324,82 +355,97 @@ def test_complete_multiple_outputs(self): # jobs with the validator jobs. 
params = qdb.software.Parameters.load( qdb.software.Command(4), - values_dict={'template': 1, 'files': fp1, - 'artifact_type': 'BIOM', - 'provenance': dumps( - {'job': job.id, - 'cmd_out_id': qdb.util.convert_to_id( - 'out1', "command_output", "name"), - 'name': 'out1'})}) - user = qdb.user.User('test@foo.bar') + values_dict={ + "template": 1, + "files": fp1, + "artifact_type": "BIOM", + "provenance": dumps( + { + "job": job.id, + "cmd_out_id": qdb.util.convert_to_id( + "out1", "command_output", "name" + ), + "name": "out1", + } + ), + }, + ) + user = qdb.user.User("test@foo.bar") obs1 = qdb.processing_job.ProcessingJob.create(user, params, True) - obs1._set_status('running') + obs1._set_status("running") params = qdb.software.Parameters.load( qdb.software.Command(4), - values_dict={'template': 1, 'files': fp2, - 'artifact_type': 'BIOM', - 'provenance': dumps( - {'job': job.id, - 'cmd_out_id': qdb.util.convert_to_id( - 'out1', "command_output", "name"), - 'name': 'out1'})}) + values_dict={ + "template": 1, + "files": fp2, + "artifact_type": "BIOM", + "provenance": dumps( + { + "job": job.id, + "cmd_out_id": qdb.util.convert_to_id( + "out1", "command_output", "name" + ), + "name": "out1", + } + ), + }, + ) obs2 = qdb.processing_job.ProcessingJob.create(user, params, True) - obs2._set_status('running') + obs2._set_status("running") # Make sure that we link the original job with its validator jobs job._set_validator_jobs([obs1, obs2]) - artifact_data_1 = {'filepaths': [(fp1, 'biom')], - 'artifact_type': 'BIOM'} + artifact_data_1 = {"filepaths": [(fp1, "biom")], "artifact_type": "BIOM"} # Complete one of the validator jobs. This jobs should store all the # information about the new artifact, but it does not create it. The # job then goes to a "waiting" state, where it waits until all the # validator jobs are completed. obs1._complete_artifact_definition(artifact_data_1) - self.assertEqual(obs1.status, 'waiting') - self.assertEqual(job.status, 'running') + self.assertEqual(obs1.status, "waiting") + self.assertEqual(job.status, "running") # When we complete the second validation job, the previous validation # job is realeaed from its waiting state. All jobs then create the # artifacts in a single transaction, so either all of them successfully # complete, or all of them fail. 
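# An illustrative summary of the lifecycle exercised in this test, written as
# plain data rather than Qiita API calls (the tuples are hand-written here,
# not produced by any helper): (parent job, validator obs1, validator obs2)
lifecycle = [
    ("running", "waiting", "running"),   # obs1 done, parked in 'waiting'
    ("running", "waiting", "waiting"),   # obs2 done, also parked
    ("success", "success", "success"),   # job.release_validators() commits
]
# The output artifacts are only created in that final, single transaction,
# so either every job ends in 'success' or every job ends in 'error'.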
- artifact_data_2 = {'filepaths': [(fp2, 'biom')], - 'artifact_type': 'BIOM'} + artifact_data_2 = {"filepaths": [(fp2, "biom")], "artifact_type": "BIOM"} obs2._complete_artifact_definition(artifact_data_2) - self.assertEqual(obs1.status, 'waiting') - self.assertEqual(obs2.status, 'waiting') - self.assertEqual(job.status, 'running') + self.assertEqual(obs1.status, "waiting") + self.assertEqual(obs2.status, "waiting") + self.assertEqual(job.status, "running") job.release_validators() - self.assertEqual(obs1.status, 'success') - self.assertEqual(obs2.status, 'success') - self.assertEqual(job.status, 'success') + self.assertEqual(obs1.status, "success") + self.assertEqual(obs2.status, "success") + self.assertEqual(job.status, "success") def test_complete_artifact_definition(self): job = _create_job() - job._set_status('running') + job._set_status("running") fd, fp = mkstemp(suffix="_table.biom") self._clean_up_files.append(fp) close(fd) - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") - artifact_data = {'filepaths': [(fp, 'biom')], - 'artifact_type': 'BIOM'} + artifact_data = {"filepaths": [(fp, "biom")], "artifact_type": "BIOM"} params = qdb.software.Parameters.load( qdb.software.Command(4), - values_dict={'template': 1, 'files': fp, - 'artifact_type': 'BIOM', - 'provenance': dumps( - {'job': job.id, - 'cmd_out_id': 3})} + values_dict={ + "template": 1, + "files": fp, + "artifact_type": "BIOM", + "provenance": dumps({"job": job.id, "cmd_out_id": 3}), + }, ) obs = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), params) + qdb.user.User("test@foo.bar"), params + ) job._set_validator_jobs([obs]) obs._complete_artifact_definition(artifact_data) - self.assertEqual(obs.status, 'waiting') - self.assertEqual(job.status, 'running') + self.assertEqual(obs.status, "waiting") + self.assertEqual(job.status, "running") # Upload case implicitly tested by "test_complete_type" def test_complete_artifact_transformation(self): @@ -408,61 +454,67 @@ def test_complete_artifact_transformation(self): def test_complete_no_artifact_data(self): job = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), + qdb.user.User("test@foo.bar"), qdb.software.Parameters.load( - qdb.software.Command(5), - values_dict={"input_data": 1})) - job._set_status('running') + qdb.software.Command(5), values_dict={"input_data": 1} + ), + ) + job._set_status("running") job.complete(True) - self.assertEqual(job.status, 'success') + self.assertEqual(job.status, "success") job = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), + qdb.user.User("test@foo.bar"), qdb.software.Parameters.load( - qdb.software.Command(5), - values_dict={"input_data": 1}), - True) - job._set_status('running') - job.complete(False, error='Some Error') - self.assertEqual(job.status, 'error') + qdb.software.Command(5), values_dict={"input_data": 1} + ), + True, + ) + job._set_status("running") + job.complete(False, error="Some Error") + self.assertEqual(job.status, "error") def test_complete_type(self): fd, fp = mkstemp(suffix="_table.biom") self._clean_up_files.append(fp) close(fd) - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") - exp_artifact_count = qdb.util.get_count('qiita.artifact') + 1 - artifacts_data = {'ignored': {'filepaths': [(fp, 'biom')], - 'artifact_type': 'BIOM'}} + exp_artifact_count = qdb.util.get_count("qiita.artifact") + 1 + artifacts_data = { + "ignored": {"filepaths": [(fp, "biom")], "artifact_type": 
"BIOM"} + } metadata_dict = { - 'SKB8.640193': {'center_name': 'ANL', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'}} - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + "SKB8.640193": { + "center_name": "ANL", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "experiment_design_description": "BBBB", + } + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) pt = qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(1), "16S") + metadata, qdb.study.Study(1), "16S" + ) self._clean_up_files.extend([ptfp for _, ptfp in pt.get_filepaths()]) params = qdb.software.Parameters.load( qdb.software.Command(4), - values_dict={'template': pt.id, 'files': fp, - 'artifact_type': 'BIOM'}) + values_dict={"template": pt.id, "files": fp, "artifact_type": "BIOM"}, + ) obs = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), params, True) - obs._set_status('running') + qdb.user.User("test@foo.bar"), params, True + ) + obs._set_status("running") obs.complete(True, artifacts_data=artifacts_data) - self.assertEqual(obs.status, 'success') - self.assertEqual(qdb.util.get_count('qiita.artifact'), - exp_artifact_count) + self.assertEqual(obs.status, "success") + self.assertEqual(qdb.util.get_count("qiita.artifact"), exp_artifact_count) self._clean_up_files.extend( - [x['fp'] for x in - qdb.artifact.Artifact(exp_artifact_count).filepaths]) + [x["fp"] for x in qdb.artifact.Artifact(exp_artifact_count).filepaths] + ) def test_complete_success(self): # Note that here we are submitting and creating other multiple jobs; @@ -471,15 +523,16 @@ def test_complete_success(self): # # This first part of the test is just to test that by default the # naming of the output artifact will be the name of the output - fd, fp = mkstemp(suffix='_table.biom') + fd, fp = mkstemp(suffix="_table.biom") self._clean_up_files.append(fp) close(fd) - with open(fp, 'w') as f: - f.write('\n') - artifacts_data = {'demultiplexed': {'filepaths': [(fp, 'biom')], - 'artifact_type': 'BIOM'}} + with open(fp, "w") as f: + f.write("\n") + artifacts_data = { + "demultiplexed": {"filepaths": [(fp, "biom")], "artifact_type": "BIOM"} + } job = _create_job() - job._set_status('running') + job._set_status("running") # here we can test that job.release_validator_job hasn't been created # yet so it has to be None @@ -488,19 +541,19 @@ def test_complete_success(self): self._wait_for_job(job) # let's check for the job that released the validators self.assertIsNotNone(job.release_validator_job) - self.assertEqual(job.release_validator_job.parameters.values['job'], - job.id) + self.assertEqual(job.release_validator_job.parameters.values["job"], job.id) # Retrieve the job that is performing the validation: validators = list(job.validator_jobs) self.assertEqual(len(validators), 1) # the validator actually runs on the system so it gets an external_id # assigned, let's test that is not None - self.assertFalse(validators[0].external_id == 'Not Available') + self.assertFalse(validators[0].external_id == "Not Available") # Test the output artifact is going to be named based on the # input parameters 
self.assertEqual( - loads(validators[0].parameters.values['provenance'])['name'], - "demultiplexed") + loads(validators[0].parameters.values["provenance"])["name"], + "demultiplexed", + ) # To test that the naming of the output artifact is based on the # parameters that the command is indicating, we need to update the @@ -515,17 +568,18 @@ def test_complete_success(self): qdb.sql_connection.TRN.add(sql, [[1, 19], [2, 20]], many=True) qdb.sql_connection.TRN.execute() - fd, fp = mkstemp(suffix='_table.biom') + fd, fp = mkstemp(suffix="_table.biom") self._clean_up_files.append(fp) close(fd) - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") - artifacts_data = {'demultiplexed': {'filepaths': [(fp, 'biom')], - 'artifact_type': 'BIOM'}} + artifacts_data = { + "demultiplexed": {"filepaths": [(fp, "biom")], "artifact_type": "BIOM"} + } job = _create_job() - job._set_status('running') + job._set_status("running") alljobs = set(self._get_all_job_ids()) job.complete(True, artifacts_data=artifacts_data) @@ -535,9 +589,10 @@ def test_complete_success(self): # is completed. Note that this is tested by making sure that the status # of this job is running, and that we have one more job than before # (see assertEqual with len of all jobs) - self.assertEqual(job.status, 'running') - self.assertTrue(job.step.startswith( - 'Validating outputs (1 remaining) via job(s)')) + self.assertEqual(job.status, "running") + self.assertTrue( + job.step.startswith("Validating outputs (1 remaining) via job(s)") + ) obsjobs = set(self._get_all_job_ids()) @@ -551,109 +606,107 @@ def test_complete_success(self): self.assertEqual(len(validators), 1) # here we can test that the validator shape and allocation is correct validator = validators[0] - self.assertEqual(validator.parameters.values['artifact_type'], 'BIOM') + self.assertEqual(validator.parameters.values["artifact_type"], "BIOM") self.assertEqual( validator.resource_allocation_info, - '-p qiita -N 1 -n 1 --mem 90gb --time 150:00:00 --nice=10000') + "-p qiita -N 1 -n 1 --mem 90gb --time 150:00:00 --nice=10000", + ) self.assertEqual(validator.shape, (27, 53, None)) # Test the output artifact is going to be named based on the # input parameters self.assertEqual( - loads(validator.parameters.values['provenance'])['name'], - "demultiplexed golay_12 1.5") + loads(validator.parameters.values["provenance"])["name"], + "demultiplexed golay_12 1.5", + ) def test_complete_failure(self): job = _create_job() job.complete(False, error="Job failure") - self.assertEqual(job.status, 'error') - self.assertEqual(job.log, - qdb.logger.LogEntry.newest_records(numrecords=1)[0]) - self.assertEqual(job.log.msg, 'Job failure') + self.assertEqual(job.status, "error") + self.assertEqual(job.log, qdb.logger.LogEntry.newest_records(numrecords=1)[0]) + self.assertEqual(job.log.msg, "Job failure") # Test the artifact definition case job = _create_job() - job._set_status('running') + job._set_status("running") params = qdb.software.Parameters.load( qdb.software.Command(4), - values_dict={'template': 1, 'files': 'ignored', - 'artifact_type': 'BIOM', - 'provenance': dumps( - {'job': job.id, - 'cmd_out_id': 3})} + values_dict={ + "template": 1, + "files": "ignored", + "artifact_type": "BIOM", + "provenance": dumps({"job": job.id, "cmd_out_id": 3}), + }, ) obs = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), params, True) + qdb.user.User("test@foo.bar"), params, True + ) job._set_validator_jobs([obs]) obs.complete(False, error="Validation 
failure") - self.assertEqual(obs.status, 'error') - self.assertEqual(obs.log.msg, 'Validation failure') + self.assertEqual(obs.status, "error") + self.assertEqual(obs.log.msg, "Validation failure") - self.assertEqual(job.status, 'running') + self.assertEqual(job.status, "running") job.release_validators() - self.assertEqual(job.status, 'error') + self.assertEqual(job.status, "error") self.assertEqual( - job.log.msg, '1 validator jobs failed: Validator %s ' - 'error message: Validation failure' % obs.id) + job.log.msg, + "1 validator jobs failed: Validator %s " + "error message: Validation failure" % obs.id, + ) def test_complete_error(self): - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): self.tester1.complete(True, artifacts_data={}) def test_set_error(self): job1 = _create_job() - job1._set_status('queued') + job1._set_status("queued") job2 = _create_job() - job2._set_status('running') + job2._set_status("running") for t in [job1, job2]: - t._set_error('Job failure') - self.assertEqual(t.status, 'error') - self.assertEqual( - t.log, qdb.logger.LogEntry.newest_records(numrecords=1)[0]) + t._set_error("Job failure") + self.assertEqual(t.status, "error") + self.assertEqual(t.log, qdb.logger.LogEntry.newest_records(numrecords=1)[0]) - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): self.tester3._set_error("Job failure") def test_update_heartbeat_state(self): job = _create_job() - job._set_status('running') + job._set_status("running") before = datetime.now() job.update_heartbeat_state() self.assertTrue(before < job.heartbeat < datetime.now()) job = _create_job() - job._set_status('queued') + job._set_status("queued") before = datetime.now() job.update_heartbeat_state() self.assertTrue(before < job.heartbeat < datetime.now()) - self.assertEqual(job.status, 'running') + self.assertEqual(job.status, "running") - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): self.tester3.update_heartbeat_state() def test_step_setter(self): job = _create_job() - job._set_status('running') - job.step = 'demultiplexing' - self.assertEqual(job.step, 'demultiplexing') - job.step = 'generating demux file' - self.assertEqual(job.step, 'generating demux file') + job._set_status("running") + job.step = "demultiplexing" + self.assertEqual(job.step, "demultiplexing") + job.step = "generating demux file" + self.assertEqual(job.step, "generating demux file") - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): - self.tester1.step = 'demultiplexing' + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): + self.tester1.step = "demultiplexing" - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): - self.tester3.step = 'demultiplexing' + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): + self.tester3.step = "demultiplexing" - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): - self.tester4.step = 'demultiplexing' + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): + self.tester4.step = "demultiplexing" def test_update_children(self): # Create a workflow so we can test this functionality @@ -664,17 +717,18 @@ def test_update_children(self): '"rev_comp": false, 
"phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0, ' - '"phred_offset": "auto"}') - exp_params = qdb.software.Parameters.load(exp_command, - json_str=json_str) - exp_user = qdb.user.User('test@foo.bar') + '"phred_offset": "auto"}' + ) + exp_params = qdb.software.Parameters.load(exp_command, json_str=json_str) + exp_user = qdb.user.User("test@foo.bar") name = "Test processing workflow" tester = qdb.processing_job.ProcessingWorkflow.from_scratch( - exp_user, exp_params, name=name, force=True) + exp_user, exp_params, name=name, force=True + ) parent = list(tester.graph.nodes())[0] - connections = {parent: {'demultiplexed': 'input_data'}} + connections = {parent: {"demultiplexed": "input_data"}} dflt_params = qdb.software.DefaultParameters(10) tester.add(dflt_params, connections=connections) # we could get the child using tester.graph.nodes()[1] but networkx @@ -685,12 +739,11 @@ def test_update_children(self): obs = parent._update_children(mapping) exp = [child] self.assertTrue(obs, exp) - self.assertEqual(child.input_artifacts, - [qdb.artifact.Artifact(3)]) + self.assertEqual(child.input_artifacts, [qdb.artifact.Artifact(3)]) def test_outputs(self): job = _create_job() - job._set_status('running') + job._set_status("running") QE = qdb.exceptions with self.assertRaises(QE.QiitaDBOperationNotPermittedError): @@ -699,50 +752,52 @@ def test_outputs(self): fd, fp = mkstemp(suffix="_table.biom") self._clean_up_files.append(fp) close(fd) - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") - artifact_data = {'filepaths': [(fp, 'biom')], 'artifact_type': 'BIOM'} + artifact_data = {"filepaths": [(fp, "biom")], "artifact_type": "BIOM"} params = qdb.software.Parameters.load( qdb.software.Command(4), - values_dict={'template': 1, 'files': fp, - 'artifact_type': 'BIOM', - 'provenance': dumps( - {'job': job.id, - 'cmd_out_id': 3, - 'name': 'outArtifact'})} + values_dict={ + "template": 1, + "files": fp, + "artifact_type": "BIOM", + "provenance": dumps( + {"job": job.id, "cmd_out_id": 3, "name": "outArtifact"} + ), + }, ) obs = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), params, True) + qdb.user.User("test@foo.bar"), params, True + ) job._set_validator_jobs([obs]) - exp_artifact_count = qdb.util.get_count('qiita.artifact') + 1 + exp_artifact_count = qdb.util.get_count("qiita.artifact") + 1 obs._complete_artifact_definition(artifact_data) job.release_validators() - self.assertEqual(job.status, 'success') + self.assertEqual(job.status, "success") artifact = qdb.artifact.Artifact(exp_artifact_count) obs = job.outputs - self.assertEqual(obs, {'OTU table': artifact}) - self._clean_up_files.extend([x['fp'] for x in artifact.filepaths]) - self.assertEqual(artifact.name, 'outArtifact') + self.assertEqual(obs, {"OTU table": artifact}) + self._clean_up_files.extend([x["fp"] for x in artifact.filepaths]) + self.assertEqual(artifact.name, "outArtifact") def test_processing_job_workflow(self): # testing None - job = qdb.processing_job.ProcessingJob( - "063e553b-327c-4818-ab4a-adfe58e49860") + job = qdb.processing_job.ProcessingJob("063e553b-327c-4818-ab4a-adfe58e49860") self.assertIsNone(job.processing_job_workflow) # testing actual workflow - job = qdb.processing_job.ProcessingJob( - "b72369f9-a886-4193-8d3d-f7b504168e75") - self.assertEqual(job.processing_job_workflow, - qdb.processing_job.ProcessingWorkflow(1)) + job = 
qdb.processing_job.ProcessingJob("b72369f9-a886-4193-8d3d-f7b504168e75") + self.assertEqual( + job.processing_job_workflow, qdb.processing_job.ProcessingWorkflow(1) + ) # testing child job from workflow - job = qdb.processing_job.ProcessingJob( - 'd19f76ee-274e-4c1b-b3a2-a12d73507c55') - self.assertEqual(job.processing_job_workflow, - qdb.processing_job.ProcessingWorkflow(1)) + job = qdb.processing_job.ProcessingJob("d19f76ee-274e-4c1b-b3a2-a12d73507c55") + self.assertEqual( + job.processing_job_workflow, qdb.processing_job.ProcessingWorkflow(1) + ) def test_hidden(self): self.assertTrue(self.tester1.hidden) @@ -765,7 +820,7 @@ def test_hide(self): self.tester3.hide() job = _create_job() - job._set_error('Setting to error for testing') + job._set_error("Setting to error for testing") self.assertFalse(job.hidden) job.hide() self.assertTrue(job.hidden) @@ -773,13 +828,14 @@ def test_hide(self): def test_shape(self): jids = { # Split libraries FASTQ - '6d368e16-2242-4cf8-87b4-a5dc40bb890b': (27, 53, 116), + "6d368e16-2242-4cf8-87b4-a5dc40bb890b": (27, 53, 116), # Pick closed-reference OTUs - '80bf25f3-5f1d-4e10-9369-315e4244f6d5': (27, 53, 0), + "80bf25f3-5f1d-4e10-9369-315e4244f6d5": (27, 53, 0), # Single Rarefaction / Analysis - '8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0': (5, 56, 3770436), + "8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0": (5, 56, 3770436), # Split libraries - 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d': (27, 53, 116)} + "bcc7ebcd-39c1-43e4-af2d-822e3589f14d": (27, 53, 116), + } for jid, shape in jids.items(): job = qdb.processing_job.ProcessingJob(jid) @@ -787,72 +843,87 @@ def test_shape(self): def test_shape_special_cases(self): # get any given job/command/allocation and make sure nothing changed - pj = qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b') + pj = qdb.processing_job.ProcessingJob("6d368e16-2242-4cf8-87b4-a5dc40bb890b") command = pj.command current_allocation = pj.resource_allocation_info - self.assertEqual(current_allocation, - '-p qiita -N 1 -n 1 --mem 120gb --time 80:00:00 ' - '--nice=10000') + self.assertEqual( + current_allocation, + "-p qiita -N 1 -n 1 --mem 120gb --time 80:00:00 --nice=10000", + ) # now, let's update that job allocation and make sure that things # work as expected tests = [ # (resource allocation, specific allocation) # 1. tests that nlog works - ('-p qiita -N 1 -n 1 --mem nlog({samples})*100 --time {columns}', - '-p qiita -N 1 -n 1 --mem 329B --time 0:00:53 --nice=10000'), + ( + "-p qiita -N 1 -n 1 --mem nlog({samples})*100 --time {columns}", + "-p qiita -N 1 -n 1 --mem 329B --time 0:00:53 --nice=10000", + ), # 2. days in time works fine - ('-p qiita -N 1 -n 1 --mem 10g --time {columns}*10000', - '-p qiita -N 1 -n 1 --mem 10g --time 6-3:13:20 --nice=10000'), - ('-p qiita -N 1 -n 1 --mem 20g --time {columns}*1631', - '-p qiita -N 1 -n 1 --mem 20g --time 1-0:00:43 --nice=10000'), + ( + "-p qiita -N 1 -n 1 --mem 10g --time {columns}*10000", + "-p qiita -N 1 -n 1 --mem 10g --time 6-3:13:20 --nice=10000", + ), + ( + "-p qiita -N 1 -n 1 --mem 20g --time {columns}*1631", + "-p qiita -N 1 -n 1 --mem 20g --time 1-0:00:43 --nice=10000", + ), # 3. 
conditionals work - ('-p qiita -N 1 -n 1 --mem 10g --time {columns}*1631 ' - 'if {columns}*1631 < 86400 else 86400', - '-p qiita -N 1 -n 1 --mem 10g --time 1-0:00:00 --nice=10000'), - ('-p qiita -N 1 -n 1 --mem 10g --time {columns}*1631 ' - 'if {columns}*1631 > 86400 else 86400', - '-p qiita -N 1 -n 1 --mem 10g --time 1-0:00:43 --nice=10000'), + ( + "-p qiita -N 1 -n 1 --mem 10g --time {columns}*1631 " + "if {columns}*1631 < 86400 else 86400", + "-p qiita -N 1 -n 1 --mem 10g --time 1-0:00:00 --nice=10000", + ), + ( + "-p qiita -N 1 -n 1 --mem 10g --time {columns}*1631 " + "if {columns}*1631 > 86400 else 86400", + "-p qiita -N 1 -n 1 --mem 10g --time 1-0:00:43 --nice=10000", + ), # --qos=qiita_prio - ('-p qiita -N 1 -n 1 --mem 10g --time 1:00:00 --qos=qiita_prio', - '-p qiita -N 1 -n 1 --mem 10g --time 1:00:00 --qos=qiita_prio ' - '--nice=10000'), + ( + "-p qiita -N 1 -n 1 --mem 10g --time 1:00:00 --qos=qiita_prio", + "-p qiita -N 1 -n 1 --mem 10g --time 1:00:00 --qos=qiita_prio " + "--nice=10000", + ), # all the combinations - ('-p qiita -N 1 -n 1 --mem nlog({samples})*100000 --time ' - '{columns}*1631 if {columns}*1631 > 86400 else 86400 ' - '--qos=qiita_prio', - '-p qiita -N 1 -n 1 --mem 322K --time 1-0:00:43 ' - '--qos=qiita_prio --nice=10000'), + ( + "-p qiita -N 1 -n 1 --mem nlog({samples})*100000 --time " + "{columns}*1631 if {columns}*1631 > 86400 else 86400 " + "--qos=qiita_prio", + "-p qiita -N 1 -n 1 --mem 322K --time 1-0:00:43 " + "--qos=qiita_prio --nice=10000", + ), ] for ra, sra in tests: - sql = ("UPDATE qiita.processing_job_resource_allocation " - f"SET allocation = '{ra}'" - f"WHERE name = '{command.name}'") + sql = ( + "UPDATE qiita.processing_job_resource_allocation " + f"SET allocation = '{ra}'" + f"WHERE name = '{command.name}'" + ) qdb.sql_connection.perform_as_transaction(sql) self.assertEqual(sra, pj.resource_allocation_info) # return allocation - sql = ("UPDATE qiita.processing_job_resource_allocation " - f"SET allocation = '{current_allocation}'" - f"WHERE name = '{command.name}'") + sql = ( + "UPDATE qiita.processing_job_resource_allocation " + f"SET allocation = '{current_allocation}'" + f"WHERE name = '{command.name}'" + ) qdb.sql_connection.perform_as_transaction(sql) def test_get_resource_allocation_info(self): jids = { # Split libraries FASTQ - '6d368e16-2242-4cf8-87b4-a5dc40bb890b': - '-p qiita -N 1 -n 1 --mem 120gb --time 80:00:00 --nice=10000', + "6d368e16-2242-4cf8-87b4-a5dc40bb890b": "-p qiita -N 1 -n 1 --mem 120gb --time 80:00:00 --nice=10000", # Pick closed-reference OTUs - '80bf25f3-5f1d-4e10-9369-315e4244f6d5': - '-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00 --nice=10000', + "80bf25f3-5f1d-4e10-9369-315e4244f6d5": "-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00 --nice=10000", # Single Rarefaction / Analysis - '8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0': - '-p qiita -N 1 -n 5 --mem-per-cpu 8gb --time 168:00:00 ' - '--nice=10000', + "8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0": "-p qiita -N 1 -n 5 --mem-per-cpu 8gb --time 168:00:00 " + "--nice=10000", # Split libraries - 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d': - '-p qiita -N 1 -n 1 --mem 60gb --time 25:00:00 --nice=10000'} + "bcc7ebcd-39c1-43e4-af2d-822e3589f14d": "-p qiita -N 1 -n 1 --mem 60gb --time 25:00:00 --nice=10000", + } for jid, allocation in jids.items(): job = qdb.processing_job.ProcessingJob(jid) @@ -860,50 +931,61 @@ def test_get_resource_allocation_info(self): # now let's test get_resource_allocation_info formulas, fun!! 
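# A rough sketch of what these allocation templates amount to (illustrative
# only; eval_allocation below is an assumption, not qiita_db's parser): the
# {samples}/{columns}/{input_size} placeholders are filled from the job
# shape, the arithmetic -- including nlog, which behaves as a natural log
# in the expected values above, and the inline conditionals -- is evaluated,
# and the resulting byte/second counts are rendered as e.g. "329B" or
# "1-0:00:43" elsewhere.
import numpy as np

def eval_allocation(expr, samples=27, columns=53, input_size=116):
    # hypothetical helper: substitute the job shape, then evaluate
    filled = expr.format(samples=samples, columns=columns,
                         input_size=input_size)
    return eval(filled, {"nlog": np.log})

print(eval_allocation("nlog({samples})*100"))  # ~329.6 bytes  -> "329B"
print(eval_allocation("{samples}*1000"))       # 27000 bytes   -> "26K"
print(eval_allocation(
    "{columns}*1631 if {columns}*1631 > 86400 else 86400"))
# 86443 seconds -> one day and 43 seconds, i.e. "1-0:00:43"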
job_changed = qdb.processing_job.ProcessingJob( - '6d368e16-2242-4cf8-87b4-a5dc40bb890b') + "6d368e16-2242-4cf8-87b4-a5dc40bb890b" + ) job_not_changed = qdb.processing_job.ProcessingJob( - '80bf25f3-5f1d-4e10-9369-315e4244f6d5') + "80bf25f3-5f1d-4e10-9369-315e4244f6d5" + ) # helper to set memory allocations easier def _set_allocation(memory): sql = """UPDATE qiita.processing_job_resource_allocation SET allocation = '{0}' WHERE name = 'Split libraries FASTQ'""".format( - '-p qiita --mem %s' % memory) + "-p qiita --mem %s" % memory + ) qdb.sql_connection.perform_as_transaction(sql) # let's start with something simple, samples*1000 # 27*1000 ~ 27000 - _set_allocation('{samples}*1000') + _set_allocation("{samples}*1000") self.assertEqual( job_not_changed.resource_allocation_info, - '-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00 --nice=10000') - self.assertEqual(job_changed.resource_allocation_info, - '-p qiita --mem 26K --nice=10000') + "-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00 --nice=10000", + ) + self.assertEqual( + job_changed.resource_allocation_info, "-p qiita --mem 26K --nice=10000" + ) # a little more complex ((samples+columns)*1000000)+4000000 # (( 27 + 31 )*1000000)+4000000 ~ 62000000 - _set_allocation('(({samples}+{columns})*1000000)+4000000') + _set_allocation("(({samples}+{columns})*1000000)+4000000") self.assertEqual( job_not_changed.resource_allocation_info, - '-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00 --nice=10000') - self.assertEqual(job_changed.resource_allocation_info, - '-p qiita --mem 80M --nice=10000') + "-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00 --nice=10000", + ) + self.assertEqual( + job_changed.resource_allocation_info, "-p qiita --mem 80M --nice=10000" + ) # now something real input_size+(2*1e+9) # 116 +(2*1e+9) ~ 2000000116 - _set_allocation('{input_size}+(2*1e+9)') + _set_allocation("{input_size}+(2*1e+9)") self.assertEqual( job_not_changed.resource_allocation_info, - '-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00 --nice=10000') - self.assertEqual(job_changed.resource_allocation_info, - '-p qiita --mem 2G --nice=10000') + "-p qiita -N 1 -n 5 --mem 120gb --time 130:00:00 --nice=10000", + ) + self.assertEqual( + job_changed.resource_allocation_info, "-p qiita --mem 2G --nice=10000" + ) # restore allocation - sql = ("UPDATE qiita.processing_job_resource_allocation " - "SET allocation = '-p qiita -N 1 -n 1 --mem 120gb " - "--time 80:00:00' " - "WHERE name = 'Split libraries FASTQ'") + sql = ( + "UPDATE qiita.processing_job_resource_allocation " + "SET allocation = '-p qiita -N 1 -n 1 --mem 120gb " + "--time 80:00:00' " + "WHERE name = 'Split libraries FASTQ'" + ) qdb.sql_connection.perform_as_transaction(sql) def test_notification_mail_generation(self): @@ -912,111 +994,142 @@ def test_notification_mail_generation(self): # as 'test@foo.bar' is not set to receive notifications, let's # first manually set their configuration to 'true'. - sql = ("UPDATE qiita.qiita_user SET receive_processing_job_emails" - " = true WHERE email = 'test@foo.bar'") + sql = ( + "UPDATE qiita.qiita_user SET receive_processing_job_emails" + " = true WHERE email = 'test@foo.bar'" + ) with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) # with or w/out an error message, a status of 'waiting' should # immediately return with a 'None' message. 
- obs = self.tester1._generate_notification_message('waiting', None) + obs = self.tester1._generate_notification_message("waiting", None) self.assertEqual(obs, None) - obs = self.tester1._generate_notification_message('waiting', - 'Hello World') + obs = self.tester1._generate_notification_message("waiting", "Hello World") self.assertEqual(obs, None) # An error message in the parameter should show a difference for # messages of type 'error'. - obs = self.tester1._generate_notification_message('error', None) - - exp = {'subject': ('Split libraries FASTQ: error, 063e553b-327c-4818-' - 'ab4a-adfe58e49860 [Not Available]'), - 'message': ('Split libraries FASTQ\nPrep IDs: 1' - f'\n{qiita_config.base_url}/study/description/1?' - 'prep_id=1\nData Type: 18S\nNew status: error')} + obs = self.tester1._generate_notification_message("error", None) + + exp = { + "subject": ( + "Split libraries FASTQ: error, 063e553b-327c-4818-" + "ab4a-adfe58e49860 [Not Available]" + ), + "message": ( + "Split libraries FASTQ\nPrep IDs: 1" + f"\n{qiita_config.base_url}/study/description/1?" + "prep_id=1\nData Type: 18S\nNew status: error" + ), + } self.assertDictEqual(obs, exp) - obs = self.tester1._generate_notification_message('error', - 'An Error Message') - exp = {'subject': ('Split libraries FASTQ: error, 063e553b-327c-4818-' - 'ab4a-adfe58e49860 [Not Available]'), - 'message': ('Split libraries FASTQ\nPrep IDs: 1' - f'\n{qiita_config.base_url}/study/description/1?' - 'prep_id=1\nData Type: 18S\nNew status: error' - '\n\nError:\nAn Error Message')} + obs = self.tester1._generate_notification_message("error", "An Error Message") + exp = { + "subject": ( + "Split libraries FASTQ: error, 063e553b-327c-4818-" + "ab4a-adfe58e49860 [Not Available]" + ), + "message": ( + "Split libraries FASTQ\nPrep IDs: 1" + f"\n{qiita_config.base_url}/study/description/1?" + "prep_id=1\nData Type: 18S\nNew status: error" + "\n\nError:\nAn Error Message" + ), + } self.assertDictEqual(obs, exp) # The inclusion of an error message has no effect on other valid # status types e.g. 'running'. - obs = self.tester1._generate_notification_message('running', None) - exp = {'subject': ('Split libraries FASTQ: running, 063e553b-327c-' - '4818-ab4a-adfe58e49860 [Not Available]'), - 'message': ('Split libraries FASTQ\nPrep IDs: 1' - f'\n{qiita_config.base_url}/study/description/1?' - 'prep_id=1\nData Type: 18S\nNew status: running')} + obs = self.tester1._generate_notification_message("running", None) + exp = { + "subject": ( + "Split libraries FASTQ: running, 063e553b-327c-" + "4818-ab4a-adfe58e49860 [Not Available]" + ), + "message": ( + "Split libraries FASTQ\nPrep IDs: 1" + f"\n{qiita_config.base_url}/study/description/1?" + "prep_id=1\nData Type: 18S\nNew status: running" + ), + } self.assertDictEqual(obs, exp) - obs = self.tester1._generate_notification_message('running', 'Yahoo!') - exp = {'subject': ('Split libraries FASTQ: running, 063e553b-327c-' - '4818-ab4a-adfe58e49860 [Not Available]'), - 'message': ('Split libraries FASTQ\nPrep IDs: 1' - f'\n{qiita_config.base_url}/study/description/1?' - 'prep_id=1\nData Type: 18S\nNew status: running')} + obs = self.tester1._generate_notification_message("running", "Yahoo!") + exp = { + "subject": ( + "Split libraries FASTQ: running, 063e553b-327c-" + "4818-ab4a-adfe58e49860 [Not Available]" + ), + "message": ( + "Split libraries FASTQ\nPrep IDs: 1" + f"\n{qiita_config.base_url}/study/description/1?" 
+ "prep_id=1\nData Type: 18S\nNew status: running" + ), + } self.assertDictEqual(obs, exp) # checking analysis emails - jid = '8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0' + jid = "8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0" pj = qdb.processing_job.ProcessingJob(jid) - obs = pj._generate_notification_message('running', 'Yahoo!') - exp = {'subject': ('Single Rarefaction: running, 8a7a8461-e8a1-' - '4b4e-a428-1bc2f4d3ebd0 [126652530]'), - 'message': 'Analysis Job Single Rarefaction\n' - f'{qiita_config.base_url}/analysis/description/1/\n' - 'New status: running'} + obs = pj._generate_notification_message("running", "Yahoo!") + exp = { + "subject": ( + "Single Rarefaction: running, 8a7a8461-e8a1-" + "4b4e-a428-1bc2f4d3ebd0 [126652530]" + ), + "message": "Analysis Job Single Rarefaction\n" + f"{qiita_config.base_url}/analysis/description/1/\n" + "New status: running", + } self.assertDictEqual(obs, exp) # as 'test@foo.bar' is not set to receive notifications, let's # first manually set their configuration to 'true'. # reset test@foo.bar to 'false' to test expectations for a non- # privileged user. - sql = ("UPDATE qiita.qiita_user SET receive_processing_job_emails" - " = false WHERE email = 'test@foo.bar'") + sql = ( + "UPDATE qiita.qiita_user SET receive_processing_job_emails" + " = false WHERE email = 'test@foo.bar'" + ) with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) # waiting should still return w/out a message. - obs = self.tester1._generate_notification_message('waiting', None) + obs = self.tester1._generate_notification_message("waiting", None) self.assertEqual(obs, None) # an error status should now return nothing. - obs = self.tester1._generate_notification_message('error', - 'An Error Message') + obs = self.tester1._generate_notification_message("error", "An Error Message") self.assertEqual(obs, None) # other status messages should also return nothing. 
- obs = self.tester1._generate_notification_message('running', None) + obs = self.tester1._generate_notification_message("running", None) self.assertEqual(obs, None) @qiita_test_checker() class ProcessingWorkflowTests(TestCase): def test_name(self): - self.assertEqual(qdb.processing_job.ProcessingWorkflow(1).name, - 'Testing processing workflow') + self.assertEqual( + qdb.processing_job.ProcessingWorkflow(1).name, "Testing processing workflow" + ) def test_user(self): - self.assertEqual(qdb.processing_job.ProcessingWorkflow(1).user, - qdb.user.User('shared@foo.bar')) + self.assertEqual( + qdb.processing_job.ProcessingWorkflow(1).user, + qdb.user.User("shared@foo.bar"), + ) def test_graph(self): obs = qdb.processing_job.ProcessingWorkflow(1).graph self.assertTrue(isinstance(obs, nx.DiGraph)) exp_nodes = [ - qdb.processing_job.ProcessingJob( - 'b72369f9-a886-4193-8d3d-f7b504168e75'), - qdb.processing_job.ProcessingJob( - 'd19f76ee-274e-4c1b-b3a2-a12d73507c55')] + qdb.processing_job.ProcessingJob("b72369f9-a886-4193-8d3d-f7b504168e75"), + qdb.processing_job.ProcessingJob("d19f76ee-274e-4c1b-b3a2-a12d73507c55"), + ] self.assertCountEqual(obs.nodes(), exp_nodes) self.assertEqual(list(obs.edges()), [(exp_nodes[0], exp_nodes[1])]) @@ -1024,8 +1137,8 @@ def test_graph_only_root(self): obs = qdb.processing_job.ProcessingWorkflow(2).graph self.assertTrue(isinstance(obs, nx.DiGraph)) exp_nodes = [ - qdb.processing_job.ProcessingJob( - 'ac653cb5-76a6-4a45-929e-eb9b2dee6b63')] + qdb.processing_job.ProcessingJob("ac653cb5-76a6-4a45-929e-eb9b2dee6b63") + ] self.assertCountEqual(obs.nodes(), exp_nodes) self.assertEqual(list(obs.edges()), []) @@ -1036,8 +1149,7 @@ def test_raise_if_not_in_construction(self): def test_raise_if_not_in_construction_error(self): tester = qdb.processing_job.ProcessingWorkflow(1) - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): tester._raise_if_not_in_construction() def test_submit(self): @@ -1047,13 +1159,14 @@ def test_submit(self): pass def test_from_default_workflow(self): - exp_user = qdb.user.User('test@foo.bar') + exp_user = qdb.user.User("test@foo.bar") dflt_wf = qdb.software.DefaultWorkflow(1) - req_params = {qdb.software.Command(1): {'input_data': 1}} + req_params = {qdb.software.Command(1): {"input_data": 1}} name = "Test processing workflow" obs = qdb.processing_job.ProcessingWorkflow.from_default_workflow( - exp_user, dflt_wf, req_params, name=name, force=True) + exp_user, dflt_wf, req_params, name=name, force=True + ) self.assertEqual(obs.name, name) self.assertEqual(obs.user, exp_user) obs_graph = obs.graph @@ -1069,39 +1182,52 @@ def test_from_default_workflow(self): self.assertTrue(obs_dst.command, qdb.software.Command(1)) obs_params = obs_dst.parameters.values exp_params = { - 'input_data': [obs_src.id, u'demultiplexed'], - 'reference': 1, - 'similarity': 0.97, - 'sortmerna_coverage': 0.97, - 'sortmerna_e_value': 1, - 'sortmerna_max_pos': 10000, - 'threads': 1} + "input_data": [obs_src.id, "demultiplexed"], + "reference": 1, + "similarity": 0.97, + "sortmerna_coverage": 0.97, + "sortmerna_e_value": 1, + "sortmerna_max_pos": 10000, + "threads": 1, + } self.assertEqual(obs_params, exp_params) - exp_pending = {obs_src.id: {'input_data': 'demultiplexed'}} + exp_pending = {obs_src.id: {"input_data": "demultiplexed"}} self.assertEqual(obs_dst.pending, exp_pending) def test_from_default_workflow_error(self): with self.assertRaises(qdb.exceptions.QiitaDBError) as 
err: qdb.processing_job.ProcessingWorkflow.from_default_workflow( - qdb.user.User('test@foo.bar'), qdb.software.DefaultWorkflow(1), - {}, name="Test name") - - exp = ('Provided required parameters do not match the initial set of ' - 'commands for the workflow. Command(s) "Split libraries FASTQ"' - ' are missing the required parameter set.') + qdb.user.User("test@foo.bar"), + qdb.software.DefaultWorkflow(1), + {}, + name="Test name", + ) + + exp = ( + "Provided required parameters do not match the initial set of " + 'commands for the workflow. Command(s) "Split libraries FASTQ"' + " are missing the required parameter set." + ) self.assertEqual(str(err.exception), exp) - req_params = {qdb.software.Command(1): {'input_data': 1}, - qdb.software.Command(2): {'input_data': 2}} + req_params = { + qdb.software.Command(1): {"input_data": 1}, + qdb.software.Command(2): {"input_data": 2}, + } with self.assertRaises(qdb.exceptions.QiitaDBError) as err: qdb.processing_job.ProcessingWorkflow.from_default_workflow( - qdb.user.User('test@foo.bar'), qdb.software.DefaultWorkflow(1), - req_params, name="Test name") - exp = ('Provided required parameters do not match the initial set of ' - 'commands for the workflow. Paramters for command(s) ' - '"Split libraries" have been provided, but they are not the ' - 'initial commands for the workflow.') + qdb.user.User("test@foo.bar"), + qdb.software.DefaultWorkflow(1), + req_params, + name="Test name", + ) + exp = ( + "Provided required parameters do not match the initial set of " + "commands for the workflow. Paramters for command(s) " + '"Split libraries" have been provided, but they are not the ' + "initial commands for the workflow." + ) self.assertEqual(str(err.exception), exp) def test_from_scratch(self): @@ -1112,14 +1238,15 @@ def test_from_scratch(self): '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0, ' - '"phred_offset": "auto"}') - exp_params = qdb.software.Parameters.load(exp_command, - json_str=json_str) - exp_user = qdb.user.User('test@foo.bar') + '"phred_offset": "auto"}' + ) + exp_params = qdb.software.Parameters.load(exp_command, json_str=json_str) + exp_user = qdb.user.User("test@foo.bar") name = "Test processing workflow" obs = qdb.processing_job.ProcessingWorkflow.from_scratch( - exp_user, exp_params, name=name, force=True) + exp_user, exp_params, name=name, force=True + ) self.assertEqual(obs.name, name) self.assertEqual(obs.user, exp_user) obs_graph = obs.graph @@ -1137,17 +1264,18 @@ def test_add(self): '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0, ' - '"phred_offset": "auto"}') - exp_params = qdb.software.Parameters.load(exp_command, - json_str=json_str) - exp_user = qdb.user.User('test@foo.bar') + '"phred_offset": "auto"}' + ) + exp_params = qdb.software.Parameters.load(exp_command, json_str=json_str) + exp_user = qdb.user.User("test@foo.bar") name = "Test processing workflow" obs = qdb.processing_job.ProcessingWorkflow.from_scratch( - exp_user, exp_params, name=name, force=True) + exp_user, exp_params, name=name, force=True + ) parent = list(obs.graph.nodes())[0] - connections = {parent: {'demultiplexed': 'input_data'}} + connections = {parent: {"demultiplexed": "input_data"}} dflt_params = qdb.software.DefaultParameters(10) obs.add(dflt_params, connections=connections, force=True) 
@@ -1163,20 +1291,21 @@ def test_add(self): self.assertTrue(isinstance(obs_dst, qdb.processing_job.ProcessingJob)) obs_params = obs_dst.parameters.values exp_params = { - 'input_data': [parent.id, u'demultiplexed'], - 'reference': 1, - 'similarity': 0.97, - 'sortmerna_coverage': 0.97, - 'sortmerna_e_value': 1, - 'sortmerna_max_pos': 10000, - 'threads': 1} + "input_data": [parent.id, "demultiplexed"], + "reference": 1, + "similarity": 0.97, + "sortmerna_coverage": 0.97, + "sortmerna_e_value": 1, + "sortmerna_max_pos": 10000, + "threads": 1, + } self.assertEqual(obs_params, exp_params) # Adding a new root job # This also tests that the `graph` property returns the graph correctly # when there are root nodes that don't have any children dflt_params = qdb.software.DefaultParameters(1) - obs.add(dflt_params, req_params={'input_data': 1}, force=True) + obs.add(dflt_params, req_params={"input_data": 1}, force=True) obs_graph = obs.graph self.assertTrue(isinstance(obs_graph, nx.DiGraph)) @@ -1187,34 +1316,38 @@ def test_add(self): obs_new_jobs = set(root_obs_nodes) - set(obs_nodes) self.assertEqual(len(obs_new_jobs), 1) obs_job = obs_new_jobs.pop() - exp_params = {'barcode_type': u'golay_12', - 'input_data': 1, - 'max_bad_run_length': 3, - 'max_barcode_errors': 1.5, - 'min_per_read_length_fraction': 0.75, - 'phred_quality_threshold': 3, - 'rev_comp': False, - 'rev_comp_barcode': False, - 'rev_comp_mapping_barcodes': False, - 'sequence_max_n': 0, - 'phred_offset': 'auto'} + exp_params = { + "barcode_type": "golay_12", + "input_data": 1, + "max_bad_run_length": 3, + "max_barcode_errors": 1.5, + "min_per_read_length_fraction": 0.75, + "phred_quality_threshold": 3, + "rev_comp": False, + "rev_comp_barcode": False, + "rev_comp_mapping_barcodes": False, + "sequence_max_n": 0, + "phred_offset": "auto", + } self.assertEqual(obs_job.parameters.values, exp_params) def test_add_error(self): - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): qdb.processing_job.ProcessingWorkflow(1).add({}, None) # test that the qdb.util.max_artifacts_in_workflow with qdb.sql_connection.TRN: qdb.sql_connection.perform_as_transaction( - "UPDATE settings set max_artifacts_in_workflow = 1") + "UPDATE settings set max_artifacts_in_workflow = 1" + ) with self.assertRaisesRegex( - ValueError, "Cannot add new job because it will create " - "more artifacts "): + ValueError, "Cannot add new job because it will create more artifacts " + ): qdb.processing_job.ProcessingWorkflow(2).add( qdb.software.DefaultParameters(1), - req_params={'input_data': 1}, force=True) + req_params={"input_data": 1}, + force=True, + ) qdb.sql_connection.TRN.rollback() def test_remove(self): @@ -1225,17 +1358,18 @@ def test_remove(self): '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0,' - '"phred_offset": "auto"}') - exp_params = qdb.software.Parameters.load(exp_command, - json_str=json_str) - exp_user = qdb.user.User('test@foo.bar') + '"phred_offset": "auto"}' + ) + exp_params = qdb.software.Parameters.load(exp_command, json_str=json_str) + exp_user = qdb.user.User("test@foo.bar") name = "Test processing workflow" tester = qdb.processing_job.ProcessingWorkflow.from_scratch( - exp_user, exp_params, name=name, force=True) + exp_user, exp_params, name=name, force=True + ) parent = list(tester.graph.nodes())[0] - connections = {parent: 
{'demultiplexed': 'input_data'}} + connections = {parent: {"demultiplexed": "input_data"}} dflt_params = qdb.software.DefaultParameters(10) tester.add(dflt_params, connections=connections) @@ -1250,13 +1384,14 @@ def test_remove(self): self.assertEqual(list(g.edges()), []) # Test with cascade = true - exp_user = qdb.user.User('test@foo.bar') + exp_user = qdb.user.User("test@foo.bar") dflt_wf = qdb.software.DefaultWorkflow(1) - req_params = {qdb.software.Command(1): {'input_data': 1}} + req_params = {qdb.software.Command(1): {"input_data": 1}} name = "Test processing workflow" tester = qdb.processing_job.ProcessingWorkflow.from_default_workflow( - exp_user, dflt_wf, req_params, name=name, force=True) + exp_user, dflt_wf, req_params, name=name, force=True + ) element = list(tester.graph.edges())[0] tester.remove(element[0], cascade=True) @@ -1264,22 +1399,21 @@ def test_remove(self): self.assertEqual(list(tester.graph.nodes()), []) def test_remove_error(self): - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): qdb.processing_job.ProcessingWorkflow(1).remove( - qdb.processing_job.ProcessingJob( - 'b72369f9-a886-4193-8d3d-f7b504168e75')) + qdb.processing_job.ProcessingJob("b72369f9-a886-4193-8d3d-f7b504168e75") + ) - exp_user = qdb.user.User('test@foo.bar') + exp_user = qdb.user.User("test@foo.bar") dflt_wf = qdb.software.DefaultWorkflow(1) - req_params = {qdb.software.Command(1): {'input_data': 1}} + req_params = {qdb.software.Command(1): {"input_data": 1}} name = "Test processing workflow" tester = qdb.processing_job.ProcessingWorkflow.from_default_workflow( - exp_user, dflt_wf, req_params, name=name, force=True) + exp_user, dflt_wf, req_params, name=name, force=True + ) - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): element = list(tester.graph.edges())[0] tester.remove(element[0]) @@ -1288,23 +1422,26 @@ def test_remove_error(self): class ProcessingJobDuplicated(TestCase): def test_create_duplicated(self): job = _create_job() - job._set_status('success') - with self.assertRaisesRegex(ValueError, 'Cannot create job because ' - 'the parameters are the same as jobs ' - 'that are queued, running or already ' - 'have succeeded:') as context: + job._set_status("success") + with self.assertRaisesRegex( + ValueError, + "Cannot create job because " + "the parameters are the same as jobs " + "that are queued, running or already " + "have succeeded:", + ) as context: _create_job(False) # If it failed it's because we have jobs in non finished status so # setting them as error. 
This is basically testing that the duplicated # job creation allows to create if all jobs are error and if success # that the job doesn't have children - for jobs in str(context.exception).split('\n')[1:]: - jid, status = jobs.split(': ') - if status != 'success': - qdb.processing_job.ProcessingJob(jid)._set_status('error') + for jobs in str(context.exception).split("\n")[1:]: + jid, status = jobs.split(": ") + if status != "success": + qdb.processing_job.ProcessingJob(jid)._set_status("error") _create_job(False) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_reference.py b/qiita_db/test/test_reference.py index 3e2c4f4ec..d3edbb0e2 100644 --- a/qiita_db/test/test_reference.py +++ b/qiita_db/test/test_reference.py @@ -6,13 +6,13 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main from os import close, remove from os.path import basename, join from tempfile import mkstemp +from unittest import TestCase, main -from qiita_core.util import qiita_test_checker import qiita_db as qdb +from qiita_core.util import qiita_test_checker @qiita_test_checker() @@ -28,7 +28,7 @@ def setUp(self): fd, self.tree_fp = mkstemp(suffix="_tree.tre") close(fd) - _, self.db_dir = qdb.util.get_mountpoint('reference')[0] + _, self.db_dir = qdb.util.get_mountpoint("reference")[0] self._clean_up_files = [] @@ -40,13 +40,15 @@ def test_create(self): """Correctly creates the rows in the DB for the reference""" # Check that the returned object has the correct id obs = qdb.reference.Reference.create( - self.name, self.version, self.seqs_fp, self.tax_fp, self.tree_fp) + self.name, self.version, self.seqs_fp, self.tax_fp, self.tree_fp + ) self.assertEqual(obs.id, 3) # Check that the information on the database is correct with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add( - "SELECT * FROM qiita.reference WHERE reference_id=3") + "SELECT * FROM qiita.reference WHERE reference_id=3" + ) obs = qdb.sql_connection.TRN.execute_fetchindex() self.assertEqual(obs[0][1], self.name) self.assertEqual(obs[0][2], self.version) @@ -62,15 +64,14 @@ def test_create(self): OR filepath_id=%s""" qdb.sql_connection.TRN.add(sql, [seqs_id, tax_id, tree_id]) obs = qdb.sql_connection.TRN.execute_fetchindex() - exp_seq = "%s_%s_%s" % (self.name, self.version, - basename(self.seqs_fp)) - exp_tax = "%s_%s_%s" % (self.name, self.version, - basename(self.tax_fp)) - exp_tree = "%s_%s_%s" % (self.name, self.version, - basename(self.tree_fp)) - exp = [[seqs_id, exp_seq, 10, '0', 1, 6, 0], - [tax_id, exp_tax, 11, '0', 1, 6, 0], - [tree_id, exp_tree, 12, '0', 1, 6, 0]] + exp_seq = "%s_%s_%s" % (self.name, self.version, basename(self.seqs_fp)) + exp_tax = "%s_%s_%s" % (self.name, self.version, basename(self.tax_fp)) + exp_tree = "%s_%s_%s" % (self.name, self.version, basename(self.tree_fp)) + exp = [ + [seqs_id, exp_seq, 10, "0", 1, 6, 0], + [tax_id, exp_tax, 11, "0", 1, 6, 0], + [tree_id, exp_tree, 12, "0", 1, 6, 0], + ] self.assertEqual(obs, exp) def test_sequence_fp(self): @@ -90,8 +91,8 @@ def test_tree_fp(self): def test_tree_fp_empty(self): ref = qdb.reference.Reference(2) - self.assertEqual(ref.tree_fp, '') + self.assertEqual(ref.tree_fp, "") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_setup.py b/qiita_db/test/test_setup.py index 70825fe59..55e5dbda2 100644 --- a/qiita_db/test/test_setup.py +++ 
b/qiita_db/test/test_setup.py @@ -8,7 +8,7 @@ from unittest import TestCase, main -from qiita_db.util import get_count, check_count +from qiita_db.util import check_count, get_count class SetupTest(TestCase): @@ -72,11 +72,11 @@ def test_analysis_users(self): self.assertEqual(get_count("qiita.analysis_users"), 1) def test_ontology(self): - self.assertTrue(check_count('qiita.ontology', 1)) + self.assertTrue(check_count("qiita.ontology", 1)) def test_ontology_terms(self): - self.assertTrue(check_count('qiita.term', 5)) + self.assertTrue(check_count("qiita.term", 5)) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_software.py b/qiita_db/test/test_software.py index f7e63ef39..42884ea4c 100644 --- a/qiita_db/test/test_software.py +++ b/qiita_db/test/test_software.py @@ -6,19 +6,18 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main +import warnings from copy import deepcopy +from json import dumps +from os import close, remove from os.path import exists -from os import remove, close from tempfile import mkstemp -import warnings +from unittest import TestCase, main import networkx as nx -from qiita_core.util import qiita_test_checker import qiita_db as qdb - -from json import dumps +from qiita_core.util import qiita_test_checker @qiita_test_checker() @@ -26,117 +25,133 @@ class CommandTests(TestCase): def setUp(self): self.software = qdb.software.Software(1) self.parameters = { - 'req_art': ['artifact:["BIOM"]', None], - 'req_param': ['string', None], - 'opt_int_param': ['integer', '4'], - 'opt_choice_param': ['choice:["opt1", "opt2"]', 'opt1'], - 'opt_mchoice_param': ['mchoice:["opt1", "opt2", "opt3"]', - ['opt1', 'opt2']], - 'opt_bool': ['boolean', 'False']} - self.outputs = {'out1': 'BIOM'} + "req_art": ['artifact:["BIOM"]', None], + "req_param": ["string", None], + "opt_int_param": ["integer", "4"], + "opt_choice_param": ['choice:["opt1", "opt2"]', "opt1"], + "opt_mchoice_param": ['mchoice:["opt1", "opt2", "opt3"]', ["opt1", "opt2"]], + "opt_bool": ["boolean", "False"], + } + self.outputs = {"out1": "BIOM"} def test_get_commands_by_input_type(self): qdb.software.Software.deactivate_all() - obs = list(qdb.software.Command.get_commands_by_input_type(['FASTQ'])) + obs = list(qdb.software.Command.get_commands_by_input_type(["FASTQ"])) self.assertEqual(obs, []) cmd = qdb.software.Command(1) cmd.activate() - obs = list(qdb.software.Command.get_commands_by_input_type(['FASTQ'])) + obs = list(qdb.software.Command.get_commands_by_input_type(["FASTQ"])) exp = [cmd] self.assertCountEqual(obs, exp) - obs = list(qdb.software.Command.get_commands_by_input_type( - ['FASTQ', 'per_sample_FASTQ'])) + obs = list( + qdb.software.Command.get_commands_by_input_type( + ["FASTQ", "per_sample_FASTQ"] + ) + ) self.assertCountEqual(obs, exp) - obs = list(qdb.software.Command.get_commands_by_input_type( - ['FASTQ', 'SFF'])) + obs = list(qdb.software.Command.get_commands_by_input_type(["FASTQ", "SFF"])) self.assertEqual(obs, exp) - obs = list(qdb.software.Command.get_commands_by_input_type( - ['FASTQ', 'SFF'], active_only=False)) + obs = list( + qdb.software.Command.get_commands_by_input_type( + ["FASTQ", "SFF"], active_only=False + ) + ) exp = [qdb.software.Command(1), qdb.software.Command(2)] self.assertCountEqual(obs, exp) new_cmd = qdb.software.Command.create( - self.software, "Analysis Only Command", + self.software, + "Analysis 
Only Command", "This is a command for testing", - {'req_art': ['artifact:["FASTQ"]', None]}, - analysis_only=True) - obs = list(qdb.software.Command.get_commands_by_input_type( - ['FASTQ', 'SFF'], active_only=False)) + {"req_art": ['artifact:["FASTQ"]', None]}, + analysis_only=True, + ) + obs = list( + qdb.software.Command.get_commands_by_input_type( + ["FASTQ", "SFF"], active_only=False + ) + ) exp = [qdb.software.Command(1), qdb.software.Command(2)] self.assertCountEqual(obs, exp) - obs = list(qdb.software.Command.get_commands_by_input_type( - ['FASTQ', 'SFF'], active_only=False, exclude_analysis=False)) + obs = list( + qdb.software.Command.get_commands_by_input_type( + ["FASTQ", "SFF"], active_only=False, exclude_analysis=False + ) + ) exp = [qdb.software.Command(1), qdb.software.Command(2), new_cmd] self.assertCountEqual(obs, exp) - obs = list(qdb.software.Command.get_commands_by_input_type( - ['FASTQ'], active_only=False, exclude_analysis=False, - prep_type='Metagenomic')) + obs = list( + qdb.software.Command.get_commands_by_input_type( + ["FASTQ"], + active_only=False, + exclude_analysis=False, + prep_type="Metagenomic", + ) + ) exp = [qdb.software.Command(1), new_cmd] self.assertCountEqual(obs, exp) - obs = list(qdb.software.Command.get_commands_by_input_type( - ['FASTQ'], active_only=False, exclude_analysis=False, - prep_type='18S')) + obs = list( + qdb.software.Command.get_commands_by_input_type( + ["FASTQ"], active_only=False, exclude_analysis=False, prep_type="18S" + ) + ) exp = [qdb.software.Command(1)] self.assertCountEqual(obs, exp) def test_get_html_artifact(self): with self.assertRaises(qdb.exceptions.QiitaDBError): - qdb.software.Command.get_html_generator('BIOM') + qdb.software.Command.get_html_generator("BIOM") exp = qdb.software.Command(5) exp.activate() - obs = qdb.software.Command.get_html_generator('BIOM') + obs = qdb.software.Command.get_html_generator("BIOM") self.assertEqual(obs, exp) with self.assertRaises(qdb.exceptions.QiitaDBError): - qdb.software.Command.get_html_generator('Demultiplexed') + qdb.software.Command.get_html_generator("Demultiplexed") exp = qdb.software.Command(7) exp.activate() - obs = qdb.software.Command.get_html_generator('Demultiplexed') + obs = qdb.software.Command.get_html_generator("Demultiplexed") self.assertEqual(obs, exp) with self.assertRaises(qdb.exceptions.QiitaDBError): - qdb.software.Command.get_html_generator('Unknown') + qdb.software.Command.get_html_generator("Unknown") def test_get_validator(self): with self.assertRaises(qdb.exceptions.QiitaDBError): - qdb.software.Command.get_validator('BIOM') + qdb.software.Command.get_validator("BIOM") exp = qdb.software.Command(4) exp.activate() - obs = qdb.software.Command.get_validator('BIOM') + obs = qdb.software.Command.get_validator("BIOM") self.assertEqual(obs, exp) with self.assertRaises(qdb.exceptions.QiitaDBError): - qdb.software.Command.get_validator('Demultiplexed') + qdb.software.Command.get_validator("Demultiplexed") exp = qdb.software.Command(6) exp.activate() - obs = qdb.software.Command.get_validator('Demultiplexed') + obs = qdb.software.Command.get_validator("Demultiplexed") self.assertEqual(obs, exp) with self.assertRaises(qdb.exceptions.QiitaDBError): - qdb.software.Command.get_validator('Unknown') + qdb.software.Command.get_validator("Unknown") def test_exists(self): - self.assertFalse(qdb.software.Command.exists( - self.software, "donotexists")) - self.assertTrue(qdb.software.Command.exists( - self.software, "Split libraries")) + 
self.assertFalse(qdb.software.Command.exists(self.software, "donotexists")) + self.assertTrue(qdb.software.Command.exists(self.software, "Split libraries")) def test_software(self): - self.assertEqual(qdb.software.Command(1).software, - qdb.software.Software(1)) - self.assertEqual(qdb.software.Command(2).software, - qdb.software.Software(1)) + self.assertEqual(qdb.software.Command(1).software, qdb.software.Software(1)) + self.assertEqual(qdb.software.Command(2).software, qdb.software.Software(1)) def test_name(self): self.assertEqual(qdb.software.Command(1).name, "Split libraries FASTQ") @@ -147,9 +162,9 @@ def test_post_processing_cmd(self): self.assertEqual(qdb.software.Command(1).post_processing_cmd, None) results = {} - results['script_env'] = 'source deactivate; source activate qiita' - results['script_path'] = 'qiita_db/test/support_files/worker.py' - results['script_params'] = {'a': 'A', 'b': 'B'} + results["script_env"] = "source deactivate; source activate qiita" + results["script_path"] = "qiita_db/test/support_files/worker.py" + results["script_params"] = {"a": "A", "b": "B"} results = dumps(results) @@ -162,11 +177,13 @@ def test_post_processing_cmd(self): results = qdb.software.Command(1).post_processing_cmd # test method returns 'ls' - self.assertEqual(results['script_env'], - 'source deactivate; source activate qiita') - self.assertEqual(results['script_path'], - 'qiita_db/test/support_files/worker.py') - self.assertEqual(results['script_params'], {'a': 'A', 'b': 'B'}) + self.assertEqual( + results["script_env"], "source deactivate; source activate qiita" + ) + self.assertEqual( + results["script_path"], "qiita_db/test/support_files/worker.py" + ) + self.assertEqual(results["script_params"], {"a": "A", "b": "B"}) # clean up table sql = """UPDATE qiita.software_command @@ -177,179 +194,205 @@ def test_post_processing_cmd(self): def test_description(self): self.assertEqual( qdb.software.Command(1).description, - "Demultiplexes and applies quality control to FASTQ data") + "Demultiplexes and applies quality control to FASTQ data", + ) self.assertEqual( qdb.software.Command(2).description, - "Demultiplexes and applies quality control to FASTA data") + "Demultiplexes and applies quality control to FASTA data", + ) def test_parameters(self): - exp_params = {'barcode_type': ['string', 'golay_12'], - 'input_data': ['artifact', None], - 'max_bad_run_length': ['integer', '3'], - 'max_barcode_errors': ['float', '1.5'], - 'min_per_read_length_fraction': ['float', '0.75'], - 'phred_quality_threshold': ['integer', '3'], - 'rev_comp': ['bool', 'False'], - 'rev_comp_barcode': ['bool', 'False'], - 'rev_comp_mapping_barcodes': ['bool', 'False'], - 'sequence_max_n': ['integer', '0'], - 'phred_offset': ['choice:["auto", "33", "64"]', 'auto']} + exp_params = { + "barcode_type": ["string", "golay_12"], + "input_data": ["artifact", None], + "max_bad_run_length": ["integer", "3"], + "max_barcode_errors": ["float", "1.5"], + "min_per_read_length_fraction": ["float", "0.75"], + "phred_quality_threshold": ["integer", "3"], + "rev_comp": ["bool", "False"], + "rev_comp_barcode": ["bool", "False"], + "rev_comp_mapping_barcodes": ["bool", "False"], + "sequence_max_n": ["integer", "0"], + "phred_offset": ['choice:["auto", "33", "64"]', "auto"], + } self.assertEqual(qdb.software.Command(1).parameters, exp_params) exp_params = { - 'barcode_type': ['string', 'golay_12'], - 'disable_bc_correction': ['bool', 'False'], - 'disable_primers': ['bool', 'False'], - 'input_data': ['artifact', None], - 'max_ambig': 
['integer', '6'], - 'max_barcode_errors': ['float', '1.5'], - 'max_homopolymer': ['integer', '6'], - 'max_primer_mismatch': ['integer', '0'], - 'max_seq_len': ['integer', '1000'], - 'min_qual_score': ['integer', '25'], - 'min_seq_len': ['integer', '200'], - 'qual_score_window': ['integer', '0'], - 'reverse_primer_mismatches': ['integer', '0'], - 'reverse_primers': - ['choice:["disable", "truncate_only", "truncate_remove"]', - 'disable'], - 'trim_seq_length': ['bool', 'False'], - 'truncate_ambi_bases': ['bool', 'False']} + "barcode_type": ["string", "golay_12"], + "disable_bc_correction": ["bool", "False"], + "disable_primers": ["bool", "False"], + "input_data": ["artifact", None], + "max_ambig": ["integer", "6"], + "max_barcode_errors": ["float", "1.5"], + "max_homopolymer": ["integer", "6"], + "max_primer_mismatch": ["integer", "0"], + "max_seq_len": ["integer", "1000"], + "min_qual_score": ["integer", "25"], + "min_seq_len": ["integer", "200"], + "qual_score_window": ["integer", "0"], + "reverse_primer_mismatches": ["integer", "0"], + "reverse_primers": [ + 'choice:["disable", "truncate_only", "truncate_remove"]', + "disable", + ], + "trim_seq_length": ["bool", "False"], + "truncate_ambi_bases": ["bool", "False"], + } self.assertEqual(qdb.software.Command(2).parameters, exp_params) def test_required_parameters(self): - exp_params = { - 'input_data': ('artifact', ['FASTQ', 'per_sample_FASTQ'])} + exp_params = {"input_data": ("artifact", ["FASTQ", "per_sample_FASTQ"])} obs = qdb.software.Command(1).required_parameters self.assertCountEqual(list(obs.keys()), exp_params.keys()) - self.assertEqual(obs['input_data'][0], exp_params['input_data'][0]) - self.assertCountEqual(obs['input_data'][1], - exp_params['input_data'][1]) + self.assertEqual(obs["input_data"][0], exp_params["input_data"][0]) + self.assertCountEqual(obs["input_data"][1], exp_params["input_data"][1]) - exp_params = { - 'input_data': ('artifact', ['SFF', 'FASTA', 'FASTA_Sanger'])} + exp_params = {"input_data": ("artifact", ["SFF", "FASTA", "FASTA_Sanger"])} obs = qdb.software.Command(2).required_parameters self.assertCountEqual(list(obs.keys()), exp_params.keys()) - self.assertEqual(obs['input_data'][0], exp_params['input_data'][0]) - self.assertCountEqual(obs['input_data'][1], - exp_params['input_data'][1]) + self.assertEqual(obs["input_data"][0], exp_params["input_data"][0]) + self.assertCountEqual(obs["input_data"][1], exp_params["input_data"][1]) def test_optional_parameters(self): - exp_params = {'barcode_type': ['string', 'golay_12'], - 'max_bad_run_length': ['integer', '3'], - 'max_barcode_errors': ['float', '1.5'], - 'min_per_read_length_fraction': ['float', '0.75'], - 'phred_quality_threshold': ['integer', '3'], - 'rev_comp': ['bool', 'False'], - 'rev_comp_barcode': ['bool', 'False'], - 'rev_comp_mapping_barcodes': ['bool', 'False'], - 'sequence_max_n': ['integer', '0'], - 'phred_offset': ['choice:["auto", "33", "64"]', 'auto']} - self.assertEqual(qdb.software.Command(1).optional_parameters, - exp_params) + exp_params = { + "barcode_type": ["string", "golay_12"], + "max_bad_run_length": ["integer", "3"], + "max_barcode_errors": ["float", "1.5"], + "min_per_read_length_fraction": ["float", "0.75"], + "phred_quality_threshold": ["integer", "3"], + "rev_comp": ["bool", "False"], + "rev_comp_barcode": ["bool", "False"], + "rev_comp_mapping_barcodes": ["bool", "False"], + "sequence_max_n": ["integer", "0"], + "phred_offset": ['choice:["auto", "33", "64"]', "auto"], + } + 
self.assertEqual(qdb.software.Command(1).optional_parameters, exp_params) exp_params = exp_params = { - 'barcode_type': ['string', 'golay_12'], - 'disable_bc_correction': ['bool', 'False'], - 'disable_primers': ['bool', 'False'], - 'max_ambig': ['integer', '6'], - 'max_barcode_errors': ['float', '1.5'], - 'max_homopolymer': ['integer', '6'], - 'max_primer_mismatch': ['integer', '0'], - 'max_seq_len': ['integer', '1000'], - 'min_qual_score': ['integer', '25'], - 'min_seq_len': ['integer', '200'], - 'qual_score_window': ['integer', '0'], - 'reverse_primer_mismatches': ['integer', '0'], - 'reverse_primers': - ['choice:["disable", "truncate_only", "truncate_remove"]', - 'disable'], - 'trim_seq_length': ['bool', 'False'], - 'truncate_ambi_bases': ['bool', 'False']} - self.assertEqual(qdb.software.Command(2).optional_parameters, - exp_params) + "barcode_type": ["string", "golay_12"], + "disable_bc_correction": ["bool", "False"], + "disable_primers": ["bool", "False"], + "max_ambig": ["integer", "6"], + "max_barcode_errors": ["float", "1.5"], + "max_homopolymer": ["integer", "6"], + "max_primer_mismatch": ["integer", "0"], + "max_seq_len": ["integer", "1000"], + "min_qual_score": ["integer", "25"], + "min_seq_len": ["integer", "200"], + "qual_score_window": ["integer", "0"], + "reverse_primer_mismatches": ["integer", "0"], + "reverse_primers": [ + 'choice:["disable", "truncate_only", "truncate_remove"]', + "disable", + ], + "trim_seq_length": ["bool", "False"], + "truncate_ambi_bases": ["bool", "False"], + } + self.assertEqual(qdb.software.Command(2).optional_parameters, exp_params) def test_default_parameter_sets(self): obs = list(qdb.software.Command(1).default_parameter_sets) - exp = [qdb.software.DefaultParameters(1), - qdb.software.DefaultParameters(2), - qdb.software.DefaultParameters(3), - qdb.software.DefaultParameters(4), - qdb.software.DefaultParameters(5), - qdb.software.DefaultParameters(6), - qdb.software.DefaultParameters(7), - qdb.software.DefaultParameters(11), - qdb.software.DefaultParameters(12)] + exp = [ + qdb.software.DefaultParameters(1), + qdb.software.DefaultParameters(2), + qdb.software.DefaultParameters(3), + qdb.software.DefaultParameters(4), + qdb.software.DefaultParameters(5), + qdb.software.DefaultParameters(6), + qdb.software.DefaultParameters(7), + qdb.software.DefaultParameters(11), + qdb.software.DefaultParameters(12), + ] self.assertEqual(obs, exp) obs = list(qdb.software.Command(2).default_parameter_sets) - exp = [qdb.software.DefaultParameters(8), - qdb.software.DefaultParameters(9)] + exp = [qdb.software.DefaultParameters(8), qdb.software.DefaultParameters(9)] self.assertEqual(obs, exp) def test_outputs(self): obs = qdb.software.Command(1).outputs - exp = [['demultiplexed', 'Demultiplexed']] + exp = [["demultiplexed", "Demultiplexed"]] self.assertEqual(obs, exp) obs = qdb.software.Command(2).outputs - exp = [['demultiplexed', 'Demultiplexed']] + exp = [["demultiplexed", "Demultiplexed"]] self.assertEqual(obs, exp) obs = qdb.software.Command(3).outputs - exp = [['OTU table', 'BIOM']] + exp = [["OTU table", "BIOM"]] self.assertEqual(obs, exp) def test_create_error(self): # no parameters with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.software.Command.create( - self.software, "Test command", "Testing command", {}, - self.outputs) + self.software, "Test command", "Testing command", {}, self.outputs + ) with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.software.Command.create( - self.software, "Test command", "Testing command", None, - 
self.outputs) + self.software, "Test command", "Testing command", None, self.outputs + ) # malformed params parameters = deepcopy(self.parameters) - parameters['req_param'].append('breaking_the_format') + parameters["req_param"].append("breaking_the_format") with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.software.Command.create( - self.software, "Test command", "Testing command", - parameters, self.outputs) + self.software, + "Test command", + "Testing command", + parameters, + self.outputs, + ) # unsupported parameter type parameters = deepcopy(self.parameters) - parameters['opt_int_param'][0] = 'unsupported_type' + parameters["opt_int_param"][0] = "unsupported_type" with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.software.Command.create( - self.software, "Test command", "Testing command", - parameters, self.outputs) + self.software, + "Test command", + "Testing command", + parameters, + self.outputs, + ) # bad default choice parameters = deepcopy(self.parameters) - parameters['opt_choice_param'][1] = 'unsupported_choice' + parameters["opt_choice_param"][1] = "unsupported_choice" with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.software.Command.create( - self.software, "Test command", "Testing command", - parameters, self.outputs) + self.software, + "Test command", + "Testing command", + parameters, + self.outputs, + ) # duplicate with self.assertRaises(qdb.exceptions.QiitaDBDuplicateError): qdb.software.Command.create( - self.software, "Split libraries", - "This is a command for testing", self.parameters, - self.outputs) + self.software, + "Split libraries", + "This is a command for testing", + self.parameters, + self.outputs, + ) # the output type doesn't exist - with self.assertRaisesRegex(ValueError, "Error creating QIIMEq2, Split" - " libraries - wrong output, This is a " - "command for testing - Unknown " - "artifact_type: BLA!"): + with self.assertRaisesRegex( + ValueError, + "Error creating QIIMEq2, Split" + " libraries - wrong output, This is a " + "command for testing - Unknown " + "artifact_type: BLA!", + ): qdb.software.Command.create( - self.software, "Split libraries - wrong output", - "This is a command for testing", self.parameters, - {'out': 'BLA!'}) + self.software, + "Split libraries - wrong output", + "This is a command for testing", + self.parameters, + {"out": "BLA!"}, + ) def test_create(self): # let's deactivate all current plugins and commands; this is not @@ -360,106 +403,129 @@ def test_create(self): # note that here we are adding commands to an existing software obs = qdb.software.Command.create( - self.software, "Test Command", "This is a command for testing", - self.parameters, self.outputs) + self.software, + "Test Command", + "This is a command for testing", + self.parameters, + self.outputs, + ) self.assertEqual(obs.name, "Test Command") self.assertEqual(obs.description, "This is a command for testing") - exp_required = {'req_param': ('string', [None]), - 'req_art': ('artifact', ['BIOM'])} + exp_required = { + "req_param": ("string", [None]), + "req_art": ("artifact", ["BIOM"]), + } self.assertEqual(obs.required_parameters, exp_required) exp_optional = { - 'opt_int_param': ['integer', '4'], - 'opt_choice_param': ['choice:["opt1", "opt2"]', 'opt1'], - 'opt_mchoice_param': ['mchoice:["opt1", "opt2", "opt3"]', - ['opt1', 'opt2']], - 'opt_bool': ['boolean', 'False']} + "opt_int_param": ["integer", "4"], + "opt_choice_param": ['choice:["opt1", "opt2"]', "opt1"], + "opt_mchoice_param": ['mchoice:["opt1", "opt2", "opt3"]', ["opt1", 
"opt2"]], + "opt_bool": ["boolean", "False"], + } self.assertEqual(obs.optional_parameters, exp_optional) self.assertFalse(obs.analysis_only) self.assertEqual(obs.naming_order, []) - self.assertEqual(obs.merging_scheme, - {'parameters': [], 'outputs': [], - 'ignore_parent_command': False}) + self.assertEqual( + obs.merging_scheme, + {"parameters": [], "outputs": [], "ignore_parent_command": False}, + ) # here we are creating a new software that we will add new commads to obs = qdb.software.Command.create( - self.software, "Test Command 2", "This is a command for testing", - self.parameters, analysis_only=True) + self.software, + "Test Command 2", + "This is a command for testing", + self.parameters, + analysis_only=True, + ) self.assertEqual(obs.name, "Test Command 2") self.assertEqual(obs.description, "This is a command for testing") self.assertEqual(obs.required_parameters, exp_required) self.assertEqual(obs.optional_parameters, exp_optional) self.assertTrue(obs.analysis_only) self.assertEqual(obs.naming_order, []) - self.assertEqual(obs.merging_scheme, - {'parameters': [], 'outputs': [], - 'ignore_parent_command': False}) + self.assertEqual( + obs.merging_scheme, + {"parameters": [], "outputs": [], "ignore_parent_command": False}, + ) # Test that the internal parameters in "Validate" # are created automatically software = qdb.software.Software.create( - "New Type Software", "1.0.0", - "This is adding a new software for testing", "env_name", - "start_plugin", "artifact definition") + "New Type Software", + "1.0.0", + "This is adding a new software for testing", + "env_name", + "start_plugin", + "artifact definition", + ) parameters = { - 'template': ('prep_template', None), - 'analysis': ('analysis', None), - 'files': ('string', None), - 'artifact_type': ('string', None)} + "template": ("prep_template", None), + "analysis": ("analysis", None), + "files": ("string", None), + "artifact_type": ("string", None), + } validate = qdb.software.Command.create( - software, "Validate", "Test creating a validate command", - parameters) + software, "Validate", "Test creating a validate command", parameters + ) self.assertEqual(validate.name, "Validate") - self.assertEqual( - validate.description, "Test creating a validate command") + self.assertEqual(validate.description, "Test creating a validate command") exp_required = { - 'template': ('prep_template', [None]), - 'analysis': ('analysis', [None]), - 'files': ('string', [None]), - 'artifact_type': ('string', [None])} + "template": ("prep_template", [None]), + "analysis": ("analysis", [None]), + "files": ("string", [None]), + "artifact_type": ("string", [None]), + } self.assertEqual(validate.required_parameters, exp_required) - exp_optional = {'name': ['string', 'dflt_name'], - 'provenance': ['string', None]} + exp_optional = {"name": ["string", "dflt_name"], "provenance": ["string", None]} self.assertEqual(validate.optional_parameters, exp_optional) self.assertFalse(validate.analysis_only) self.assertEqual(validate.naming_order, []) - self.assertEqual(validate.merging_scheme, - {'parameters': [], 'outputs': [], - 'ignore_parent_command': False}) + self.assertEqual( + validate.merging_scheme, + {"parameters": [], "outputs": [], "ignore_parent_command": False}, + ) # Test that the naming and merge information is provided parameters = { - 'req_art': ['artifact:["BIOM"]', None], - 'opt_int_param': ['integer', '4', 1, True], - 'opt_choice_param': ['choice:["opt1", "opt2"]', 'opt1', 2, True], - 'opt_bool': ['boolean', 'False', None, False]} - outputs = 
{'out1': ('BIOM', True)} + "req_art": ['artifact:["BIOM"]', None], + "opt_int_param": ["integer", "4", 1, True], + "opt_choice_param": ['choice:["opt1", "opt2"]', "opt1", 2, True], + "opt_bool": ["boolean", "False", None, False], + } + outputs = {"out1": ("BIOM", True)} obs = qdb.software.Command.create( - self.software, "Test Command Merge", "Testing cmd", parameters, - outputs=outputs) + self.software, + "Test Command Merge", + "Testing cmd", + parameters, + outputs=outputs, + ) self.assertEqual(obs.name, "Test Command Merge") self.assertEqual(obs.description, "Testing cmd") - exp_required = {'req_art': ('artifact', ['BIOM'])} + exp_required = {"req_art": ("artifact", ["BIOM"])} self.assertEqual(obs.required_parameters, exp_required) exp_optional = { - 'opt_int_param': ['integer', '4'], - 'opt_choice_param': ['choice:["opt1", "opt2"]', 'opt1'], - 'opt_bool': ['boolean', 'False']} + "opt_int_param": ["integer", "4"], + "opt_choice_param": ['choice:["opt1", "opt2"]', "opt1"], + "opt_bool": ["boolean", "False"], + } self.assertEqual(obs.optional_parameters, exp_optional) self.assertFalse(obs.analysis_only) - self.assertEqual(obs.naming_order, - ['opt_int_param', 'opt_choice_param']) - exp = {'parameters': ['opt_choice_param', 'opt_int_param'], - 'outputs': ['out1'], - 'ignore_parent_command': False} + self.assertEqual(obs.naming_order, ["opt_int_param", "opt_choice_param"]) + exp = { + "parameters": ["opt_choice_param", "opt_int_param"], + "outputs": ["out1"], + "ignore_parent_command": False, + } self.assertEqual(obs.merging_scheme, exp) # now that we are done with the regular creation testing we can create # a new command with the name of an old deprecated command and make # sure that is not deprecated now # 1. let's find the previous command and make sure is deprecated - cmd_name = 'Split libraries FASTQ' - old_cmd = [cmd for cmd in self.software.commands - if cmd.name == cmd_name][0] + cmd_name = "Split libraries FASTQ" + old_cmd = [cmd for cmd in self.software.commands if cmd.name == cmd_name][0] self.assertFalse(old_cmd.active) # 2. 
let's create a new command with the same name and check that now @@ -468,13 +534,19 @@ def test_create(self): # is an 'artifact definition', so this will allow us to test that # a previous Validate command is not active new_cmd = qdb.software.Command.create( - software, cmd_name, cmd_name, parameters, outputs=outputs) + software, cmd_name, cmd_name, parameters, outputs=outputs + ) self.assertEqual(old_cmd.name, new_cmd.name) self.assertTrue(old_cmd.active) self.assertTrue(new_cmd.active) # find an old Validate command - old_validate = [c for c in qdb.software.Software.from_name_and_version( - 'BIOM type', '2.1.4 - Qiime2').commands if c.name == 'Validate'][0] + old_validate = [ + c + for c in qdb.software.Software.from_name_and_version( + "BIOM type", "2.1.4 - Qiime2" + ).commands + if c.name == "Validate" + ][0] self.assertEqual(old_validate.name, validate.name) self.assertTrue(validate.active) self.assertFalse(old_validate.active) @@ -487,13 +559,15 @@ def test_activate(self): self.assertTrue(tester.active) def test_processing_jobs(self): - exp_jids = ['6d368e16-2242-4cf8-87b4-a5dc40bb890b', - '4c7115e8-4c8e-424c-bf25-96c292ca1931', - 'b72369f9-a886-4193-8d3d-f7b504168e75', - '46b76f74-e100-47aa-9bf2-c0208bcea52d', - '6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f', - '063e553b-327c-4818-ab4a-adfe58e49860', - 'ac653cb5-76a6-4a45-929e-eb9b2dee6b63'] + exp_jids = [ + "6d368e16-2242-4cf8-87b4-a5dc40bb890b", + "4c7115e8-4c8e-424c-bf25-96c292ca1931", + "b72369f9-a886-4193-8d3d-f7b504168e75", + "46b76f74-e100-47aa-9bf2-c0208bcea52d", + "6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f", + "063e553b-327c-4818-ab4a-adfe58e49860", + "ac653cb5-76a6-4a45-929e-eb9b2dee6b63", + ] jobs = qdb.software.Command(1).processing_jobs set_jobs = set(jobs) @@ -505,7 +579,7 @@ def test_processing_jobs(self): exp = set([qdb.processing_job.ProcessingJob(j) for j in exp_jids]) self.assertEqual(len(set_jobs & exp), len(exp_jids)) - exp_jids = ['bcc7ebcd-39c1-43e4-af2d-822e3589f14d'] + exp_jids = ["bcc7ebcd-39c1-43e4-af2d-822e3589f14d"] exp = [qdb.processing_job.ProcessingJob(j) for j in exp_jids] self.assertCountEqual(qdb.software.Command(2).processing_jobs, exp) self.assertCountEqual(qdb.software.Command(4).processing_jobs, []) @@ -546,20 +620,24 @@ def test_iter(self): # Command 2 is Split libraries and has defined resources self.assertEqual( qdb.software.Command(2).resource_allocation, - '-p qiita -N 1 -n 1 --mem 60gb --time 25:00:00') + "-p qiita -N 1 -n 1 --mem 60gb --time 25:00:00", + ) # Command 9 is Summarize Taxa and has no defined resources so it goes # to defaults self.assertEqual( qdb.software.Command(9).resource_allocation, - '-p qiita -N 1 -n 5 --mem-per-cpu 8gb --time 168:00:00') + "-p qiita -N 1 -n 5 --mem-per-cpu 8gb --time 168:00:00", + ) # delete allocations to test errors qdb.sql_connection.perform_as_transaction( - "DELETE FROM qiita.processing_job_resource_allocation") + "DELETE FROM qiita.processing_job_resource_allocation" + ) - with self.assertRaisesRegex(ValueError, "Could not match 'Split " - "libraries' to a resource allocation!"): + with self.assertRaisesRegex( + ValueError, "Could not match 'Split libraries' to a resource allocation!" 
+ ): qdb.software.Command(2).resource_allocation @@ -574,21 +652,20 @@ def tearDown(self): remove(f) def test_from_name_and_version(self): - obs = qdb.software.Software.from_name_and_version('QIIMEq2', '1.9.1') + obs = qdb.software.Software.from_name_and_version("QIIMEq2", "1.9.1") exp = qdb.software.Software(1) self.assertEqual(obs, exp) - obs = qdb.software.Software.from_name_and_version( - 'BIOM type', '2.1.4 - Qiime2') + obs = qdb.software.Software.from_name_and_version("BIOM type", "2.1.4 - Qiime2") exp = qdb.software.Software(2) self.assertEqual(obs, exp) # Wrong name with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): - qdb.software.Software.from_name_and_version('QiIMEq2', '1.9.1') + qdb.software.Software.from_name_and_version("QiIMEq2", "1.9.1") # Wrong version with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): - qdb.software.Software.from_name_and_version('QIIMEq2', '1.9.0') + qdb.software.Software.from_name_and_version("QIIMEq2", "1.9.0") def test_name(self): self.assertEqual(qdb.software.Software(1).name, "QIIMEq2") @@ -597,14 +674,19 @@ def test_version(self): self.assertEqual(qdb.software.Software(1).version, "1.9.1") def test_description(self): - exp = ("Quantitative Insights Into Microbial Ecology (QIIME) is an " - "open-source bioinformatics pipeline for performing microbiome " - "analysis from raw DNA sequencing data") + exp = ( + "Quantitative Insights Into Microbial Ecology (QIIME) is an " + "open-source bioinformatics pipeline for performing microbiome " + "analysis from raw DNA sequencing data" + ) self.assertEqual(qdb.software.Software(1).description, exp) def test_commands(self): - exp = [qdb.software.Command(1), qdb.software.Command(2), - qdb.software.Command(3)] + exp = [ + qdb.software.Command(1), + qdb.software.Command(2), + qdb.software.Command(3), + ] obs = qdb.software.Software(1).commands self.assertEqual(len(obs), 7) for e in exp: @@ -612,63 +694,65 @@ def test_commands(self): def test_get_command(self): s = qdb.software.Software(1) - obs = s.get_command('Split libraries FASTQ') + obs = s.get_command("Split libraries FASTQ") self.assertEqual(obs, qdb.software.Command(1)) with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): - s.get_command('UNKNOWN') + s.get_command("UNKNOWN") def test_publications(self): - self.assertEqual(qdb.software.Software(1).publications, - [['10.1038/nmeth.f.303', '20383131']]) + self.assertEqual( + qdb.software.Software(1).publications, [["10.1038/nmeth.f.303", "20383131"]] + ) def test_environment_script(self): tester = qdb.software.Software(1) - self.assertEqual(tester.environment_script, 'source activate qiita') + self.assertEqual(tester.environment_script, "source activate qiita") def test_start_script(self): tester = qdb.software.Software(2) - self.assertEqual(tester.start_script, 'start_biom') + self.assertEqual(tester.start_script, "start_biom") def test_default_workflows(self): obs = list(qdb.software.DefaultWorkflow.iter(True)) - exp = [qdb.software.DefaultWorkflow(1), - qdb.software.DefaultWorkflow(2), - qdb.software.DefaultWorkflow(3)] + exp = [ + qdb.software.DefaultWorkflow(1), + qdb.software.DefaultWorkflow(2), + qdb.software.DefaultWorkflow(3), + ] self.assertEqual(obs, exp) obs = list(qdb.software.DefaultWorkflow.iter(False)) self.assertEqual(obs, exp) - self.assertEqual( - qdb.software.DefaultWorkflow(1).artifact_type, 'FASTQ') + self.assertEqual(qdb.software.DefaultWorkflow(1).artifact_type, "FASTQ") qdb.software.DefaultWorkflow(1).active = False obs = 
list(qdb.software.DefaultWorkflow.iter(False)) self.assertEqual(obs, exp) obs = list(qdb.software.DefaultWorkflow.iter(True)) - exp = [qdb.software.DefaultWorkflow(2), - qdb.software.DefaultWorkflow(3)] + exp = [qdb.software.DefaultWorkflow(2), qdb.software.DefaultWorkflow(3)] self.assertEqual(obs, exp) obs = qdb.software.DefaultWorkflow(1).data_type - exp = ['16S', '18S'] + exp = ["16S", "18S"] self.assertEqual(obs, exp) obs = qdb.software.DefaultWorkflow(2).data_type - exp = ['18S'] + exp = ["18S"] self.assertEqual(obs, exp) dw = qdb.software.DefaultWorkflow(1) - exp = ('This accepts html Qiita!' - '
BYE!') + exp = ( + 'This accepts html Qiita!' + "
BYE!" + ) self.assertEqual(dw.description, exp) - exp = 'bla!' + exp = "bla!" dw.description = exp self.assertEqual(dw.description, exp) def test_type(self): - self.assertEqual(qdb.software.Software(1).type, - "artifact transformation") + self.assertEqual(qdb.software.Software(1).type, "artifact transformation") def test_active(self): self.assertTrue(qdb.software.Software(1).active) @@ -676,13 +760,14 @@ def test_active(self): def test_client_id(self): self.assertEqual( qdb.software.Software(1).client_id, - 'yKDgajoKn5xlOA8tpo48Rq8mWJkH9z4LBCx2SvqWYLIryaan2u') + "yKDgajoKn5xlOA8tpo48Rq8mWJkH9z4LBCx2SvqWYLIryaan2u", + ) def test_client_secret(self): self.assertEqual( qdb.software.Software(1).client_secret, - '9xhU5rvzq8dHCEI5sSN95jesUULrZi6pT6Wuc71fDbFbsrnWarcSq56TJLN4kP4hH' - ) + "9xhU5rvzq8dHCEI5sSN95jesUULrZi6pT6Wuc71fDbFbsrnWarcSq56TJLN4kP4hH", + ) def test_deactivate_all(self): obs = qdb.software.Software(1) @@ -692,106 +777,151 @@ def test_deactivate_all(self): def test_from_file(self): exp = qdb.software.Software(1) - client_id = 'yKDgajoKn5xlOA8tpo48Rq8mWJkH9z4LBCx2SvqWYLIryaan2u' - client_secret = ('9xhU5rvzq8dHCEI5sSN95jesUULrZi6pT6Wuc71fDbFbsrnWarc' - 'Sq56TJLN4kP4hH') + client_id = "yKDgajoKn5xlOA8tpo48Rq8mWJkH9z4LBCx2SvqWYLIryaan2u" + client_secret = ( + "9xhU5rvzq8dHCEI5sSN95jesUULrZi6pT6Wuc71fDbFbsrnWarcSq56TJLN4kP4hH" + ) # Activate existing plugin - fd, fp = mkstemp(suffix='.conf') + fd, fp = mkstemp(suffix=".conf") close(fd) self._clean_up_files.append(fp) - with open(fp, 'w') as f: - f.write(CONF_TEMPLATE % - ('QIIMEq2', '1.9.1', - 'Quantitative Insights Into Microbial Ecology (QIIME) ' - 'is an open-source bioinformatics pipeline for ' - 'performing microbiome analysis from raw DNA ' - 'sequencing data', 'source activate qiita', - 'start_target_gene', 'artifact transformation', - '[["10.1038/nmeth.f.303", "20383131"]]', client_id, - client_secret)) + with open(fp, "w") as f: + f.write( + CONF_TEMPLATE + % ( + "QIIMEq2", + "1.9.1", + "Quantitative Insights Into Microbial Ecology (QIIME) " + "is an open-source bioinformatics pipeline for " + "performing microbiome analysis from raw DNA " + "sequencing data", + "source activate qiita", + "start_target_gene", + "artifact transformation", + '[["10.1038/nmeth.f.303", "20383131"]]', + client_id, + client_secret, + ) + ) obs = qdb.software.Software.from_file(fp) self.assertEqual(obs, exp) # Activate an existing plugin with a warning - fd, fp = mkstemp(suffix='.conf') + fd, fp = mkstemp(suffix=".conf") close(fd) self._clean_up_files.append(fp) - with open(fp, 'w') as f: - f.write(CONF_TEMPLATE % - ('QIIMEq2', '1.9.1', 'Different description', - 'source activate qiime', 'start_qiime', - 'artifact transformation', - '[["10.1038/nmeth.f.303", "20383131"]]', client_id, - client_secret)) + with open(fp, "w") as f: + f.write( + CONF_TEMPLATE + % ( + "QIIMEq2", + "1.9.1", + "Different description", + "source activate qiime", + "start_qiime", + "artifact transformation", + '[["10.1038/nmeth.f.303", "20383131"]]', + client_id, + client_secret, + ) + ) with warnings.catch_warnings(record=True) as warns: obs = qdb.software.Software.from_file(fp) obs_warns = [str(w.message) for w in warns] - exp_warns = ['Plugin "QIIMEq2" version "1.9.1" config file does ' - 'not match with stored information. Check the config ' - 'file or run "qiita plugin update" to update the ' - 'plugin information. 
Offending values: description, ' - 'environment_script, start_script'] + exp_warns = [ + 'Plugin "QIIMEq2" version "1.9.1" config file does ' + "not match with stored information. Check the config " + 'file or run "qiita plugin update" to update the ' + "plugin information. Offending values: description, " + "environment_script, start_script" + ] self.assertCountEqual(obs_warns, exp_warns) self.assertEqual(obs, exp) self.assertEqual( obs.description, - 'Quantitative Insights Into Microbial Ecology (QIIME) is an ' - 'open-source bioinformatics pipeline for performing microbiome ' - 'analysis from raw DNA sequencing data') - self.assertEqual(obs.environment_script, 'source activate qiita') - self.assertEqual(obs.start_script, 'start_target_gene') + "Quantitative Insights Into Microbial Ecology (QIIME) is an " + "open-source bioinformatics pipeline for performing microbiome " + "analysis from raw DNA sequencing data", + ) + self.assertEqual(obs.environment_script, "source activate qiita") + self.assertEqual(obs.start_script, "start_target_gene") # Update an existing plugin obs = qdb.software.Software.from_file(fp, update=True) self.assertEqual(obs, exp) - self.assertEqual(obs.description, 'Different description') - self.assertEqual(obs.environment_script, 'source activate qiime') - self.assertEqual(obs.start_script, 'start_qiime') + self.assertEqual(obs.description, "Different description") + self.assertEqual(obs.environment_script, "source activate qiime") + self.assertEqual(obs.start_script, "start_qiime") # Create a new plugin - fd, fp = mkstemp(suffix='.conf') + fd, fp = mkstemp(suffix=".conf") close(fd) self._clean_up_files.append(fp) - with open(fp, 'w') as f: - f.write(CONF_TEMPLATE % - ('NewPlugin', '0.0.1', 'Some description', - 'source activate newplug', 'start_new_plugin', - 'artifact definition', '', client_id, - client_secret)) + with open(fp, "w") as f: + f.write( + CONF_TEMPLATE + % ( + "NewPlugin", + "0.0.1", + "Some description", + "source activate newplug", + "start_new_plugin", + "artifact definition", + "", + client_id, + client_secret, + ) + ) obs = qdb.software.Software.from_file(fp) self.assertNotEqual(obs, exp) - self.assertEqual(obs.name, 'NewPlugin') + self.assertEqual(obs.name, "NewPlugin") # Update publications - fd, fp = mkstemp(suffix='.conf') + fd, fp = mkstemp(suffix=".conf") close(fd) self._clean_up_files.append(fp) exp = obs - with open(fp, 'w') as f: - f.write(CONF_TEMPLATE % - ('NewPlugin', '0.0.1', 'Some description', - 'source activate newplug', 'start_new_plugin', - 'artifact definition', '[["10.1039/nmeth.f.303", null]]', - client_id, client_secret)) + with open(fp, "w") as f: + f.write( + CONF_TEMPLATE + % ( + "NewPlugin", + "0.0.1", + "Some description", + "source activate newplug", + "start_new_plugin", + "artifact definition", + '[["10.1039/nmeth.f.303", null]]', + client_id, + client_secret, + ) + ) obs = qdb.software.Software.from_file(fp, update=True) self.assertEqual(obs, exp) self.assertEqual(obs.publications, [["10.1039/nmeth.f.303", None]]) # Correctly ignores if there are no publications - fd, fp = mkstemp(suffix='.conf') + fd, fp = mkstemp(suffix=".conf") close(fd) self._clean_up_files.append(fp) - with open(fp, 'w') as f: - f.write(CONF_TEMPLATE % - ('Target Gene type', '0.1.0', - 'Target gene artifact types plugin', 'source ' - '~/virtualenv/python2.7/bin/activate; export ' - 'PATH=$HOME/miniconda3/bin/:$PATH; source activate qiita', - 'start_target_gene_types', 'artifact definition', '', - 
'4MOBzUBHBtUmwhaC258H7PS0rBBLyGQrVxGPgc9g305bvVhf6h', - 'rFb7jwAb3UmSUN57Bjlsi4DTl2owLwRpwCc0SggRNEVb2Ebae2p5Umnq' - '20rNMhmqN')) + with open(fp, "w") as f: + f.write( + CONF_TEMPLATE + % ( + "Target Gene type", + "0.1.0", + "Target gene artifact types plugin", + "source " + "~/virtualenv/python2.7/bin/activate; export " + "PATH=$HOME/miniconda3/bin/:$PATH; source activate qiita", + "start_target_gene_types", + "artifact definition", + "", + "4MOBzUBHBtUmwhaC258H7PS0rBBLyGQrVxGPgc9g305bvVhf6h", + "rFb7jwAb3UmSUN57Bjlsi4DTl2owLwRpwCc0SggRNEVb2Ebae2p5Umnq20rNMhmqN", + ) + ) with warnings.catch_warnings(record=True) as warns: obs = qdb.software.Software.from_file(fp) obs_warns = [str(w.message) for w in warns] @@ -802,29 +932,47 @@ def test_from_file(self): self.assertEqual(obs.publications, []) # Raise an error if changing plugin type - fd, fp = mkstemp(suffix='.conf') + fd, fp = mkstemp(suffix=".conf") close(fd) self._clean_up_files.append(fp) - with open(fp, 'w') as f: - f.write(CONF_TEMPLATE % - ("NewPlugin", "0.0.1", "Some description", - "source activate newplug", "start_new_plugin", - "artifact transformation", "", client_id, - client_secret)) + with open(fp, "w") as f: + f.write( + CONF_TEMPLATE + % ( + "NewPlugin", + "0.0.1", + "Some description", + "source activate newplug", + "start_new_plugin", + "artifact transformation", + "", + client_id, + client_secret, + ) + ) QE = qdb.exceptions with self.assertRaises(QE.QiitaDBOperationNotPermittedError): qdb.software.Software.from_file(fp) # Raise an error if client_id or client_secret are different - fd, fp = mkstemp(suffix='.conf') + fd, fp = mkstemp(suffix=".conf") close(fd) self._clean_up_files.append(fp) - with open(fp, 'w') as f: - f.write(CONF_TEMPLATE % - ('Target Gene type', '0.1.0', - 'Target gene artifact types plugin', - 'source activate qiita', 'start_target_gene_types', - 'artifact definition', '', 'client_id', 'client_secret')) + with open(fp, "w") as f: + f.write( + CONF_TEMPLATE + % ( + "Target Gene type", + "0.1.0", + "Target gene artifact types plugin", + "source activate qiita", + "start_target_gene_types", + "artifact definition", + "", + "client_id", + "client_secret", + ) + ) with self.assertRaises(QE.QiitaDBOperationNotPermittedError): qdb.software.Software.from_file(fp) @@ -832,8 +980,8 @@ def test_from_file(self): # But allow to update if update = True obs = qdb.software.Software.from_file(fp, update=True) self.assertEqual(obs, qdb.software.Software(3)) - self.assertEqual(obs.client_id, 'client_id') - self.assertEqual(obs.client_secret, 'client_secret') + self.assertEqual(obs.client_id, "client_id") + self.assertEqual(obs.client_secret, "client_secret") def test_exists(self): self.assertTrue(qdb.software.Software.exists("QIIMEq2", "1.9.1")) @@ -842,72 +990,90 @@ def test_exists(self): def test_create(self): obs = qdb.software.Software.create( - "New Software", "0.1.0", - "This is adding a new software for testing", "env_name", - "start_plugin", "artifact transformation") + "New Software", + "0.1.0", + "This is adding a new software for testing", + "env_name", + "start_plugin", + "artifact transformation", + ) self.assertEqual(obs.name, "New Software") self.assertEqual(obs.version, "0.1.0") - self.assertEqual(obs.description, - "This is adding a new software for testing") + self.assertEqual(obs.description, "This is adding a new software for testing") self.assertEqual(obs.commands, []) self.assertEqual(obs.publications, []) - self.assertEqual(obs.environment_script, 'env_name') - 
self.assertEqual(obs.start_script, 'start_plugin') - self.assertEqual(obs.type, 'artifact transformation') + self.assertEqual(obs.environment_script, "env_name") + self.assertEqual(obs.start_script, "start_plugin") + self.assertEqual(obs.type, "artifact transformation") self.assertIsNotNone(obs.client_id) self.assertIsNotNone(obs.client_secret) self.assertFalse(obs.active) # create with publications - exp_publications = [['10.1000/nmeth.f.101', '12345678'], - ['10.1001/nmeth.f.101', '23456789']] + exp_publications = [ + ["10.1000/nmeth.f.101", "12345678"], + ["10.1001/nmeth.f.101", "23456789"], + ] obs = qdb.software.Software.create( - "Published Software", "1.0.0", "Another testing software", - "env_name", "start_plugin", "artifact transformation", - publications=exp_publications) + "Published Software", + "1.0.0", + "Another testing software", + "env_name", + "start_plugin", + "artifact transformation", + publications=exp_publications, + ) self.assertEqual(obs.name, "Published Software") self.assertEqual(obs.version, "1.0.0") self.assertEqual(obs.description, "Another testing software") self.assertEqual(obs.commands, []) self.assertEqual(obs.publications, exp_publications) - self.assertEqual(obs.environment_script, 'env_name') - self.assertEqual(obs.start_script, 'start_plugin') - self.assertEqual(obs.type, 'artifact transformation') + self.assertEqual(obs.environment_script, "env_name") + self.assertEqual(obs.start_script, "start_plugin") + self.assertEqual(obs.type, "artifact transformation") self.assertIsNotNone(obs.client_id) self.assertIsNotNone(obs.client_secret) self.assertFalse(obs.active) # Create with client_id, client_secret obs = qdb.software.Software.create( - "Another Software", "0.1.0", - "This is adding another software for testing", "env_a_name", - "start_plugin_script", "artifact transformation", - client_id='SomeNewClientId', client_secret='SomeNewClientSecret') + "Another Software", + "0.1.0", + "This is adding another software for testing", + "env_a_name", + "start_plugin_script", + "artifact transformation", + client_id="SomeNewClientId", + client_secret="SomeNewClientSecret", + ) self.assertEqual(obs.name, "Another Software") self.assertEqual(obs.version, "0.1.0") - self.assertEqual(obs.description, - "This is adding another software for testing") + self.assertEqual(obs.description, "This is adding another software for testing") self.assertEqual(obs.commands, []) self.assertEqual(obs.publications, []) - self.assertEqual(obs.environment_script, 'env_a_name') - self.assertEqual(obs.start_script, 'start_plugin_script') - self.assertEqual(obs.type, 'artifact transformation') - self.assertEqual(obs.client_id, 'SomeNewClientId') - self.assertEqual(obs.client_secret, 'SomeNewClientSecret') + self.assertEqual(obs.environment_script, "env_a_name") + self.assertEqual(obs.start_script, "start_plugin_script") + self.assertEqual(obs.type, "artifact transformation") + self.assertEqual(obs.client_id, "SomeNewClientId") + self.assertEqual(obs.client_secret, "SomeNewClientSecret") self.assertFalse(obs.active) def test_add_publications(self): obs = qdb.software.Software.create( - "New Software", "0.1.0", - "This is adding a new software for testing", "env_name", - "start_plugin", "artifact transformation") + "New Software", + "0.1.0", + "This is adding a new software for testing", + "env_name", + "start_plugin", + "artifact transformation", + ) self.assertEqual(obs.publications, []) - obs.add_publications([['10.1000/nmeth.f.101', '12345678']]) - exp = [['10.1000/nmeth.f.101', 
'12345678']] + obs.add_publications([["10.1000/nmeth.f.101", "12345678"]]) + exp = [["10.1000/nmeth.f.101", "12345678"]] self.assertCountEqual(obs.publications, exp) # Add a publication that already exists - obs.add_publications([['10.1000/nmeth.f.101', '12345678']]) + obs.add_publications([["10.1000/nmeth.f.101", "12345678"]]) self.assertCountEqual(obs.publications, exp) def test_activate(self): @@ -928,7 +1094,7 @@ def test_deprecated(self): self.assertFalse(tester.deprecated) with self.assertRaises(ValueError): - tester.deprecated = 'error!' + tester.deprecated = "error!" @qiita_test_checker() @@ -936,281 +1102,383 @@ class DefaultParametersTests(TestCase): def test_exists(self): cmd = qdb.software.Command(1) obs = qdb.software.DefaultParameters.exists( - cmd, max_bad_run_length=3, min_per_read_length_fraction=0.75, - sequence_max_n=0, rev_comp_barcode=False, - rev_comp_mapping_barcodes=False, rev_comp=False, - phred_quality_threshold=3, barcode_type="golay_12", - max_barcode_errors=1.5, phred_offset='auto') + cmd, + max_bad_run_length=3, + min_per_read_length_fraction=0.75, + sequence_max_n=0, + rev_comp_barcode=False, + rev_comp_mapping_barcodes=False, + rev_comp=False, + phred_quality_threshold=3, + barcode_type="golay_12", + max_barcode_errors=1.5, + phred_offset="auto", + ) self.assertTrue(obs) obs = qdb.software.DefaultParameters.exists( - cmd, max_bad_run_length=3, min_per_read_length_fraction=0.65, - sequence_max_n=0, rev_comp_barcode=False, - rev_comp_mapping_barcodes=False, rev_comp=False, - phred_quality_threshold=3, barcode_type="hamming_8", - max_barcode_errors=1.5, phred_offset='auto') + cmd, + max_bad_run_length=3, + min_per_read_length_fraction=0.65, + sequence_max_n=0, + rev_comp_barcode=False, + rev_comp_mapping_barcodes=False, + rev_comp=False, + phred_quality_threshold=3, + barcode_type="hamming_8", + max_barcode_errors=1.5, + phred_offset="auto", + ) self.assertFalse(obs) def test_name(self): self.assertEqual(qdb.software.DefaultParameters(1).name, "Defaults") def test_values(self): - exp = {'min_per_read_length_fraction': 0.75, - 'max_barcode_errors': 1.5, 'max_bad_run_length': 3, - 'rev_comp': False, 'phred_quality_threshold': 3, - 'rev_comp_barcode': False, 'sequence_max_n': 0, - 'barcode_type': 'golay_12', 'rev_comp_mapping_barcodes': False, - 'phred_offset': 'auto'} + exp = { + "min_per_read_length_fraction": 0.75, + "max_barcode_errors": 1.5, + "max_bad_run_length": 3, + "rev_comp": False, + "phred_quality_threshold": 3, + "rev_comp_barcode": False, + "sequence_max_n": 0, + "barcode_type": "golay_12", + "rev_comp_mapping_barcodes": False, + "phred_offset": "auto", + } self.assertEqual(qdb.software.DefaultParameters(1).values, exp) def test_command(self): self.assertEqual( - qdb.software.DefaultParameters(1).command, qdb.software.Command(1)) + qdb.software.DefaultParameters(1).command, qdb.software.Command(1) + ) def test_create(self): cmd = qdb.software.Command(1) obs = qdb.software.DefaultParameters.create( - "test_create", cmd, max_bad_run_length=3, - min_per_read_length_fraction=0.75, sequence_max_n=0, - rev_comp_barcode=False, rev_comp_mapping_barcodes=False, - rev_comp=False, phred_quality_threshold=3, - barcode_type="hamming_8", max_barcode_errors=1.5, - phred_offset='auto') + "test_create", + cmd, + max_bad_run_length=3, + min_per_read_length_fraction=0.75, + sequence_max_n=0, + rev_comp_barcode=False, + rev_comp_mapping_barcodes=False, + rev_comp=False, + phred_quality_threshold=3, + barcode_type="hamming_8", + max_barcode_errors=1.5, + 
phred_offset="auto", + ) self.assertEqual(obs.name, "test_create") - exp = {'max_bad_run_length': 3, 'min_per_read_length_fraction': 0.75, - 'sequence_max_n': 0, 'rev_comp_barcode': False, - 'rev_comp_mapping_barcodes': False, 'rev_comp': False, - 'phred_quality_threshold': 3, 'barcode_type': "hamming_8", - 'max_barcode_errors': 1.5, 'phred_offset': 'auto'} + exp = { + "max_bad_run_length": 3, + "min_per_read_length_fraction": 0.75, + "sequence_max_n": 0, + "rev_comp_barcode": False, + "rev_comp_mapping_barcodes": False, + "rev_comp": False, + "phred_quality_threshold": 3, + "barcode_type": "hamming_8", + "max_barcode_errors": 1.5, + "phred_offset": "auto", + } self.assertEqual(obs.values, exp) self.assertEqual(obs.command, cmd) class ParametersTests(TestCase): def test_init_error(self): - with self.assertRaises( - qdb.exceptions.QiitaDBOperationNotPermittedError): - qdb.software.Parameters({'a': 1}, None) + with self.assertRaises(qdb.exceptions.QiitaDBOperationNotPermittedError): + qdb.software.Parameters({"a": 1}, None) def test_eq(self): # Test difference due to type a = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}) + qdb.software.DefaultParameters(1), {"input_data": 1} + ) b = qdb.software.DefaultParameters(1) self.assertFalse(a == b) # Test difference due to command b = qdb.software.Parameters.from_default_params( - next(qdb.software.Command(2).default_parameter_sets), - {'input_data': 1}) + next(qdb.software.Command(2).default_parameter_sets), {"input_data": 1} + ) self.assertFalse(a == b) # Test difference due to values b = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 2}) + qdb.software.DefaultParameters(1), {"input_data": 2} + ) self.assertFalse(a == b) # Test equality b = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}) + qdb.software.DefaultParameters(1), {"input_data": 1} + ) self.assertTrue(a == b) def test_load_json(self): - json_str = ('{"barcode_type": "golay_12", "input_data": 1, ' - '"max_bad_run_length": 3, "max_barcode_errors": 1.5, ' - '"min_per_read_length_fraction": 0.75, ' - '"phred_quality_threshold": 3, "rev_comp": false, ' - '"rev_comp_barcode": false, "phred_offset": "auto", ' - '"rev_comp_mapping_barcodes": false, "sequence_max_n": 0}') + json_str = ( + '{"barcode_type": "golay_12", "input_data": 1, ' + '"max_bad_run_length": 3, "max_barcode_errors": 1.5, ' + '"min_per_read_length_fraction": 0.75, ' + '"phred_quality_threshold": 3, "rev_comp": false, ' + '"rev_comp_barcode": false, "phred_offset": "auto", ' + '"rev_comp_mapping_barcodes": false, "sequence_max_n": 0}' + ) cmd = qdb.software.Command(1) obs = qdb.software.Parameters.load(cmd, json_str=json_str) exp_values = { - "barcode_type": "golay_12", "input_data": 1, - "max_bad_run_length": 3, "max_barcode_errors": 1.5, + "barcode_type": "golay_12", + "input_data": 1, + "max_bad_run_length": 3, + "max_barcode_errors": 1.5, "min_per_read_length_fraction": 0.75, - "phred_quality_threshold": 3, "rev_comp": False, - "rev_comp_barcode": False, "rev_comp_mapping_barcodes": False, - "sequence_max_n": 0, "phred_offset": "auto"} + "phred_quality_threshold": 3, + "rev_comp": False, + "rev_comp_barcode": False, + "rev_comp_mapping_barcodes": False, + "sequence_max_n": 0, + "phred_offset": "auto", + } self.assertEqual(obs.values, exp_values) def test_load_dictionary(self): exp_values = { - "barcode_type": "golay_12", "input_data": 1, - "max_bad_run_length": 3, 
"max_barcode_errors": 1.5, + "barcode_type": "golay_12", + "input_data": 1, + "max_bad_run_length": 3, + "max_barcode_errors": 1.5, "min_per_read_length_fraction": 0.75, - "phred_quality_threshold": 3, "rev_comp": False, - "rev_comp_barcode": False, "rev_comp_mapping_barcodes": False, - "sequence_max_n": 0, "phred_offset": "auto"} + "phred_quality_threshold": 3, + "rev_comp": False, + "rev_comp_barcode": False, + "rev_comp_mapping_barcodes": False, + "sequence_max_n": 0, + "phred_offset": "auto", + } cmd = qdb.software.Command(1) obs = qdb.software.Parameters.load(cmd, values_dict=exp_values) self.assertEqual(obs.values, exp_values) def test_load_error_missing_required(self): - json_str = ('{"barcode_type": "golay_12",' - '"max_bad_run_length": 3, "max_barcode_errors": 1.5, ' - '"min_per_read_length_fraction": 0.75, ' - '"phred_quality_threshold": 3, "rev_comp": false, ' - '"rev_comp_barcode": false, "phred_offset": "auto", ' - '"rev_comp_mapping_barcodes": false, "sequence_max_n": 0}') + json_str = ( + '{"barcode_type": "golay_12",' + '"max_bad_run_length": 3, "max_barcode_errors": 1.5, ' + '"min_per_read_length_fraction": 0.75, ' + '"phred_quality_threshold": 3, "rev_comp": false, ' + '"rev_comp_barcode": false, "phred_offset": "auto", ' + '"rev_comp_mapping_barcodes": false, "sequence_max_n": 0}' + ) cmd = qdb.software.Command(1) with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.software.Parameters.load(cmd, json_str=json_str) def test_load_loads_defaults(self): values = { - "barcode_type": "golay_12", "input_data": 1, - "phred_quality_threshold": 3, "rev_comp": False, - "rev_comp_barcode": False, "rev_comp_mapping_barcodes": False, - "sequence_max_n": 0, "phred_offset": "auto"} + "barcode_type": "golay_12", + "input_data": 1, + "phred_quality_threshold": 3, + "rev_comp": False, + "rev_comp_barcode": False, + "rev_comp_mapping_barcodes": False, + "sequence_max_n": 0, + "phred_offset": "auto", + } cmd = qdb.software.Command(1) obs = qdb.software.Parameters.load(cmd, values_dict=values) - values.update({ - "max_bad_run_length": '3', "max_barcode_errors": '1.5', - "min_per_read_length_fraction": '0.75'}) + values.update( + { + "max_bad_run_length": "3", + "max_barcode_errors": "1.5", + "min_per_read_length_fraction": "0.75", + } + ) self.assertEqual(obs.values, values) def test_load_error_extra_parameters(self): - json_str = ('{"barcode_type": "golay_12", "input_data": 1, ' - '"max_bad_run_length": 3, "max_barcode_errors": 1.5, ' - '"min_per_read_length_fraction": 0.75, ' - '"phred_quality_threshold": 3, "rev_comp": false, ' - '"rev_comp_barcode": false, "phred_offset": "auto",' - '"rev_comp_mapping_barcodes": false, "sequence_max_n": 0,' - '"extra_param": 1}') + json_str = ( + '{"barcode_type": "golay_12", "input_data": 1, ' + '"max_bad_run_length": 3, "max_barcode_errors": 1.5, ' + '"min_per_read_length_fraction": 0.75, ' + '"phred_quality_threshold": 3, "rev_comp": false, ' + '"rev_comp_barcode": false, "phred_offset": "auto",' + '"rev_comp_mapping_barcodes": false, "sequence_max_n": 0,' + '"extra_param": 1}' + ) cmd = qdb.software.Command(1) with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.software.Parameters.load(cmd, json_str=json_str) def test_from_default_parameters(self): obs = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}) + qdb.software.DefaultParameters(1), {"input_data": 1} + ) self.assertEqual(obs._command, qdb.software.Command(1)) - exp = {'min_per_read_length_fraction': 0.75, - 'max_barcode_errors': 1.5, 
'max_bad_run_length': 3, - 'rev_comp': False, 'phred_quality_threshold': 3, - 'rev_comp_barcode': False, 'sequence_max_n': 0, - 'barcode_type': 'golay_12', 'rev_comp_mapping_barcodes': False, - 'input_data': 1, 'phred_offset': 'auto'} + exp = { + "min_per_read_length_fraction": 0.75, + "max_barcode_errors": 1.5, + "max_bad_run_length": 3, + "rev_comp": False, + "phred_quality_threshold": 3, + "rev_comp_barcode": False, + "sequence_max_n": 0, + "barcode_type": "golay_12", + "rev_comp_mapping_barcodes": False, + "input_data": 1, + "phred_offset": "auto", + } self.assertEqual(obs._values, exp) obs = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}, - opt_params={'max_bad_run_length': 5}) + qdb.software.DefaultParameters(1), + {"input_data": 1}, + opt_params={"max_bad_run_length": 5}, + ) self.assertEqual(obs._command, qdb.software.Command(1)) - exp = {'min_per_read_length_fraction': 0.75, - 'max_barcode_errors': 1.5, 'max_bad_run_length': 5, - 'rev_comp': False, 'phred_quality_threshold': 3, - 'rev_comp_barcode': False, 'sequence_max_n': 0, - 'barcode_type': 'golay_12', 'rev_comp_mapping_barcodes': False, - 'input_data': 1, 'phred_offset': 'auto'} + exp = { + "min_per_read_length_fraction": 0.75, + "max_barcode_errors": 1.5, + "max_bad_run_length": 5, + "rev_comp": False, + "phred_quality_threshold": 3, + "rev_comp_barcode": False, + "sequence_max_n": 0, + "barcode_type": "golay_12", + "rev_comp_mapping_barcodes": False, + "input_data": 1, + "phred_offset": "auto", + } self.assertEqual(obs._values, exp) def test_from_default_params_error_missing_reqd(self): with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {}) + qdb.software.DefaultParameters(1), {} + ) def test_from_default_params_error_extra_reqd(self): with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), - {'input_data': 1, 'another_one': 2}) + qdb.software.DefaultParameters(1), {"input_data": 1, "another_one": 2} + ) def test_from_default_params_error_extra_opts(self): with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}, - opt_params={'Unknown': 'foo'}) + qdb.software.DefaultParameters(1), + {"input_data": 1}, + opt_params={"Unknown": "foo"}, + ) def test_command(self): obs = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}).command + qdb.software.DefaultParameters(1), {"input_data": 1} + ).command self.assertEqual(obs, qdb.software.Command(1)) def test_values(self): obs = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}).values - exp = {'min_per_read_length_fraction': 0.75, - 'max_barcode_errors': 1.5, 'max_bad_run_length': 3, - 'rev_comp': False, 'phred_quality_threshold': 3, - 'rev_comp_barcode': False, 'sequence_max_n': 0, - 'barcode_type': 'golay_12', 'rev_comp_mapping_barcodes': False, - 'phred_offset': 'auto', 'input_data': 1} + qdb.software.DefaultParameters(1), {"input_data": 1} + ).values + exp = { + "min_per_read_length_fraction": 0.75, + "max_barcode_errors": 1.5, + "max_bad_run_length": 3, + "rev_comp": False, + "phred_quality_threshold": 3, + "rev_comp_barcode": False, + "sequence_max_n": 0, + "barcode_type": "golay_12", + "rev_comp_mapping_barcodes": False, + "phred_offset": "auto", + "input_data": 1, + } 
self.assertEqual(obs, exp) def test_dumps(self): obs = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 1}).dump() - exp = ('{"barcode_type": "golay_12", "input_data": 1, ' - '"max_bad_run_length": 3, "max_barcode_errors": 1.5, ' - '"min_per_read_length_fraction": 0.75, "phred_offset": "auto", ' - '"phred_quality_threshold": 3, "rev_comp": false, ' - '"rev_comp_barcode": false, ' - '"rev_comp_mapping_barcodes": false, "sequence_max_n": 0}') + qdb.software.DefaultParameters(1), {"input_data": 1} + ).dump() + exp = ( + '{"barcode_type": "golay_12", "input_data": 1, ' + '"max_bad_run_length": 3, "max_barcode_errors": 1.5, ' + '"min_per_read_length_fraction": 0.75, "phred_offset": "auto", ' + '"phred_quality_threshold": 3, "rev_comp": false, ' + '"rev_comp_barcode": false, ' + '"rev_comp_mapping_barcodes": false, "sequence_max_n": 0}' + ) self.assertEqual(obs, exp) class DefaultWorkflowNodeTests(TestCase): def test_default_parameter(self): obs = qdb.software.DefaultWorkflowNode(1) - self.assertEqual( - obs.default_parameter, qdb.software.DefaultParameters(1)) + self.assertEqual(obs.default_parameter, qdb.software.DefaultParameters(1)) obs = qdb.software.DefaultWorkflowNode(2) - self.assertEqual( - obs.default_parameter, qdb.software.DefaultParameters(10)) + self.assertEqual(obs.default_parameter, qdb.software.DefaultParameters(10)) class DefaultWorkflowEdgeTests(TestCase): def test_connections(self): tester = qdb.software.DefaultWorkflowEdge(1) obs = tester.connections - self.assertEqual( - obs, [['demultiplexed', 'input_data', 'Demultiplexed']]) + self.assertEqual(obs, [["demultiplexed", "input_data", "Demultiplexed"]]) class DefaultWorkflowTests(TestCase): def test_name(self): - self.assertEqual(qdb.software.DefaultWorkflow(1).name, - "FASTQ upstream workflow") - self.assertEqual(qdb.software.DefaultWorkflow(2).name, - "FASTA upstream workflow") - self.assertEqual(qdb.software.DefaultWorkflow(3).name, - "Per sample FASTQ upstream workflow") + self.assertEqual( + qdb.software.DefaultWorkflow(1).name, "FASTQ upstream workflow" + ) + self.assertEqual( + qdb.software.DefaultWorkflow(2).name, "FASTA upstream workflow" + ) + self.assertEqual( + qdb.software.DefaultWorkflow(3).name, "Per sample FASTQ upstream workflow" + ) def test_graph(self): obs = qdb.software.DefaultWorkflow(1).graph self.assertTrue(isinstance(obs, nx.DiGraph)) - exp = [qdb.software.DefaultWorkflowNode(1), - qdb.software.DefaultWorkflowNode(2)] + exp = [qdb.software.DefaultWorkflowNode(1), qdb.software.DefaultWorkflowNode(2)] self.assertCountEqual(obs.nodes(), exp) - exp = [(qdb.software.DefaultWorkflowNode(1), + exp = [ + ( + qdb.software.DefaultWorkflowNode(1), qdb.software.DefaultWorkflowNode(2), - {'connections': qdb.software.DefaultWorkflowEdge(1)})] + {"connections": qdb.software.DefaultWorkflowEdge(1)}, + ) + ] self.assertCountEqual(obs.edges(data=True), exp) obs = qdb.software.DefaultWorkflow(2).graph self.assertTrue(isinstance(obs, nx.DiGraph)) - exp = [qdb.software.DefaultWorkflowNode(3), - qdb.software.DefaultWorkflowNode(4)] + exp = [qdb.software.DefaultWorkflowNode(3), qdb.software.DefaultWorkflowNode(4)] self.assertCountEqual(obs.nodes(), exp) - exp = [(qdb.software.DefaultWorkflowNode(3), + exp = [ + ( + qdb.software.DefaultWorkflowNode(3), qdb.software.DefaultWorkflowNode(4), - {'connections': qdb.software.DefaultWorkflowEdge(2)})] + {"connections": qdb.software.DefaultWorkflowEdge(2)}, + ) + ] self.assertCountEqual(obs.edges(data=True), exp) def 
test_parameters(self): - empty_params = {'prep': {}, 'sample': {}} + empty_params = {"prep": {}, "sample": {}} dw = qdb.software.DefaultWorkflow(1) self.assertEqual(dw.parameters, empty_params) values = { - 'sample': {'environment_name': 'human'}, - 'prep': {'instrument_mode': 'MiSeq'}, - 'extra': {'x': 'y'} + "sample": {"environment_name": "human"}, + "prep": {"instrument_mode": "MiSeq"}, + "extra": {"x": "y"}, } with self.assertRaises(ValueError): dw.parameters = values - del values['extra'] + del values["extra"] dw.parameters = values self.assertEqual(values, dw.parameters) dw.parameters = empty_params @@ -1232,5 +1500,5 @@ def test_parameters(self): """ -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_sql.py b/qiita_db/test/test_sql.py index ccd55b0b1..768457a0c 100644 --- a/qiita_db/test/test_sql.py +++ b/qiita_db/test/test_sql.py @@ -6,20 +6,21 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main -from tempfile import mkstemp from os import close, remove from os.path import exists +from tempfile import mkstemp +from unittest import TestCase, main import pandas as pd -from qiita_core.util import qiita_test_checker import qiita_db as qdb +from qiita_core.util import qiita_test_checker @qiita_test_checker() class TestSQL(TestCase): """Tests that the database triggers and procedures work properly""" + def setUp(self): self._files_to_remove = [] @@ -48,21 +49,24 @@ def test_find_artifact_roots_is_child(self): def test_find_artifact_roots_is_child_multiple_parents_one_root(self): """Correctly returns the roots if the children has multiple parents - but a single root + but a single root """ - fd, fp = mkstemp(suffix='_table.biom') + fd, fp = mkstemp(suffix="_table.biom") close(fd) self._files_to_remove.append(fp) - with open(fp, 'w') as f: + with open(fp, "w") as f: f.write("test") fp = [(fp, 7)] params = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(10), {'input_data': 2}) + qdb.software.DefaultParameters(10), {"input_data": 2} + ) new = qdb.artifact.Artifact.create( - fp, "BIOM", + fp, + "BIOM", parents=[qdb.artifact.Artifact(2), qdb.artifact.Artifact(3)], - processing_parameters=params) - self._files_to_remove.extend([x['fp'] for x in new.filepaths]) + processing_parameters=params, + ) + self._files_to_remove.extend([x["fp"] for x in new.filepaths]) with qdb.sql_connection.TRN: sql = "SELECT * FROM qiita.find_artifact_roots(%s)" qdb.sql_connection.TRN.add(sql, [new.id]) @@ -73,49 +77,57 @@ def test_find_artifact_roots_is_child_multiple_parents_one_root(self): def _create_root_artifact(self): """Creates a new root artifact""" metadata = pd.DataFrame.from_dict( - {'SKB8.640193': {'center_name': 'ANL', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'target_gene': '16S rRNA', - 'target_subfragment': 'V4', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'}}, - orient='index', dtype=str) + { + "SKB8.640193": { + "center_name": "ANL", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "target_gene": "16S rRNA", + "target_subfragment": "V4", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + 
"experiment_design_description": "BBBB", + } + }, + orient="index", + dtype=str, + ) pt = qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(1), "18S") - fd, fp = mkstemp(suffix='_seqs.fastq') + metadata, qdb.study.Study(1), "18S" + ) + fd, fp = mkstemp(suffix="_seqs.fastq") close(fd) self._files_to_remove.append(fp) - with open(fp, 'w') as f: + with open(fp, "w") as f: f.write("test") fp = [(fp, 1)] new_root = qdb.artifact.Artifact.create(fp, "FASTQ", prep_template=pt) - self._files_to_remove.extend([x['fp'] for x in new_root.filepaths]) + self._files_to_remove.extend([x["fp"] for x in new_root.filepaths]) return new_root def _create_child_artifact(self, parents): """Creates a new artifact with the given parents""" # Add a child of 2 roots - fd, fp = mkstemp(suffix='_seqs.fna') + fd, fp = mkstemp(suffix="_seqs.fna") close(fd) self._files_to_remove.append(fp) - with open(fp, 'w') as f: + with open(fp, "w") as f: f.write("test") fp = [(fp, 4)] params = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 2}) + qdb.software.DefaultParameters(1), {"input_data": 2} + ) new = qdb.artifact.Artifact.create( - fp, "Demultiplexed", parents=parents, - processing_parameters=params) + fp, "Demultiplexed", parents=parents, processing_parameters=params + ) return new def test_find_artifact_roots_is_root_without_children(self): """Correctly returns the root if the artifact is already the root - and doesn't have any children - """ + and doesn't have any children + """ sql = "SELECT * FROM qiita.find_artifact_roots(%s)" # Add a new root @@ -133,18 +145,22 @@ def test_find_artifact_roots_is_child_multiple_parents_multiple_root(self): new_root = self._create_root_artifact() # Add a child of 2 roots - fd, fp = mkstemp(suffix='_seqs.fna') + fd, fp = mkstemp(suffix="_seqs.fna") close(fd) self._files_to_remove.append(fp) - with open(fp, 'w') as f: + with open(fp, "w") as f: f.write("test") fp = [(fp, 4)] params = qdb.software.Parameters.from_default_params( - qdb.software.DefaultParameters(1), {'input_data': 2}) + qdb.software.DefaultParameters(1), {"input_data": 2} + ) new = qdb.artifact.Artifact.create( - fp, "Demultiplexed", parents=[qdb.artifact.Artifact(1), new_root], - processing_parameters=params) - self._files_to_remove.extend([x['fp'] for x in new.filepaths]) + fp, + "Demultiplexed", + parents=[qdb.artifact.Artifact(1), new_root], + processing_parameters=params, + ) + self._files_to_remove.extend([x["fp"] for x in new.filepaths]) with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql, [new.id]) obs = qdb.sql_connection.TRN.execute_fetchindex() @@ -170,8 +186,7 @@ def test_artifact_ancestry_leaf(self): self.assertCountEqual(obs, exp) def test_artifact_ancestry_leaf_multiple_parents(self): - """Correctly returns the ancestry of a leaf artifact w multiple parents - """ + """Correctly returns the ancestry of a leaf artifact w multiple parents""" root = self._create_root_artifact() parent1 = self._create_child_artifact([root]) parent2 = self._create_child_artifact([root]) @@ -181,8 +196,12 @@ def test_artifact_ancestry_leaf_multiple_parents(self): sql = "SELECT * FROM qiita.artifact_ancestry(%s)" qdb.sql_connection.TRN.add(sql, [child.id]) obs = qdb.sql_connection.TRN.execute_fetchindex() - exp = [[child.id, parent1.id], [child.id, parent2.id], - [parent1.id, root.id], [parent2.id, root.id]] + exp = [ + [child.id, parent1.id], + [child.id, parent2.id], + [parent1.id, root.id], + [parent2.id, root.id], + ] 
self.assertCountEqual(obs, exp) def test_artifact_ancestry_middle(self): @@ -225,15 +244,28 @@ def test_artifact_descendants_middle(self): def test_isnumeric(self): """Test SQL function isnumeric""" - exp = [['', False], ['.', False], ['.0', True], ['0.', True], - ['0', True], ['1', True], ['123', True], ['123.456', True], - ['abc', False], ['1..2', False], ['1.2.3.4', False], - ['1x234', False], ['1.234e-5', True]] + exp = [ + ["", False], + [".", False], + [".0", True], + ["0.", True], + ["0", True], + ["1", True], + ["123", True], + ["123.456", True], + ["abc", False], + ["1..2", False], + ["1.2.3.4", False], + ["1x234", False], + ["1.234e-5", True], + ] - sql = ("WITH test(x) AS (" - "VALUES (''), ('.'), ('.0'), ('0.'), ('0'), ('1'), ('123'), " - "('123.456'), ('abc'), ('1..2'), ('1.2.3.4'), ('1x234'), " - "('1.234e-5')) SELECT x, isnumeric(x) FROM test;") + sql = ( + "WITH test(x) AS (" + "VALUES (''), ('.'), ('.0'), ('0.'), ('0'), ('1'), ('123'), " + "('123.456'), ('abc'), ('1..2'), ('1.2.3.4'), ('1x234'), " + "('1.234e-5')) SELECT x, isnumeric(x) FROM test;" + ) with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) obs = qdb.sql_connection.TRN.execute_fetchindex() @@ -241,11 +273,13 @@ def test_isnumeric(self): def test_artifact_descendants_with_jobs(self): """Test SQL function artifact_descendants_with_jobs""" - exp = [['c350b068-add7-49a5-8846-604ac032cc88', 1, 2], - ['d883dab4-503b-45c2-815d-2126ff52dede', 1, 3], - ['a4c4b9b9-20ca-47f5-bd30-725cce71df2b', 2, 4], - ['624dce65-43a5-4156-a4b6-6c1d02114b67', 2, 5], - ['81bbe8d0-b4c2-42eb-ada9-f07c1c91e59f', 2, 6]] + exp = [ + ["c350b068-add7-49a5-8846-604ac032cc88", 1, 2], + ["d883dab4-503b-45c2-815d-2126ff52dede", 1, 3], + ["a4c4b9b9-20ca-47f5-bd30-725cce71df2b", 2, 4], + ["624dce65-43a5-4156-a4b6-6c1d02114b67", 2, 5], + ["81bbe8d0-b4c2-42eb-ada9-f07c1c91e59f", 2, 6], + ] sql = """SELECT * FROM qiita.artifact_descendants_with_jobs(1)""" with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) @@ -256,5 +290,5 @@ def test_artifact_descendants_with_jobs(self): self.assertEqual(e[1:], o[1:]) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/test/test_sql_connection.py b/qiita_db/test/test_sql_connection.py index b3fd40742..4f2f295c5 100644 --- a/qiita_db/test/test_sql_connection.py +++ b/qiita_db/test/test_sql_connection.py @@ -6,19 +6,18 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main -from os import remove, close +from os import close, remove from os.path import exists from tempfile import mkstemp +from unittest import TestCase, main -from psycopg2._psycopg import connection from psycopg2 import connect +from psycopg2._psycopg import connection from psycopg2.extensions import TRANSACTION_STATUS_IDLE -from qiita_core.util import qiita_test_checker -from qiita_core.qiita_settings import qiita_config import qiita_db as qdb - +from qiita_core.qiita_settings import qiita_config +from qiita_core.util import qiita_test_checker DB_CREATE_TEST_TABLE = """CREATE TABLE qiita.test_table ( str_column varchar DEFAULT 'foo' NOT NULL, @@ -31,11 +30,14 @@ @qiita_test_checker() class TestBase(TestCase): def setUp(self): - # Add the test table to the database, so we can use it in the tests - with connect(user=qiita_config.user, password=qiita_config.password, - host=qiita_config.host, port=qiita_config.port, - database=qiita_config.database) as self.con: + with connect( + user=qiita_config.user, + password=qiita_config.password, + host=qiita_config.host, + port=qiita_config.port, + database=qiita_config.database, + ) as self.con: with self.con.cursor() as cur: cur.execute(DB_CREATE_TEST_TABLE) self.con.commit() @@ -55,17 +57,25 @@ def _populate_test_table(self): sql = """INSERT INTO qiita.test_table (str_column, bool_column, int_column) VALUES (%s, %s, %s)""" - sql_args = [('test1', True, 1), ('test2', True, 2), - ('test3', False, 3), ('test4', False, 4)] + sql_args = [ + ("test1", True, 1), + ("test2", True, 2), + ("test3", False, 3), + ("test4", False, 4), + ] with self.con.cursor() as cur: cur.executemany(sql, sql_args) self.con.commit() def _assert_sql_equal(self, exp): """Aux function for testing""" - with connect(user=qiita_config.user, password=qiita_config.password, - host=qiita_config.host, port=qiita_config.port, - database=qiita_config.database) as con: + with connect( + user=qiita_config.user, + password=qiita_config.password, + host=qiita_config.host, + port=qiita_config.port, + database=qiita_config.database, + ) as con: with con.cursor() as cur: cur.execute("SELECT * FROM qiita.test_table") obs = cur.fetchall() @@ -97,7 +107,7 @@ def test_add(self): args3 = (False,) qdb.sql_connection.TRN.add(sql1, args3) sql3 = "INSERT INTO qiita.test_table (int_column) VALEUS (%(foo)s)" - args4 = {'foo': 1} + args4 = {"foo": 1} qdb.sql_connection.TRN.add(sql3, args4) exp = [(sql1, args1), (sql2, None), (sql1, args3), (sql3, args4)] @@ -123,8 +133,7 @@ def test_add_error(self): qdb.sql_connection.TRN.add("SELECT 42", 1) with self.assertRaises(TypeError): - qdb.sql_connection.TRN.add("SELECT 42", {'foo': 'bar'}, - many=True) + qdb.sql_connection.TRN.add("SELECT 42", {"foo": "bar"}, many=True) with self.assertRaises(TypeError): qdb.sql_connection.TRN.add("SELECT 42", [1, 1], many=True) @@ -148,51 +157,55 @@ def test_execute_many(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s)""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) sql = """UPDATE qiita.test_table SET int_column = %s, bool_column = %s WHERE str_column = %s""" - qdb.sql_connection.TRN.add(sql, [20, False, 'insert2']) + qdb.sql_connection.TRN.add(sql, [20, False, "insert2"]) obs = qdb.sql_connection.TRN.execute() self.assertEqual(obs, 
[None, None, None, None]) self._assert_sql_equal([]) - self._assert_sql_equal([('insert1', True, 1), - ('insert3', True, 3), - ('insert2', False, 20)]) + self._assert_sql_equal( + [("insert1", True, 1), ("insert3", True, 3), ("insert2", False, 20)] + ) def test_execute_return(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - qdb.sql_connection.TRN.add(sql, ['test_insert', 2]) + qdb.sql_connection.TRN.add(sql, ["test_insert", 2]) sql = """UPDATE qiita.test_table SET bool_column = %s WHERE str_column = %s RETURNING int_column""" - qdb.sql_connection.TRN.add(sql, [False, 'test_insert']) + qdb.sql_connection.TRN.add(sql, [False, "test_insert"]) obs = qdb.sql_connection.TRN.execute() - self.assertEqual(obs, [[['test_insert', 2]], [[2]]]) + self.assertEqual(obs, [[["test_insert", 2]], [[2]]]) def test_execute_return_many(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) sql = """UPDATE qiita.test_table SET bool_column = %s WHERE str_column = %s""" - qdb.sql_connection.TRN.add(sql, [False, 'insert2']) + qdb.sql_connection.TRN.add(sql, [False, "insert2"]) sql = "SELECT * FROM qiita.test_table" qdb.sql_connection.TRN.add(sql) obs = qdb.sql_connection.TRN.execute() - exp = [[['insert1', 1]], # First query of the many query - [['insert2', 2]], # Second query of the many query - [['insert3', 3]], # Third query of the many query - None, # Update query - [['insert1', True, 1], # First result select - ['insert3', True, 3], # Second result select - ['insert2', False, 2]]] # Third result select + exp = [ + [["insert1", 1]], # First query of the many query + [["insert2", 2]], # Second query of the many query + [["insert3", 3]], # Third query of the many query + None, # Update query + [ + ["insert1", True, 1], # First result select + ["insert3", True, 3], # Second result select + ["insert2", False, 2], + ], + ] # Third result select self.assertEqual(obs, exp) def test_execute_huge_transaction(self): @@ -221,29 +234,30 @@ def test_execute_commit_false(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) obs = qdb.sql_connection.TRN.execute() - exp = [[['insert1', 1]], [['insert2', 2]], [['insert3', 3]]] + exp = [[["insert1", 1]], [["insert2", 2]], [["insert3", 3]]] self.assertEqual(obs, exp) self._assert_sql_equal([]) qdb.sql_connection.TRN.commit() - self._assert_sql_equal([('insert1', True, 1), ('insert2', True, 2), - ('insert3', True, 3)]) + self._assert_sql_equal( + [("insert1", True, 1), ("insert2", True, 2), ("insert3", True, 3)] + ) def test_execute_commit_false_rollback(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) obs = qdb.sql_connection.TRN.execute() - exp = [[['insert1', 1]], [['insert2', 2]], [['insert3', 3]]] + 
exp = [[["insert1", 1]], [["insert2", 2]], [["insert3", 3]]] self.assertEqual(obs, exp) self._assert_sql_equal([]) @@ -256,32 +270,33 @@ def test_execute_commit_false_wipe_queries(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) obs = qdb.sql_connection.TRN.execute() - exp = [[['insert1', 1]], [['insert2', 2]], [['insert3', 3]]] + exp = [[["insert1", 1]], [["insert2", 2]], [["insert3", 3]]] self.assertEqual(obs, exp) self._assert_sql_equal([]) sql = """UPDATE qiita.test_table SET bool_column = %s WHERE str_column = %s""" - args = [False, 'insert2'] + args = [False, "insert2"] qdb.sql_connection.TRN.add(sql, args) self.assertEqual(qdb.sql_connection.TRN._queries, [(sql, args)]) qdb.sql_connection.TRN.execute() self._assert_sql_equal([]) - self._assert_sql_equal([('insert1', True, 1), ('insert3', True, 3), - ('insert2', False, 2)]) + self._assert_sql_equal( + [("insert1", True, 1), ("insert3", True, 3), ("insert2", False, 2)] + ) def test_execute_fetchlast(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) sql = """SELECT EXISTS( @@ -293,23 +308,25 @@ def test_execute_fetchindex(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) - self.assertEqual(qdb.sql_connection.TRN.execute_fetchindex(), - [['insert3', 3]]) + self.assertEqual( + qdb.sql_connection.TRN.execute_fetchindex(), [["insert3", 3]] + ) sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert4', 4], ['insert5', 5], ['insert6', 6]] + args = [["insert4", 4], ["insert5", 5], ["insert6", 6]] qdb.sql_connection.TRN.add(sql, args, many=True) - self.assertEqual(qdb.sql_connection.TRN.execute_fetchindex(3), - [['insert4', 4]]) + self.assertEqual( + qdb.sql_connection.TRN.execute_fetchindex(3), [["insert4", 4]] + ) def test_execute_fetchflatten(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s)""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) sql = "SELECT str_column, int_column FROM qiita.test_table" @@ -323,14 +340,14 @@ def test_execute_fetchflatten(self): sql = "SELECT 42" qdb.sql_connection.TRN.add(sql) obs = qdb.sql_connection.TRN.execute_fetchflatten(idx=3) - self.assertEqual(obs, ['insert1', 1, 'insert2', 2, 'insert3', 3]) + self.assertEqual(obs, ["insert1", 1, "insert2", 2, "insert3", 3]) def test_context_manager_rollback(self): try: with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] 
qdb.sql_connection.TRN.add(sql, args, many=True) qdb.sql_connection.TRN.execute() @@ -340,37 +357,42 @@ def test_context_manager_rollback(self): self._assert_sql_equal([]) self.assertEqual( qdb.sql_connection.TRN._connection.get_transaction_status(), - TRANSACTION_STATUS_IDLE) + TRANSACTION_STATUS_IDLE, + ) def test_context_manager_execute(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) self._assert_sql_equal([]) - self._assert_sql_equal([('insert1', True, 1), ('insert2', True, 2), - ('insert3', True, 3)]) + self._assert_sql_equal( + [("insert1", True, 1), ("insert2", True, 2), ("insert3", True, 3)] + ) self.assertEqual( qdb.sql_connection.TRN._connection.get_transaction_status(), - TRANSACTION_STATUS_IDLE) + TRANSACTION_STATUS_IDLE, + ) def test_context_manager_no_commit(self): with qdb.sql_connection.TRN: sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) qdb.sql_connection.TRN.execute() self._assert_sql_equal([]) - self._assert_sql_equal([('insert1', True, 1), ('insert2', True, 2), - ('insert3', True, 3)]) + self._assert_sql_equal( + [("insert1", True, 1), ("insert2", True, 2), ("insert3", True, 3)] + ) self.assertEqual( qdb.sql_connection.TRN._connection.get_transaction_status(), - TRANSACTION_STATUS_IDLE) + TRANSACTION_STATUS_IDLE, + ) def test_context_manager_multiple(self): self.assertEqual(qdb.sql_connection.TRN._contexts_entered, 0) @@ -383,24 +405,27 @@ def test_context_manager_multiple(self): self.assertEqual(qdb.sql_connection.TRN._contexts_entered, 2) sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) # We exited the second context, nothing should have been executed self.assertEqual(qdb.sql_connection.TRN._contexts_entered, 1) self.assertEqual( qdb.sql_connection.TRN._connection.get_transaction_status(), - TRANSACTION_STATUS_IDLE) + TRANSACTION_STATUS_IDLE, + ) self._assert_sql_equal([]) # We have exited the first context, everything should have been # executed and committed self.assertEqual(qdb.sql_connection.TRN._contexts_entered, 0) - self._assert_sql_equal([('insert1', True, 1), ('insert2', True, 2), - ('insert3', True, 3)]) + self._assert_sql_equal( + [("insert1", True, 1), ("insert2", True, 2), ("insert3", True, 3)] + ) self.assertEqual( qdb.sql_connection.TRN._connection.get_transaction_status(), - TRANSACTION_STATUS_IDLE) + TRANSACTION_STATUS_IDLE, + ) def test_context_manager_multiple_2(self): self.assertEqual(qdb.sql_connection.TRN._contexts_entered, 0) @@ -419,18 +444,20 @@ def tester(): self.assertEqual(qdb.sql_connection.TRN._contexts_entered, 1) sql = """INSERT INTO qiita.test_table (str_column, int_column) VALUES (%s, %s) RETURNING str_column, int_column""" - args = [['insert1', 1], ['insert2', 2], ['insert3', 3]] + args = [["insert1", 1], ["insert2", 2], ["insert3", 3]] qdb.sql_connection.TRN.add(sql, args, many=True) tester() 
self.assertEqual(qdb.sql_connection.TRN._contexts_entered, 1) self._assert_sql_equal([]) self.assertEqual(qdb.sql_connection.TRN._contexts_entered, 0) - self._assert_sql_equal([('insert1', True, 1), ('insert2', True, 2), - ('insert3', True, 3)]) + self._assert_sql_equal( + [("insert1", True, 1), ("insert2", True, 2), ("insert3", True, 3)] + ) self.assertEqual( qdb.sql_connection.TRN._connection.get_transaction_status(), - TRANSACTION_STATUS_IDLE) + TRANSACTION_STATUS_IDLE, + ) def test_post_commit_funcs(self): fd, fp = mkstemp() @@ -438,8 +465,8 @@ def test_post_commit_funcs(self): self._files_to_remove.append(fp) def func(fp): - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add("SELECT 42") @@ -462,8 +489,8 @@ def test_post_rollback_funcs(self): self._files_to_remove.append(fp) def func(fp): - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add("SELECT 42") diff --git a/qiita_db/test/test_study.py b/qiita_db/test/test_study.py index 23663046d..7dfcad38c 100644 --- a/qiita_db/test/test_study.py +++ b/qiita_db/test/test_study.py @@ -1,10 +1,10 @@ -from unittest import TestCase, main from datetime import datetime +from unittest import TestCase, main +import qiita_db as qdb from qiita_core.exceptions import IncompetentQiitaDeveloperError from qiita_core.qiita_settings import qiita_config from qiita_core.util import qiita_test_checker -import qiita_db as qdb # ----------------------------------------------------------------------------- # Copyright (c) 2014--, The Qiita Development Team. @@ -22,16 +22,28 @@ def setUp(self): def test_create_studyperson(self): new = qdb.study.StudyPerson.create( - 'SomeDude', 'somedude@foo.bar', 'affil', '111 fake street', - '111-121-1313') + "SomeDude", "somedude@foo.bar", "affil", "111 fake street", "111-121-1313" + ) nid = new.id self.assertEqual(nid, 4) with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add("SELECT * FROM qiita.study_person " - "WHERE study_person_id = %d" % nid) + qdb.sql_connection.TRN.add( + "SELECT * FROM qiita.study_person WHERE study_person_id = %d" % nid + ) obs = qdb.sql_connection.TRN.execute_fetchindex() - self.assertEqual(obs, [[nid, 'SomeDude', 'somedude@foo.bar', 'affil', - '111 fake street', '111-121-1313']]) + self.assertEqual( + obs, + [ + [ + nid, + "SomeDude", + "somedude@foo.bar", + "affil", + "111 fake street", + "111-121-1313", + ] + ], + ) qdb.study.StudyPerson.delete(nid) @@ -40,37 +52,39 @@ def test_delete(self): qdb.study.StudyPerson.delete(1) obs = qdb.study.StudyPerson.create( - 'SomeDude', 'somedude@foo.bar', 'affil', '111 fake street', - '111-121-1313') + "SomeDude", "somedude@foo.bar", "affil", "111 fake street", "111-121-1313" + ) - self.assertTrue( - qdb.study.StudyPerson.exists('SomeDude', 'affil')) + self.assertTrue(qdb.study.StudyPerson.exists("SomeDude", "affil")) qdb.study.StudyPerson.delete(obs.id) - self.assertFalse( - qdb.study.StudyPerson.exists('SomeDude', 'affil')) + self.assertFalse(qdb.study.StudyPerson.exists("SomeDude", "affil")) def test_retrieve_non_existant_people(self): with self.assertRaises(qdb.exceptions.QiitaDBLookupError): - qdb.study.StudyPerson.from_name_and_affiliation('Boaty McBoatFace', - 'UCSD') + qdb.study.StudyPerson.from_name_and_affiliation("Boaty McBoatFace", "UCSD") - p = qdb.study.StudyPerson.from_name_and_affiliation('LabDude', - 'knight lab') - self.assertEqual(p.name, 'LabDude') - 
self.assertEqual(p.affiliation, 'knight lab') - self.assertEqual(p.address, '123 lab street') - self.assertEqual(p.phone, '121-222-3333') - self.assertEqual(p.email, 'lab_dude@foo.bar') + p = qdb.study.StudyPerson.from_name_and_affiliation("LabDude", "knight lab") + self.assertEqual(p.name, "LabDude") + self.assertEqual(p.affiliation, "knight lab") + self.assertEqual(p.address, "123 lab street") + self.assertEqual(p.phone, "121-222-3333") + self.assertEqual(p.email, "lab_dude@foo.bar") def test_iter(self): """Make sure that each and every StudyPerson is retrieved""" expected = [ - ('LabDude', 'lab_dude@foo.bar', 'knight lab', '123 lab street', - '121-222-3333'), - ('empDude', 'emp_dude@foo.bar', 'broad', None, '444-222-3333'), - ('PIDude', 'PI_dude@foo.bar', 'Wash U', '123 PI street', None)] + ( + "LabDude", + "lab_dude@foo.bar", + "knight lab", + "123 lab street", + "121-222-3333", + ), + ("empDude", "emp_dude@foo.bar", "broad", None, "444-222-3333"), + ("PIDude", "PI_dude@foo.bar", "Wash U", "123 PI street", None), + ] for i, person in enumerate(qdb.study.StudyPerson.iter()): - self.assertEqual(person.id, i+1) + self.assertEqual(person.id, i + 1) self.assertEqual(person.name, expected[i][0]) self.assertEqual(person.email, expected[i][1]) self.assertEqual(person.affiliation, expected[i][2]) @@ -78,60 +92,57 @@ def test_iter(self): self.assertEqual(person.phone, expected[i][4]) def test_exists(self): - self.assertTrue(qdb.study.StudyPerson.exists('LabDude', 'knight lab')) - self.assertFalse(qdb.study.StudyPerson.exists( - 'AnotherDude', 'knight lab')) - self.assertFalse(qdb.study.StudyPerson.exists( - 'LabDude', 'Another lab')) + self.assertTrue(qdb.study.StudyPerson.exists("LabDude", "knight lab")) + self.assertFalse(qdb.study.StudyPerson.exists("AnotherDude", "knight lab")) + self.assertFalse(qdb.study.StudyPerson.exists("LabDude", "Another lab")) def test_create_studyperson_already_exists(self): - obs = qdb.study.StudyPerson.create( - 'LabDude', 'lab_dude@foo.bar', 'knight lab') - self.assertEqual(obs.name, 'LabDude') - self.assertEqual(obs.email, 'lab_dude@foo.bar') + obs = qdb.study.StudyPerson.create("LabDude", "lab_dude@foo.bar", "knight lab") + self.assertEqual(obs.name, "LabDude") + self.assertEqual(obs.email, "lab_dude@foo.bar") def test_retrieve_name(self): - self.assertEqual(self.studyperson.name, 'LabDude') + self.assertEqual(self.studyperson.name, "LabDude") def test_set_name_fail(self): with self.assertRaises(AttributeError): - self.studyperson.name = 'Fail Dude' + self.studyperson.name = "Fail Dude" def test_retrieve_email(self): - self.assertEqual(self.studyperson.email, 'lab_dude@foo.bar') + self.assertEqual(self.studyperson.email, "lab_dude@foo.bar") def test_retrieve_affiliation(self): - self.assertEqual(self.studyperson.affiliation, 'knight lab') + self.assertEqual(self.studyperson.affiliation, "knight lab") def test_set_email_fail(self): with self.assertRaises(AttributeError): - self.studyperson.email = 'faildude@foo.bar' + self.studyperson.email = "faildude@foo.bar" def test_set_affiliation_fail(self): with self.assertRaises(AttributeError): - self.studyperson.affiliation = 'squire lab' + self.studyperson.affiliation = "squire lab" def test_retrieve_address(self): - self.assertEqual(self.studyperson.address, '123 lab street') + self.assertEqual(self.studyperson.address, "123 lab street") def test_retrieve_address_null(self): person = qdb.study.StudyPerson(2) self.assertEqual(person.address, None) def test_set_address(self): - self.studyperson.address = '123 nonsense 
road' - self.assertEqual(self.studyperson.address, '123 nonsense road') + self.studyperson.address = "123 nonsense road" + self.assertEqual(self.studyperson.address, "123 nonsense road") def test_retrieve_phone(self): - self.assertEqual(self.studyperson.phone, '121-222-3333') + self.assertEqual(self.studyperson.phone, "121-222-3333") def test_retrieve_phone_null(self): person = qdb.study.StudyPerson(3) self.assertEqual(person.phone, None) def test_set_phone(self): - self.studyperson.phone = '111111111111111111121' - self.assertEqual(self.studyperson.phone, '111111111111111111121') + self.studyperson.phone = "111111111111111111121" + self.assertEqual(self.studyperson.phone, "111111111111111111121") @qiita_test_checker() @@ -146,11 +157,11 @@ def setUp(self): "mixs_compliant": True, "study_alias": "FCM", "study_description": "Microbiome of people who eat nothing but " - "fried chicken", + "fried chicken", "study_abstract": "Exploring how a high fat diet changes the " - "gut microbiome", + "gut microbiome", "principal_investigator_id": qdb.study.StudyPerson(3), - "lab_person_id": qdb.study.StudyPerson(1) + "lab_person_id": qdb.study.StudyPerson(1), } self.infoexp = { @@ -159,169 +170,197 @@ def setUp(self): "mixs_compliant": True, "study_alias": "FCM", "study_description": "Microbiome of people who eat nothing but " - "fried chicken", + "fried chicken", "study_abstract": "Exploring how a high fat diet changes the " - "gut microbiome", + "gut microbiome", "principal_investigator": qdb.study.StudyPerson(3), "lab_person": qdb.study.StudyPerson(1), - 'public_raw_download': False + "public_raw_download": False, } self.existingexp = { - 'mixs_compliant': True, - 'metadata_complete': True, - 'reprocess': False, - 'funding': None, - 'vamps_id': None, - 'first_contact': datetime(2014, 5, 19, 16, 10), - 'principal_investigator': qdb.study.StudyPerson(3), - 'timeseries_type_id': 1, - 'study_abstract': - "This is a preliminary study to examine the " - "microbiota associated with the Cannabis plant. Soils samples " - "from the bulk soil, soil associated with the roots, and the " - "rhizosphere were extracted and the DNA sequenced. Roots " - "from three independent plants of different strains were " - "examined. These roots were obtained November 11, 2011 from " - "plants that had been harvested in the summer. Future " - "studies will attempt to analyze the soils and rhizospheres " - "from the same location at different time points in the plant " - "lifecycle.", - 'spatial_series': False, - 'study_description': 'Analysis of the Cannabis Plant Microbiome', - 'study_alias': 'Cannabis Soils', - 'most_recent_contact': datetime(2014, 5, 19, 16, 11), - 'lab_person': qdb.study.StudyPerson(1)} + "mixs_compliant": True, + "metadata_complete": True, + "reprocess": False, + "funding": None, + "vamps_id": None, + "first_contact": datetime(2014, 5, 19, 16, 10), + "principal_investigator": qdb.study.StudyPerson(3), + "timeseries_type_id": 1, + "study_abstract": "This is a preliminary study to examine the " + "microbiota associated with the Cannabis plant. Soils samples " + "from the bulk soil, soil associated with the roots, and the " + "rhizosphere were extracted and the DNA sequenced. Roots " + "from three independent plants of different strains were " + "examined. These roots were obtained November 11, 2011 from " + "plants that had been harvested in the summer. 
Future " + "studies will attempt to analyze the soils and rhizospheres " + "from the same location at different time points in the plant " + "lifecycle.", + "spatial_series": False, + "study_description": "Analysis of the Cannabis Plant Microbiome", + "study_alias": "Cannabis Soils", + "most_recent_contact": datetime(2014, 5, 19, 16, 11), + "lab_person": qdb.study.StudyPerson(1), + } def tearDown(self): qiita_config.portal = self.portal - self._change_processed_data_status('private') + self._change_processed_data_status("private") def _change_processed_data_status(self, new_status): # Change the status of the studies by changing the status of their # artifacts - id_status = qdb.util.convert_to_id(new_status, 'visibility') + id_status = qdb.util.convert_to_id(new_status, "visibility") qdb.sql_connection.perform_as_transaction( - "UPDATE qiita.artifact SET visibility_id = %s", (id_status,)) + "UPDATE qiita.artifact SET visibility_id = %s", (id_status,) + ) def test_from_title(self): study = qdb.study.Study.from_title( - 'Identification of the Microbiomes for Cannabis Soils') + "Identification of the Microbiomes for Cannabis Soils" + ) self.assertEqual(study, qdb.study.Study(1)) with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): - qdb.study.Study.from_title('Study title') + qdb.study.Study.from_title("Study title") def test_get_info(self): # Test get all info for single study - qiita_config.portal = 'QIITA' + qiita_config.portal = "QIITA" obs = qdb.study.Study.get_info([1]) self.assertEqual(len(obs), 1) obs = dict(obs[0]) exp = { - 'mixs_compliant': True, 'metadata_complete': True, - 'reprocess': False, 'timeseries_type': 'None', - 'funding': None, 'vamps_id': None, 'public_raw_download': False, - 'first_contact': datetime(2014, 5, 19, 16, 10), - 'principal_investigator_id': 3, 'timeseries_type_id': 1, - 'publications': [{'f1': '10.100/123456', 'f2': True}, - {'f1': '123456', 'f2': False}, - {'f1': '10.100/7891011', 'f2': True}, - {'f1': '7891011', 'f2': False}], - 'study_alias': 'Cannabis Soils', - 'spatial_series': False, 'notes': '', - 'study_abstract': 'This is a preliminary study to examine the ' - 'microbiota associated with the Cannabis plant. Soils samples from' - ' the bulk soil, soil associated with the roots, and the ' - 'rhizosphere were extracted and the DNA sequenced. Roots from ' - 'three independent plants of different strains were examined. ' - 'These roots were obtained November 11, 2011 from plants that had ' - 'been harvested in the summer. 
Future studies will attempt to ' - 'analyze the soils and rhizospheres from the same location at ' - 'different time points in the plant lifecycle.', - 'study_description': 'Analysis of the Cannabis Plant Microbiome', - 'intervention_type': 'None', 'email': 'test@foo.bar', - 'study_id': 1, - 'most_recent_contact': datetime(2014, 5, 19, 16, 11), - 'lab_person_id': 1, - 'study_title': 'Identification of the Microbiomes for Cannabis ' - 'Soils', - 'ebi_submission_status': 'submitted', - 'ebi_study_accession': 'EBI123456-BB', - 'autoloaded': False} + "mixs_compliant": True, + "metadata_complete": True, + "reprocess": False, + "timeseries_type": "None", + "funding": None, + "vamps_id": None, + "public_raw_download": False, + "first_contact": datetime(2014, 5, 19, 16, 10), + "principal_investigator_id": 3, + "timeseries_type_id": 1, + "publications": [ + {"f1": "10.100/123456", "f2": True}, + {"f1": "123456", "f2": False}, + {"f1": "10.100/7891011", "f2": True}, + {"f1": "7891011", "f2": False}, + ], + "study_alias": "Cannabis Soils", + "spatial_series": False, + "notes": "", + "study_abstract": "This is a preliminary study to examine the " + "microbiota associated with the Cannabis plant. Soils samples from" + " the bulk soil, soil associated with the roots, and the " + "rhizosphere were extracted and the DNA sequenced. Roots from " + "three independent plants of different strains were examined. " + "These roots were obtained November 11, 2011 from plants that had " + "been harvested in the summer. Future studies will attempt to " + "analyze the soils and rhizospheres from the same location at " + "different time points in the plant lifecycle.", + "study_description": "Analysis of the Cannabis Plant Microbiome", + "intervention_type": "None", + "email": "test@foo.bar", + "study_id": 1, + "most_recent_contact": datetime(2014, 5, 19, 16, 11), + "lab_person_id": 1, + "study_title": "Identification of the Microbiomes for Cannabis Soils", + "ebi_submission_status": "submitted", + "ebi_study_accession": "EBI123456-BB", + "autoloaded": False, + } self.assertDictEqual(obs, exp) # Test get specific keys for single study - exp_keys = ['metadata_complete', 'reprocess', 'timeseries_type', - 'publications', 'study_title'] + exp_keys = [ + "metadata_complete", + "reprocess", + "timeseries_type", + "publications", + "study_title", + ] obs = qdb.study.Study.get_info([1], exp_keys) self.assertEqual(len(obs), 1) - exp = [{ - 'metadata_complete': True, 'reprocess': False, - 'timeseries_type': 'None', - 'publications': [{'f1': '10.100/123456', 'f2': True}, - {'f1': '123456', 'f2': False}, - {'f1': '10.100/7891011', 'f2': True}, - {'f1': '7891011', 'f2': False}], - 'study_title': 'Identification of the Microbiomes for Cannabis ' - 'Soils'}] + exp = [ + { + "metadata_complete": True, + "reprocess": False, + "timeseries_type": "None", + "publications": [ + {"f1": "10.100/123456", "f2": True}, + {"f1": "123456", "f2": False}, + {"f1": "10.100/7891011", "f2": True}, + {"f1": "7891011", "f2": False}, + ], + "study_title": "Identification of the Microbiomes for Cannabis Soils", + } + ] self.assertEqual(obs, exp) # Test get specific keys for all studies info = { - 'timeseries_type_id': 1, - 'lab_person_id': None, - 'principal_investigator_id': 3, - 'metadata_complete': False, - 'mixs_compliant': True, - 'study_description': 'desc', - 'study_alias': 'alias', - 'study_abstract': 'abstract'} - user = qdb.user.User('test@foo.bar') - - s = qdb.study.Study.create(user, 'test_study_1', info=info) + "timeseries_type_id": 1, + 
"lab_person_id": None, + "principal_investigator_id": 3, + "metadata_complete": False, + "mixs_compliant": True, + "study_description": "desc", + "study_alias": "alias", + "study_abstract": "abstract", + } + user = qdb.user.User("test@foo.bar") + + s = qdb.study.Study.create(user, "test_study_1", info=info) obs = qdb.study.Study.get_info(info_cols=exp_keys) exp = [ - {'metadata_complete': True, 'reprocess': False, - 'timeseries_type': 'None', 'publications': [ - {'f1': '10.100/123456', 'f2': True}, - {'f1': '123456', 'f2': False}, - {'f1': '10.100/7891011', 'f2': True}, - {'f1': '7891011', 'f2': False}], - 'study_title': ('Identification of the Microbiomes for ' - 'Cannabis Soils')}, - {'metadata_complete': False, 'reprocess': False, - 'timeseries_type': 'None', 'publications': None, - 'study_title': 'test_study_1'}] + { + "metadata_complete": True, + "reprocess": False, + "timeseries_type": "None", + "publications": [ + {"f1": "10.100/123456", "f2": True}, + {"f1": "123456", "f2": False}, + {"f1": "10.100/7891011", "f2": True}, + {"f1": "7891011", "f2": False}, + ], + "study_title": ("Identification of the Microbiomes for Cannabis Soils"), + }, + { + "metadata_complete": False, + "reprocess": False, + "timeseries_type": "None", + "publications": None, + "study_title": "test_study_1", + }, + ] self.assertEqual(obs, exp) qdb.study.Study.delete(s.id) # test portal restriction working - qiita_config.portal = 'EMP' + qiita_config.portal = "EMP" with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.study.Study.get_info([1]) def test_has_access_public(self): - self._change_processed_data_status('public') + self._change_processed_data_status("public") - qiita_config.portal = 'QIITA' - self.assertTrue( - self.study.has_access(qdb.user.User("demo@microbio.me"))) - qiita_config.portal = 'EMP' + qiita_config.portal = "QIITA" + self.assertTrue(self.study.has_access(qdb.user.User("demo@microbio.me"))) + qiita_config.portal = "EMP" with self.assertRaises(qdb.exceptions.QiitaDBError): qdb.study.Study(1).has_access(qdb.user.User("demo@microbio.me")) def test_has_access_no_public(self): - self._change_processed_data_status('public') - self.assertFalse( - self.study.has_access(qdb.user.User("demo@microbio.me"), True)) + self._change_processed_data_status("public") + self.assertFalse(self.study.has_access(qdb.user.User("demo@microbio.me"), True)) def test_can_edit(self): - self.assertTrue(self.study.can_edit(qdb.user.User('test@foo.bar'))) - self.assertTrue(self.study.can_edit(qdb.user.User('shared@foo.bar'))) - self.assertTrue(self.study.can_edit(qdb.user.User('admin@foo.bar'))) - self.assertFalse( - self.study.can_edit(qdb.user.User('demo@microbio.me'))) + self.assertTrue(self.study.can_edit(qdb.user.User("test@foo.bar"))) + self.assertTrue(self.study.can_edit(qdb.user.User("shared@foo.bar"))) + self.assertTrue(self.study.can_edit(qdb.user.User("admin@foo.bar"))) + self.assertFalse(self.study.can_edit(qdb.user.User("demo@microbio.me"))) def test_owner(self): self.assertEqual(self.study.owner, qdb.user.User("test@foo.bar")) @@ -342,9 +381,8 @@ def test_public_raw_download(self): def test_share(self): # Clear all sharing associations - self._change_processed_data_status('sandbox') - qdb.sql_connection.perform_as_transaction( - "delete from qiita.study_users") + self._change_processed_data_status("sandbox") + qdb.sql_connection.perform_as_transaction("delete from qiita.study_users") self.assertEqual(self.study.shared_with, []) # Try to share with the owner, which should not work @@ -353,96 +391,102 @@ def 
test_share(self): # Then share the study with shared@foo.bar self.study.share(qdb.user.User("shared@foo.bar")) - self.assertEqual(self.study.shared_with, - [qdb.user.User("shared@foo.bar")]) + self.assertEqual(self.study.shared_with, [qdb.user.User("shared@foo.bar")]) def test_unshare(self): - self._change_processed_data_status('sandbox') + self._change_processed_data_status("sandbox") self.study.unshare(qdb.user.User("shared@foo.bar")) self.assertEqual(self.study.shared_with, []) def test_has_access_shared(self): - self._change_processed_data_status('sandbox') + self._change_processed_data_status("sandbox") self.assertTrue(self.study.has_access(qdb.user.User("shared@foo.bar"))) def test_has_access_private(self): - self._change_processed_data_status('sandbox') + self._change_processed_data_status("sandbox") self.assertTrue(self.study.has_access(qdb.user.User("test@foo.bar"))) def test_has_access_admin(self): - self._change_processed_data_status('sandbox') + self._change_processed_data_status("sandbox") self.assertTrue(self.study.has_access(qdb.user.User("admin@foo.bar"))) def test_has_access_no_access(self): - self._change_processed_data_status('sandbox') - self.assertFalse( - self.study.has_access(qdb.user.User("demo@microbio.me"))) + self._change_processed_data_status("sandbox") + self.assertFalse(self.study.has_access(qdb.user.User("demo@microbio.me"))) def test_get_by_status(self): - obs = qdb.study.Study.get_by_status('sandbox') + obs = qdb.study.Study.get_by_status("sandbox") self.assertEqual(obs, set()) s = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils', - self.info) - obs = qdb.study.Study.get_by_status('private') + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils", + self.info, + ) + obs = qdb.study.Study.get_by_status("private") self.assertEqual(obs, {qdb.study.Study(1)}) - obs = qdb.study.Study.get_by_status('sandbox') + obs = qdb.study.Study.get_by_status("sandbox") self.assertEqual(obs, {s}) - obs = qdb.study.Study.get_by_status('public') + obs = qdb.study.Study.get_by_status("public") self.assertEqual(obs, set()) - obs = qdb.study.Study.get_by_status('awaiting_approval') + obs = qdb.study.Study.get_by_status("awaiting_approval") self.assertEqual(obs, set()) qdb.study.Study.delete(s.id) def test_exists(self): - self.assertTrue(qdb.study.Study.exists( - 'Identification of the Microbiomes for Cannabis Soils')) - self.assertFalse(qdb.study.Study.exists('Not Cannabis Soils')) + self.assertTrue( + qdb.study.Study.exists( + "Identification of the Microbiomes for Cannabis Soils" + ) + ) + self.assertFalse(qdb.study.Study.exists("Not Cannabis Soils")) def test_create_duplicate(self): to_test = [ - 'Identification of the Microbiomes for Cannabis Soils', - 'Identification of the Microbiomes for Cannabis Soils', - ' Identification of the Microbiomes for Cannabis Soils', - 'Identification of the Microbiomes for Cannabis Soils ', - ' Identification of the Microbiomes for Cannabis Soils ' + "Identification of the Microbiomes for Cannabis Soils", + "Identification of the Microbiomes for Cannabis Soils", + " Identification of the Microbiomes for Cannabis Soils", + "Identification of the Microbiomes for Cannabis Soils ", + " Identification of the Microbiomes for Cannabis Soils ", ] for tt in to_test: with self.assertRaises(qdb.exceptions.QiitaDBDuplicateError): - qdb.study.Study.create( - qdb.user.User('test@foo.bar'), tt, self.info) + 
qdb.study.Study.create(qdb.user.User("test@foo.bar"), tt, self.info) def test_create_study_min_data(self): """Insert a study into the database""" before = datetime.now() obs = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Fried chicken microbiome 1", - self.info) + qdb.user.User("test@foo.bar"), "Fried chicken microbiome 1", self.info + ) after = datetime.now() - self.assertEqual(obs.status, 'sandbox') + self.assertEqual(obs.status, "sandbox") self.assertEqual(obs.title, "Fried chicken microbiome 1") obs_info = obs.info - insertion_timestamp = obs_info.pop('first_contact') - exp = {'mixs_compliant': True, 'metadata_complete': True, - 'reprocess': False, 'public_raw_download': False, - 'funding': None, 'vamps_id': None, - 'principal_investigator': qdb.study.StudyPerson(3), - 'timeseries_type_id': 1, - 'study_abstract': 'Exploring how a high fat diet changes the ' - 'gut microbiome', - 'spatial_series': None, - 'study_description': 'Microbiome of people who eat nothing but' - ' fried chicken', - 'study_alias': 'FCM', - 'most_recent_contact': None, - 'lab_person': qdb.study.StudyPerson(1), - 'notes': ''} + insertion_timestamp = obs_info.pop("first_contact") + exp = { + "mixs_compliant": True, + "metadata_complete": True, + "reprocess": False, + "public_raw_download": False, + "funding": None, + "vamps_id": None, + "principal_investigator": qdb.study.StudyPerson(3), + "timeseries_type_id": 1, + "study_abstract": "Exploring how a high fat diet changes the " + "gut microbiome", + "spatial_series": None, + "study_description": "Microbiome of people who eat nothing but" + " fried chicken", + "study_alias": "FCM", + "most_recent_contact": None, + "lab_person": qdb.study.StudyPerson(1), + "notes": "", + } self.assertEqual(obs_info, exp) # Check the timestamp separately, since it is set by the database # to the microsecond, and we can't predict it a priori @@ -452,9 +496,9 @@ def test_create_study_min_data(self): self.assertEqual(obs.investigation, None) self.assertEqual(obs.sample_template, None) self.assertEqual(obs.data_types, []) - self.assertEqual(obs.owner, qdb.user.User('test@foo.bar')) + self.assertEqual(obs.owner, qdb.user.User("test@foo.bar")) self.assertEqual(obs.environmental_packages, []) - self.assertEqual(obs._portals, ['QIITA']) + self.assertEqual(obs._portals, ["QIITA"]) self.assertEqual(obs.ebi_study_accession, None) self.assertEqual(obs.ebi_submission_status, "not submitted") qdb.study.Study.delete(obs.id) @@ -462,13 +506,17 @@ def test_create_study_min_data(self): def test_create_nonqiita_portal(self): qiita_config.portal = "EMP" s = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "NEW!", self.info, - qdb.investigation.Investigation(1)) + qdb.user.User("test@foo.bar"), + "NEW!", + self.info, + qdb.investigation.Investigation(1), + ) # make sure portal is associated with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add( - "SELECT * from qiita.study_portal WHERE study_id = %s", [s.id]) + "SELECT * from qiita.study_portal WHERE study_id = %s", [s.id] + ) obs = qdb.sql_connection.TRN.execute_fetchindex() self.assertEqual(obs, [[s.id, 2], [s.id, 1]]) qdb.study.Study.delete(s.id) @@ -476,103 +524,114 @@ def test_create_nonqiita_portal(self): def test_create_study_with_investigation(self): """Insert a study into the database with an investigation""" new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Fried chicken microbiome 2", - self.info, qdb.investigation.Investigation(1)) + qdb.user.User("test@foo.bar"), + "Fried chicken microbiome 2", + self.info, 
+ qdb.investigation.Investigation(1), + ) # check the investigation was assigned with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add( - "SELECT * from qiita.investigation_study WHERE study_id = %s", - [new.id]) + "SELECT * from qiita.investigation_study WHERE study_id = %s", [new.id] + ) obs = qdb.sql_connection.TRN.execute_fetchindex() self.assertEqual(obs, [[1, new.id]]) # testing Study.iter() - self.assertCountEqual(list(qdb.study.Study.iter()), - [qdb.study.Study(1), new]) + self.assertCountEqual(list(qdb.study.Study.iter()), [qdb.study.Study(1), new]) qdb.study.Study.delete(new.id) def test_create_study_all_data(self): """Insert a study into the database with every info field""" - self.info.update({ - 'vamps_id': 'MBE_1111111', - 'funding': 'FundAgency', - 'spatial_series': True, - 'metadata_complete': False, - 'reprocess': True, - 'first_contact': "10/24/2014 12:47PM", - 'study_id': 3827, - 'notes': 'an analysis was performed \n here and \n here' - }) + self.info.update( + { + "vamps_id": "MBE_1111111", + "funding": "FundAgency", + "spatial_series": True, + "metadata_complete": False, + "reprocess": True, + "first_contact": "10/24/2014 12:47PM", + "study_id": 3827, + "notes": "an analysis was performed \n here and \n here", + } + ) obs = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Fried chicken microbiome 3", - self.info) + qdb.user.User("test@foo.bar"), "Fried chicken microbiome 3", self.info + ) self.assertEqual(obs.id, 3827) - self.assertEqual(obs.status, 'sandbox') + self.assertEqual(obs.status, "sandbox") self.assertEqual(obs.title, "Fried chicken microbiome 3") - exp = {'mixs_compliant': True, 'metadata_complete': False, - 'reprocess': True, 'public_raw_download': False, - 'funding': 'FundAgency', 'vamps_id': 'MBE_1111111', - 'first_contact': datetime(2014, 10, 24, 12, 47), - 'principal_investigator': qdb.study.StudyPerson(3), - 'timeseries_type_id': 1, - 'study_abstract': 'Exploring how a high fat diet changes the ' - 'gut microbiome', - 'spatial_series': True, - 'study_description': 'Microbiome of people who eat nothing ' - 'but fried chicken', - 'study_alias': 'FCM', - 'most_recent_contact': None, - 'lab_person': qdb.study.StudyPerson(1), - 'notes': 'an analysis was performed \n here and \n here'} + exp = { + "mixs_compliant": True, + "metadata_complete": False, + "reprocess": True, + "public_raw_download": False, + "funding": "FundAgency", + "vamps_id": "MBE_1111111", + "first_contact": datetime(2014, 10, 24, 12, 47), + "principal_investigator": qdb.study.StudyPerson(3), + "timeseries_type_id": 1, + "study_abstract": "Exploring how a high fat diet changes the " + "gut microbiome", + "spatial_series": True, + "study_description": "Microbiome of people who eat nothing " + "but fried chicken", + "study_alias": "FCM", + "most_recent_contact": None, + "lab_person": qdb.study.StudyPerson(1), + "notes": "an analysis was performed \n here and \n here", + } self.assertEqual(obs.info, exp) self.assertEqual(obs.shared_with, []) self.assertEqual(obs.publications, []) self.assertEqual(obs.investigation, None) self.assertEqual(obs.sample_template, None) self.assertEqual(obs.data_types, []) - self.assertEqual(obs.owner, qdb.user.User('test@foo.bar')) + self.assertEqual(obs.owner, qdb.user.User("test@foo.bar")) self.assertEqual(obs.environmental_packages, []) - self.assertEqual(obs._portals, ['QIITA']) + self.assertEqual(obs._portals, ["QIITA"]) self.assertEqual(obs.ebi_study_accession, None) self.assertEqual(obs.ebi_submission_status, "not submitted") # testing 
Study.iter() - self.assertCountEqual(list(qdb.study.Study.iter()), - [qdb.study.Study(1), obs]) + self.assertCountEqual(list(qdb.study.Study.iter()), [qdb.study.Study(1), obs]) qdb.study.Study.delete(obs.id) def test_create_missing_required(self): - """ Insert a study that is missing a required info key""" + """Insert a study that is missing a required info key""" self.info.pop("study_alias") with self.assertRaises(qdb.exceptions.QiitaDBColumnError): qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Fried Chicken Microbiome 4", - self.info) + qdb.user.User("test@foo.bar"), "Fried Chicken Microbiome 4", self.info + ) def test_create_study_with_not_allowed_key(self): """Insert a study with key from _non_info present""" self.info.update({"email": "wooo@sup.net"}) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Fried Chicken Microbiome 6", - self.info) + qdb.user.User("test@foo.bar"), "Fried Chicken Microbiome 6", self.info + ) def test_create_unknown_db_col(self): - """ Insert a study with an info key not in the database""" + """Insert a study with an info key not in the database""" self.info["SHOULDNOTBEHERE"] = "BWAHAHAHAHAHA" with self.assertRaises(qdb.exceptions.QiitaDBColumnError): qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Fried Chicken Microbiome 7", - self.info) + qdb.user.User("test@foo.bar"), "Fried Chicken Microbiome 7", self.info + ) def test_delete(self): title = "Fried chicken microbiome 8" # the study is assigned to investigation 1 study = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), title, self.info, - qdb.investigation.Investigation(1)) + qdb.user.User("test@foo.bar"), + title, + self.info, + qdb.investigation.Investigation(1), + ) # sharing with other user study.share(qdb.user.User("shared@foo.bar")) study.delete(study.id) @@ -585,71 +644,76 @@ def test_delete(self): qdb.study.Study.delete(41) def test_retrieve_title(self): - self.assertEqual(self.study.title, 'Identification of the Microbiomes' - ' for Cannabis Soils') + self.assertEqual( + self.study.title, "Identification of the Microbiomes for Cannabis Soils" + ) def test_set_title(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 1', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 1", + self.info, + ) new.title = "Cannabis soils" self.assertEqual(new.title, "Cannabis soils") qdb.study.Study.delete(new.id) def test_portals(self): - self.assertEqual(self.study._portals, ['QIITA']) + self.assertEqual(self.study._portals, ["QIITA"]) def test_ebi_study_accession(self): - self.assertEqual(self.study.ebi_study_accession, 'EBI123456-BB') + self.assertEqual(self.study.ebi_study_accession, "EBI123456-BB") new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 4', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 4", + self.info, + ) self.assertEqual(new.ebi_study_accession, None) qdb.study.Study.delete(new.id) def test_ebi_study_accession_setter(self): - new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), 'Test', self.info) + new = qdb.study.Study.create(qdb.user.User("test@foo.bar"), "Test", self.info) self.assertEqual(new.ebi_study_accession, None) - new.ebi_study_accession = 'EBI654321-BB' - self.assertEqual(new.ebi_study_accession, 'EBI654321-BB') + 
new.ebi_study_accession = "EBI654321-BB" + self.assertEqual(new.ebi_study_accession, "EBI654321-BB") # Raises an error if the study already has an EBI study accession with self.assertRaises(qdb.exceptions.QiitaDBError): - self.study.ebi_study_accession = 'EBI654321-BB' + self.study.ebi_study_accession = "EBI654321-BB" qdb.study.Study.delete(new.id) def test_ebi_submission_status(self): - self.assertEqual(self.study.ebi_submission_status, 'submitted') + self.assertEqual(self.study.ebi_submission_status, "submitted") # let's test that even with a failed job nothing changes # add a failed job for an artifact (2) that can be submitted - user = qdb.user.User('test@foo.bar') - qp = qdb.software.Software.from_name_and_version('Qiita', 'alpha') - cmd = qp.get_command('submit_to_EBI') - params = qdb.software.Parameters.load(cmd, values_dict={ - 'artifact': 2, 'submission_type': 'ADD'}) + user = qdb.user.User("test@foo.bar") + qp = qdb.software.Software.from_name_and_version("Qiita", "alpha") + cmd = qp.get_command("submit_to_EBI") + params = qdb.software.Parameters.load( + cmd, values_dict={"artifact": 2, "submission_type": "ADD"} + ) job = qdb.processing_job.ProcessingJob.create(user, params, True) - job._set_error('Killed by Admin') + job._set_error("Killed by Admin") # and just to be careful add a failed job for an artifact (1) that # cannot be submitted - qp = qdb.software.Software.from_name_and_version('Qiita', 'alpha') - cmd = qp.get_command('submit_to_EBI') - params = qdb.software.Parameters.load(cmd, values_dict={ - 'artifact': 1, 'submission_type': 'ADD'}) + qp = qdb.software.Software.from_name_and_version("Qiita", "alpha") + cmd = qp.get_command("submit_to_EBI") + params = qdb.software.Parameters.load( + cmd, values_dict={"artifact": 1, "submission_type": "ADD"} + ) job = qdb.processing_job.ProcessingJob.create(user, params, True) - job._set_error('Killed by Admin') + job._set_error("Killed by Admin") # should still return submited - self.assertEqual(self.study.ebi_submission_status, 'submitted') + self.assertEqual(self.study.ebi_submission_status, "submitted") new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 5', - self.info) - self.assertEqual(new.ebi_submission_status, 'not submitted') + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 5", + self.info, + ) + self.assertEqual(new.ebi_submission_status, "not submitted") qdb.study.Study.delete(new.id) def test_set_info(self): @@ -658,14 +722,15 @@ def test_set_info(self): "timeseries_type_id": 2, "metadata_complete": False, "lab_person_id": qdb.study.StudyPerson(2), - "vamps_id": 'MBE_111222', - 'notes': 'These are my notes!!! \n ... and more notes ...' + "vamps_id": "MBE_111222", + "notes": "These are my notes!!! \n ... 
and more notes ...", } - self.info['first_contact'] = "6/11/2014" + self.info["first_contact"] = "6/11/2014" new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 6', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 6", + self.info, + ) self.infoexp.update(newinfo) new.info = newinfo # add missing table cols @@ -692,18 +757,20 @@ def test_set_info_public_error(self): def test_set_info_disallowed_keys(self): """Tests for fail if sending non-info keys in info dict""" new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 7', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 7", + self.info, + ) with self.assertRaises(qdb.exceptions.QiitaDBColumnError): new.info = {"email": "fail@fail.com"} qdb.study.Study.delete(new.id) def test_info_empty(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 8', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 8", + self.info, + ) with self.assertRaises(IncompetentQiitaDeveloperError): new.info = {} qdb.study.Study.delete(new.id) @@ -712,114 +779,136 @@ def test_retrieve_status(self): self.assertEqual(self.study.status, "private") def test_retrieve_shared_with(self): - self.assertEqual(self.study.shared_with, - [qdb.user.User('shared@foo.bar')]) + self.assertEqual(self.study.shared_with, [qdb.user.User("shared@foo.bar")]) def test_retrieve_publications_empty(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 9', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 9", + self.info, + ) self.assertEqual(new.publications, []) def test_publication_setter(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), 'New study', self.info) + qdb.user.User("test@foo.bar"), "New study", self.info + ) self.assertEqual(new.publications, []) - new_values = [['10.100/654321', True], - ['10.100/1101987', True], - ['1101987', False]] + new_values = [ + ["10.100/654321", True], + ["10.100/1101987", True], + ["1101987", False], + ] new.publications = new_values self.assertEqual(new.publications, new_values) qdb.study.Study.delete(new.id) def test_publications_setter_typeerror(self): with self.assertRaises(TypeError): - self.study.publications = '123456' + self.study.publications = "123456" def test_retrieve_investigation(self): - self.assertEqual(self.study.investigation, - qdb.investigation.Investigation(1)) + self.assertEqual(self.study.investigation, qdb.investigation.Investigation(1)) def test_retrieve_investigation_empty(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 10', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 10", + self.info, + ) self.assertEqual(new.investigation, None) qdb.study.Study.delete(new.id) def test_retrieve_sample_template(self): self.assertEqual( self.study.sample_template, - qdb.metadata_template.sample_template.SampleTemplate(1)) + qdb.metadata_template.sample_template.SampleTemplate(1), + ) def test_retrieve_data_types(self): - self.assertEqual(self.study.data_types, ['18S']) + 
self.assertEqual(self.study.data_types, ["18S"]) def test_retrieve_data_types_none(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 11', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 11", + self.info, + ) self.assertEqual(new.data_types, []) qdb.study.Study.delete(new.id) def test_retrieve_artifacts(self): - exp = [qdb.artifact.Artifact(1), - qdb.artifact.Artifact(2), - qdb.artifact.Artifact(3), - qdb.artifact.Artifact(4), - qdb.artifact.Artifact(5), - qdb.artifact.Artifact(6), - qdb.artifact.Artifact(7)] + exp = [ + qdb.artifact.Artifact(1), + qdb.artifact.Artifact(2), + qdb.artifact.Artifact(3), + qdb.artifact.Artifact(4), + qdb.artifact.Artifact(5), + qdb.artifact.Artifact(6), + qdb.artifact.Artifact(7), + ] self.assertEqual(self.study.artifacts(), exp) self.assertEqual(self.study.artifacts(dtype="16S"), exp[-2:]) self.assertEqual(self.study.artifacts(dtype="18S"), exp[:-2]) - self.assertEqual(self.study.artifacts(artifact_type="BIOM"), - [qdb.artifact.Artifact(4), - qdb.artifact.Artifact(5), - qdb.artifact.Artifact(6), - qdb.artifact.Artifact(7)]) + self.assertEqual( + self.study.artifacts(artifact_type="BIOM"), + [ + qdb.artifact.Artifact(4), + qdb.artifact.Artifact(5), + qdb.artifact.Artifact(6), + qdb.artifact.Artifact(7), + ], + ) - self.assertEqual(self.study.artifacts(dtype="18S", - artifact_type="BIOM"), - [qdb.artifact.Artifact(4), - qdb.artifact.Artifact(5)]) + self.assertEqual( + self.study.artifacts(dtype="18S", artifact_type="BIOM"), + [qdb.artifact.Artifact(4), qdb.artifact.Artifact(5)], + ) def test_retrieve_artifacts_none(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 12', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 12", + self.info, + ) self.assertEqual(new.artifacts(), []) qdb.study.Study.delete(new.id) def test_retrieve_prep_templates(self): self.assertCountEqual( self.study.prep_templates(), - [qdb.metadata_template.prep_template.PrepTemplate(1), - qdb.metadata_template.prep_template.PrepTemplate(2)]) + [ + qdb.metadata_template.prep_template.PrepTemplate(1), + qdb.metadata_template.prep_template.PrepTemplate(2), + ], + ) def test_retrieve_prep_templates_none(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 13', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 13", + self.info, + ) self.assertEqual(new.prep_templates(), []) qdb.study.Study.delete(new.id) def test_analyses(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 13', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 13", + self.info, + ) - self.assertEqual(qdb.study.Study(1).analyses(), [ - qdb.analysis.Analysis(1), qdb.analysis.Analysis(2), - qdb.analysis.Analysis(3)]) + self.assertEqual( + qdb.study.Study(1).analyses(), + [ + qdb.analysis.Analysis(1), + qdb.analysis.Analysis(2), + qdb.analysis.Analysis(3), + ], + ) self.assertEqual(qdb.study.Study(2).analyses(), []) @@ -827,19 +916,20 @@ def test_analyses(self): def test_environmental_packages(self): obs = self.study.environmental_packages - exp = ['soil', 'plant-associated'] + exp = ["soil", 
"plant-associated"] self.assertEqual(sorted(obs), sorted(exp)) def test_environmental_packages_setter(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 14', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 14", + self.info, + ) obs = new.environmental_packages exp = [] self.assertEqual(obs, exp) - new_values = ['air', 'human-oral'] + new_values = ["air", "human-oral"] new.environmental_packages = new_values obs = new.environmental_packages self.assertEqual(sorted(obs), sorted(new_values)) @@ -847,64 +937,65 @@ def test_environmental_packages_setter(self): def test_environmental_packages_setter_typeerror(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 15', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 15", + self.info, + ) with self.assertRaises(TypeError): - new.environmental_packages = 'air' + new.environmental_packages = "air" qdb.study.Study.delete(new.id) def test_environmental_packages_setter_valueerror(self): new = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), - 'NOT Identification of the Microbiomes for Cannabis Soils 16', - self.info) + qdb.user.User("test@foo.bar"), + "NOT Identification of the Microbiomes for Cannabis Soils 16", + self.info, + ) with self.assertRaises(ValueError): - new.environmental_packages = ['air', 'not a package'] + new.environmental_packages = ["air", "not a package"] qdb.study.Study.delete(new.id) def test_environmental_packages_sandboxed(self): with self.assertRaises(qdb.exceptions.QiitaDBStatusError): - self.study.environmental_packages = ['air'] + self.study.environmental_packages = ["air"] def test_study_tags(self): # testing empty tags obs = qdb.study.Study.get_tags() - self.assertEqual(obs, {'admin': [], 'user': []}) + self.assertEqual(obs, {"admin": [], "user": []}) # inserting new tags - user = qdb.user.User('test@foo.bar') - tags = ['this is my tag', 'I want GOLD!!', 'this is my tag'] + user = qdb.user.User("test@foo.bar") + tags = ["this is my tag", "I want GOLD!!", "this is my tag"] qdb.study.Study.insert_tags(user, tags) # now as admin - admin = qdb.user.User('admin@foo.bar') - admin_tags = ['actual GOLD!', 'this is my tag'] + admin = qdb.user.User("admin@foo.bar") + admin_tags = ["actual GOLD!", "this is my tag"] qdb.study.Study.insert_tags(admin, admin_tags) # testing that insertion went fine obs = qdb.study.Study.get_tags() - exp = {'user': ['I want GOLD!!', 'this is my tag'], - 'admin': ['actual GOLD!']} + exp = {"user": ["I want GOLD!!", "this is my tag"], "admin": ["actual GOLD!"]} self.assertEqual(obs, exp) # assigning the tags to study as user study = qdb.study.Study(1) - tags = ['this is my tag', 'actual GOLD!'] + tags = ["this is my tag", "actual GOLD!"] message = study.update_tags(user, tags) self.assertCountEqual(study.tags, tags[:1]) - self.assertEqual(message, 'Only admins can assign: actual GOLD!') + self.assertEqual(message, "Only admins can assign: actual GOLD!") # now like admin message = study.update_tags(admin, tags) self.assertCountEqual(study.tags, tags) - self.assertEqual(message, '') + self.assertEqual(message, "") # cleaning tags message = study.update_tags(user, []) - self.assertEqual(study.tags, ['actual GOLD!']) - self.assertEqual(message, 'You cannot remove: actual GOLD!') + self.assertEqual(study.tags, ["actual GOLD!"]) + 
self.assertEqual(message, "You cannot remove: actual GOLD!") message = study.update_tags(admin, []) self.assertEqual(study.tags, []) - self.assertEqual(message, '') + self.assertEqual(message, "") if __name__ == "__main__": diff --git a/qiita_db/test/test_user.py b/qiita_db/test/test_user.py index 17110c7db..6040614a7 100644 --- a/qiita_db/test/test_user.py +++ b/qiita_db/test/test_user.py @@ -8,25 +8,28 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main from datetime import datetime, timedelta +from unittest import TestCase, main -from qiita_core.exceptions import (IncorrectEmailError, IncorrectPasswordError, - IncompetentQiitaDeveloperError) -from qiita_core.util import qiita_test_checker -from qiita_core.qiita_settings import qiita_config import qiita_db as qdb +from qiita_core.exceptions import ( + IncompetentQiitaDeveloperError, + IncorrectEmailError, + IncorrectPasswordError, +) +from qiita_core.qiita_settings import qiita_config +from qiita_core.util import qiita_test_checker class SupportTests(TestCase): def test_validate_password(self): - valid1 = 'abcdefgh' - valid2 = 'abcdefgh1234' - valid3 = 'abcdefgh!@#$' - valid4 = 'aBC123!@#{}' - invalid1 = 'abc' - invalid2 = u'øabcdefghi' - invalid3 = 'abcd efgh' + valid1 = "abcdefgh" + valid2 = "abcdefgh1234" + valid3 = "abcdefgh!@#$" + valid4 = "aBC123!@#{}" + invalid1 = "abc" + invalid2 = "øabcdefghi" + invalid3 = "abcd efgh" self.assertTrue(qdb.user.validate_password(valid1)) self.assertTrue(qdb.user.validate_password(valid2)) @@ -37,14 +40,14 @@ def test_validate_password(self): self.assertFalse(qdb.user.validate_password(invalid3)) def test_validate_email(self): - valid1 = 'foo@bar.com' - valid2 = 'asdasd.asdasd.asd123asd@stuff.edu' - valid3 = 'w00t@123.456.789.com' - valid4 = 'name@a.b-c.d' - invalid1 = '@stuff.com' - invalid2 = 'asdasdásd@things.com' - invalid3 = '.asdas@com' - invalid4 = 'name@a.b-c.d-' + valid1 = "foo@bar.com" + valid2 = "asdasd.asdasd.asd123asd@stuff.edu" + valid3 = "w00t@123.456.789.com" + valid4 = "name@a.b-c.d" + invalid1 = "@stuff.com" + invalid2 = "asdasdásd@things.com" + invalid3 = ".asdas@com" + invalid4 = "name@a.b-c.d-" self.assertTrue(qdb.user.validate_email(valid1)) self.assertTrue(qdb.user.validate_email(valid2)) @@ -61,33 +64,33 @@ class UserTest(TestCase): """Tests the User object and all properties/methods""" def setUp(self): - self.user = qdb.user.User('admin@foo.bar') + self.user = qdb.user.User("admin@foo.bar") self.portal = qiita_config.portal self.userinfo = { - 'name': 'Dude', - 'affiliation': 'Nowhere University', - 'address': '123 fake st, Apt 0, Faketown, CO 80302', - 'phone': '111-222-3344', - 'pass_reset_code': None, - 'pass_reset_timestamp': None, - 'user_verify_code': None, - 'receive_processing_job_emails': True, - 'social_orcid': None, - 'social_researchgate': None, - 'social_googlescholar': None, - 'creation_timestamp': datetime(2015, 12, 3, 13, 52, 42, 751331) + "name": "Dude", + "affiliation": "Nowhere University", + "address": "123 fake st, Apt 0, Faketown, CO 80302", + "phone": "111-222-3344", + "pass_reset_code": None, + "pass_reset_timestamp": None, + "user_verify_code": None, + "receive_processing_job_emails": True, + "social_orcid": None, + "social_researchgate": None, + "social_googlescholar": None, + "creation_timestamp": datetime(2015, 12, 3, 13, 52, 42, 751331), } def tearDown(self): qiita_config.portal = self.portal def 
test_instantiate_user(self): - qdb.user.User('admin@foo.bar') + qdb.user.User("admin@foo.bar") def test_instantiate_unknown_user(self): with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): - qdb.user.User('FAIL@OMG.bar') + qdb.user.User("FAIL@OMG.bar") def _check_correct_info(self, obs, exp, ts_before=None): """Compares info dict of user with special handling of specific keys. @@ -109,26 +112,29 @@ def _check_correct_info(self, obs, exp, ts_before=None): for key in exp: # user_verify_code and password seed randomly generated so just # making sure they exist and is correct length - if key == 'user_verify_code': + if key == "user_verify_code": self.assertEqual(len(obs[key]), 20) elif key == "password": self.assertEqual(len(obs[key]), 60) elif key == "creation_timestamp": - self.assertTrue(((exp[key] is None) and (obs[key] is None)) - or (ts_before <= exp[key])) + self.assertTrue( + ((exp[key] is None) and (obs[key] is None)) + or (ts_before <= exp[key]) + ) else: self.assertEqual(obs[key], exp[key]) def test_create_user(self): before = datetime.now() - user = qdb.user.User.create('testcreateuser@test.bar', 'password') + user = qdb.user.User.create("testcreateuser@test.bar", "password") # adding a couple of messages qdb.util.add_system_message("TESTMESSAGE_OLD", datetime.now()) qdb.util.add_system_message( - "TESTMESSAGE", datetime.now() + timedelta(milliseconds=1)) + "TESTMESSAGE", datetime.now() + timedelta(milliseconds=1) + ) - self.assertEqual(user.id, 'testcreateuser@test.bar') + self.assertEqual(user.id, "testcreateuser@test.bar") sql = """SELECT * FROM qiita.qiita_user WHERE email = 'testcreateuser@test.bar'""" @@ -138,39 +144,41 @@ def test_create_user(self): self.assertEqual(len(obs), 1) obs = dict(obs[0]) exp = { - 'password': '', - 'name': None, - 'pass_reset_timestamp': None, - 'affiliation': None, - 'pass_reset_code': None, - 'phone': None, - 'user_verify_code': '', - 'address': None, - 'user_level_id': 5, - 'receive_processing_job_emails': False, - 'email': 'testcreateuser@test.bar', - 'social_orcid': None, - 'social_researchgate': None, - 'social_googlescholar': None, - 'creation_timestamp': datetime.now()} + "password": "", + "name": None, + "pass_reset_timestamp": None, + "affiliation": None, + "pass_reset_code": None, + "phone": None, + "user_verify_code": "", + "address": None, + "user_level_id": 5, + "receive_processing_job_emails": False, + "email": "testcreateuser@test.bar", + "social_orcid": None, + "social_researchgate": None, + "social_googlescholar": None, + "creation_timestamp": datetime.now(), + } self._check_correct_info(obs, exp, before) # Make sure new system messages are linked to user sql = """SELECT message_id FROM qiita.message_user WHERE email = 'testcreateuser@test.bar'""" - m_id = qdb.util.get_count('qiita.message') + m_id = qdb.util.get_count("qiita.message") # the user should have the latest message (m_id) and the one before with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) obs = qdb.sql_connection.TRN.execute_fetchindex() - self.assertEqual(obs, [[m_id-1], [m_id]]) + self.assertEqual(obs, [[m_id - 1], [m_id]]) qdb.util.clear_system_messages() def test_create_user_info(self): before = datetime.now() - user = qdb.user.User.create('testcreateuserinfo@test.bar', 'password', - self.userinfo) - self.assertEqual(user.id, 'testcreateuserinfo@test.bar') + user = qdb.user.User.create( + "testcreateuserinfo@test.bar", "password", self.userinfo + ) + self.assertEqual(user.id, "testcreateuserinfo@test.bar") sql = """SELECT * FROM 
qiita.qiita_user WHERE email = 'testcreateuserinfo@test.bar'""" @@ -180,48 +188,51 @@ def test_create_user_info(self): self.assertEqual(len(obs), 1) obs = dict(obs[0]) exp = { - 'password': '', - 'name': 'Dude', - 'affiliation': 'Nowhere University', - 'address': '123 fake st, Apt 0, Faketown, CO 80302', - 'phone': '111-222-3344', - 'pass_reset_timestamp': None, - 'pass_reset_code': None, - 'user_verify_code': '', - 'user_level_id': 5, - 'receive_processing_job_emails': True, - 'email': 'testcreateuserinfo@test.bar', - 'social_orcid': None, - 'social_researchgate': None, - 'social_googlescholar': None, - 'creation_timestamp': datetime.now()} + "password": "", + "name": "Dude", + "affiliation": "Nowhere University", + "address": "123 fake st, Apt 0, Faketown, CO 80302", + "phone": "111-222-3344", + "pass_reset_timestamp": None, + "pass_reset_code": None, + "user_verify_code": "", + "user_level_id": 5, + "receive_processing_job_emails": True, + "email": "testcreateuserinfo@test.bar", + "social_orcid": None, + "social_researchgate": None, + "social_googlescholar": None, + "creation_timestamp": datetime.now(), + } self._check_correct_info(obs, exp, before) def test_create_user_column_not_allowed(self): self.userinfo["email"] = "FAIL" with self.assertRaises(qdb.exceptions.QiitaDBColumnError): - qdb.user.User.create('new@test.bar', 'password', self.userinfo) + qdb.user.User.create("new@test.bar", "password", self.userinfo) def test_create_user_non_existent_column(self): self.userinfo["BADTHING"] = "FAIL" with self.assertRaises(qdb.exceptions.QiitaDBColumnError): - qdb.user.User.create('new@test.bar', 'password', self.userinfo) + qdb.user.User.create("new@test.bar", "password", self.userinfo) def test_create_user_duplicate(self): with self.assertRaises(qdb.exceptions.QiitaDBDuplicateError): - qdb.user.User.create('test@foo.bar', 'password') + qdb.user.User.create("test@foo.bar", "password") def test_create_user_bad_email(self): with self.assertRaises(IncorrectEmailError): - qdb.user.User.create('notanemail', 'password') + qdb.user.User.create("notanemail", "password") def test_create_user_bad_password(self): with self.assertRaises(IncorrectPasswordError): - qdb.user.User.create('new@test.com', '') + qdb.user.User.create("new@test.com", "") def test_login(self): - self.assertEqual(qdb.user.User.login("test@foo.bar", "password"), - qdb.user.User("test@foo.bar")) + self.assertEqual( + qdb.user.User.login("test@foo.bar", "password"), + qdb.user.User("test@foo.bar"), + ) def test_login_incorrect_user(self): with self.assertRaises(IncorrectEmailError): @@ -246,26 +257,25 @@ def test_exists_invalid_email(self): qdb.user.User.exists("notanemail.@badformat") def test_get_email(self): - self.assertEqual(self.user.email, 'admin@foo.bar') + self.assertEqual(self.user.email, "admin@foo.bar") def test_get_level(self): self.assertEqual(self.user.level, "admin") def test_get_info(self): expinfo = { - 'name': 'Admin', - 'affiliation': 'Owner University', - 'address': '312 noname st, Apt K, Nonexistantown, CO 80302', - 'phone': '222-444-6789', - 'pass_reset_code': None, - 'pass_reset_timestamp': None, - 'user_verify_code': None, - 'receive_processing_job_emails': False, - 'phone': '222-444-6789', - 'social_orcid': None, - 'social_researchgate': None, - 'social_googlescholar': None, - 'creation_timestamp': datetime(2015, 12, 3, 13, 52, 42, 751331) + "name": "Admin", + "affiliation": "Owner University", + "address": "312 noname st, Apt K, Nonexistantown, CO 80302", + "phone": "222-444-6789", + "pass_reset_code": 
None, + "pass_reset_timestamp": None, + "user_verify_code": None, + "receive_processing_job_emails": False, + "social_orcid": None, + "social_researchgate": None, + "social_googlescholar": None, + "creation_timestamp": datetime(2015, 12, 3, 13, 52, 42, 751331), } # test database is re-populated during testing several times. @@ -273,10 +283,12 @@ def test_get_info(self): # i.e. we cannot predict its value. We just test that this date should # be within an hour and now. For the remainder of tests, we update # our expectation. - self.assertTrue(datetime.now() - timedelta(hours=1) < - self.user.info['creation_timestamp'] < - datetime.now()) - expinfo['creation_timestamp'] = self.user.info['creation_timestamp'] + self.assertTrue( + datetime.now() - timedelta(hours=1) + < self.user.info["creation_timestamp"] + < datetime.now() + ) + expinfo["creation_timestamp"] = self.user.info["creation_timestamp"] self.assertEqual(self.user.info, expinfo) @@ -306,7 +318,7 @@ def test_default_analysis(self): self.assertEqual(obs, qdb.analysis.Analysis(8)) def test_get_user_studies(self): - user = qdb.user.User('test@foo.bar') + user = qdb.user.User("test@foo.bar") qiita_config.portal = "QIITA" self.assertEqual(user.user_studies, {qdb.study.Study(1)}) @@ -314,7 +326,7 @@ def test_get_user_studies(self): self.assertEqual(user.user_studies, set()) def test_get_shared_studies(self): - user = qdb.user.User('shared@foo.bar') + user = qdb.user.User("shared@foo.bar") qiita_config.portal = "QIITA" self.assertEqual(user.shared_studies, {qdb.study.Study(1)}) @@ -322,7 +334,7 @@ def test_get_shared_studies(self): self.assertEqual(user.shared_studies, set()) def test_get_private_analyses(self): - user = qdb.user.User('test@foo.bar') + user = qdb.user.User("test@foo.bar") qiita_config.portal = "QIITA" exp = {qdb.analysis.Analysis(1)} self.assertEqual(user.private_analyses, exp) @@ -331,7 +343,7 @@ def test_get_private_analyses(self): self.assertEqual(user.private_analyses, set()) def test_get_shared_analyses(self): - user = qdb.user.User('shared@foo.bar') + user = qdb.user.User("shared@foo.bar") qiita_config.portal = "QIITA" self.assertEqual(user.shared_analyses, {qdb.analysis.Analysis(1)}) @@ -339,8 +351,8 @@ def test_get_shared_analyses(self): self.assertEqual(user.shared_analyses, set()) def test_verify_code(self): - email = 'testverifycode@test.bar' - qdb.user.User.create(email, 'password') + email = "testverifycode@test.bar" + qdb.user.User.create(email, "password") # making sure that we know the user codes sql = """UPDATE qiita.qiita_user SET user_verify_code='verifycode', @@ -348,33 +360,32 @@ def test_verify_code(self): WHERE email=%s""" qdb.sql_connection.perform_as_transaction(sql, [email]) - self.assertFalse( - qdb.user.User.verify_code(email, 'wrongcode', 'create')) - self.assertFalse( - qdb.user.User.verify_code(email, 'wrongcode', 'reset')) + self.assertFalse(qdb.user.User.verify_code(email, "wrongcode", "create")) + self.assertFalse(qdb.user.User.verify_code(email, "wrongcode", "reset")) - self.assertTrue( - qdb.user.User.verify_code(email, 'verifycode', 'create')) - self.assertTrue( - qdb.user.User.verify_code(email, 'resetcode', 'reset')) + self.assertTrue(qdb.user.User.verify_code(email, "verifycode", "create")) + self.assertTrue(qdb.user.User.verify_code(email, "resetcode", "reset")) # make sure errors raised if code already used or wrong type with self.assertRaises(qdb.exceptions.QiitaDBError): - qdb.user.User.verify_code(email, 'verifycode', 'create') + qdb.user.User.verify_code(email, "verifycode", 
"create") with self.assertRaises(qdb.exceptions.QiitaDBError): - qdb.user.User.verify_code(email, 'resetcode', 'reset') + qdb.user.User.verify_code(email, "resetcode", "reset") with self.assertRaises(IncompetentQiitaDeveloperError): - qdb.user.User.verify_code(email, 'fakecode', 'badtype') + qdb.user.User.verify_code(email, "fakecode", "badtype") # make sure default analyses created - sql = ("SELECT email, name, description, dflt FROM qiita.analysis " - "WHERE email = %s") + sql = ( + "SELECT email, name, description, dflt FROM qiita.analysis WHERE email = %s" + ) with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql, [email]) obs = qdb.sql_connection.TRN.execute_fetchindex() - exp = [[email, 'testverifycode@test.bar-dflt-2', 'dflt', True], - [email, 'testverifycode@test.bar-dflt-1', 'dflt', True]] + exp = [ + [email, "testverifycode@test.bar-dflt-2", "dflt", True], + [email, "testverifycode@test.bar-dflt-1", "dflt", True], + ] self.assertEqual(obs, exp) # Make sure default analyses are linked with the portal @@ -389,16 +400,17 @@ def test_verify_code(self): self.assertEqual(obs, 2) def _check_pass(self, user, passwd): - self.assertEqual(qdb.util.hash_password(passwd, user.password), - user.password) + self.assertEqual(qdb.util.hash_password(passwd, user.password), user.password) def test_password(self): - user = qdb.user.User('shared@foo.bar') - self.assertEqual(user.password, '$2a$12$gnUi8Qg.0tvW243v889BhOBhWLIHy' - 'IJjjgaG6dxuRJkUM8nXG9Efe') + user = qdb.user.User("shared@foo.bar") + self.assertEqual( + user.password, + "$2a$12$gnUi8Qg.0tvW243v889BhOBhWLIHyIJjjgaG6dxuRJkUM8nXG9Efe", + ) def test_change_pass(self): - user = qdb.user.User.create('testchangepass@test.bar', 'password') + user = qdb.user.User.create("testchangepass@test.bar", "password") user._change_pass("newpassword") self._check_pass(user, "newpassword") self.assertIsNone(user.info["pass_reset_code"]) @@ -413,12 +425,12 @@ def test_change_password(self): self._check_pass(self.user, "newpassword") def test_change_password_wrong_oldpass(self): - user = qdb.user.User.create('changepasswrongold@test.bar', 'password') + user = qdb.user.User.create("changepasswrongold@test.bar", "password") user.change_password("WRONG", "newpass") self._check_pass(user, "password") def test_generate_reset_code(self): - user = qdb.user.User.create('new@test.bar', 'password') + user = qdb.user.User.create("new@test.bar", "password") sql = "SELECT LOCALTIMESTAMP" with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) @@ -427,16 +439,17 @@ def test_generate_reset_code(self): with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql) after = qdb.sql_connection.TRN.execute_fetchflatten()[0] - sql = ("SELECT pass_reset_code, pass_reset_timestamp FROM " - "qiita.qiita_user WHERE email = %s") - qdb.sql_connection.TRN.add(sql, ('new@test.bar',)) + sql = ( + "SELECT pass_reset_code, pass_reset_timestamp FROM " + "qiita.qiita_user WHERE email = %s" + ) + qdb.sql_connection.TRN.add(sql, ("new@test.bar",)) obscode, obstime = qdb.sql_connection.TRN.execute_fetchindex()[0] self.assertEqual(len(obscode), 20) self.assertTrue(before < obstime < after) def test_change_forgot_password(self): - user = qdb.user.User.create( - 'changeforgotpassword@test.bar', 'password') + user = qdb.user.User.create("changeforgotpassword@test.bar", "password") user.generate_reset_code() code = user.info["pass_reset_code"] obsbool = user.change_forgot_password(code, "newpassword") @@ -444,7 +457,7 @@ def test_change_forgot_password(self): self._check_pass(user, 
"newpassword") def test_change_forgot_password_bad_code(self): - user = qdb.user.User.create('badcode@test.bar', 'password') + user = qdb.user.User.create("badcode@test.bar", "password") user.generate_reset_code() code = "AAAAAAA" obsbool = user.change_forgot_password(code, "newpassword") @@ -452,33 +465,36 @@ def test_change_forgot_password_bad_code(self): self._check_pass(user, "password") def test_messages(self): - qdb.util.add_system_message('SYS MESSAGE', datetime.now()) - user = qdb.user.User('test@foo.bar') + qdb.util.add_system_message("SYS MESSAGE", datetime.now()) + user = qdb.user.User("test@foo.bar") obs = user.messages() exp_msg = [ - 'SYS MESSAGE', 'message 1', - 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. ' - 'Pellentesque sed auctor ex, non placerat sapien. Vestibulum ' - 'vestibulum massa ut sapien condimentum, cursus consequat diam' - ' sodales. Nulla aliquam arcu ut massa auctor, et vehicula ' - 'mauris tempor. In lacinia viverra ante quis pellentesque. ' - 'Nunc vel mi accumsan, porttitor eros ut, pharetra elit. Nulla' - ' ac nisi quis dui egestas malesuada vitae ut mauris. Morbi ' - 'blandit non nisl a finibus. In erat velit, congue at ipsum ' - 'sit amet, venenatis bibendum sem. Curabitur vel odio sed est ' - 'rutrum rutrum. Quisque efficitur ut purus in ultrices. ' - 'Pellentesque eu auctor justo.', 'message 3'] + "SYS MESSAGE", + "message 1", + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " + "Pellentesque sed auctor ex, non placerat sapien. Vestibulum " + "vestibulum massa ut sapien condimentum, cursus consequat diam" + " sodales. Nulla aliquam arcu ut massa auctor, et vehicula " + "mauris tempor. In lacinia viverra ante quis pellentesque. " + "Nunc vel mi accumsan, porttitor eros ut, pharetra elit. Nulla" + " ac nisi quis dui egestas malesuada vitae ut mauris. Morbi " + "blandit non nisl a finibus. In erat velit, congue at ipsum " + "sit amet, venenatis bibendum sem. Curabitur vel odio sed est " + "rutrum rutrum. Quisque efficitur ut purus in ultrices. 
" + "Pellentesque eu auctor justo.", + 'message 3', + ] self.assertCountEqual([(x[1]) for x in obs], exp_msg) self.assertTrue(all(x[2] < datetime.now() for x in obs)) self.assertFalse(all(x[3] for x in obs)) self.assertEqual([x[4] for x in obs], [True, False, False, False]) obs = user.messages(1) - exp_msg = ['SYS MESSAGE'] + exp_msg = ["SYS MESSAGE"] self.assertEqual([x[1] for x in obs], exp_msg) def test_mark_messages(self): - user = qdb.user.User('test@foo.bar') + user = qdb.user.User("test@foo.bar") user.mark_messages([1, 2]) obs = user.messages() exp = [True, True, False] @@ -490,7 +506,7 @@ def test_mark_messages(self): self.assertCountEqual([x[3] for x in obs], exp) def test_delete_messages(self): - user = qdb.user.User.create('deletemsg@test.bar', 'password') + user = qdb.user.User.create("deletemsg@test.bar", "password") self.assertEqual(user.messages(), []) qdb.util.add_message("New message", [user]) user_msgs = user.messages() @@ -502,79 +518,91 @@ def test_delete_messages(self): self.assertEqual([msg[1] for msg in user.messages()], []) def test_user_artifacts(self): - user = qdb.user.User('test@foo.bar') + user = qdb.user.User("test@foo.bar") obs = user.user_artifacts() - exp = {qdb.study.Study(1): [qdb.artifact.Artifact(1), - qdb.artifact.Artifact(2), - qdb.artifact.Artifact(3), - qdb.artifact.Artifact(4), - qdb.artifact.Artifact(5), - qdb.artifact.Artifact(6), - qdb.artifact.Artifact(7)]} + exp = { + qdb.study.Study(1): [ + qdb.artifact.Artifact(1), + qdb.artifact.Artifact(2), + qdb.artifact.Artifact(3), + qdb.artifact.Artifact(4), + qdb.artifact.Artifact(5), + qdb.artifact.Artifact(6), + qdb.artifact.Artifact(7), + ] + } self.assertEqual(obs, exp) - obs = user.user_artifacts(artifact_type='BIOM') - exp = {qdb.study.Study(1): [qdb.artifact.Artifact(4), - qdb.artifact.Artifact(5), - qdb.artifact.Artifact(6), - qdb.artifact.Artifact(7)]} + obs = user.user_artifacts(artifact_type="BIOM") + exp = { + qdb.study.Study(1): [ + qdb.artifact.Artifact(4), + qdb.artifact.Artifact(5), + qdb.artifact.Artifact(6), + qdb.artifact.Artifact(7), + ] + } self.assertEqual(obs, exp) def test_jobs(self): PJ = qdb.processing_job.ProcessingJob ignore_status = [] # generates expected jobs - jobs = qdb.user.User('shared@foo.bar').jobs( - ignore_status=ignore_status) - self.assertEqual(jobs, [PJ('b72369f9-a886-4193-8d3d-f7b504168e75')]) - - jobs = qdb.user.User('shared@foo.bar').jobs( - ignore_status=ignore_status, show_hidden=True) - self.assertEqual(jobs, [ - PJ('d19f76ee-274e-4c1b-b3a2-a12d73507c55'), - PJ('b72369f9-a886-4193-8d3d-f7b504168e75')]) + jobs = qdb.user.User("shared@foo.bar").jobs(ignore_status=ignore_status) + self.assertEqual(jobs, [PJ("b72369f9-a886-4193-8d3d-f7b504168e75")]) + + jobs = qdb.user.User("shared@foo.bar").jobs( + ignore_status=ignore_status, show_hidden=True + ) + self.assertEqual( + jobs, + [ + PJ("d19f76ee-274e-4c1b-b3a2-a12d73507c55"), + PJ("b72369f9-a886-4193-8d3d-f7b504168e75"), + ], + ) # just one job - self.assertEqual(qdb.user.User('shared@foo.bar').jobs( - limit=1, ignore_status=ignore_status), [ - PJ('b72369f9-a886-4193-8d3d-f7b504168e75')]) + self.assertEqual( + qdb.user.User("shared@foo.bar").jobs(limit=1, ignore_status=ignore_status), + [PJ("b72369f9-a886-4193-8d3d-f7b504168e75")], + ) # generates expected jobs - jobs = qdb.user.User('shared@foo.bar').jobs() + jobs = qdb.user.User("shared@foo.bar").jobs() self.assertEqual(jobs, []) def test_update_email(self): - user = qdb.user.User('shared@foo.bar') - with self.assertRaisesRegex(IncorrectEmailError, 'Bad 
email given:'): - user.update_email('bladfa.adferqerq@$EWE') + user = qdb.user.User("shared@foo.bar") + with self.assertRaisesRegex(IncorrectEmailError, "Bad email given:"): + user.update_email("bladfa.adferqerq@$EWE") - with self.assertRaisesRegex(IncorrectEmailError, - 'This email already exists'): - user.update_email('test@foo.bar') + with self.assertRaisesRegex(IncorrectEmailError, "This email already exists"): + user.update_email("test@foo.bar") - user.update_email('bla@ble.bli') + user.update_email("bla@ble.bli") def test_slurm_parameters(self): - self.assertEqual(qdb.user.User('shared@foo.bar').slurm_parameters, - '--nice=10000') - self.assertEqual(qdb.user.User('admin@foo.bar').slurm_parameters, - '--nice=5000') + self.assertEqual( + qdb.user.User("shared@foo.bar").slurm_parameters, "--nice=10000" + ) + self.assertEqual(qdb.user.User("admin@foo.bar").slurm_parameters, "--nice=5000") @qiita_test_checker() class DeleteUser(TestCase): def test_delete_users(self): # let's start with the errors - error = 'This email does not exist: x@y.z' + error = "This email does not exist: x@y.z" with self.assertRaisesRegex(IncorrectEmailError, error): - qdb.user.User.delete('x@y.z') + qdb.user.User.delete("x@y.z") with self.assertRaises(ValueError): - qdb.user.User.delete('shared@foo.bar') + qdb.user.User.delete("shared@foo.bar") - qdb.user.User.delete('shared@foo.bar', True) + qdb.user.User.delete("shared@foo.bar", True) # verify that the user doesn't exist any more with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): - qdb.user.User('shared@foo.bar') + qdb.user.User("shared@foo.bar") if __name__ == "__main__": diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py index e59be819f..d4ad31df5 100644 --- a/qiita_db/test/test_util.py +++ b/qiita_db/test/test_util.py @@ -6,35 +6,43 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main -from tempfile import mkstemp, mkdtemp, NamedTemporaryFile, TemporaryFile -from os import close, remove, mkdir -from os.path import join, exists, basename -from shutil import rmtree from datetime import datetime from functools import partial +from os import close, mkdir, remove +from os.path import basename, exists, join +from shutil import rmtree from string import punctuation +from tempfile import NamedTemporaryFile, TemporaryFile, mkdtemp, mkstemp +from unittest import TestCase, main + import h5py -from six import StringIO, BytesIO +import matplotlib.pyplot as plt import pandas as pd +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from six import BytesIO, StringIO -from qiita_core.util import qiita_test_checker import qiita_db as qdb - -from matplotlib.figure import Figure -from matplotlib.axes import Axes -import matplotlib.pyplot as plt +from qiita_core.util import qiita_test_checker @qiita_test_checker() class DBUtilTestsBase(TestCase): def setUp(self): - self.table = 'study' + self.table = "study" self.required = [ - 'study_title', 'mixs_compliant', - 'metadata_complete', 'study_description', 'first_contact', - 'reprocess', 'timeseries_type_id', 'study_alias', - 'study_abstract', 'principal_investigator_id', 'email'] + "study_title", + "mixs_compliant", + "metadata_complete", + "study_description", + "first_contact", + "reprocess", + "timeseries_type_id", + "study_alias", + "study_abstract", + "principal_investigator_id", + "email", + ] self.files_to_remove = [] def tearDown(self): @@ -75,7 +83,7 @@ def test_check_required_columns(self): qdb.util.check_required_columns(self.required, self.table) def test_check_required_columns_fail(self): - self.required.remove('study_title') + self.required.remove("study_title") with self.assertRaises(qdb.exceptions.QiitaDBColumnError): qdb.util.check_required_columns(self.required, self.table) @@ -84,17 +92,29 @@ def test_check_table_cols(self): qdb.util.check_table_cols(self.required, self.table) def test_check_table_cols_fail(self): - self.required.append('BADTHINGNOINHERE') + self.required.append("BADTHINGNOINHERE") with self.assertRaises(qdb.exceptions.QiitaDBColumnError): qdb.util.check_table_cols(self.required, self.table) def test_get_table_cols(self): obs = qdb.util.get_table_cols("qiita_user") - exp = {"email", "user_level_id", "password", "name", "affiliation", - "address", "phone", "user_verify_code", "pass_reset_code", - "pass_reset_timestamp", "receive_processing_job_emails", - "social_orcid", "social_researchgate", "social_googlescholar", - "creation_timestamp"} + exp = { + "email", + "user_level_id", + "password", + "name", + "affiliation", + "address", + "phone", + "user_verify_code", + "pass_reset_code", + "pass_reset_timestamp", + "receive_processing_job_emails", + "social_orcid", + "social_researchgate", + "social_googlescholar", + "creation_timestamp", + } self.assertEqual(set(obs), exp) def test_exists_table(self): @@ -113,12 +133,11 @@ def test_exists_table(self): def test_convert_to_id(self): """Tests that ids are returned correctly""" + self.assertEqual(qdb.util.convert_to_id("directory", "filepath_type"), 8) self.assertEqual( - qdb.util.convert_to_id("directory", "filepath_type"), 8) - self.assertEqual( - qdb.util.convert_to_id("private", "visibility", "visibility"), 3) - self.assertEqual( - qdb.util.convert_to_id("EMP", "portal_type", "portal"), 2) + 
qdb.util.convert_to_id("private", "visibility", "visibility"), 3 + ) + self.assertEqual(qdb.util.convert_to_id("EMP", "portal_type", "portal"), 2) def test_convert_to_id_bad_value(self): """Tests that ids are returned correctly""" @@ -127,10 +146,18 @@ def test_convert_to_id_bad_value(self): def test_get_artifact_types(self): obs = qdb.util.get_artifact_types() - exp = {'SFF': 1, 'FASTA_Sanger': 2, 'FASTQ': 3, 'FASTA': 4, - 'per_sample_FASTQ': 5, 'Demultiplexed': 6, 'BIOM': 7, - 'beta_div_plots': 8, 'rarefaction_curves': 9, - 'taxa_summary': 10} + exp = { + "SFF": 1, + "FASTA_Sanger": 2, + "FASTQ": 3, + "FASTA": 4, + "per_sample_FASTQ": 5, + "Demultiplexed": 6, + "BIOM": 7, + "beta_div_plots": 8, + "rarefaction_curves": 9, + "taxa_summary": 10, + } self.assertEqual(obs, exp) obs = qdb.util.get_artifact_types(key_by_id=True) @@ -140,39 +167,61 @@ def test_get_artifact_types(self): def test_get_filepath_types(self): """Tests that get_filepath_types works with valid arguments""" obs = qdb.util.get_filepath_types() - exp = {'raw_forward_seqs': 1, 'raw_reverse_seqs': 2, - 'raw_barcodes': 3, 'preprocessed_fasta': 4, - 'preprocessed_fastq': 5, 'preprocessed_demux': 6, 'biom': 7, - 'directory': 8, 'plain_text': 9, 'reference_seqs': 10, - 'reference_tax': 11, 'reference_tree': 12, 'log': 13, - 'sample_template': 14, 'prep_template': 15, 'qiime_map': 16, - 'bam': 17 - } + exp = { + "raw_forward_seqs": 1, + "raw_reverse_seqs": 2, + "raw_barcodes": 3, + "preprocessed_fasta": 4, + "preprocessed_fastq": 5, + "preprocessed_demux": 6, + "biom": 7, + "directory": 8, + "plain_text": 9, + "reference_seqs": 10, + "reference_tax": 11, + "reference_tree": 12, + "log": 13, + "sample_template": 14, + "prep_template": 15, + "qiime_map": 16, + "bam": 17, + } with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add("SELECT filepath_type,filepath_type_id " - "FROM qiita.filepath_type") + qdb.sql_connection.TRN.add( + "SELECT filepath_type,filepath_type_id FROM qiita.filepath_type" + ) exp = dict(qdb.sql_connection.TRN.execute_fetchindex()) self.assertEqual(obs, exp) - obs = qdb.util.get_filepath_types(key='filepath_type_id') + obs = qdb.util.get_filepath_types(key="filepath_type_id") exp = {v: k for k, v in exp.items()} self.assertEqual(obs, exp) def test_get_filepath_types_fail(self): """Tests that get_Filetypes fails with invalid argument""" with self.assertRaises(qdb.exceptions.QiitaDBColumnError): - qdb.util.get_filepath_types(key='invalid') + qdb.util.get_filepath_types(key="invalid") def test_get_data_types(self): """Tests that get_data_types works with valid arguments""" obs = qdb.util.get_data_types() - exp = {'16S': 1, '18S': 2, 'ITS': 3, 'Proteomic': 4, 'Metabolomic': 5, - 'Metagenomic': 6, 'Multiomic': 7, 'Metatranscriptomics': 8, - 'Viromics': 9, 'Genomics': 10, 'Transcriptomics': 11, - 'Job Output Folder': 12} + exp = { + "16S": 1, + "18S": 2, + "ITS": 3, + "Proteomic": 4, + "Metabolomic": 5, + "Metagenomic": 6, + "Multiomic": 7, + "Metatranscriptomics": 8, + "Viromics": 9, + "Genomics": 10, + "Transcriptomics": 11, + "Job Output Folder": 12, + } self.assertEqual(obs, exp) - obs = qdb.util.get_data_types(key='data_type_id') + obs = qdb.util.get_data_types(key="data_type_id") exp = {v: k for k, v in exp.items()} self.assertEqual(obs, exp) @@ -189,12 +238,12 @@ def test_create_rand_string(self): def test_get_count(self): """Checks that get_count retrieves proper count""" - self.assertEqual(qdb.util.get_count('qiita.study_person'), 3) + self.assertEqual(qdb.util.get_count("qiita.study_person"), 3) def 
test_check_count(self): """Checks that check_count returns True and False appropriately""" - self.assertTrue(qdb.util.check_count('qiita.study_person', 3)) - self.assertFalse(qdb.util.check_count('qiita.study_person', 2)) + self.assertTrue(qdb.util.check_count("qiita.study_person", 3)) + self.assertFalse(qdb.util.check_count("qiita.study_person", 2)) def test_insert_filepaths(self): fd, fp = mkstemp() @@ -205,25 +254,28 @@ def test_insert_filepaths(self): with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add( - "SELECT last_value FROM qiita.filepath_filepath_id_seq") + "SELECT last_value FROM qiita.filepath_filepath_id_seq" + ) exp_new_id = 1 + qdb.sql_connection.TRN.execute_fetchflatten()[0] obs = qdb.util.insert_filepaths([(fp, 1)], 2, "raw_data") self.assertEqual(obs, [exp_new_id]) # Check that the files have been copied correctly - exp_fp = join(qdb.util.get_db_files_base_dir(), "raw_data", - "2_%s" % basename(fp)) + exp_fp = join( + qdb.util.get_db_files_base_dir(), "raw_data", "2_%s" % basename(fp) + ) self.assertTrue(exists(exp_fp)) self.assertFalse(exists(fp)) self.files_to_remove.append(exp_fp) # Check that the filepaths have been added to the DB with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add("SELECT * FROM qiita.filepath " - "WHERE filepath_id=%d" % exp_new_id) + qdb.sql_connection.TRN.add( + "SELECT * FROM qiita.filepath WHERE filepath_id=%d" % exp_new_id + ) obs = qdb.sql_connection.TRN.execute_fetchindex() exp_fp = "2_%s" % basename(fp) - exp = [[exp_new_id, exp_fp, 1, '852952723', 1, 5, 1]] + exp = [[exp_new_id, exp_fp, 1, "852952723", 1, 5, 1]] self.assertEqual(obs, exp) qdb.util.purge_filepaths() @@ -239,37 +291,43 @@ def test_insert_filepaths_copy(self): # autoincremented for each element introduced. with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add( - "SELECT last_value FROM qiita.filepath_filepath_id_seq") + "SELECT last_value FROM qiita.filepath_filepath_id_seq" + ) exp_new_id = 1 + qdb.sql_connection.TRN.execute_fetchflatten()[0] - obs = qdb.util.insert_filepaths([(fp, 1)], 2, "raw_data", - move_files=False, copy=True) + obs = qdb.util.insert_filepaths( + [(fp, 1)], 2, "raw_data", move_files=False, copy=True + ) self.assertEqual(obs, [exp_new_id]) # Check that the files have been copied correctly - exp_fp = join(qdb.util.get_db_files_base_dir(), "raw_data", - "2_%s" % basename(fp)) + exp_fp = join( + qdb.util.get_db_files_base_dir(), "raw_data", "2_%s" % basename(fp) + ) self.assertTrue(exists(exp_fp)) self.assertTrue(exists(fp)) self.files_to_remove.append(exp_fp) # Check that the filepaths have been added to the DB with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add("SELECT * FROM qiita.filepath " - "WHERE filepath_id=%d" % exp_new_id) + qdb.sql_connection.TRN.add( + "SELECT * FROM qiita.filepath WHERE filepath_id=%d" % exp_new_id + ) obs = qdb.sql_connection.TRN.execute_fetchindex() exp_fp = "2_%s" % basename(fp) - exp = [[exp_new_id, exp_fp, 1, '852952723', 1, 5, 1]] + exp = [[exp_new_id, exp_fp, 1, "852952723", 1, 5, 1]] self.assertEqual(obs, exp) # let's do that again but with move_files = True exp_new_id += 1 - obs = qdb.util.insert_filepaths([(fp, 1)], 2, "raw_data", - move_files=True, copy=True) + obs = qdb.util.insert_filepaths( + [(fp, 1)], 2, "raw_data", move_files=True, copy=True + ) self.assertEqual(obs, [exp_new_id]) # Check that the files have been copied correctly - exp_fp = join(qdb.util.get_db_files_base_dir(), "raw_data", - "2_%s" % basename(fp)) + exp_fp = join( + qdb.util.get_db_files_base_dir(), "raw_data", "2_%s" % 
basename(fp) + ) self.assertTrue(exists(exp_fp)) self.assertTrue(exists(fp)) self.files_to_remove.append(exp_fp) @@ -285,107 +343,131 @@ def test_insert_filepaths_string(self): with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add( - "SELECT last_value FROM qiita.filepath_filepath_id_seq") + "SELECT last_value FROM qiita.filepath_filepath_id_seq" + ) exp_new_id = 1 + qdb.sql_connection.TRN.execute_fetchflatten()[0] - obs = qdb.util.insert_filepaths( - [(fp, "raw_forward_seqs")], 2, "raw_data") + obs = qdb.util.insert_filepaths([(fp, "raw_forward_seqs")], 2, "raw_data") self.assertEqual(obs, [exp_new_id]) # Check that the files have been copied correctly - exp_fp = join(qdb.util.get_db_files_base_dir(), "raw_data", - "2_%s" % basename(fp)) + exp_fp = join( + qdb.util.get_db_files_base_dir(), "raw_data", "2_%s" % basename(fp) + ) self.assertTrue(exists(exp_fp)) self.files_to_remove.append(exp_fp) # Check that the filepaths have been added to the DB with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add("SELECT * FROM qiita.filepath " - "WHERE filepath_id=%d" % exp_new_id) + qdb.sql_connection.TRN.add( + "SELECT * FROM qiita.filepath WHERE filepath_id=%d" % exp_new_id + ) obs = qdb.sql_connection.TRN.execute_fetchindex() exp_fp = "2_%s" % basename(fp) - exp = [[exp_new_id, exp_fp, 1, '852952723', 1, 5, 1]] + exp = [[exp_new_id, exp_fp, 1, "852952723", 1, 5, 1]] self.assertEqual(obs, exp) qdb.util.purge_filepaths() def test_retrieve_filepaths(self): - obs = qdb.util.retrieve_filepaths('artifact_filepath', - 'artifact_id', 1) - path_builder = partial( - join, qdb.util.get_db_files_base_dir(), "raw_data") - exp = [{'fp_id': 1, - 'fp': path_builder("1_s_G1_L001_sequences.fastq.gz"), - 'fp_type': "raw_forward_seqs", - 'checksum': '2125826711', - 'fp_size': 58}, - {'fp_id': 2, - 'fp': path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), - 'fp_type': "raw_barcodes", - 'checksum': '2125826711', - 'fp_size': 58}] + obs = qdb.util.retrieve_filepaths("artifact_filepath", "artifact_id", 1) + path_builder = partial(join, qdb.util.get_db_files_base_dir(), "raw_data") + exp = [ + { + "fp_id": 1, + "fp": path_builder("1_s_G1_L001_sequences.fastq.gz"), + "fp_type": "raw_forward_seqs", + "checksum": "2125826711", + "fp_size": 58, + }, + { + "fp_id": 2, + "fp": path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), + "fp_type": "raw_barcodes", + "checksum": "2125826711", + "fp_size": 58, + }, + ] self.assertEqual(obs, exp) def test_retrieve_filepaths_sort(self): obs = qdb.util.retrieve_filepaths( - 'artifact_filepath', 'artifact_id', 1, sort='descending') - path_builder = partial( - join, qdb.util.get_db_files_base_dir(), "raw_data") - exp = [{'fp_id': 2, - 'fp': path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), - 'fp_type': "raw_barcodes", - 'checksum': '2125826711', - 'fp_size': 58}, - {'fp_id': 1, - 'fp': path_builder("1_s_G1_L001_sequences.fastq.gz"), - 'fp_type': "raw_forward_seqs", - 'checksum': '2125826711', - 'fp_size': 58}] + "artifact_filepath", "artifact_id", 1, sort="descending" + ) + path_builder = partial(join, qdb.util.get_db_files_base_dir(), "raw_data") + exp = [ + { + "fp_id": 2, + "fp": path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), + "fp_type": "raw_barcodes", + "checksum": "2125826711", + "fp_size": 58, + }, + { + "fp_id": 1, + "fp": path_builder("1_s_G1_L001_sequences.fastq.gz"), + "fp_type": "raw_forward_seqs", + "checksum": "2125826711", + "fp_size": 58, + }, + ] self.assertEqual(obs, exp) def test_retrieve_filepaths_type(self): obs = 
qdb.util.retrieve_filepaths( - 'artifact_filepath', 'artifact_id', 1, sort='descending', - fp_type='raw_barcodes') - path_builder = partial( - join, qdb.util.get_db_files_base_dir(), "raw_data") - exp = [{'fp_id': 2, - 'fp': path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), - 'fp_type': "raw_barcodes", - 'checksum': '2125826711', - 'fp_size': 58}] + "artifact_filepath", + "artifact_id", + 1, + sort="descending", + fp_type="raw_barcodes", + ) + path_builder = partial(join, qdb.util.get_db_files_base_dir(), "raw_data") + exp = [ + { + "fp_id": 2, + "fp": path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), + "fp_type": "raw_barcodes", + "checksum": "2125826711", + "fp_size": 58, + } + ] self.assertEqual(obs, exp) obs = qdb.util.retrieve_filepaths( - 'artifact_filepath', 'artifact_id', 1, fp_type='raw_barcodes') - path_builder = partial( - join, qdb.util.get_db_files_base_dir(), "raw_data") - exp = [{'fp_id': 2, - 'fp': path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), - 'fp_type': "raw_barcodes", - 'checksum': '2125826711', - 'fp_size': 58}] + "artifact_filepath", "artifact_id", 1, fp_type="raw_barcodes" + ) + path_builder = partial(join, qdb.util.get_db_files_base_dir(), "raw_data") + exp = [ + { + "fp_id": 2, + "fp": path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), + "fp_type": "raw_barcodes", + "checksum": "2125826711", + "fp_size": 58, + } + ] self.assertEqual(obs, exp) obs = qdb.util.retrieve_filepaths( - 'artifact_filepath', 'artifact_id', 1, fp_type='biom') - path_builder = partial( - join, qdb.util.get_db_files_base_dir(), "raw_data") + "artifact_filepath", "artifact_id", 1, fp_type="biom" + ) + path_builder = partial(join, qdb.util.get_db_files_base_dir(), "raw_data") self.assertEqual(obs, []) def test_retrieve_filepaths_error(self): with self.assertRaises(qdb.exceptions.QiitaDBError): - qdb.util.retrieve_filepaths('artifact_filepath', 'artifact_id', 1, - sort='Unknown') + qdb.util.retrieve_filepaths( + "artifact_filepath", "artifact_id", 1, sort="Unknown" + ) def test_empty_trash_upload_folder(self): # creating file to delete so we know it actually works - study_id = '1' + study_id = "1" uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], study_id) - trash = join(uploads_fp, 'trash') + trash = join(uploads_fp, "trash") if not exists(trash): mkdir(trash) - fp = join(trash, 'my_file_to_delete.txt') - open(fp, 'w').close() + fp = join(trash, "my_file_to_delete.txt") + open(fp, "w").close() self.assertTrue(exists(fp)) qdb.util.empty_trash_upload_folder() @@ -399,49 +481,53 @@ def test_move_filepaths_to_upload_folder(self): # there are no conflicts with this study_id = 1 # creating the 2 sets of files for the 2 artifacts - fd, seqs_fp1 = mkstemp(suffix='_seqs.fastq') + fd, seqs_fp1 = mkstemp(suffix="_seqs.fastq") close(fd) html_fp1 = mkdtemp() - html_fp1 = join(html_fp1, 'support_files') + html_fp1 = join(html_fp1, "support_files") mkdir(html_fp1) - with open(join(html_fp1, 'index.html'), 'w') as fp: + with open(join(html_fp1, "index.html"), "w") as fp: fp.write(">AAA\nAAA") - fd, seqs_fp2 = mkstemp(suffix='_seqs.fastq') + fd, seqs_fp2 = mkstemp(suffix="_seqs.fastq") close(fd) html_fp2 = mkdtemp() - html_fp2 = join(html_fp2, 'support_files') + html_fp2 = join(html_fp2, "support_files") mkdir(html_fp2) - with open(join(html_fp2, 'index.html'), 'w') as fp: + with open(join(html_fp2, "index.html"), "w") as fp: fp.write(">AAA\nAAA") # creating new prep info file metadata_dict = { - 'SKB8.640193': {'center_name': 'ANL', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 
'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'}} - metadata = pd.DataFrame.from_dict( - metadata_dict, orient='index', dtype=str) + "SKB8.640193": { + "center_name": "ANL", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "experiment_design_description": "BBBB", + } + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) pt1 = qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(study_id), "16S") + metadata, qdb.study.Study(study_id), "16S" + ) pt2 = qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, qdb.study.Study(study_id), "16S") + metadata, qdb.study.Study(study_id), "16S" + ) # inserting artifact 1 artifact1 = qdb.artifact.Artifact.create( - [(seqs_fp1, 1), (html_fp1, 'html_summary')], "FASTQ", - prep_template=pt1) + [(seqs_fp1, 1), (html_fp1, "html_summary")], "FASTQ", prep_template=pt1 + ) filepaths = artifact1.filepaths # inserting artifact 2 artifact2 = qdb.artifact.Artifact.create( - [(seqs_fp2, 1), (html_fp2, 'html_summary')], "FASTQ", - prep_template=pt2) + [(seqs_fp2, 1), (html_fp2, "html_summary")], "FASTQ", prep_template=pt2 + ) filepaths.extend(artifact2.filepaths) # get before delete files in upload folders @@ -462,19 +548,19 @@ def test_move_filepaths_to_upload_folder(self): diff_upload = set(GUPLOADS("1")) - set(upload_files) self.assertEqual(len(diff_upload), 2) self.assertCountEqual( - [x[1] for x in diff_upload], - [basename(seqs_fp1), basename(seqs_fp2)]) + [x[1] for x in diff_upload], [basename(seqs_fp1), basename(seqs_fp2)] + ) # now let's create another artifact with the same filenames that # artifact1 so we can test successfull overlapping of names - with open(seqs_fp1, 'w') as fp: + with open(seqs_fp1, "w") as fp: fp.write(">AAA\nAAA") mkdir(html_fp1) - with open(join(html_fp1, 'index.html'), 'w') as fp: + with open(join(html_fp1, "index.html"), "w") as fp: fp.write(">AAA\nAAA") artifact3 = qdb.artifact.Artifact.create( - [(seqs_fp1, 1), (html_fp1, 'html_summary')], "FASTQ", - prep_template=pt1) + [(seqs_fp1, 1), (html_fp1, "html_summary")], "FASTQ", prep_template=pt1 + ) filepaths.extend(artifact3.filepaths) qdb.artifact.Artifact.delete(artifact3.id) @@ -482,32 +568,32 @@ def test_move_filepaths_to_upload_folder(self): diff_upload = set(GUPLOADS("1")) - set(upload_files) self.assertEqual(len(diff_upload), 2) self.assertCountEqual( - [x[1] for x in diff_upload], - [basename(seqs_fp1), basename(seqs_fp2)]) + [x[1] for x in diff_upload], [basename(seqs_fp1), basename(seqs_fp2)] + ) bd = qdb.util.get_mountpoint("uploads")[0][1] for x in filepaths: - self.files_to_remove.append(join(bd, "1", basename(x['fp']))) + self.files_to_remove.append(join(bd, "1", basename(x["fp"]))) def test_get_mountpoint(self): - exp = [(5, join(qdb.util.get_db_files_base_dir(), 'raw_data'))] + exp = [(5, join(qdb.util.get_db_files_base_dir(), "raw_data"))] obs = qdb.util.get_mountpoint("raw_data") self.assertEqual(obs, exp) - exp = [(1, join(qdb.util.get_db_files_base_dir(), 'analysis'))] + exp = [(1, join(qdb.util.get_db_files_base_dir(), "analysis"))] obs = qdb.util.get_mountpoint("analysis") self.assertEqual(obs, exp) - exp = [(2, join(qdb.util.get_db_files_base_dir(), 'job'))] + 
exp = [(2, join(qdb.util.get_db_files_base_dir(), "job"))] obs = qdb.util.get_mountpoint("job") self.assertEqual(obs, exp) # inserting new ones so we can test that it retrieves these and # doesn't alter other ones qdb.sql_connection.perform_as_transaction( - "UPDATE qiita.data_directory SET active=false WHERE " - "data_directory_id=1") - count = qdb.util.get_count('qiita.data_directory') + "UPDATE qiita.data_directory SET active=false WHERE data_directory_id=1" + ) + count = qdb.util.get_count("qiita.data_directory") sql = """INSERT INTO qiita.data_directory (data_type, mountpoint, subdirectory, active) VALUES ('analysis', 'analysis_tmp', true, true), @@ -515,55 +601,56 @@ def test_get_mountpoint(self): qdb.sql_connection.perform_as_transaction(sql) # this should have been updated - exp = [(count + 1, join(qdb.util.get_db_files_base_dir(), - 'analysis_tmp'))] + exp = [(count + 1, join(qdb.util.get_db_files_base_dir(), "analysis_tmp"))] obs = qdb.util.get_mountpoint("analysis") self.assertEqual(obs, exp) # these 2 shouldn't - exp = [(5, join(qdb.util.get_db_files_base_dir(), 'raw_data'))] + exp = [(5, join(qdb.util.get_db_files_base_dir(), "raw_data"))] obs = qdb.util.get_mountpoint("raw_data") self.assertEqual(obs, exp) - exp = [(2, join(qdb.util.get_db_files_base_dir(), 'job'))] + exp = [(2, join(qdb.util.get_db_files_base_dir(), "job"))] obs = qdb.util.get_mountpoint("job") self.assertEqual(obs, exp) # testing multi returns - exp = [(5, join(qdb.util.get_db_files_base_dir(), 'raw_data')), - (count + 2, join(qdb.util.get_db_files_base_dir(), - 'raw_data_tmp'))] + exp = [ + (5, join(qdb.util.get_db_files_base_dir(), "raw_data")), + (count + 2, join(qdb.util.get_db_files_base_dir(), "raw_data_tmp")), + ] obs = qdb.util.get_mountpoint("raw_data", retrieve_all=True) self.assertEqual(obs, exp) # testing retrieve subdirectory exp = [ - (5, join(qdb.util.get_db_files_base_dir(), 'raw_data'), False), - (count + 2, join(qdb.util.get_db_files_base_dir(), 'raw_data_tmp'), - True)] - obs = qdb.util.get_mountpoint("raw_data", retrieve_all=True, - retrieve_subdir=True) + (5, join(qdb.util.get_db_files_base_dir(), "raw_data"), False), + (count + 2, join(qdb.util.get_db_files_base_dir(), "raw_data_tmp"), True), + ] + obs = qdb.util.get_mountpoint( + "raw_data", retrieve_all=True, retrieve_subdir=True + ) self.assertEqual(obs, exp) def test_get_mountpoint_path_by_id(self): - exp = join(qdb.util.get_db_files_base_dir(), 'raw_data') + exp = join(qdb.util.get_db_files_base_dir(), "raw_data") obs = qdb.util.get_mountpoint_path_by_id(5) self.assertEqual(obs, exp) - exp = join(qdb.util.get_db_files_base_dir(), 'analysis') + exp = join(qdb.util.get_db_files_base_dir(), "analysis") obs = qdb.util.get_mountpoint_path_by_id(1) self.assertEqual(obs, exp) - exp = join(qdb.util.get_db_files_base_dir(), 'job') + exp = join(qdb.util.get_db_files_base_dir(), "job") obs = qdb.util.get_mountpoint_path_by_id(2) self.assertEqual(obs, exp) # inserting new ones so we can test that it retrieves these and # doesn't alter other ones qdb.sql_connection.perform_as_transaction( - "UPDATE qiita.data_directory SET active=false WHERE " - "data_directory_id=1") - count = qdb.util.get_count('qiita.data_directory') + "UPDATE qiita.data_directory SET active=false WHERE data_directory_id=1" + ) + count = qdb.util.get_count("qiita.data_directory") sql = """INSERT INTO qiita.data_directory (data_type, mountpoint, subdirectory, active) VALUES ('analysis', 'analysis_tmp', true, true), @@ -571,23 +658,23 @@ def 
test_get_mountpoint_path_by_id(self): qdb.sql_connection.perform_as_transaction(sql) # this should have been updated - exp = join(qdb.util.get_db_files_base_dir(), 'analysis_tmp') + exp = join(qdb.util.get_db_files_base_dir(), "analysis_tmp") obs = qdb.util.get_mountpoint_path_by_id(count + 1) self.assertEqual(obs, exp) # these 2 shouldn't - exp = join(qdb.util.get_db_files_base_dir(), 'raw_data') + exp = join(qdb.util.get_db_files_base_dir(), "raw_data") obs = qdb.util.get_mountpoint_path_by_id(5) self.assertEqual(obs, exp) - exp = join(qdb.util.get_db_files_base_dir(), 'job') + exp = join(qdb.util.get_db_files_base_dir(), "job") obs = qdb.util.get_mountpoint_path_by_id(2) self.assertEqual(obs, exp) def test_get_files_from_uploads_folders(self): # something has been uploaded and ignoring hidden files/folders # and folders - exp = (7, 'uploaded_file.txt', '0B') + exp = (7, "uploaded_file.txt", "0B") obs = qdb.util.get_files_from_uploads_folders("1") self.assertIn(exp, obs) @@ -597,17 +684,17 @@ def test_get_files_from_uploads_folders(self): self.assertEqual(obs, exp) def test_move_upload_files_to_trash(self): - test_filename = 'this_is_a_test_file.txt' + test_filename = "this_is_a_test_file.txt" # create file to move to trash fid, folder = qdb.util.get_mountpoint("uploads")[0] - test_fp = join(folder, '1', test_filename) - with open(test_fp, 'w') as f: - f.write('test') + test_fp = join(folder, "1", test_filename) + with open(test_fp, "w") as f: + f.write("test") self.files_to_remove.append(test_fp) - exp = (fid, 'this_is_a_test_file.txt', '4B') + exp = (fid, "this_is_a_test_file.txt", "4B") obs = qdb.util.get_files_from_uploads_folders("1") self.assertIn(exp, obs) @@ -628,121 +715,128 @@ def test_move_upload_files_to_trash(self): qdb.util.move_upload_files_to_trash(1, [(10, test_filename)]) # removing trash folder - rmtree(join(folder, '1', 'trash')) + rmtree(join(folder, "1", "trash")) def test_get_environmental_packages(self): obs = qdb.util.get_environmental_packages() - exp = [['air', 'ep_air'], - ['built environment', 'ep_built_environment'], - ['host-associated', 'ep_host_associated'], - ['human-amniotic-fluid', 'ep_human_amniotic_fluid'], - ['human-associated', 'ep_human_associated'], - ['human-blood', 'ep_human_blood'], - ['human-gut', 'ep_human_gut'], - ['human-oral', 'ep_human_oral'], - ['human-skin', 'ep_human_skin'], - ['human-urine', 'ep_human_urine'], - ['human-vaginal', 'ep_human_vaginal'], - ['microbial mat/biofilm', 'ep_microbial_mat_biofilm'], - ['miscellaneous natural or artificial environment', - 'ep_misc_artif'], - ['plant-associated', 'ep_plant_associated'], - ['sediment', 'ep_sediment'], - ['soil', 'ep_soil'], - ['wastewater/sludge', 'ep_wastewater_sludge'], - ['water', 'ep_water']] + exp = [ + ["air", "ep_air"], + ["built environment", "ep_built_environment"], + ["host-associated", "ep_host_associated"], + ["human-amniotic-fluid", "ep_human_amniotic_fluid"], + ["human-associated", "ep_human_associated"], + ["human-blood", "ep_human_blood"], + ["human-gut", "ep_human_gut"], + ["human-oral", "ep_human_oral"], + ["human-skin", "ep_human_skin"], + ["human-urine", "ep_human_urine"], + ["human-vaginal", "ep_human_vaginal"], + ["microbial mat/biofilm", "ep_microbial_mat_biofilm"], + ["miscellaneous natural or artificial environment", "ep_misc_artif"], + ["plant-associated", "ep_plant_associated"], + ["sediment", "ep_sediment"], + ["soil", "ep_soil"], + ["wastewater/sludge", "ep_wastewater_sludge"], + ["water", "ep_water"], + ] self.assertEqual(sorted(obs), sorted(exp)) 
def test_get_timeseries_types(self): obs = qdb.util.get_timeseries_types() - exp = [[1, 'None', 'None'], - [2, 'real', 'single intervention'], - [3, 'real', 'multiple intervention'], - [4, 'real', 'combo intervention'], - [5, 'pseudo', 'single intervention'], - [6, 'pseudo', 'multiple intervention'], - [7, 'pseudo', 'combo intervention'], - [8, 'mixed', 'single intervention'], - [9, 'mixed', 'multiple intervention'], - [10, 'mixed', 'combo intervention']] + exp = [ + [1, "None", "None"], + [2, "real", "single intervention"], + [3, "real", "multiple intervention"], + [4, "real", "combo intervention"], + [5, "pseudo", "single intervention"], + [6, "pseudo", "multiple intervention"], + [7, "pseudo", "combo intervention"], + [8, "mixed", "single intervention"], + [9, "mixed", "multiple intervention"], + [10, "mixed", "combo intervention"], + ] self.assertEqual(obs, exp) def test_get_filepath_information(self): obs = qdb.util.get_filepath_information(1) # This path is machine specific. Just checking that is not empty - self.assertIsNotNone(obs.pop('fullpath')) - exp = {'filepath_id': 1, 'filepath': '1_s_G1_L001_sequences.fastq.gz', - 'filepath_type': 'raw_forward_seqs', 'checksum': '2125826711', - 'data_type': 'raw_data', 'mountpoint': 'raw_data', - 'subdirectory': False, 'active': True} + self.assertIsNotNone(obs.pop("fullpath")) + exp = { + "filepath_id": 1, + "filepath": "1_s_G1_L001_sequences.fastq.gz", + "filepath_type": "raw_forward_seqs", + "checksum": "2125826711", + "data_type": "raw_data", + "mountpoint": "raw_data", + "subdirectory": False, + "active": True, + } self.assertEqual(obs, exp) def test_filepath_id_to_rel_path(self): obs = qdb.util.filepath_id_to_rel_path(1) - exp = 'raw_data/1_s_G1_L001_sequences.fastq.gz' + exp = "raw_data/1_s_G1_L001_sequences.fastq.gz" self.assertEqual(obs, exp) obs = qdb.util.filepath_id_to_rel_path(3) - exp = 'preprocessed_data/1_seqs.fna' + exp = "preprocessed_data/1_seqs.fna" self.assertEqual(obs, exp) fd, fp = mkstemp() close(fd) - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") self.files_to_remove.append(fp) - test = qdb.util.insert_filepaths( - [(fp, "raw_forward_seqs")], 2, "FASTQ")[0] + test = qdb.util.insert_filepaths([(fp, "raw_forward_seqs")], 2, "FASTQ")[0] sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) VALUES (%s, %s)""" qdb.sql_connection.perform_as_transaction(sql, [2, test]) obs = qdb.util.filepath_id_to_rel_path(test) - exp = 'FASTQ/2/%s' % basename(fp) + exp = "FASTQ/2/%s" % basename(fp) self.assertEqual(obs, exp) def test_filepath_ids_to_rel_paths(self): fd, fp = mkstemp() close(fd) - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") self.files_to_remove.append(fp) - test = qdb.util.insert_filepaths( - [(fp, "raw_forward_seqs")], 2, "FASTQ")[0] + test = qdb.util.insert_filepaths([(fp, "raw_forward_seqs")], 2, "FASTQ")[0] sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) VALUES (%s, %s)""" qdb.sql_connection.perform_as_transaction(sql, [2, test]) obs = qdb.util.filepath_ids_to_rel_paths([1, 3, test]) - exp = {1: 'raw_data/1_s_G1_L001_sequences.fastq.gz', - 3: 'preprocessed_data/1_seqs.fna', - test: 'FASTQ/2/%s' % basename(fp)} + exp = { + 1: "raw_data/1_s_G1_L001_sequences.fastq.gz", + 3: "preprocessed_data/1_seqs.fna", + test: "FASTQ/2/%s" % basename(fp), + } self.assertEqual(obs, exp) def test_add_message(self): - count = qdb.util.get_count('qiita.message') + 1 - user = qdb.user.User.create('new@test.bar', 
'password') + count = qdb.util.get_count("qiita.message") + 1 + user = qdb.user.User.create("new@test.bar", "password") users = [user] qdb.util.add_message("TEST MESSAGE", users) obs = [[x[0], x[1]] for x in user.messages()] - exp = [[count, 'TEST MESSAGE']] + exp = [[count, "TEST MESSAGE"]] self.assertEqual(obs, exp) def test_add_system_message(self): - count = qdb.util.get_count('qiita.message') + 1 - qdb.util.add_system_message("SYS MESSAGE", - datetime(2015, 8, 5, 19, 41)) + count = qdb.util.get_count("qiita.message") + 1 + qdb.util.add_system_message("SYS MESSAGE", datetime(2015, 8, 5, 19, 41)) - obs = [[x[0], x[1]] - for x in qdb.user.User('shared@foo.bar').messages()] - exp = [[count, 'SYS MESSAGE'], [1, 'message 1']] + obs = [[x[0], x[1]] for x in qdb.user.User("shared@foo.bar").messages()] + exp = [[count, "SYS MESSAGE"], [1, "message 1"]] self.assertEqual(obs, exp) - obs = [[x[0], x[1]] for x in qdb.user.User('admin@foo.bar').messages()] - exp = [[count, 'SYS MESSAGE']] + obs = [[x[0], x[1]] for x in qdb.user.User("admin@foo.bar").messages()] + exp = [[count, "SYS MESSAGE"]] self.assertEqual(obs, exp) sql = "SELECT expiration from qiita.message WHERE message_id = %s" @@ -753,16 +847,15 @@ def test_add_system_message(self): self.assertEqual(obs, exp) def test_clear_system_messages(self): - message_id = qdb.util.get_count('qiita.message') + 1 - user = qdb.user.User.create('csm@test.bar', 'password') + message_id = qdb.util.get_count("qiita.message") + 1 + user = qdb.user.User.create("csm@test.bar", "password") obs = [[x[0], x[1]] for x in user.messages()] exp = [] self.assertEqual(obs, exp) - qdb.util.add_system_message("SYS MESSAGE", - datetime(2015, 8, 5, 19, 41)) + qdb.util.add_system_message("SYS MESSAGE", datetime(2015, 8, 5, 19, 41)) obs = [[x[0], x[1]] for x in user.messages()] - exp = [[message_id, 'SYS MESSAGE']] + exp = [[message_id, "SYS MESSAGE"]] self.assertCountEqual(obs, exp) qdb.util.clear_system_messages() @@ -775,8 +868,11 @@ def test_clear_system_messages(self): def test_supported_filepath_types(self): obs = qdb.util.supported_filepath_types("FASTQ") - exp = [["raw_forward_seqs", True], ["raw_reverse_seqs", False], - ["raw_barcodes", True]] + exp = [ + ["raw_forward_seqs", True], + ["raw_reverse_seqs", False], + ["raw_barcodes", True], + ] self.assertCountEqual(obs, exp) obs = qdb.util.supported_filepath_types("BIOM") @@ -787,22 +883,34 @@ def test_generate_analysis_list(self): self.assertEqual(qdb.util.generate_analysis_list([]), []) obs = qdb.util.generate_analysis_list([1, 2, 3, 5]) - exp = [{'mapping_files': [ - (16, qdb.util.get_filepath_information(16)['fullpath'])], - 'description': 'A test analysis', 'artifacts': [8, 9], 'name': - 'SomeAnalysis', 'owner': 'test@foo.bar', 'analysis_id': 1, - 'visibility': 'private'}, - {'mapping_files': [], 'description': 'Another test analysis', - 'artifacts': [], 'name': 'SomeSecondAnalysis', - 'owner': 'admin@foo.bar', - 'analysis_id': 2, 'visibility': 'private'}] + exp = [ + { + "mapping_files": [ + (16, qdb.util.get_filepath_information(16)["fullpath"]) + ], + "description": "A test analysis", + "artifacts": [8, 9], + "name": "SomeAnalysis", + "owner": "test@foo.bar", + "analysis_id": 1, + "visibility": "private", + }, + { + "mapping_files": [], + "description": "Another test analysis", + "artifacts": [], + "name": "SomeSecondAnalysis", + "owner": "admin@foo.bar", + "analysis_id": 2, + "visibility": "private", + }, + ] # removing timestamp for testing for i in range(len(obs)): - del obs[i]['timestamp'] + del 
obs[i]["timestamp"] self.assertEqual(obs, exp) - self.assertEqual( - qdb.util.generate_analysis_list([1, 2, 3, 5], True), []) + self.assertEqual(qdb.util.generate_analysis_list([1, 2, 3, 5], True), []) @qiita_test_checker() @@ -823,8 +931,7 @@ def test_compute_checksum(self): def test_scrub_data_nothing(self): """Returns the same string without changes""" - self.assertEqual(qdb.util.scrub_data("nothing_changes"), - "nothing_changes") + self.assertEqual(qdb.util.scrub_data("nothing_changes"), "nothing_changes") def test_scrub_data_semicolon(self): """Correctly removes the semicolon from the string""" @@ -836,28 +943,28 @@ def test_scrub_data_single_quote(self): def test_get_visibilities(self): obs = qdb.util.get_visibilities() - exp = ['awaiting_approval', 'sandbox', 'private', 'public', 'archived'] + exp = ["awaiting_approval", "sandbox", "private", "public", "archived"] self.assertEqual(obs, exp) def test_infer_status(self): obs = qdb.util.infer_status([]) - self.assertEqual(obs, 'sandbox') + self.assertEqual(obs, "sandbox") - obs = qdb.util.infer_status([['private']]) - self.assertEqual(obs, 'private') + obs = qdb.util.infer_status([["private"]]) + self.assertEqual(obs, "private") - obs = qdb.util.infer_status([['private'], ['public']]) - self.assertEqual(obs, 'public') + obs = qdb.util.infer_status([["private"], ["public"]]) + self.assertEqual(obs, "public") - obs = qdb.util.infer_status([['sandbox'], ['awaiting_approval']]) - self.assertEqual(obs, 'awaiting_approval') + obs = qdb.util.infer_status([["sandbox"], ["awaiting_approval"]]) + self.assertEqual(obs, "awaiting_approval") - obs = qdb.util.infer_status([['sandbox'], ['sandbox']]) - self.assertEqual(obs, 'sandbox') + obs = qdb.util.infer_status([["sandbox"], ["sandbox"]]) + self.assertEqual(obs, "sandbox") def test_get_pubmed_ids_from_dois(self): - exp = {'10.100/123456': '123456'} - obs = qdb.util.get_pubmed_ids_from_dois(['', '10.100/123456']) + exp = {"10.100/123456": "123456"} + obs = qdb.util.get_pubmed_ids_from_dois(["", "10.100/123456"]) self.assertEqual(obs, exp) def test_generate_study_list(self): @@ -867,172 +974,197 @@ def test_generate_study_list(self): UTIL = qdb.util # testing owner email as name - user = USER('test@foo.bar') - username = user.info['name'] + user = USER("test@foo.bar") + username = user.info["name"] # test without changes - self.assertDictEqual( - STUDY_INFO, UTIL.generate_study_list(user, 'user')[0]) + self.assertDictEqual(STUDY_INFO, UTIL.generate_study_list(user, "user")[0]) # change user's name to None and tests again - user.info = {'name': None} + user.info = {"name": None} exp = STUDY_INFO.copy() - exp['owner'] = 'test@foo.bar' - self.assertDictEqual( - exp, qdb.util.generate_study_list(user, 'user')[0]) + exp["owner"] = "test@foo.bar" + self.assertDictEqual(exp, qdb.util.generate_study_list(user, "user")[0]) # returning original name - user.info = {'name': username} + user.info = {"name": username} # creating a new study to make sure that empty studies are also # returned - info = {"timeseries_type_id": 1, "metadata_complete": True, - "mixs_compliant": True, "study_alias": "TST", - "study_description": "Some description of the study goes here", - "study_abstract": "Some abstract goes here", - "principal_investigator_id": qdb.study.StudyPerson(1), - "lab_person_id": qdb.study.StudyPerson(1)} - new_study = STUDY.create( - USER('shared@foo.bar'), 'test_study_1', info=info) + info = { + "timeseries_type_id": 1, + "metadata_complete": True, + "mixs_compliant": True, + "study_alias": "TST", + 
"study_description": "Some description of the study goes here", + "study_abstract": "Some abstract goes here", + "principal_investigator_id": qdb.study.StudyPerson(1), + "lab_person_id": qdb.study.StudyPerson(1), + } + new_study = STUDY.create(USER("shared@foo.bar"), "test_study_1", info=info) snew_info = { - 'study_title': 'test_study_1', - 'metadata_complete': True, 'publication_pid': [], - 'artifact_biom_ids': [], 'autoloaded': False, - 'study_id': new_study.id, 'ebi_study_accession': None, - 'owner': 'Shared', 'shared': [], - 'study_abstract': 'Some abstract goes here', - 'pi': ('lab_dude@foo.bar', 'LabDude'), 'publication_doi': [], - 'study_alias': 'TST', 'study_tags': None, - 'preparation_data_types': [], 'number_samples_collected': 0} + "study_title": "test_study_1", + "metadata_complete": True, + "publication_pid": [], + "artifact_biom_ids": [], + "autoloaded": False, + "study_id": new_study.id, + "ebi_study_accession": None, + "owner": "Shared", + "shared": [], + "study_abstract": "Some abstract goes here", + "pi": ("lab_dude@foo.bar", "LabDude"), + "publication_doi": [], + "study_alias": "TST", + "study_tags": None, + "preparation_data_types": [], + "number_samples_collected": 0, + } exp1 = [STUDY_INFO] exp2 = [snew_info] exp_both = [STUDY_INFO, snew_info] # let's make sure that everything is private for study 1 for a in STUDY(1).artifacts(): - a.visibility = 'private' + a.visibility = "private" # owner of study - obs = UTIL.generate_study_list(USER('test@foo.bar'), 'user') + obs = UTIL.generate_study_list(USER("test@foo.bar"), "user") self.assertEqual(len(obs), 1) self.assertDictEqual(obs[0], exp1[0]) # shared with - obs = UTIL.generate_study_list(USER('shared@foo.bar'), 'user') + obs = UTIL.generate_study_list(USER("shared@foo.bar"), "user") self.assertEqual(len(obs), 2) self.assertDictEqual(obs[0], exp_both[0]) self.assertDictEqual(obs[1], exp_both[1]) # admin - obs = UTIL.generate_study_list(USER('admin@foo.bar'), 'user') + obs = UTIL.generate_study_list(USER("admin@foo.bar"), "user") self.assertEqual(obs, exp_both) # no access/hidden - obs = UTIL.generate_study_list(USER('demo@microbio.me'), 'user') + obs = UTIL.generate_study_list(USER("demo@microbio.me"), "user") self.assertEqual(obs, []) # public - none for everyone - obs = UTIL.generate_study_list(USER('test@foo.bar'), 'public') + obs = UTIL.generate_study_list(USER("test@foo.bar"), "public") self.assertEqual(obs, []) - obs = UTIL.generate_study_list(USER('shared@foo.bar'), 'public') + obs = UTIL.generate_study_list(USER("shared@foo.bar"), "public") self.assertEqual(obs, []) - obs = UTIL.generate_study_list(USER('admin@foo.bar'), 'public') + obs = UTIL.generate_study_list(USER("admin@foo.bar"), "public") self.assertEqual(obs, []) - obs = UTIL.generate_study_list(USER('demo@microbio.me'), 'public') + obs = UTIL.generate_study_list(USER("demo@microbio.me"), "public") self.assertEqual(obs, []) def _avoid_duplicated_tests(all_artifacts=False): # nothing should shange for owner, shared - obs = UTIL.generate_study_list(USER('test@foo.bar'), 'user') + obs = UTIL.generate_study_list(USER("test@foo.bar"), "user") self.assertEqual(obs, exp1) - obs = UTIL.generate_study_list(USER('shared@foo.bar'), 'user') + obs = UTIL.generate_study_list(USER("shared@foo.bar"), "user") self.assertEqual(obs, exp_both) # for admin it should be shown in public and user cause there are # 2 preps and only one is public - obs = UTIL.generate_study_list(USER('admin@foo.bar'), 'user') + obs = UTIL.generate_study_list(USER("admin@foo.bar"), "user") if not 
all_artifacts: self.assertEqual(obs, exp_both) else: self.assertEqual(obs, exp2) - obs = UTIL.generate_study_list(USER('demo@microbio.me'), 'user') + obs = UTIL.generate_study_list(USER("demo@microbio.me"), "user") self.assertEqual(obs, []) # for the public query, everything should be same for owner, share # and admin but demo should now see it as public but with limited # artifacts - obs = UTIL.generate_study_list(USER('test@foo.bar'), 'public') + obs = UTIL.generate_study_list(USER("test@foo.bar"), "public") self.assertEqual(obs, []) - obs = UTIL.generate_study_list(USER('shared@foo.bar'), 'public') + obs = UTIL.generate_study_list(USER("shared@foo.bar"), "public") self.assertEqual(obs, []) - obs = UTIL.generate_study_list(USER('admin@foo.bar'), 'public') + obs = UTIL.generate_study_list(USER("admin@foo.bar"), "public") if not all_artifacts: - exp1[0]['artifact_biom_ids'] = [7] + exp1[0]["artifact_biom_ids"] = [7] self.assertEqual(obs, exp1) - obs = UTIL.generate_study_list(USER('demo@microbio.me'), 'public') + obs = UTIL.generate_study_list(USER("demo@microbio.me"), "public") self.assertEqual(obs, exp1) # returning artifacts - exp1[0]['artifact_biom_ids'] = [4, 5, 6, 7] + exp1[0]["artifact_biom_ids"] = [4, 5, 6, 7] # make artifacts of prep 2 public - PREP(2).artifact.visibility = 'public' + PREP(2).artifact.visibility = "public" _avoid_duplicated_tests() # make artifacts of prep 1 awaiting_approval - PREP(1).artifact.visibility = 'awaiting_approval' + PREP(1).artifact.visibility = "awaiting_approval" _avoid_duplicated_tests() # making all studies public - PREP(1).artifact.visibility = 'public' + PREP(1).artifact.visibility = "public" _avoid_duplicated_tests(True) # deleting the new study study and returning artifact status qdb.study.Study.delete(new_study.id) - PREP(1).artifact.visibility = 'private' - PREP(2).artifact.visibility = 'private' + PREP(1).artifact.visibility = "private" + PREP(2).artifact.visibility = "private" def test_generate_study_list_errors(self): with self.assertRaises(ValueError): - qdb.util.generate_study_list(qdb.user.User('test@foo.bar'), 'bad') + qdb.util.generate_study_list(qdb.user.User("test@foo.bar"), "bad") def test_generate_study_list_without_artifacts(self): # creating a new study to make sure that empty studies are also # returned - info = {"timeseries_type_id": 1, "metadata_complete": True, - "mixs_compliant": True, "study_alias": "TST", - "study_description": "Some description of the study goes here", - "study_abstract": "Some abstract goes here", - "principal_investigator_id": qdb.study.StudyPerson(1), - "lab_person_id": qdb.study.StudyPerson(1)} + info = { + "timeseries_type_id": 1, + "metadata_complete": True, + "mixs_compliant": True, + "study_alias": "TST", + "study_description": "Some description of the study goes here", + "study_abstract": "Some abstract goes here", + "principal_investigator_id": qdb.study.StudyPerson(1), + "lab_person_id": qdb.study.StudyPerson(1), + } new_study = qdb.study.Study.create( - qdb.user.User('shared@foo.bar'), 'test_study_1', info=info) + qdb.user.User("shared@foo.bar"), "test_study_1", info=info + ) exp_info = [ - {'study_title': ( - 'Identification of the Microbiomes for Cannabis Soils'), - 'metadata_complete': True, 'publication_pid': [ - '123456', '7891011'], - 'study_id': 1, 'ebi_study_accession': 'EBI123456-BB', - 'autoloaded': False, - 'study_abstract': ( - 'This is a preliminary study to examine the microbiota ' - 'associated with the Cannabis plant. 
Soils samples from ' - 'the bulk soil, soil associated with the roots, and the ' - 'rhizosphere were extracted and the DNA sequenced. Roots ' - 'from three independent plants of different strains were ' - 'examined. These roots were obtained November 11, 2011 from ' - 'plants that had been harvested in the summer. Future studies ' - 'will attempt to analyze the soils and rhizospheres from the ' - 'same location at different time points in the plant ' - 'lifecycle.'), 'pi': ('PI_dude@foo.bar', 'PIDude'), - 'publication_doi': ['10.100/123456', '10.100/7891011'], - 'study_alias': 'Cannabis Soils', 'number_samples_collected': 27}, - {'study_title': 'test_study_1', - 'metadata_complete': True, 'publication_pid': [], - 'autoloaded': False, - 'study_id': new_study.id, 'ebi_study_accession': None, - 'study_abstract': 'Some abstract goes here', - 'pi': ('lab_dude@foo.bar', 'LabDude'), 'publication_doi': [], - 'study_alias': 'TST', 'number_samples_collected': 0}] + { + "study_title": ("Identification of the Microbiomes for Cannabis Soils"), + "metadata_complete": True, + "publication_pid": ["123456", "7891011"], + "study_id": 1, + "ebi_study_accession": "EBI123456-BB", + "autoloaded": False, + "study_abstract": ( + "This is a preliminary study to examine the microbiota " + "associated with the Cannabis plant. Soils samples from " + "the bulk soil, soil associated with the roots, and the " + "rhizosphere were extracted and the DNA sequenced. Roots " + "from three independent plants of different strains were " + "examined. These roots were obtained November 11, 2011 from " + "plants that had been harvested in the summer. Future studies " + "will attempt to analyze the soils and rhizospheres from the " + "same location at different time points in the plant " + "lifecycle." 
+ ), + "pi": ("PI_dude@foo.bar", "PIDude"), + "publication_doi": ["10.100/123456", "10.100/7891011"], + "study_alias": "Cannabis Soils", + "number_samples_collected": 27, + }, + { + "study_title": "test_study_1", + "metadata_complete": True, + "publication_pid": [], + "autoloaded": False, + "study_id": new_study.id, + "ebi_study_accession": None, + "study_abstract": "Some abstract goes here", + "pi": ("lab_dude@foo.bar", "LabDude"), + "publication_doi": [], + "study_alias": "TST", + "number_samples_collected": 0, + }, + ] obs_info = qdb.util.generate_study_list_without_artifacts([1, 2, 3, 4]) self.assertEqual(obs_info, exp_info) - obs_info = qdb.util.generate_study_list_without_artifacts( - [1, 2, 3, 4], 'EMP') + obs_info = qdb.util.generate_study_list_without_artifacts([1, 2, 3, 4], "EMP") self.assertEqual(obs_info, []) # deleting the old study @@ -1044,42 +1176,84 @@ def test_get_artifacts_information(self): obs = qdb.util.get_artifacts_information([1, 2, 4, 6, 7, 8]) # not testing timestamp for i in range(len(obs)): - del obs[i]['timestamp'] + del obs[i]["timestamp"] exp = [ - {'artifact_id': 6, 'target_subfragment': ['V4'], - 'prep_samples': 27, 'platform': 'Illumina', - 'target_gene': '16S rRNA', 'name': 'BIOM', 'data_type': '16S', - 'parameters': {'reference': '2', 'similarity': '0.97', - 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', 'threads': '1', - 'sortmerna_coverage': '0.97'}, - 'algorithm': 'Pick closed-reference OTUs | Split libraries FASTQ', - 'algorithm_az': 'd480799a0a7a2fbe0e9022bc9c602018', - 'deprecated': False, 'active': True, - 'files': ['1_study_1001_closed_reference_otu_table_Silva.biom']}, - {'artifact_id': 4, 'target_subfragment': ['V4'], - 'prep_samples': 27, 'platform': 'Illumina', - 'target_gene': '16S rRNA', 'name': 'BIOM', 'data_type': '18S', - 'parameters': {'reference': '1', 'similarity': '0.97', - 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', 'threads': '1', - 'sortmerna_coverage': '0.97'}, - 'algorithm': 'Pick closed-reference OTUs | Split libraries FASTQ', - 'algorithm_az': 'd480799a0a7a2fbe0e9022bc9c602018', - 'deprecated': False, 'active': True, - 'files': ['1_study_1001_closed_reference_otu_table.biom']}, - {'artifact_id': 7, 'target_subfragment': ['V4'], - 'prep_samples': 27, 'platform': 'Illumina', - 'target_gene': '16S rRNA', 'name': 'BIOM', 'data_type': '16S', - 'parameters': {}, 'algorithm': '', 'algorithm_az': '', - 'deprecated': False, 'active': True, - 'files': ['biom_table.biom']}, - {'artifact_id': 8, 'target_subfragment': [], 'prep_samples': 0, - 'platform': 'not provided', 'target_gene': 'not provided', 'name': - 'noname', 'data_type': '18S', 'parameters': {}, 'algorithm': '', - 'algorithm_az': '', 'deprecated': False, 'active': True, - 'files': ['biom_table.biom']}] + { + "artifact_id": 6, + "target_subfragment": ["V4"], + "prep_samples": 27, + "platform": "Illumina", + "target_gene": "16S rRNA", + "name": "BIOM", + "data_type": "16S", + "parameters": { + "reference": "2", + "similarity": "0.97", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "threads": "1", + "sortmerna_coverage": "0.97", + }, + "algorithm": "Pick closed-reference OTUs | Split libraries FASTQ", + "algorithm_az": "d480799a0a7a2fbe0e9022bc9c602018", + "deprecated": False, + "active": True, + "files": ["1_study_1001_closed_reference_otu_table_Silva.biom"], + }, + { + "artifact_id": 4, + "target_subfragment": ["V4"], + "prep_samples": 27, + "platform": "Illumina", + "target_gene": "16S rRNA", + "name": "BIOM", + "data_type": "18S", + 
"parameters": { + "reference": "1", + "similarity": "0.97", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "threads": "1", + "sortmerna_coverage": "0.97", + }, + "algorithm": "Pick closed-reference OTUs | Split libraries FASTQ", + "algorithm_az": "d480799a0a7a2fbe0e9022bc9c602018", + "deprecated": False, + "active": True, + "files": ["1_study_1001_closed_reference_otu_table.biom"], + }, + { + "artifact_id": 7, + "target_subfragment": ["V4"], + "prep_samples": 27, + "platform": "Illumina", + "target_gene": "16S rRNA", + "name": "BIOM", + "data_type": "16S", + "parameters": {}, + "algorithm": "", + "algorithm_az": "", + "deprecated": False, + "active": True, + "files": ["biom_table.biom"], + }, + { + "artifact_id": 8, + "target_subfragment": [], + "prep_samples": 0, + "platform": "not provided", + "target_gene": "not provided", + "name": "noname", + "data_type": "18S", + "parameters": {}, + "algorithm": "", + "algorithm_az": "", + "deprecated": False, + "active": True, + "files": ["biom_table.biom"], + }, + ] self.assertCountEqual(obs, exp) exp = exp[1:] @@ -1089,31 +1263,36 @@ def test_get_artifacts_information(self): # setting up database changes for just checking commands qdb.sql_connection.TRN.add( """UPDATE qiita.command_parameter SET check_biom_merge = True - WHERE parameter_name = 'reference'""") + WHERE parameter_name = 'reference'""" + ) qdb.sql_connection.TRN.execute() # testing that it works as expected obs = qdb.util.get_artifacts_information([1, 2, 4, 7, 8]) # not testing timestamp for i in range(len(obs)): - del obs[i]['timestamp'] - exp[0]['algorithm'] = ('Pick closed-reference OTUs (reference: 1) ' - '| Split libraries FASTQ') - exp[0]['algorithm_az'] = '33fed1b35728417d7ba4139b8f817d44' + del obs[i]["timestamp"] + exp[0]["algorithm"] = ( + "Pick closed-reference OTUs (reference: 1) | Split libraries FASTQ" + ) + exp[0]["algorithm_az"] = "33fed1b35728417d7ba4139b8f817d44" self.assertCountEqual(obs, exp) # setting up database changes for also command output qdb.sql_connection.TRN.add( - "UPDATE qiita.command_output SET check_biom_merge = True") + "UPDATE qiita.command_output SET check_biom_merge = True" + ) qdb.sql_connection.TRN.execute() obs = qdb.util.get_artifacts_information([1, 2, 4, 7, 8]) # not testing timestamp for i in range(len(obs)): - del obs[i]['timestamp'] - exp[0]['algorithm'] = ('Pick closed-reference OTUs (reference: 1, ' - 'BIOM: 1_study_1001_closed_reference_' - 'otu_table.biom) | Split libraries FASTQ') - exp[0]['algorithm_az'] = 'de5b794a2cacd428f36fea86df196bfd' + del obs[i]["timestamp"] + exp[0]["algorithm"] = ( + "Pick closed-reference OTUs (reference: 1, " + "BIOM: 1_study_1001_closed_reference_" + "otu_table.biom) | Split libraries FASTQ" + ) + exp[0]["algorithm_az"] = "de5b794a2cacd428f36fea86df196bfd" self.assertCountEqual(obs, exp) # let's test that we ignore the parent_info @@ -1123,11 +1302,13 @@ def test_get_artifacts_information(self): obs = qdb.util.get_artifacts_information([1, 2, 4, 7, 8]) # not testing timestamp for i in range(len(obs)): - del obs[i]['timestamp'] - exp[0]['algorithm'] = ('Pick closed-reference OTUs (reference: 1, ' - 'BIOM: 1_study_1001_closed_reference_' - 'otu_table.biom)') - exp[0]['algorithm_az'] = '7f59a45b2f0d30cd1ed1929391c26e07' + del obs[i]["timestamp"] + exp[0]["algorithm"] = ( + "Pick closed-reference OTUs (reference: 1, " + "BIOM: 1_study_1001_closed_reference_" + "otu_table.biom)" + ) + exp[0]["algorithm_az"] = "7f59a45b2f0d30cd1ed1929391c26e07" self.assertCountEqual(obs, exp) # let's test that 
we ignore the parent_info @@ -1137,36 +1318,41 @@ def test_get_artifacts_information(self): obs = qdb.util.get_artifacts_information([1, 2, 4, 7, 8]) # not testing timestamp for i in range(len(obs)): - del obs[i]['timestamp'] - exp[0]['algorithm'] = ('Pick closed-reference OTUs (reference: 1, ' - 'BIOM: 1_study_1001_closed_reference_' - 'otu_table.biom)') - exp[0]['algorithm_az'] = '7f59a45b2f0d30cd1ed1929391c26e07' + del obs[i]["timestamp"] + exp[0]["algorithm"] = ( + "Pick closed-reference OTUs (reference: 1, " + "BIOM: 1_study_1001_closed_reference_" + "otu_table.biom)" + ) + exp[0]["algorithm_az"] = "7f59a45b2f0d30cd1ed1929391c26e07" self.assertCountEqual(obs, exp) # returning database as it was qdb.sql_connection.TRN.add( - "UPDATE qiita.command_output SET check_biom_merge = False") + "UPDATE qiita.command_output SET check_biom_merge = False" + ) qdb.sql_connection.TRN.add("""UPDATE qiita.software_command SET ignore_parent_command = False""") qdb.sql_connection.TRN.add( """UPDATE qiita.command_parameter SET check_biom_merge = False - WHERE parameter_name = 'reference'""") + WHERE parameter_name = 'reference'""" + ) qdb.sql_connection.TRN.execute() class TestFilePathOpening(TestCase): """Tests adapted from scikit-bio's skbio.io.util tests""" + def test_is_string_or_bytes(self): - self.assertTrue(qdb.util._is_string_or_bytes('foo')) - self.assertTrue(qdb.util._is_string_or_bytes(u'foo')) - self.assertTrue(qdb.util._is_string_or_bytes(b'foo')) - self.assertFalse(qdb.util._is_string_or_bytes(StringIO('bar'))) + self.assertTrue(qdb.util._is_string_or_bytes("foo")) + self.assertTrue(qdb.util._is_string_or_bytes("foo")) + self.assertTrue(qdb.util._is_string_or_bytes(b"foo")) + self.assertFalse(qdb.util._is_string_or_bytes(StringIO("bar"))) self.assertFalse(qdb.util._is_string_or_bytes([1])) def test_file_closed(self): """File gets closed in decorator""" - f = NamedTemporaryFile('r') + f = NamedTemporaryFile("r") filepath = f.name with qdb.util.open_file(filepath) as fh: pass @@ -1174,7 +1360,7 @@ def test_file_closed(self): def test_file_closed_harder(self): """File gets closed in decorator, even if exceptions happen.""" - f = NamedTemporaryFile('r') + f = NamedTemporaryFile("r") filepath = f.name try: with qdb.util.open_file(filepath) as fh: @@ -1189,7 +1375,7 @@ def test_file_closed_harder(self): def test_filehandle(self): """Filehandles slip through untouched""" - with TemporaryFile('r') as fh: + with TemporaryFile("r") as fh: with qdb.util.open_file(fh) as ffh: self.assertTrue(fh is ffh) # And it doesn't close the file-handle @@ -1209,7 +1395,7 @@ def test_BytesIO(self): def test_hdf5IO(self): """This tests that if we send a file handler it returns it""" - f = h5py.File('test', driver='core', backing_store=False, mode='w') + f = h5py.File("test", driver="core", backing_store=False, mode="w") with qdb.util.open_file(f) as fh: self.assertTrue(fh is f) @@ -1218,7 +1404,7 @@ def test_hdf5IO_open(self): name = fh.name fh.close() - h5file = h5py.File(name, 'w') + h5file = h5py.File(name, "w") h5file.close() with qdb.util.open_file(name) as fh_inner: @@ -1228,14 +1414,12 @@ def test_hdf5IO_open(self): class PurgeFilepathsTests(DBUtilTestsBase): - def _get_current_filepaths(self): sql_fp = "SELECT filepath_id FROM qiita.filepath" with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql_fp) results = qdb.sql_connection.TRN.execute_fetchflatten() - return [qdb.util.get_filepath_information(_id)['fullpath'] - for _id in results] + return [qdb.util.get_filepath_information(_id)["fullpath"] for 
_id in results] def _create_files(self, files): # format is: [mp_id, fp_type_id, file_name] @@ -1255,8 +1439,8 @@ def test_purge_filepaths_test(self): # Make sure that the files exist - specially for travis for fp in fps_expected: if not exists(fp): - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") self.files_to_remove.append(fp) # nothing shold be removed @@ -1266,26 +1450,25 @@ def test_purge_filepaths_test(self): # testing study filepath delete by inserting a new study sample info # and make sure it gets deleted - mp_id, mp = qdb.util.get_mountpoint('templates')[0] - txt_id = qdb.util.convert_to_id('sample_template', "filepath_type") - self._create_files([[mp_id, txt_id, '100_filepath.txt']]) + mp_id, mp = qdb.util.get_mountpoint("templates")[0] + txt_id = qdb.util.convert_to_id("sample_template", "filepath_type") + self._create_files([[mp_id, txt_id, "100_filepath.txt"]]) qdb.util.purge_filepaths() fps_viewed = self._get_current_filepaths() self.assertCountEqual(fps_expected, fps_viewed) # testing artifact [A], creating a folder with an artifact that # doesn't exist - _, mp = qdb.util.get_mountpoint('per_sample_FASTQ')[0] - not_an_artifact_fp = join(mp, '10000') + _, mp = qdb.util.get_mountpoint("per_sample_FASTQ")[0] + not_an_artifact_fp = join(mp, "10000") mkdir(not_an_artifact_fp) # now let's add test for [B] by creating 2 filepaths without a # link to the artifacts tables - mp_id, mp = qdb.util.get_mountpoint('BIOM')[0] - biom_id = qdb.util.convert_to_id('biom', "filepath_type") - self._create_files([ - [mp_id, txt_id, 'artifact_filepath.txt'], - [mp_id, biom_id, 'my_biom.biom'] - ]) + mp_id, mp = qdb.util.get_mountpoint("BIOM")[0] + biom_id = qdb.util.convert_to_id("biom", "filepath_type") + self._create_files( + [[mp_id, txt_id, "artifact_filepath.txt"], [mp_id, biom_id, "my_biom.biom"]] + ) # adding files to tests qdb.util.purge_filepaths() fps_viewed = self._get_current_filepaths() @@ -1294,12 +1477,14 @@ def test_purge_filepaths_test(self): # testing analysis filepath delete by filepaths for 2 different files # and making sure they get deleted - mp_id, mp = qdb.util.get_mountpoint('analysis')[0] - biom_id = qdb.util.convert_to_id('biom', "filepath_type") - self._create_files([ - [mp_id, txt_id, '10000_my_analysis_map.txt'], - [mp_id, biom_id, '10000_my_analysis_biom.biom'] - ]) + mp_id, mp = qdb.util.get_mountpoint("analysis")[0] + biom_id = qdb.util.convert_to_id("biom", "filepath_type") + self._create_files( + [ + [mp_id, txt_id, "10000_my_analysis_map.txt"], + [mp_id, biom_id, "10000_my_analysis_biom.biom"], + ] + ) qdb.util.purge_filepaths() fps_viewed = self._get_current_filepaths() self.assertCountEqual(fps_expected, fps_viewed) @@ -1316,154 +1501,181 @@ def setUp(self): self.cname = "Split libraries FASTQ" self.sname = "QIIMEq2" self.version = "1.9.1" - self.col_name = 'samples * columns' + self.col_name = "samples * columns" self.columns = [ - "sName", "sVersion", "cID", "cName", "processing_job_id", - "parameters", "samples", "columns", "input_size", "extra_info", - "MaxRSSRaw", "ElapsedRaw", "Start", "node_name", "node_model"] + "sName", + "sVersion", + "cID", + "cName", + "processing_job_id", + "parameters", + "samples", + "columns", + "input_size", + "extra_info", + "MaxRSSRaw", + "ElapsedRaw", + "Start", + "node_name", + "node_model", + ] # df is a dataframe that represents a table with columns specified in # self.columns self.df = qdb.util.retrieve_resource_data( - self.cname, self.sname, self.version, self.columns) + 
self.cname, self.sname, self.version, self.columns + ) def test_plot_return(self): # check the plot returns correct objects fig1, axs1 = qdb.util.resource_allocation_plot(self.df, self.col_name) self.assertIsInstance( - fig1, Figure, - "Returned object fig1 is not a Matplotlib Figure") + fig1, Figure, "Returned object fig1 is not a Matplotlib Figure" + ) for ax in axs1: self.assertIsInstance( - ax, Axes, - "Returned object axs1 is not a single Matplotlib Axes object") + ax, Axes, "Returned object axs1 is not a single Matplotlib Axes object" + ) def test_minimize_const(self): - self.df = self.df[ - (self.df.cName == self.cname) & (self.df.sName == self.sname)] - self.df.dropna(subset=['samples', 'columns'], inplace=True) - self.df[self.col_name] = self.df.samples * self.df['columns'] + self.df = self.df[(self.df.cName == self.cname) & (self.df.sName == self.sname)] + self.df.dropna(subset=["samples", "columns"], inplace=True) + self.df[self.col_name] = self.df.samples * self.df["columns"] fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False) mem_models, time_models = qdb.util.retrieve_equations() bm_name, bm, options = qdb.util._resource_allocation_plot_helper( - self.df, axs[0], 'MaxRSSRaw', mem_models, self.col_name) + self.df, axs[0], "MaxRSSRaw", mem_models, self.col_name + ) # check that the algorithm chooses correct model for MaxRSSRaw and # has 0 failures k, a, b = options.x failures_df = qdb.util._resource_allocation_success_failures( - self.df, k, a, b, bm, self.col_name, 'MaxRSSRaw')[-1] + self.df, k, a, b, bm, self.col_name, "MaxRSSRaw" + )[-1] failures = failures_df.shape[0] - self.assertEqual(bm_name, 'mem_model4', - msg=f"""Best memory model + self.assertEqual( + bm_name, + "mem_model4", + msg=f"""Best memory model doesn't match - {bm_name} != 'mem_model4'""") - self.assertEqual(bm, mem_models['mem_model4']['equation'], - msg=f"""Best memory model + {bm_name} != 'mem_model4'""", + ) + self.assertEqual( + bm, + mem_models["mem_model4"]["equation"], + msg=f"""Best memory model doesn't match Coefficients:{k} {a} {b} - """) + """, + ) self.assertEqual(failures, 0, "Number of failures must be 0") # check that the algorithm chooses correct model for ElapsedRaw and # has 1 failure bm_name, bm, options = qdb.util._resource_allocation_plot_helper( - self.df, axs[1], 'ElapsedRaw', time_models, self.col_name) + self.df, axs[1], "ElapsedRaw", time_models, self.col_name + ) k, a, b = options.x failures_df = qdb.util._resource_allocation_success_failures( - self.df, k, a, b, bm, self.col_name, 'ElapsedRaw')[-1] + self.df, k, a, b, bm, self.col_name, "ElapsedRaw" + )[-1] failures = failures_df.shape[0] - self.assertEqual(bm_name, 'time_model4', - msg=f"""Best time model + self.assertEqual( + bm_name, + "time_model4", + msg=f"""Best time model doesn't match - {bm_name} != 'time_model4'""") + {bm_name} != 'time_model4'""", + ) - self.assertEqual(bm, time_models[bm_name]['equation'], - msg=f"""Best time model + self.assertEqual( + bm, + time_models[bm_name]["equation"], + msg=f"""Best time model doesn't match Coefficients:{k} {a} {b} - """) + """, + ) self.assertEqual(failures, 0, "Number of failures must be 0") def test_MaxRSS_helper(self): tests = [ - ('6', 6.0), - ('6K', 6000), - ('6M', 6000000), - ('6G', 6000000000), - ('6.9', 6.9), - ('6.9K', 6900), - ('6.9M', 6900000), - ('6.9G', 6900000000), + ("6", 6.0), + ("6K", 6000), + ("6M", 6000000), + ("6G", 6000000000), + ("6.9", 6.9), + ("6.9K", 6900), + ("6.9M", 6900000), + ("6.9G", 6900000000), ] for x, y in tests: 
self.assertEqual(qdb.util.MaxRSS_helper(x), y) def test_db_update(self): - path_to_data = './qiita_db/test/test_data/slurm_data.txt.gz' + path_to_data = "./qiita_db/test/test_data/slurm_data.txt.gz" test_data = pd.read_csv(path_to_data, sep="|") types = { - 'Split libraries FASTQ': [ - '6d368e16-2242-4cf8-87b4-a5dc40bb890b', - '4c7115e8-4c8e-424c-bf25-96c292ca1931', - 'b72369f9-a886-4193-8d3d-f7b504168e75', - '46b76f74-e100-47aa-9bf2-c0208bcea52d', - '6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f'], - 'Pick closed-reference OTUs': [ - '3c9991ab-6c14-4368-a48c-841e8837a79c', - '80bf25f3-5f1d-4e10-9369-315e4244f6d5', - '9ba5ae7a-41e1-4202-b396-0259aeaac366', - 'e5609746-a985-41a1-babf-6b3ebe9eb5a9', + "Split libraries FASTQ": [ + "6d368e16-2242-4cf8-87b4-a5dc40bb890b", + "4c7115e8-4c8e-424c-bf25-96c292ca1931", + "b72369f9-a886-4193-8d3d-f7b504168e75", + "46b76f74-e100-47aa-9bf2-c0208bcea52d", + "6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f", ], - 'Single Rarefaction': [ - '8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0' - ] + "Pick closed-reference OTUs": [ + "3c9991ab-6c14-4368-a48c-841e8837a79c", + "80bf25f3-5f1d-4e10-9369-315e4244f6d5", + "9ba5ae7a-41e1-4202-b396-0259aeaac366", + "e5609746-a985-41a1-babf-6b3ebe9eb5a9", + ], + "Single Rarefaction": ["8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0"], } qdb.util.update_resource_allocation_table(test=test_data) for curr_cname, ids in types.items(): updated_df = qdb.util.retrieve_resource_data( - curr_cname, self.sname, self.version, self.columns) - updated_ids_set = set(updated_df['processing_job_id']) - previous_ids_set = set(self.df['processing_job_id']) + curr_cname, self.sname, self.version, self.columns + ) + updated_ids_set = set(updated_df["processing_job_id"]) + previous_ids_set = set(self.df["processing_job_id"]) for id in ids: self.assertTrue(id in updated_ids_set) self.assertFalse(id in previous_ids_set) STUDY_INFO = { - 'study_id': 1, - 'owner': 'Dude', - 'study_alias': 'Cannabis Soils', - 'study_abstract': - 'This is a preliminary study to examine the microbiota ' - 'associated with the Cannabis plant. Soils samples ' - 'from the bulk soil, soil associated with the roots, ' - 'and the rhizosphere were extracted and the DNA ' - 'sequenced. Roots from three independent plants of ' - 'different strains were examined. These roots were ' - 'obtained November 11, 2011 from plants that had been ' - 'harvested in the summer. Future studies will attempt ' - 'to analyze the soils and rhizospheres from the same ' - 'location at different time points in the plant ' - 'lifecycle.', - 'metadata_complete': True, - 'autoloaded': False, - 'ebi_study_accession': 'EBI123456-BB', - 'study_title': - 'Identification of the Microbiomes for Cannabis Soils', - 'number_samples_collected': 27, - 'shared': [('shared@foo.bar', 'Shared')], - 'publication_doi': ['10.100/123456', '10.100/7891011'], - 'publication_pid': ['123456', '7891011'], - 'pi': ('PI_dude@foo.bar', 'PIDude'), - 'artifact_biom_ids': [4, 5, 6, 7], - 'preparation_data_types': ['18S'], - 'study_tags': None, + "study_id": 1, + "owner": "Dude", + "study_alias": "Cannabis Soils", + "study_abstract": "This is a preliminary study to examine the microbiota " + "associated with the Cannabis plant. Soils samples " + "from the bulk soil, soil associated with the roots, " + "and the rhizosphere were extracted and the DNA " + "sequenced. Roots from three independent plants of " + "different strains were examined. These roots were " + "obtained November 11, 2011 from plants that had been " + "harvested in the summer. 
Future studies will attempt " + "to analyze the soils and rhizospheres from the same " + "location at different time points in the plant " + "lifecycle.", + "metadata_complete": True, + "autoloaded": False, + "ebi_study_accession": "EBI123456-BB", + "study_title": "Identification of the Microbiomes for Cannabis Soils", + "number_samples_collected": 27, + "shared": [("shared@foo.bar", "Shared")], + "publication_doi": ["10.100/123456", "10.100/7891011"], + "publication_pid": ["123456", "7891011"], + "pi": ("PI_dude@foo.bar", "PIDude"), + "artifact_biom_ids": [4, 5, 6, 7], + "preparation_data_types": ["18S"], + "study_tags": None, } -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_db/user.py b/qiita_db/user.py index b4beac184..9452a3f7e 100644 --- a/qiita_db/user.py +++ b/qiita_db/user.py @@ -16,6 +16,7 @@ User """ + # ----------------------------------------------------------------------------- # Copyright (c) 2014--, The Qiita Development Team. # @@ -23,14 +24,16 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from re import sub from datetime import datetime - -from qiita_core.exceptions import (IncorrectEmailError, IncorrectPasswordError, - IncompetentQiitaDeveloperError) -from qiita_core.qiita_settings import qiita_config +from re import sub import qiita_db as qdb +from qiita_core.exceptions import ( + IncompetentQiitaDeveloperError, + IncorrectEmailError, + IncorrectPasswordError, +) +from qiita_core.qiita_settings import qiita_config class User(qdb.base.QiitaObject): @@ -134,8 +137,11 @@ def login(cls, email, password): raise IncorrectPasswordError("Password not valid!") # pull password out of database - sql = ("SELECT password, user_level_id FROM qiita.{0} WHERE " - "email = %s".format(cls._table)) + sql = ( + "SELECT password, user_level_id FROM qiita.{0} WHERE email = %s".format( + cls._table + ) + ) qdb.sql_connection.TRN.add(sql, [email]) # Using [0] because there is only one row info = qdb.sql_connection.TRN.execute_fetchindex()[0] @@ -203,15 +209,14 @@ def create(cls, email, password, info=None): # make sure user does not already exist if cls.exists(email): - raise qdb.exceptions.QiitaDBDuplicateError( - "User", "email: %s" % email) + raise qdb.exceptions.QiitaDBDuplicateError("User", "email: %s" % email) # make sure non-info columns aren't passed in info dict if info: if cls._non_info.intersection(info): raise qdb.exceptions.QiitaDBColumnError( - "non info keys passed: %s" % - cls._non_info.intersection(info)) + "non info keys passed: %s" % cls._non_info.intersection(info) + ) else: info = {} @@ -219,8 +224,7 @@ def create(cls, email, password, info=None): # add values to info info["email"] = email info["password"] = qdb.util.hash_password(password) - info["user_verify_code"] = qdb.util.create_rand_string( - 20, punct=False) + info["user_verify_code"] = qdb.util.create_rand_string(20, punct=False) # make sure keys in info correspond to columns in table qdb.util.check_table_cols(info, cls._table) @@ -231,7 +235,8 @@ def create(cls, email, password, info=None): values = [info[col] for col in columns] # crete user sql = "INSERT INTO qiita.{0} ({1}) VALUES ({2})".format( - cls._table, ','.join(columns), ','.join(['%s'] * len(values))) + cls._table, ",".join(columns), ",".join(["%s"] * len(values)) + ) qdb.sql_connection.TRN.add(sql, values) # Add system messages to user @@ -269,16 +274,17 @@ def verify_code(cls, email, code, code_type): 
User has no code of the given type """ with qdb.sql_connection.TRN: - if code_type == 'create': - column = 'user_verify_code' - elif code_type == 'reset': - column = 'pass_reset_code' + if code_type == "create": + column = "user_verify_code" + elif code_type == "reset": + column = "pass_reset_code" else: raise IncompetentQiitaDeveloperError( - "code_type must be 'create' or 'reset' Uknown type %s" - % code_type) + "code_type must be 'create' or 'reset' Uknown type %s" % code_type + ) sql = "SELECT {0} FROM qiita.{1} WHERE email = %s".format( - column, cls._table) + column, cls._table + ) qdb.sql_connection.TRN.add(sql, [email]) db_code = qdb.sql_connection.TRN.execute_fetchindex() @@ -288,7 +294,8 @@ def verify_code(cls, email, code, code_type): db_code = db_code[0][0] if db_code is None: raise qdb.exceptions.QiitaDBError( - "No %s code for user %s" % (column, email)) + "No %s code for user %s" % (column, email) + ) correct_code = db_code == code @@ -299,8 +306,7 @@ def verify_code(cls, email, code, code_type): if code_type == "create": # verify the user - level = qdb.util.convert_to_id( - 'user', 'user_level', 'name') + level = qdb.util.convert_to_id("user", "user_level", "name") sql = """UPDATE qiita.{} SET user_level_id = %s WHERE email = %s""".format(cls._table) qdb.sql_connection.TRN.add(sql, [level, email]) @@ -320,8 +326,7 @@ def verify_code(cls, email, code, code_type): portal_ids = qdb.sql_connection.TRN.execute_fetchflatten() for portal_id in portal_ids: - args = [email, '%s-dflt-%d' % (email, portal_id), - 'dflt', True] + args = [email, "%s-dflt-%d" % (email, portal_id), "dflt", True] qdb.sql_connection.TRN.add(an_sql, args) an_id = qdb.sql_connection.TRN.execute_fetchlast() qdb.sql_connection.TRN.add(ap_sql, [an_id, portal_id]) @@ -333,12 +338,17 @@ def verify_code(cls, email, code, code_type): @classmethod def delete(cls, email, force=False): if not cls.exists(email): - raise IncorrectEmailError(f'This email does not exist: {email}') - - tables = ['qiita.study_users', 'qiita.study_tags', - 'qiita.processing_job_workflow', 'qiita.processing_job', - 'qiita.message_user', 'qiita.analysis_users', - 'qiita.analysis'] + raise IncorrectEmailError(f"This email does not exist: {email}") + + tables = [ + "qiita.study_users", + "qiita.study_tags", + "qiita.processing_job_workflow", + "qiita.processing_job", + "qiita.message_user", + "qiita.analysis_users", + "qiita.analysis", + ] not_empty = [] for t in tables: @@ -350,8 +360,10 @@ def delete(cls, email, force=False): not_empty.append(t) if not_empty and not force: - raise ValueError(f'These tables are not empty: "{not_empty}", ' - 'delete them first or use `force=True`') + raise ValueError( + f'These tables are not empty: "{not_empty}", ' + "delete them first or use `force=True`" + ) sql = """ DELETE FROM qiita.study_users WHERE email = %(email)s; @@ -409,7 +421,7 @@ def delete(cls, email, force=False): DELETE FROM qiita.qiita_user WHERE email = %(email)s;""" with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add(sql, {'email': email}) + qdb.sql_connection.TRN.add(sql, {"email": email}) qdb.sql_connection.TRN.execute() # ---properties--- @@ -423,8 +435,7 @@ def password(self): """The password of the user""" with qdb.sql_connection.TRN: # pull password out of database - sql = "SELECT password FROM qiita.{0} WHERE email = %s".format( - self._table) + sql = "SELECT password FROM qiita.{0} WHERE email = %s".format(self._table) qdb.sql_connection.TRN.add(sql, [self.email]) return qdb.sql_connection.TRN.execute_fetchlast() @@ -445,8 +456,7 
@@ def level(self): def info(self): """Dict with any other information attached to the user""" with qdb.sql_connection.TRN: - sql = "SELECT * from qiita.{0} WHERE email = %s".format( - self._table) + sql = "SELECT * from qiita.{0} WHERE email = %s".format(self._table) # Need direct typecast from psycopg2 dict to standard dict qdb.sql_connection.TRN.add(sql, [self._id]) # [0] retrieves the first row (the only one present) @@ -467,8 +477,7 @@ def info(self, info): with qdb.sql_connection.TRN: # make sure non-info columns aren't passed in info dict if self._non_info.intersection(info): - raise qdb.exceptions.QiitaDBColumnError( - "non info keys passed!") + raise qdb.exceptions.QiitaDBColumnError("non info keys passed!") # make sure keys in info correspond to columns in table qdb.util.check_table_cols(info, self._table) @@ -482,8 +491,9 @@ def info(self, info): data.append(val) data.append(self._id) - sql = ("UPDATE qiita.{0} SET {1} WHERE " - "email = %s".format(self._table, ','.join(sql_insert))) + sql = "UPDATE qiita.{0} SET {1} WHERE email = %s".format( + self._table, ",".join(sql_insert) + ) qdb.sql_connection.TRN.add(sql, data) qdb.sql_connection.TRN.execute() @@ -496,8 +506,7 @@ def default_analysis(self): JOIN qiita.portal_type USING (portal_type_id) WHERE email = %s AND dflt = true AND portal = %s""" qdb.sql_connection.TRN.add(sql, [self._id, qiita_config.portal]) - return qdb.analysis.Analysis( - qdb.sql_connection.TRN.execute_fetchlast()) + return qdb.analysis.Analysis(qdb.sql_connection.TRN.execute_fetchlast()) @property def user_studies(self): @@ -511,7 +520,8 @@ def user_studies(self): qdb.sql_connection.TRN.add(sql, [self._id, qiita_config.portal]) return set( qdb.study.Study(sid) - for sid in qdb.sql_connection.TRN.execute_fetchflatten()) + for sid in qdb.sql_connection.TRN.execute_fetchflatten() + ) @property def shared_studies(self): @@ -525,7 +535,8 @@ def shared_studies(self): qdb.sql_connection.TRN.add(sql, [self._id, qiita_config.portal]) return set( qdb.study.Study(sid) - for sid in qdb.sql_connection.TRN.execute_fetchflatten()) + for sid in qdb.sql_connection.TRN.execute_fetchflatten() + ) @property def private_analyses(self): @@ -538,7 +549,8 @@ def private_analyses(self): qdb.sql_connection.TRN.add(sql, [self._id, qiita_config.portal]) return set( qdb.analysis.Analysis(aid) - for aid in qdb.sql_connection.TRN.execute_fetchflatten()) + for aid in qdb.sql_connection.TRN.execute_fetchflatten() + ) @property def shared_analyses(self): @@ -551,7 +563,8 @@ def shared_analyses(self): qdb.sql_connection.TRN.add(sql, [self._id, qiita_config.portal]) return set( qdb.analysis.Analysis(aid) - for aid in qdb.sql_connection.TRN.execute_fetchflatten()) + for aid in qdb.sql_connection.TRN.execute_fetchflatten() + ) @property def unread_messages(self): @@ -613,7 +626,8 @@ def user_artifacts(self, artifact_type=None): res = {} for s_id, artifact_ids in db_res.items(): res[qdb.study.Study(s_id)] = [ - qdb.artifact.Artifact(a_id) for a_id in artifact_ids] + qdb.artifact.Artifact(a_id) for a_id in artifact_ids + ] return res @@ -633,8 +647,7 @@ def change_password(self, oldpass, newpass): password changed or not """ with qdb.sql_connection.TRN: - sql = "SELECT password FROM qiita.{0} WHERE email = %s".format( - self._table) + sql = "SELECT password FROM qiita.{0} WHERE email = %s".format(self._table) qdb.sql_connection.TRN.add(sql, [self._id]) dbpass = qdb.sql_connection.TRN.execute_fetchlast() if dbpass == qdb.util.hash_password(oldpass, dbpass): @@ -679,7 +692,8 @@ def 
_change_pass(self, newpass): SET password=%s, pass_reset_code = NULL WHERE email = %s""".format(self._table) qdb.sql_connection.perform_as_transaction( - sql, [qdb.util.hash_password(newpass), self._id]) + sql, [qdb.util.hash_password(newpass), self._id] + ) def messages(self, count=None): """Return messages in user's queue @@ -753,7 +767,7 @@ def delete_messages(self, messages): qdb.sql_connection.TRN.add(sql) qdb.sql_connection.TRN.execute() - def jobs(self, limit=30, ignore_status=['success'], show_hidden=False): + def jobs(self, limit=30, ignore_status=["success"], show_hidden=False): """Return jobs created by the user Parameters @@ -785,7 +799,7 @@ def jobs(self, limit=30, ignore_status=['success'], show_hidden=False): sql_info = [self._id, limit] if not show_hidden: - sql += ' AND hidden = false' + sql += " AND hidden = false" sql += """ ORDER BY CASE processing_job_status @@ -798,18 +812,20 @@ def jobs(self, limit=30, ignore_status=['success'], show_hidden=False): END, heartbeat DESC LIMIT %s""" qdb.sql_connection.TRN.add(sql, sql_info) - return [qdb.processing_job.ProcessingJob(p[0]) - for p in qdb.sql_connection.TRN.execute_fetchindex()] + return [ + qdb.processing_job.ProcessingJob(p[0]) + for p in qdb.sql_connection.TRN.execute_fetchindex() + ] def update_email(self, email): if not validate_email(email): - raise IncorrectEmailError(f'Bad email given: {email}') + raise IncorrectEmailError(f"Bad email given: {email}") if self.exists(email): - raise IncorrectEmailError(f'This email already exists: {email}') + raise IncorrectEmailError(f"This email already exists: {email}") with qdb.sql_connection.TRN: - sql = 'UPDATE qiita.qiita_user SET email = %s where email = %s' + sql = "UPDATE qiita.qiita_user SET email = %s where email = %s" qdb.sql_connection.TRN.add(sql, [email, self.email]) @@ -843,7 +859,7 @@ def validate_email(email): """ # Do not accept email addresses that have unicode characters try: - email.encode('ascii') + email.encode("ascii") except UnicodeError: return False @@ -852,30 +868,30 @@ def validate_email(email): return False # Must have exactly 1 @ symbol - if email.count('@') != 1: + if email.count("@") != 1: return False - local_part, domain_part = email.split('@') + local_part, domain_part = email.split("@") # Neither part can be blank if not (local_part and domain_part): return False # The local part cannot begin or end with a dot - if local_part.startswith('.') or local_part.endswith('.'): + if local_part.startswith(".") or local_part.endswith("."): return False # The domain part cannot begin or end with a hyphen - if domain_part.startswith('-') or domain_part.endswith('-'): + if domain_part.startswith("-") or domain_part.endswith("-"): return False # This is the full set of allowable characters for the local part. 
local_valid_chars = "[a-zA-Z0-9#_~!$&'()*+,;=:.-]" - if len(sub(local_valid_chars, '', local_part)): + if len(sub(local_valid_chars, "", local_part)): return False domain_valid_chars = "[a-zA-Z0-9.-]" - if len(sub(domain_valid_chars, '', domain_part)): + if len(sub(domain_valid_chars, "", domain_part)): return False return True @@ -918,7 +934,7 @@ def validate_password(password): return False try: - password.encode('ascii') + password.encode("ascii") except UnicodeError: return False diff --git a/qiita_db/util.py b/qiita_db/util.py index b335643c6..667e91627 100644 --- a/qiita_db/util.py +++ b/qiita_db/util.py @@ -36,6 +36,7 @@ generate_analysis_list human_merging_scheme """ + # ----------------------------------------------------------------------------- # Copyright (c) 2014--, The Qiita Development Team. # @@ -43,44 +44,44 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from random import SystemRandom -from string import ascii_letters, digits, punctuation +import hashlib from binascii import crc32 -from bcrypt import hashpw, gensalt -from functools import partial -from os.path import join, basename, isdir, exists, getsize -from os import walk, remove, listdir, stat, makedirs -from glob import glob -from shutil import move, rmtree, copy as shutil_copy -from openpyxl import load_workbook -from tempfile import mkstemp +from contextlib import contextmanager from csv import writer as csv_writer from datetime import datetime, timedelta -from time import time as now +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from errno import EEXIST +from functools import partial +from glob import glob +from io import StringIO from itertools import chain -from contextlib import contextmanager -import h5py -from humanize import naturalsize -import hashlib +from json import loads +from os import listdir, makedirs, remove, stat, walk +from os.path import basename, exists, getsize, isdir, join +from random import SystemRandom +from shutil import copy as shutil_copy +from shutil import move, rmtree from smtplib import SMTP, SMTP_SSL, SMTPException - -from errno import EEXIST -from qiita_core.exceptions import IncompetentQiitaDeveloperError -from qiita_core.qiita_settings import qiita_config +from string import ascii_letters, digits, punctuation from subprocess import check_output -import qiita_db as qdb - -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText +from tempfile import mkstemp +from time import time as now +import h5py import matplotlib.pyplot as plt -from matplotlib import colormaps import numpy as np import pandas as pd -from io import StringIO -from json import loads +from bcrypt import gensalt, hashpw +from humanize import naturalsize +from matplotlib import colormaps +from openpyxl import load_workbook from scipy.optimize import minimize +import qiita_db as qdb +from qiita_core.exceptions import IncompetentQiitaDeveloperError +from qiita_core.qiita_settings import qiita_config + def scrub_data(s): r"""Scrubs data fields of characters not allowed by PostgreSQL @@ -138,8 +139,9 @@ def convert_type(obj): else: break if item is None: - raise IncompetentQiitaDeveloperError("Can't convert item of type %s!" % - str(type(obj))) + raise IncompetentQiitaDeveloperError( + "Can't convert item of type %s!" 
% str(type(obj)) + ) return item @@ -160,14 +162,17 @@ def get_artifact_types(key_by_id=False): {artifact_type: artifact_type_id} """ with qdb.sql_connection.TRN: - cols = ('artifact_type_id, artifact_type' - if key_by_id else 'artifact_type, artifact_type_id') + cols = ( + "artifact_type_id, artifact_type" + if key_by_id + else "artifact_type, artifact_type_id" + ) sql = "SELECT {} FROM qiita.artifact_type".format(cols) qdb.sql_connection.TRN.add(sql) return dict(qdb.sql_connection.TRN.execute_fetchindex()) -def get_filepath_types(key='filepath_type'): +def get_filepath_types(key="filepath_type"): """Gets the list of possible filepath types from the filetype table Parameters @@ -185,20 +190,20 @@ def get_filepath_types(key='filepath_type'): {filepath_type_id: filepath_type} """ with qdb.sql_connection.TRN: - if key == 'filepath_type': - cols = 'filepath_type, filepath_type_id' - elif key == 'filepath_type_id': - cols = 'filepath_type_id, filepath_type' + if key == "filepath_type": + cols = "filepath_type, filepath_type_id" + elif key == "filepath_type_id": + cols = "filepath_type_id, filepath_type" else: raise qdb.exceptions.QiitaDBColumnError( - "Unknown key. Pass either 'filepath_type' or " - "'filepath_type_id'.") - sql = 'SELECT {} FROM qiita.filepath_type'.format(cols) + "Unknown key. Pass either 'filepath_type' or 'filepath_type_id'." + ) + sql = "SELECT {} FROM qiita.filepath_type".format(cols) qdb.sql_connection.TRN.add(sql) return dict(qdb.sql_connection.TRN.execute_fetchindex()) -def get_data_types(key='data_type'): +def get_data_types(key="data_type"): """Gets the list of possible data types from the data_type table Parameters @@ -215,14 +220,15 @@ def get_data_types(key='data_type'): {data_type_id: data_type} """ with qdb.sql_connection.TRN: - if key == 'data_type': - cols = 'data_type, data_type_id' - elif key == 'data_type_id': - cols = 'data_type_id, data_type' + if key == "data_type": + cols = "data_type, data_type_id" + elif key == "data_type_id": + cols = "data_type_id, data_type" else: raise qdb.exceptions.QiitaDBColumnError( - "Unknown key. Pass either 'data_type_id' or 'data_type'.") - sql = 'SELECT {} FROM qiita.data_type'.format(cols) + "Unknown key. Pass either 'data_type_id' or 'data_type'." 
+ ) + sql = "SELECT {} FROM qiita.data_type".format(cols) qdb.sql_connection.TRN.add(sql) return dict(qdb.sql_connection.TRN.execute_fetchindex()) @@ -241,7 +247,7 @@ def create_rand_string(length, punct=True): if punct: chars += punctuation sr = SystemRandom() - return ''.join(sr.choice(chars) for i in range(length)) + return "".join(sr.choice(chars) for i in range(length)) def hash_password(password, hashedpw=None): @@ -270,8 +276,8 @@ def hash_password(password, hashedpw=None): if hashedpw is None: hashedpw = gensalt() else: - hashedpw = hashedpw.encode('utf-8') - password = password.encode('utf-8') + hashedpw = hashedpw.encode("utf-8") + password = password.encode("utf-8") output = hashpw(password, hashedpw) if isinstance(output, bytes): output = output.decode("utf-8") @@ -303,12 +309,12 @@ def check_required_columns(keys, table): # Test needed because a user with certain permissions can query without # error but be unable to get the column names if len(cols) == 0: - raise RuntimeError("Unable to fetch column names for table %s" - % table) - required = set(x[1] for x in cols if x[0] == 'NO' and x[2] is None) + raise RuntimeError("Unable to fetch column names for table %s" % table) + required = set(x[1] for x in cols if x[0] == "NO" and x[2] is None) if len(required.difference(keys)) > 0: raise qdb.exceptions.QiitaDBColumnError( - "Required keys missing: %s" % required.difference(keys)) + "Required keys missing: %s" % required.difference(keys) + ) def check_table_cols(keys, table): @@ -336,11 +342,11 @@ def check_table_cols(keys, table): # Test needed because a user with certain permissions can query without # error but be unable to get the column names if len(cols) == 0: - raise RuntimeError("Unable to fetch column names for table %s" - % table) + raise RuntimeError("Unable to fetch column names for table %s" % table) if len(set(keys).difference(cols)) > 0: raise qdb.exceptions.QiitaDBColumnError( - "Non-database keys found: %s" % set(keys).difference(cols)) + "Non-database keys found: %s" % set(keys).difference(cols) + ) def get_table_cols(table): @@ -423,8 +429,7 @@ def max_preparation_samples(): The max number of samples allowed in a single preparation """ with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add( - "SELECT max_preparation_samples FROM settings") + qdb.sql_connection.TRN.add("SELECT max_preparation_samples FROM settings") return qdb.sql_connection.TRN.execute_fetchlast() @@ -437,8 +442,7 @@ def max_artifacts_in_workflow(): The max number of artifacts allowed in a single workflow """ with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add( - "SELECT max_artifacts_in_workflow FROM settings") + qdb.sql_connection.TRN.add("SELECT max_artifacts_in_workflow FROM settings") return qdb.sql_connection.TRN.execute_fetchlast() @@ -466,7 +470,7 @@ def compute_checksum(path): buffersize = 65536 crcvalue = 0 for fp in filepaths: - with open(fp, 'rb') as f: + with open(fp, "rb") as f: buffr = f.read(buffersize) while len(buffr) > 0: crcvalue = crc32(buffr, crcvalue) @@ -496,7 +500,7 @@ def get_files_from_uploads_folders(study_id): if exists(t): for f in listdir(t): d = join(t, f) - if not f.startswith('.') and not isdir(d): + if not f.startswith(".") and not isdir(d): fp.append((pid, f, naturalsize(getsize(d), gnu=True))) return fp @@ -518,22 +522,25 @@ def move_upload_files_to_trash(study_id, files_to_move): If folder_id or the study folder don't exist and if the filename to erase matches the trash_folder, internal variable """ - trash_folder = 'trash' + trash_folder = "trash" 
folders = {k: v for k, v in get_mountpoint("uploads", retrieve_all=True)} for fid, filename in files_to_move: if filename == trash_folder: raise qdb.exceptions.QiitaDBError( - "You can not erase the trash folder: %s" % trash_folder) + "You can not erase the trash folder: %s" % trash_folder + ) if fid not in folders: raise qdb.exceptions.QiitaDBError( - "The filepath id: %d doesn't exist in the database" % fid) + "The filepath id: %d doesn't exist in the database" % fid + ) foldername = join(folders[fid], str(study_id)) if not exists(foldername): raise qdb.exceptions.QiitaDBError( - "The upload folder for study id: %d doesn't exist" % study_id) + "The upload folder for study id: %d doesn't exist" % study_id + ) trashpath = join(foldername, trash_folder) create_nested_path(trashpath) @@ -546,7 +553,7 @@ def move_upload_files_to_trash(study_id, files_to_move): def get_mountpoint(mount_type, retrieve_all=False, retrieve_subdir=False): - r""" Returns the most recent values from data directory for the given type + r"""Returns the most recent values from data directory for the given type Parameters ---------- @@ -583,7 +590,7 @@ def get_mountpoint(mount_type, retrieve_all=False, retrieve_subdir=False): def get_mountpoint_path_by_id(mount_id): - r""" Returns the mountpoint path for the mountpoint with id = mount_id + r"""Returns the mountpoint path for the mountpoint with id = mount_id Parameters ---------- @@ -645,14 +652,15 @@ def insert_filepaths(filepaths, obj_id, table, move_files=True, copy=False): dirname = db_path(str(obj_id)) create_nested_path(dirname) new_filepaths = [ - (join(dirname, basename(path)), id_) - for path, id_ in filepaths] + (join(dirname, basename(path)), id_) for path, id_ in filepaths + ] else: # Generate the new fileapths. format: # mountpoint/DataId_OriginalName new_filepaths = [ (db_path("%s_%s" % (obj_id, basename(path))), id_) - for path, id_ in filepaths] + for path, id_ in filepaths + ] # Move the original files to the controlled DB directory transfer_function = shutil_copy if copy else move for old_fp, new_fp in zip(filepaths, new_filepaths): @@ -660,14 +668,24 @@ def insert_filepaths(filepaths, obj_id, table, move_files=True, copy=False): # In case the transaction executes a rollback, we need to # make sure the files have not been moved qdb.sql_connection.TRN.add_post_rollback_func( - move, new_fp[0], old_fp[0]) + move, new_fp[0], old_fp[0] + ) def str_to_id(x): - return (x if isinstance(x, int) - else convert_to_id(x, "filepath_type")) + return x if isinstance(x, int) else convert_to_id(x, "filepath_type") + # 1 is the checksum algorithm, which we only have one implemented - values = [[basename(path), str_to_id(id_), compute_checksum(path), - getsize(path), 1, dd_id] for path, id_ in new_filepaths] + values = [ + [ + basename(path), + str_to_id(id_), + compute_checksum(path), + getsize(path), + 1, + dd_id, + ] + for path, id_ in new_filepaths + ] # Insert all the filepaths at once and get the filepath_id back sql = """INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, fp_size, @@ -678,8 +696,11 @@ def str_to_id(x): qdb.sql_connection.TRN.add(sql, values, many=True) # Since we added the query with many=True, we've added len(values) # queries to the transaction, so the ids are in the last idx queries - return list(chain.from_iterable( - chain.from_iterable(qdb.sql_connection.TRN.execute()[idx:]))) + return list( + chain.from_iterable( + chain.from_iterable(qdb.sql_connection.TRN.execute()[idx:]) + ) + ) def _path_builder(db_dir, filepath, 
mountpoint, subdirectory, obj_id): @@ -709,8 +730,7 @@ def _path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id): return join(db_dir, mountpoint, filepath) -def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None, - fp_type=None): +def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None, fp_type=None): """Retrieves the filepaths for the given object id Parameters @@ -734,14 +754,15 @@ def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None, """ sql_sort = "" - if sort == 'ascending': + if sort == "ascending": sql_sort = " ORDER BY filepath_id" - elif sort == 'descending': + elif sort == "descending": sql_sort = " ORDER BY filepath_id DESC" elif sort is not None: raise qdb.exceptions.QiitaDBError( "Unknown sorting direction: %s. Please choose from 'ascending' or " - "'descending'" % sort) + "'descending'" % sort + ) sql_args = [obj_id] @@ -757,15 +778,23 @@ def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None, JOIN qiita.filepath_type USING (filepath_type_id) JOIN qiita.data_directory USING (data_directory_id) JOIN qiita.{0} USING (filepath_id) - WHERE {1} = %s{2}{3}""".format(obj_fp_table, obj_id_column, - sql_type, sql_sort) + WHERE {1} = %s{2}{3}""".format( + obj_fp_table, obj_id_column, sql_type, sql_sort + ) qdb.sql_connection.TRN.add(sql, sql_args) results = qdb.sql_connection.TRN.execute_fetchindex() db_dir = get_db_files_base_dir() - return [{'fp_id': fpid, 'fp': _path_builder(db_dir, fp, m, s, obj_id), - 'fp_type': fp_type_, 'checksum': c, 'fp_size': fpsize} - for fpid, fp, fp_type_, m, s, c, fpsize in results] + return [ + { + "fp_id": fpid, + "fp": _path_builder(db_dir, fp, m, s, obj_id), + "fp_type": fp_type_, + "checksum": c, + "fp_size": fpsize, + } + for fpid, fp, fp_type_, m, s, c, fpsize in results + ] def _rm_files(TRN, fp): @@ -799,9 +828,9 @@ def purge_filepaths(delete_files=True): # so we can recover them (this has happened before) but let's remove # those from deleted studies. 
Note that we need to check for sample, # prep and qiime info files - st_id = qdb.util.convert_to_id('sample_template', "filepath_type") - pt_id = qdb.util.convert_to_id('prep_template', "filepath_type") - qt_id = qdb.util.convert_to_id('qiime_map', "filepath_type") + st_id = qdb.util.convert_to_id("sample_template", "filepath_type") + pt_id = qdb.util.convert_to_id("prep_template", "filepath_type") + qt_id = qdb.util.convert_to_id("qiime_map", "filepath_type") sql = """SELECT filepath_id, filepath FROM qiita.filepath WHERE filepath_type_id IN %s AND filepath ~ '^[0-9]' AND data_directory_id = %s AND filepath_id NOT IN ( @@ -809,27 +838,24 @@ def purge_filepaths(delete_files=True): UNION SELECT filepath_id FROM qiita.sample_template_filepath) """ - for mp_id, mp in get_mountpoint('templates'): - qdb.sql_connection.TRN.add( - sql, [tuple([st_id, pt_id, qt_id]), mp_id]) + for mp_id, mp in get_mountpoint("templates"): + qdb.sql_connection.TRN.add(sql, [tuple([st_id, pt_id, qt_id]), mp_id]) studies_exits = [] studies_erased = [] for fid, fp in qdb.sql_connection.TRN.execute_fetchindex(): # making sure the studies do _not_ exist, remember info files # are prepended by the study id - study_id = int(fp.split('_')[0]) + study_id = int(fp.split("_")[0]) if study_id in studies_exits: continue elif study_id in studies_erased: - fpath = qdb.util.get_filepath_information( - fid)['fullpath'] + fpath = qdb.util.get_filepath_information(fid)["fullpath"] files_to_remove.append([fid, fpath]) else: try: qdb.study.Study(study_id) except qdb.exceptions.QiitaDBUnknownIDError: - fpath = qdb.util.get_filepath_information( - fid)['fullpath'] + fpath = qdb.util.get_filepath_information(fid)["fullpath"] files_to_remove.append([fid, fpath]) studies_erased.append(study_id) else: @@ -872,10 +898,10 @@ def purge_filepaths(delete_files=True): """ qdb.sql_connection.TRN.add(sql) for fid in qdb.sql_connection.TRN.execute_fetchflatten(): - fpath = qdb.util.get_filepath_information(fid)['fullpath'] - aid = fpath.split('/')[-2] + fpath = qdb.util.get_filepath_information(fid)["fullpath"] + aid = fpath.split("/")[-2] # making sure the artifact doesn't exist any more - if aid == 'None': + if aid == "None": files_to_remove.append([fid, None]) # 4. analysis: we need to select all the filepaths that are not in @@ -894,10 +920,10 @@ def purge_filepaths(delete_files=True): qdb.sql_connection.TRN.add(sql) for fid in qdb.sql_connection.TRN.execute_fetchflatten(): fdata = qdb.util.get_filepath_information(fid) - analysis_id = int(fdata['filepath'].split('_')[0]) + analysis_id = int(fdata["filepath"].split("_")[0]) # making sure the Analysis doesn't exist if not qdb.analysis.Analysis.exists(analysis_id): - fpath = fdata['fullpath'] + fpath = fdata["fullpath"] files_to_remove.append([fid, fpath]) # 5. 
working directory: this is done internally in the Qiita system via @@ -912,7 +938,7 @@ def purge_filepaths(delete_files=True): if fpath is not None: _rm_files(qdb.sql_connection.TRN, fpath) else: - print('%s: %s' % (fid, fpath)) + print("%s: %s" % (fid, fpath)) if delete_files: # there is a chance that we will never enter the above @@ -941,16 +967,15 @@ def quick_mounts_purge(): qdb.sql_connection.TRN.add(main_sql) mp_ids = qdb.sql_connection.TRN.execute_fetchflatten() mounts = [qdb.util.get_mountpoint_path_by_id(x) for x in mp_ids] - folders = [join(x, f) for x in mounts for f in listdir(x) - if f.isnumeric()] + folders = [join(x, f) for x in mounts for f in listdir(x) if f.isnumeric()] # getting all unlinked folders to_delete = [] for i, f in enumerate(folders): - vals = f.split('/') + vals = f.split("/") aid = int(vals[-1]) artifact_type = vals[-2] - if artifact_type == 'FeatureData[Taxonomy]': + if artifact_type == "FeatureData[Taxonomy]": continue try: @@ -959,35 +984,34 @@ def quick_mounts_purge(): to_delete.append(f) continue if not a.artifact_type.startswith(artifact_type): - raise ValueError('Review artifact type: ' - f'{a.id} {artifact_type} {a.artifact_type}') + raise ValueError( + f"Review artifact type: {a.id} {artifact_type} {a.artifact_type}" + ) # now, let's just keep those older than 30 days (in seconds) - ignore = now() - (30*86400) + ignore = now() - (30 * 86400) to_keep = [x for x in to_delete if stat(x).st_mtime >= ignore] to_delete = set(to_delete) - set(to_keep) # get stats to report stats = dict() for td in to_delete: - f = td.split('/')[-2] + f = td.split("/")[-2] if f not in stats: stats[f] = 0 - stats[f] += sum([getsize(join(p, fp)) for p, ds, fs in walk(td) - for fp in fs]) + stats[f] += sum([getsize(join(p, fp)) for p, ds, fs in walk(td) for fp in fs]) - report = ['----------------------'] + report = ["----------------------"] for f, s in stats.items(): - report.append(f'{f}\t{naturalsize(s)}') - report.append( - f'Total files {len(to_delete)} {naturalsize(sum(stats.values()))}') - report.append('----------------------') + report.append(f"{f}\t{naturalsize(s)}") + report.append(f"Total files {len(to_delete)} {naturalsize(sum(stats.values()))}") + report.append("----------------------") for td in list(to_delete): if exists(td): rmtree(td) - return '\n'.join(report) + return "\n".join(report) def _rm_exists(fp, obj, _id, delete_files): @@ -1022,7 +1046,7 @@ def empty_trash_upload_folder(delete_files=True): for mp in qdb.sql_connection.TRN.execute_fetchflatten(): for path, dirs, files in walk(gfp(mp)): - if path.endswith('/trash'): + if path.endswith("/trash"): if delete_files: for f in files: fp = join(path, f) @@ -1053,25 +1077,32 @@ def move_filepaths_to_upload_folder(study_id, filepaths): path_builder = partial(join, uploads_fp) # do not move these files-types back to upload folder. 
- do_not_move = ['preprocessed_fasta', 'preprocessed_fastq', - 'preprocessed_demux', 'directory', 'log', - 'html_summary', 'tgz', 'html_summary_dir', 'qzv', 'qza'] + do_not_move = [ + "preprocessed_fasta", + "preprocessed_fastq", + "preprocessed_demux", + "directory", + "log", + "html_summary", + "tgz", + "html_summary_dir", + "qzv", + "qza", + ] # We can now go over and remove all the filepaths sql = """DELETE FROM qiita.filepath WHERE filepath_id = %s""" for x in filepaths: - qdb.sql_connection.TRN.add(sql, [x['fp_id']]) + qdb.sql_connection.TRN.add(sql, [x["fp_id"]]) - if x['fp_type'] in do_not_move: - _rm_files(qdb.sql_connection.TRN, x['fp']) + if x["fp_type"] in do_not_move: + _rm_files(qdb.sql_connection.TRN, x["fp"]) continue # if files were not removed, then they should be moved. - destination = path_builder(basename(x['fp'])) - qdb.sql_connection.TRN.add_post_rollback_func(move, - destination, - x['fp']) - move(x['fp'], destination) + destination = path_builder(basename(x["fp"])) + qdb.sql_connection.TRN.add_post_rollback_func(move, destination, x["fp"]) + move(x["fp"], destination) qdb.sql_connection.TRN.execute() @@ -1101,10 +1132,14 @@ def get_filepath_information(filepath_id): qdb.sql_connection.TRN.add(sql, [filepath_id]) res = dict(qdb.sql_connection.TRN.execute_fetchindex()[0]) - obj_id = res.pop('artifact_id') - res['fullpath'] = _path_builder(get_db_files_base_dir(), - res['filepath'], res['mountpoint'], - res['subdirectory'], obj_id) + obj_id = res.pop("artifact_id") + res["fullpath"] = _path_builder( + get_db_files_base_dir(), + res["filepath"], + res["mountpoint"], + res["subdirectory"], + obj_id, + ) return res @@ -1218,13 +1253,13 @@ def convert_to_id(value, table, text_col=None): """ text_col = table if text_col is None else text_col with qdb.sql_connection.TRN: - sql = "SELECT {0}_id FROM qiita.{0} WHERE {1} = %s".format( - table, text_col) + sql = "SELECT {0}_id FROM qiita.{0} WHERE {1} = %s".format(table, text_col) qdb.sql_connection.TRN.add(sql, [value]) _id = qdb.sql_connection.TRN.execute_fetchindex() if not _id: raise qdb.exceptions.QiitaDBLookupError( - "%s not valid for table %s" % (value, table)) + "%s not valid for table %s" % (value, table) + ) # If there was a result it was a single row and and single value, # hence access to [0][0] return _id[0][0] @@ -1256,7 +1291,8 @@ def convert_from_id(value, table): string = qdb.sql_connection.TRN.execute_fetchindex() if not string: raise qdb.exceptions.QiitaDBLookupError( - "%s not valid for table %s" % (value, table)) + "%s not valid for table %s" % (value, table) + ) # If there was a result it was a single row and and single value, # hence access to [0][0] return string[0][0] @@ -1361,8 +1397,7 @@ def get_pubmed_ids_from_dois(doi_ids): with qdb.sql_connection.TRN: sql = "SELECT doi, pubmed_id FROM qiita.publication WHERE doi IN %s" qdb.sql_connection.TRN.add(sql, [tuple(doi_ids)]) - return {row[0]: row[1] - for row in qdb.sql_connection.TRN.execute_fetchindex()} + return {row[0]: row[1] for row in qdb.sql_connection.TRN.execute_fetchindex()} def infer_status(statuses): @@ -1389,15 +1424,15 @@ def infer_status(statuses): """ if statuses: statuses = set(s[0] for s in statuses) - if 'public' in statuses: - return 'public' - if 'private' in statuses: - return 'private' - if 'awaiting_approval' in statuses: - return 'awaiting_approval' + if "public" in statuses: + return "public" + if "private" in statuses: + return "private" + if "awaiting_approval" in statuses: + return "awaiting_approval" # If there are no 
statuses, or any of the previous ones have been found # then the inferred status is 'sandbox' - return 'sandbox' + return "sandbox" def add_message(message, users): @@ -1543,19 +1578,21 @@ def generate_study_list(user, visibility): WHERE email=qiita.study.email) AS owner """ - visibility_sql = '' + visibility_sql = "" sids = set(s.id for s in user.user_studies.union(user.shared_studies)) - if visibility == 'user': - if user.level == 'admin': - sids = (sids | - qdb.study.Study.get_ids_by_status('sandbox') | - qdb.study.Study.get_ids_by_status('private') | - qdb.study.Study.get_ids_by_status('awaiting_approval')) - elif visibility == 'public': - sids = qdb.study.Study.get_ids_by_status('public') - sids + if visibility == "user": + if user.level == "admin": + sids = ( + sids + | qdb.study.Study.get_ids_by_status("sandbox") + | qdb.study.Study.get_ids_by_status("private") + | qdb.study.Study.get_ids_by_status("awaiting_approval") + ) + elif visibility == "public": + sids = qdb.study.Study.get_ids_by_status("public") - sids visibility_sql = "visibility = 'public' AND" else: - raise ValueError('Not a valid visibility: %s' % visibility) + raise ValueError("Not a valid visibility: %s" % visibility) sql = """ SELECT metadata_complete, study_abstract, study_id, study_alias, @@ -1611,57 +1648,56 @@ def generate_study_list(user, visibility): info = dict(info) # cleaning owners name - if info['owner'] in (None, ''): - info['owner'] = info['owner_email'] - del info['owner_email'] + if info["owner"] in (None, ""): + info["owner"] = info["owner_email"] + del info["owner_email"] preparation_data_types = [] artifact_biom_ids = [] - if info['preparation_information'] is not None: - for pinfo in info['preparation_information']: + if info["preparation_information"] is not None: + for pinfo in info["preparation_information"]: # 'f1': prep_template_id, 'f2': data_type, # 'f3': artifact_id, 'f4': artifact_type, # 'f5':deprecated, 'f6': biom artifacts - if pinfo['f5']: + if pinfo["f5"]: continue - preparation_data_types.append(pinfo['f2']) - if pinfo['f4'] == 'BIOM': - artifact_biom_ids.append(pinfo['f3']) - if pinfo['f6'] is not None: - artifact_biom_ids.extend( - map(int, pinfo['f6'].split(','))) - del info['preparation_information'] - info['artifact_biom_ids'] = list(set(artifact_biom_ids)) - info['preparation_data_types'] = list(set( - preparation_data_types)) + preparation_data_types.append(pinfo["f2"]) + if pinfo["f4"] == "BIOM": + artifact_biom_ids.append(pinfo["f3"]) + if pinfo["f6"] is not None: + artifact_biom_ids.extend(map(int, pinfo["f6"].split(","))) + del info["preparation_information"] + info["artifact_biom_ids"] = list(set(artifact_biom_ids)) + info["preparation_data_types"] = list(set(preparation_data_types)) # publication info - info['publication_doi'] = [] - info['publication_pid'] = [] - if info['publications'] is not None: - for p in info['publications']: + info["publication_doi"] = [] + info["publication_pid"] = [] + if info["publications"] is not None: + for p in info["publications"]: # f1-2 are the default names given by pgsql - pub = p['f1'] - is_doi = p['f2'] + pub = p["f1"] + is_doi = p["f2"] if is_doi: - info['publication_doi'].append(pub) + info["publication_doi"].append(pub) else: - info['publication_pid'].append(pub) - del info['publications'] + info["publication_pid"].append(pub) + del info["publications"] # pi info - info["pi"] = (info['pi_email'], info['pi_name']) + info["pi"] = (info["pi_email"], info["pi_name"]) del info["pi_email"] del info["pi_name"] # shared with - 
info['shared'] = [] - if info['shared_with_name'] and info['shared_with_email']: - for name, email in zip(info['shared_with_name'], - info['shared_with_email']): + info["shared"] = [] + if info["shared_with_name"] and info["shared_with_email"]: + for name, email in zip( + info["shared_with_name"], info["shared_with_email"] + ): if not name: name = email - info['shared'].append((email, name)) + info["shared"].append((email, name)) del info["shared_with_name"] del info["shared_with_email"] @@ -1691,7 +1727,7 @@ def generate_study_list(user, visibility): # qdb.sql_connection.TRN.add(sql) # hsn = qdb.sql_connection.TRN.execute_fetchflatten() # info['host_scientific_name'] = hsn - del info['has_sample_info'] + del info["has_sample_info"] infolist.append(info) return infolist @@ -1756,21 +1792,21 @@ def generate_study_list_without_artifacts(study_ids, portal=None): info = dict(info) # publication info - info['publication_doi'] = [] - info['publication_pid'] = [] - if info['publications'] is not None: - for p in info['publications']: + info["publication_doi"] = [] + info["publication_pid"] = [] + if info["publications"] is not None: + for p in info["publications"]: # f1-2 are the default names given - pub = p['f1'] - is_doi = p['f2'] + pub = p["f1"] + is_doi = p["f2"] if is_doi: - info['publication_doi'].append(pub) + info["publication_doi"].append(pub) else: - info['publication_pid'].append(pub) - del info['publications'] + info["publication_pid"].append(pub) + del info["publications"] # pi info - info["pi"] = (info['pi_email'], info['pi_name']) + info["pi"] = (info["pi_email"], info["pi_name"]) del info["pi_email"] del info["pi_name"] @@ -1862,10 +1898,11 @@ def get_artifacts_information(artifact_ids, only_biom=True): for cid, params in qdb.sql_connection.TRN.execute_fetchindex(): cmd = qdb.software.Command(cid) commands[cid] = { - 'params': params, - 'merging_scheme': cmd.merging_scheme, - 'active': cmd.active, - 'deprecated': cmd.software.deprecated} + "params": params, + "merging_scheme": cmd.merging_scheme, + "active": cmd.active, + "deprecated": cmd.software.deprecated, + } # Now let's get the actual artifacts. 
Note that ts is a cache # (prep id : target subfragment) so we don't have to query @@ -1874,13 +1911,28 @@ def get_artifacts_information(artifact_ids, only_biom=True): # file; thus we can have a None prep id (key) ts = {None: []} ps = {} - algorithm_az = {'': ''} + algorithm_az = {"": ""} PT = qdb.metadata_template.prep_template.PrepTemplate - qdb.sql_connection.TRN.add(sql, [ - tuple(artifact_ids), qdb.util.artifact_visibilities_to_skip()]) + qdb.sql_connection.TRN.add( + sql, [tuple(artifact_ids), qdb.util.artifact_visibilities_to_skip()] + ) for row in qdb.sql_connection.TRN.execute_fetchindex(): - aid, name, cid, cname, gt, aparams, dt, pid, pcid, pname, \ - pparams, filepaths, _, prep_template_id = row + ( + aid, + name, + cid, + cname, + gt, + aparams, + dt, + pid, + pcid, + pname, + pparams, + filepaths, + _, + prep_template_id, + ) = row # cleaning up aparams & pparams # - [0] due to the array_agg @@ -1890,34 +1942,39 @@ def get_artifacts_information(artifact_ids, only_biom=True): aparams = {} else: # we are going to remove any artifacts from the parameters - for ti in commands[cid]['params']: + for ti in commands[cid]["params"]: del aparams[ti] # - ignoring empty filepaths if filepaths == [None]: filepaths = [] else: - filepaths = [fp for fp in filepaths if fp.endswith('biom')] + filepaths = [fp for fp in filepaths if fp.endswith("biom")] # generating algorithm, by default is '' - algorithm = '' + algorithm = "" if cid is not None: - deprecated = commands[cid]['deprecated'] - active = commands[cid]['active'] + deprecated = commands[cid]["deprecated"] + active = commands[cid]["active"] if pcid is None: parent_merging_scheme = None else: - parent_merging_scheme = commands[pcid][ - 'merging_scheme'] + parent_merging_scheme = commands[pcid]["merging_scheme"] algorithm = human_merging_scheme( - cname, commands[cid]['merging_scheme'], - pname, parent_merging_scheme, - aparams, filepaths, pparams) + cname, + commands[cid]["merging_scheme"], + pname, + parent_merging_scheme, + aparams, + filepaths, + pparams, + ) if algorithm not in algorithm_az: algorithm_az[algorithm] = hashlib.md5( - algorithm.encode('utf-8')).hexdigest() + algorithm.encode("utf-8") + ).hexdigest() else: # there is no cid, thus is a direct upload; setting things # like this so the artifacts are dispayed @@ -1926,51 +1983,51 @@ def get_artifacts_information(artifact_ids, only_biom=True): if prep_template_id not in ts: qdb.sql_connection.TRN.add(sql_ts, [prep_template_id]) - ts[prep_template_id] = \ - qdb.sql_connection.TRN.execute_fetchflatten() + ts[prep_template_id] = qdb.sql_connection.TRN.execute_fetchflatten() target = ts[prep_template_id] prep_samples = 0 - platform = 'not provided' - target_gene = 'not provided' + platform = "not provided" + target_gene = "not provided" if prep_template_id is not None: if prep_template_id not in ps: pt = PT(prep_template_id) categories = pt.categories - if 'platform' in categories: - platform = ', '.join( - set(pt.get_category('platform').values())) - if 'target_gene' in categories: - target_gene = ', '.join( - set(pt.get_category('target_gene').values())) + if "platform" in categories: + platform = ", ".join(set(pt.get_category("platform").values())) + if "target_gene" in categories: + target_gene = ", ".join( + set(pt.get_category("target_gene").values()) + ) - ps[prep_template_id] = [ - len(list(pt.keys())), platform, target_gene] + ps[prep_template_id] = [len(list(pt.keys())), platform, target_gene] prep_samples, platform, target_gene = ps[prep_template_id] - results.append({ 
- 'artifact_id': aid, - 'target_subfragment': target, - 'prep_samples': prep_samples, - 'platform': platform, - 'target_gene': target_gene, - 'name': name, - 'data_type': dt, - 'timestamp': str(gt), - 'parameters': aparams, - 'algorithm': algorithm, - 'algorithm_az': algorithm_az[algorithm], - 'deprecated': deprecated, - 'active': active, - 'files': filepaths}) + results.append( + { + "artifact_id": aid, + "target_subfragment": target, + "prep_samples": prep_samples, + "platform": platform, + "target_gene": target_gene, + "name": name, + "data_type": dt, + "timestamp": str(gt), + "parameters": aparams, + "algorithm": algorithm, + "algorithm_az": algorithm_az[algorithm], + "deprecated": deprecated, + "active": active, + "files": filepaths, + } + ) return results def _is_string_or_bytes(s): - """Returns True if input argument is string (unicode or not) or bytes. - """ + """Returns True if input argument is string (unicode or not) or bytes.""" return isinstance(s, str) or isinstance(s, bytes) @@ -1986,23 +2043,28 @@ def _get_filehandle(filepath_or, *args, **kwargs): if _is_string_or_bytes(filepath_or): if h5py.is_hdf5(filepath_or): fh, own_fh = h5py.File(filepath_or, *args, **kwargs), True - elif filepath_or.endswith('.xlsx'): + elif filepath_or.endswith(".xlsx"): # due to extension, let's assume Excel file wb = load_workbook(filename=filepath_or, data_only=True) sheetnames = wb.sheetnames # let's check if Qiimp, they must be in same order first_cell_index = 0 is_qiimp_wb = False - if sheetnames == ["Metadata", "Validation", "Data Dictionary", - "metadata_schema", "metadata_form", - "Instructions"]: + if sheetnames == [ + "Metadata", + "Validation", + "Data Dictionary", + "metadata_schema", + "metadata_form", + "Instructions", + ]: first_cell_index = 1 is_qiimp_wb = True first_sheet = wb[sheetnames[0]] cell_range = range(first_cell_index, first_sheet.max_column) - _, fp = mkstemp(suffix='.txt') - with open(fp, 'w') as fh: - cfh = csv_writer(fh, delimiter='\t') + _, fp = mkstemp(suffix=".txt") + with open(fp, "w") as fh: + cfh = csv_writer(fh, delimiter="\t") for r in first_sheet.rows: if is_qiimp_wb: # check contents of first column; if they are a zero @@ -2054,7 +2116,7 @@ def open_file(filepath_or, *args, **kwargs): def artifact_visibilities_to_skip(): - return tuple([qdb.util.convert_to_id('archived', "visibility")]) + return tuple([qdb.util.convert_to_id("archived", "visibility")]) def generate_analysis_list(analysis_ids, public_only=False): @@ -2097,30 +2159,38 @@ def generate_analysis_list(analysis_ids, public_only=False): qdb.sql_connection.TRN.add(sql, [tuple(analysis_ids)]) for row in qdb.sql_connection.TRN.execute_fetchindex(): - aid, name, description, ts, owner, artifacts, \ - av, mapping_files = row + aid, name, description, ts, owner, artifacts, av, mapping_files = row - av = 'public' if set(av) == {'public'} else 'private' - if av != 'public' and public_only: + av = "public" if set(av) == {"public"} else "private" + if av != "public" and public_only: continue if mapping_files == [None]: mapping_files = [] else: mapping_files = [ - (mid, get_filepath_information(mid)['fullpath']) - for mid in mapping_files if mid is not None] + (mid, get_filepath_information(mid)["fullpath"]) + for mid in mapping_files + if mid is not None + ] if artifacts == [None]: artifacts = [] else: # making sure they are int so they don't break the GUI artifacts = [int(a) for a in artifacts if a is not None] - results.append({ - 'analysis_id': aid, 'name': name, 'description': description, - 'timestamp': 
ts.strftime("%m/%d/%y %H:%M:%S"), - 'visibility': av, 'artifacts': artifacts, 'owner': owner, - 'mapping_files': mapping_files}) + results.append( + { + "analysis_id": aid, + "name": name, + "description": description, + "timestamp": ts.strftime("%m/%d/%y %H:%M:%S"), + "visibility": av, + "artifacts": artifacts, + "owner": owner, + "mapping_files": mapping_files, + } + ) return results @@ -2173,8 +2243,7 @@ def generate_analyses_list_per_study(study_id): qdb.sql_connection.TRN.add(analysis_sql, [study_id]) aids = qdb.sql_connection.TRN.execute_fetchindex() for aid, artifact_ids in aids: - qdb.sql_connection.TRN.add( - extra_sql, [tuple(artifact_ids), aid]) + qdb.sql_connection.TRN.add(extra_sql, [tuple(artifact_ids), aid]) for row in qdb.sql_connection.TRN.execute_fetchindex(): results.append(dict(row)) @@ -2216,10 +2285,15 @@ def create_nested_path(path): raise -def human_merging_scheme(cname, merging_scheme, - pname, parent_merging_scheme, - artifact_parameters, artifact_filepaths, - parent_parameters): +def human_merging_scheme( + cname, + merging_scheme, + pname, + parent_merging_scheme, + artifact_parameters, + artifact_filepaths, + parent_parameters, +): """From the artifact and its parent features format the merging scheme Parameters @@ -2245,29 +2319,40 @@ def human_merging_scheme(cname, merging_scheme, The merging scheme """ eparams = [] - if merging_scheme['parameters']: - eparams.append(','.join(['%s: %s' % (k, artifact_parameters[k]) - for k in merging_scheme['parameters']])) - if (merging_scheme['outputs'] and - artifact_filepaths is not None and - artifact_filepaths): - eparams.append('BIOM: %s' % ', '.join(artifact_filepaths)) + if merging_scheme["parameters"]: + eparams.append( + ",".join( + [ + "%s: %s" % (k, artifact_parameters[k]) + for k in merging_scheme["parameters"] + ] + ) + ) + if ( + merging_scheme["outputs"] + and artifact_filepaths is not None + and artifact_filepaths + ): + eparams.append("BIOM: %s" % ", ".join(artifact_filepaths)) if eparams: - cname = "%s (%s)" % (cname, ', '.join(eparams)) + cname = "%s (%s)" % (cname, ", ".join(eparams)) - if merging_scheme['ignore_parent_command']: + if merging_scheme["ignore_parent_command"]: algorithm = cname else: - palgorithm = 'N/A' + palgorithm = "N/A" if pname is not None: palgorithm = pname - if parent_merging_scheme['parameters']: - params = ','.join( - ['%s: %s' % (k, parent_parameters[k]) - for k in parent_merging_scheme['parameters']]) + if parent_merging_scheme["parameters"]: + params = ",".join( + [ + "%s: %s" % (k, parent_parameters[k]) + for k in parent_merging_scheme["parameters"] + ] + ) palgorithm = "%s (%s)" % (palgorithm, params) - algorithm = '%s | %s' % (cname, palgorithm) + algorithm = "%s | %s" % (cname, palgorithm) return algorithm @@ -2283,7 +2368,8 @@ def activate_or_update_plugins(update=False): """ conf_files = sorted(glob(join(qiita_config.plugin_dir, "*.conf"))) label = "{} plugin (%s/{}): %s... 
".format( - "Updating" if update else "\tLoading", len(conf_files)) + "Updating" if update else "\tLoading", len(conf_files) + ) for i, fp in enumerate(conf_files): print(label % (i + 1, basename(fp)), end=None) s = qdb.software.Software.from_file(fp, update=update) @@ -2295,12 +2381,12 @@ def activate_or_update_plugins(update=False): def send_email(to, subject, body): # create email msg = MIMEMultipart() - msg['From'] = qiita_config.smtp_email - msg['To'] = to + msg["From"] = qiita_config.smtp_email + msg["To"] = to # we need to do 'replace' because the subject can have # new lines in the middle of the string - msg['Subject'] = subject.replace('\n', '') - msg.attach(MIMEText(body, 'plain')) + msg["Subject"] = subject.replace("\n", "") + msg.attach(MIMEText(body, "plain")) # connect to smtp server, using ssl if needed if qiita_config.smtp_ssl: @@ -2344,8 +2430,8 @@ def resource_allocation_plot(df, col_name): Returns a matplotlib object with a plot """ - df.dropna(subset=['samples', 'columns'], inplace=True) - df[col_name] = df.samples * df['columns'] + df.dropna(subset=["samples", "columns"], inplace=True) + df[col_name] = df.samples * df["columns"] df[col_name] = df[col_name].astype(int) fig, axs = plt.subplots(ncols=2, figsize=(10, 4), sharey=False) @@ -2354,18 +2440,16 @@ def resource_allocation_plot(df, col_name): mem_models, time_models = retrieve_equations() # models for memory - _resource_allocation_plot_helper( - df, ax, "MaxRSSRaw", mem_models, col_name) + _resource_allocation_plot_helper(df, ax, "MaxRSSRaw", mem_models, col_name) ax = axs[1] # models for time - _resource_allocation_plot_helper( - df, ax, "ElapsedRaw", time_models, col_name) + _resource_allocation_plot_helper(df, ax, "ElapsedRaw", time_models, col_name) return fig, axs def retrieve_equations(): - ''' + """ Helper function for resource_allocation_plot. Retrieves equations from db. Creates dictionary for memory and time models. @@ -2376,30 +2460,30 @@ def retrieve_equations(): memory models - potential memory models for resource allocations dict time models - potential time models for resource allocations - ''' + """ memory_models = {} time_models = {} res = [] with qdb.sql_connection.TRN: - sql = ''' SELECT * FROM qiita.allocation_equations; ''' + sql = """ SELECT * FROM qiita.allocation_equations; """ qdb.sql_connection.TRN.add(sql) res = qdb.sql_connection.TRN.execute_fetchindex() for models in res: - if 'mem' in models[1]: + if "mem" in models[1]: memory_models[models[1]] = { "equation_name": models[2], - "equation": lambda x, k, a, b: eval(models[2]) + "equation": lambda x, k, a, b: eval(models[2]), } else: time_models[models[1]] = { "equation_name": models[2], - "equation": lambda x, k, a, b: eval(models[2]) + "equation": lambda x, k, a, b: eval(models[2]), } return (memory_models, time_models) def retrieve_resource_data(cname, sname, version, columns): - ''' + """ Retrieves resource data from db and constructs a DataFrame with relevant fields. @@ -2414,7 +2498,7 @@ def retrieve_resource_data(cname, sname, version, columns): ------- pd.DataFrame DataFrame with resources. - ''' + """ with qdb.sql_connection.TRN: sql = """ SELECT @@ -2453,8 +2537,7 @@ def retrieve_resource_data(cname, sname, version, columns): return df -def _resource_allocation_plot_helper( - df, ax, curr, models, col_name): +def _resource_allocation_plot_helper(df, ax, curr, models, col_name): """Helper function for resource allocation plot. 
Builds plot for MaxRSSRaw and ElapsedRaw @@ -2514,62 +2597,76 @@ def _resource_allocation_plot_helper( x_data = np.array(x_data) y_data = np.array(y_data) - ax.set_xscale('log') - ax.set_yscale('log') + ax.set_xscale("log") + ax.set_yscale("log") ax.set_ylabel(curr) ax.set_xlabel(col_name) # 50 - number of maximum iterations, 3 - number of failures we tolerate best_model_name, best_model, options = _resource_allocation_calculate( - df, x_data, y_data, models, curr, col_name, 50, 3) + df, x_data, y_data, models, curr, col_name, 50, 3 + ) k, a, b = options.x x_plot = np.array(sorted(df[col_name].unique())) y_plot = best_model(x_plot, k, a, b) - ax.plot(x_plot, y_plot, linewidth=1, color='orange') + ax.plot(x_plot, y_plot, linewidth=1, color="orange") cmin_value = min(y_plot) cmax_value = max(y_plot) - maxi = naturalsize(df[curr].max(), gnu=True) if curr == "MaxRSSRaw" else \ - timedelta(seconds=float(df[curr].max())) - cmax = naturalsize(cmax_value, gnu=True) if curr == "MaxRSSRaw" else \ - str(timedelta(seconds=round(cmax_value, 2))).rstrip('0').rstrip('.') - - mini = naturalsize(df[curr].min(), gnu=True) if curr == "MaxRSSRaw" else \ - timedelta(seconds=float(df[curr].min())) - cmin = naturalsize(cmin_value, gnu=True) if curr == "MaxRSSRaw" else \ - str(timedelta(seconds=round(cmin_value, 2))).rstrip('0').rstrip('.') + maxi = ( + naturalsize(df[curr].max(), gnu=True) + if curr == "MaxRSSRaw" + else timedelta(seconds=float(df[curr].max())) + ) + cmax = ( + naturalsize(cmax_value, gnu=True) + if curr == "MaxRSSRaw" + else str(timedelta(seconds=round(cmax_value, 2))).rstrip("0").rstrip(".") + ) + + mini = ( + naturalsize(df[curr].min(), gnu=True) + if curr == "MaxRSSRaw" + else timedelta(seconds=float(df[curr].min())) + ) + cmin = ( + naturalsize(cmin_value, gnu=True) + if curr == "MaxRSSRaw" + else str(timedelta(seconds=round(cmin_value, 2))).rstrip("0").rstrip(".") + ) x_plot = np.array(df[col_name]) success_df, failures_df = _resource_allocation_success_failures( - df, k, a, b, best_model, col_name, curr) + df, k, a, b, best_model, col_name, curr + ) failures = failures_df.shape[0] - ax.scatter(failures_df[col_name], failures_df[curr], color='red', s=3, - label="failures") - success_df['node_name'] = success_df['node_name'].fillna('unknown') - slurm_hosts = set(success_df['node_name'].tolist()) - cmap = colormaps.get_cmap('Accent') + ax.scatter( + failures_df[col_name], failures_df[curr], color="red", s=3, label="failures" + ) + success_df["node_name"] = success_df["node_name"].fillna("unknown") + slurm_hosts = set(success_df["node_name"].tolist()) + cmap = colormaps.get_cmap("Accent") if len(slurm_hosts) > len(cmap.colors): raise ValueError(f"""'Accent' colormap only has {len(cmap.colors)} colors, but {len(slurm_hosts)} hosts are provided.""") - colors = cmap.colors[:len(slurm_hosts)] + colors = cmap.colors[: len(slurm_hosts)] for i, host in enumerate(slurm_hosts): - host_df = success_df[success_df['node_name'] == host] - ax.scatter(host_df[col_name], host_df[curr], color=colors[i], s=3, - label=host) + host_df = success_df[success_df["node_name"] == host] + ax.scatter(host_df[col_name], host_df[curr], color=colors[i], s=3, label=host) ax.set_title( - f'k||a||b: {k}||{a}||{b}\n' - f'model: {models[best_model_name]["equation_name"]}\n' - f'real: {mini} || {maxi}\n' - f'calculated: {cmin} || {cmax}\n' - f'failures: {failures}') - ax.legend(loc='upper left') + f"k||a||b: {k}||{a}||{b}\n" + f"model: {models[best_model_name]['equation_name']}\n" + f"real: {mini} || {maxi}\n" + f"calculated: 
{cmin} || {cmax}\n" + f"failures: {failures}" + ) + ax.legend(loc="upper left") return best_model_name, best_model, options -def _resource_allocation_calculate( - df, x, y, models, type_, col_name, depth, tolerance): +def _resource_allocation_calculate(df, x, y, models, type_, col_name, depth, tolerance): """Helper function for resource allocation plot. Calculates best_model and best_result given the models list and x,y data. @@ -2610,7 +2707,7 @@ def _resource_allocation_calculate( best_failures = np.inf best_max = np.inf for model_name, model in models.items(): - model_equation = model['equation'] + model_equation = model["equation"] # start values for binary search, where sl is left, sr is right # penalty weight must be positive & non-zero, hence, sl >= 1. # the upper bound for error can be an arbitrary large number @@ -2627,13 +2724,17 @@ def _resource_allocation_calculate( # scoring constraints defined in if/else statements. while left < right and cnt < depth: middle = (left + right) // 2 - options = minimize(_resource_allocation_custom_loss, init, - args=(x, y, model_equation, middle)) + options = minimize( + _resource_allocation_custom_loss, + init, + args=(x, y, model_equation, middle), + ) k, a, b = options.x # important: here we take the 2nd (last) value of tuple since # the helper function returns success, then failures. failures_df = _resource_allocation_success_failures( - df, k, a, b, model_equation, col_name, type_)[-1] + df, k, a, b, model_equation, col_name, type_ + )[-1] y_plot = model_equation(x, k, a, b) if not any(y_plot): continue @@ -2672,9 +2773,10 @@ def _resource_allocation_calculate( # this is helpful if the model that has e.g. 1 failure is a better fit # overall based on maximum calculated value. is_acceptable_based_on_failures = ( - prev_failures <= tolerance or abs( - prev_failures - best_failures) < tolerance or - best_failures == np.inf) + prev_failures <= tolerance + or abs(prev_failures - best_failures) < tolerance + or best_failures == np.inf + ) # case where less failures if is_acceptable_based_on_failures: @@ -2714,8 +2816,7 @@ def _resource_allocation_custom_loss(params, x, y, model, p): residuals = y - model(x, k, a, b) # Apply a heavier penalty to points below the curve penalty = p - weighted_residuals = np.where(residuals > 0, penalty * residuals**2, - residuals**2) + weighted_residuals = np.where(residuals > 0, penalty * residuals**2, residuals**2) return np.mean(weighted_residuals) @@ -2750,18 +2851,18 @@ def _resource_allocation_success_failures(df, k, a, b, model, col_name, type_): """ x_plot = np.array(df[col_name]) - df[f'c{type_}'] = model(x_plot, k, a, b) - success_df = df[df[type_] <= df[f'c{type_}']] - failures_df = df[df[type_] > df[f'c{type_}']] + df[f"c{type_}"] = model(x_plot, k, a, b) + success_df = df[df[type_] <= df[f"c{type_}"]] + failures_df = df[df[type_] > df[f"c{type_}"]] return (success_df, failures_df) def MaxRSS_helper(x): - if x[-1] == 'K': + if x[-1] == "K": y = float(x[:-1]) * 1000 - elif x[-1] == 'M': + elif x[-1] == "M": y = float(x[:-1]) * 1000000 - elif x[-1] == 'G': + elif x[-1] == "G": y = float(x[:-1]) * 1000000000 else: y = float(x) @@ -2773,17 +2874,17 @@ def update_resource_allocation_table(weeks=1, test=None): # better allocation so we default start time 2023-04-28 to # use the latests for the newest version """ - Updates qiita.slurm_resource_allocation SQL table with jobs from slurm. - Retrieves the most recent job available in the table and appends with - the data. 
+ Updates qiita.slurm_resource_allocation SQL table with jobs from slurm. + Retrieves the most recent job available in the table and appends with + the data. - Parameters: - ---------- - weeks: integer, optional - Number of weeks for which we want to make a request from slurm. - test: pandas.DataFrame, optional - Represents dataframe containing slurm data from 2023-04-28. Used - for testing only. + Parameters: + ---------- + weeks: integer, optional + Number of weeks for which we want to make a request from slurm. + test: pandas.DataFrame, optional + Represents dataframe containing slurm data from 2023-04-28. Used + for testing only. """ # retrieve the most recent timestamp @@ -2802,10 +2903,10 @@ def update_resource_allocation_table(weeks=1, test=None): LIMIT 1; """ - dates = ['', ''] + dates = ["", ""] slurm_external_id = 0 - start_date = datetime.strptime('2023-04-28', '%Y-%m-%d') + start_date = datetime.strptime("2023-04-28", "%Y-%m-%d") with qdb.sql_connection.TRN: sql = sql_timestamp qdb.sql_connection.TRN.add(sql) @@ -2845,22 +2946,30 @@ def update_resource_allocation_table(weeks=1, test=None): with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add(sql_command, sql_args=[slurm_external_id]) res = qdb.sql_connection.TRN.execute_fetchindex() - df = pd.DataFrame(res, columns=["processing_job_id", 'external_id']) - df['external_id'] = df['external_id'].astype(int) + df = pd.DataFrame(res, columns=["processing_job_id", "external_id"]) + df["external_id"] = df["external_id"].astype(int) data = [] sacct = [ - 'sacct', '-p', - '--format=JobID,ElapsedRaw,MaxRSS,Submit,Start,End,CPUTimeRAW,' - 'ReqMem,AllocCPUs,AveVMSize,MaxVMSizeNode', '--starttime', - dates[0].strftime('%Y-%m-%d'), '--endtime', - dates[1].strftime('%Y-%m-%d'), '--user', 'qiita', '--state', 'CD'] + "sacct", + "-p", + "--format=JobID,ElapsedRaw,MaxRSS,Submit,Start,End,CPUTimeRAW," + "ReqMem,AllocCPUs,AveVMSize,MaxVMSizeNode", + "--starttime", + dates[0].strftime("%Y-%m-%d"), + "--endtime", + dates[1].strftime("%Y-%m-%d"), + "--user", + "qiita", + "--state", + "CD", + ] if test is not None: slurm_data = test else: - rvals = check_output(sacct).decode('ascii') - slurm_data = pd.read_csv(StringIO(rvals), sep='|') + rvals = check_output(sacct).decode("ascii") + slurm_data = pd.read_csv(StringIO(rvals), sep="|") # In slurm, each JobID is represented by 3 rows in the dataframe: # - external_id: overall container for the job and its associated @@ -2879,31 +2988,33 @@ def update_resource_allocation_table(weeks=1, test=None): # other columns def merge_rows(rows): - date_fmt = '%Y-%m-%dT%H:%M:%S' - wait_time = ( - datetime.strptime(rows.iloc[0]['Start'], date_fmt) - - datetime.strptime(rows.iloc[0]['Submit'], date_fmt)) + date_fmt = "%Y-%m-%dT%H:%M:%S" + wait_time = datetime.strptime( + rows.iloc[0]["Start"], date_fmt + ) - datetime.strptime(rows.iloc[0]["Submit"], date_fmt) if rows.shape[0] >= 2: tmp = rows.iloc[1].copy() else: tmp = rows.iloc[0].copy() - tmp['WaitTime'] = wait_time + tmp["WaitTime"] = wait_time return tmp - slurm_data['external_id'] = slurm_data['JobID'].apply( - lambda x: int(x.split('.')[0])) - slurm_data['external_id'] = slurm_data['external_id'].ffill() + slurm_data["external_id"] = slurm_data["JobID"].apply( + lambda x: int(x.split(".")[0]) + ) + slurm_data["external_id"] = slurm_data["external_id"].ffill() - slurm_data = slurm_data.groupby( - 'external_id').apply(merge_rows).reset_index(drop=True) + slurm_data = ( + slurm_data.groupby("external_id").apply(merge_rows).reset_index(drop=True) + ) # filter to only 
those jobs that are within the slurm_data df. - eids = set(slurm_data['external_id']) - df = df[df['external_id'].isin(eids)] + eids = set(slurm_data["external_id"]) + df = df[df["external_id"].isin(eids)] for index, row in df.iterrows(): - job = qdb.processing_job.ProcessingJob(row['processing_job_id']) - extra_info = '' + job = qdb.processing_job.ProcessingJob(row["processing_job_id"]) + extra_info = "" eid = job.external_id cmd = job.command @@ -2919,62 +3030,71 @@ def merge_rows(rows): except TypeError as e: # similar to the except above, exept that for these 2 commands, we # have the study_id as None - if cmd.name in {'create_sample_template', 'delete_sample_template', - 'list_remote_files'}: + if cmd.name in { + "create_sample_template", + "delete_sample_template", + "list_remote_files", + }: continue else: raise e sname = s.name - if cmd.name == 'release_validators': - ej = qdb.processing_job.ProcessingJob(job.parameters.values['job']) + if cmd.name == "release_validators": + ej = qdb.processing_job.ProcessingJob(job.parameters.values["job"]) extra_info = ej.command.name samples, columns, input_size = ej.shape - elif cmd.name == 'complete_job': - artifacts = loads(job.parameters.values['payload'])['artifacts'] + elif cmd.name == "complete_job": + artifacts = loads(job.parameters.values["payload"])["artifacts"] if artifacts is not None: - extra_info = ','.join({ - x['artifact_type'] for x in artifacts.values() - if 'artifact_type' in x}) - elif cmd.name == 'Validate': - input_size = sum([len(x) for x in loads( - job.parameters.values['files']).values()]) + extra_info = ",".join( + { + x["artifact_type"] + for x in artifacts.values() + if "artifact_type" in x + } + ) + elif cmd.name == "Validate": + input_size = sum( + [len(x) for x in loads(job.parameters.values["files"]).values()] + ) sname = f"{sname} - {job.parameters.values['artifact_type']}" - elif cmd.name == 'Alpha rarefaction curves [alpha_rarefaction]': + elif cmd.name == "Alpha rarefaction curves [alpha_rarefaction]": extra_info = job.parameters.values[ - ('The number of rarefaction depths to include between ' - 'min_depth and max_depth. (steps)')] - curr = slurm_data[slurm_data['external_id'] == int(eid)].iloc[0] - barnacle_info = curr['MaxVMSizeNode'] + ( + "The number of rarefaction depths to include between " + "min_depth and max_depth. 
(steps)" + ) + ] + curr = slurm_data[slurm_data["external_id"] == int(eid)].iloc[0] + barnacle_info = curr["MaxVMSizeNode"] if len(barnacle_info) == 0: barnacle_info = [None, None] else: - barnacle_info = barnacle_info.split('-') + barnacle_info = barnacle_info.split("-") row_dict = { - 'processing_job_id': job.id, - 'samples': samples, - 'columns': columns, - 'input_size': input_size, - 'extra_info': extra_info, - 'ElapsedRaw': curr['ElapsedRaw'], - 'MaxRSS': curr['MaxRSS'], - 'Start': curr['Start'], - 'node_name': barnacle_info[0], - 'node_model': barnacle_info[1] + "processing_job_id": job.id, + "samples": samples, + "columns": columns, + "input_size": input_size, + "extra_info": extra_info, + "ElapsedRaw": curr["ElapsedRaw"], + "MaxRSS": curr["MaxRSS"], + "Start": curr["Start"], + "node_name": barnacle_info[0], + "node_model": barnacle_info[1], } data.append(row_dict) df = pd.DataFrame(data) # This is important as we are transforming the MaxRSS to raw value # so we need to confirm that there is no other suffixes - print('Make sure that only 0/K/M exist', set( - df.MaxRSS.apply(lambda x: str(x)[-1]))) + print("Make sure that only 0/K/M exist", set(df.MaxRSS.apply(lambda x: str(x)[-1]))) # Generating new columns - df['MaxRSSRaw'] = df.MaxRSS.apply(lambda x: MaxRSS_helper(str(x))) - df['ElapsedRawTime'] = df.ElapsedRaw.apply( - lambda x: timedelta(seconds=float(x))) + df["MaxRSSRaw"] = df.MaxRSS.apply(lambda x: MaxRSS_helper(str(x))) + df["ElapsedRawTime"] = df.ElapsedRaw.apply(lambda x: timedelta(seconds=float(x))) df.replace({np.nan: None}, inplace=True) for index, row in df.iterrows(): @@ -2995,10 +3115,17 @@ def merge_rows(rows): VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """ to_insert = [ - row['processing_job_id'], row['samples'], row['columns'], - row['input_size'], row['extra_info'], row['MaxRSSRaw'], - row['ElapsedRaw'], row['Start'], row['node_name'], - row['node_model']] + row["processing_job_id"], + row["samples"], + row["columns"], + row["input_size"], + row["extra_info"], + row["MaxRSSRaw"], + row["ElapsedRaw"], + row["Start"], + row["node_name"], + row["node_model"], + ] qdb.sql_connection.TRN.add(sql, sql_args=to_insert) qdb.sql_connection.TRN.execute() @@ -3022,4 +3149,4 @@ def merge_overlapping_strings(str1, str2): for i in range(1, min(len(str1), len(str2)) + 1): if str1.endswith(str2[:i]): overlap = str2[:i] - return str1 + str2[len(overlap):] + return str1 + str2[len(overlap) :] diff --git a/qiita_pet/exceptions.py b/qiita_pet/exceptions.py index f6d8541c7..9aba3dc2f 100644 --- a/qiita_pet/exceptions.py +++ b/qiita_pet/exceptions.py @@ -12,8 +12,7 @@ class QiitaHTTPError(HTTPError): def __init__(self, status_code=500, log_message=None, *args, **kwargs): - super(QiitaHTTPError, self).__init__( - status_code, log_message, *args, **kwargs) + super(QiitaHTTPError, self).__init__(status_code, log_message, *args, **kwargs) # The HTTPError has an attribute named "reason" that will get send to # the requester if specified. However, the developer need to # specifically pass the keyword "reason" when raising the exception. 
@@ -28,7 +27,9 @@ def __init__(self, status_code=500, log_message=None, *args, **kwargs): class QiitaPetAuthorizationError(QiitaError): """When a user tries to access a resource without proper authorization""" + def __init__(self, user_id, resource_name_str): super(QiitaPetAuthorizationError, self).__init__() - self.args = ("User %s is not authorized to access %s" - % (user_id, resource_name_str),) + self.args = ( + "User %s is not authorized to access %s" % (user_id, resource_name_str), + ) diff --git a/qiita_pet/handlers/admin_processing_job.py b/qiita_pet/handlers/admin_processing_job.py index 95794e471..9067bea4c 100644 --- a/qiita_pet/handlers/admin_processing_job.py +++ b/qiita_pet/handlers/admin_processing_job.py @@ -6,37 +6,42 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +from collections import Counter +from json import dumps + from tornado.gen import coroutine from tornado.web import HTTPError -from .base_handlers import BaseHandler from qiita_core.util import execute_as_transaction - -from qiita_db.software import Software -from qiita_db.study import Study from qiita_db.exceptions import QiitaDBUnknownIDError -from qiita_db.sql_connection import TRN from qiita_db.processing_job import ProcessingJob as PJ +from qiita_db.software import Software +from qiita_db.sql_connection import TRN +from qiita_db.study import Study -from json import dumps -from collections import Counter +from .base_handlers import BaseHandler class AdminProcessingJobBaseClass(BaseHandler): def _check_access(self): if self.current_user is None or self.current_user.level not in { - 'admin', 'wet-lab admin'}: - raise HTTPError(403, reason="User %s doesn't have sufficient " - "privileges to view error page" % - self.current_user.email) + "admin", + "wet-lab admin", + }: + raise HTTPError( + 403, + reason="User %s doesn't have sufficient " + "privileges to view error page" % self.current_user.email, + ) return self def _get_private_software(self): # skipping the internal Qiita plugin and only selecting private # commands - private_software = [s for s in Software.iter() - if s.name != 'Qiita' and s.type == 'private'] + private_software = [ + s for s in Software.iter() if s.name != "Qiita" and s.type == "private" + ] return private_software @@ -47,8 +52,9 @@ class AdminProcessingJob(AdminProcessingJobBaseClass): def get(self): self._check_access() - self.render("admin_processing_job.html", - private_software=self._get_private_software()) + self.render( + "admin_processing_job.html", private_software=self._get_private_software() + ) class AJAXAdminProcessingJobListing(AdminProcessingJobBaseClass): @@ -56,8 +62,8 @@ class AJAXAdminProcessingJobListing(AdminProcessingJobBaseClass): @execute_as_transaction def get(self): self._check_access() - echo = self.get_argument('sEcho') - command_id = int(self.get_argument('commandId')) + echo = self.get_argument("sEcho") + command_id = int(self.get_argument("commandId")) with TRN: # different versions of the same plugin will have different @@ -77,36 +83,46 @@ def get(self): jobs = [] for jid in jids: job = PJ(jid) - msg = '' - if job.status == 'error': + msg = "" + if job.status == "error": msg = job.log.msg - elif job.status == 'running': + elif job.status == "running": msg = job.step if msg is not None: - msg = msg.replace('\n', '
') + msg = msg.replace("\n", "
") outputs = [] - if job.status == 'success': + if job.status == "success": outputs = [[k, v.id] for k, v in job.outputs.items()] validator_jobs = [v.id for v in job.validator_jobs] if job.heartbeat is not None: - heartbeat = job.heartbeat.strftime('%Y-%m-%d %H:%M:%S') + heartbeat = job.heartbeat.strftime("%Y-%m-%d %H:%M:%S") else: - heartbeat = 'N/A' - - jobs.append([job.id, job.command.name, job.status, msg, - outputs, validator_jobs, heartbeat, - job.parameters.values, job.external_id, - job.user.email]) + heartbeat = "N/A" + + jobs.append( + [ + job.id, + job.command.name, + job.status, + msg, + outputs, + validator_jobs, + heartbeat, + job.parameters.values, + job.external_id, + job.user.email, + ] + ) results = { "sEcho": echo, "recordsTotal": len(jobs), "recordsFiltered": len(jobs), - "data": jobs + "data": jobs, } # return the json in compact form to save transmit size - self.write(dumps(results, separators=(',', ':'))) + self.write(dumps(results, separators=(",", ":"))) class SampleValidation(AdminProcessingJobBaseClass): @@ -122,7 +138,7 @@ def post(self): # Get user-inputted qiita id and sample names qid = self.get_argument("qid") snames = self.get_argument("snames").split() - error, matching, missing, extra, blank, duplicates = [None]*6 + error, matching, missing, extra, blank, duplicates = [None] * 6 # Stripping leading qiita id from sample names # Example: 1.SKB1.640202 -> SKB1.640202 @@ -130,9 +146,9 @@ def post(self): sample_info = Study(qid).sample_template qsnames = list(sample_info) except TypeError: - error = f'Study {qid} seems to have no sample template' + error = f"Study {qid} seems to have no sample template" except QiitaDBUnknownIDError: - error = f'Study {qid} does not exist' + error = f"Study {qid} does not exist" if error is None: # if tube_id is present then this should take precedence in qsnames @@ -140,28 +156,27 @@ def post(self): if "tube_id" in sample_info.categories: for k, v in sample_info.get_category("tube_id").items(): # ignoring empty values - if v in (None, 'None', ''): + if v in (None, "None", ""): continue if k.startswith(qid): - k = k.replace(f'{qid}.', "", 1) + k = k.replace(f"{qid}.", "", 1) tube_ids[k] = v for i, qsname in enumerate(qsnames): if qsname.startswith(qid): - qsname = qsname.replace(f'{qid}.', "", 1) + qsname = qsname.replace(f"{qid}.", "", 1) if qsname in tube_ids: - nname = f'{qsname}, tube_id: {tube_ids[qsname]}' - snames = [s if s != tube_ids[qsname] else nname - for s in snames] + nname = f"{qsname}, tube_id: {tube_ids[qsname]}" + snames = [s if s != tube_ids[qsname] else nname for s in snames] qsname = nname qsnames[i] = qsname # Finds duplicates in the samples seen = Counter(snames) - duplicates = [f'{s} \u00D7 {seen[s]}' for s in seen if seen[s] > 1] + duplicates = [f"{s} \u00d7 {seen[s]}" for s in seen if seen[s] > 1] # Remove blank samples from sample names - blank = [x for x in snames if x.lower().startswith('blank')] + blank = [x for x in snames if x.lower().startswith("blank")] snames = set(snames) - set(blank) # Validate user's sample names against qiita study @@ -171,6 +186,13 @@ def post(self): missing = qsnames.difference(snames) extra = snames.difference(qsnames) - self.render("sample_validation.html", input=False, matching=matching, - missing=missing, extra=extra, blank=blank, - duplicates=duplicates, error=error) + self.render( + "sample_validation.html", + input=False, + matching=matching, + missing=missing, + extra=extra, + blank=blank, + duplicates=duplicates, + error=error, + ) diff --git 
a/qiita_pet/handlers/analysis_handlers/__init__.py b/qiita_pet/handlers/analysis_handlers/__init__.py index a45105643..0eb4bdd6b 100644 --- a/qiita_pet/handlers/analysis_handlers/__init__.py +++ b/qiita_pet/handlers/analysis_handlers/__init__.py @@ -6,15 +6,28 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from .util import check_analysis_access -from .base_handlers import (CreateAnalysisHandler, AnalysisHandler, - AnalysisGraphHandler, AnalysisJobsHandler) -from .listing_handlers import (ListAnalysesHandler, AnalysisSummaryAJAX, - SelectedSamplesHandler) +from .base_handlers import ( + AnalysisGraphHandler, + AnalysisHandler, + AnalysisJobsHandler, + CreateAnalysisHandler, +) +from .listing_handlers import ( + AnalysisSummaryAJAX, + ListAnalysesHandler, + SelectedSamplesHandler, +) from .sharing_handlers import ShareAnalysisAJAX +from .util import check_analysis_access -__all__ = ['CreateAnalysisHandler', 'AnalysisHandler', - 'AnalysisGraphHandler', 'AnalysisJobsHandler', - 'ListAnalysesHandler', 'AnalysisSummaryAJAX', - 'SelectedSamplesHandler', 'check_analysis_access', - 'ShareAnalysisAJAX'] +__all__ = [ + "CreateAnalysisHandler", + "AnalysisHandler", + "AnalysisGraphHandler", + "AnalysisJobsHandler", + "ListAnalysesHandler", + "AnalysisSummaryAJAX", + "SelectedSamplesHandler", + "check_analysis_access", + "ShareAnalysisAJAX", +] diff --git a/qiita_pet/handlers/analysis_handlers/base_handlers.py b/qiita_pet/handlers/analysis_handlers/base_handlers.py index d64e3adca..a0e289b5d 100644 --- a/qiita_pet/handlers/analysis_handlers/base_handlers.py +++ b/qiita_pet/handlers/analysis_handlers/base_handlers.py @@ -10,36 +10,43 @@ from tornado.web import authenticated -from qiita_core.util import execute_as_transaction from qiita_core.qiita_settings import qiita_config, r_client -from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_core.util import execute_as_transaction +from qiita_db.analysis import Analysis +from qiita_db.artifact import Artifact from qiita_pet.handlers.analysis_handlers import check_analysis_access +from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.util import to_int from qiita_pet.util import get_network_nodes_edges -from qiita_db.analysis import Analysis -from qiita_db.artifact import Artifact class CreateAnalysisHandler(BaseHandler): @authenticated def post(self): - name = self.get_argument('name') - desc = self.get_argument('description') - mdsi = self.get_argument('merge_duplicated_sample_ids', False) - metadata = self.request.arguments.get('analysis-metadata', None) - reservation = self.get_argument('reservation', None) + name = self.get_argument("name") + desc = self.get_argument("description") + mdsi = self.get_argument("merge_duplicated_sample_ids", False) + metadata = self.request.arguments.get("analysis-metadata", None) + reservation = self.get_argument("reservation", None) # we need to change from bytes to strings if metadata is not None: - metadata = [m.decode('utf-8') for m in metadata] + metadata = [m.decode("utf-8") for m in metadata] - if mdsi in (b'on', 'on'): + if mdsi in (b"on", "on"): mdsi = True analysis = Analysis.create( - self.current_user, name, desc, merge_duplicated_sample_ids=mdsi, - from_default=True, categories=metadata, reservation=reservation) + self.current_user, + name, + desc, + merge_duplicated_sample_ids=mdsi, + from_default=True, + categories=metadata, + reservation=reservation, + 
) - self.redirect(u"%s/analysis/description/%s/" - % (qiita_config.portal_dir, analysis.id)) + self.redirect( + "%s/analysis/description/%s/" % (qiita_config.portal_dir, analysis.id) + ) def analysis_description_handler_get_request(analysis_id, user): @@ -56,48 +63,52 @@ def analysis_description_handler_get_request(analysis_id, user): check_analysis_access(user, analysis) job_info = r_client.get("analysis_%s" % analysis.id) - alert_type = 'info' - alert_msg = '' + alert_type = "info" + alert_msg = "" if job_info: job_info = loads(job_info) - job_id = job_info['job_id'] + job_id = job_info["job_id"] if job_id: r_payload = r_client.get(job_id) if r_payload: redis_info = loads(r_client.get(job_id)) - if redis_info['status_msg'] == 'running': - alert_msg = ('An artifact is being deleted from this ' - 'analysis') - elif redis_info['return'] is not None: - alert_type = redis_info['return']['status'] - alert_msg = redis_info['return']['message'].replace( - '\n', '
') + if redis_info["status_msg"] == "running": + alert_msg = "An artifact is being deleted from this analysis" + elif redis_info["return"] is not None: + alert_type = redis_info["return"]["status"] + alert_msg = redis_info["return"]["message"].replace("\n", "
") artifacts = {} for aid, samples in analysis.samples.items(): artifact = Artifact(aid) prep_ids = set([str(x.id) for x in artifact.prep_templates]) study = artifact.study artifacts[aid] = ( - study.id, study.title, artifact.merging_scheme, samples, prep_ids) + study.id, + study.title, + artifact.merging_scheme, + samples, + prep_ids, + ) - return {'analysis_name': analysis.name, - 'analysis_id': analysis.id, - 'analysis_is_public': analysis.is_public, - 'analysis_description': analysis.description, - 'analysis_mapping_id': analysis.mapping_file, - 'analysis_owner': analysis.owner.email, - 'alert_type': alert_type, - 'artifacts': artifacts, - 'analysis_reservation': analysis._slurm_reservation()[0], - 'alert_msg': alert_msg} + return { + "analysis_name": analysis.name, + "analysis_id": analysis.id, + "analysis_is_public": analysis.is_public, + "analysis_description": analysis.description, + "analysis_mapping_id": analysis.mapping_file, + "analysis_owner": analysis.owner.email, + "alert_type": alert_type, + "artifacts": artifacts, + "analysis_reservation": analysis._slurm_reservation()[0], + "alert_msg": alert_msg, + } class AnalysisHandler(BaseHandler): @authenticated @execute_as_transaction def get(self, analysis_id): - res = analysis_description_handler_get_request(analysis_id, - self.current_user) + res = analysis_description_handler_get_request(analysis_id, self.current_user) self.render("analysis_description.html", **res) @@ -107,18 +118,17 @@ def post(self, analysis_id): analysis = Analysis(analysis_id) check_analysis_access(self.current_user, analysis) - message = '' + message = "" try: Analysis(analysis_id).make_public() except Exception as e: message = str(e) - res = analysis_description_handler_get_request( - analysis_id, self.current_user) + res = analysis_description_handler_get_request(analysis_id, self.current_user) if message: # this will display the error message in the main banner - res['level'] = 'danger' - res['message'] = message + res["level"] = "danger" + res["message"] = message self.render("analysis_description.html", **res) @@ -130,15 +140,15 @@ def patch(self, analysis_id): Follows the JSON PATCH specification: https://tools.ietf.org/html/rfc6902 """ - req_op = self.get_argument('op') - req_path = self.get_argument('path') - req_value = self.get_argument('value', None) + req_op = self.get_argument("op") + req_path = self.get_argument("path") + req_value = self.get_argument("value", None) - if req_op == 'replace' and req_path == 'reservation': + if req_op == "replace" and req_path == "reservation": Analysis(analysis_id).slurm_reservation = req_value - response = {'status': 'success', 'message': ''} + response = {"status": "success", "message": ""} else: - response = {'status': 'error', 'message': 'Not implemented'} + response = {"status": "error", "message": "Not implemented"} self.write(response) @@ -169,8 +179,9 @@ def analyisis_graph_handler_get_request(analysis_id, user): # A user has full access to the analysis if it is one of its private # analyses, the analysis has been shared with the user or the user is a # superuser or admin - full_access = (analysis in (user.private_analyses | user.shared_analyses) - or user.level in {'superuser', 'admin'}) + full_access = analysis in ( + user.private_analyses | user.shared_analyses + ) or user.level in {"superuser", "admin"} nodes = [] edges = [] @@ -181,26 +192,29 @@ def analyisis_graph_handler_get_request(analysis_id, user): if a.processing_parameters is None: g = a.descendants_with_jobs nodes, edges, a_wf_id = 
get_network_nodes_edges( - g, full_access, nodes=nodes, edges=edges) + g, full_access, nodes=nodes, edges=edges + ) # nodes returns [node_type, node_name, element_id]; here we # are looking for the node_type == artifact, and check by # the element/artifact_id if it's being deleted for a in nodes: - if (a[0] == 'artifact' and - Artifact(a[2]).being_deleted_by is not None): + if a[0] == "artifact" and Artifact(a[2]).being_deleted_by is not None: artifacts_being_deleted.append(a[2]) if wf_id is None: wf_id = a_wf_id elif a_wf_id is not None and wf_id != a_wf_id: # This should never happen, but worth having a useful message - raise ValueError('More than one workflow in a single analysis') + raise ValueError("More than one workflow in a single analysis") # the list(set()) is to remove any duplicated nodes - return {'edges': list(set(edges)), 'nodes': list(set(nodes)), - 'workflow': wf_id, - 'artifacts_being_deleted': artifacts_being_deleted} + return { + "edges": list(set(edges)), + "nodes": list(set(nodes)), + "workflow": wf_id, + "artifacts_being_deleted": artifacts_being_deleted, + } class AnalysisGraphHandler(BaseHandler): @@ -208,8 +222,7 @@ class AnalysisGraphHandler(BaseHandler): @execute_as_transaction def get(self, analysis_id): analysis_id = to_int(analysis_id) - response = analyisis_graph_handler_get_request( - analysis_id, self.current_user) + response = analyisis_graph_handler_get_request(analysis_id, self.current_user) self.write(response) @@ -231,9 +244,9 @@ def analyisis_job_handler_get_request(analysis_id, user): # Check if the user actually has access to the analysis check_analysis_access(user, analysis) return { - j.id: {'status': j.status, 'step': j.step, - 'error': j.log.msg if j.log else ""} - for j in analysis.jobs} + j.id: {"status": j.status, "step": j.step, "error": j.log.msg if j.log else ""} + for j in analysis.jobs + } class AnalysisJobsHandler(BaseHandler): @@ -241,6 +254,5 @@ class AnalysisJobsHandler(BaseHandler): @execute_as_transaction def get(self, analysis_id): analysis_id = to_int(analysis_id) - response = analyisis_job_handler_get_request( - analysis_id, self.current_user) + response = analyisis_job_handler_get_request(analysis_id, self.current_user) self.write(response) diff --git a/qiita_pet/handlers/analysis_handlers/listing_handlers.py b/qiita_pet/handlers/analysis_handlers/listing_handlers.py index 8f2e37bb4..c1a5e2e1e 100644 --- a/qiita_pet/handlers/analysis_handlers/listing_handlers.py +++ b/qiita_pet/handlers/analysis_handlers/listing_handlers.py @@ -6,25 +6,23 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- +from collections import defaultdict from functools import partial from json import dumps, loads -from collections import defaultdict from tornado.web import authenticated -from qiita_core.qiita_settings import qiita_config +from qiita_core.qiita_settings import qiita_config, r_client from qiita_core.util import execute_as_transaction -from qiita_core.qiita_settings import r_client +from qiita_db.analysis import Analysis +from qiita_db.artifact import Artifact +from qiita_db.processing_job import ProcessingJob +from qiita_db.software import Parameters, Software +from qiita_db.util import generate_analysis_list +from qiita_pet.handlers.analysis_handlers import check_analysis_access from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.util import download_link_or_path -from qiita_pet.handlers.analysis_handlers import check_analysis_access from qiita_pet.util import is_localhost -from qiita_db.util import generate_analysis_list -from qiita_db.analysis import Analysis -from qiita_db.processing_job import ProcessingJob -from qiita_db.software import Parameters -from qiita_db.artifact import Artifact -from qiita_db.software import Software class ListAnalysesHandler(BaseHandler): @@ -32,12 +30,12 @@ class ListAnalysesHandler(BaseHandler): @execute_as_transaction def get(self): user = self.current_user - is_local_request = is_localhost(self.request.headers['host']) + is_local_request = is_localhost(self.request.headers["host"]) uanalyses = user.shared_analyses | user.private_analyses user_analysis_ids = set([a.id for a in uanalyses]) - panalyses = Analysis.get_by_status('public') + panalyses = Analysis.get_by_status("public") public_analysis_ids = set([a.id for a in panalyses]) public_analysis_ids = public_analysis_ids - user_analysis_ids @@ -46,46 +44,50 @@ def get(self): dlop = partial(download_link_or_path, is_local_request) - messages = {'info': '', 'danger': ''} + messages = {"info": "", "danger": ""} for analysis_id in user_analysis_ids: - job_info = r_client.get('analysis_delete_%d' % analysis_id) + job_info = r_client.get("analysis_delete_%d" % analysis_id) if job_info: - job_info = defaultdict(lambda: '', loads(job_info)) - job_id = job_info['job_id'] + job_info = defaultdict(lambda: "", loads(job_info)) + job_id = job_info["job_id"] job = ProcessingJob(job_id) job_status = job.status - processing = job_status not in ('success', 'error') + processing = job_status not in ("success", "error") if processing: - messages['info'] += ( - 'Analysis %s is being deleted
' % analysis_id) - elif job_status == 'error': - messages['danger'] += ( - job.log.msg.replace('\n', '
') + '
') + messages["info"] += ( + "Analysis %s is being deleted
" % analysis_id + ) + elif job_status == "error": + messages["danger"] += job.log.msg.replace("\n", "
") + "
" else: - if job_info['alert_type'] not in messages: - messages[job_info['alert_type']] = [] - messages[job_info['alert_type']] += ( - job.log.msg.replace('\n', '
') + '
') - - self.render("list_analyses.html", user_analyses=user_analyses, - public_analyses=public_analyses, messages=messages, - dlop=dlop) + if job_info["alert_type"] not in messages: + messages[job_info["alert_type"]] = [] + messages[job_info["alert_type"]] += ( + job.log.msg.replace("\n", "
") + "
" + ) + + self.render( + "list_analyses.html", + user_analyses=user_analyses, + public_analyses=public_analyses, + messages=messages, + dlop=dlop, + ) @authenticated @execute_as_transaction def post(self): - analysis_id = int(self.get_argument('analysis_id')) + analysis_id = int(self.get_argument("analysis_id")) user = self.current_user check_analysis_access(user, Analysis(analysis_id)) - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = qiita_plugin.get_command('delete_analysis') - params = Parameters.load(cmd, values_dict={'analysis_id': analysis_id}) + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("delete_analysis") + params = Parameters.load(cmd, values_dict={"analysis_id": analysis_id}) job = ProcessingJob.create(user, params, True) # Store the job id attaching it to the sample template id - r_client.set('analysis_delete_%d' % analysis_id, - dumps({'job_id': job.id})) + r_client.set("analysis_delete_%d" % analysis_id, dumps({"job_id": job.id})) job.submit() self.redirect("%s/analysis/list/" % (qiita_config.portal_dir)) @@ -111,22 +113,27 @@ def get(self): artifact = Artifact(aid) sel_data[artifact.study][aid] = samples proc_data_info[aid] = { - 'processed_date': str(artifact.timestamp), - 'merging_scheme': artifact.merging_scheme, - 'data_type': artifact.data_type + "processed_date": str(artifact.timestamp), + "merging_scheme": artifact.merging_scheme, + "data_type": artifact.data_type, } # finding common metadata fields metadata = analysis.metadata_categories - common = {'sample': set(), 'prep': set()} + common = {"sample": set(), "prep": set()} for i, (_, m) in enumerate(metadata.items()): - svals = set(m['sample']) - pvals = set(m['prep']) + svals = set(m["sample"]) + pvals = set(m["prep"]) if i != 0: - svals = common['sample'] & svals - pvals = common['prep'] & pvals - common['sample'] = svals - common['prep'] = pvals - - self.render("analysis_selected.html", sel_data=sel_data, - proc_info=proc_data_info, metadata=metadata, common=common) + svals = common["sample"] & svals + pvals = common["prep"] & pvals + common["sample"] = svals + common["prep"] = pvals + + self.render( + "analysis_selected.html", + sel_data=sel_data, + proc_info=proc_data_info, + metadata=metadata, + common=common, + ) diff --git a/qiita_pet/handlers/analysis_handlers/sharing_handlers.py b/qiita_pet/handlers/analysis_handlers/sharing_handlers.py index 8ef502be2..0c5f1e008 100644 --- a/qiita_pet/handlers/analysis_handlers/sharing_handlers.py +++ b/qiita_pet/handlers/analysis_handlers/sharing_handlers.py @@ -8,16 +8,16 @@ from json import dumps -from tornado.web import authenticated, HTTPError -from tornado.gen import coroutine, Task +from tornado.gen import Task, coroutine +from tornado.web import HTTPError, authenticated from qiita_core.qiita_settings import qiita_config from qiita_core.util import execute_as_transaction -from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_pet.handlers.util import get_shared_links -from qiita_db.user import User from qiita_db.analysis import Analysis +from qiita_db.user import User from qiita_db.util import add_message +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.handlers.util import get_shared_links class ShareAnalysisAJAX(BaseHandler): @@ -31,31 +31,34 @@ def _get_shared_for_study(self, analysis, callback): def _share(self, analysis, user, callback): user = User(user) add_message( - 'Analysis \'%s\' ' - 'has been shared with you.' 
% ( - qiita_config.portal_dir, analysis.id, analysis.name), [user]) + "Analysis '%s' " + "has been shared with you." + % (qiita_config.portal_dir, analysis.id, analysis.name), + [user], + ) callback(analysis.share(user)) @execute_as_transaction def _unshare(self, analysis, user, callback): user = User(user) - add_message('Analysis \'%s\' has been unshared with you.' % - analysis.name, [user]) + add_message("Analysis '%s' has been unshared with you." % analysis.name, [user]) callback(analysis.unshare(user)) @authenticated @coroutine @execute_as_transaction def get(self): - analysis_id = int(self.get_argument('id')) + analysis_id = int(self.get_argument("id")) analysis = Analysis(analysis_id) if not analysis.has_access(self.current_user): - raise HTTPError(403, reason='User %s does not have permissions to ' - 'share analysis %s' % (self.current_user.id, - analysis.id)) + raise HTTPError( + 403, + reason="User %s does not have permissions to " + "share analysis %s" % (self.current_user.id, analysis.id), + ) - selected = self.get_argument('selected', None) - deselected = self.get_argument('deselected', None) + selected = self.get_argument("selected", None) + deselected = self.get_argument("deselected", None) if selected is not None: yield Task(self._share, analysis, selected) @@ -64,4 +67,4 @@ def get(self): users, links = yield Task(self._get_shared_for_study, analysis) - self.write(dumps({'users': users, 'links': links})) + self.write(dumps({"users": users, "links": links})) diff --git a/qiita_pet/handlers/analysis_handlers/tests/test_base_handlers.py b/qiita_pet/handlers/analysis_handlers/tests/test_base_handlers.py index d3cf77ca5..c0ae240a4 100644 --- a/qiita_pet/handlers/analysis_handlers/tests/test_base_handlers.py +++ b/qiita_pet/handlers/analysis_handlers/tests/test_base_handlers.py @@ -6,25 +6,26 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- +from json import dumps, loads from unittest import TestCase, main -from json import loads, dumps -from tornado.web import HTTPError from mock import Mock +from tornado.web import HTTPError -from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_core.util import qiita_test_checker from qiita_core.qiita_settings import r_client from qiita_core.testing import wait_for_processing_job -from qiita_db.util import activate_or_update_plugins -from qiita_db.user import User +from qiita_core.util import qiita_test_checker from qiita_db.analysis import Analysis -from qiita_db.software import Command, Parameters, DefaultParameters from qiita_db.processing_job import ProcessingWorkflow -from qiita_pet.test.tornado_test_base import TestHandlerBase +from qiita_db.software import Command, DefaultParameters, Parameters +from qiita_db.user import User +from qiita_db.util import activate_or_update_plugins from qiita_pet.handlers.analysis_handlers.base_handlers import ( analyisis_graph_handler_get_request, - analysis_description_handler_get_request) + analysis_description_handler_get_request, +) +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.test.tornado_test_base import TestHandlerBase @qiita_test_checker() @@ -33,194 +34,325 @@ def tearDown(self): r_client.flushdb() def test_analysis_description_handler_get_request(self): - obs = analysis_description_handler_get_request(1, User('test@foo.bar')) - exp = {'analysis_name': 'SomeAnalysis', - 'analysis_id': 1, - 'analysis_description': 'A test analysis', - 'analysis_mapping_id': 16, - 'analysis_owner': 'test@foo.bar', - 'analysis_is_public': False, - 'alert_type': 'info', - 'artifacts': { - 4: (1, 'Identification of the Microbiomes for Cannabis ' - 'Soils', ('Pick closed-reference OTUs | Split ' - 'libraries FASTQ', 'QIIMEq2 v1.9.1'), [ - '1.SKB7.640196', '1.SKB8.640193', '1.SKD8.640184', - '1.SKM4.640180', '1.SKM9.640192'], {'1'}), - 5: (1, 'Identification of the Microbiomes for Cannabis ' - 'Soils', ('Pick closed-reference OTUs | Split ' - 'libraries FASTQ', 'QIIMEq2 v1.9.1'), [ - '1.SKB7.640196', '1.SKB8.640193', '1.SKD8.640184', - '1.SKM4.640180', '1.SKM9.640192'], {'1'}), - 6: (1, 'Identification of the Microbiomes for Cannabis ' - 'Soils', ('Pick closed-reference OTUs | Split ' - 'libraries FASTQ', 'QIIMEq2 v1.9.1'), [ - '1.SKB7.640196', '1.SKB8.640193', '1.SKD8.640184', - '1.SKM4.640180', '1.SKM9.640192'], {'1'})}, - 'analysis_reservation': '', - 'alert_msg': ''} + obs = analysis_description_handler_get_request(1, User("test@foo.bar")) + exp = { + "analysis_name": "SomeAnalysis", + "analysis_id": 1, + "analysis_description": "A test analysis", + "analysis_mapping_id": 16, + "analysis_owner": "test@foo.bar", + "analysis_is_public": False, + "alert_type": "info", + "artifacts": { + 4: ( + 1, + "Identification of the Microbiomes for Cannabis Soils", + ( + "Pick closed-reference OTUs | Split libraries FASTQ", + "QIIMEq2 v1.9.1", + ), + [ + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKM4.640180", + "1.SKM9.640192", + ], + {"1"}, + ), + 5: ( + 1, + "Identification of the Microbiomes for Cannabis Soils", + ( + "Pick closed-reference OTUs | Split libraries FASTQ", + "QIIMEq2 v1.9.1", + ), + [ + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKM4.640180", + "1.SKM9.640192", + ], + {"1"}, + ), + 6: ( + 1, + "Identification of the Microbiomes for Cannabis Soils", + ( + "Pick closed-reference OTUs | Split libraries 
FASTQ", + "QIIMEq2 v1.9.1", + ), + [ + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKM4.640180", + "1.SKM9.640192", + ], + {"1"}, + ), + }, + "analysis_reservation": "", + "alert_msg": "", + } self.assertEqual(obs, exp) - r_client.set('analysis_1', dumps({'job_id': 'job_id'})) - r_client.set('job_id', dumps({'status_msg': 'running'})) - obs = analysis_description_handler_get_request(1, User('test@foo.bar')) - exp = {'analysis_name': 'SomeAnalysis', - 'analysis_id': 1, - 'analysis_description': 'A test analysis', - 'analysis_mapping_id': 16, - 'analysis_owner': 'test@foo.bar', - 'analysis_is_public': False, - 'alert_type': 'info', - 'artifacts': { - 4: (1, 'Identification of the Microbiomes for Cannabis ' - 'Soils', ('Pick closed-reference OTUs | Split ' - 'libraries FASTQ', 'QIIMEq2 v1.9.1'), [ - '1.SKB7.640196', '1.SKB8.640193', '1.SKD8.640184', - '1.SKM4.640180', '1.SKM9.640192'], {'1'}), - 5: (1, 'Identification of the Microbiomes for Cannabis ' - 'Soils', ('Pick closed-reference OTUs | Split ' - 'libraries FASTQ', 'QIIMEq2 v1.9.1'), [ - '1.SKB7.640196', '1.SKB8.640193', '1.SKD8.640184', - '1.SKM4.640180', '1.SKM9.640192'], {'1'}), - 6: (1, 'Identification of the Microbiomes for Cannabis ' - 'Soils', ('Pick closed-reference OTUs | Split ' - 'libraries FASTQ', 'QIIMEq2 v1.9.1'), [ - '1.SKB7.640196', '1.SKB8.640193', '1.SKD8.640184', - '1.SKM4.640180', '1.SKM9.640192'], {'1'})}, - 'alert_msg': 'An artifact is being deleted from this analysis', - 'analysis_reservation': ''} + r_client.set("analysis_1", dumps({"job_id": "job_id"})) + r_client.set("job_id", dumps({"status_msg": "running"})) + obs = analysis_description_handler_get_request(1, User("test@foo.bar")) + exp = { + "analysis_name": "SomeAnalysis", + "analysis_id": 1, + "analysis_description": "A test analysis", + "analysis_mapping_id": 16, + "analysis_owner": "test@foo.bar", + "analysis_is_public": False, + "alert_type": "info", + "artifacts": { + 4: ( + 1, + "Identification of the Microbiomes for Cannabis Soils", + ( + "Pick closed-reference OTUs | Split libraries FASTQ", + "QIIMEq2 v1.9.1", + ), + [ + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKM4.640180", + "1.SKM9.640192", + ], + {"1"}, + ), + 5: ( + 1, + "Identification of the Microbiomes for Cannabis Soils", + ( + "Pick closed-reference OTUs | Split libraries FASTQ", + "QIIMEq2 v1.9.1", + ), + [ + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKM4.640180", + "1.SKM9.640192", + ], + {"1"}, + ), + 6: ( + 1, + "Identification of the Microbiomes for Cannabis Soils", + ( + "Pick closed-reference OTUs | Split libraries FASTQ", + "QIIMEq2 v1.9.1", + ), + [ + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKM4.640180", + "1.SKM9.640192", + ], + {"1"}, + ), + }, + "alert_msg": "An artifact is being deleted from this analysis", + "analysis_reservation": "", + } self.assertEqual(obs, exp) - r_client.set('job_id', dumps( - {'status_msg': 'Success', - 'return': {'status': 'danger', - 'message': 'Error deleting artifact'}})) - obs = analysis_description_handler_get_request(1, User('test@foo.bar')) - exp = {'analysis_name': 'SomeAnalysis', - 'analysis_id': 1, - 'analysis_description': 'A test analysis', - 'analysis_mapping_id': 16, - 'analysis_owner': 'test@foo.bar', - 'analysis_is_public': False, - 'alert_type': 'danger', - 'artifacts': { - 4: (1, 'Identification of the Microbiomes for Cannabis ' - 'Soils', ('Pick closed-reference OTUs | Split ' - 'libraries FASTQ', 'QIIMEq2 v1.9.1'), [ - '1.SKB7.640196', '1.SKB8.640193', 
'1.SKD8.640184', - '1.SKM4.640180', '1.SKM9.640192'], {'1'}), - 5: (1, 'Identification of the Microbiomes for Cannabis ' - 'Soils', ('Pick closed-reference OTUs | Split ' - 'libraries FASTQ', 'QIIMEq2 v1.9.1'), [ - '1.SKB7.640196', '1.SKB8.640193', '1.SKD8.640184', - '1.SKM4.640180', '1.SKM9.640192'], {'1'}), - 6: (1, 'Identification of the Microbiomes for Cannabis ' - 'Soils', ('Pick closed-reference OTUs | Split ' - 'libraries FASTQ', 'QIIMEq2 v1.9.1'), [ - '1.SKB7.640196', '1.SKB8.640193', '1.SKD8.640184', - '1.SKM4.640180', '1.SKM9.640192'], {'1'})}, - 'alert_msg': 'Error deleting artifact', - 'analysis_reservation': ''} + r_client.set( + "job_id", + dumps( + { + "status_msg": "Success", + "return": { + "status": "danger", + "message": "Error deleting artifact", + }, + } + ), + ) + obs = analysis_description_handler_get_request(1, User("test@foo.bar")) + exp = { + "analysis_name": "SomeAnalysis", + "analysis_id": 1, + "analysis_description": "A test analysis", + "analysis_mapping_id": 16, + "analysis_owner": "test@foo.bar", + "analysis_is_public": False, + "alert_type": "danger", + "artifacts": { + 4: ( + 1, + "Identification of the Microbiomes for Cannabis Soils", + ( + "Pick closed-reference OTUs | Split libraries FASTQ", + "QIIMEq2 v1.9.1", + ), + [ + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKM4.640180", + "1.SKM9.640192", + ], + {"1"}, + ), + 5: ( + 1, + "Identification of the Microbiomes for Cannabis Soils", + ( + "Pick closed-reference OTUs | Split libraries FASTQ", + "QIIMEq2 v1.9.1", + ), + [ + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKM4.640180", + "1.SKM9.640192", + ], + {"1"}, + ), + 6: ( + 1, + "Identification of the Microbiomes for Cannabis Soils", + ( + "Pick closed-reference OTUs | Split libraries FASTQ", + "QIIMEq2 v1.9.1", + ), + [ + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKM4.640180", + "1.SKM9.640192", + ], + {"1"}, + ), + }, + "alert_msg": "Error deleting artifact", + "analysis_reservation": "", + } self.assertEqual(obs, exp) def test_analyisis_graph_handler_get_request(self): - obs = analyisis_graph_handler_get_request(1, User('test@foo.bar')) + obs = analyisis_graph_handler_get_request(1, User("test@foo.bar")) # The job id is randomly generated in the test environment. Gather # it here. 
There is only 1 job in the first artifact of the analysis job_id = Analysis(1).artifacts[0].jobs()[0].id - exp = {'edges': [(8, job_id), (job_id, 9)], - 'artifacts_being_deleted': [], - 'nodes': [ - ('job', 'job', job_id, 'Single Rarefaction', 'success'), - ('artifact', 'BIOM', 9, 'noname\n(BIOM)', 'outdated'), - ('artifact', 'BIOM', 8, 'noname\n(BIOM)', 'artifact')], - 'workflow': None} + exp = { + "edges": [(8, job_id), (job_id, 9)], + "artifacts_being_deleted": [], + "nodes": [ + ("job", "job", job_id, "Single Rarefaction", "success"), + ("artifact", "BIOM", 9, "noname\n(BIOM)", "outdated"), + ("artifact", "BIOM", 8, "noname\n(BIOM)", "artifact"), + ], + "workflow": None, + } self.assertCountEqual(obs, exp) - self.assertCountEqual(obs['edges'], exp['edges']) - self.assertCountEqual(obs['nodes'], exp['nodes']) - self.assertIsNone(obs['workflow']) + self.assertCountEqual(obs["edges"], exp["edges"]) + self.assertCountEqual(obs["nodes"], exp["nodes"]) + self.assertIsNone(obs["workflow"]) # An admin has full access to the analysis - obs = analyisis_graph_handler_get_request(1, User('admin@foo.bar')) + obs = analyisis_graph_handler_get_request(1, User("admin@foo.bar")) self.assertCountEqual(obs, exp) - self.assertCountEqual(obs['edges'], exp['edges']) - self.assertCountEqual(obs['nodes'], exp['nodes']) + self.assertCountEqual(obs["edges"], exp["edges"]) + self.assertCountEqual(obs["nodes"], exp["nodes"]) # If the analysis is shared with the user he also has access - obs = analyisis_graph_handler_get_request(1, User('shared@foo.bar')) + obs = analyisis_graph_handler_get_request(1, User("shared@foo.bar")) self.assertCountEqual(obs, exp) - self.assertCountEqual(obs['edges'], exp['edges']) - self.assertCountEqual(obs['nodes'], exp['nodes']) + self.assertCountEqual(obs["edges"], exp["edges"]) + self.assertCountEqual(obs["nodes"], exp["nodes"]) # The user doesn't have access to the analysis with self.assertRaises(HTTPError): - analyisis_graph_handler_get_request(1, User('demo@microbio.me')) + analyisis_graph_handler_get_request(1, User("demo@microbio.me")) class TestBaseHandlers(TestHandlerBase): def test_post_create_analysis_handler(self): - user = User('test@foo.bar') + user = User("test@foo.bar") dflt_analysis = user.default_analysis dflt_analysis.add_samples( - {4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196', - '1.SKM9.640192', '1.SKM4.640180']}) - args = {'name': 'New Test Analysis', - 'description': 'Test Analysis Description'} - response = self.post('/analysis/create/', args) + { + 4: [ + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + ] + } + ) + args = {"name": "New Test Analysis", "description": "Test Analysis Description"} + response = self.post("/analysis/create/", args) self.assertRegex( - response.effective_url, - r"http://127.0.0.1:\d+/analysis/description/\d+/") + response.effective_url, r"http://127.0.0.1:\d+/analysis/description/\d+/" + ) self.assertEqual(response.code, 200) # The new analysis id is located at the -2 position (see regex above) - new_id = response.effective_url.split('/')[-2] + new_id = response.effective_url.split("/")[-2] a = Analysis(new_id) # Make sure that all jobs have completed before we exit this tests for j in a.jobs: wait_for_processing_job(j.id) def test_get_analysis_description_handler(self): - response = self.get('/analysis/description/1/') + response = self.get("/analysis/description/1/") self.assertEqual(response.code, 200) def test_post_analysis_description_handler(self): - response = 
self.post('/analysis/description/1/', {}) + response = self.post("/analysis/description/1/", {}) self.assertEqual(response.code, 200) def test_get_analysis_jobs_handler(self): - user = User('test@foo.bar') + user = User("test@foo.bar") dflt_analysis = user.default_analysis dflt_analysis.add_samples( - {4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196', - '1.SKM9.640192', '1.SKM4.640180']}) - new = Analysis.create(user, "newAnalysis", "A New Analysis", - from_default=True) - response = self.get('/analysis/description/%s/jobs/' % new.id) + { + 4: [ + "1.SKB8.640193", + "1.SKD8.640184", + "1.SKB7.640196", + "1.SKM9.640192", + "1.SKM4.640180", + ] + } + ) + new = Analysis.create(user, "newAnalysis", "A New Analysis", from_default=True) + response = self.get("/analysis/description/%s/jobs/" % new.id) self.assertEqual(response.code, 200) # There is only one job job_id = new.jobs[0].id obs = loads(response.body) - exp = {job_id: {'status': 'queued', 'step': None, 'error': ""}} + exp = {job_id: {"status": "queued", "step": None, "error": ""}} self.assertEqual(obs, exp) def test_patch(self): # first let's check that the reservation is not set analysis = Analysis(1) - self.assertEqual(analysis._slurm_reservation(), ['']) + self.assertEqual(analysis._slurm_reservation(), [""]) # now, let's change it to something different - reservation = 'myreservation' - arguments = { - 'op': 'replace', 'path': 'reservation', 'value': reservation} - self.patch(f'/analysis/description/{analysis.id}/', data=arguments) + reservation = "myreservation" + arguments = {"op": "replace", "path": "reservation", "value": reservation} + self.patch(f"/analysis/description/{analysis.id}/", data=arguments) self.assertEqual(analysis._slurm_reservation(), [reservation]) # then bring it back - reservation = '' - arguments = { - 'op': 'replace', 'path': 'reservation', 'value': reservation} - self.patch(f'/analysis/description/{analysis.id}/', data=arguments) + reservation = "" + arguments = {"op": "replace", "path": "reservation", "value": reservation} + self.patch(f"/analysis/description/{analysis.id}/", data=arguments) self.assertEqual(analysis._slurm_reservation(), [reservation]) @@ -229,36 +361,41 @@ def test_get_analysis_graph_handler(self): # making sure we load the plugins activate_or_update_plugins(update=True) - response = self.get('/analysis/description/1/graph/') + response = self.get("/analysis/description/1/graph/") self.assertEqual(response.code, 200) # The job id is randomly generated in the test environment. Gather # it here. 
There is only 1 job in the first artifact of the analysis job_id = Analysis(1).artifacts[0].jobs()[0].id obs = loads(response.body) - exp = {'edges': [[8, job_id], [job_id, 9]], - 'artifacts_being_deleted': [], - 'nodes': [ - ['job', 'job', job_id, 'Single Rarefaction', 'success'], - ['artifact', 'BIOM', 9, 'noname\n(BIOM)', 'artifact'], - ['artifact', 'BIOM', 8, 'noname\n(BIOM)', 'artifact']], - 'workflow': None} + exp = { + "edges": [[8, job_id], [job_id, 9]], + "artifacts_being_deleted": [], + "nodes": [ + ["job", "job", job_id, "Single Rarefaction", "success"], + ["artifact", "BIOM", 9, "noname\n(BIOM)", "artifact"], + ["artifact", "BIOM", 8, "noname\n(BIOM)", "artifact"], + ], + "workflow": None, + } self.assertCountEqual(obs, exp) - self.assertCountEqual(obs['edges'], exp['edges']) - self.assertCountEqual(obs['nodes'], exp['nodes']) - self.assertIsNone(obs['workflow']) + self.assertCountEqual(obs["edges"], exp["edges"]) + self.assertCountEqual(obs["nodes"], exp["nodes"]) + self.assertIsNone(obs["workflow"]) # Create a new analysis with 2 starting BIOMs to be able to test # the different if statements of the request - BaseHandler.get_current_user = Mock( - return_value=User('shared@foo.bar')) - user = User('shared@foo.bar') + BaseHandler.get_current_user = Mock(return_value=User("shared@foo.bar")) + user = User("shared@foo.bar") dflt_analysis = user.default_analysis dflt_analysis.add_samples( - {4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'], - 6: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']}) - args = {'name': 'New Test Graph Analysis', 'description': 'Desc'} - response = self.post('/analysis/create/', args) - new_id = response.effective_url.split('/')[-2] + { + 4: ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"], + 6: ["1.SKB8.640193", "1.SKD8.640184", "1.SKB7.640196"], + } + ) + args = {"name": "New Test Graph Analysis", "description": "Desc"} + response = self.post("/analysis/create/", args) + new_id = response.effective_url.split("/")[-2] a = Analysis(new_id) # Wait until all the jobs are done so the BIOM tables exist for j in a.jobs: @@ -270,89 +407,113 @@ def test_get_analysis_graph_handler(self): # Create a new workflow starting on the first artifact # Magic number 9 -> Summarize Taxa command params = Parameters.load( - Command(9), values_dict={'metadata_category': 'None', - 'sort': 'False', - 'biom_table': artifacts[0].id}) + Command(9), + values_dict={ + "metadata_category": "None", + "sort": "False", + "biom_table": artifacts[0].id, + }, + ) wf = ProcessingWorkflow.from_scratch(user, params) # There is only one job in the workflow job_id = list(wf.graph.nodes())[0].id - response = self.get('/analysis/description/%s/graph/' % new_id) + response = self.get("/analysis/description/%s/graph/" % new_id) self.assertEqual(response.code, 200) obs = loads(response.body) - exp = {'edges': [[artifacts[0].id, job_id], - [job_id, '%s:taxa_summary' % job_id]], - 'artifacts_being_deleted': [], - 'nodes': [ - ['job', 'job', job_id, 'Summarize Taxa', - 'in_construction'], - ['artifact', 'BIOM', artifacts[0].id, 'noname\n(BIOM)', - 'artifact'], - ['artifact', 'BIOM', artifacts[1].id, 'noname\n(BIOM)', - 'artifact'], - ['type', 'taxa_summary', '%s:taxa_summary' % job_id, - 'taxa_summary\n(taxa_summary)', 'type']], - 'workflow': wf.id} + exp = { + "edges": [[artifacts[0].id, job_id], [job_id, "%s:taxa_summary" % job_id]], + "artifacts_being_deleted": [], + "nodes": [ + ["job", "job", job_id, "Summarize Taxa", "in_construction"], + ["artifact", "BIOM", artifacts[0].id, 
"noname\n(BIOM)", "artifact"], + ["artifact", "BIOM", artifacts[1].id, "noname\n(BIOM)", "artifact"], + [ + "type", + "taxa_summary", + "%s:taxa_summary" % job_id, + "taxa_summary\n(taxa_summary)", + "type", + ], + ], + "workflow": wf.id, + } # Check that the keys are the same self.assertCountEqual(obs, exp) # Check the edges - self.assertCountEqual(obs['edges'], exp['edges']) + self.assertCountEqual(obs["edges"], exp["edges"]) # Check the edges - self.assertCountEqual(obs['nodes'], exp['nodes']) + self.assertCountEqual(obs["nodes"], exp["nodes"]) # Check the edges - self.assertEqual(obs['workflow'], exp['workflow']) + self.assertEqual(obs["workflow"], exp["workflow"]) # Add a job to the second BIOM to make sure that the edges and nodes # are respected. Magic number 12 -> Single Rarefaction job2 = wf.add( - DefaultParameters(16), req_params={'depth': '100', - 'biom_table': artifacts[1].id}) + DefaultParameters(16), + req_params={"depth": "100", "biom_table": artifacts[1].id}, + ) job_id_2 = job2.id - response = self.get('/analysis/description/%s/graph/' % new_id) + response = self.get("/analysis/description/%s/graph/" % new_id) self.assertEqual(response.code, 200) obs = loads(response.body) - exp = {'edges': [[artifacts[0].id, job_id], - [job_id, '%s:taxa_summary' % job_id], - [artifacts[1].id, job_id_2], - [job_id_2, '%s:rarefied_table' % job_id_2]], - 'artifacts_being_deleted': [], - 'nodes': [ - ['job', 'job', job_id, 'Summarize Taxa', - 'in_construction'], - ['job', 'job', job_id_2, 'Single Rarefaction', - 'in_construction'], - ['artifact', 'BIOM', artifacts[0].id, 'noname\n(BIOM)', - 'artifact'], - ['artifact', 'BIOM', artifacts[1].id, 'noname\n(BIOM)', - 'artifact'], - ['type', 'taxa_summary', '%s:taxa_summary' % job_id, - 'taxa_summary\n(taxa_summary)', 'type'], - ['type', 'BIOM', '%s:rarefied_table' % job_id_2, - 'rarefied_table\n(BIOM)', 'type']], - 'workflow': wf.id} + exp = { + "edges": [ + [artifacts[0].id, job_id], + [job_id, "%s:taxa_summary" % job_id], + [artifacts[1].id, job_id_2], + [job_id_2, "%s:rarefied_table" % job_id_2], + ], + "artifacts_being_deleted": [], + "nodes": [ + ["job", "job", job_id, "Summarize Taxa", "in_construction"], + ["job", "job", job_id_2, "Single Rarefaction", "in_construction"], + ["artifact", "BIOM", artifacts[0].id, "noname\n(BIOM)", "artifact"], + ["artifact", "BIOM", artifacts[1].id, "noname\n(BIOM)", "artifact"], + [ + "type", + "taxa_summary", + "%s:taxa_summary" % job_id, + "taxa_summary\n(taxa_summary)", + "type", + ], + [ + "type", + "BIOM", + "%s:rarefied_table" % job_id_2, + "rarefied_table\n(BIOM)", + "type", + ], + ], + "workflow": wf.id, + } # Check that the keys are the same self.assertCountEqual(obs, exp) # Check the edges - self.assertCountEqual(obs['edges'], exp['edges']) + self.assertCountEqual(obs["edges"], exp["edges"]) # Check the edges - self.assertCountEqual(obs['nodes'], exp['nodes']) + self.assertCountEqual(obs["nodes"], exp["nodes"]) # Check the edges - self.assertEqual(obs['workflow'], exp['workflow']) + self.assertEqual(obs["workflow"], exp["workflow"]) # Add a second Workflow to the second artifact to force the raise of # the error. 
This situation should never happen when using # the interface wf.remove(job2) params = Parameters.load( - Command(9), values_dict={'metadata_category': 'None', - 'sort': 'False', - 'biom_table': artifacts[1].id}) + Command(9), + values_dict={ + "metadata_category": "None", + "sort": "False", + "biom_table": artifacts[1].id, + }, + ) wf = ProcessingWorkflow.from_scratch(user, params) - response = self.get('/analysis/description/%s/graph/' % new_id) + response = self.get("/analysis/description/%s/graph/" % new_id) self.assertEqual(response.code, 500) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/analysis_handlers/tests/test_listing_handlers.py b/qiita_pet/handlers/analysis_handlers/tests/test_listing_handlers.py index 739333dc9..b868393ba 100644 --- a/qiita_pet/handlers/analysis_handlers/tests/test_listing_handlers.py +++ b/qiita_pet/handlers/analysis_handlers/tests/test_listing_handlers.py @@ -6,28 +6,29 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main from json import loads +from unittest import main from qiita_pet.test.tornado_test_base import TestHandlerBase class TestListingHandlers(TestHandlerBase): def test_get_list_analyses_handler(self): - response = self.get('/analysis/list/') + response = self.get("/analysis/list/") self.assertEqual(response.code, 200) def test_get_analysis_summary_ajax(self): - response = self.get('/analysis/dflt/sumary/') + response = self.get("/analysis/dflt/sumary/") self.assertEqual(response.code, 200) - self.assertEqual(loads(response.body), - {"artifacts": 1, "studies": 1, "samples": 4}) + self.assertEqual( + loads(response.body), {"artifacts": 1, "studies": 1, "samples": 4} + ) def test_get_selected_samples_handler(self): - response = self.get('/analysis/selected/') + response = self.get("/analysis/selected/") # Make sure page response loaded sucessfully self.assertEqual(response.code, 200) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/analysis_handlers/tests/test_sharing_handlers.py b/qiita_pet/handlers/analysis_handlers/tests/test_sharing_handlers.py index 4e61da700..f1dc255aa 100644 --- a/qiita_pet/handlers/analysis_handlers/tests/test_sharing_handlers.py +++ b/qiita_pet/handlers/analysis_handlers/tests/test_sharing_handlers.py @@ -6,8 +6,9 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main from json import loads +from unittest import main + from mock import Mock from qiita_db.analysis import Analysis @@ -17,54 +18,56 @@ class TestShareStudyAjax(TestHandlerBase): - def test_get(self): a = Analysis(1) - u = User('shared@foo.bar') + u = User("shared@foo.bar") self.assertEqual(a.shared_with, [u]) # deselecting - args = {'deselected': u.id, 'id': a.id} - response = self.get('/analysis/sharing/', args) + args = {"deselected": u.id, "id": a.id} + response = self.get("/analysis/sharing/", args) self.assertEqual(response.code, 200) - exp = {'users': [], 'links': ''} + exp = {"users": [], "links": ""} self.assertEqual(loads(response.body), exp) self.assertEqual(a.shared_with, []) # Make sure unshared message added to the system - self.assertEqual("Analysis 'SomeAnalysis' has been unshared with you.", - u.messages()[0][1]) + self.assertEqual( + "Analysis 'SomeAnalysis' has been unshared with you.", u.messages()[0][1] + ) # selecting - args = {'selected': u.id, 'id': a.id} - response = self.get('/analysis/sharing/', args) + args = {"selected": u.id, "id": a.id} + response = self.get("/analysis/sharing/", args) self.assertEqual(response.code, 200) exp = { - 'users': ['shared@foo.bar'], - 'links': - ('Shared')} + "users": ["shared@foo.bar"], + "links": ('Shared'), + } self.assertEqual(loads(response.body), exp) self.assertEqual(a.shared_with, [u]) # Make sure shared message added to the system self.assertEqual( - 'Analysis \'SomeAnalysis\' ' - 'has been shared with you.', u.messages()[0][1]) + "Analysis 'SomeAnalysis' " + "has been shared with you.", + u.messages()[0][1], + ) # admins can share BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) - args = {'deselected': u.id, 'id': a.id} - response = self.get('/analysis/sharing/', args) + args = {"deselected": u.id, "id": a.id} + response = self.get("/analysis/sharing/", args) self.assertEqual(response.code, 200) - exp = {'users': [], 'links': ''} + exp = {"users": [], "links": ""} self.assertEqual(loads(response.body), exp) self.assertEqual(a.shared_with, []) def test_get_no_access(self): - args = {'selected': 'demo@microbio.me', 'id': 2} - response = self.get('/analysis/sharing/', args) + args = {"selected": "demo@microbio.me", "id": 2} + response = self.get("/analysis/sharing/", args) self.assertEqual(response.code, 403) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/analysis_handlers/tests/test_util.py b/qiita_pet/handlers/analysis_handlers/tests/test_util.py index 93d5016ee..c4a314935 100644 --- a/qiita_pet/handlers/analysis_handlers/tests/test_util.py +++ b/qiita_pet/handlers/analysis_handlers/tests/test_util.py @@ -6,31 +6,31 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main, TestCase +from unittest import TestCase, main from tornado.web import HTTPError -from qiita_db.user import User from qiita_db.analysis import Analysis +from qiita_db.user import User from qiita_pet.handlers.analysis_handlers import check_analysis_access class UtilTests(TestCase): def test_check_analysis_access(self): # Has access, so it allows execution - u = User('test@foo.bar') + u = User("test@foo.bar") a = Analysis(1) check_analysis_access(u, a) # Admin has access to everything - u = User('admin@foo.bar') + u = User("admin@foo.bar") check_analysis_access(u, a) # Raises an error because it doesn't have access - u = User('demo@microbio.me') + u = User("demo@microbio.me") with self.assertRaises(HTTPError): check_analysis_access(u, a) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/analysis_handlers/util.py b/qiita_pet/handlers/analysis_handlers/util.py index b4c5b3ce6..effdcf8d5 100644 --- a/qiita_pet/handlers/analysis_handlers/util.py +++ b/qiita_pet/handlers/analysis_handlers/util.py @@ -25,5 +25,4 @@ def check_analysis_access(user, analysis): Tried to access analysis that user does not have access to """ if not analysis.has_access(user): - raise HTTPError(403, reason="Analysis access denied to %s" % ( - analysis.id)) + raise HTTPError(403, reason="Analysis access denied to %s" % (analysis.id)) diff --git a/qiita_pet/handlers/api_proxy/__init__.py b/qiita_pet/handlers/api_proxy/__init__.py index 0dce06d41..4cc62b8ed 100644 --- a/qiita_pet/handlers/api_proxy/__init__.py +++ b/qiita_pet/handlers/api_proxy/__init__.py @@ -10,57 +10,106 @@ # The idea is that this proxies the call and response dicts we expect from the # Qiita API once we build it. This will be removed and replaced with API calls # when the API is complete. 
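For context: the functions exported from this api_proxy package all follow the same response-dict convention visible throughout the hunks below, returning {"status": ..., "message": ...} plus any payload keys, and bailing out early with the dict produced by check_access when the user lacks permissions. A minimal, hypothetical sketch of that contract (widget_get_req and the "data" key are illustrative names only, not part of Qiita's API):

    from qiita_pet.handlers.api_proxy.util import check_access

    def widget_get_req(user_id, study_id):
        # check_access returns a falsy value when access is allowed,
        # or a {"status": "error", "message": ...} dict otherwise
        access_error = check_access(study_id, user_id)
        if access_error:
            return access_error
        # on success: empty message plus whatever payload the caller expects
        return {"status": "success", "message": "", "data": {"study": study_id}}
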
-from .sample_template import ( - sample_template_filepaths_get_req, analyses_associated_with_study, - sample_template_get_req, sample_template_meta_cats_get_req, - sample_template_samples_get_req, sample_template_category_get_req, - get_sample_template_processing_status) +from .artifact import ( + artifact_get_info, + artifact_get_prep_req, + artifact_get_req, + artifact_graph_get_req, + artifact_post_req, + artifact_status_put_req, + artifact_types_get_req, +) +from .ontology import ontology_patch_handler from .prep_template import ( - prep_template_summary_get_req, prep_template_post_req, - prep_template_delete_req, prep_template_get_req, - prep_template_graph_get_req, prep_template_filepaths_get_req, + new_prep_template_get_req, + prep_template_ajax_get_req, + prep_template_delete_req, + prep_template_filepaths_get_req, + prep_template_get_req, + prep_template_graph_get_req, + prep_template_jobs_get_req, + prep_template_patch_req, + prep_template_post_req, prep_template_samples_get_req, - new_prep_template_get_req, prep_template_ajax_get_req, - prep_template_patch_req, prep_template_jobs_get_req) -from .studies import ( - data_types_get_req, study_get_req, study_prep_get_req, study_delete_req, - study_files_get_req, study_patch_request, study_get_tags_request, - study_tags_request) -from .artifact import (artifact_graph_get_req, artifact_types_get_req, - artifact_post_req, artifact_get_req, - artifact_status_put_req, artifact_get_prep_req, - artifact_get_info) -from .ontology import ontology_patch_handler + prep_template_summary_get_req, +) from .processing import ( - list_commands_handler_get_req, list_options_handler_get_req, - workflow_handler_post_req, workflow_handler_patch_req, - workflow_run_post_req, job_ajax_get_req, job_ajax_patch_req) -from .user import (user_jobs_get_req) + job_ajax_get_req, + job_ajax_patch_req, + list_commands_handler_get_req, + list_options_handler_get_req, + workflow_handler_patch_req, + workflow_handler_post_req, + workflow_run_post_req, +) +from .sample_template import ( + analyses_associated_with_study, + get_sample_template_processing_status, + sample_template_category_get_req, + sample_template_filepaths_get_req, + sample_template_get_req, + sample_template_meta_cats_get_req, + sample_template_samples_get_req, +) +from .studies import ( + data_types_get_req, + study_delete_req, + study_files_get_req, + study_get_req, + study_get_tags_request, + study_patch_request, + study_prep_get_req, + study_tags_request, +) +from .user import user_jobs_get_req from .util import check_access, check_fp __version__ = "2025.11" -__all__ = ['prep_template_summary_get_req', 'data_types_get_req', - 'study_get_req', 'sample_template_filepaths_get_req', - 'prep_template_summary_get_req', 'prep_template_post_req', - 'prep_template_delete_req', 'artifact_get_prep_req', - 'prep_template_graph_get_req', 'prep_template_filepaths_get_req', - 'prep_template_jobs_get_req', - 'artifact_get_req', 'artifact_status_put_req', - 'prep_template_get_req', 'study_delete_req', - 'study_prep_get_req', 'sample_template_get_req', - 'artifact_graph_get_req', 'artifact_types_get_req', - 'artifact_post_req', 'artifact_get_info', - 'sample_template_meta_cats_get_req', - 'sample_template_samples_get_req', 'prep_template_samples_get_req', - 'sample_template_category_get_req', 'new_prep_template_get_req', - 'study_files_get_req', 'prep_template_ajax_get_req', - 'study_tags_request', 'study_patch_request', - 'study_get_tags_request', - 'prep_template_patch_req', 'ontology_patch_handler', - 
'list_commands_handler_get_req', - 'list_options_handler_get_req', 'workflow_handler_post_req', - 'workflow_handler_patch_req', 'workflow_run_post_req', - 'job_ajax_get_req', 'analyses_associated_with_study', - 'get_sample_template_processing_status', 'user_jobs_get_req', - 'job_ajax_patch_req', 'check_access', 'check_fp'] +__all__ = [ + "prep_template_summary_get_req", + "data_types_get_req", + "study_get_req", + "sample_template_filepaths_get_req", + "prep_template_summary_get_req", + "prep_template_post_req", + "prep_template_delete_req", + "artifact_get_prep_req", + "prep_template_graph_get_req", + "prep_template_filepaths_get_req", + "prep_template_jobs_get_req", + "artifact_get_req", + "artifact_status_put_req", + "prep_template_get_req", + "study_delete_req", + "study_prep_get_req", + "sample_template_get_req", + "artifact_graph_get_req", + "artifact_types_get_req", + "artifact_post_req", + "artifact_get_info", + "sample_template_meta_cats_get_req", + "sample_template_samples_get_req", + "prep_template_samples_get_req", + "sample_template_category_get_req", + "new_prep_template_get_req", + "study_files_get_req", + "prep_template_ajax_get_req", + "study_tags_request", + "study_patch_request", + "study_get_tags_request", + "prep_template_patch_req", + "ontology_patch_handler", + "list_commands_handler_get_req", + "list_options_handler_get_req", + "workflow_handler_post_req", + "workflow_handler_patch_req", + "workflow_run_post_req", + "job_ajax_get_req", + "analyses_associated_with_study", + "get_sample_template_processing_status", + "user_jobs_get_req", + "job_ajax_patch_req", + "check_access", + "check_fp", +] diff --git a/qiita_pet/handlers/api_proxy/artifact.py b/qiita_pet/handlers/api_proxy/artifact.py index 65c6d0e14..7b05afc50 100644 --- a/qiita_pet/handlers/api_proxy/artifact.py +++ b/qiita_pet/handlers/api_proxy/artifact.py @@ -5,26 +5,24 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from os.path import join from functools import partial -from json import dumps - from itertools import chain +from json import dumps +from os.path import join -from qiita_core.util import execute_as_transaction from qiita_core.qiita_settings import qiita_config, r_client -from qiita_pet.handlers.api_proxy.util import check_access, check_fp +from qiita_core.util import execute_as_transaction from qiita_db.artifact import Artifact -from qiita_db.user import User -from qiita_db.metadata_template.prep_template import PrepTemplate -from qiita_db.util import ( - get_mountpoint, get_visibilities, get_artifacts_information) -from qiita_db.software import Command, Parameters, Software -from qiita_db.processing_job import ProcessingJob from qiita_db.exceptions import QiitaDBError from qiita_db.logger import LogEntry +from qiita_db.metadata_template.prep_template import PrepTemplate +from qiita_db.processing_job import ProcessingJob +from qiita_db.software import Command, Parameters, Software +from qiita_db.user import User +from qiita_db.util import get_artifacts_information, get_mountpoint, get_visibilities +from qiita_pet.handlers.api_proxy.util import check_access, check_fp -PREP_TEMPLATE_KEY_FORMAT = 'prep_template_%s' +PREP_TEMPLATE_KEY_FORMAT = "prep_template_%s" def artifact_get_req(user_id, artifact_id): @@ -53,25 +51,25 @@ def artifact_get_req(user_id, artifact_id): return access_error can_submit_ebi = artifact.can_be_submitted_to_ebi - ebi_run_accessions = (artifact.ebi_run_accessions - if can_submit_ebi else None) + ebi_run_accessions = artifact.ebi_run_accessions if can_submit_ebi else None can_submit_vamps = artifact.can_be_submitted_to_vamps - is_submitted_vamps = (artifact.is_submitted_to_vamps - if can_submit_vamps else False) - - return {'id': artifact_id, - 'timestamp': artifact.timestamp, - 'processing_parameters': artifact.processing_parameters, - 'visibility': artifact.visibility, - 'type': artifact.artifact_type, - 'data_type': artifact.data_type, - 'filepaths': artifact.filepaths, - 'parents': [a.id for a in artifact.parents], - 'study': artifact.study.id if artifact.study else None, - 'can_submit_ebi': can_submit_ebi, - 'ebi_run_accessions': ebi_run_accessions, - 'can_submit_vamps': can_submit_vamps, - 'is_submitted_vamps': is_submitted_vamps} + is_submitted_vamps = artifact.is_submitted_to_vamps if can_submit_vamps else False + + return { + "id": artifact_id, + "timestamp": artifact.timestamp, + "processing_parameters": artifact.processing_parameters, + "visibility": artifact.visibility, + "type": artifact.artifact_type, + "data_type": artifact.data_type, + "filepaths": artifact.filepaths, + "parents": [a.id for a in artifact.parents], + "study": artifact.study.id if artifact.study else None, + "can_submit_ebi": can_submit_ebi, + "ebi_run_accessions": ebi_run_accessions, + "can_submit_vamps": can_submit_vamps, + "is_submitted_vamps": is_submitted_vamps, + } @execute_as_transaction @@ -101,10 +99,11 @@ def artifact_get_prep_req(user_id, artifact_ids): if access_error: return access_error - samples[aid] = list(chain( - *[sorted(pt.keys()) for pt in Artifact(aid).prep_templates])) + samples[aid] = list( + chain(*[sorted(pt.keys()) for pt in Artifact(aid).prep_templates]) + ) - return {'status': 'success', 'msg': '', 'data': samples} + return {"status": "success", "msg": "", "data": samples} @execute_as_transaction @@ -132,12 +131,13 @@ def artifact_get_info(user_id, artifact_ids, only_biom=True): 
artifact_info = get_artifacts_information(artifact_ids, only_biom) - return {'status': 'success', 'msg': '', 'data': artifact_info} + return {"status": "success", "msg": "", "data": artifact_info} @execute_as_transaction -def artifact_post_req(user_id, filepaths, artifact_type, name, - prep_template_id, artifact_id=None): +def artifact_post_req( + user_id, filepaths, artifact_type, name, prep_template_id, artifact_id=None +): """Creates the initial artifact for the prep template Parameters @@ -177,19 +177,20 @@ def artifact_post_req(user_id, filepaths, artifact_type, name, if artifact_id: # if the artifact id has been provided, import the artifact - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = qiita_plugin.get_command('copy_artifact') - params = Parameters.load(cmd, values_dict={'artifact': artifact_id, - 'prep_template': prep.id}) + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("copy_artifact") + params = Parameters.load( + cmd, values_dict={"artifact": artifact_id, "prep_template": prep.id} + ) job = ProcessingJob.create(user, params, True) else: - uploads_path = get_mountpoint('uploads')[0][1] + uploads_path = get_mountpoint("uploads")[0][1] path_builder = partial(join, uploads_path, str(study_id)) cleaned_filepaths = {} for ftype, file_list in filepaths.items(): # JavaScript sends us this list as a comma-separated list - for fp in file_list.split(','): + for fp in file_list.split(","): # JavaScript will send this value as an empty string if the # list of files was empty. In such case, the split will # generate a single element containing the empty string. Check @@ -199,9 +200,11 @@ def artifact_post_req(user_id, filepaths, artifact_type, name, # Check if filepath being passed exists for study full_fp = path_builder(fp) exists = check_fp(study_id, full_fp) - if exists['status'] != 'success': - return {'status': 'error', - 'message': 'File does not exist: %s' % fp} + if exists["status"] != "success": + return { + "status": "error", + "message": "File does not exist: %s" % fp, + } if ftype not in cleaned_filepaths: cleaned_filepaths[ftype] = [] cleaned_filepaths[ftype].append(full_fp) @@ -209,32 +212,41 @@ def artifact_post_req(user_id, filepaths, artifact_type, name, # This should never happen, but it doesn't hurt to actually have # a explicit check, in case there is something odd with the JS if not cleaned_filepaths: - return {'status': 'error', - 'message': "Can't create artifact, no files provided."} + return { + "status": "error", + "message": "Can't create artifact, no files provided.", + } # This try/except will catch the case when the plugins are not # activated so there is no Validate for the given artifact_type try: command = Command.get_validator(artifact_type) except QiitaDBError as e: - return {'status': 'error', 'message': str(e)} + return {"status": "error", "message": str(e)} job = ProcessingJob.create( user, - Parameters.load(command, values_dict={ - 'template': prep_template_id, - 'files': dumps(cleaned_filepaths), - 'artifact_type': artifact_type, - 'name': name, - 'analysis': None, - }), True) + Parameters.load( + command, + values_dict={ + "template": prep_template_id, + "files": dumps(cleaned_filepaths), + "artifact_type": artifact_type, + "name": name, + "analysis": None, + }, + ), + True, + ) # Submit the job job.submit() - r_client.set(PREP_TEMPLATE_KEY_FORMAT % prep.id, - dumps({'job_id': job.id, 'is_qiita_job': True})) + r_client.set( + PREP_TEMPLATE_KEY_FORMAT % prep.id, + 
dumps({"job_id": job.id, "is_qiita_job": True}), + ) - return {'status': 'success', 'message': ''} + return {"status": "success", "message": ""} def artifact_types_get_req(): @@ -249,9 +261,7 @@ def artifact_types_get_req(): types holds type and description of the artifact type, in the form [[artifact_type, description], ...] """ - return {'status': 'success', - 'message': '', - 'types': Artifact.types()} + return {"status": "success", "message": "", "types": Artifact.types()} def artifact_graph_get_req(artifact_id, direction, user_id): @@ -282,22 +292,20 @@ def artifact_graph_get_req(artifact_id, direction, user_id): if access_error: return access_error - if direction == 'descendants': + if direction == "descendants": G = Artifact(int(artifact_id)).descendants - elif direction == 'ancestors': + elif direction == "ancestors": G = Artifact(int(artifact_id)).ancestors else: - return { - 'status': 'error', - 'message': 'Unknown directon %s' % direction - } + return {"status": "error", "message": "Unknown directon %s" % direction} - node_labels = [(n.id, ' - '.join([n.name, n.artifact_type])) - for n in G.nodes()] - return {'edge_list': [(n.id, m.id) for n, m in G.edges()], - 'node_labels': node_labels, - 'status': 'success', - 'message': ''} + node_labels = [(n.id, " - ".join([n.name, n.artifact_type])) for n in G.nodes()] + return { + "edge_list": [(n.id, m.id) for n, m in G.edges()], + "node_labels": node_labels, + "status": "success", + "message": "", + } def artifact_status_put_req(artifact_id, user_id, visibility): @@ -320,8 +328,10 @@ def artifact_status_put_req(artifact_id, user_id, visibility): message: Human readable message for status """ if visibility not in get_visibilities(): - return {'status': 'error', - 'message': 'Unknown visibility value: %s' % visibility} + return { + "status": "error", + "message": "Unknown visibility value: %s" % visibility, + } pd = Artifact(int(artifact_id)) sid = pd.study.id @@ -329,24 +339,26 @@ def artifact_status_put_req(artifact_id, user_id, visibility): if access_error: return access_error user = User(str(user_id)) - status = 'success' - msg = 'Artifact visibility changed to %s' % visibility + status = "success" + msg = "Artifact visibility changed to %s" % visibility # Set the approval to private if needs approval and admin - if visibility == 'private': + if visibility == "private": if not qiita_config.require_approval: - pd.visibility = 'private' + pd.visibility = "private" # Set the approval to private if approval not required - elif user.level == 'admin': - pd.visibility = 'private' + elif user.level == "admin": + pd.visibility = "private" # Trying to set approval without admin privileges else: - status = 'error' - msg = 'User does not have permissions to approve change' + status = "error" + msg = "User does not have permissions to approve change" else: pd.visibility = visibility - LogEntry.create('Warning', '%s changed artifact %s (study %d) to %s' % ( - user_id, artifact_id, sid, visibility)) + LogEntry.create( + "Warning", + "%s changed artifact %s (study %d) to %s" + % (user_id, artifact_id, sid, visibility), + ) - return {'status': status, - 'message': msg} + return {"status": status, "message": msg} diff --git a/qiita_pet/handlers/api_proxy/ontology.py b/qiita_pet/handlers/api_proxy/ontology.py index fb072af53..703471f77 100644 --- a/qiita_pet/handlers/api_proxy/ontology.py +++ b/qiita_pet/handlers/api_proxy/ontology.py @@ -6,9 +6,9 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from qiita_db.util import convert_to_id from qiita_db.exceptions import QiitaDBLookupError from qiita_db.ontology import Ontology +from qiita_db.util import convert_to_id def ontology_patch_handler(req_op, req_path, req_value=None, req_from=None): @@ -33,25 +33,27 @@ def ontology_patch_handler(req_op, req_path, req_value=None, req_from=None): is a human readable string with the error message in case that status is 'error'. """ - if req_op == 'add': - req_path = [v for v in req_path.split('/') if v] + if req_op == "add": + req_path = [v for v in req_path.split("/") if v] if len(req_path) != 1: - return {'status': 'error', - 'message': 'Incorrect path parameter'} + return {"status": "error", "message": "Incorrect path parameter"} req_path = req_path[0] try: - o_id = convert_to_id(req_path, 'ontology') + o_id = convert_to_id(req_path, "ontology") except QiitaDBLookupError: - return {'status': 'error', - 'message': 'Ontology "%s" does not exist' % req_path} + return { + "status": "error", + "message": 'Ontology "%s" does not exist' % req_path, + } ontology = Ontology(o_id) ontology.add_user_defined_term(req_value) - return {'status': 'success', - 'message': ''} + return {"status": "success", "message": ""} else: - return {'status': 'error', - 'message': 'Operation "%s" not supported. ' - 'Current supported operations: add' % req_op} + return { + "status": "error", + "message": 'Operation "%s" not supported. ' + "Current supported operations: add" % req_op, + } diff --git a/qiita_pet/handlers/api_proxy/prep_template.py b/qiita_pet/handlers/api_proxy/prep_template.py index 054ead386..ededc29fd 100644 --- a/qiita_pet/handlers/api_proxy/prep_template.py +++ b/qiita_pet/handlers/api_proxy/prep_template.py @@ -6,28 +6,28 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- import warnings +from collections import defaultdict +from json import dumps, loads from os import remove from os.path import basename -from json import loads, dumps -from collections import defaultdict from natsort import natsorted -from qiita_core.util import execute_as_transaction from qiita_core.qiita_settings import r_client -from qiita_pet.handlers.api_proxy.util import check_access, check_fp -from qiita_pet.util import get_network_nodes_edges +from qiita_core.util import execute_as_transaction from qiita_db.artifact import Artifact +from qiita_db.metadata_template.prep_template import PrepTemplate from qiita_db.metadata_template.util import load_template_to_dataframe -from qiita_db.util import convert_to_id, get_files_from_uploads_folders -from qiita_db.study import Study -from qiita_db.user import User from qiita_db.ontology import Ontology -from qiita_db.metadata_template.prep_template import PrepTemplate from qiita_db.processing_job import ProcessingJob -from qiita_db.software import Software, Parameters +from qiita_db.software import Parameters, Software +from qiita_db.study import Study +from qiita_db.user import User +from qiita_db.util import convert_to_id, get_files_from_uploads_folders +from qiita_pet.handlers.api_proxy.util import check_access, check_fp +from qiita_pet.util import get_network_nodes_edges -PREP_TEMPLATE_KEY_FORMAT = 'prep_template_%s' +PREP_TEMPLATE_KEY_FORMAT = "prep_template_%s" def _get_ENA_ontology(): @@ -39,13 +39,13 @@ def _get_ENA_ontology(): A dictionary of the form {'ENA': list of str, 'User': list of str} with the ENA-defined terms and the User-defined terms, respectivelly. """ - ontology = Ontology(convert_to_id('ENA', 'ontology')) + ontology = Ontology(convert_to_id("ENA", "ontology")) ena_terms = sorted(ontology.terms) # make "Other" last on the list - ena_terms.remove('Other') - ena_terms.append('Other') + ena_terms.remove("Other") + ena_terms.append("Other") - return {'ENA': ena_terms, 'User': sorted(ontology.user_defined_terms)} + return {"ENA": ena_terms, "User": sorted(ontology.user_defined_terms)} def new_prep_template_get_req(study_id): @@ -63,17 +63,22 @@ def new_prep_template_get_req(study_id): The list of available data types The investigation type ontology information """ - prep_files = [f for _, f, _ in get_files_from_uploads_folders(study_id) - if f.endswith(('.txt', '.tsv', '.xlsx'))] + prep_files = [ + f + for _, f, _ in get_files_from_uploads_folders(study_id) + if f.endswith((".txt", ".tsv", ".xlsx")) + ] data_types = sorted(Study.all_data_types()) # Get all the ENA terms for the investigation type ontology_info = _get_ENA_ontology() - return {'status': 'success', - 'prep_files': prep_files, - 'data_types': data_types, - 'ontology': ontology_info} + return { + "status": "success", + "prep_files": prep_files, + "data_types": data_types, + "ontology": ontology_info, + } def prep_template_ajax_get_req(user_id, prep_id): @@ -111,29 +116,32 @@ def prep_template_ajax_get_req(user_id, prep_id): # Initialize variables here processing = False - alert_type = '' - alert_msg = '' + alert_type = "" + alert_msg = "" job_info = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id) if job_info: - job_info = defaultdict(lambda: '', loads(job_info)) - job_id = job_info['job_id'] + job_info = defaultdict(lambda: "", loads(job_info)) + job_id = job_info["job_id"] job = ProcessingJob(job_id) job_status = job.status - processing = job_status not in ('success', 'error') + 
processing = job_status not in ("success", "error") if processing: - alert_type = 'info' - alert_msg = 'This prep template is currently being updated' - elif job_status == 'error': - alert_type = 'danger' - alert_msg = job.log.msg.replace('\n', '
') + alert_type = "info" + alert_msg = "This prep template is currently being updated" + elif job_status == "error": + alert_type = "danger" + alert_msg = job.log.msg.replace("\n", "
") else: - alert_type = job_info['alert_type'] - alert_msg = job_info['alert_msg'].replace('\n', '
') + alert_type = job_info["alert_type"] + alert_msg = job_info["alert_msg"].replace("\n", "
") artifact_attached = pt.artifact is not None study_id = pt.study_id - files = [f for _, f, _ in get_files_from_uploads_folders(study_id) - if f.endswith(('.txt', '.tsv', '.xlsx'))] + files = [ + f + for _, f, _ in get_files_from_uploads_folders(study_id) + if f.endswith((".txt", ".tsv", ".xlsx")) + ] # The call to list is needed because keys is an iterator num_samples = len(list(pt.keys())) @@ -159,28 +167,30 @@ def prep_template_ajax_get_req(user_id, prep_id): if creation_job is not None: creation_job = ProcessingJob(creation_job) - return {'status': 'success', - 'message': '', - 'name': name, - 'files': files, - 'download_prep_id': download_prep_id, - 'other_filepaths': other_filepaths, - 'num_samples': num_samples, - 'num_columns': num_columns, - 'investigation_type': investigation_type, - 'ontology': ontology, - 'artifact_attached': artifact_attached, - 'archived_artifacts': pt.archived_artifacts, - 'study_id': study_id, - 'editable': editable, - 'data_type': pt.data_type(), - 'alert_type': alert_type, - 'is_submitted_to_ebi': pt.is_submitted_to_ebi, - 'prep_restrictions': restrictions, - 'samples': sorted(list(pt.keys())), - 'deprecated': deprecated, - 'creation_job': creation_job, - 'alert_message': alert_msg} + return { + "status": "success", + "message": "", + "name": name, + "files": files, + "download_prep_id": download_prep_id, + "other_filepaths": other_filepaths, + "num_samples": num_samples, + "num_columns": num_columns, + "investigation_type": investigation_type, + "ontology": ontology, + "artifact_attached": artifact_attached, + "archived_artifacts": pt.archived_artifacts, + "study_id": study_id, + "editable": editable, + "data_type": pt.data_type(), + "alert_type": alert_type, + "is_submitted_to_ebi": pt.is_submitted_to_ebi, + "prep_restrictions": restrictions, + "samples": sorted(list(pt.keys())), + "deprecated": deprecated, + "creation_job": creation_job, + "alert_message": alert_msg, + } @execute_as_transaction @@ -201,14 +211,14 @@ def _process_investigation_type(inv_type, user_def_type, new_type): str The investigation type chosen by the user """ - if inv_type == '': + if inv_type == "": inv_type = None - elif inv_type == 'Other' and user_def_type == 'New Type': + elif inv_type == "Other" and user_def_type == "New Type": # This is a new user defined investigation type so store it inv_type = new_type - ontology = Ontology(convert_to_id('ENA', 'ontology')) + ontology = Ontology(convert_to_id("ENA", "ontology")) ontology.add_user_defined_term(inv_type) - elif inv_type == 'Other' and user_def_type != 'New Type': + elif inv_type == "Other" and user_def_type != "New Type": inv_type = user_def_type return inv_type @@ -228,11 +238,11 @@ def _check_prep_template_exists(prep_id): 'message': msg} """ if not PrepTemplate.exists(int(prep_id)): - return {'status': 'error', - 'message': 'Prep template %d does not exist' % int(prep_id) - } - return {'status': 'success', - 'message': ''} + return { + "status": "error", + "message": "Prep template %d does not exist" % int(prep_id), + } + return {"status": "success", "message": ""} def prep_template_get_req(prep_id, user_id): @@ -253,7 +263,7 @@ def prep_template_get_req(prep_id, user_id): 'template': {sample: {column: value, ...}, ...} """ exists = _check_prep_template_exists(int(prep_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists prep = PrepTemplate(int(prep_id)) @@ -261,9 +271,7 @@ def prep_template_get_req(prep_id, user_id): if access_error: return access_error df = prep.to_dataframe() - 
return {'status': 'success', - 'message': '', - 'template': df.to_dict(orient='index')} + return {"status": "success", "message": "", "template": df.to_dict(orient="index")} def prep_template_summary_get_req(prep_id, user_id): @@ -289,7 +297,7 @@ def prep_template_summary_get_req(prep_id, user_id): 'editable': bool} """ exists = _check_prep_template_exists(int(prep_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists prep = PrepTemplate(int(prep_id)) @@ -299,26 +307,34 @@ def prep_template_summary_get_req(prep_id, user_id): editable = Study(prep.study_id).can_edit(User(user_id)) df = prep.to_dataframe() - out = {'num_samples': df.shape[0], - 'summary': [], - 'status': 'success', - 'message': '', - 'editable': editable} + out = { + "num_samples": df.shape[0], + "summary": [], + "status": "success", + "message": "", + "editable": editable, + } cols = sorted(list(df.columns)) for column in cols: counts = df[column].value_counts(dropna=False) - out['summary'].append( - (str(column), [(str(key), counts[key]) - for key in natsorted(counts.index)])) + out["summary"].append( + (str(column), [(str(key), counts[key]) for key in natsorted(counts.index)]) + ) return out @execute_as_transaction -def prep_template_post_req(study_id, user_id, prep_template, data_type, - investigation_type=None, - user_defined_investigation_type=None, - new_investigation_type=None, name=None): +def prep_template_post_req( + study_id, + user_id, + prep_template, + data_type, + investigation_type=None, + user_defined_investigation_type=None, + new_investigation_type=None, + name=None, +): """Adds a prep template to the system Parameters @@ -352,18 +368,18 @@ def prep_template_post_req(study_id, user_id, prep_template, data_type, if access_error: return access_error fp_rpt = check_fp(study_id, prep_template) - if fp_rpt['status'] != 'success': + if fp_rpt["status"] != "success": # Unknown filepath, so return the error message return fp_rpt - fp_rpt = fp_rpt['file'] + fp_rpt = fp_rpt["file"] # Add new investigation type if needed investigation_type = _process_investigation_type( - investigation_type, user_defined_investigation_type, - new_investigation_type) + investigation_type, user_defined_investigation_type, new_investigation_type + ) - msg = '' - status = 'success' + msg = "" + status = "success" prep = None if name: name = name if name.strip() else None @@ -371,30 +387,35 @@ def prep_template_post_req(study_id, user_id, prep_template, data_type, with warnings.catch_warnings(record=True) as warns: # deleting previous uploads and inserting new one prep = PrepTemplate.create( - load_template_to_dataframe(fp_rpt), Study(study_id), data_type, - investigation_type=investigation_type, name=name) + load_template_to_dataframe(fp_rpt), + Study(study_id), + data_type, + investigation_type=investigation_type, + name=name, + ) remove(fp_rpt) # join all the warning messages into one. 
Note that this info # will be ignored if an exception is raised if warns: - msg = '\n'.join(set(str(w.message) for w in warns)) - status = 'warning' + msg = "\n".join(set(str(w.message) for w in warns)) + status = "warning" except Exception as e: # Some error occurred while processing the prep template # Show the error to the user so he can fix the template - status = 'error' + status = "error" msg = str(e) - info = {'status': status, - 'message': msg, - 'file': prep_template, - 'id': prep.id if prep is not None else None} + info = { + "status": status, + "message": msg, + "file": prep_template, + "id": prep.id if prep is not None else None, + } return info -def prep_template_patch_req(user_id, req_op, req_path, req_value=None, - req_from=None): +def prep_template_patch_req(user_id, req_op, req_path, req_value=None, req_from=None): """Modifies an attribute of the prep template Parameters @@ -418,13 +439,12 @@ def prep_template_patch_req(user_id, req_op, req_path, req_value=None, - message: str, if the request is unsuccessful, a human readable error - row_id: str, the row_id that we tried to delete """ - req_path = [v for v in req_path.split('/') if v] - if req_op == 'replace': + req_path = [v for v in req_path.split("/") if v] + if req_op == "replace": # The structure of the path should be /prep_id/attribute_to_modify/ # so if we don't have those 2 elements, we should return an error if len(req_path) != 2: - return {'status': 'error', - 'message': 'Incorrect path parameter'} + return {"status": "error", "message": "Incorrect path parameter"} prep_id = int(req_path[0]) attribute = req_path[1] @@ -434,39 +454,40 @@ def prep_template_patch_req(user_id, req_op, req_path, req_value=None, if access_error: return access_error - status = 'success' - msg = '' - if attribute == 'investigation_type': + status = "success" + msg = "" + if attribute == "investigation_type": prep.investigation_type = req_value - elif attribute == 'data': + elif attribute == "data": fp = check_fp(prep.study_id, req_value) - if fp['status'] != 'success': + if fp["status"] != "success": return fp - fp = fp['file'] - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = qiita_plugin.get_command('update_prep_template') + fp = fp["file"] + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("update_prep_template") params = Parameters.load( - cmd, values_dict={'prep_template': prep_id, 'template_fp': fp}) + cmd, values_dict={"prep_template": prep_id, "template_fp": fp} + ) job = ProcessingJob.create(User(user_id), params, True) - r_client.set(PREP_TEMPLATE_KEY_FORMAT % prep_id, - dumps({'job_id': job.id})) + r_client.set(PREP_TEMPLATE_KEY_FORMAT % prep_id, dumps({"job_id": job.id})) job.submit() - elif attribute == 'name': + elif attribute == "name": prep.name = req_value.strip() else: # We don't understand the attribute so return an error - return {'status': 'error', - 'message': 'Attribute "%s" not found. ' - 'Please, check the path parameter' % attribute} + return { + "status": "error", + "message": 'Attribute "%s" not found. 
' + "Please, check the path parameter" % attribute, + } - return {'status': status, 'message': msg} - elif req_op == 'remove': + return {"status": status, "message": msg} + elif req_op == "remove": # The structure of the path should be: # /prep_id/row_id/{columns|samples}/name if len(req_path) != 4: - return {'status': 'error', - 'message': 'Incorrect path parameter'} + return {"status": "error", "message": "Incorrect path parameter"} prep_id = int(req_path[0]) row_id = req_path[1] attribute = req_path[2] @@ -478,25 +499,27 @@ def prep_template_patch_req(user_id, req_op, req_path, req_value=None, if access_error: return access_error - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = qiita_plugin.get_command('delete_sample_or_column') + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("delete_sample_or_column") params = Parameters.load( - cmd, values_dict={'obj_class': 'PrepTemplate', - 'obj_id': prep_id, - 'sample_or_col': attribute, - 'name': attr_id}) + cmd, + values_dict={ + "obj_class": "PrepTemplate", + "obj_id": prep_id, + "sample_or_col": attribute, + "name": attr_id, + }, + ) job = ProcessingJob.create(User(user_id), params, True) # Store the job id attaching it to the sample template id - r_client.set(PREP_TEMPLATE_KEY_FORMAT % prep_id, - dumps({'job_id': job.id})) + r_client.set(PREP_TEMPLATE_KEY_FORMAT % prep_id, dumps({"job_id": job.id})) job.submit() - return {'status': 'success', 'message': '', 'row_id': row_id} - elif req_op == 'update-deprecated': + return {"status": "success", "message": "", "row_id": row_id} + elif req_op == "update-deprecated": if len(req_path) != 2: - return {'status': 'error', - 'message': 'Incorrect path parameter'} + return {"status": "error", "message": "Incorrect path parameter"} prep_id = int(req_path[0]) - value = req_path[1] == 'true' + value = req_path[1] == "true" # Check if the user actually has access to the study pt = PrepTemplate(prep_id) @@ -505,13 +528,14 @@ def prep_template_patch_req(user_id, req_op, req_path, req_value=None, return access_error pt.deprecated = value - return {'status': 'success', 'message': ''} + return {"status": "success", "message": ""} else: - return {'status': 'error', - 'message': 'Operation "%s" not supported. ' - 'Current supported operations: replace, remove' - % req_op, - 'row_id': '0'} + return { + "status": "error", + "message": 'Operation "%s" not supported. 
' + "Current supported operations: replace, remove" % req_op, + "row_id": "0", + } def prep_template_samples_get_req(prep_id, user_id): @@ -534,16 +558,17 @@ def prep_template_samples_get_req(prep_id, user_id): samples is list of samples in the template """ exists = _check_prep_template_exists(int(prep_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists prep = PrepTemplate(int(prep_id)) access_error = check_access(prep.study_id, user_id) if access_error: return access_error - return {'status': 'success', - 'message': '', - 'samples': sorted(x for x in PrepTemplate(int(prep_id))) - } + return { + "status": "success", + "message": "", + "samples": sorted(x for x in PrepTemplate(int(prep_id))), + } def prep_template_delete_req(prep_id, user_id): @@ -563,23 +588,22 @@ def prep_template_delete_req(prep_id, user_id): 'message': message} """ exists = _check_prep_template_exists(int(prep_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists prep = PrepTemplate(int(prep_id)) access_error = check_access(prep.study_id, user_id) if access_error: return access_error - msg = '' - status = 'success' + msg = "" + status = "success" try: PrepTemplate.delete(prep.id) except Exception as e: msg = str(e) - status = 'error' + status = "error" - return {'status': status, - 'message': msg} + return {"status": status, "message": msg} @execute_as_transaction @@ -601,17 +625,14 @@ def prep_template_filepaths_get_req(prep_id, user_id): 'filepaths': [(filepath_id, filepath), ...]} """ exists = _check_prep_template_exists(int(prep_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists prep = PrepTemplate(int(prep_id)) access_error = check_access(prep.study_id, user_id) if access_error: return access_error - return {'status': 'success', - 'message': '', - 'filepaths': prep.get_filepaths() - } + return {"status": "success", "message": "", "filepaths": prep.get_filepaths()} def prep_template_graph_get_req(prep_id, user_id): @@ -639,7 +660,7 @@ def prep_template_graph_get_req(prep_id, user_id): Nodes are identified by the corresponding Artifact ID. 
""" exists = _check_prep_template_exists(int(prep_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists prep = PrepTemplate(int(prep_id)) @@ -654,8 +675,7 @@ def prep_template_graph_get_req(prep_id, user_id): artifact = prep.artifact if artifact is None: - return {'edges': [], 'nodes': [], - 'status': 'success', 'message': ''} + return {"edges": [], "nodes": [], "status": "success", "message": ""} G = artifact.descendants_with_jobs @@ -663,15 +683,20 @@ def prep_template_graph_get_req(prep_id, user_id): # nodes returns [node_type, node_name, element_id]; here we are looking # for the node_type == artifact, and check by the element/artifact_id if # it's being deleted - artifacts_being_deleted = [a[2] for a in nodes if a[0] == 'artifact' and - Artifact(a[2]).being_deleted_by is not None] - - return {'edges': edges, - 'nodes': nodes, - 'workflow': wf_id, - 'status': 'success', - 'artifacts_being_deleted': artifacts_being_deleted, - 'message': ''} + artifacts_being_deleted = [ + a[2] + for a in nodes + if a[0] == "artifact" and Artifact(a[2]).being_deleted_by is not None + ] + + return { + "edges": edges, + "nodes": nodes, + "workflow": wf_id, + "status": "success", + "artifacts_being_deleted": artifacts_being_deleted, + "message": "", + } def prep_template_jobs_get_req(prep_id, user_id): @@ -700,10 +725,13 @@ def prep_template_jobs_get_req(prep_id, user_id): job_info = r_client.get(PREP_TEMPLATE_KEY_FORMAT % prep_id) result = {} if job_info: - job_info = defaultdict(lambda: '', loads(job_info)) - job_id = job_info['job_id'] + job_info = defaultdict(lambda: "", loads(job_info)) + job_id = job_info["job_id"] job = ProcessingJob(job_id) - result[job.id] = {'status': job.status, 'step': job.step, - 'error': job.log.msg if job.log else ""} + result[job.id] = { + "status": job.status, + "step": job.step, + "error": job.log.msg if job.log else "", + } return result diff --git a/qiita_pet/handlers/api_proxy/processing.py b/qiita_pet/handlers/api_proxy/processing.py index f145bf191..20a4b6bb7 100644 --- a/qiita_pet/handlers/api_proxy/processing.py +++ b/qiita_pet/handlers/api_proxy/processing.py @@ -6,14 +6,14 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from json import loads, dumps from collections import defaultdict +from json import dumps, loads -from qiita_db.user import User from qiita_db.artifact import Artifact -from qiita_db.software import Command, Parameters, DefaultParameters -from qiita_db.processing_job import ProcessingWorkflow, ProcessingJob from qiita_db.exceptions import QiitaDBUnknownIDError +from qiita_db.processing_job import ProcessingJob, ProcessingWorkflow +from qiita_db.software import Command, DefaultParameters, Parameters +from qiita_db.user import User def list_commands_handler_get_req(id, exclude_analysis): @@ -39,10 +39,10 @@ def list_commands_handler_get_req(id, exclude_analysis): if id.isdigit(): commands = Artifact(id).get_commands else: - pieces = id.split(':') + pieces = id.split(":") if len(pieces) == 1: aid = pieces[0] - root = '' + root = "" else: aid = pieces[0] root = pieces[1] @@ -53,15 +53,14 @@ def list_commands_handler_get_req(id, exclude_analysis): prep_type = artifact.prep_templates[0].data_type commands = Command.get_commands_by_input_type( - [aid], exclude_analysis=exclude_analysis, - prep_type=prep_type) + [aid], exclude_analysis=exclude_analysis, prep_type=prep_type + ) - cmd_info = [{'id': cmd.id, 'command': cmd.name, 'output': cmd.outputs} - for cmd in commands] + cmd_info = [ + {"id": cmd.id, "command": cmd.name, "output": cmd.outputs} for cmd in commands + ] - return {'status': 'success', - 'message': '', - 'commands': cmd_info} + return {"status": "success", "message": "", "commands": cmd_info} def list_options_handler_get_req(command_id, artifact_id=None): @@ -88,9 +87,9 @@ def list_options_handler_get_req(command_id, artifact_id=None): 'opt_options': dict, 'extra_artifacts': dict} """ + def _helper_process_params(params): - return dumps( - {k: str(v).lower() for k, v in params.items()}, sort_keys=True) + return dumps({k: str(v).lower() for k, v in params.items()}, sort_keys=True) command = Command(command_id) rparamers = [] @@ -108,10 +107,10 @@ def _helper_process_params(params): analysis = artifact.analysis for job in artifact.jobs(cmd=command): jstatus = job.status - outputs = job.outputs if job.status == 'success' else None + outputs = job.outputs if job.status == "success" else None # this ignore any jobs that weren't successful or are in # construction, or the results have been deleted [outputs == {}] - if jstatus not in {'success', 'in_construction'} or outputs == {}: + if jstatus not in {"success", "in_construction"} or outputs == {}: continue params = job.parameters.values.copy() for k in rparamers: @@ -136,15 +135,19 @@ def _helper_process_params(params): if artifact_id != aa.id and atype in extra_atypes: extra_artifacts[atype].append((aa.id, aa.name)) - options = [{'id': p.id, 'name': p.name, 'values': p.values} - for p in command.default_parameter_sets - if _helper_process_params(p.values) not in eparams] - return {'status': 'success', - 'message': '', - 'options': options, - 'req_options': command.required_parameters, - 'opt_options': command.optional_parameters, - 'extra_artifacts': extra_artifacts} + options = [ + {"id": p.id, "name": p.name, "values": p.values} + for p in command.default_parameter_sets + if _helper_process_params(p.values) not in eparams + ] + return { + "status": "success", + "message": "", + "options": options, + "req_options": command.required_parameters, + "opt_options": command.optional_parameters, + "extra_artifacts": extra_artifacts, + } def 
workflow_handler_post_req(user_id, command_id, params): @@ -168,25 +171,25 @@ def workflow_handler_post_req(user_id, command_id, params): 'message': str, 'workflow_id': int} """ - status = 'success' - message = '' + status = "success" + message = "" try: parameters = Parameters.load(Command(command_id), json_str=params) except Exception as exc: wf = None wf_id = None job_info = None - status = 'error' + status = "error" message = str(exc) - if status == 'success': + if status == "success": try: wf = ProcessingWorkflow.from_scratch(User(user_id), parameters) except Exception as exc: wf = None wf_id = None job_info = None - status = 'error' + status = "error" message = str(exc) if wf is not None: @@ -197,15 +200,17 @@ def workflow_handler_post_req(user_id, command_id, params): inputs = [a.id for a in job.input_artifacts] job_cmd = job.command wf_id = wf.id - job_info = {'id': job.id, 'inputs': inputs, 'label': job_cmd.name, - 'outputs': job_cmd.outputs} + job_info = { + "id": job.id, + "inputs": inputs, + "label": job_cmd.name, + "outputs": job_cmd.outputs, + } - return {'status': status, 'message': message, 'workflow_id': wf_id, - 'job': job_info} + return {"status": status, "message": message, "workflow_id": wf_id, "job": job_info} -def workflow_handler_patch_req(req_op, req_path, req_value=None, - req_from=None): +def workflow_handler_patch_req(req_op, req_path, req_value=None, req_from=None): """Patches a workflow Parameters @@ -227,49 +232,57 @@ def workflow_handler_patch_req(req_op, req_path, req_value=None, is a human readable string with the error message in case that status is 'error'. """ - if req_op == 'add': - req_path = [v for v in req_path.split('/') if v] + if req_op == "add": + req_path = [v for v in req_path.split("/") if v] if len(req_path) != 1: - return {'status': 'error', - 'message': 'Incorrect path parameter'} + return {"status": "error", "message": "Incorrect path parameter"} req_path = req_path[0] try: wf = ProcessingWorkflow(req_path) except QiitaDBUnknownIDError: - return {'status': 'error', - 'message': 'Workflow %s does not exist' % req_path} + return { + "status": "error", + "message": "Workflow %s does not exist" % req_path, + } req_value = loads(req_value) - dflt_params = DefaultParameters(req_value['dflt_params']) - req_params = req_value.get('req_params', None) - opt_params = req_value.get('opt_params', None) - connections = {ProcessingJob(k): v - for k, v in req_value['connections'].items()} - job = wf.add(dflt_params, connections=connections, - req_params=req_params, opt_params=opt_params) + dflt_params = DefaultParameters(req_value["dflt_params"]) + req_params = req_value.get("req_params", None) + opt_params = req_value.get("opt_params", None) + connections = {ProcessingJob(k): v for k, v in req_value["connections"].items()} + job = wf.add( + dflt_params, + connections=connections, + req_params=req_params, + opt_params=opt_params, + ) job_cmd = job.command - return {'status': 'success', - 'message': '', - 'job': {'id': job.id, - 'inputs': list(req_value['connections'].keys()), - 'label': job_cmd.name, - 'outputs': job_cmd.outputs}} - elif req_op == 'remove': - req_path = [v for v in req_path.split('/') if v] + return { + "status": "success", + "message": "", + "job": { + "id": job.id, + "inputs": list(req_value["connections"].keys()), + "label": job_cmd.name, + "outputs": job_cmd.outputs, + }, + } + elif req_op == "remove": + req_path = [v for v in req_path.split("/") if v] if len(req_path) != 2: - return {'status': 'error', - 'message': 'Incorrect path 
parameter'} + return {"status": "error", "message": "Incorrect path parameter"} wf_id = req_path[0] job_id = req_path[1] wf = ProcessingWorkflow(wf_id) job = ProcessingJob(job_id) wf.remove(job, cascade=True) - return {'status': 'success', - 'message': ''} + return {"status": "success", "message": ""} else: - return {'status': 'error', - 'message': 'Operation "%s" not supported. Current supported ' - 'operations: add' % req_op} + return { + "status": "error", + "message": 'Operation "%s" not supported. Current supported ' + "operations: add" % req_op, + } def workflow_run_post_req(workflow_id): @@ -291,10 +304,12 @@ def workflow_run_post_req(workflow_id): try: wf = ProcessingWorkflow(workflow_id) except QiitaDBUnknownIDError: - return {'status': 'error', - 'message': 'Workflow %s does not exist' % workflow_id} + return { + "status": "error", + "message": "Workflow %s does not exist" % workflow_id, + } wf.submit() - return {'status': 'success', 'message': ''} + return {"status": "success", "message": ""} def job_ajax_get_req(job_id): @@ -321,18 +336,20 @@ def job_ajax_get_req(job_id): sw = cmd.software job_status = job.status job_error = job.log.msg if job.log is not None else None - return {'status': 'success', - 'message': '', - 'job_id': job.id, - 'job_external_id': job.external_id, - 'job_status': job_status, - 'job_step': job.step, - 'job_parameters': job.parameters.values, - 'job_error': job_error, - 'command': cmd.name, - 'command_description': cmd.description, - 'software': sw.name, - 'software_version': sw.version} + return { + "status": "success", + "message": "", + "job_id": job.id, + "job_external_id": job.external_id, + "job_status": job_status, + "job_step": job.step, + "job_parameters": job.parameters.values, + "job_error": job_error, + "command": cmd.name, + "command_description": cmd.description, + "software": sw.name, + "software_version": sw.version, + } def job_ajax_patch_req(req_op, req_path, req_value=None, req_from=None): @@ -357,52 +374,64 @@ def job_ajax_patch_req(req_op, req_path, req_value=None, req_from=None): is a human readable string with the error message in case that status is 'error'. 
""" - if req_op == 'remove': - req_path = [v for v in req_path.split('/') if v] + if req_op == "remove": + req_path = [v for v in req_path.split("/") if v] if len(req_path) != 1: - return {'status': 'error', - 'message': 'Incorrect path parameter: missing job id'} + return { + "status": "error", + "message": "Incorrect path parameter: missing job id", + } # We have ensured that we only have one element on req_path job_id = req_path[0] try: job = ProcessingJob(job_id) except QiitaDBUnknownIDError: - return {'status': 'error', - 'message': 'Incorrect path parameter: ' - '%s is not a recognized job id' % job_id} + return { + "status": "error", + "message": "Incorrect path parameter: " + "%s is not a recognized job id" % job_id, + } except Exception as e: e = str(e) if "invalid input syntax for uuid" in e: - return {'status': 'error', - 'message': 'Incorrect path parameter: ' - '%s is not a recognized job id' % job_id} + return { + "status": "error", + "message": "Incorrect path parameter: " + "%s is not a recognized job id" % job_id, + } else: - return {'status': 'error', - 'message': 'An error occured while accessing the ' - 'job: %s' % e} + return { + "status": "error", + "message": "An error occured while accessing the job: %s" % e, + } job_status = job.status - if job_status == 'in_construction': + if job_status == "in_construction": # A job that is in construction is in a workflow. Use the methods # defined for workflows to keep everything consistent. This message # should never be presented to the user, but rather to the # developer if it makes a mistake during changes in the interface - return {'status': 'error', - 'message': "Can't delete job %s. It is 'in_construction' " - "status. Please use /study/process/workflow/" - % job_id} - elif job_status == 'error': + return { + "status": "error", + "message": "Can't delete job %s. It is 'in_construction' " + "status. Please use /study/process/workflow/" % job_id, + } + elif job_status == "error": # When the job is in error status, we just need to hide it job.hide() - return {'status': 'success', 'message': ''} + return {"status": "success", "message": ""} else: # In any other state, we currently fail. Adding the else here # because it can be useful to have it for fixing issue #2307 - return {'status': 'error', - 'message': 'Only jobs in "error" status can be deleted.'} + return { + "status": "error", + "message": 'Only jobs in "error" status can be deleted.', + } else: - return {'status': 'error', - 'message': 'Operation "%s" not supported. Current supported ' - 'operations: remove' % req_op} + return { + "status": "error", + "message": 'Operation "%s" not supported. Current supported ' + "operations: remove" % req_op, + } diff --git a/qiita_pet/handlers/api_proxy/sample_template.py b/qiita_pet/handlers/api_proxy/sample_template.py index f7b9365b3..45486807b 100644 --- a/qiita_pet/handlers/api_proxy/sample_template.py +++ b/qiita_pet/handlers/api_proxy/sample_template.py @@ -5,19 +5,18 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from json import loads from collections import defaultdict +from json import loads -from qiita_core.util import execute_as_transaction from qiita_core.qiita_settings import r_client -from qiita_db.util import generate_analyses_list_per_study +from qiita_core.util import execute_as_transaction +from qiita_db.exceptions import QiitaDBColumnError, QiitaDBUnknownIDError from qiita_db.metadata_template.sample_template import SampleTemplate -from qiita_db.exceptions import QiitaDBUnknownIDError -from qiita_db.exceptions import QiitaDBColumnError from qiita_db.processing_job import ProcessingJob +from qiita_db.util import generate_analyses_list_per_study from qiita_pet.handlers.api_proxy.util import check_access -SAMPLE_TEMPLATE_KEY_FORMAT = 'sample_template_%s' +SAMPLE_TEMPLATE_KEY_FORMAT = "sample_template_%s" def _check_sample_template_exists(samp_id): @@ -35,11 +34,11 @@ def _check_sample_template_exists(samp_id): 'message': msg} """ if not SampleTemplate.exists(int(samp_id)): - return {'status': 'error', - 'message': 'Sample template %d does not exist' % int(samp_id) - } - return {'status': 'success', - 'message': ''} + return { + "status": "error", + "message": "Sample template %d does not exist" % int(samp_id), + } + return {"status": "success", "message": ""} def sample_template_get_req(samp_id, user_id): @@ -64,7 +63,7 @@ def sample_template_get_req(samp_id, user_id): Format {sample: {column: value, ...}, ...} """ exists = _check_sample_template_exists(int(samp_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists access_error = check_access(int(samp_id), user_id) if access_error: @@ -75,9 +74,7 @@ def sample_template_get_req(samp_id, user_id): if access_error: return access_error df = template.to_dataframe() - return {'status': 'success', - 'message': '', - 'template': df.to_dict(orient='index')} + return {"status": "success", "message": "", "template": df.to_dict(orient="index")} def sample_template_samples_get_req(samp_id, user_id): @@ -100,16 +97,17 @@ def sample_template_samples_get_req(samp_id, user_id): samples is list of samples in the template """ exists = _check_sample_template_exists(int(samp_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists access_error = check_access(samp_id, user_id) if access_error: return access_error - return {'status': 'success', - 'message': '', - 'samples': sorted(x for x in SampleTemplate(int(samp_id))) - } + return { + "status": "success", + "message": "", + "samples": sorted(x for x in SampleTemplate(int(samp_id))), + } def sample_template_meta_cats_get_req(samp_id, user_id): @@ -132,16 +130,17 @@ def sample_template_meta_cats_get_req(samp_id, user_id): samples is list of metadata categories in the template """ exists = _check_sample_template_exists(int(samp_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists access_error = check_access(samp_id, user_id) if access_error: return access_error - return {'status': 'success', - 'message': '', - 'categories': sorted(SampleTemplate(int(samp_id)).categories) - } + return { + "status": "success", + "message": "", + "categories": sorted(SampleTemplate(int(samp_id)).categories), + } def sample_template_category_get_req(category, samp_id, user_id): @@ -165,7 +164,7 @@ def sample_template_category_get_req(category, samp_id, user_id): 'values': dict of {str: object}} """ exists = 
_check_sample_template_exists(int(samp_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists access_error = check_access(samp_id, user_id) if access_error: @@ -175,12 +174,11 @@ def sample_template_category_get_req(category, samp_id, user_id): try: values = st.get_category(category) except QiitaDBColumnError: - return {'status': 'error', - 'message': 'Category %s does not exist in sample template' % - category} - return {'status': 'success', - 'message': '', - 'values': values} + return { + "status": "error", + "message": "Category %s does not exist in sample template" % category, + } + return {"status": "success", "message": "", "values": values} def analyses_associated_with_study(study_id, user_id): @@ -208,32 +206,30 @@ def analyses_associated_with_study(study_id, user_id): values = generate_analyses_list_per_study(study_id) - return {'status': 'success', - 'message': '', - 'values': values} + return {"status": "success", "message": "", "values": values} def get_sample_template_processing_status(st_id): # Initialize variables here processing = False - alert_type = '' - alert_msg = '' + alert_type = "" + alert_msg = "" job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % st_id) if job_info: - job_info = defaultdict(lambda: '', loads(job_info)) - job_id = job_info['job_id'] + job_info = defaultdict(lambda: "", loads(job_info)) + job_id = job_info["job_id"] job = ProcessingJob(job_id) job_status = job.status - processing = job_status not in ('success', 'error') + processing = job_status not in ("success", "error") if processing: - alert_type = 'info' - alert_msg = 'This sample template is currently being processed' - elif job_status == 'error': - alert_type = 'danger' - alert_msg = job.log.msg.replace('\n', '
') + alert_type = "info" + alert_msg = "This sample template is currently being processed" + elif job_status == "error": + alert_type = "danger" + alert_msg = job.log.msg.replace("\n", "
") else: - alert_type = job_info['alert_type'] - alert_msg = job_info['alert_msg'].replace('\n', '
') + alert_type = job_info["alert_type"] + alert_msg = job_info["alert_msg"].replace("\n", "
") return processing, alert_type, alert_msg @@ -262,7 +258,7 @@ def sample_template_filepaths_get_req(study_id, user_id): All files in the sample template, as [(id, URL), ...] """ exists = _check_sample_template_exists(int(study_id)) - if exists['status'] != 'success': + if exists["status"] != "success": return exists access_error = check_access(study_id, user_id) if access_error: @@ -271,10 +267,6 @@ def sample_template_filepaths_get_req(study_id, user_id): try: template = SampleTemplate(int(study_id)) except QiitaDBUnknownIDError as e: - return {'status': 'error', - 'message': str(e)} + return {"status": "error", "message": str(e)} - return {'status': 'success', - 'message': '', - 'filepaths': template.get_filepaths() - } + return {"status": "success", "message": "", "filepaths": template.get_filepaths()} diff --git a/qiita_pet/handlers/api_proxy/studies.py b/qiita_pet/handlers/api_proxy/studies.py index 547c7621f..937461c3b 100644 --- a/qiita_pet/handlers/api_proxy/studies.py +++ b/qiita_pet/handlers/api_proxy/studies.py @@ -9,22 +9,20 @@ from json import dumps, loads from qiita_core.exceptions import IncompetentQiitaDeveloperError -from qiita_core.util import execute_as_transaction from qiita_core.qiita_settings import r_client +from qiita_core.util import execute_as_transaction from qiita_db.artifact import Artifact -from qiita_db.sql_connection import TRN -from qiita_db.user import User -from qiita_db.study import Study from qiita_db.exceptions import QiitaDBColumnError, QiitaDBLookupError from qiita_db.metadata_template.prep_template import PrepTemplate from qiita_db.processing_job import ProcessingJob -from qiita_db.software import Software, Parameters -from qiita_db.util import (supported_filepath_types, - get_files_from_uploads_folders) +from qiita_db.software import Parameters, Software +from qiita_db.sql_connection import TRN +from qiita_db.study import Study +from qiita_db.user import User +from qiita_db.util import get_files_from_uploads_folders, supported_filepath_types from qiita_pet.handlers.api_proxy.util import check_access - -STUDY_KEY_FORMAT = 'study_%s' +STUDY_KEY_FORMAT = "study_%s" def data_types_get_req(): @@ -41,9 +39,7 @@ def data_types_get_req(): message has the warnings or errors data_types is the list of available data types in the system """ - return {'status': 'success', - 'message': '', - 'data_types': Study.all_data_types()} + return {"status": "success", "message": "", "data_types": Study.all_data_types()} def study_get_req(study_id, user_id): @@ -76,77 +72,83 @@ def study_get_req(study_id, user_id): study = Study(study_id) study_info = study.info # Add needed info that is not part of the initial info pull - study_info['publication_doi'] = [] - study_info['publication_pid'] = [] + study_info["publication_doi"] = [] + study_info["publication_pid"] = [] for pub, is_doi in study.publications: if is_doi: - study_info['publication_doi'].append(pub) + study_info["publication_doi"].append(pub) else: - study_info['publication_pid'].append(pub) - study_info['study_id'] = study.id - study_info['study_title'] = study.title - study_info['shared_with'] = [s.id for s in study.shared_with] - study_info['status'] = study.status - study_info['ebi_study_accession'] = study.ebi_study_accession - study_info['ebi_submission_status'] = study.ebi_submission_status - study_info['public_raw_download'] = study.public_raw_download - study_info['notes'] = study.notes - study_info['autoloaded'] = study.autoloaded + study_info["publication_pid"].append(pub) + study_info["study_id"] = 
study.id + study_info["study_title"] = study.title + study_info["shared_with"] = [s.id for s in study.shared_with] + study_info["status"] = study.status + study_info["ebi_study_accession"] = study.ebi_study_accession + study_info["ebi_submission_status"] = study.ebi_submission_status + study_info["public_raw_download"] = study.public_raw_download + study_info["notes"] = study.notes + study_info["autoloaded"] = study.autoloaded # Clean up StudyPerson objects to string for display - pi = study_info['principal_investigator'] - study_info['principal_investigator'] = { - 'name': pi.name, - 'email': pi.email, - 'affiliation': pi.affiliation} - - lab_person = study_info['lab_person'] + pi = study_info["principal_investigator"] + study_info["principal_investigator"] = { + "name": pi.name, + "email": pi.email, + "affiliation": pi.affiliation, + } + + lab_person = study_info["lab_person"] if lab_person: - study_info['lab_person'] = { - 'name': lab_person.name, - 'email': lab_person.email, - 'affiliation': lab_person.affiliation} + study_info["lab_person"] = { + "name": lab_person.name, + "email": lab_person.email, + "affiliation": lab_person.affiliation, + } samples = study.sample_template - study_info['num_samples'] = 0 if samples is None else len(list(samples)) - study_info['owner'] = study.owner.id + study_info["num_samples"] = 0 if samples is None else len(list(samples)) + study_info["owner"] = study.owner.id # Study.has_access no_public=True, will return True only if the user_id is # the owner of the study or if the study is shared with the user_id; this # with study.public_raw_download will define has_access_to_raw_data - study_info['has_access_to_raw_data'] = study.has_access( - User(user_id), True) or study.public_raw_download + study_info["has_access_to_raw_data"] = ( + study.has_access(User(user_id), True) or study.public_raw_download + ) - study_info['show_biom_download_button'] = len( - study.artifacts(artifact_type='BIOM')) != 0 - study_info['show_raw_download_button'] = any([ - True for pt in study.prep_templates() if pt.artifact is not None]) + study_info["show_biom_download_button"] = ( + len(study.artifacts(artifact_type="BIOM")) != 0 + ) + study_info["show_raw_download_button"] = any( + [True for pt in study.prep_templates() if pt.artifact is not None] + ) # getting study processing status from redis processing = False - study_info['level'] = '' - study_info['message'] = '' + study_info["level"] = "" + study_info["message"] = "" job_info = r_client.get(STUDY_KEY_FORMAT % study_id) if job_info: - job_info = defaultdict(lambda: '', loads(job_info)) - job_id = job_info['job_id'] + job_info = defaultdict(lambda: "", loads(job_info)) + job_id = job_info["job_id"] job = ProcessingJob(job_id) job_status = job.status - processing = job_status not in ('success', 'error') + processing = job_status not in ("success", "error") if processing: - study_info['level'] = 'info' - study_info['message'] = 'This study is currently being processed' - elif job_status == 'error': - study_info['level'] = 'danger' - study_info['message'] = job.log.msg.replace('\n', '
') + study_info["level"] = "info" + study_info["message"] = "This study is currently being processed" + elif job_status == "error": + study_info["level"] = "danger" + study_info["message"] = job.log.msg.replace("\n", "
") else: - study_info['level'] = job_info['alert_type'] - study_info['message'] = job_info['alert_msg'].replace( - '\n', '
') + study_info["level"] = job_info["alert_type"] + study_info["message"] = job_info["alert_msg"].replace("\n", "
") - return {'status': 'success', - 'message': '', - 'study_info': study_info, - 'editable': study.can_edit(User(user_id))} + return { + "status": "success", + "message": "", + "study_info": study_info, + "editable": study.can_edit(User(user_id)), + } @execute_as_transaction @@ -171,17 +173,16 @@ def study_delete_req(study_id, user_id): if access_error: return access_error - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = qiita_plugin.get_command('delete_study') - params = Parameters.load(cmd, values_dict={'study': study_id}) + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("delete_study") + params = Parameters.load(cmd, values_dict={"study": study_id}) job = ProcessingJob.create(User(user_id), params, True) # Store the job id attaching it to the sample template id - r_client.set(STUDY_KEY_FORMAT % study_id, - dumps({'job_id': job.id})) + r_client.set(STUDY_KEY_FORMAT % study_id, dumps({"job_id": job.id})) job.submit() - return {'status': 'success', 'message': ''} + return {"status": "success", "message": ""} def study_prep_get_req(study_id, user_id): @@ -234,44 +235,43 @@ def study_prep_get_req(study_id, user_id): TRN.add(sql, [study_id]) for row in TRN.execute_fetchindex(): row = dict(row) - if row['visibility'] != 'public' and not editable: + if row["visibility"] != "public" and not editable: continue # for those preps that have no artifact - if row['visibility'] is None: - row['visibility'] = 'sandbox' + if row["visibility"] is None: + row["visibility"] = "sandbox" info = { - 'name': row['name'], - 'id': row['prep_template_id'], - 'status': row['visibility'], - 'total_samples': row['total_samples'], - 'creation_timestamp': row['creation_timestamp'], - 'modification_timestamp': row['modification_timestamp'], - 'start_artifact': None, - 'start_artifact_id': None, - 'youngest_artifact': None, - 'num_artifact_children': 0, - 'youngest_artifact_name': None, - 'youngest_artifact_type': None, - 'ebi_experiment': row['ebi_experiment'] + "name": row["name"], + "id": row["prep_template_id"], + "status": row["visibility"], + "total_samples": row["total_samples"], + "creation_timestamp": row["creation_timestamp"], + "modification_timestamp": row["modification_timestamp"], + "start_artifact": None, + "start_artifact_id": None, + "youngest_artifact": None, + "num_artifact_children": 0, + "youngest_artifact_name": None, + "youngest_artifact_type": None, + "ebi_experiment": row["ebi_experiment"], } - if row['artifact_id'] is not None: - start_artifact = Artifact(row['artifact_id']) + if row["artifact_id"] is not None: + start_artifact = Artifact(row["artifact_id"]) youngest_artifact = start_artifact.youngest_artifact - info['start_artifact'] = start_artifact.artifact_type - info['start_artifact_id'] = row['artifact_id'] - info['num_artifact_children'] = len(start_artifact.children) - info['youngest_artifact_name'] = youngest_artifact.name - info['youngest_artifact_type'] = \ - youngest_artifact.artifact_type - info['youngest_artifact'] = '%s - %s' % ( - youngest_artifact.name, youngest_artifact.artifact_type) + info["start_artifact"] = start_artifact.artifact_type + info["start_artifact_id"] = row["artifact_id"] + info["num_artifact_children"] = len(start_artifact.children) + info["youngest_artifact_name"] = youngest_artifact.name + info["youngest_artifact_type"] = youngest_artifact.artifact_type + info["youngest_artifact"] = "%s - %s" % ( + youngest_artifact.name, + youngest_artifact.artifact_type, + ) - 
prep_info[row['data_type']].append(info) + prep_info[row["data_type"]].append(info) - return {'status': 'success', - 'message': '', - 'info': prep_info} + return {"status": "success", "message": "", "info": prep_info} def study_files_get_req(user_id, study_id, prep_template_id, artifact_type): @@ -317,14 +317,14 @@ def study_files_get_req(user_id, study_id, prep_template_id, artifact_type): if pt.study_id != study_id: raise IncompetentQiitaDeveloperError( "The requested prep id (%d) doesn't belong to the study " - "(%d)" % (pt.study_id, study_id)) + "(%d)" % (pt.study_id, study_id) + ) uploaded = get_files_from_uploads_folders(study_id) pt = pt.to_dataframe() - ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types - if ft != 'raw_sff') - if any(ftypes_if) and 'run_prefix' in pt.columns: - prep_prefixes = tuple(set(pt['run_prefix'])) + ftypes_if = (ft.startswith("raw_") for ft, _ in supp_file_types if ft != "raw_sff") + if any(ftypes_if) and "run_prefix" in pt.columns: + prep_prefixes = tuple(set(pt["run_prefix"])) num_prefixes = len(prep_prefixes) # sorting prefixes by length to avoid collisions like: 100 1002 # 10003 @@ -358,8 +358,10 @@ def study_files_get_req(user_id, study_id, prep_template_id, artifact_type): remaining = [f for _, f, _ in uploaded] # get file_types, format: filetype, required, list of files - file_types = [(t, req, [x[i] for x in selected if i+1 <= len(x)]) - for i, (t, req) in enumerate(supp_file_types)] + file_types = [ + (t, req, [x[i] for x in selected if i + 1 <= len(x)]) + for i, (t, req) in enumerate(supp_file_types) + ] # Create a list of artifacts that the user has access to, in case that # he wants to import the files from another artifact @@ -373,17 +375,19 @@ def study_files_get_req(user_id, study_id, prep_template_id, artifact_type): study_label = "%s (%d)" % (study.title, study.id) for a in artifacts: artifact_options.append( - (a.id, "%s - %s (%d)" % (study_label, a.name, a.id))) + (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)) + ) - message = ('' if not message - else '\n'.join(['Check these run_prefix:'] + message)) + message = "" if not message else "\n".join(["Check these run_prefix:"] + message) - return {'status': 'success', - 'message': message, - 'remaining': sorted(remaining), - 'file_types': file_types, - 'num_prefixes': num_prefixes, - 'artifacts': artifact_options} + return { + "status": "success", + "message": message, + "remaining": sorted(remaining), + "file_types": file_types, + "num_prefixes": num_prefixes, + "artifacts": artifact_options, + } def study_tags_request(): @@ -397,9 +401,7 @@ def study_tags_request(): - message: str, if the request is unsuccessful, a human readable error - tags: {level: value, ..., ...} """ - return {'status': 'success', - 'message': '', - 'tags': Study.get_tags()} + return {"status": "success", "message": "", "tags": Study.get_tags()} def study_get_tags_request(user_id, study_id): @@ -426,13 +428,12 @@ def study_get_tags_request(user_id, study_id): return access_error study = Study(study_id) - return {'status': 'success', - 'message': '', - 'tags': study.tags} + return {"status": "success", "message": "", "tags": study.tags} -def study_patch_request(user_id, study_id, - req_op, req_path, req_value=None, req_from=None): +def study_patch_request( + user_id, study_id, req_op, req_path, req_value=None, req_from=None +): """Modifies an attribute of the study object Parameters @@ -457,11 +458,10 @@ def study_patch_request(user_id, study_id, - status: str, whether if the request is successful or 
not - message: str, if the request is unsuccessful, a human readable error """ - if req_op == 'replace': - req_path = [v for v in req_path.split('/') if v] + if req_op == "replace": + req_path = [v for v in req_path.split("/") if v] if len(req_path) != 1: - return {'status': 'error', - 'message': 'Incorrect path parameter'} + return {"status": "error", "message": "Incorrect path parameter"} attribute = req_path[0] @@ -471,24 +471,28 @@ def study_patch_request(user_id, study_id, return access_error study = Study(study_id) - if attribute == 'tags': + if attribute == "tags": message = study.update_tags(User(user_id), req_value) - return {'status': 'success', - 'message': message} - elif attribute == 'toggle_public_raw_download': + return {"status": "success", "message": message} + elif attribute == "toggle_public_raw_download": try: study.public_raw_download = not study.public_raw_download - return {'status': 'success', - 'message': 'Successfully updated public_raw_download'} + return { + "status": "success", + "message": "Successfully updated public_raw_download", + } except (QiitaDBLookupError, QiitaDBColumnError) as e: - return {'status': 'error', - 'message': str(e)} + return {"status": "error", "message": str(e)} else: # We don't understand the attribute so return an error - return {'status': 'error', - 'message': 'Attribute "%s" not found. ' - 'Please, check the path parameter' % attribute} + return { + "status": "error", + "message": 'Attribute "%s" not found. ' + "Please, check the path parameter" % attribute, + } else: - return {'status': 'error', - 'message': 'Operation "%s" not supported. ' - 'Current supported operations: replace' % req_op} + return { + "status": "error", + "message": 'Operation "%s" not supported. ' + "Current supported operations: replace" % req_op, + } diff --git a/qiita_pet/handlers/api_proxy/tests/test_artifact.py b/qiita_pet/handlers/api_proxy/tests/test_artifact.py index fda7fee57..57b517999 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_artifact.py +++ b/qiita_pet/handlers/api_proxy/tests/test_artifact.py @@ -5,135 +5,155 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main -from os.path import join, exists -from os import remove, close from datetime import datetime -from tempfile import mkstemp from functools import partial +from os import close, remove +from os.path import exists, join +from tempfile import mkstemp +from unittest import TestCase, main -import pandas as pd import numpy.testing as npt +import pandas as pd -from qiita_core.util import qiita_test_checker -from qiita_core.testing import wait_for_prep_information_job from qiita_core.qiita_settings import r_client +from qiita_core.testing import wait_for_prep_information_job +from qiita_core.util import qiita_test_checker from qiita_db.artifact import Artifact +from qiita_db.exceptions import QiitaDBWarning +from qiita_db.logger import LogEntry from qiita_db.metadata_template.prep_template import PrepTemplate +from qiita_db.software import DefaultParameters, Parameters from qiita_db.study import Study from qiita_db.util import get_mountpoint -from qiita_db.software import Parameters, DefaultParameters -from qiita_db.exceptions import QiitaDBWarning from qiita_pet.handlers.api_proxy.artifact import ( - artifact_get_req, artifact_status_put_req, artifact_graph_get_req, - artifact_types_get_req, artifact_post_req, artifact_get_prep_req, - artifact_get_info) -from qiita_db.logger import LogEntry + artifact_get_info, + artifact_get_prep_req, + artifact_get_req, + artifact_graph_get_req, + artifact_post_req, + artifact_status_put_req, + artifact_types_get_req, +) class TestArtifactAPIReadOnly(TestCase): def test_artifact_get_req_no_access(self): - obs = artifact_get_req('demo@microbio.me', 1) - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = artifact_get_req("demo@microbio.me", 1) + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_artifact_get_req(self): - obs = artifact_get_req('test@foo.bar', 1) - path_builder = partial(join, get_mountpoint('raw_data')[0][1]) - exp = {'id': 1, - 'type': 'FASTQ', - 'study': 1, - 'data_type': '18S', - 'timestamp': datetime(2012, 10, 1, 9, 30, 27), - 'visibility': 'private', - 'can_submit_vamps': False, - 'can_submit_ebi': False, - 'processing_parameters': None, - 'ebi_run_accessions': None, - 'is_submitted_vamps': False, - 'parents': [], - 'filepaths': [ - {'fp_id': 1, - 'fp': path_builder("1_s_G1_L001_sequences.fastq.gz"), - 'fp_type': "raw_forward_seqs", - 'checksum': '2125826711', - 'fp_size': 58}, - {'fp_id': 2, - 'fp': path_builder( - "1_s_G1_L001_sequences_barcodes.fastq.gz"), - 'fp_type': "raw_barcodes", - 'checksum': '2125826711', - 'fp_size': 58}] - } + obs = artifact_get_req("test@foo.bar", 1) + path_builder = partial(join, get_mountpoint("raw_data")[0][1]) + exp = { + "id": 1, + "type": "FASTQ", + "study": 1, + "data_type": "18S", + "timestamp": datetime(2012, 10, 1, 9, 30, 27), + "visibility": "private", + "can_submit_vamps": False, + "can_submit_ebi": False, + "processing_parameters": None, + "ebi_run_accessions": None, + "is_submitted_vamps": False, + "parents": [], + "filepaths": [ + { + "fp_id": 1, + "fp": path_builder("1_s_G1_L001_sequences.fastq.gz"), + "fp_type": "raw_forward_seqs", + "checksum": "2125826711", + "fp_size": 58, + }, + { + "fp_id": 2, + "fp": path_builder("1_s_G1_L001_sequences_barcodes.fastq.gz"), + "fp_type": "raw_barcodes", + "checksum": "2125826711", + "fp_size": 58, + }, + ], + } self.assertEqual(obs, exp) def 
test_artifact_graph_get_req_ancestors(self): - obs = artifact_graph_get_req(1, 'ancestors', 'test@foo.bar') - exp = {'status': 'success', - 'message': '', - 'edge_list': [], - 'node_labels': [(1, 'Raw data 1 - FASTQ')]} + obs = artifact_graph_get_req(1, "ancestors", "test@foo.bar") + exp = { + "status": "success", + "message": "", + "edge_list": [], + "node_labels": [(1, "Raw data 1 - FASTQ")], + } self.assertEqual(obs, exp) def test_artifact_graph_get_req_descendants(self): - obs = artifact_graph_get_req(1, 'descendants', 'test@foo.bar') - exp = {'status': 'success', - 'message': '', - 'node_labels': [(1, 'Raw data 1 - FASTQ'), - (3, 'Demultiplexed 2 - Demultiplexed'), - (2, 'Demultiplexed 1 - Demultiplexed'), - (4, 'BIOM - BIOM'), - (5, 'BIOM - BIOM'), - (6, 'BIOM - BIOM')], - 'edge_list': [(1, 3), (1, 2), (2, 5), (2, 4), (2, 6)]} - self.assertEqual(obs['message'], exp['message']) - self.assertEqual(obs['status'], exp['status']) - self.assertCountEqual(obs['node_labels'], exp['node_labels']) - self.assertCountEqual(obs['edge_list'], exp['edge_list']) + obs = artifact_graph_get_req(1, "descendants", "test@foo.bar") + exp = { + "status": "success", + "message": "", + "node_labels": [ + (1, "Raw data 1 - FASTQ"), + (3, "Demultiplexed 2 - Demultiplexed"), + (2, "Demultiplexed 1 - Demultiplexed"), + (4, "BIOM - BIOM"), + (5, "BIOM - BIOM"), + (6, "BIOM - BIOM"), + ], + "edge_list": [(1, 3), (1, 2), (2, 5), (2, 4), (2, 6)], + } + self.assertEqual(obs["message"], exp["message"]) + self.assertEqual(obs["status"], exp["status"]) + self.assertCountEqual(obs["node_labels"], exp["node_labels"]) + self.assertCountEqual(obs["edge_list"], exp["edge_list"]) def test_artifact_graph_get_req_no_access(self): - obs = artifact_graph_get_req(1, 'ancestors', 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = artifact_graph_get_req(1, "ancestors", "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_artifact_graph_get_req_bad_direction(self): - obs = artifact_graph_get_req(1, 'WRONG', 'test@foo.bar') - exp = {'status': 'error', 'message': 'Unknown directon WRONG'} + obs = artifact_graph_get_req(1, "WRONG", "test@foo.bar") + exp = {"status": "error", "message": "Unknown directon WRONG"} self.assertEqual(obs, exp) def test_artifact_types_get_req(self): obs = artifact_types_get_req() - exp = {'message': '', - 'status': 'success', - 'types': [['BIOM', 'BIOM table', False, False, True], - ['Demultiplexed', 'Demultiplexed and QC sequences', - True, True, False], - ['FASTA', None, False, False, False], - ['FASTA_Sanger', None, False, False, False], - ['FASTQ', None, False, False, True], - ['SFF', None, False, False, False], - ['beta_div_plots', 'Qiime 1 beta diversity results', - False, False, False], - ['per_sample_FASTQ', None, True, False, True], - ['rarefaction_curves', 'Rarefaction curves', False, - False, False], - ['taxa_summary', 'Taxa summary plots', False, False, - False]]} - - self.assertEqual(obs['message'], exp['message']) - self.assertEqual(obs['status'], exp['status']) - self.assertCountEqual(obs['types'], exp['types']) + exp = { + "message": "", + "status": "success", + "types": [ + ["BIOM", "BIOM table", False, False, True], + ["Demultiplexed", "Demultiplexed and QC sequences", True, True, False], + ["FASTA", None, False, False, False], + ["FASTA_Sanger", None, False, False, False], + ["FASTQ", None, False, False, True], + ["SFF", None, False, False, False], + [ + 
"beta_div_plots", + "Qiime 1 beta diversity results", + False, + False, + False, + ], + ["per_sample_FASTQ", None, True, False, True], + ["rarefaction_curves", "Rarefaction curves", False, False, False], + ["taxa_summary", "Taxa summary plots", False, False, False], + ], + } + + self.assertEqual(obs["message"], exp["message"]) + self.assertEqual(obs["status"], exp["status"]) + self.assertCountEqual(obs["types"], exp["types"]) @qiita_test_checker() class TestArtifactAPI(TestCase): def setUp(self): - uploads_path = get_mountpoint('uploads')[0][1] + uploads_path = get_mountpoint("uploads")[0][1] # Create prep test file to point at - self.update_fp = join(uploads_path, '1', 'update.txt') - with open(self.update_fp, 'w') as f: + self.update_fp = join(uploads_path, "1", "update.txt") + with open(self.update_fp, "w") as f: f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""") self._files_to_remove = [self.update_fp] @@ -142,20 +162,26 @@ def setUp(self): # creating temporal files and artifact # NOTE: we don't need to remove the artifact created cause it's # used to test the delete functionality - fd, fp = mkstemp(suffix='_seqs.fna') + fd, fp = mkstemp(suffix="_seqs.fna") close(fd) - with open(fp, 'w') as f: - f.write(">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 " - "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n" - "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n") + with open(fp, "w") as f: + f.write( + ">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 " + "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n" + "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n" + ) # 4 Demultiplexed filepaths_processed = [(fp, 4)] # 1 for default parameters and input data - exp_params = Parameters.from_default_params(DefaultParameters(1), - {'input_data': 1}) - self.artifact = Artifact.create(filepaths_processed, "Demultiplexed", - parents=[Artifact(1)], - processing_parameters=exp_params) + exp_params = Parameters.from_default_params( + DefaultParameters(1), {"input_data": 1} + ) + self.artifact = Artifact.create( + filepaths_processed, + "Demultiplexed", + parents=[Artifact(1)], + processing_parameters=exp_params, + ) def tearDown(self): for fp in self._files_to_remove: @@ -163,176 +189,246 @@ def tearDown(self): remove(fp) # Replace file if removed as part of function testing - uploads_path = get_mountpoint('uploads')[0][1] - fp = join(uploads_path, '1', 'uploaded_file.txt') + uploads_path = get_mountpoint("uploads")[0][1] + fp = join(uploads_path, "1", "uploaded_file.txt") if not exists(fp): - with open(fp, 'w') as f: - f.write('') + with open(fp, "w") as f: + f.write("") r_client.flushdb() def test_artifact_get_prep_req(self): - obs = artifact_get_prep_req('test@foo.bar', [4]) - exp = {'status': 'success', 'msg': '', 'data': { - 4: ['1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195', - '1.SKB4.640189', '1.SKB5.640181', '1.SKB6.640176', - '1.SKB7.640196', '1.SKB8.640193', '1.SKB9.640200', - '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198', - '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190', - '1.SKD7.640191', '1.SKD8.640184', '1.SKD9.640182', - '1.SKM1.640183', '1.SKM2.640199', '1.SKM3.640197', - '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187', - '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192']}} + obs = artifact_get_prep_req("test@foo.bar", [4]) + exp = { + "status": "success", + "msg": "", + "data": { + 4: [ + "1.SKB1.640202", + "1.SKB2.640194", + "1.SKB3.640195", + "1.SKB4.640189", + "1.SKB5.640181", + "1.SKB6.640176", + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKB9.640200", + "1.SKD1.640179", 
+ "1.SKD2.640178", + "1.SKD3.640198", + "1.SKD4.640185", + "1.SKD5.640186", + "1.SKD6.640190", + "1.SKD7.640191", + "1.SKD8.640184", + "1.SKD9.640182", + "1.SKM1.640183", + "1.SKM2.640199", + "1.SKM3.640197", + "1.SKM4.640180", + "1.SKM5.640177", + "1.SKM6.640187", + "1.SKM7.640188", + "1.SKM8.640201", + "1.SKM9.640192", + ] + }, + } self.assertEqual(obs, exp) - obs = artifact_get_prep_req('demo@microbio.me', [4]) - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = artifact_get_prep_req("demo@microbio.me", [4]) + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_artifact_get_info(self): - obs = artifact_get_info('test@foo.bar', [5, 6, 7]) + obs = artifact_get_info("test@foo.bar", [5, 6, 7]) data = [ - {'files': ['1_study_1001_closed_reference_otu_table_Silva.biom'], - 'artifact_id': 6, 'data_type': '16S', - 'timestamp': '2012-10-02 17:30:00', 'active': True, - 'target_gene': '16S rRNA', 'name': 'BIOM', - 'target_subfragment': ['V4'], 'parameters': { - 'reference': '2', 'similarity': '0.97', - 'sortmerna_e_value': '1', 'sortmerna_max_pos': '10000', - 'threads': '1', 'sortmerna_coverage': '0.97'}, - 'algorithm': 'Pick closed-reference OTUs | Split libraries FASTQ', - 'deprecated': False, 'platform': 'Illumina', - 'algorithm_az': 'd480799a0a7a2fbe0e9022bc9c602018', - 'prep_samples': 27}, - {'files': ['1_study_1001_closed_reference_otu_table.biom'], - 'artifact_id': 5, 'data_type': '18S', - 'timestamp': '2012-10-02 17:30:00', 'active': True, - 'target_gene': '16S rRNA', 'name': 'BIOM', - 'target_subfragment': ['V4'], 'parameters': { - 'reference': '1', 'similarity': '0.97', - 'sortmerna_e_value': '1', 'sortmerna_max_pos': '10000', - 'threads': '1', 'sortmerna_coverage': '0.97'}, - 'algorithm': 'Pick closed-reference OTUs | Split libraries FASTQ', - 'deprecated': False, 'platform': 'Illumina', - 'algorithm_az': 'd480799a0a7a2fbe0e9022bc9c602018', - 'prep_samples': 27}, - {'files': ['biom_table.biom'], 'artifact_id': 7, - 'data_type': '16S', - 'timestamp': '2012-10-02 17:30:00', 'active': True, - 'target_gene': '16S rRNA', 'name': 'BIOM', - 'target_subfragment': ['V4'], 'parameters': {}, 'algorithm': '', - 'deprecated': False, 'platform': 'Illumina', 'algorithm_az': '', - 'prep_samples': 27}] - exp = {'status': 'success', 'msg': '', 'data': data} + { + "files": ["1_study_1001_closed_reference_otu_table_Silva.biom"], + "artifact_id": 6, + "data_type": "16S", + "timestamp": "2012-10-02 17:30:00", + "active": True, + "target_gene": "16S rRNA", + "name": "BIOM", + "target_subfragment": ["V4"], + "parameters": { + "reference": "2", + "similarity": "0.97", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "threads": "1", + "sortmerna_coverage": "0.97", + }, + "algorithm": "Pick closed-reference OTUs | Split libraries FASTQ", + "deprecated": False, + "platform": "Illumina", + "algorithm_az": "d480799a0a7a2fbe0e9022bc9c602018", + "prep_samples": 27, + }, + { + "files": ["1_study_1001_closed_reference_otu_table.biom"], + "artifact_id": 5, + "data_type": "18S", + "timestamp": "2012-10-02 17:30:00", + "active": True, + "target_gene": "16S rRNA", + "name": "BIOM", + "target_subfragment": ["V4"], + "parameters": { + "reference": "1", + "similarity": "0.97", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "threads": "1", + "sortmerna_coverage": "0.97", + }, + "algorithm": "Pick closed-reference OTUs | Split libraries FASTQ", + "deprecated": False, + "platform": "Illumina", + "algorithm_az": 
"d480799a0a7a2fbe0e9022bc9c602018", + "prep_samples": 27, + }, + { + "files": ["biom_table.biom"], + "artifact_id": 7, + "data_type": "16S", + "timestamp": "2012-10-02 17:30:00", + "active": True, + "target_gene": "16S rRNA", + "name": "BIOM", + "target_subfragment": ["V4"], + "parameters": {}, + "algorithm": "", + "deprecated": False, + "platform": "Illumina", + "algorithm_az": "", + "prep_samples": 27, + }, + ] + exp = {"status": "success", "msg": "", "data": data} self.assertCountEqual(list(obs.keys()), exp.keys()) - self.assertEqual(obs['status'], exp['status']) - self.assertEqual(obs['msg'], exp['msg']) - self.assertCountEqual(obs['data'], exp['data']) + self.assertEqual(obs["status"], exp["status"]) + self.assertEqual(obs["msg"], exp["msg"]) + self.assertCountEqual(obs["data"], exp["data"]) def test_artifact_post_req(self): # Create new prep template to attach artifact to pt = npt.assert_warns( - QiitaDBWarning, PrepTemplate.create, - pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S') + QiitaDBWarning, + PrepTemplate.create, + pd.DataFrame({"new_col": {"1.SKD6.640190": 1}}), + Study(1), + "16S", + ) self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()]) - filepaths = {'raw_forward_seqs': 'uploaded_file.txt', - 'raw_barcodes': 'update.txt'} + filepaths = { + "raw_forward_seqs": "uploaded_file.txt", + "raw_barcodes": "update.txt", + } obs = artifact_post_req( - 'test@foo.bar', filepaths, 'FASTQ', 'New Test Artifact', pt.id) - exp = {'status': 'success', - 'message': ''} + "test@foo.bar", filepaths, "FASTQ", "New Test Artifact", pt.id + ) + exp = {"status": "success", "message": ""} self.assertEqual(obs, exp) wait_for_prep_information_job(pt.id) # Test importing an artifact # Create new prep template to attach artifact to pt = npt.assert_warns( - QiitaDBWarning, PrepTemplate.create, - pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S') + QiitaDBWarning, + PrepTemplate.create, + pd.DataFrame({"new_col": {"1.SKD6.640190": 1}}), + Study(1), + "16S", + ) self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()]) obs = artifact_post_req( - 'test@foo.bar', {}, 'Demultiplexed', 'New Test Artifact 2', - pt.id, 3) - exp = {'status': 'success', - 'message': ''} + "test@foo.bar", {}, "Demultiplexed", "New Test Artifact 2", pt.id, 3 + ) + exp = {"status": "success", "message": ""} self.assertEqual(obs, exp) wait_for_prep_information_job(pt.id) # Instantiate the artifact to make sure it was made and # to clean the environment a = Artifact(pt.artifact.id) - self._files_to_remove.extend([x['fp'] for x in a.filepaths]) + self._files_to_remove.extend([x["fp"] for x in a.filepaths]) def test_artifact_post_req_error(self): # Create a new prep template to attach the artifact to pt = npt.assert_warns( - QiitaDBWarning, PrepTemplate.create, - pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S') + QiitaDBWarning, + PrepTemplate.create, + pd.DataFrame({"new_col": {"1.SKD6.640190": 1}}), + Study(1), + "16S", + ) self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()]) - user_id = 'test@foo.bar' - filepaths = {'raw_barcodes': 'uploaded_file.txt', - 'raw_forward_seqs': 'update.txt'} + user_id = "test@foo.bar" + filepaths = { + "raw_barcodes": "uploaded_file.txt", + "raw_forward_seqs": "update.txt", + } artifact_type = "FASTQ" name = "TestArtifact" # The user doesn't have access to the study - obs = artifact_post_req("demo@microbio.me", filepaths, artifact_type, - name, pt.id) - exp = {'status': 'error', - 'message': 'User has insufficient 
permissions'} + obs = artifact_post_req( + "demo@microbio.me", filepaths, artifact_type, name, pt.id + ) + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) # A file does not exist - missing_fps = {'raw_barcodes': 'NOTEXISTS'} - obs = artifact_post_req(user_id, missing_fps, artifact_type, - name, pt.id) - exp = {'status': 'error', - 'message': 'File does not exist: NOTEXISTS'} + missing_fps = {"raw_barcodes": "NOTEXISTS"} + obs = artifact_post_req(user_id, missing_fps, artifact_type, name, pt.id) + exp = {"status": "error", "message": "File does not exist: NOTEXISTS"} self.assertEqual(obs, exp) # Cleaned filepaths is empty - empty_fps = {'raw_barcodes': '', 'raw_forward_seqs': ''} + empty_fps = {"raw_barcodes": "", "raw_forward_seqs": ""} obs = artifact_post_req(user_id, empty_fps, artifact_type, name, pt.id) - exp = {'status': 'error', - 'message': "Can't create artifact, no files provided."} + exp = { + "status": "error", + "message": "Can't create artifact, no files provided.", + } self.assertEqual(obs, exp) def test_artifact_status_put_req(self): - obs = artifact_status_put_req(1, 'test@foo.bar', 'sandbox') - exp = {'status': 'success', - 'message': 'Artifact visibility changed to sandbox'} + obs = artifact_status_put_req(1, "test@foo.bar", "sandbox") + exp = {"status": "success", "message": "Artifact visibility changed to sandbox"} self.assertEqual(obs, exp) def test_artifact_status_put_req_private(self): - obs = artifact_status_put_req(1, 'admin@foo.bar', 'private') - exp = {'status': 'success', - 'message': 'Artifact visibility changed to private'} + obs = artifact_status_put_req(1, "admin@foo.bar", "private") + exp = {"status": "success", "message": "Artifact visibility changed to private"} self.assertEqual(obs, exp) # testing that the log message is generated self.assertEqual( LogEntry.newest_records(1)[0].msg, - 'admin@foo.bar changed artifact 1 (study 1) to private') + "admin@foo.bar changed artifact 1 (study 1) to private", + ) def test_artifact_status_put_req_private_bad_permissions(self): - obs = artifact_status_put_req(1, 'test@foo.bar', 'private') - exp = {'status': 'error', - 'message': 'User does not have permissions to approve change'} + obs = artifact_status_put_req(1, "test@foo.bar", "private") + exp = { + "status": "error", + "message": "User does not have permissions to approve change", + } self.assertEqual(obs, exp) def test_artifact_status_put_req_no_access(self): - obs = artifact_status_put_req(1, 'demo@microbio.me', 'sandbox') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = artifact_status_put_req(1, "demo@microbio.me", "sandbox") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_artifact_status_put_req_unknown_status(self): - obs = artifact_status_put_req(1, 'test@foo.bar', 'BADSTAT') - exp = {'status': 'error', - 'message': 'Unknown visibility value: BADSTAT'} + obs = artifact_status_put_req(1, "test@foo.bar", "BADSTAT") + exp = {"status": "error", "message": "Unknown visibility value: BADSTAT"} self.assertEqual(obs, exp) diff --git a/qiita_pet/handlers/api_proxy/tests/test_ontology.py b/qiita_pet/handlers/api_proxy/tests/test_ontology.py index 839fd90f2..a82c2d787 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_ontology.py +++ b/qiita_pet/handlers/api_proxy/tests/test_ontology.py @@ -14,27 +14,28 @@ @qiita_test_checker() class TestOntology(TestCase): def test_ontology_patch_handler(self): - obs = 
ontology_patch_handler('add', '/ENA/', 'TERM') - exp = {'status': 'success', 'message': ''} + obs = ontology_patch_handler("add", "/ENA/", "TERM") + exp = {"status": "success", "message": ""} self.assertEqual(obs, exp) def test_ontology_patch_handler_errors(self): # Operation not supported - obs = ontology_patch_handler('replace', '/ENA/', 'TERM') - exp = {'status': 'error', - 'message': 'Operation "replace" not supported. ' - 'Current supported operations: add'} + obs = ontology_patch_handler("replace", "/ENA/", "TERM") + exp = { + "status": "error", + "message": 'Operation "replace" not supported. ' + "Current supported operations: add", + } self.assertEqual(obs, exp) # Incorrect path parameter - obs = ontology_patch_handler('add', '/ENA/Metagenomics', 'TERM') - exp = {'status': 'error', 'message': 'Incorrect path parameter'} + obs = ontology_patch_handler("add", "/ENA/Metagenomics", "TERM") + exp = {"status": "error", "message": "Incorrect path parameter"} self.assertEqual(obs, exp) # Ontology does not exist - obs = ontology_patch_handler('add', '/ONTOLOGY/', 'TERM') - exp = {'status': 'error', - 'message': 'Ontology "ONTOLOGY" does not exist'} + obs = ontology_patch_handler("add", "/ONTOLOGY/", "TERM") + exp = {"status": "error", "message": 'Ontology "ONTOLOGY" does not exist'} self.assertEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/api_proxy/tests/test_prep_template.py b/qiita_pet/handlers/api_proxy/tests/test_prep_template.py index 51a5d33b1..4c64e4545 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_prep_template.py +++ b/qiita_pet/handlers/api_proxy/tests/test_prep_template.py @@ -5,288 +5,399 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main +from json import loads from os import remove -from os.path import join, exists -from string import ascii_letters +from os.path import exists, join from random import choice -from json import loads +from string import ascii_letters +from unittest import TestCase, main -import pandas as pd import numpy.testing as npt +import pandas as pd -from qiita_core.util import qiita_test_checker from qiita_core.qiita_settings import r_client from qiita_core.testing import wait_for_processing_job +from qiita_core.util import qiita_test_checker from qiita_db.artifact import Artifact +from qiita_db.exceptions import QiitaDBWarning from qiita_db.metadata_template.prep_template import PrepTemplate from qiita_db.ontology import Ontology from qiita_db.study import Study from qiita_db.util import get_count, get_mountpoint -from qiita_db.exceptions import QiitaDBWarning from qiita_pet.handlers.api_proxy.prep_template import ( - prep_template_summary_get_req, prep_template_post_req, - prep_template_delete_req, prep_template_get_req, - prep_template_graph_get_req, prep_template_filepaths_get_req, - _process_investigation_type, prep_template_patch_req, - _check_prep_template_exists, new_prep_template_get_req, - prep_template_ajax_get_req, _get_ENA_ontology, - prep_template_jobs_get_req) + _check_prep_template_exists, + _get_ENA_ontology, + _process_investigation_type, + new_prep_template_get_req, + prep_template_ajax_get_req, + prep_template_delete_req, + prep_template_filepaths_get_req, + prep_template_get_req, + prep_template_graph_get_req, + prep_template_jobs_get_req, + prep_template_patch_req, + prep_template_post_req, + prep_template_summary_get_req, +) class TestPrepAPIReadOnly(TestCase): def test_get_ENA_ontology(self): obs = _get_ENA_ontology() exp = { - 'ENA': ['AMPLICON', 'Metagenomics', 'RNA-Seq', 'WGS', 'Other'], - 'User': []} + "ENA": ["AMPLICON", "Metagenomics", "RNA-Seq", "WGS", "Other"], + "User": [], + } self.assertEqual(obs, exp) def test_new_prep_template_get_req(self): obs = new_prep_template_get_req(1) exp = { - 'status': 'success', - 'prep_files': ['uploaded_file.txt'], - 'data_types': ['16S', '18S', 'Genomics', 'ITS', - 'Job Output Folder', 'Metabolomic', - 'Metagenomic', 'Metatranscriptomics', - 'Multiomic', 'Proteomic', 'Transcriptomics', - 'Viromics'], - 'ontology': { - 'ENA': ['AMPLICON', 'Metagenomics', 'RNA-Seq', 'WGS', 'Other'], - 'User': []}} + "status": "success", + "prep_files": ["uploaded_file.txt"], + "data_types": [ + "16S", + "18S", + "Genomics", + "ITS", + "Job Output Folder", + "Metabolomic", + "Metagenomic", + "Metatranscriptomics", + "Multiomic", + "Proteomic", + "Transcriptomics", + "Viromics", + ], + "ontology": { + "ENA": ["AMPLICON", "Metagenomics", "RNA-Seq", "WGS", "Other"], + "User": [], + }, + } self.assertEqual(obs, exp) def test_prep_template_ajax_get_req(self): - obs = prep_template_ajax_get_req('test@foo.bar', 1) - - exp = {'status': 'success', - 'message': '', - 'name': "Prep information 1", - 'files': ["uploaded_file.txt"], - 'download_prep_id': 21, - 'other_filepaths': ['1_prep_1_19700101-000000.txt', - '1_prep_1_qiime_19700101-000000.txt', - '1_prep_1_19700101-000000.txt'], - 'num_samples': 27, - 'num_columns': 22, - 'investigation_type': 'Metagenomics', - 'ontology': { - 'ENA': ['AMPLICON', 'Metagenomics', 'RNA-Seq', 'WGS', - 'Other'], - 'User': []}, - 'artifact_attached': True, - 'archived_artifacts': [], - 'study_id': 1, - 'editable': 
True, - 'data_type': '18S', - 'alert_type': '', - 'is_submitted_to_ebi': True, - 'prep_restrictions': '', - 'samples': ['1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195', - '1.SKB4.640189', '1.SKB5.640181', '1.SKB6.640176', - '1.SKB7.640196', '1.SKB8.640193', '1.SKB9.640200', - '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198', - '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190', - '1.SKD7.640191', '1.SKD8.640184', '1.SKD9.640182', - '1.SKM1.640183', '1.SKM2.640199', '1.SKM3.640197', - '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187', - '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192'], - 'deprecated': False, - 'creation_job': None, - 'alert_message': ''} + obs = prep_template_ajax_get_req("test@foo.bar", 1) + + exp = { + "status": "success", + "message": "", + "name": "Prep information 1", + "files": ["uploaded_file.txt"], + "download_prep_id": 21, + "other_filepaths": [ + "1_prep_1_19700101-000000.txt", + "1_prep_1_qiime_19700101-000000.txt", + "1_prep_1_19700101-000000.txt", + ], + "num_samples": 27, + "num_columns": 22, + "investigation_type": "Metagenomics", + "ontology": { + "ENA": ["AMPLICON", "Metagenomics", "RNA-Seq", "WGS", "Other"], + "User": [], + }, + "artifact_attached": True, + "archived_artifacts": [], + "study_id": 1, + "editable": True, + "data_type": "18S", + "alert_type": "", + "is_submitted_to_ebi": True, + "prep_restrictions": "", + "samples": [ + "1.SKB1.640202", + "1.SKB2.640194", + "1.SKB3.640195", + "1.SKB4.640189", + "1.SKB5.640181", + "1.SKB6.640176", + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKB9.640200", + "1.SKD1.640179", + "1.SKD2.640178", + "1.SKD3.640198", + "1.SKD4.640185", + "1.SKD5.640186", + "1.SKD6.640190", + "1.SKD7.640191", + "1.SKD8.640184", + "1.SKD9.640182", + "1.SKM1.640183", + "1.SKM2.640199", + "1.SKM3.640197", + "1.SKM4.640180", + "1.SKM5.640177", + "1.SKM6.640187", + "1.SKM7.640188", + "1.SKM8.640201", + "1.SKM9.640192", + ], + "deprecated": False, + "creation_job": None, + "alert_message": "", + } self.assertDictEqual(obs, exp) - obs = prep_template_ajax_get_req('admin@foo.bar', 1) + obs = prep_template_ajax_get_req("admin@foo.bar", 1) self.assertEqual(obs, exp) - obs = prep_template_ajax_get_req('demo@microbio.me', 1) - exp['editable'] = False + obs = prep_template_ajax_get_req("demo@microbio.me", 1) + exp["editable"] = False self.assertEqual(obs, exp) def test_check_prep_template_exists(self): obs = _check_prep_template_exists(1) - self.assertEqual(obs, {'status': 'success', 'message': ''}) + self.assertEqual(obs, {"status": "success", "message": ""}) def test_check_prep_template_exists_no_template(self): obs = _check_prep_template_exists(3100) - self.assertEqual(obs, {'status': 'error', - 'message': 'Prep template 3100 does not exist'}) + self.assertEqual( + obs, {"status": "error", "message": "Prep template 3100 does not exist"} + ) def test_prep_template_get_req(self): - obs = prep_template_get_req(1, 'test@foo.bar') + obs = prep_template_get_req(1, "test@foo.bar") + self.assertCountEqual(list(obs.keys()), ["status", "message", "template"]) + self.assertEqual(obs["status"], "success") + self.assertEqual(obs["message"], "") self.assertCountEqual( - list(obs.keys()), ['status', 'message', 'template']) - self.assertEqual(obs['status'], 'success') - self.assertEqual(obs['message'], '') - self.assertCountEqual(obs['template'].keys(), [ - '1.SKB2.640194', '1.SKM4.640180', '1.SKB3.640195', '1.SKB6.640176', - '1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', '1.SKM8.640201', - '1.SKM2.640199', '1.SKD2.640178', '1.SKB7.640196', 
'1.SKD4.640185', - '1.SKB8.640193', '1.SKM3.640197', '1.SKD5.640186', '1.SKB1.640202', - '1.SKM1.640183', '1.SKD1.640179', '1.SKD3.640198', '1.SKB5.640181', - '1.SKB4.640189', '1.SKB9.640200', '1.SKM9.640192', '1.SKD8.640184', - '1.SKM5.640177', '1.SKM7.640188', '1.SKD7.640191']) - self.assertEqual(obs['template']['1.SKD7.640191'], { - 'experiment_center': 'ANL', - 'center_name': 'ANL', - 'run_center': 'ANL', - 'run_prefix': 's_G1_L001_sequences', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'target_gene': '16S rRNA', - 'sequencing_meth': 'Sequencing by synthesis', - 'run_date': '8/1/12', - 'platform': 'Illumina', - 'pcr_primers': 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', - 'library_construction_protocol': - 'This analysis was done as in Caporaso et al 2011 Genome ' - 'research. The PCR primers (F515/R806) were developed against ' - 'the V4 region of the 16S rRNA (both bacteria and archaea), ' - 'which we determined would yield optimal community clustering ' - 'with reads of this length using a procedure similar to that ' - 'of ref. 15. [For reference, this primer pair amplifies the ' - 'region 533_786 in the Escherichia coli strain 83972 sequence ' - '(greengenes accession no. prokMSA_id:470367).] The reverse ' - 'PCR primer is barcoded with a 12-base error-correcting Golay ' - 'code to facilitate multiplexing of up to 1,500 samples per ' - 'lane, and both PCR primers contain sequencer adapter ' - 'regions.', - 'experiment_design_description': - 'micro biome of soil and rhizosphere of cannabis plants ' - 'from CA', - 'study_center': 'CCME', - 'center_project_name': None, - 'sample_center': 'ANL', - 'samp_size': '.25,g', - 'barcode': 'ACGCACATACAA', - 'qiita_prep_id': '1', - 'emp_status': 'EMP', - 'illumina_technology': 'MiSeq', - 'experiment_title': 'Cannabis Soil Microbiome', - 'target_subfragment': 'V4', - 'instrument_model': 'Illumina MiSeq'}) + obs["template"].keys(), + [ + "1.SKB2.640194", + "1.SKM4.640180", + "1.SKB3.640195", + "1.SKB6.640176", + "1.SKD6.640190", + "1.SKM6.640187", + "1.SKD9.640182", + "1.SKM8.640201", + "1.SKM2.640199", + "1.SKD2.640178", + "1.SKB7.640196", + "1.SKD4.640185", + "1.SKB8.640193", + "1.SKM3.640197", + "1.SKD5.640186", + "1.SKB1.640202", + "1.SKM1.640183", + "1.SKD1.640179", + "1.SKD3.640198", + "1.SKB5.640181", + "1.SKB4.640189", + "1.SKB9.640200", + "1.SKM9.640192", + "1.SKD8.640184", + "1.SKM5.640177", + "1.SKM7.640188", + "1.SKD7.640191", + ], + ) + self.assertEqual( + obs["template"]["1.SKD7.640191"], + { + "experiment_center": "ANL", + "center_name": "ANL", + "run_center": "ANL", + "run_prefix": "s_G1_L001_sequences", + "primer": "GTGCCAGCMGCCGCGGTAA", + "target_gene": "16S rRNA", + "sequencing_meth": "Sequencing by synthesis", + "run_date": "8/1/12", + "platform": "Illumina", + "pcr_primers": "FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT", + "library_construction_protocol": "This analysis was done as in Caporaso et al 2011 Genome " + "research. The PCR primers (F515/R806) were developed against " + "the V4 region of the 16S rRNA (both bacteria and archaea), " + "which we determined would yield optimal community clustering " + "with reads of this length using a procedure similar to that " + "of ref. 15. [For reference, this primer pair amplifies the " + "region 533_786 in the Escherichia coli strain 83972 sequence " + "(greengenes accession no. prokMSA_id:470367).] 
The reverse " + "PCR primer is barcoded with a 12-base error-correcting Golay " + "code to facilitate multiplexing of up to 1,500 samples per " + "lane, and both PCR primers contain sequencer adapter " + "regions.", + "experiment_design_description": "micro biome of soil and rhizosphere of cannabis plants " + "from CA", + "study_center": "CCME", + "center_project_name": None, + "sample_center": "ANL", + "samp_size": ".25,g", + "barcode": "ACGCACATACAA", + "qiita_prep_id": "1", + "emp_status": "EMP", + "illumina_technology": "MiSeq", + "experiment_title": "Cannabis Soil Microbiome", + "target_subfragment": "V4", + "instrument_model": "Illumina MiSeq", + }, + ) def test_prep_template_get_req_no_access(self): - obs = prep_template_get_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = prep_template_get_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_prep_template_get_req_no_exists(self): - obs = prep_template_get_req(3100, 'test@foo.bar') - self.assertEqual(obs, {'status': 'error', - 'message': 'Prep template 3100 does not exist'}) + obs = prep_template_get_req(3100, "test@foo.bar") + self.assertEqual( + obs, {"status": "error", "message": "Prep template 3100 does not exist"} + ) def test_prep_template_filepaths_get_req(self): - obs = prep_template_filepaths_get_req(1, 'test@foo.bar') + obs = prep_template_filepaths_get_req(1, "test@foo.bar") # have to check each key individually as the filepaths will change - self.assertEqual(obs['status'], 'success') - self.assertEqual(obs['message'], '') + self.assertEqual(obs["status"], "success") + self.assertEqual(obs["message"], "") # [0] the fp_id is the first element, that should change - fp_ids = [fp[0] for fp in obs['filepaths']] + fp_ids = [fp[0] for fp in obs["filepaths"]] self.assertCountEqual(fp_ids, [18, 19, 20, 21]) def test_prep_template_filepaths_get_req_no_access(self): - obs = prep_template_filepaths_get_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = prep_template_filepaths_get_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_prep_template_graph_get_req_no_access(self): - obs = prep_template_graph_get_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = prep_template_graph_get_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_prep_template_graph_get_req_no_exists(self): - obs = prep_template_graph_get_req(3100, 'test@foo.bar') - self.assertEqual(obs, {'status': 'error', - 'message': 'Prep template 3100 does not exist'}) + obs = prep_template_graph_get_req(3100, "test@foo.bar") + self.assertEqual( + obs, {"status": "error", "message": "Prep template 3100 does not exist"} + ) def test_prep_template_summary_get_req(self): - obs = prep_template_summary_get_req(1, 'test@foo.bar') - exp = {'num_samples': 27, 'summary': [ - ('barcode', [('AACTCCTGTGGA', 1), ('ACCTCAGTCAAG', 1), - ('ACGCACATACAA', 1), ('AGCAGGCACGAA', 1), - ('AGCGCTCACATC', 1), ('ATATCGCGATGA', 1), - ('ATGGCCTGACTA', 1), ('CATACACGCACC', 1), - ('CCACCCAGTAAC', 1), ('CCGATGCCTTGA', 1), - ('CCTCGATGCAGT', 1), ('CCTCTGAGAGCT', 1), - ('CGAGGTTCTGAT', 1), ('CGCCGGTAATCT', 1), - ('CGGCCTAAGTTC', 1), ('CGTAGAGCTCTC', 1), - 
('CGTGCACAATTG', 1), ('GATAGCACTCGT', 1), - ('GCGGACTATTCA', 1), ('GTCCGCAAGTTA', 1), - ('TAATGGTCGTAG', 1), ('TAGCGCGAACTT', 1), - ('TCGACCAAACAC', 1), ('TGAGTGGTCTGT', 1), - ('TGCTACAGACGT', 1), ('TGGTTATGGCAC', 1), - ('TTGCACCGTCGA', 1)]), ('center_name', [('ANL', 27)]), - ('center_project_name', [('None', 27)]), - ('emp_status', [('EMP', 27)]), - ('experiment_center', [('ANL', 27)]), - ('experiment_design_description', [('micro biome of soil and ' - 'rhizosphere of cannabis plants from CA', 27)]), - ('experiment_title', [('Cannabis Soil Microbiome', 27)]), - ('illumina_technology', [('MiSeq', 27)]), - ('instrument_model', [('Illumina MiSeq', 27)]), - ('library_construction_protocol', [( - 'This analysis was done as in Caporaso et al 2011 Genome ' - 'research. The PCR primers (F515/R806) were developed against ' - 'the V4 region of the 16S rRNA (both bacteria and archaea), ' - 'which we determined would yield optimal community clustering ' - 'with reads of this length using a procedure similar to that ' - 'of ref. 15. [For reference, this primer pair amplifies the ' - 'region 533_786 in the Escherichia coli strain 83972 sequence ' - '(greengenes accession no. prokMSA_id:470367).] The reverse ' - 'PCR primer is barcoded with a 12-base error-correcting Golay ' - 'code to facilitate multiplexing of up to 1,500 samples per ' - 'lane, and both PCR primers contain sequencer adapter ' - 'regions.', 27)]), - ('pcr_primers', [('FWD:GTGCCAGCMGCCGCGGTAA; ' - 'REV:GGACTACHVGGGTWTCTAAT', 27)]), - ('platform', [('Illumina', 27)]), - ('primer', [('GTGCCAGCMGCCGCGGTAA', 27)]), - ('qiita_prep_id', [('1', 27)]), ('run_center', [('ANL', 27)]), - ('run_date', [('8/1/12', 27)]), - ('run_prefix', [('s_G1_L001_sequences', 27)]), - ('samp_size', [('.25,g', 27)]), ('sample_center', [('ANL', 27)]), - ('sequencing_meth', [('Sequencing by synthesis', 27)]), - ('study_center', [('CCME', 27)]), - ('target_gene', [('16S rRNA', 27)]), - ('target_subfragment', [('V4', 27)])], 'status': 'success', - 'message': '', 'editable': True} + obs = prep_template_summary_get_req(1, "test@foo.bar") + exp = { + "num_samples": 27, + "summary": [ + ( + "barcode", + [ + ("AACTCCTGTGGA", 1), + ("ACCTCAGTCAAG", 1), + ("ACGCACATACAA", 1), + ("AGCAGGCACGAA", 1), + ("AGCGCTCACATC", 1), + ("ATATCGCGATGA", 1), + ("ATGGCCTGACTA", 1), + ("CATACACGCACC", 1), + ("CCACCCAGTAAC", 1), + ("CCGATGCCTTGA", 1), + ("CCTCGATGCAGT", 1), + ("CCTCTGAGAGCT", 1), + ("CGAGGTTCTGAT", 1), + ("CGCCGGTAATCT", 1), + ("CGGCCTAAGTTC", 1), + ("CGTAGAGCTCTC", 1), + ("CGTGCACAATTG", 1), + ("GATAGCACTCGT", 1), + ("GCGGACTATTCA", 1), + ("GTCCGCAAGTTA", 1), + ("TAATGGTCGTAG", 1), + ("TAGCGCGAACTT", 1), + ("TCGACCAAACAC", 1), + ("TGAGTGGTCTGT", 1), + ("TGCTACAGACGT", 1), + ("TGGTTATGGCAC", 1), + ("TTGCACCGTCGA", 1), + ], + ), + ("center_name", [("ANL", 27)]), + ("center_project_name", [("None", 27)]), + ("emp_status", [("EMP", 27)]), + ("experiment_center", [("ANL", 27)]), + ( + "experiment_design_description", + [ + ( + "micro biome of soil and " + "rhizosphere of cannabis plants from CA", + 27, + ) + ], + ), + ("experiment_title", [("Cannabis Soil Microbiome", 27)]), + ("illumina_technology", [("MiSeq", 27)]), + ("instrument_model", [("Illumina MiSeq", 27)]), + ( + "library_construction_protocol", + [ + ( + "This analysis was done as in Caporaso et al 2011 Genome " + "research. 
The PCR primers (F515/R806) were developed against " + "the V4 region of the 16S rRNA (both bacteria and archaea), " + "which we determined would yield optimal community clustering " + "with reads of this length using a procedure similar to that " + "of ref. 15. [For reference, this primer pair amplifies the " + "region 533_786 in the Escherichia coli strain 83972 sequence " + "(greengenes accession no. prokMSA_id:470367).] The reverse " + "PCR primer is barcoded with a 12-base error-correcting Golay " + "code to facilitate multiplexing of up to 1,500 samples per " + "lane, and both PCR primers contain sequencer adapter " + "regions.", + 27, + ) + ], + ), + ( + "pcr_primers", + [("FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT", 27)], + ), + ("platform", [("Illumina", 27)]), + ("primer", [("GTGCCAGCMGCCGCGGTAA", 27)]), + ("qiita_prep_id", [("1", 27)]), + ("run_center", [("ANL", 27)]), + ("run_date", [("8/1/12", 27)]), + ("run_prefix", [("s_G1_L001_sequences", 27)]), + ("samp_size", [(".25,g", 27)]), + ("sample_center", [("ANL", 27)]), + ("sequencing_meth", [("Sequencing by synthesis", 27)]), + ("study_center", [("CCME", 27)]), + ("target_gene", [("16S rRNA", 27)]), + ("target_subfragment", [("V4", 27)]), + ], + "status": "success", + "message": "", + "editable": True, + } self.assertDictEqual(obs, exp) def test_prep_template_summary_get_req_no_access(self): - obs = prep_template_summary_get_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = prep_template_summary_get_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_prep_template_summary_get_req_no_exists(self): - obs = prep_template_summary_get_req(3100, 'test@foo.bar') - self.assertEqual(obs, {'status': 'error', - 'message': 'Prep template 3100 does not exist'}) + obs = prep_template_summary_get_req(3100, "test@foo.bar") + self.assertEqual( + obs, {"status": "error", "message": "Prep template 3100 does not exist"} + ) @qiita_test_checker() class TestPrepAPI(TestCase): def setUp(self): # Create test file to point update tests at - self.update_fp = join(get_mountpoint("uploads")[0][1], '1', - 'update.txt') - with open(self.update_fp, 'w') as f: + self.update_fp = join(get_mountpoint("uploads")[0][1], "1", "update.txt") + with open(self.update_fp, "w") as f: f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""") def tearDown(self): if exists(self.update_fp): remove(self.update_fp) - fp = join(get_mountpoint("uploads")[0][1], '1', 'uploaded_file.txt') + fp = join(get_mountpoint("uploads")[0][1], "1", "uploaded_file.txt") if not exists(fp): - with open(fp, 'w') as f: - f.write('') + with open(fp, "w") as f: + f.write("") r_client.flushdb() @@ -295,81 +406,123 @@ def _wait_for_parallel_job(self, key): # so we need to make sure that all processes are done before we reset # the test database obs = r_client.get(key) - wait_for_processing_job(loads(obs)['job_id']) + wait_for_processing_job(loads(obs)["job_id"]) def test_prep_template_graph_get_req(self): - obs = prep_template_graph_get_req(1, 'test@foo.bar') + obs = prep_template_graph_get_req(1, "test@foo.bar") # jobs are randomly generated then testing composition - self.assertEqual(obs['message'], '') - self.assertEqual(obs['status'], 'success') - self.assertEqual(11, len(obs['nodes'])) + self.assertEqual(obs["message"], "") + self.assertEqual(obs["status"], "success") + self.assertEqual(11, len(obs["nodes"])) self.assertIn( - 
('artifact', 'FASTQ', 1, 'Raw data 1\n(FASTQ)', 'artifact'), - obs['nodes']) + ("artifact", "FASTQ", 1, "Raw data 1\n(FASTQ)", "artifact"), obs["nodes"] + ) self.assertIn( - ('artifact', 'Demultiplexed', 2, - 'Demultiplexed 1\n(Demultiplexed)', 'artifact'), - obs['nodes']) + ( + "artifact", + "Demultiplexed", + 2, + "Demultiplexed 1\n(Demultiplexed)", + "artifact", + ), + obs["nodes"], + ) self.assertIn( - ('artifact', 'Demultiplexed', 3, - 'Demultiplexed 2\n(Demultiplexed)', 'artifact'), - obs['nodes']) - self.assertIn(('artifact', 'BIOM', 4, 'BIOM\n(BIOM)', 'artifact'), - obs['nodes']) - self.assertIn(('artifact', 'BIOM', 5, 'BIOM\n(BIOM)', 'artifact'), - obs['nodes']) - self.assertIn(('artifact', 'BIOM', 6, 'BIOM\n(BIOM)', 'artifact'), - obs['nodes']) - self.assertEqual(3, len([n for dt, _, _, n, _ in obs['nodes'] - if n == 'Pick closed-reference OTUs' and - dt == 'job'])) - self.assertEqual(2, len([n for dt, _, _, n, _ in obs['nodes'] - if n == 'Split libraries FASTQ' and - dt == 'job'])) - - self.assertEqual(10, len(obs['edges'])) - self.assertEqual(2, len([x for x, y in obs['edges'] if x == 1])) - self.assertEqual(3, len([x for x, y in obs['edges'] if x == 2])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 2])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 3])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 4])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 5])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 6])) - - self.assertIsNone(obs['workflow']) + ( + "artifact", + "Demultiplexed", + 3, + "Demultiplexed 2\n(Demultiplexed)", + "artifact", + ), + obs["nodes"], + ) + self.assertIn(("artifact", "BIOM", 4, "BIOM\n(BIOM)", "artifact"), obs["nodes"]) + self.assertIn(("artifact", "BIOM", 5, "BIOM\n(BIOM)", "artifact"), obs["nodes"]) + self.assertIn(("artifact", "BIOM", 6, "BIOM\n(BIOM)", "artifact"), obs["nodes"]) + self.assertEqual( + 3, + len( + [ + n + for dt, _, _, n, _ in obs["nodes"] + if n == "Pick closed-reference OTUs" and dt == "job" + ] + ), + ) + self.assertEqual( + 2, + len( + [ + n + for dt, _, _, n, _ in obs["nodes"] + if n == "Split libraries FASTQ" and dt == "job" + ] + ), + ) + + self.assertEqual(10, len(obs["edges"])) + self.assertEqual(2, len([x for x, y in obs["edges"] if x == 1])) + self.assertEqual(3, len([x for x, y in obs["edges"] if x == 2])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 2])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 3])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 4])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 5])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 6])) + + self.assertIsNone(obs["workflow"]) Artifact(4).visibility = "public" - obs = prep_template_graph_get_req(1, 'demo@microbio.me') - self.assertEqual(obs['message'], '') - self.assertEqual(obs['status'], 'success') - self.assertEqual(11, len(obs['nodes'])) + obs = prep_template_graph_get_req(1, "demo@microbio.me") + self.assertEqual(obs["message"], "") + self.assertEqual(obs["status"], "success") + self.assertEqual(11, len(obs["nodes"])) self.assertIn( - ('artifact', 'FASTQ', 1, 'Raw data 1\n(FASTQ)', 'artifact'), - obs['nodes']) + ("artifact", "FASTQ", 1, "Raw data 1\n(FASTQ)", "artifact"), obs["nodes"] + ) self.assertIn( - ('artifact', 'Demultiplexed', 2, - 'Demultiplexed 1\n(Demultiplexed)', 'artifact'), - obs['nodes']) - self.assertIn(('artifact', 'BIOM', 4, 'BIOM\n(BIOM)', 'artifact'), - obs['nodes']) - 
self.assertEqual(3, len([n for dt, _, _, n, _ in obs['nodes'] - if n == 'Pick closed-reference OTUs' and - dt == 'job'])) - self.assertEqual(2, len([n for dt, _, _, n, _ in obs['nodes'] - if n == 'Split libraries FASTQ' and - dt == 'job'])) - - self.assertEqual(10, len(obs['edges'])) - self.assertEqual(2, len([x for x, y in obs['edges'] if x == 1])) - self.assertEqual(3, len([x for x, y in obs['edges'] if x == 2])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 2])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 3])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 4])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 5])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 6])) - - self.assertIsNone(obs['workflow']) + ( + "artifact", + "Demultiplexed", + 2, + "Demultiplexed 1\n(Demultiplexed)", + "artifact", + ), + obs["nodes"], + ) + self.assertIn(("artifact", "BIOM", 4, "BIOM\n(BIOM)", "artifact"), obs["nodes"]) + self.assertEqual( + 3, + len( + [ + n + for dt, _, _, n, _ in obs["nodes"] + if n == "Pick closed-reference OTUs" and dt == "job" + ] + ), + ) + self.assertEqual( + 2, + len( + [ + n + for dt, _, _, n, _ in obs["nodes"] + if n == "Split libraries FASTQ" and dt == "job" + ] + ), + ) + + self.assertEqual(10, len(obs["edges"])) + self.assertEqual(2, len([x for x, y in obs["edges"] if x == 1])) + self.assertEqual(3, len([x for x, y in obs["edges"] if x == 2])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 2])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 3])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 4])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 5])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 6])) + + self.assertIsNone(obs["workflow"]) # Reset visibility of the artifacts for i in range(4, 0, -1): @@ -378,54 +531,60 @@ def test_prep_template_graph_get_req(self): def test_prep_template_jobs_get_req(self): # Create a new template: metadata = pd.DataFrame.from_dict( - {'SKD6.640190': {'center_name': 'ANL', - 'target_subfragment': 'V4', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'EMP_status': 'EMP', - 'str_column': 'Value for sample 1', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'}}, - orient='index', dtype=str) - pt = PrepTemplate.create(metadata, Study(1), '16S') + { + "SKD6.640190": { + "center_name": "ANL", + "target_subfragment": "V4", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "EMP_status": "EMP", + "str_column": "Value for sample 1", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "experiment_design_description": "BBBB", + } + }, + orient="index", + dtype=str, + ) + pt = PrepTemplate.create(metadata, Study(1), "16S") # Check that it returns an empty dictionary when there are no jobs # attached to the prep template - self.assertEqual(prep_template_jobs_get_req(pt.id, 'test@foo.bar'), {}) + self.assertEqual(prep_template_jobs_get_req(pt.id, "test@foo.bar"), {}) # Create a job on the template prep_template_patch_req( - 'test@foo.bar', 'remove', - '/%s/10/columns/target_subfragment/' % 
pt.id) + "test@foo.bar", "remove", "/%s/10/columns/target_subfragment/" % pt.id + ) # To ensure a deterministic result, wait until the job is completed - self._wait_for_parallel_job('prep_template_%s' % pt.id) - obs = prep_template_jobs_get_req(pt.id, 'test@foo.bar') + self._wait_for_parallel_job("prep_template_%s" % pt.id) + obs = prep_template_jobs_get_req(pt.id, "test@foo.bar") self.assertEqual(len(obs), 1) self.assertCountEqual( - obs.values(), [{'error': '', 'status': 'success', 'step': None}]) + obs.values(), [{"error": "", "status": "success", "step": None}] + ) - obs = prep_template_jobs_get_req(pt.id, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = prep_template_jobs_get_req(pt.id, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_process_investigation_type(self): - obs = _process_investigation_type('Metagenomics', '', '') - self.assertEqual(obs, 'Metagenomics') + obs = _process_investigation_type("Metagenomics", "", "") + self.assertEqual(obs, "Metagenomics") def test_process_investigation_type_user_term(self): - _process_investigation_type('Other', 'New Type', 'userterm') - obs = _process_investigation_type('Other', 'userterm', '') - self.assertEqual(obs, 'userterm') + _process_investigation_type("Other", "New Type", "userterm") + obs = _process_investigation_type("Other", "userterm", "") + self.assertEqual(obs, "userterm") def test_process_investigation_type_new_term(self): - randstr = ''.join([choice(ascii_letters) for x in range(30)]) - obs = _process_investigation_type('Other', 'New Type', randstr) + randstr = "".join([choice(ascii_letters) for x in range(30)]) + obs = _process_investigation_type("Other", "New Type", randstr) self.assertEqual(obs, randstr) # Make sure New Type added @@ -433,174 +592,188 @@ def test_process_investigation_type_new_term(self): self.assertIn(randstr, ontology.user_defined_terms) def test_prep_template_post_req(self): - obs = prep_template_post_req(1, 'test@foo.bar', 'update.txt', - '16S', name=" ") - exp = {'status': 'warning', - 'message': [ - 'Both a converter and dtype were specified for column ' - 'sample_name - only the converter will be used.', 'Some ' - 'functionality will be disabled due to missing columns:', - '\tEBI submission disabled: center_name, ' - 'experiment_design_description, instrument_model, ' - 'library_construction_protocol, platform;', - '\tDemultiplexing disabled.: barcode;', '\tDemultiplexing ' - 'with multiple input files disabled.: barcode, primer, ' - 'run_prefix.', 'See the Templates tutorial for a ' - 'description of these fields.'], - 'file': 'update.txt', - 'id': 'ignored in test'} - - self.assertCountEqual(obs['message'].split('\n'), exp['message']) - self.assertEqual(obs['status'], exp['status']) - self.assertEqual(obs['file'], exp['file']) - self.assertIsInstance(obs['id'], int) + obs = prep_template_post_req(1, "test@foo.bar", "update.txt", "16S", name=" ") + exp = { + "status": "warning", + "message": [ + "Both a converter and dtype were specified for column " + "sample_name - only the converter will be used.", + "Some functionality will be disabled due to missing columns:", + "\tEBI submission disabled: center_name, " + "experiment_design_description, instrument_model, " + "library_construction_protocol, platform;", + "\tDemultiplexing disabled.: barcode;", + "\tDemultiplexing " + "with multiple input files disabled.: barcode, primer, " + "run_prefix.", + "See the 
Templates tutorial for a description of these fields.", + ], + "file": "update.txt", + "id": "ignored in test", + } + + self.assertCountEqual(obs["message"].split("\n"), exp["message"]) + self.assertEqual(obs["status"], exp["status"]) + self.assertEqual(obs["file"], exp["file"]) + self.assertIsInstance(obs["id"], int) # Make sure new prep template added - prep = PrepTemplate(obs['id']) - self.assertEqual(prep.data_type(), '16S') - self.assertEqual([x for x in prep.keys()], ['1.SKD6.640190']) - self.assertEqual([x._to_dict() for x in prep.values()], - [{'new_col': 'new_value'}]) + prep = PrepTemplate(obs["id"]) + self.assertEqual(prep.data_type(), "16S") + self.assertEqual([x for x in prep.keys()], ["1.SKD6.640190"]) + self.assertEqual( + [x._to_dict() for x in prep.values()], [{"new_col": "new_value"}] + ) self.assertEqual(prep.name, "Prep information %s" % prep.id) def test_prep_template_post_req_errors(self): # User doesn't have access - obs = prep_template_post_req(1, 'demo@microbio.me', 'filepath', '16S') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = prep_template_post_req(1, "demo@microbio.me", "filepath", "16S") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) # The file does not exist - obs = prep_template_post_req(1, 'test@foo.bar', 'badfilepath', '16S') - exp = {'status': 'error', - 'message': 'file does not exist', - 'file': 'badfilepath'} + obs = prep_template_post_req(1, "test@foo.bar", "badfilepath", "16S") + exp = { + "status": "error", + "message": "file does not exist", + "file": "badfilepath", + } self.assertEqual(obs, exp) # Prep template does not exist - obs = prep_template_post_req(3100, 'test@foo.bar', 'update.txt', - '16S') - self.assertEqual(obs, {'status': 'error', - 'message': 'Study does not exist'}) + obs = prep_template_post_req(3100, "test@foo.bar", "update.txt", "16S") + self.assertEqual(obs, {"status": "error", "message": "Study does not exist"}) def test_prep_template_patch_req(self): metadata = pd.DataFrame.from_dict( - {'SKD6.640190': {'center_name': 'ANL', - 'target_subfragment': 'V4', - 'center_project_name': 'Test Project', - 'ebi_submission_accession': None, - 'EMP_status': 'EMP', - 'str_column': 'Value for sample 1', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'GTCCGCAAGTTA', - 'run_prefix': "s_G1_L001_sequences", - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'AAAA', - 'experiment_design_description': 'BBBB'}}, - orient='index', dtype=str) - pt = PrepTemplate.create(metadata, Study(1), '16S') + { + "SKD6.640190": { + "center_name": "ANL", + "target_subfragment": "V4", + "center_project_name": "Test Project", + "ebi_submission_accession": None, + "EMP_status": "EMP", + "str_column": "Value for sample 1", + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "GTCCGCAAGTTA", + "run_prefix": "s_G1_L001_sequences", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "AAAA", + "experiment_design_description": "BBBB", + } + }, + orient="index", + dtype=str, + ) + pt = PrepTemplate.create(metadata, Study(1), "16S") # Update investigation type obs = prep_template_patch_req( - 'test@foo.bar', 'replace', '/%s/investigation_type' % pt.id, - 'RNA-Seq') - exp = {'status': 'success', 'message': ''} + "test@foo.bar", "replace", "/%s/investigation_type" % pt.id, "RNA-Seq" + ) + exp = {"status": "success", "message": ""} self.assertEqual(obs, exp) - 
self.assertEqual(pt.investigation_type, 'RNA-Seq') + self.assertEqual(pt.investigation_type, "RNA-Seq") # Update prep template data obs = prep_template_patch_req( - 'test@foo.bar', 'replace', '/%s/data' % pt.id, 'update.txt') + "test@foo.bar", "replace", "/%s/data" % pt.id, "update.txt" + ) self.assertEqual(obs, exp) - obs = r_client.get('prep_template_%s' % pt.id) + obs = r_client.get("prep_template_%s" % pt.id) self.assertIsNotNone(obs) - self._wait_for_parallel_job('prep_template_%s' % pt.id) + self._wait_for_parallel_job("prep_template_%s" % pt.id) # Delete a prep template column obs = prep_template_patch_req( - 'test@foo.bar', 'remove', - '/%s/10/columns/target_subfragment/' % pt.id) - exp = {'status': 'success', 'message': '', 'row_id': '10'} + "test@foo.bar", "remove", "/%s/10/columns/target_subfragment/" % pt.id + ) + exp = {"status": "success", "message": "", "row_id": "10"} self.assertEqual(obs, exp) - self._wait_for_parallel_job('prep_template_%s' % pt.id) - self.assertNotIn('target_subfragment', pt.categories) + self._wait_for_parallel_job("prep_template_%s" % pt.id) + self.assertNotIn("target_subfragment", pt.categories) # Change the name of the prep template obs = prep_template_patch_req( - 'test@foo.bar', 'replace', '/%s/name' % pt.id, ' My New Name ') - exp = {'status': 'success', 'message': ''} + "test@foo.bar", "replace", "/%s/name" % pt.id, " My New Name " + ) + exp = {"status": "success", "message": ""} self.assertEqual(obs, exp) - self.assertEqual(pt.name, 'My New Name') + self.assertEqual(pt.name, "My New Name") # Test all the errors # Operation not supported obs = prep_template_patch_req( - 'test@foo.bar', 'add', '/1/investigation_type', - 'RNA-Seq') - exp = {'status': 'error', - 'message': 'Operation "add" not supported. ' - 'Current supported operations: replace, remove', - 'row_id': '0'} + "test@foo.bar", "add", "/1/investigation_type", "RNA-Seq" + ) + exp = { + "status": "error", + "message": 'Operation "add" not supported. ' + "Current supported operations: replace, remove", + "row_id": "0", + } self.assertEqual(obs, exp) # Incorrect path parameter obs = prep_template_patch_req( - 'test@foo.bar', 'replace', '/investigation_type', - 'RNA-Seq') - exp = {'status': 'error', - 'message': 'Incorrect path parameter'} + "test@foo.bar", "replace", "/investigation_type", "RNA-Seq" + ) + exp = {"status": "error", "message": "Incorrect path parameter"} self.assertEqual(obs, exp) # Incorrect attribute obs = prep_template_patch_req( - 'test@foo.bar', 'replace', '/1/other_attribute', - 'RNA-Seq') - exp = {'status': 'error', - 'message': 'Attribute "other_attribute" not found. ' - 'Please, check the path parameter'} + "test@foo.bar", "replace", "/1/other_attribute", "RNA-Seq" + ) + exp = { + "status": "error", + "message": 'Attribute "other_attribute" not found. 
' + "Please, check the path parameter", + } self.assertEqual(obs, exp) # User doesn't have access obs = prep_template_patch_req( - 'demo@microbio.me', 'replace', '/%s/investigation_type' % pt.id, - 'RNA-Seq') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + "demo@microbio.me", "replace", "/%s/investigation_type" % pt.id, "RNA-Seq" + ) + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) # File does not exists obs = prep_template_patch_req( - 'test@foo.bar', 'replace', '/1/data', 'unknown_file.txt') - exp = {'status': 'error', - 'message': 'file does not exist', - 'file': 'unknown_file.txt'} + "test@foo.bar", "replace", "/1/data", "unknown_file.txt" + ) + exp = { + "status": "error", + "message": "file does not exist", + "file": "unknown_file.txt", + } self.assertEqual(obs, exp) def test_prep_template_delete_req(self): - template = pd.read_csv(self.update_fp, sep='\t', index_col=0) - new_id = get_count('qiita.prep_template') + 1 - npt.assert_warns(QiitaDBWarning, PrepTemplate.create, - template, Study(1), '16S') - obs = prep_template_delete_req(new_id, 'test@foo.bar') - exp = {'status': 'success', - 'message': ''} + template = pd.read_csv(self.update_fp, sep="\t", index_col=0) + new_id = get_count("qiita.prep_template") + 1 + npt.assert_warns(QiitaDBWarning, PrepTemplate.create, template, Study(1), "16S") + obs = prep_template_delete_req(new_id, "test@foo.bar") + exp = {"status": "success", "message": ""} self.assertEqual(obs, exp) def test_prep_template_delete_req_attached_artifact(self): - obs = prep_template_delete_req(1, 'test@foo.bar') - exp = {'status': 'error', - 'message': "Cannot remove prep template 1 because it has an " - "artifact associated with it"} + obs = prep_template_delete_req(1, "test@foo.bar") + exp = { + "status": "error", + "message": "Cannot remove prep template 1 because it has an " + "artifact associated with it", + } self.assertEqual(obs, exp) def test_prep_template_delete_req_no_access(self): - obs = prep_template_delete_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = prep_template_delete_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_prep_template_delete_req_no_prep(self): - obs = prep_template_delete_req(3100, 'test@foo.bar') - exp = {'status': 'error', - 'message': 'Prep template 3100 does not exist'} + obs = prep_template_delete_req(3100, "test@foo.bar") + exp = {"status": "error", "message": "Prep template 3100 does not exist"} self.assertEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/api_proxy/tests/test_processing.py b/qiita_pet/handlers/api_proxy/tests/test_processing.py index cd8079b7e..ff68a1a9b 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_processing.py +++ b/qiita_pet/handlers/api_proxy/tests/test_processing.py @@ -5,124 +5,174 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main from json import dumps +from unittest import TestCase, main from qiita_core.util import qiita_test_checker -from qiita_db.processing_job import ProcessingWorkflow, ProcessingJob +from qiita_db.processing_job import ProcessingJob, ProcessingWorkflow from qiita_db.software import Command, Parameters from qiita_db.user import User from qiita_pet.handlers.api_proxy.processing import ( - list_commands_handler_get_req, list_options_handler_get_req, - workflow_handler_post_req, workflow_handler_patch_req, job_ajax_get_req, - job_ajax_patch_req) + job_ajax_get_req, + job_ajax_patch_req, + list_commands_handler_get_req, + list_options_handler_get_req, + workflow_handler_patch_req, + workflow_handler_post_req, +) class TestProcessingAPIReadOnly(TestCase): def test_list_commands_handler_get_req(self): - obs = list_commands_handler_get_req('FASTQ', True) - exp = {'status': 'success', - 'message': '', - 'commands': [{'id': 1, 'command': 'Split libraries FASTQ', - 'output': [['demultiplexed', 'Demultiplexed']]}]} + obs = list_commands_handler_get_req("FASTQ", True) + exp = { + "status": "success", + "message": "", + "commands": [ + { + "id": 1, + "command": "Split libraries FASTQ", + "output": [["demultiplexed", "Demultiplexed"]], + } + ], + } self.assertEqual(obs, exp) - obs = list_commands_handler_get_req('Demultiplexed', True) - exp = {'status': 'success', - 'message': '', - 'commands': [{'id': 3, 'command': 'Pick closed-reference OTUs', - 'output': [['OTU table', 'BIOM']]}]} + obs = list_commands_handler_get_req("Demultiplexed", True) + exp = { + "status": "success", + "message": "", + "commands": [ + { + "id": 3, + "command": "Pick closed-reference OTUs", + "output": [["OTU table", "BIOM"]], + } + ], + } self.assertEqual(obs, exp) - obs = list_commands_handler_get_req('BIOM', False) - exp = {'status': 'success', - 'message': '', - 'commands': [ - {'command': 'Summarize Taxa', 'id': 9, - 'output': [['taxa_summary', 'taxa_summary']]}, - {'command': 'Beta Diversity', 'id': 10, - 'output': [['distance_matrix', 'beta_div_plots']]}, - {'command': 'Alpha Rarefaction', 'id': 11, - 'output': [['rarefaction_curves', 'rarefaction_curves']]}, - {'command': 'Single Rarefaction', 'id': 12, - 'output': [['rarefied_table', 'BIOM']]}]} + obs = list_commands_handler_get_req("BIOM", False) + exp = { + "status": "success", + "message": "", + "commands": [ + { + "command": "Summarize Taxa", + "id": 9, + "output": [["taxa_summary", "taxa_summary"]], + }, + { + "command": "Beta Diversity", + "id": 10, + "output": [["distance_matrix", "beta_div_plots"]], + }, + { + "command": "Alpha Rarefaction", + "id": 11, + "output": [["rarefaction_curves", "rarefaction_curves"]], + }, + { + "command": "Single Rarefaction", + "id": 12, + "output": [["rarefied_table", "BIOM"]], + }, + ], + } # since the order of the commands can change, test them separately - self.assertCountEqual(obs.pop('commands'), exp.pop('commands')) + self.assertCountEqual(obs.pop("commands"), exp.pop("commands")) self.assertEqual(obs, exp) def test_list_options_handler_get_req(self): obs = list_options_handler_get_req(3) - exp = {'status': 'success', - 'message': '', - 'options': [{'id': 10, - 'name': 'Defaults', - 'values': {'reference': 1, - 'similarity': 0.97, - 'sortmerna_coverage': 0.97, - 'sortmerna_e_value': 1, - 'sortmerna_max_pos': 10000, - 'threads': 1}}], - 'req_options': {'input_data': ('artifact', ['Demultiplexed'])}, - 'opt_options': 
{'reference': ['reference', '1'], - 'similarity': ['float', '0.97'], - 'sortmerna_coverage': ['float', '0.97'], - 'sortmerna_e_value': ['float', '1'], - 'sortmerna_max_pos': ['integer', '10000'], - 'threads': ['integer', '1']}, - 'extra_artifacts': {}} + exp = { + "status": "success", + "message": "", + "options": [ + { + "id": 10, + "name": "Defaults", + "values": { + "reference": 1, + "similarity": 0.97, + "sortmerna_coverage": 0.97, + "sortmerna_e_value": 1, + "sortmerna_max_pos": 10000, + "threads": 1, + }, + } + ], + "req_options": {"input_data": ("artifact", ["Demultiplexed"])}, + "opt_options": { + "reference": ["reference", "1"], + "similarity": ["float", "0.97"], + "sortmerna_coverage": ["float", "0.97"], + "sortmerna_e_value": ["float", "1"], + "sortmerna_max_pos": ["integer", "10000"], + "threads": ["integer", "1"], + }, + "extra_artifacts": {}, + } # First check that the keys are the same self.assertCountEqual(obs, exp) - self.assertEqual(obs['status'], exp['status']) - self.assertEqual(obs['message'], exp['message']) - self.assertEqual(obs['options'], exp['options']) - self.assertEqual(obs['req_options'], exp['req_options']) - self.assertEqual(obs['opt_options'], exp['opt_options']) - self.assertEqual(obs['extra_artifacts'], exp['extra_artifacts']) + self.assertEqual(obs["status"], exp["status"]) + self.assertEqual(obs["message"], exp["message"]) + self.assertEqual(obs["options"], exp["options"]) + self.assertEqual(obs["req_options"], exp["req_options"]) + self.assertEqual(obs["opt_options"], exp["opt_options"]) + self.assertEqual(obs["extra_artifacts"], exp["extra_artifacts"]) def test_job_ajax_get_req(self): obs = job_ajax_get_req("063e553b-327c-4818-ab4a-adfe58e49860") exp = { - 'status': 'success', - 'message': '', - 'job_id': "063e553b-327c-4818-ab4a-adfe58e49860", - 'job_external_id': "Not Available", - 'job_status': "queued", - 'job_step': None, - 'job_error': None, - 'job_parameters': {'barcode_type': u'golay_12', - 'input_data': 1, - 'max_bad_run_length': 3, - 'max_barcode_errors': 1.5, - 'min_per_read_length_fraction': 0.75, - 'phred_quality_threshold': 3, - 'rev_comp': False, - 'rev_comp_barcode': False, - 'rev_comp_mapping_barcodes': False, - 'sequence_max_n': 0, - 'phred_offset': 'auto'}, - 'command': 'Split libraries FASTQ', - 'command_description': 'Demultiplexes and applies quality ' - 'control to FASTQ data', - 'software': 'QIIMEq2', - 'software_version': '1.9.1'} + "status": "success", + "message": "", + "job_id": "063e553b-327c-4818-ab4a-adfe58e49860", + "job_external_id": "Not Available", + "job_status": "queued", + "job_step": None, + "job_error": None, + "job_parameters": { + "barcode_type": "golay_12", + "input_data": 1, + "max_bad_run_length": 3, + "max_barcode_errors": 1.5, + "min_per_read_length_fraction": 0.75, + "phred_quality_threshold": 3, + "rev_comp": False, + "rev_comp_barcode": False, + "rev_comp_mapping_barcodes": False, + "sequence_max_n": 0, + "phred_offset": "auto", + }, + "command": "Split libraries FASTQ", + "command_description": "Demultiplexes and applies quality " + "control to FASTQ data", + "software": "QIIMEq2", + "software_version": "1.9.1", + } self.assertEqual(obs, exp) @qiita_test_checker() class TestProcessingAPI(TestCase): def test_workflow_handler_post_req(self): - params = ('{"max_barcode_errors": 1.5, "barcode_type": "golay_12", ' - '"max_bad_run_length": 3, "phred_offset": "auto", ' - '"rev_comp": false, "phred_quality_threshold": 3, ' - '"input_data": 1, "rev_comp_barcode": false, ' - '"rev_comp_mapping_barcodes": false, 
' - '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0}') + params = ( + '{"max_barcode_errors": 1.5, "barcode_type": "golay_12", ' + '"max_bad_run_length": 3, "phred_offset": "auto", ' + '"rev_comp": false, "phred_quality_threshold": 3, ' + '"input_data": 1, "rev_comp_barcode": false, ' + '"rev_comp_mapping_barcodes": false, ' + '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0}' + ) obs = workflow_handler_post_req("test@foo.bar", 1, params) self.assertRegex( - obs.pop('message'), 'Cannot create job because the parameters are ' - 'the same as jobs that are queued, running or already have ' - 'succeeded:\n') - exp = {'status': 'error', 'workflow_id': None, 'job': None} + obs.pop("message"), + "Cannot create job because the parameters are " + "the same as jobs that are queued, running or already have " + "succeeded:\n", + ) + exp = {"status": "error", "workflow_id": None, "job": None} self.assertEqual(obs, exp) def test_workflow_handler_patch_req(self): @@ -133,63 +183,68 @@ def test_workflow_handler_patch_req(self): '"barcode_type": "golay_12", "max_bad_run_length": 3, ' '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' - '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0}') + '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0}' + ) exp_params = Parameters.load(exp_command, json_str=json_str) - exp_user = User('test@foo.bar') + exp_user = User("test@foo.bar") name = "Test processing workflow" # tests success wf = ProcessingWorkflow.from_scratch( - exp_user, exp_params, name=name, force=True) + exp_user, exp_params, name=name, force=True + ) graph = wf.graph nodes = list(graph.nodes()) job_id = nodes[0].id - value = {'dflt_params': 10, - 'connections': {job_id: {'demultiplexed': 'input_data'}}} - obs = workflow_handler_patch_req( - 'add', '/%s/' % wf.id, req_value=dumps(value)) + value = { + "dflt_params": 10, + "connections": {job_id: {"demultiplexed": "input_data"}}, + } + obs = workflow_handler_patch_req("add", "/%s/" % wf.id, req_value=dumps(value)) new_jobs = set(wf.graph.nodes()) - set(nodes) self.assertEqual(len(new_jobs), 1) new_job = new_jobs.pop() - exp = {'status': 'success', - 'message': '', - 'job': {'id': new_job.id, - 'inputs': [job_id], - 'label': 'Pick closed-reference OTUs', - 'outputs': [['OTU table', 'BIOM']]}} + exp = { + "status": "success", + "message": "", + "job": { + "id": new_job.id, + "inputs": [job_id], + "label": "Pick closed-reference OTUs", + "outputs": [["OTU table", "BIOM"]], + }, + } self.assertEqual(obs, exp) - obs = workflow_handler_patch_req( - 'remove', '/%s/%s/' % (wf.id, new_job.id)) - exp = {'status': 'success', 'message': ''} + obs = workflow_handler_patch_req("remove", "/%s/%s/" % (wf.id, new_job.id)) + exp = {"status": "success", "message": ""} jobs = set(wf.graph.nodes()) - set(nodes) self.assertEqual(jobs, set()) def test_workflow_handler_patch_req_error(self): # Incorrect path parameter - obs = workflow_handler_patch_req('add', '/1/extra/') - exp = {'status': 'error', - 'message': 'Incorrect path parameter'} + obs = workflow_handler_patch_req("add", "/1/extra/") + exp = {"status": "error", "message": "Incorrect path parameter"} self.assertEqual(obs, exp) # Workflow does not exist - obs = workflow_handler_patch_req('add', '/1000/') - exp = {'status': 'error', - 'message': 'Workflow 1000 does not exist'} + obs = workflow_handler_patch_req("add", "/1000/") + exp = {"status": "error", "message": "Workflow 1000 does not exist"} self.assertEqual(obs, 
exp) # Operation not supported - obs = workflow_handler_patch_req('replace', '/1/') - exp = {'status': 'error', - 'message': 'Operation "replace" not supported. ' - 'Current supported operations: add'} + obs = workflow_handler_patch_req("replace", "/1/") + exp = { + "status": "error", + "message": 'Operation "replace" not supported. ' + "Current supported operations: add", + } self.assertEqual(obs, exp) # Incorrect path parameter (op = remove) - obs = workflow_handler_patch_req('remove', '/1/') - exp = {'status': 'error', - 'message': 'Incorrect path parameter'} + obs = workflow_handler_patch_req("remove", "/1/") + exp = {"status": "error", "message": "Incorrect path parameter"} self.assertEqual(obs, exp) def test_job_ajax_patch_req(self): @@ -201,77 +256,83 @@ def test_job_ajax_patch_req(self): '"barcode_type": "golay_12", "max_bad_run_length": 3, ' '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' - '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0}') + '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0}' + ) exp_params = Parameters.load(exp_command, json_str=json_str) - exp_user = User('test@foo.bar') + exp_user = User("test@foo.bar") name = "Test processing workflow" # tests success wf = ProcessingWorkflow.from_scratch( - exp_user, exp_params, name=name, force=True) + exp_user, exp_params, name=name, force=True + ) graph = wf.graph nodes = list(graph.nodes()) job_id = nodes[0].id # Incorrect path parameter - obs = job_ajax_patch_req('remove', '/%s/somethingelse' % job_id) - exp = {'status': 'error', - 'message': 'Incorrect path parameter: missing job id'} + obs = job_ajax_patch_req("remove", "/%s/somethingelse" % job_id) + exp = {"status": "error", "message": "Incorrect path parameter: missing job id"} self.assertEqual(obs, exp) - obs = job_ajax_patch_req('remove', '/') - exp = {'status': 'error', - 'message': 'Incorrect path parameter: missing job id'} + obs = job_ajax_patch_req("remove", "/") + exp = {"status": "error", "message": "Incorrect path parameter: missing job id"} self.assertEqual(obs, exp) # Job id is not like a job id - obs = job_ajax_patch_req('remove', '/notAJobId') - exp = {'status': 'error', - 'message': 'Incorrect path parameter: ' - 'notAJobId is not a recognized job id'} + obs = job_ajax_patch_req("remove", "/notAJobId") + exp = { + "status": "error", + "message": "Incorrect path parameter: notAJobId is not a recognized job id", + } self.assertEqual(obs, exp) # Job doesn't exist - obs = job_ajax_patch_req('remove', - '/6d368e16-2242-4cf8-87b4-a5dc40bc890b') - exp = {'status': 'error', - 'message': 'Incorrect path parameter: ' - '6d368e16-2242-4cf8-87b4-a5dc40bc890b is not a ' - 'recognized job id'} + obs = job_ajax_patch_req("remove", "/6d368e16-2242-4cf8-87b4-a5dc40bc890b") + exp = { + "status": "error", + "message": "Incorrect path parameter: " + "6d368e16-2242-4cf8-87b4-a5dc40bc890b is not a " + "recognized job id", + } self.assertEqual(obs, exp) # in_construction job - obs = job_ajax_patch_req('remove', '/%s' % job_id) - exp = {'status': 'error', - 'message': "Can't delete job %s. It is 'in_construction' " - "status. Please use /study/process/workflow/" - % job_id} + obs = job_ajax_patch_req("remove", "/%s" % job_id) + exp = { + "status": "error", + "message": "Can't delete job %s. It is 'in_construction' " + "status. 
Please use /study/process/workflow/" % job_id, + } self.assertEqual(obs, exp) # job status != 'error' job = ProcessingJob(job_id) - job._set_status('queued') - obs = job_ajax_patch_req('remove', '/%s' % job_id) - exp = {'status': 'error', - 'message': 'Only jobs in "error" status can be deleted.'} + job._set_status("queued") + obs = job_ajax_patch_req("remove", "/%s" % job_id) + exp = { + "status": "error", + "message": 'Only jobs in "error" status can be deleted.', + } self.assertEqual(obs, exp) # Operation not supported - job._set_status('queued') - obs = job_ajax_patch_req('add', '/%s' % job_id) - exp = {'status': 'error', - 'message': 'Operation "add" not supported. Current supported ' - 'operations: remove'} + job._set_status("queued") + obs = job_ajax_patch_req("add", "/%s" % job_id) + exp = { + "status": "error", + "message": 'Operation "add" not supported. Current supported ' + "operations: remove", + } self.assertEqual(obs, exp) # Test success - job._set_error('Killed for testing') - obs = job_ajax_patch_req('remove', '/%s' % job_id) - exp = {'status': 'success', - 'message': ''} + job._set_error("Killed for testing") + obs = job_ajax_patch_req("remove", "/%s" % job_id) + exp = {"status": "success", "message": ""} self.assertEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/api_proxy/tests/test_sample_template.py b/qiita_pet/handlers/api_proxy/tests/test_sample_template.py index 87b255f56..a5ae41b24 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_sample_template.py +++ b/qiita_pet/handlers/api_proxy/tests/test_sample_template.py @@ -5,20 +5,25 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main -from os import remove, mkdir -from os.path import join, exists from json import dumps +from os import mkdir, remove +from os.path import exists, join +from unittest import TestCase, main -from qiita_core.util import qiita_test_checker -from qiita_core.qiita_settings import r_client import qiita_db as qdb +from qiita_core.qiita_settings import r_client +from qiita_core.util import qiita_test_checker from qiita_pet.handlers.api_proxy.sample_template import ( - sample_template_filepaths_get_req, sample_template_get_req, - _check_sample_template_exists, sample_template_samples_get_req, - sample_template_category_get_req, sample_template_meta_cats_get_req, - get_sample_template_processing_status, analyses_associated_with_study, - SAMPLE_TEMPLATE_KEY_FORMAT) + SAMPLE_TEMPLATE_KEY_FORMAT, + _check_sample_template_exists, + analyses_associated_with_study, + get_sample_template_processing_status, + sample_template_category_get_req, + sample_template_filepaths_get_req, + sample_template_get_req, + sample_template_meta_cats_get_req, + sample_template_samples_get_req, +) @qiita_test_checker() @@ -32,27 +37,29 @@ def setUp(self): "study_description": "DESC", "study_abstract": "ABS", "principal_investigator_id": qdb.study.StudyPerson(3), - "lab_person_id": qdb.study.StudyPerson(1) + "lab_person_id": qdb.study.StudyPerson(1), } self.new_study = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Some New Study", info) + qdb.user.User("test@foo.bar"), "Some New Study", info + ) - base_dir = join(qdb.util.get_mountpoint('uploads')[0][1], - str(self.new_study.id)) + base_dir = join( + qdb.util.get_mountpoint("uploads")[0][1], str(self.new_study.id) + ) if not exists(base_dir): mkdir(base_dir) - 
self.new_study_fp = join(base_dir, 'uploaded_file.txt') + self.new_study_fp = join(base_dir, "uploaded_file.txt") if not exists(self.new_study_fp): - with open(self.new_study_fp, 'w') as f: - f.write('') + with open(self.new_study_fp, "w") as f: + f.write("") def tearDown(self): - base_dir = qdb.util.get_mountpoint('uploads')[0][1] - fp = join(base_dir, '1', 'uploaded_file.txt') + base_dir = qdb.util.get_mountpoint("uploads")[0][1] + fp = join(base_dir, "1", "uploaded_file.txt") if not exists(fp): - with open(fp, 'w') as f: - f.write('') + with open(fp, "w") as f: + f.write("") if exists(self.new_study_fp): remove(self.new_study_fp) @@ -63,92 +70,124 @@ def tearDown(self): def test_check_sample_template_exists(self): obs = _check_sample_template_exists(1) - self.assertEqual(obs, {'status': 'success', 'message': ''}) + self.assertEqual(obs, {"status": "success", "message": ""}) def test_check_sample_template_exists_no_template(self): obs = _check_sample_template_exists(self.new_study.id) - self.assertEqual(obs, {'status': 'error', - 'message': 'Sample template %d does not ' - 'exist' % self.new_study.id}) + self.assertEqual( + obs, + { + "status": "error", + "message": "Sample template %d does not exist" % self.new_study.id, + }, + ) def test_sample_template_get_req(self): - obs = sample_template_get_req(1, 'test@foo.bar') - self.assertCountEqual(obs.keys(), ['status', 'message', 'template']) - self.assertEqual(obs['status'], 'success') - self.assertEqual(obs['message'], '') - self.assertEqual(len(obs['template']), 27) - self.assertEqual(str( - obs['template']['1.SKB2.640194']['collection_timestamp']), - '2011-11-11 13:00:00') - del obs['template']['1.SKB2.640194']['collection_timestamp'] - self.assertEqual(obs['template']['1.SKB2.640194'], { - 'physical_specimen_location': 'ANL', - 'texture': '64.6 sand, 17.6 silt, 17.8 clay', - 'common_name': 'soil metagenome', - 'water_content_soil': '0.164', - 'env_feature': 'ENVO:plant-associated habitat', - 'assigned_from_geo': 'n', - 'altitude': '0', - 'tot_org_carb': '5', - 'env_biome': 'ENVO:Temperate grasslands, savannas, and shrubland ' - 'biome', - 'sample_type': 'ENVO:soil', - 'scientific_name': '1118232', - 'host_taxid': '3483', - 'latitude': '35.2374368957', - 'ph': '6.94', - 'description_duplicate': 'Burmese bulk', - 'elevation': '114', - 'description': 'Cannabis Soil Microbiome', - 'physical_specimen_remaining': 'true', - 'dna_extracted': 'true', - 'taxon_id': '410658', - 'samp_salinity': '7.15', - 'host_subject_id': '1001:B4', - 'season_environment': 'winter', - 'env_package': 'soil', - 'temp': '15', - 'qiita_study_id': '1', - 'country': 'GAZ:United States of America', - 'longitude': '68.5041623253', - 'tot_nitro': '1.41', - 'depth': '0.15', - 'anonymized_name': 'SKB2'}) + obs = sample_template_get_req(1, "test@foo.bar") + self.assertCountEqual(obs.keys(), ["status", "message", "template"]) + self.assertEqual(obs["status"], "success") + self.assertEqual(obs["message"], "") + self.assertEqual(len(obs["template"]), 27) + self.assertEqual( + str(obs["template"]["1.SKB2.640194"]["collection_timestamp"]), + "2011-11-11 13:00:00", + ) + del obs["template"]["1.SKB2.640194"]["collection_timestamp"] + self.assertEqual( + obs["template"]["1.SKB2.640194"], + { + "physical_specimen_location": "ANL", + "texture": "64.6 sand, 17.6 silt, 17.8 clay", + "common_name": "soil metagenome", + "water_content_soil": "0.164", + "env_feature": "ENVO:plant-associated habitat", + "assigned_from_geo": "n", + "altitude": "0", + "tot_org_carb": "5", + "env_biome": 
"ENVO:Temperate grasslands, savannas, and shrubland biome", + "sample_type": "ENVO:soil", + "scientific_name": "1118232", + "host_taxid": "3483", + "latitude": "35.2374368957", + "ph": "6.94", + "description_duplicate": "Burmese bulk", + "elevation": "114", + "description": "Cannabis Soil Microbiome", + "physical_specimen_remaining": "true", + "dna_extracted": "true", + "taxon_id": "410658", + "samp_salinity": "7.15", + "host_subject_id": "1001:B4", + "season_environment": "winter", + "env_package": "soil", + "temp": "15", + "qiita_study_id": "1", + "country": "GAZ:United States of America", + "longitude": "68.5041623253", + "tot_nitro": "1.41", + "depth": "0.15", + "anonymized_name": "SKB2", + }, + ) def test_sample_template_get_req_no_access(self): - obs = sample_template_get_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = sample_template_get_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_sample_template_get_req_no_template(self): - obs = sample_template_get_req(self.new_study.id, 'test@foo.bar') - self.assertEqual(obs, {'status': 'error', - 'message': 'Sample template %d does not ' - 'exist' % self.new_study.id}) + obs = sample_template_get_req(self.new_study.id, "test@foo.bar") + self.assertEqual( + obs, + { + "status": "error", + "message": "Sample template %d does not exist" % self.new_study.id, + }, + ) def test_analyses_associated_with_study(self): - obs = analyses_associated_with_study(self.new_study.id, 'test@foo.bar') - exp = {'status': 'success', 'message': '', 'values': []} + obs = analyses_associated_with_study(self.new_study.id, "test@foo.bar") + exp = {"status": "success", "message": "", "values": []} self.assertEqual(obs, exp) - obs = analyses_associated_with_study(1, 'test@foo.bar') - exp = {'status': 'success', 'message': '', 'values': [ - {'analysis_id': 1, 'name': 'SomeAnalysis', 'email': 'test@foo.bar', - 'dflt': False, 'artifact_ids': [8, 9], 'prep_ids': [1], - 'visibility': ['sandbox']}, - {'analysis_id': 2, 'name': 'SomeSecondAnalysis', - 'email': 'admin@foo.bar', 'dflt': False, 'artifact_ids': None, - 'prep_ids': [1], 'visibility': None}, - {'analysis_id': 3, 'name': 'test@foo.bar-dflt-1', - 'email': 'test@foo.bar', 'dflt': True, 'artifact_ids': None, - 'prep_ids': [1], 'visibility': None}]} + obs = analyses_associated_with_study(1, "test@foo.bar") + exp = { + "status": "success", + "message": "", + "values": [ + { + "analysis_id": 1, + "name": "SomeAnalysis", + "email": "test@foo.bar", + "dflt": False, + "artifact_ids": [8, 9], + "prep_ids": [1], + "visibility": ["sandbox"], + }, + { + "analysis_id": 2, + "name": "SomeSecondAnalysis", + "email": "admin@foo.bar", + "dflt": False, + "artifact_ids": None, + "prep_ids": [1], + "visibility": None, + }, + { + "analysis_id": 3, + "name": "test@foo.bar-dflt-1", + "email": "test@foo.bar", + "dflt": True, + "artifact_ids": None, + "prep_ids": [1], + "visibility": None, + }, + ], + } self.assertEqual(obs, exp) - obs = analyses_associated_with_study( - self.new_study.id, 'shared@foo.bar') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = analyses_associated_with_study(self.new_study.id, "shared@foo.bar") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_get_sample_template_processing_status(self): @@ -160,25 +199,33 @@ def test_get_sample_template_processing_status(self): 
self.assertEqual(obs_am, "") # With job id and processing - qiita_plugin = qdb.software.Software.from_name_and_version('Qiita', - 'alpha') - cmd = qiita_plugin.get_command('update_sample_template') + qiita_plugin = qdb.software.Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("update_sample_template") params = qdb.software.Parameters.load( - cmd, values_dict={'study': 1, 'template_fp': 'ignored'}) + cmd, values_dict={"study": 1, "template_fp": "ignored"} + ) job = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), params, True) - job._set_status('running') - r_client.set(key, dumps({'job_id': job.id})) + qdb.user.User("test@foo.bar"), params, True + ) + job._set_status("running") + r_client.set(key, dumps({"job_id": job.id})) obs_proc, obs_at, obs_am = get_sample_template_processing_status(1) self.assertTrue(obs_proc) self.assertEqual(obs_at, "info") - self.assertEqual( - obs_am, "This sample template is currently being processed") + self.assertEqual(obs_am, "This sample template is currently being processed") # With job id and success - job._set_status('success') - r_client.set(key, dumps({'job_id': job.id, 'alert_type': 'warning', - 'alert_msg': 'Some\nwarning'})) + job._set_status("success") + r_client.set( + key, + dumps( + { + "job_id": job.id, + "alert_type": "warning", + "alert_msg": "Some\nwarning", + } + ), + ) obs_proc, obs_at, obs_am = get_sample_template_processing_status(1) self.assertFalse(obs_proc) self.assertEqual(obs_at, "warning") @@ -186,10 +233,11 @@ def test_get_sample_template_processing_status(self): # With job and not success job = qdb.processing_job.ProcessingJob.create( - qdb.user.User('test@foo.bar'), params, True) - job._set_status('running') - job._set_error('Some\nerror') - r_client.set(key, dumps({'job_id': job.id})) + qdb.user.User("test@foo.bar"), params, True + ) + job._set_status("running") + job._set_error("Some\nerror") + r_client.set(key, dumps({"job_id": job.id})) obs_proc, obs_at, obs_am = get_sample_template_processing_status(1) self.assertFalse(obs_proc) self.assertEqual(obs_at, "danger") @@ -197,140 +245,200 @@ def test_get_sample_template_processing_status(self): def test_sample_template_columns_get_req_no_template(self): # Test sample template not existing - obs = sample_template_get_req(self.new_study.id, 'test@foo.bar') - exp = {'status': 'error', - 'message': 'Sample template %d does not exist' % - self.new_study.id} + obs = sample_template_get_req(self.new_study.id, "test@foo.bar") + exp = { + "status": "error", + "message": "Sample template %d does not exist" % self.new_study.id, + } self.assertEqual(obs, exp) def test_sample_template_samples_get_req(self): - obs = sample_template_samples_get_req(1, 'test@foo.bar') - exp = {'status': 'success', - 'message': '', - 'samples': ['1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195', - '1.SKB4.640189', '1.SKB5.640181', '1.SKB6.640176', - '1.SKB7.640196', '1.SKB8.640193', '1.SKB9.640200', - '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198', - '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190', - '1.SKD7.640191', '1.SKD8.640184', '1.SKD9.640182', - '1.SKM1.640183', '1.SKM2.640199', '1.SKM3.640197', - '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187', - '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192']} + obs = sample_template_samples_get_req(1, "test@foo.bar") + exp = { + "status": "success", + "message": "", + "samples": [ + "1.SKB1.640202", + "1.SKB2.640194", + "1.SKB3.640195", + "1.SKB4.640189", + "1.SKB5.640181", + "1.SKB6.640176", + 
"1.SKB7.640196", + "1.SKB8.640193", + "1.SKB9.640200", + "1.SKD1.640179", + "1.SKD2.640178", + "1.SKD3.640198", + "1.SKD4.640185", + "1.SKD5.640186", + "1.SKD6.640190", + "1.SKD7.640191", + "1.SKD8.640184", + "1.SKD9.640182", + "1.SKM1.640183", + "1.SKM2.640199", + "1.SKM3.640197", + "1.SKM4.640180", + "1.SKM5.640177", + "1.SKM6.640187", + "1.SKM7.640188", + "1.SKM8.640201", + "1.SKM9.640192", + ], + } self.assertEqual(obs, exp) def test_sample_template_samples_get_req_no_access(self): - obs = sample_template_samples_get_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = sample_template_samples_get_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_sample_template_sample_get_req_no_template(self): - obs = sample_template_samples_get_req(self.new_study.id, - 'test@foo.bar') - self.assertEqual(obs, {'status': 'error', - 'message': 'Sample template %d does not ' - 'exist' % self.new_study.id}) + obs = sample_template_samples_get_req(self.new_study.id, "test@foo.bar") + self.assertEqual( + obs, + { + "status": "error", + "message": "Sample template %d does not exist" % self.new_study.id, + }, + ) def test_sample_template_category_get_req(self): - obs = sample_template_category_get_req('latitude', 1, 'test@foo.bar') - exp = {'status': 'success', - 'message': '', - 'values': {'1.SKB2.640194': '35.2374368957', - '1.SKM4.640180': 'Not applicable', - '1.SKB3.640195': '95.2060749748', - '1.SKB6.640176': '78.3634273709', - '1.SKD6.640190': '29.1499460692', - '1.SKM6.640187': '0.291867635913', - '1.SKD9.640182': '23.1218032799', - '1.SKM8.640201': '3.21190859967', - '1.SKM2.640199': '82.8302905615', - '1.SKD2.640178': '53.5050692395', - '1.SKB7.640196': '13.089194595', - '1.SKD4.640185': '40.8623799474', - '1.SKB8.640193': '74.0894932572', - '1.SKM3.640197': 'Not applicable', - '1.SKD5.640186': '85.4121476399', - '1.SKB1.640202': '4.59216095574', - '1.SKM1.640183': '38.2627021402', - '1.SKD1.640179': '68.0991287718', - '1.SKD3.640198': '84.0030227585', - '1.SKB5.640181': '10.6655599093', - '1.SKB4.640189': '43.9614715197', - '1.SKB9.640200': '12.6245524972', - '1.SKM9.640192': '12.7065957714', - '1.SKD8.640184': '57.571893782', - '1.SKM5.640177': '44.9725384282', - '1.SKM7.640188': '60.1102854322', - '1.SKD7.640191': '68.51099627'}} + obs = sample_template_category_get_req("latitude", 1, "test@foo.bar") + exp = { + "status": "success", + "message": "", + "values": { + "1.SKB2.640194": "35.2374368957", + "1.SKM4.640180": "Not applicable", + "1.SKB3.640195": "95.2060749748", + "1.SKB6.640176": "78.3634273709", + "1.SKD6.640190": "29.1499460692", + "1.SKM6.640187": "0.291867635913", + "1.SKD9.640182": "23.1218032799", + "1.SKM8.640201": "3.21190859967", + "1.SKM2.640199": "82.8302905615", + "1.SKD2.640178": "53.5050692395", + "1.SKB7.640196": "13.089194595", + "1.SKD4.640185": "40.8623799474", + "1.SKB8.640193": "74.0894932572", + "1.SKM3.640197": "Not applicable", + "1.SKD5.640186": "85.4121476399", + "1.SKB1.640202": "4.59216095574", + "1.SKM1.640183": "38.2627021402", + "1.SKD1.640179": "68.0991287718", + "1.SKD3.640198": "84.0030227585", + "1.SKB5.640181": "10.6655599093", + "1.SKB4.640189": "43.9614715197", + "1.SKB9.640200": "12.6245524972", + "1.SKM9.640192": "12.7065957714", + "1.SKD8.640184": "57.571893782", + "1.SKM5.640177": "44.9725384282", + "1.SKM7.640188": "60.1102854322", + "1.SKD7.640191": "68.51099627", + }, + } self.assertEqual(obs, exp) 
def test_sample_template_category_get_req_no_access(self): - obs = sample_template_category_get_req('latitude', 1, - 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = sample_template_category_get_req("latitude", 1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_sample_template_category_get_req_no_template(self): - obs = sample_template_category_get_req('latitiude', self.new_study.id, - 'test@foo.bar') - self.assertEqual(obs, {'status': 'error', - 'message': 'Sample template %d does not ' - 'exist' % self.new_study.id}) + obs = sample_template_category_get_req( + "latitiude", self.new_study.id, "test@foo.bar" + ) + self.assertEqual( + obs, + { + "status": "error", + "message": "Sample template %d does not exist" % self.new_study.id, + }, + ) def test_sample_template_filepaths_get_req(self): - obs = sample_template_filepaths_get_req(1, 'test@foo.bar') + obs = sample_template_filepaths_get_req(1, "test@foo.bar") # have to check each key individually as the filepaths will change - self.assertEqual(obs['status'], 'success') - self.assertEqual(obs['message'], '') + self.assertEqual(obs["status"], "success") + self.assertEqual(obs["message"], "") # [0] the fp_id is the first element, that should change - fp_ids = [fp[0] for fp in obs['filepaths']] + fp_ids = [fp[0] for fp in obs["filepaths"]] self.assertCountEqual(fp_ids, [17, 23]) def test_sample_template_filepaths_get_req_no_access(self): - obs = sample_template_filepaths_get_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = sample_template_filepaths_get_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_sample_template_filepaths_get_req_no_template(self): - obs = sample_template_filepaths_get_req(self.new_study.id, - 'test@foo.bar') - self.assertEqual(obs, {'status': 'error', - 'message': 'Sample template %d does not ' - 'exist' % self.new_study.id}) + obs = sample_template_filepaths_get_req(self.new_study.id, "test@foo.bar") + self.assertEqual( + obs, + { + "status": "error", + "message": "Sample template %d does not exist" % self.new_study.id, + }, + ) def test_sample_template_meta_cats_get_req(self): - obs = sample_template_meta_cats_get_req(1, 'test@foo.bar') - exp = {'status': 'success', - 'message': '', - 'categories': [ - 'altitude', 'anonymized_name', 'assigned_from_geo', - 'collection_timestamp', 'common_name', 'country', 'depth', - 'description', 'description_duplicate', 'dna_extracted', - 'elevation', 'env_biome', 'env_feature', 'env_package', - 'host_subject_id', 'host_taxid', 'latitude', 'longitude', - 'ph', 'physical_specimen_location', - 'physical_specimen_remaining', 'samp_salinity', - 'sample_type', 'scientific_name', 'season_environment', - 'taxon_id', 'temp', 'texture', 'tot_nitro', 'tot_org_carb', - 'water_content_soil']} + obs = sample_template_meta_cats_get_req(1, "test@foo.bar") + exp = { + "status": "success", + "message": "", + "categories": [ + "altitude", + "anonymized_name", + "assigned_from_geo", + "collection_timestamp", + "common_name", + "country", + "depth", + "description", + "description_duplicate", + "dna_extracted", + "elevation", + "env_biome", + "env_feature", + "env_package", + "host_subject_id", + "host_taxid", + "latitude", + "longitude", + "ph", + "physical_specimen_location", + "physical_specimen_remaining", + 
"samp_salinity", + "sample_type", + "scientific_name", + "season_environment", + "taxon_id", + "temp", + "texture", + "tot_nitro", + "tot_org_carb", + "water_content_soil", + ], + } self.assertEqual(obs, exp) def test_sample_template_meta_cats_get_req_no_access(self): - obs = sample_template_meta_cats_get_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = sample_template_meta_cats_get_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_sample_template_meta_cats_get_req_no_template(self): - obs = sample_template_meta_cats_get_req(self.new_study.id, - 'test@foo.bar') - self.assertEqual(obs, {'status': 'error', - 'message': 'Sample template %d does not ' - 'exist' % self.new_study.id}) + obs = sample_template_meta_cats_get_req(self.new_study.id, "test@foo.bar") + self.assertEqual( + obs, + { + "status": "error", + "message": "Sample template %d does not exist" % self.new_study.id, + }, + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/api_proxy/tests/test_studies.py b/qiita_pet/handlers/api_proxy/tests/test_studies.py index 4c574ecfd..2ab5f97d1 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_studies.py +++ b/qiita_pet/handlers/api_proxy/tests/test_studies.py @@ -5,24 +5,30 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main +from collections import defaultdict from datetime import datetime -from os.path import exists, join, isdir from os import remove +from os.path import exists, isdir, join from shutil import rmtree from tempfile import mkdtemp -from collections import defaultdict +from unittest import TestCase, main -import pandas as pd import numpy.testing as npt +import pandas as pd -from qiita_core.util import qiita_test_checker -from qiita_core.exceptions import IncompetentQiitaDeveloperError import qiita_db as qdb +from qiita_core.exceptions import IncompetentQiitaDeveloperError +from qiita_core.util import qiita_test_checker from qiita_pet.handlers.api_proxy.studies import ( - data_types_get_req, study_get_req, study_prep_get_req, study_delete_req, - study_tags_request, study_get_tags_request, study_patch_request, - study_files_get_req) + data_types_get_req, + study_delete_req, + study_files_get_req, + study_get_req, + study_get_tags_request, + study_patch_request, + study_prep_get_req, + study_tags_request, +) @qiita_test_checker() @@ -43,55 +49,79 @@ class TestStudyAPI1(TestStudyAPI): def test_data_types_get_req(self): obs = data_types_get_req() exp = { - 'status': 'success', - 'message': '', - 'data_types': ['16S', '18S', 'ITS', 'Proteomic', 'Metagenomic', - 'Metabolomic']} + "status": "success", + "message": "", + "data_types": [ + "16S", + "18S", + "ITS", + "Proteomic", + "Metagenomic", + "Metabolomic", + ], + } self.assertCountEqual(obs, exp) def test_study_get_req(self): - obs = study_get_req(1, 'test@foo.bar') + obs = study_get_req(1, "test@foo.bar") exp = { - 'status': 'success', - 'study_info': { - 'mixs_compliant': True, 'metadata_complete': True, 'level': '', - 'reprocess': False, 'owner': 'test@foo.bar', 'message': '', - 'funding': None, 'show_biom_download_button': True, - 'publication_pid': ['123456', '7891011'], 'vamps_id': None, - 'first_contact': datetime(2014, 5, 19, 16, 10), - 'ebi_submission_status': 'submitted', - 
'show_raw_download_button': True, 'timeseries_type_id': 1, - 'study_abstract': ( - 'This is a preliminary study to examine the microbiota ' - 'associated with the Cannabis plant. Soils samples from ' - 'the bulk soil, soil associated with the roots, and the ' - 'rhizosphere were extracted and the DNA sequenced. Roots ' - 'from three independent plants of different strains were ' - 'examined. These roots were obtained November 11, 2011 ' - 'from plants that had been harvested in the summer. ' - 'Future studies will attempt to analyze the soils and ' - 'rhizospheres from the same location at different time ' - 'points in the plant lifecycle.'), - 'status': 'private', 'spatial_series': False, - 'public_raw_download': False, - 'study_description': ( - 'Analysis of the Cannabis Plant Microbiome'), - 'shared_with': ['shared@foo.bar'], 'publication_doi': [ - '10.100/123456', '10.100/7891011'], - 'has_access_to_raw_data': True, 'lab_person': { - 'affiliation': 'knight lab', 'name': 'LabDude', - 'email': 'lab_dude@foo.bar'}, - 'principal_investigator': { - 'affiliation': 'Wash U', 'name': 'PIDude', - 'email': 'PI_dude@foo.bar'}, - 'study_alias': 'Cannabis Soils', 'study_id': 1, - 'most_recent_contact': datetime(2014, 5, 19, 16, 11), - 'ebi_study_accession': 'EBI123456-BB', 'num_samples': 27, - 'public_raw_download': False, 'notes': '', 'autoloaded': False, - 'study_title': ( - 'Identification of the Microbiomes for Cannabis Soils')}, - 'message': '', - 'editable': True} + "status": "success", + "study_info": { + "mixs_compliant": True, + "metadata_complete": True, + "level": "", + "reprocess": False, + "owner": "test@foo.bar", + "message": "", + "funding": None, + "show_biom_download_button": True, + "publication_pid": ["123456", "7891011"], + "vamps_id": None, + "first_contact": datetime(2014, 5, 19, 16, 10), + "ebi_submission_status": "submitted", + "show_raw_download_button": True, + "timeseries_type_id": 1, + "study_abstract": ( + "This is a preliminary study to examine the microbiota " + "associated with the Cannabis plant. Soils samples from " + "the bulk soil, soil associated with the roots, and the " + "rhizosphere were extracted and the DNA sequenced. Roots " + "from three independent plants of different strains were " + "examined. These roots were obtained November 11, 2011 " + "from plants that had been harvested in the summer. " + "Future studies will attempt to analyze the soils and " + "rhizospheres from the same location at different time " + "points in the plant lifecycle." 
+ ), + "status": "private", + "spatial_series": False, + "public_raw_download": False, + "study_description": ("Analysis of the Cannabis Plant Microbiome"), + "shared_with": ["shared@foo.bar"], + "publication_doi": ["10.100/123456", "10.100/7891011"], + "has_access_to_raw_data": True, + "lab_person": { + "affiliation": "knight lab", + "name": "LabDude", + "email": "lab_dude@foo.bar", + }, + "principal_investigator": { + "affiliation": "Wash U", + "name": "PIDude", + "email": "PI_dude@foo.bar", + }, + "study_alias": "Cannabis Soils", + "study_id": 1, + "most_recent_contact": datetime(2014, 5, 19, 16, 11), + "ebi_study_accession": "EBI123456-BB", + "num_samples": 27, + "notes": "", + "autoloaded": False, + "study_title": ("Identification of the Microbiomes for Cannabis Soils"), + }, + "message": "", + "editable": True, + } self.assertEqual(obs, exp) # Test with no lab person @@ -103,52 +133,74 @@ def test_study_get_req(self): "study_description": "DESC", "study_abstract": "ABS", "principal_investigator_id": qdb.study.StudyPerson(3), - 'first_contact': datetime(2015, 5, 19, 16, 10), - 'most_recent_contact': datetime(2015, 5, 19, 16, 11), + "first_contact": datetime(2015, 5, 19, 16, 10), + "most_recent_contact": datetime(2015, 5, 19, 16, 11), } new_study = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Some New Study for test", info) - - obs = study_get_req(new_study.id, 'test@foo.bar') - exp = {'status': 'success', 'study_info': { - 'mixs_compliant': True, 'metadata_complete': True, - 'reprocess': False, 'public_raw_download': False, - 'owner': 'test@foo.bar', 'message': '', 'funding': None, - 'show_biom_download_button': False, 'publication_pid': [], - 'vamps_id': None, 'first_contact': datetime(2015, 5, 19, 16, 10), - 'ebi_submission_status': 'not submitted', 'autoloaded': False, - 'show_raw_download_button': False, 'timeseries_type_id': 1, - 'study_abstract': 'ABS', 'status': 'sandbox', - 'spatial_series': None, 'study_description': 'DESC', - 'num_samples': 0, 'shared_with': [], 'publication_doi': [], - 'has_access_to_raw_data': True, 'lab_person': None, 'level': '', - 'principal_investigator': { - 'affiliation': 'Wash U', 'name': 'PIDude', - 'email': 'PI_dude@foo.bar'}, 'study_alias': 'FCM', - 'study_id': new_study.id, 'notes': '', - 'most_recent_contact': datetime(2015, 5, 19, 16, 11), - 'ebi_study_accession': None, - 'study_title': 'Some New Study for test'}, 'message': '', - 'editable': True} + qdb.user.User("test@foo.bar"), "Some New Study for test", info + ) + + obs = study_get_req(new_study.id, "test@foo.bar") + exp = { + "status": "success", + "study_info": { + "mixs_compliant": True, + "metadata_complete": True, + "reprocess": False, + "public_raw_download": False, + "owner": "test@foo.bar", + "message": "", + "funding": None, + "show_biom_download_button": False, + "publication_pid": [], + "vamps_id": None, + "first_contact": datetime(2015, 5, 19, 16, 10), + "ebi_submission_status": "not submitted", + "autoloaded": False, + "show_raw_download_button": False, + "timeseries_type_id": 1, + "study_abstract": "ABS", + "status": "sandbox", + "spatial_series": None, + "study_description": "DESC", + "num_samples": 0, + "shared_with": [], + "publication_doi": [], + "has_access_to_raw_data": True, + "lab_person": None, + "level": "", + "principal_investigator": { + "affiliation": "Wash U", + "name": "PIDude", + "email": "PI_dude@foo.bar", + }, + "study_alias": "FCM", + "study_id": new_study.id, + "notes": "", + "most_recent_contact": datetime(2015, 5, 19, 16, 11), + 
"ebi_study_accession": None, + "study_title": "Some New Study for test", + }, + "message": "", + "editable": True, + } self.assertCountEqual(obs, exp) - self.assertCountEqual(obs['study_info'], exp['study_info']) + self.assertCountEqual(obs["study_info"], exp["study_info"]) def test_study_get_req_no_access(self): - obs = study_get_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = study_get_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_study_get_req_no_exists(self): - obs = study_get_req(4000, 'test@foo.bar') - exp = {'status': 'error', - 'message': 'Study does not exist'} + obs = study_get_req(4000, "test@foo.bar") + exp = {"status": "error", "message": "Study does not exist"} self.assertEqual(obs, exp) def test_study_prep_get_req_failed_EBI(self): temp_dir = mkdtemp() self._clean_up_files.append(temp_dir) - user_email = 'test@foo.bar' + user_email = "test@foo.bar" # creating a (A) new study, (B) sample info, (C) prep without EBI # values @@ -162,135 +214,170 @@ def test_study_prep_get_req_failed_EBI(self): "study_description": "Study for testing EBI", "study_abstract": "Study for testing EBI", "principal_investigator_id": qdb.study.StudyPerson(3), - "lab_person_id": qdb.study.StudyPerson(1) + "lab_person_id": qdb.study.StudyPerson(1), } study = qdb.study.Study.create( - qdb.user.User(user_email), "Test EBI study", info) + qdb.user.User(user_email), "Test EBI study", info + ) # (B) metadata_dict = { - 'Sample1': {'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0), - 'physical_specimen_location': 'location1', - 'taxon_id': 9606, - 'scientific_name': 'homo sapiens', - 'Description': 'Test Sample 1'}, - 'Sample2': {'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0), - 'physical_specimen_location': 'location1', - 'taxon_id': 9606, - 'scientific_name': 'homo sapiens', - 'Description': 'Test Sample 2'}, - 'Sample3': {'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0), - 'physical_specimen_location': 'location1', - 'taxon_id': 9606, - 'scientific_name': 'homo sapiens', - 'Description': 'Test Sample 3'} + "Sample1": { + "collection_timestamp": datetime(2015, 6, 1, 7, 0, 0), + "physical_specimen_location": "location1", + "taxon_id": 9606, + "scientific_name": "homo sapiens", + "Description": "Test Sample 1", + }, + "Sample2": { + "collection_timestamp": datetime(2015, 6, 2, 7, 0, 0), + "physical_specimen_location": "location1", + "taxon_id": 9606, + "scientific_name": "homo sapiens", + "Description": "Test Sample 2", + }, + "Sample3": { + "collection_timestamp": datetime(2015, 6, 3, 7, 0, 0), + "physical_specimen_location": "location1", + "taxon_id": 9606, + "scientific_name": "homo sapiens", + "Description": "Test Sample 3", + }, } - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) npt.assert_warns( qdb.exceptions.QiitaDBWarning, qdb.metadata_template.sample_template.SampleTemplate.create, - metadata, study) + metadata, + study, + ) # (C) metadata_dict = { - 'Sample1': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTC', - 'center_name': 'KnightLab', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'Protocol ABC', - 'experiment_design_description': "Random value 1"}, - 'Sample2': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTA', - 'center_name': 
'KnightLab', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'Protocol ABC', - 'experiment_design_description': "Random value 2"}, - 'Sample3': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTT', - 'center_name': 'KnightLab', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'Protocol ABC', - 'experiment_design_description': "Random value 3"}, + "Sample1": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTC", + "center_name": "KnightLab", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "Protocol ABC", + "experiment_design_description": "Random value 1", + }, + "Sample2": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTA", + "center_name": "KnightLab", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "Protocol ABC", + "experiment_design_description": "Random value 2", + }, + "Sample3": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTT", + "center_name": "KnightLab", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "Protocol ABC", + "experiment_design_description": "Random value 3", + }, } - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) pt = qdb.metadata_template.prep_template.PrepTemplate.create( - metadata, study, "16S", 'Metagenomics') + metadata, study, "16S", "Metagenomics" + ) # making sure that the EBI values are empty - exp = {('%d.Sample3' % study.id): None, - ('%d.Sample2' % study.id): None, - ('%d.Sample1' % study.id): None} + exp = { + ("%d.Sample3" % study.id): None, + ("%d.Sample2" % study.id): None, + ("%d.Sample1" % study.id): None, + } self.assertEqual(pt.ebi_experiment_accessions, exp) # actual test obs = study_prep_get_req(study.id, user_email) - temp_info = {'16S': [ - {"status": 'sandbox', - 'name': 'Prep information %d' % pt.id, - 'start_artifact': None, 'youngest_artifact': None, - 'ebi_experiment': False, 'id': pt.id, - 'start_artifact_id': None, - 'creation_timestamp': pt.creation_timestamp, - 'modification_timestamp': pt.modification_timestamp, - 'num_artifact_children': 0, - 'youngest_artifact_name': None, - 'youngest_artifact_type': None, - 'total_samples': 3}]} + temp_info = { + "16S": [ + { + "status": "sandbox", + "name": "Prep information %d" % pt.id, + "start_artifact": None, + "youngest_artifact": None, + "ebi_experiment": False, + "id": pt.id, + "start_artifact_id": None, + "creation_timestamp": pt.creation_timestamp, + "modification_timestamp": pt.modification_timestamp, + "num_artifact_children": 0, + "youngest_artifact_name": None, + "youngest_artifact_type": None, + "total_samples": 3, + } + ] + } - exp = { - 'info': temp_info, - 'message': '', - 'status': 'success'} + exp = {"info": temp_info, "message": "", "status": "success"} self.assertEqual(obs, exp) qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) def test_study_prep_get_req_no_access(self): - obs = study_prep_get_req(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = study_prep_get_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_study_delete_req_no_access(self): - obs = study_delete_req(1, 'demo@microbio.me') - exp = 
{'status': 'error', - 'message': 'User has insufficient permissions'} + obs = study_delete_req(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_study_delete_req_no_exists(self): - obs = study_delete_req(4, 'test@foo.bar') - exp = {'status': 'error', - 'message': 'Study does not exist'} + obs = study_delete_req(4, "test@foo.bar") + exp = {"status": "error", "message": "Study does not exist"} self.assertEqual(obs, exp) def test_study_files_get_req(self): - obs = study_files_get_req('test@foo.bar', 1, 1, 'FASTQ') - exp = {'status': 'success', - 'message': '', - 'remaining': ['uploaded_file.txt'], - 'file_types': [('raw_barcodes', True, []), - ('raw_forward_seqs', True, []), - ('raw_reverse_seqs', False, [])], - 'num_prefixes': 1, - 'artifacts': [(1, 'Identification of the Microbiomes for ' - 'Cannabis Soils (1) - Raw data 1 (1)')]} + obs = study_files_get_req("test@foo.bar", 1, 1, "FASTQ") + exp = { + "status": "success", + "message": "", + "remaining": ["uploaded_file.txt"], + "file_types": [ + ("raw_barcodes", True, []), + ("raw_forward_seqs", True, []), + ("raw_reverse_seqs", False, []), + ], + "num_prefixes": 1, + "artifacts": [ + ( + 1, + "Identification of the Microbiomes for " + "Cannabis Soils (1) - Raw data 1 (1)", + ) + ], + } self.assertEqual(obs, exp) - obs = study_files_get_req('admin@foo.bar', 1, 1, 'FASTQ') - exp = {'status': 'success', - 'message': '', - 'remaining': ['uploaded_file.txt'], - 'file_types': [('raw_barcodes', True, []), - ('raw_forward_seqs', True, []), - ('raw_reverse_seqs', False, [])], - 'num_prefixes': 1, - 'artifacts': [(1, 'Identification of the Microbiomes for ' - 'Cannabis Soils (1) - Raw data 1 (1)')]} + obs = study_files_get_req("admin@foo.bar", 1, 1, "FASTQ") + exp = { + "status": "success", + "message": "", + "remaining": ["uploaded_file.txt"], + "file_types": [ + ("raw_barcodes", True, []), + ("raw_forward_seqs", True, []), + ("raw_reverse_seqs", False, []), + ], + "num_prefixes": 1, + "artifacts": [ + ( + 1, + "Identification of the Microbiomes for " + "Cannabis Soils (1) - Raw data 1 (1)", + ) + ], + } self.assertEqual(obs, exp) # adding a new study for further testing @@ -302,176 +389,232 @@ def test_study_files_get_req(self): "study_description": "DESC", "study_abstract": "ABS", "principal_investigator_id": qdb.study.StudyPerson(3), - "lab_person_id": qdb.study.StudyPerson(1) + "lab_person_id": qdb.study.StudyPerson(1), } new_study = qdb.study.Study.create( - qdb.user.User('test@foo.bar'), "Some New Study to get files", info) + qdb.user.User("test@foo.bar"), "Some New Study to get files", info + ) # check that you can't call a this function using two unrelated # study_id and prep_template_id with self.assertRaises(IncompetentQiitaDeveloperError): - study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ') + study_files_get_req("test@foo.bar", new_study.id, 1, "FASTQ") def test_study_files_get_req_multiple(self): study_id = 1 # adding a new prep for testing PREP = qdb.metadata_template.prep_template.PrepTemplate prep_info_dict = { - 'SKB7.640196': {'run_prefix': 'test_1'}, - 'SKB8.640193': {'run_prefix': 'test_2'} + "SKB7.640196": {"run_prefix": "test_1"}, + "SKB8.640193": {"run_prefix": "test_2"}, } - prep_info = pd.DataFrame.from_dict(prep_info_dict, - orient='index', dtype=str) + prep_info = pd.DataFrame.from_dict(prep_info_dict, orient="index", dtype=str) pt = npt.assert_warns( - qdb.exceptions.QiitaDBWarning, PREP.create, prep_info, - 
qdb.study.Study(study_id), "Metagenomic") + qdb.exceptions.QiitaDBWarning, + PREP.create, + prep_info, + qdb.study.Study(study_id), + "Metagenomic", + ) # getting the upload folder so we can test - study_upload_dir = join( - qdb.util.get_mountpoint("uploads")[0][1], str(study_id)) + study_upload_dir = join(qdb.util.get_mountpoint("uploads")[0][1], str(study_id)) # adding just foward per sample FASTQ to the upload folder - filenames = ['test_1.R1.fastq.gz', 'test_2.R1.fastq.gz'] + filenames = ["test_1.R1.fastq.gz", "test_2.R1.fastq.gz"] for f in filenames: fpt = join(study_upload_dir, f) - open(fpt, 'wb', 0).close() + open(fpt, "wb", 0).close() self._clean_up_files.append(fpt) - obs = study_files_get_req( - 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') + obs = study_files_get_req("shared@foo.bar", 1, pt.id, "per_sample_FASTQ") exp = { - 'status': 'success', 'num_prefixes': 2, 'artifacts': [], - 'remaining': ['uploaded_file.txt'], 'message': '', - 'file_types': [ - ('raw_forward_seqs', True, - sorted(['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz'])), - ('raw_reverse_seqs', False, [])]} + "status": "success", + "num_prefixes": 2, + "artifacts": [], + "remaining": ["uploaded_file.txt"], + "message": "", + "file_types": [ + ( + "raw_forward_seqs", + True, + sorted(["test_2.R1.fastq.gz", "test_1.R1.fastq.gz"]), + ), + ("raw_reverse_seqs", False, []), + ], + } # making sure they are always in the same order - oft = obs['file_types'][0] - obs['file_types'][0] = (oft[0], oft[1], sorted(oft[2])) + oft = obs["file_types"][0] + obs["file_types"][0] = (oft[0], oft[1], sorted(oft[2])) self.assertEqual(obs, exp) # let's add reverse - filenames = ['test_1.R2.fastq.gz', 'test_2.R2.fastq.gz'] + filenames = ["test_1.R2.fastq.gz", "test_2.R2.fastq.gz"] for f in filenames: fpt = join(study_upload_dir, f) - open(fpt, 'wb', 0).close() + open(fpt, "wb", 0).close() self._clean_up_files.append(fpt) - obs = study_files_get_req( - 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') - exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [], - 'remaining': ['uploaded_file.txt'], 'message': '', - 'file_types': [ - ('raw_forward_seqs', True, sorted( - ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz'])), - ('raw_reverse_seqs', False, sorted( - ['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz']))]} + obs = study_files_get_req("shared@foo.bar", 1, pt.id, "per_sample_FASTQ") + exp = { + "status": "success", + "num_prefixes": 2, + "artifacts": [], + "remaining": ["uploaded_file.txt"], + "message": "", + "file_types": [ + ( + "raw_forward_seqs", + True, + sorted(["test_2.R1.fastq.gz", "test_1.R1.fastq.gz"]), + ), + ( + "raw_reverse_seqs", + False, + sorted(["test_2.R2.fastq.gz", "test_1.R2.fastq.gz"]), + ), + ], + } # making sure they are always in the same order - oft = obs['file_types'] - obs['file_types'][0] = (oft[0][0], oft[0][1], sorted(oft[0][2])) - obs['file_types'][1] = (oft[1][0], oft[1][1], sorted(oft[1][2])) + oft = obs["file_types"] + obs["file_types"][0] = (oft[0][0], oft[0][1], sorted(oft[0][2])) + obs["file_types"][1] = (oft[1][0], oft[1][1], sorted(oft[1][2])) self.assertEqual(obs, exp) # let's an extra file that matches - filenames = ['test_1.R3.fastq.gz'] + filenames = ["test_1.R3.fastq.gz"] for f in filenames: fpt = join(study_upload_dir, f) - open(fpt, 'wb', 0).close() + open(fpt, "wb", 0).close() self._clean_up_files.append(fpt) - obs = study_files_get_req( - 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') - exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [], - 'remaining': ['test_1.R1.fastq.gz', 
'test_1.R2.fastq.gz', - 'test_1.R3.fastq.gz', 'uploaded_file.txt'], - 'message': "Check these run_prefix:\n'test_1' has 3 matches.", - 'file_types': [('raw_forward_seqs', True, - ['test_2.R1.fastq.gz']), - ('raw_reverse_seqs', False, - ['test_2.R2.fastq.gz'])]} + obs = study_files_get_req("shared@foo.bar", 1, pt.id, "per_sample_FASTQ") + exp = { + "status": "success", + "num_prefixes": 2, + "artifacts": [], + "remaining": [ + "test_1.R1.fastq.gz", + "test_1.R2.fastq.gz", + "test_1.R3.fastq.gz", + "uploaded_file.txt", + ], + "message": "Check these run_prefix:\n'test_1' has 3 matches.", + "file_types": [ + ("raw_forward_seqs", True, ["test_2.R1.fastq.gz"]), + ("raw_reverse_seqs", False, ["test_2.R2.fastq.gz"]), + ], + } self.assertEqual(obs, exp) # now if we select FASTQ we have 3 columns so the extra file should go # to the 3rd column - obs = study_files_get_req( - 'shared@foo.bar', 1, pt.id, 'FASTQ') - exp = {'status': 'success', 'num_prefixes': 2, - 'remaining': ['uploaded_file.txt'], - 'message': '', - 'artifacts': [(1, 'Identification of the Microbiomes for ' - 'Cannabis Soils (1) - Raw data 1 (1)')], - 'file_types': [ - ('raw_barcodes', True, sorted( - ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz'])), - ('raw_forward_seqs', True, sorted( - ['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz'])), - ('raw_reverse_seqs', False, ['test_1.R3.fastq.gz'])]} + obs = study_files_get_req("shared@foo.bar", 1, pt.id, "FASTQ") + exp = { + "status": "success", + "num_prefixes": 2, + "remaining": ["uploaded_file.txt"], + "message": "", + "artifacts": [ + ( + 1, + "Identification of the Microbiomes for " + "Cannabis Soils (1) - Raw data 1 (1)", + ) + ], + "file_types": [ + ( + "raw_barcodes", + True, + sorted(["test_2.R1.fastq.gz", "test_1.R1.fastq.gz"]), + ), + ( + "raw_forward_seqs", + True, + sorted(["test_2.R2.fastq.gz", "test_1.R2.fastq.gz"]), + ), + ("raw_reverse_seqs", False, ["test_1.R3.fastq.gz"]), + ], + } # making sure they are always in the same order - oft = obs['file_types'] - obs['file_types'][0] = (oft[0][0], oft[0][1], sorted(oft[0][2])) - obs['file_types'][1] = (oft[1][0], oft[1][1], sorted(oft[1][2])) + oft = obs["file_types"] + obs["file_types"][0] = (oft[0][0], oft[0][1], sorted(oft[0][2])) + obs["file_types"][1] = (oft[1][0], oft[1][1], sorted(oft[1][2])) self.assertEqual(obs, exp) PREP.delete(pt.id) def test_study_get_tags_request(self): - obs = study_get_tags_request('shared@foo.bar', 1) - exp = {'status': 'success', 'message': '', 'tags': []} + obs = study_get_tags_request("shared@foo.bar", 1) + exp = {"status": "success", "message": "", "tags": []} self.assertEqual(obs, exp) # check error - obs = study_get_tags_request('shared@foo.bar', 2000) - exp = {'message': 'Study does not exist', 'status': 'error'} + obs = study_get_tags_request("shared@foo.bar", 2000) + exp = {"message": "Study does not exist", "status": "error"} self.assertEqual(obs, exp) def test_study_patch_request_tags(self): # adding test for study_tags_request here as it makes sense to check # that the tags were added obs = study_tags_request() - exp = {'status': 'success', 'message': '', - 'tags': {'admin': [], 'user': []}} + exp = {"status": "success", "message": "", "tags": {"admin": [], "user": []}} self.assertEqual(obs, exp) obs = study_patch_request( - 'shared@foo.bar', 1, 'replace', '/tags', ['testA', 'testB']) - exp = {'status': 'success', 'message': ''} + "shared@foo.bar", 1, "replace", "/tags", ["testA", "testB"] + ) + exp = {"status": "success", "message": ""} self.assertEqual(obs, exp) obs = 
study_tags_request() - exp = {'status': 'success', 'message': '', - 'tags': {'admin': [], 'user': ['testA', 'testB']}} + exp = { + "status": "success", + "message": "", + "tags": {"admin": [], "user": ["testA", "testB"]}, + } self.assertEqual(obs, exp) obs = study_patch_request( - 'shared@foo.bar', 2000, 'replace', '/tags', ['testA', 'testB']) - exp = {'message': 'Study does not exist', 'status': 'error'} + "shared@foo.bar", 2000, "replace", "/tags", ["testA", "testB"] + ) + exp = {"message": "Study does not exist", "status": "error"} self.assertEqual(obs, exp) def test_study_patch_request_errors(self): # check errors obs = study_patch_request( - 'shared@foo.bar', 1, 'no-exists', '/tags', ['testA', 'testB']) - exp = {'message': ('Operation "no-exists" not supported. Current ' - 'supported operations: replace'), 'status': 'error'} + "shared@foo.bar", 1, "no-exists", "/tags", ["testA", "testB"] + ) + exp = { + "message": ( + 'Operation "no-exists" not supported. Current ' + "supported operations: replace" + ), + "status": "error", + } self.assertEqual(obs, exp) obs = study_patch_request( - 'shared@foo.bar', 1, 'replace', '/tags/na', ['testA', 'testB']) - exp = {'message': 'Incorrect path parameter', 'status': 'error'} + "shared@foo.bar", 1, "replace", "/tags/na", ["testA", "testB"] + ) + exp = {"message": "Incorrect path parameter", "status": "error"} self.assertEqual(obs, exp) - obs = study_patch_request( - 'shared@foo.bar', 1, 'replace', '/na') - exp = {'message': ('Attribute "na" not found. Please, check the ' - 'path parameter'), 'status': 'error'} + obs = study_patch_request("shared@foo.bar", 1, "replace", "/na") + exp = { + "message": ('Attribute "na" not found. Please, check the path parameter'), + "status": "error", + } self.assertEqual(obs, exp) def test_study_patch_request_toggle_public_raw_download(self): study_id = 1 study = qdb.study.Study(study_id) - obs = study_patch_request('shared@foo.bar', study_id, - 'replace', '/toggle_public_raw_download', - None) - exp = {'status': 'success', 'message': 'Successfully updated ' - 'public_raw_download'} + obs = study_patch_request( + "shared@foo.bar", study_id, "replace", "/toggle_public_raw_download", None + ) + exp = { + "status": "success", + "message": "Successfully updated public_raw_download", + } self.assertEqual(obs, exp) self.assertTrue(study.public_raw_download) @@ -482,98 +625,98 @@ def test_study_patch_request_toggle_public_raw_download(self): class TestStudyAPI2(TestStudyAPI): # This test expects a clean DB so creating it's own class def test_study_prep_get_req(self): - obs = study_prep_get_req(1, 'test@foo.bar') - obs_info = obs['info']['18S'] + obs = study_prep_get_req(1, "test@foo.bar") + obs_info = obs["info"]["18S"] temp_info = defaultdict(list) - temp_info['18S'] = [{ - 'id': 1, - 'status': 'private', - 'name': 'Prep information 1', - 'start_artifact_id': 1, - 'start_artifact': 'FASTQ', - 'youngest_artifact': 'BIOM - BIOM', - 'youngest_artifact_name': 'BIOM', - 'youngest_artifact_type': 'BIOM', - 'num_artifact_children': 2, - 'total_samples': 27, - 'ebi_experiment': 27, - 'modification_timestamp': - obs_info[0]['modification_timestamp'], - 'creation_timestamp': - obs_info[0]['creation_timestamp']}, { - 'id': 2, - 'status': 'private', - 'name': 'Prep information 2', - 'start_artifact': 'BIOM', - 'youngest_artifact': 'BIOM - BIOM', - 'youngest_artifact_name': 'BIOM', - 'youngest_artifact_type': 'BIOM', - 'total_samples': 27, - 'num_artifact_children': 0, - 'ebi_experiment': 27, - 'start_artifact_id': 7, - 
'modification_timestamp': - obs_info[1]['modification_timestamp'], - 'creation_timestamp': - obs_info[1]['creation_timestamp'] - }] - exp = {'status': 'success', - 'message': '', - 'info': temp_info} + temp_info["18S"] = [ + { + "id": 1, + "status": "private", + "name": "Prep information 1", + "start_artifact_id": 1, + "start_artifact": "FASTQ", + "youngest_artifact": "BIOM - BIOM", + "youngest_artifact_name": "BIOM", + "youngest_artifact_type": "BIOM", + "num_artifact_children": 2, + "total_samples": 27, + "ebi_experiment": 27, + "modification_timestamp": obs_info[0]["modification_timestamp"], + "creation_timestamp": obs_info[0]["creation_timestamp"], + }, + { + "id": 2, + "status": "private", + "name": "Prep information 2", + "start_artifact": "BIOM", + "youngest_artifact": "BIOM - BIOM", + "youngest_artifact_name": "BIOM", + "youngest_artifact_type": "BIOM", + "total_samples": 27, + "num_artifact_children": 0, + "ebi_experiment": 27, + "start_artifact_id": 7, + "modification_timestamp": obs_info[1]["modification_timestamp"], + "creation_timestamp": obs_info[1]["creation_timestamp"], + }, + ] + exp = {"status": "success", "message": "", "info": temp_info} self.assertEqual(obs, exp) # Add a new prep template pt = npt.assert_warns( qdb.exceptions.QiitaDBWarning, qdb.metadata_template.prep_template.PrepTemplate.create, - pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), - qdb.study.Study(1), '16S') - obs = study_prep_get_req(1, 'test@foo.bar') - temp_info['16S'] = [{'id': pt.id, - 'status': 'sandbox', - 'name': 'Prep information %d' % pt.id, - 'creation_timestamp': pt.creation_timestamp, - 'modification_timestamp': - pt.modification_timestamp, - 'total_samples': 1, - 'start_artifact_id': None, - 'start_artifact': None, - 'youngest_artifact': None, - 'num_artifact_children': 0, - 'youngest_artifact_name': None, - 'youngest_artifact_type': None, - 'ebi_experiment': 0}] - exp = {'status': 'success', - 'message': '', - 'info': temp_info} + pd.DataFrame({"new_col": {"1.SKD6.640190": 1}}), + qdb.study.Study(1), + "16S", + ) + obs = study_prep_get_req(1, "test@foo.bar") + temp_info["16S"] = [ + { + "id": pt.id, + "status": "sandbox", + "name": "Prep information %d" % pt.id, + "creation_timestamp": pt.creation_timestamp, + "modification_timestamp": pt.modification_timestamp, + "total_samples": 1, + "start_artifact_id": None, + "start_artifact": None, + "youngest_artifact": None, + "num_artifact_children": 0, + "youngest_artifact_name": None, + "youngest_artifact_type": None, + "ebi_experiment": 0, + } + ] + exp = {"status": "success", "message": "", "info": temp_info} self.assertEqual(obs, exp) - obs = study_prep_get_req(1, 'admin@foo.bar') + obs = study_prep_get_req(1, "admin@foo.bar") self.assertEqual(obs, exp) - qdb.artifact.Artifact(1).visibility = 'public' - obs = study_prep_get_req(1, 'demo@microbio.me') + qdb.artifact.Artifact(1).visibility = "public" + obs = study_prep_get_req(1, "demo@microbio.me") temp_info = defaultdict(list) - temp_info['18S'] = [{ - 'id': 1, - 'status': 'public', - 'name': 'Prep information 1', - 'start_artifact_id': 1, - 'start_artifact': 'FASTQ', - 'youngest_artifact': 'BIOM - BIOM', - 'youngest_artifact_name': 'BIOM', - 'youngest_artifact_type': 'BIOM', - 'num_artifact_children': 2, - 'total_samples': 27, - 'ebi_experiment': 27, - 'modification_timestamp': - obs_info[0]['modification_timestamp'], - 'creation_timestamp': - obs_info[0]['creation_timestamp']}] - temp_info['16S'] = [] - exp = {'status': 'success', - 'message': '', - 'info': temp_info} + temp_info["18S"] 
= [ + { + "id": 1, + "status": "public", + "name": "Prep information 1", + "start_artifact_id": 1, + "start_artifact": "FASTQ", + "youngest_artifact": "BIOM - BIOM", + "youngest_artifact_name": "BIOM", + "youngest_artifact_type": "BIOM", + "num_artifact_children": 2, + "total_samples": 27, + "ebi_experiment": 27, + "modification_timestamp": obs_info[0]["modification_timestamp"], + "creation_timestamp": obs_info[0]["creation_timestamp"], + } + ] + temp_info["16S"] = [] + exp = {"status": "success", "message": "", "info": temp_info} self.assertEqual(obs, exp) # Reset visibility of the artifacts for i in range(4, 0, -1): @@ -582,5 +725,5 @@ def test_study_prep_get_req(self): qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/api_proxy/tests/test_user.py b/qiita_pet/handlers/api_proxy/tests/test_user.py index 11c0cb1e2..2c2ff8890 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_user.py +++ b/qiita_pet/handlers/api_proxy/tests/test_user.py @@ -5,14 +5,14 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main -from os.path import exists, isdir from os import remove +from os.path import exists, isdir from shutil import rmtree +from unittest import TestCase, main -from qiita_core.util import qiita_test_checker import qiita_db as qdb -from qiita_pet.handlers.api_proxy.user import (user_jobs_get_req) +from qiita_core.util import qiita_test_checker +from qiita_pet.handlers.api_proxy.user import user_jobs_get_req @qiita_test_checker() @@ -29,13 +29,10 @@ def tearDown(self): remove(fp) def test_user_jobs_get_req(self): - obs = user_jobs_get_req(qdb.user.User('shared@foo.bar')) - exp = { - 'status': 'success', - 'message': '', - 'jobs': []} + obs = user_jobs_get_req(qdb.user.User("shared@foo.bar")) + exp = {"status": "success", "message": "", "jobs": []} self.assertEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/api_proxy/tests/test_util.py b/qiita_pet/handlers/api_proxy/tests/test_util.py index 93022eaa1..f0ce98fb4 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_util.py +++ b/qiita_pet/handlers/api_proxy/tests/test_util.py @@ -5,8 +5,8 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main from os.path import join +from unittest import TestCase, main from qiita_db.util import get_mountpoint from qiita_pet.handlers.api_proxy.util import check_access, check_fp @@ -14,36 +14,34 @@ class TestUtil(TestCase): def test_check_access(self): - obs = check_access(1, 'test@foo.bar') + obs = check_access(1, "test@foo.bar") self.assertEqual(obs, {}) def test_check_access_no_access(self): - obs = check_access(1, 'demo@microbio.me') - exp = {'status': 'error', - 'message': 'User has insufficient permissions'} + obs = check_access(1, "demo@microbio.me") + exp = {"status": "error", "message": "User has insufficient permissions"} self.assertEqual(obs, exp) def test_check_access_bad_id(self): - obs = check_access(232423423, 'test@foo.bar') - exp = {'status': 'error', - 'message': 'Study does not exist'} + obs = check_access(232423423, "test@foo.bar") + exp = {"status": "error", "message": "Study does not exist"} self.assertEqual(obs, exp) def test_check_fp(self): - obs = check_fp(1, 'uploaded_file.txt') + obs = check_fp(1, "uploaded_file.txt") _, base_fp = get_mountpoint("uploads")[0] - exp = {'status': 'success', - 'message': '', - 'file': join(base_fp, '1', 'uploaded_file.txt')} + exp = { + "status": "success", + "message": "", + "file": join(base_fp, "1", "uploaded_file.txt"), + } self.assertEqual(obs, exp) def test_check_fp_bad_fp(self): - obs = check_fp(1, 'badfile') - exp = {'status': 'error', - 'message': 'file does not exist', - 'file': 'badfile'} + obs = check_fp(1, "badfile") + exp = {"status": "error", "message": "file does not exist", "file": "badfile"} self.assertEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/handlers/api_proxy/user.py b/qiita_pet/handlers/api_proxy/user.py index 7e0b98e98..519d18710 100644 --- a/qiita_pet/handlers/api_proxy/user.py +++ b/qiita_pet/handlers/api_proxy/user.py @@ -33,16 +33,17 @@ def user_jobs_get_req(user, limit=30): hb = j.heartbeat hb = "" if hb is None else hb.strftime("%Y-%m-%d %H:%M:%S") pjw = j.processing_job_workflow - wid = '' if pjw is None else pjw.id - response.append({ - 'id': j.id, - 'name': name, - 'params': j.parameters.values, - 'status': j.status, - 'heartbeat': hb, - 'step': j.step, - 'processing_job_workflow_id': wid}) + wid = "" if pjw is None else pjw.id + response.append( + { + "id": j.id, + "name": name, + "params": j.parameters.values, + "status": j.status, + "heartbeat": hb, + "step": j.step, + "processing_job_workflow_id": wid, + } + ) - return {'status': 'success', - 'message': '', - 'jobs': response} + return {"status": "success", "message": "", "jobs": response} diff --git a/qiita_pet/handlers/api_proxy/util.py b/qiita_pet/handlers/api_proxy/util.py index c141ddfa1..196968cc5 100644 --- a/qiita_pet/handlers/api_proxy/util.py +++ b/qiita_pet/handlers/api_proxy/util.py @@ -34,11 +34,9 @@ def check_access(study_id, user_id): try: study = Study(int(study_id)) except QiitaDBUnknownIDError: - return {'status': 'error', - 'message': 'Study does not exist'} + return {"status": "error", "message": "Study does not exist"} if not study.has_access(User(user_id)): - return {'status': 'error', - 'message': 'User has insufficient permissions'} + return {"status": "error", "message": "User has insufficient permissions"} return {} @@ -68,9 +66,5 @@ def check_fp(study_id, filename): if not exists(fp_rsp): # The file does not exist, fail nicely - return {'status': 
'error', - 'message': 'file does not exist', - 'file': filename} - return {'status': 'success', - 'message': '', - 'file': fp_rsp} + return {"status": "error", "message": "file does not exist", "file": filename} + return {"status": "success", "message": "", "file": fp_rsp} diff --git a/qiita_pet/handlers/artifact_handlers/__init__.py b/qiita_pet/handlers/artifact_handlers/__init__.py index 8d3c20c9c..4b8c52c50 100644 --- a/qiita_pet/handlers/artifact_handlers/__init__.py +++ b/qiita_pet/handlers/artifact_handlers/__init__.py @@ -6,7 +6,6 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from .base_handlers import (ArtifactSummaryAJAX, ArtifactAJAX, - ArtifactSummaryHandler) +from .base_handlers import ArtifactAJAX, ArtifactSummaryAJAX, ArtifactSummaryHandler -__all__ = ['ArtifactSummaryAJAX', 'ArtifactAJAX', 'ArtifactSummaryHandler'] +__all__ = ["ArtifactSummaryAJAX", "ArtifactAJAX", "ArtifactSummaryHandler"] diff --git a/qiita_pet/handlers/artifact_handlers/base_handlers.py b/qiita_pet/handlers/artifact_handlers/base_handlers.py index f57a4c6b7..336acf4e3 100644 --- a/qiita_pet/handlers/artifact_handlers/base_handlers.py +++ b/qiita_pet/handlers/artifact_handlers/base_handlers.py @@ -6,25 +6,24 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from os.path import basename, relpath from json import dumps -from humanize import naturalsize +from os.path import basename, relpath -from tornado.web import authenticated, StaticFileHandler +from humanize import naturalsize +from tornado.web import StaticFileHandler, authenticated from qiita_core.qiita_settings import qiita_config, r_client -from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_pet.handlers.util import safe_execution -from qiita_pet.exceptions import QiitaHTTPError from qiita_db.artifact import Artifact -from qiita_db.software import Command, Software, Parameters -from qiita_db.processing_job import ProcessingJob -from qiita_db.util import get_visibilities, send_email from qiita_db.logger import LogEntry from qiita_db.meta_util import RAW_DATA_ARTIFACT_TYPE +from qiita_db.processing_job import ProcessingJob +from qiita_db.software import Command, Parameters, Software +from qiita_db.util import get_visibilities, send_email +from qiita_pet.exceptions import QiitaHTTPError +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.handlers.util import safe_execution - -PREP_TEMPLATE_KEY_FORMAT = 'prep_template_%s' +PREP_TEMPLATE_KEY_FORMAT = "prep_template_%s" def check_artifact_access(user, artifact): @@ -42,30 +41,26 @@ def check_artifact_access(user, artifact): QiitaHTTPError If the user doesn't have access to the given artifact """ - if user.level in ('admin', 'wet-lab admin'): + if user.level in ("admin", "wet-lab admin"): return study = artifact.study - if artifact.visibility == 'public': + if artifact.visibility == "public": # if it's public we need to confirm that this artifact has no possible # human sequences if artifact.has_human and not study.has_access(user, True): - raise QiitaHTTPError(403, "Access denied to artifact %s" - % artifact.id) + raise QiitaHTTPError(403, "Access denied to artifact %s" % artifact.id) else: analysis = artifact.analysis if study: if not study.has_access(user): - raise QiitaHTTPError(403, "Access denied to study %s" - % artifact.id) + raise 
QiitaHTTPError(403, "Access denied to study %s" % artifact.id) elif analysis: if not analysis.has_access(user): - raise QiitaHTTPError(403, "Access denied to artifact %s" - % artifact.id) + raise QiitaHTTPError(403, "Access denied to artifact %s" % artifact.id) else: # This can't happen but worth adding a check - raise QiitaHTTPError(500, "Error accessing artifact %s" - % artifact.id) + raise QiitaHTTPError(500, "Error accessing artifact %s" % artifact.id) def artifact_summary_get_request(user, artifact_id): @@ -120,9 +115,9 @@ def artifact_summary_get_request(user, artifact_id): jobs = [] errored_summary_jobs = [] for j in all_jobs: - if j.status in ['queued', 'running']: + if j.status in ["queued", "running"]: jobs.append(j) - elif j.status in ['error']: + elif j.status in ["error"]: errored_summary_jobs.append(j) if jobs: # There is already a job generating the HTML. Also, there should be @@ -136,59 +131,63 @@ def artifact_summary_get_request(user, artifact_id): analysis = artifact.analysis # if is a folder and has no parents, it means that is an SPP job and # nobody should be able to change anything about it - if artifact_type == 'job-output-folder' and not artifact.parents: + if artifact_type == "job-output-folder" and not artifact.parents: editable = False else: editable = study.can_edit(user) if study else analysis.can_edit(user) buttons = [] btn_base = ( - '').format(artifact_id) - if not analysis and artifact_type != 'job-output-folder': + "' + ).format(artifact_id) + if not analysis and artifact_type != "job-output-folder": # If the artifact is part of a study, the buttons shown depend in # multiple factors (see each if statement for an explanation of those) if qiita_config.require_approval: - if visibility == 'sandbox' and artifact.parents: + if visibility == "sandbox" and artifact.parents: # The request approval button only appears if the artifact is # sandboxed and the qiita_config specifies that the approval # should be requested buttons.append( - btn_base % ('request approval for', 'awaiting_approval', - 'Request approval')) - elif user.level == 'admin' and visibility == 'awaiting_approval': + btn_base + % ("request approval for", "awaiting_approval", "Request approval") + ) + elif user.level == "admin" and visibility == "awaiting_approval": # The approve artifact button only appears if the user is an # admin the artifact is waiting to be approvaed and the qiita # config requires artifact approval - buttons.append(btn_base % ('approve', 'private', - 'Approve artifact')) + buttons.append(btn_base % ("approve", "private", "Approve artifact")) - if visibility == 'private': + if visibility == "private": # The make public button only appears if the artifact is private - buttons.append(btn_base % ('make public', 'public', 'Make public')) + buttons.append(btn_base % ("make public", "public", "Make public")) # The revert to sandbox button only appears if the artifact is not # sandboxed nor public - if visibility not in {'sandbox', 'public'}: - buttons.append(btn_base % ('revert to sandbox', 'sandbox', - 'Revert to sandbox')) + if visibility not in {"sandbox", "public"}: + buttons.append( + btn_base % ("revert to sandbox", "sandbox", "Revert to sandbox") + ) - if user.level == 'admin' and not study.autoloaded: + if user.level == "admin" and not study.autoloaded: if artifact.can_be_submitted_to_ebi: buttons.append( '' '' - ' Submit to EBI' % artifact_id) + " Submit to EBI" % artifact_id + ) if artifact.can_be_submitted_to_vamps: if not artifact.is_submitted_to_vamps: buttons.append( 
'' '' - ' Submit to VAMPS' % artifact_id) + " Submit to VAMPS" % artifact_id + ) - if visibility != 'public': + if visibility != "public": # Have no fear, this is just python to generate html with an onclick in # javascript that makes an ajax call to a separate url, takes the # response and writes it to the newly uncollapsed div. Do note that @@ -200,13 +199,20 @@ def artifact_summary_get_request(user, artifact_id): 'Download Link
Generating Download Link...' - '
') % artifact_id + "" + ) % artifact_id buttons.append(private_download) - files = [(x['fp_id'], "%s (%s)" % (basename(x['fp']), - x['fp_type'].replace('_', ' ')), - x['checksum'], naturalsize(x['fp_size'], gnu=True)) - for x in artifact.filepaths if x['fp_type'] != 'directory'] + files = [ + ( + x["fp_id"], + "%s (%s)" % (basename(x["fp"]), x["fp_type"].replace("_", " ")), + x["checksum"], + naturalsize(x["fp_size"], gnu=True), + ) + for x in artifact.filepaths + if x["fp_type"] != "directory" + ] # TODO: https://github.com/biocore/qiita/issues/1724 Remove this hardcoded # values to actually get the information from the database once it stores @@ -216,7 +222,7 @@ def artifact_summary_get_request(user, artifact_id): # study and users that has been shared with can see the files study = artifact.study has_access = study.has_access(user, no_public=True) - if (not study.public_raw_download and not has_access): + if not study.public_raw_download and not has_access: files = [] proc_params = artifact.processing_parameters @@ -224,32 +230,33 @@ def artifact_summary_get_request(user, artifact_id): cmd = proc_params.command sw = cmd.software processing_info = { - 'command': cmd.name, - 'software': sw.name, - 'software_version': sw.version, - 'processing_parameters': proc_params.values, - 'command_active': cmd.active, - 'software_deprecated': sw.deprecated, - 'software_description': sw.description - } + "command": cmd.name, + "software": sw.name, + "software_version": sw.version, + "processing_parameters": proc_params.values, + "command_active": cmd.active, + "software_deprecated": sw.deprecated, + "software_description": sw.description, + } else: processing_info = {} - return {'name': artifact.name, - 'artifact_id': artifact_id, - 'artifact_type': artifact_type, - 'visibility': visibility, - 'editable': editable, - 'buttons': ' '.join(buttons), - 'processing_info': processing_info, - 'files': files, - 'is_from_analysis': artifact.analysis is not None, - 'summary': summary, - 'job': job_info, - 'artifact_timestamp': artifact.timestamp.strftime( - "%Y-%m-%d %H:%m"), - 'being_deleted': artifact.being_deleted_by is not None, - 'errored_summary_jobs': errored_summary_jobs} + return { + "name": artifact.name, + "artifact_id": artifact_id, + "artifact_type": artifact_type, + "visibility": visibility, + "editable": editable, + "buttons": " ".join(buttons), + "processing_info": processing_info, + "files": files, + "is_from_analysis": artifact.analysis is not None, + "summary": summary, + "job": job_info, + "artifact_timestamp": artifact.timestamp.strftime("%Y-%m-%d %H:%m"), + "being_deleted": artifact.being_deleted_by is not None, + "errored_summary_jobs": errored_summary_jobs, + } def artifact_summary_post_request(user, artifact_id, force_creation=False): @@ -278,7 +285,7 @@ def artifact_summary_post_request(user, artifact_id, force_creation=False): # Check if the summary is being generated or has been already generated command = Command.get_html_generator(artifact.artifact_type) jobs = artifact.jobs(cmd=command) - jobs = [j for j in jobs if j.status in ['queued', 'running', 'success']] + jobs = [j for j in jobs if j.status in ["queued", "running", "success"]] if not force_creation and jobs: # The HTML summary is either being generated or already generated. 
# Return the information of that job so we only generate the HTML @@ -289,11 +296,14 @@ def artifact_summary_post_request(user, artifact_id, force_creation=False): else: # Create a new job to generate the HTML summary and return the newly # created job information - job = ProcessingJob.create(user, Parameters.load( - command, values_dict={'input_data': artifact_id}), True) + job = ProcessingJob.create( + user, + Parameters.load(command, values_dict={"input_data": artifact_id}), + True, + ) job.submit() - return {'job': [job.id, job.status, job.step]} + return {"job": [job.id, job.status, job.step]} class ArtifactSummaryAJAX(BaseHandler): @@ -311,8 +321,9 @@ def post(self, artifact_id): self.write(res) -def artifact_patch_request(user, artifact_id, req_op, req_path, req_value=None, - req_from=None): +def artifact_patch_request( + user, artifact_id, req_op, req_path, req_value=None, req_from=None +): """Modifies an attribute of the artifact Parameters @@ -338,10 +349,10 @@ def artifact_patch_request(user, artifact_id, req_op, req_path, req_value=None, If missing req_value If the attribute to replace is not known """ - if req_op == 'replace': - req_path = [v for v in req_path.split('/') if v] + if req_op == "replace": + req_path = [v for v in req_path.split("/") if v] if len(req_path) != 1: - raise QiitaHTTPError(404, 'Incorrect path parameter') + raise QiitaHTTPError(404, "Incorrect path parameter") attribute = req_path[0] @@ -350,53 +361,71 @@ def artifact_patch_request(user, artifact_id, req_op, req_path, req_value=None, check_artifact_access(user, artifact) if not req_value: - raise QiitaHTTPError(404, 'Missing value to replace') + raise QiitaHTTPError(404, "Missing value to replace") - if attribute == 'name': + if attribute == "name": artifact.name = req_value return - elif attribute == 'visibility': + elif attribute == "visibility": if req_value not in get_visibilities(): - raise QiitaHTTPError(400, 'Unknown visibility value: %s' - % req_value) + raise QiitaHTTPError(400, "Unknown visibility value: %s" % req_value) - if (req_value == 'private' and qiita_config.require_approval - and not user.level == 'admin'): - raise QiitaHTTPError(403, 'User does not have permissions ' - 'to approve change') + if ( + req_value == "private" + and qiita_config.require_approval + and not user.level == "admin" + ): + raise QiitaHTTPError( + 403, "User does not have permissions to approve change" + ) try: artifact.visibility = req_value except Exception as e: - raise QiitaHTTPError(403, str(e).replace('\n', '
<br/>')) +            raise QiitaHTTPError(403, str(e).replace("\n", "<br/>
")) sid = artifact.study.id - if artifact.visibility == 'awaiting_approval': + if artifact.visibility == "awaiting_approval": email_to = qiita_config.help_email - subject = ('QIITA: Artifact %s awaiting_approval. Study %d, ' - 'Prep %d' % (artifact_id, sid, - artifact.prep_templates[0].id)) - message = ('%s requested approval. Study %d.' % (user.email, sid, sid)) + subject = "QIITA: Artifact %s awaiting_approval. Study %d, Prep %d" % ( + artifact_id, + sid, + artifact.prep_templates[0].id, + ) + message = ( + "%s requested approval. Study %d.' % (user.email, sid, sid) + ) try: send_email(email_to, subject, message) except Exception: - msg = ("Couldn't send email to admins, please email us " - "directly to {0}.".format( - email_to)) + msg = ( + "Couldn't send email to admins, please email us " + "directly to {0}.".format(email_to) + ) raise QiitaHTTPError(400, msg) else: - msg = '%s changed artifact %s (study %d) to %s' % ( - user.email, artifact_id, sid, req_value) - LogEntry.create('Warning', msg) + msg = "%s changed artifact %s (study %d) to %s" % ( + user.email, + artifact_id, + sid, + req_value, + ) + LogEntry.create("Warning", msg) else: # We don't understand the attribute so return an error - raise QiitaHTTPError(404, 'Attribute "%s" not found. Please, ' - 'check the path parameter' % attribute) + raise QiitaHTTPError( + 404, + 'Attribute "%s" not found. Please, ' + "check the path parameter" % attribute, + ) else: - raise QiitaHTTPError(400, 'Operation "%s" not supported. Current ' - 'supported operations: replace' % req_op) + raise QiitaHTTPError( + 400, + 'Operation "%s" not supported. Current ' + "supported operations: replace" % req_op, + ) def artifact_post_req(user, artifact_id): @@ -426,19 +455,18 @@ def artifact_post_req(user, artifact_id): pt_id = artifact.prep_templates[0].id redis_key = PREP_TEMPLATE_KEY_FORMAT % pt_id - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = qiita_plugin.get_command('delete_artifact') - params = Parameters.load(cmd, values_dict={'artifact': artifact_id}) + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("delete_artifact") + params = Parameters.load(cmd, values_dict={"artifact": artifact_id}) job = ProcessingJob.create(user, params, True) - r_client.set( - redis_key, dumps({'job_id': job.id, 'is_qiita_job': True})) + r_client.set(redis_key, dumps({"job_id": job.id, "is_qiita_job": True})) job.submit() else: job = being_deleted_by - return {'job': job.id} + return {"job": job.id} class ArtifactAJAX(BaseHandler): @@ -456,14 +484,15 @@ def patch(self, artifact_id): Follows the JSON PATCH specification: https://tools.ietf.org/html/rfc6902 """ - req_op = self.get_argument('op') - req_path = self.get_argument('path') - req_value = self.get_argument('value', None) - req_from = self.get_argument('from', None) + req_op = self.get_argument("op") + req_path = self.get_argument("path") + req_value = self.get_argument("value", None) + req_from = self.get_argument("from", None) with safe_execution(): - artifact_patch_request(self.current_user, artifact_id, req_op, - req_path, req_value, req_from) + artifact_patch_request( + self.current_user, artifact_id, req_op, req_path, req_value, req_from + ) self.finish() @@ -476,7 +505,7 @@ def validate_absolute_path(self, root, absolute_path): # we are going to inverse traverse the absolute_path and find the first # instance of an int, which is the artifact_id - for s in reversed(absolute_path.split('/')): + for s in 
reversed(absolute_path.split("/")): try: artifact_id = int(s) break @@ -490,4 +519,5 @@ def validate_absolute_path(self, root, absolute_path): # If we reach this point the user has access to the file - return it return super(ArtifactSummaryHandler, self).validate_absolute_path( - root, absolute_path) + root, absolute_path + ) diff --git a/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py b/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py index b1f283700..2abd265c4 100644 --- a/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py +++ b/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py @@ -6,30 +6,33 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import TestCase, main -from tempfile import mkstemp +from json import loads from os import close, remove from os.path import basename, exists, relpath -from json import loads -from mock import Mock +from tempfile import mkstemp +from unittest import TestCase, main +from mock import Mock from tornado.web import HTTPError from qiita_core.qiita_settings import qiita_config, r_client from qiita_core.testing import wait_for_prep_information_job from qiita_core.util import qiita_test_checker -from qiita_db.user import User from qiita_db.artifact import Artifact +from qiita_db.logger import LogEntry from qiita_db.processing_job import ProcessingJob -from qiita_db.software import Parameters, Command +from qiita_db.software import Command, Parameters +from qiita_db.user import User from qiita_pet.exceptions import QiitaHTTPError -from qiita_pet.test.tornado_test_base import TestHandlerBase -from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.artifact_handlers.base_handlers import ( - check_artifact_access, artifact_summary_get_request, - artifact_summary_post_request, artifact_patch_request, - artifact_post_req) -from qiita_db.logger import LogEntry + artifact_patch_request, + artifact_post_req, + artifact_summary_get_request, + artifact_summary_post_request, + check_artifact_access, +) +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.test.tornado_test_base import TestHandlerBase @qiita_test_checker() @@ -47,349 +50,407 @@ def test_check_artifact_access(self): # "Study" artifact a = Artifact(1) # The user has access - u = User('test@foo.bar') + u = User("test@foo.bar") check_artifact_access(u, a) # Admin has access to everything - admin = User('admin@foo.bar') + admin = User("admin@foo.bar") check_artifact_access(admin, a) # Demo user doesn't have access - demo_u = User('demo@microbio.me') + demo_u = User("demo@microbio.me") with self.assertRaises(HTTPError): check_artifact_access(demo_u, a) # "Analysis" artifact a = Artifact(8) - a.visibility = 'private' + a.visibility = "private" check_artifact_access(u, a) check_artifact_access(admin, a) with self.assertRaises(HTTPError): check_artifact_access(demo_u, a) - check_artifact_access(User('shared@foo.bar'), a) - a.visibility = 'public' + check_artifact_access(User("shared@foo.bar"), a) + a.visibility = "public" check_artifact_access(demo_u, a) def _assert_summary_equal(self, obs, exp): "Utility function for testing the artifact summary get request" - obs_files = obs.pop('files') - exp_files = exp.pop('files') + obs_files = obs.pop("files") + exp_files = exp.pop("files") self.assertCountEqual(obs_files, exp_files) - obs_jobs = obs.pop('processing_jobs') - exp_jobs = 
obs.pop('processing_jobs') + obs_jobs = obs.pop("processing_jobs") + exp_jobs = obs.pop("processing_jobs") self.assertCountEqual(obs_jobs, exp_jobs) self.assertEqual(obs, exp) def test_artifact_summary_get_request(self): - user = User('test@foo.bar') + user = User("test@foo.bar") main_buttons = ( - ' ') + ') }" class="btn btn-primary btn-sm">Revert to sandbox ' + ) private_download_button = ( '') + "Download Link..." + ) # Artifact w/o summary obs = artifact_summary_get_request(user, 1) exp_files = [ - (1, '1_s_G1_L001_sequences.fastq.gz (raw forward seqs)', - '2125826711', '58B'), - (2, '1_s_G1_L001_sequences_barcodes.fastq.gz (raw barcodes)', - '2125826711', '58B')] - exp = {'name': 'Raw data 1', - 'artifact_id': 1, - 'artifact_type': 'FASTQ', 'being_deleted': False, - 'artifact_timestamp': '2012-10-01 09:10', - 'visibility': 'private', - 'editable': True, - 'buttons': main_buttons + private_download_button % 1, - 'processing_info': {}, - 'files': exp_files, - 'is_from_analysis': False, - 'summary': None, - 'job': None, - 'errored_summary_jobs': []} + ( + 1, + "1_s_G1_L001_sequences.fastq.gz (raw forward seqs)", + "2125826711", + "58B", + ), + ( + 2, + "1_s_G1_L001_sequences_barcodes.fastq.gz (raw barcodes)", + "2125826711", + "58B", + ), + ] + exp = { + "name": "Raw data 1", + "artifact_id": 1, + "artifact_type": "FASTQ", + "being_deleted": False, + "artifact_timestamp": "2012-10-01 09:10", + "visibility": "private", + "editable": True, + "buttons": main_buttons + private_download_button % 1, + "processing_info": {}, + "files": exp_files, + "is_from_analysis": False, + "summary": None, + "job": None, + "errored_summary_jobs": [], + } self.assertEqual(obs, exp) # Artifact with summary being generated job = ProcessingJob.create( - User('test@foo.bar'), - Parameters.load(Command(7), values_dict={'input_data': 1}) + User("test@foo.bar"), + Parameters.load(Command(7), values_dict={"input_data": 1}), ) - job._set_status('queued') + job._set_status("queued") obs = artifact_summary_get_request(user, 1) - exp = {'name': 'Raw data 1', - 'artifact_id': 1, - 'artifact_type': 'FASTQ', 'being_deleted': False, - 'artifact_timestamp': '2012-10-01 09:10', - 'visibility': 'private', - 'editable': True, - 'buttons': main_buttons + private_download_button % 1, - 'processing_info': {}, - 'files': exp_files, - 'is_from_analysis': False, - 'summary': None, - 'job': [job.id, 'queued', None], - 'errored_summary_jobs': []} + exp = { + "name": "Raw data 1", + "artifact_id": 1, + "artifact_type": "FASTQ", + "being_deleted": False, + "artifact_timestamp": "2012-10-01 09:10", + "visibility": "private", + "editable": True, + "buttons": main_buttons + private_download_button % 1, + "processing_info": {}, + "files": exp_files, + "is_from_analysis": False, + "summary": None, + "job": [job.id, "queued", None], + "errored_summary_jobs": [], + } self.assertEqual(obs, exp) # Artifact with summary fd, fp = mkstemp(suffix=".html") close(fd) - with open(fp, 'w') as f: - f.write('HTML TEST - not important\n') + with open(fp, "w") as f: + f.write("HTML TEST - not important\n") a = Artifact(1) a.set_html_summary(fp) self._files_to_remove.extend([fp, a.html_summary_fp[1]]) exp_files.append( - (a.html_summary_fp[0], - '%s (html summary)' % basename(a.html_summary_fp[1]), - '1642196267', '33B')) - exp_summary_path = relpath( - a.html_summary_fp[1], qiita_config.base_data_dir) + ( + a.html_summary_fp[0], + "%s (html summary)" % basename(a.html_summary_fp[1]), + "1642196267", + "33B", + ) + ) + exp_summary_path = 
relpath(a.html_summary_fp[1], qiita_config.base_data_dir) obs = artifact_summary_get_request(user, 1) - exp = {'name': 'Raw data 1', - 'artifact_id': 1, - 'artifact_type': 'FASTQ', 'being_deleted': False, - 'artifact_timestamp': '2012-10-01 09:10', - 'visibility': 'private', - 'editable': True, - 'buttons': main_buttons + private_download_button % 1, - 'processing_info': {}, - 'files': exp_files, - 'is_from_analysis': False, - 'summary': exp_summary_path, - 'job': None, - 'errored_summary_jobs': []} + exp = { + "name": "Raw data 1", + "artifact_id": 1, + "artifact_type": "FASTQ", + "being_deleted": False, + "artifact_timestamp": "2012-10-01 09:10", + "visibility": "private", + "editable": True, + "buttons": main_buttons + private_download_button % 1, + "processing_info": {}, + "files": exp_files, + "is_from_analysis": False, + "summary": exp_summary_path, + "job": None, + "errored_summary_jobs": [], + } self.assertEqual(obs, exp) # No access - demo_u = User('demo@microbio.me') + demo_u = User("demo@microbio.me") with self.assertRaises(QiitaHTTPError): obs = artifact_summary_get_request(demo_u, 1) # A non-owner/share user can't see the files - a.visibility = 'public' + a.visibility = "public" obs = artifact_summary_get_request(demo_u, 1) - exp = {'name': 'Raw data 1', - 'artifact_id': 1, - 'artifact_type': 'FASTQ', 'being_deleted': False, - 'artifact_timestamp': '2012-10-01 09:10', - 'visibility': 'public', - 'editable': False, - 'buttons': '', - 'processing_info': {}, - 'files': [], - 'is_from_analysis': False, - 'summary': exp_summary_path, - 'job': None, - 'errored_summary_jobs': []} + exp = { + "name": "Raw data 1", + "artifact_id": 1, + "artifact_type": "FASTQ", + "being_deleted": False, + "artifact_timestamp": "2012-10-01 09:10", + "visibility": "public", + "editable": False, + "buttons": "", + "processing_info": {}, + "files": [], + "is_from_analysis": False, + "summary": exp_summary_path, + "job": None, + "errored_summary_jobs": [], + } self.assertEqual(obs, exp) # testing sandbox - a.visibility = 'sandbox' + a.visibility = "sandbox" obs = artifact_summary_get_request(user, 1) - exp = {'name': 'Raw data 1', - 'artifact_id': 1, - 'artifact_type': 'FASTQ', 'being_deleted': False, - 'artifact_timestamp': '2012-10-01 09:10', - 'visibility': 'sandbox', - 'editable': True, - 'buttons': private_download_button % 1, - 'processing_info': {}, - 'files': exp_files, - 'is_from_analysis': False, - 'summary': exp_summary_path, - 'job': None, - 'errored_summary_jobs': []} + exp = { + "name": "Raw data 1", + "artifact_id": 1, + "artifact_type": "FASTQ", + "being_deleted": False, + "artifact_timestamp": "2012-10-01 09:10", + "visibility": "sandbox", + "editable": True, + "buttons": private_download_button % 1, + "processing_info": {}, + "files": exp_files, + "is_from_analysis": False, + "summary": exp_summary_path, + "job": None, + "errored_summary_jobs": [], + } self.assertEqual(obs, exp) # returnig to private - a.visibility = 'private' + a.visibility = "private" # admin gets buttons - obs = artifact_summary_get_request(User('admin@foo.bar'), 2) + obs = artifact_summary_get_request(User("admin@foo.bar"), 2) exp_files = [ - (3, '1_seqs.fna (preprocessed fasta)', '', '0B'), - (4, '1_seqs.qual (preprocessed fastq)', '', '0B'), - (5, '1_seqs.demux (preprocessed demux)', '', '0B')] - exp = {'name': 'Demultiplexed 1', - 'artifact_id': 2, - 'artifact_type': 'Demultiplexed', 'being_deleted': False, - 'artifact_timestamp': '2012-10-01 10:10', - 'visibility': 'private', - 'editable': True, - 'buttons': (' ' 
- 'Submit to EBI Submit to VAMPS ' + - private_download_button % 2), - 'processing_info': { - 'command_active': True, 'software_deprecated': False, - 'software_description': ('Quantitative Insights Into ' - 'Microbial Ecology (QIIME) is an ' - 'open-source bioinformatics ' - 'pipeline for performing ' - 'microbiome analysis from raw DNA ' - 'sequencing data'), - 'command': 'Split libraries FASTQ', - 'processing_parameters': { - 'max_barcode_errors': '1.5', 'sequence_max_n': '0', - 'max_bad_run_length': '3', 'phred_offset': 'auto', - 'rev_comp': 'False', 'phred_quality_threshold': '3', - 'input_data': '1', 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'False', - 'min_per_read_length_fraction': '0.75', - 'barcode_type': 'golay_12'}, - 'software_version': '1.9.1', 'software': 'QIIMEq2'}, - 'files': exp_files, - 'is_from_analysis': False, - 'summary': None, - 'job': None, - 'errored_summary_jobs': []} + (3, "1_seqs.fna (preprocessed fasta)", "", "0B"), + (4, "1_seqs.qual (preprocessed fastq)", "", "0B"), + (5, "1_seqs.demux (preprocessed demux)", "", "0B"), + ] + exp = { + "name": "Demultiplexed 1", + "artifact_id": 2, + "artifact_type": "Demultiplexed", + "being_deleted": False, + "artifact_timestamp": "2012-10-01 10:10", + "visibility": "private", + "editable": True, + "buttons": ( + " ' + 'Submit to EBI Submit to VAMPS ' + + private_download_button + % 2 + ), + "processing_info": { + "command_active": True, + "software_deprecated": False, + "software_description": ( + "Quantitative Insights Into " + "Microbial Ecology (QIIME) is an " + "open-source bioinformatics " + "pipeline for performing " + "microbiome analysis from raw DNA " + "sequencing data" + ), + "command": "Split libraries FASTQ", + "processing_parameters": { + "max_barcode_errors": "1.5", + "sequence_max_n": "0", + "max_bad_run_length": "3", + "phred_offset": "auto", + "rev_comp": "False", + "phred_quality_threshold": "3", + "input_data": "1", + "rev_comp_barcode": "False", + "rev_comp_mapping_barcodes": "False", + "min_per_read_length_fraction": "0.75", + "barcode_type": "golay_12", + }, + "software_version": "1.9.1", + "software": "QIIMEq2", + }, + "files": exp_files, + "is_from_analysis": False, + "summary": None, + "job": None, + "errored_summary_jobs": [], + } self.assertEqual(obs, exp) # the buttons shouldn't be present when the study is autoloaded study = a.study study.autoloaded = True - exp['buttons'] = (' ' + private_download_button % 2) - obs = artifact_summary_get_request(User('admin@foo.bar'), 2) + exp["buttons"] = ( + " " + private_download_button % 2 + ) + obs = artifact_summary_get_request(User("admin@foo.bar"), 2) self.assertEqual(obs, exp) study.autoloaded = False # analysis artifact obs = artifact_summary_get_request(user, 8) - exp = {'name': 'noname', - 'artifact_id': 8, - 'artifact_type': 'BIOM', 'being_deleted': False, - # this value changes on build so copy from obs - 'artifact_timestamp': obs['artifact_timestamp'], - 'visibility': 'sandbox', - 'editable': True, - 'buttons': private_download_button % 8, - 'processing_info': {}, - 'files': [(22, 'biom_table.biom (biom)', '1756512010', - '1.0M')], - 'is_from_analysis': True, - 'summary': None, - 'job': None, - 'errored_summary_jobs': []} + exp = { + "name": "noname", + "artifact_id": 8, + "artifact_type": "BIOM", + "being_deleted": False, + # this value changes on build so copy from obs + "artifact_timestamp": obs["artifact_timestamp"], + "visibility": "sandbox", + "editable": True, + "buttons": private_download_button % 8, + 
"processing_info": {}, + "files": [(22, "biom_table.biom (biom)", "1756512010", "1.0M")], + "is_from_analysis": True, + "summary": None, + "job": None, + "errored_summary_jobs": [], + } self.assertEqual(obs, exp) def test_artifact_summary_post_request(self): # No access with self.assertRaises(QiitaHTTPError): - artifact_summary_post_request(User('demo@microbio.me'), 1) + artifact_summary_post_request(User("demo@microbio.me"), 1) # Returns already existing job job = ProcessingJob.create( - User('test@foo.bar'), - Parameters.load(Command(7), values_dict={'input_data': 2}) + User("test@foo.bar"), + Parameters.load(Command(7), values_dict={"input_data": 2}), ) - job._set_status('queued') - obs = artifact_summary_post_request(User('test@foo.bar'), 2) - exp = {'job': [job.id, 'queued', None]} + job._set_status("queued") + obs = artifact_summary_post_request(User("test@foo.bar"), 2) + exp = {"job": [job.id, "queued", None]} self.assertEqual(obs, exp) def test_artifact_post_request(self): # No access with self.assertRaises(QiitaHTTPError): - artifact_post_req(User('demo@microbio.me'), 1) + artifact_post_req(User("demo@microbio.me"), 1) - obs = artifact_post_req(User('test@foo.bar'), 2) - self.assertCountEqual(obs.keys(), ['job']) + obs = artifact_post_req(User("test@foo.bar"), 2) + self.assertCountEqual(obs.keys(), ["job"]) # Wait until the job is completed wait_for_prep_information_job(1) # Check that the delete function has been actually called - job = ProcessingJob(loads(r_client.get('prep_template_1'))['job_id']) - self.assertEqual(job.status, 'error') - self.assertIn('Cannot delete artifact 2', job.log.msg) + job = ProcessingJob(loads(r_client.get("prep_template_1"))["job_id"]) + self.assertEqual(job.status, "error") + self.assertIn("Cannot delete artifact 2", job.log.msg) def test_artifact_patch_request(self): a = Artifact(1) - test_user = User('test@foo.bar') - self.assertEqual(a.name, 'Raw data 1') + test_user = User("test@foo.bar") + self.assertEqual(a.name, "Raw data 1") - artifact_patch_request(test_user, 1, 'replace', '/name/', - req_value='NEW_NAME') - self.assertEqual(a.name, 'NEW_NAME') + artifact_patch_request(test_user, 1, "replace", "/name/", req_value="NEW_NAME") + self.assertEqual(a.name, "NEW_NAME") # Reset the name - a.name = 'Raw data 1' + a.name = "Raw data 1" # No access with self.assertRaises(QiitaHTTPError): - artifact_patch_request(User('demo@microbio.me'), 1, 'replace', - '/name/', req_value='NEW_NAME') + artifact_patch_request( + User("demo@microbio.me"), 1, "replace", "/name/", req_value="NEW_NAME" + ) # Incorrect path parameter with self.assertRaises(QiitaHTTPError): - artifact_patch_request(test_user, 1, 'replace', - '/name/wrong/', req_value='NEW_NAME') + artifact_patch_request( + test_user, 1, "replace", "/name/wrong/", req_value="NEW_NAME" + ) # Missing value with self.assertRaises(QiitaHTTPError): - artifact_patch_request(test_user, 1, 'replace', '/name/') + artifact_patch_request(test_user, 1, "replace", "/name/") # Wrong attribute with self.assertRaises(QiitaHTTPError): - artifact_patch_request(test_user, 1, 'replace', - '/wrong/', req_value='NEW_NAME') + artifact_patch_request( + test_user, 1, "replace", "/wrong/", req_value="NEW_NAME" + ) # Wrong operation with self.assertRaises(QiitaHTTPError): - artifact_patch_request(test_user, 1, 'add', '/name/', - req_value='NEW_NAME') + artifact_patch_request(test_user, 1, "add", "/name/", req_value="NEW_NAME") # Changing visibility - self.assertEqual(a.visibility, 'private') - artifact_patch_request(test_user, 1, 
'replace', '/visibility/', - req_value='sandbox') - self.assertEqual(a.visibility, 'sandbox') + self.assertEqual(a.visibility, "private") + artifact_patch_request( + test_user, 1, "replace", "/visibility/", req_value="sandbox" + ) + self.assertEqual(a.visibility, "sandbox") # checking that we have a new entry in the database for this self.assertEqual( LogEntry.newest_records(1)[0].msg, - 'test@foo.bar changed artifact 1 (study 1) to sandbox') + "test@foo.bar changed artifact 1 (study 1) to sandbox", + ) # Admin can change to private - artifact_patch_request(User('admin@foo.bar'), 1, 'replace', - '/visibility/', req_value='private') - self.assertEqual(a.visibility, 'private') + artifact_patch_request( + User("admin@foo.bar"), 1, "replace", "/visibility/", req_value="private" + ) + self.assertEqual(a.visibility, "private") # Test user can't change to private with self.assertRaises(QiitaHTTPError): - artifact_patch_request(test_user, 1, 'replace', '/visibility/', - req_value='private') + artifact_patch_request( + test_user, 1, "replace", "/visibility/", req_value="private" + ) # Unkown req value with self.assertRaises(QiitaHTTPError): - artifact_patch_request(test_user, 1, 'replace', '/visibility/', - req_value='wrong') + artifact_patch_request( + test_user, 1, "replace", "/visibility/", req_value="wrong" + ) class TestBaseHandlers(TestHandlerBase): @@ -404,48 +465,51 @@ def tearDown(self): remove(fp) def test_get_artifact_summary_ajax_handler(self): - response = self.get('/artifact/1/summary/') + response = self.get("/artifact/1/summary/") self.assertEqual(response.code, 200) def test_post_artifact_ajax_handler(self): - response = self.post('/artifact/2/', {}) + response = self.post("/artifact/2/", {}) self.assertEqual(response.code, 200) wait_for_prep_information_job(1) def test_patch_artifact_ajax_handler(self): a = Artifact(1) - self.assertEqual(a.name, 'Raw data 1') - arguments = {'op': 'replace', 'path': '/name/', 'value': 'NEW_NAME'} - response = self.patch('/artifact/1/', data=arguments) + self.assertEqual(a.name, "Raw data 1") + arguments = {"op": "replace", "path": "/name/", "value": "NEW_NAME"} + response = self.patch("/artifact/1/", data=arguments) self.assertEqual(response.code, 200) - self.assertEqual(a.name, 'NEW_NAME') - a.name = 'Raw data 1' + self.assertEqual(a.name, "NEW_NAME") + a.name = "Raw data 1" def test_get_artifact_summary_handler(self): a = Artifact(1) # Add a summary to the artifact fd, fp = mkstemp(suffix=".html") close(fd) - with open(fp, 'w') as f: - f.write('HTML TEST - not important\n') + with open(fp, "w") as f: + f.write("HTML TEST - not important\n") a = Artifact(1) a.set_html_summary(fp) self._files_to_remove.extend([fp, a.html_summary_fp[1]]) summary = relpath(a.html_summary_fp[1], qiita_config.base_data_dir) - response = self.get('/artifact/html_summary/%s' % summary) + response = self.get("/artifact/html_summary/%s" % summary) self.assertEqual(response.code, 200) - self.assertEqual(response.body.decode('ascii'), - 'HTML TEST - not important\n') + self.assertEqual( + response.body.decode("ascii"), "HTML TEST - not important\n" + ) # testing with a not log user should return the login page BaseHandler.get_current_user = Mock(return_value=None) - response = self.get('/artifact/html_summary/%s' % summary) + response = self.get("/artifact/html_summary/%s" % summary) self.assertEqual(response.code, 200) - exp = ('') + exp = ( + '" + ) self.assertIn(exp, response.body.decode()) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git 
a/qiita_pet/handlers/auth_handlers.py b/qiita_pet/handlers/auth_handlers.py index 4c5e306b1..75a55d820 100644 --- a/qiita_pet/handlers/auth_handlers.py +++ b/qiita_pet/handlers/auth_handlers.py @@ -6,22 +6,30 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from tornado.escape import url_escape, json_encode +from tornado.escape import json_encode, url_escape -from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_core.exceptions import ( + IncorrectEmailError, + IncorrectPasswordError, + UnverifiedEmailError, +) from qiita_core.qiita_settings import qiita_config, r_client from qiita_core.util import execute_as_transaction -from qiita_core.exceptions import (IncorrectPasswordError, IncorrectEmailError, - UnverifiedEmailError) -from qiita_db.util import send_email +from qiita_db.exceptions import ( + QiitaDBDuplicateError, + QiitaDBError, + QiitaDBUnknownIDError, +) from qiita_db.user import User -from qiita_db.exceptions import (QiitaDBUnknownIDError, QiitaDBDuplicateError, - QiitaDBError) +from qiita_db.util import send_email +from qiita_pet.handlers.base_handlers import BaseHandler + # login code modified from https://gist.github.com/guillaumevincent/4771570 class AuthCreateHandler(BaseHandler): """User Creation""" + def get(self): try: error_message = self.get_argument("error") @@ -53,37 +61,45 @@ def post(self): # qiita_config.portal_dir has it at the beginning but not at # the end. This constructs the correct URL url = qiita_config.base_url + qiita_config.portal_dir - send_email(username, "QIITA: Verify Email Address", "Please " - "click the following link to verify email address: " - "%s/auth/verify/%s?email=%s\n\nBy clicking you are " - "accepting our term and conditions: " - "%s/iframe/?iframe=qiita-terms" - % (url, info['user_verify_code'], - url_escape(username), url)) + send_email( + username, + "QIITA: Verify Email Address", + "Please " + "click the following link to verify email address: " + "%s/auth/verify/%s?email=%s\n\nBy clicking you are " + "accepting our term and conditions: " + "%s/iframe/?iframe=qiita-terms" + % (url, info["user_verify_code"], url_escape(username), url), + ) except Exception: - msg = ("Unable to send verification email. Please contact the " - "qiita developers at %s") % ( - qiita_config.help_email, qiita_config.help_email) - self.redirect(u"%s/?level=danger&message=%s" - % (qiita_config.portal_dir, url_escape(msg))) + msg = ( + "Unable to send verification email. Please contact the " + "qiita developers at %s" + ) % (qiita_config.help_email, qiita_config.help_email) + self.redirect( + "%s/?level=danger&message=%s" + % (qiita_config.portal_dir, url_escape(msg)) + ) return - msg = ("
<h3>User Successfully Created</h3><p>
Your Qiita account " - "has been successfully created. An email has been sent to " - "the email address you provided. This email contains " - "instructions on how to activate your account.
</p>" - "<p>
If you don't receive your activation email within a " - "couple of minutes, check your spam folder. If you still " - "don't see it, send us an email at %s" - ".

") % (qiita_config.help_email, - qiita_config.help_email) - self.redirect(u"%s/?level=success&message=%s" % - (qiita_config.portal_dir, url_escape(msg))) + msg = ( + "
<h3>User Successfully Created</h3><p>
Your Qiita account " + "has been successfully created. An email has been sent to " + "the email address you provided. This email contains " + "instructions on how to activate your account.
</p>" + "<p>
If you don't receive your activation email within a " + "couple of minutes, check your spam folder. If you still " + "don't see it, send us an email at %s' + ".

" + ) % (qiita_config.help_email, qiita_config.help_email) + self.redirect( + "%s/?level=success&message=%s" + % (qiita_config.portal_dir, url_escape(msg)) + ) else: - error_msg = u"?error=" + url_escape(msg) - self.redirect(u"%s/auth/create/%s" - % (qiita_config.portal_dir, error_msg)) + error_msg = "?error=" + url_escape(msg) + self.redirect("%s/auth/create/%s" % (qiita_config.portal_dir, error_msg)) class AuthVerifyHandler(BaseHandler): @@ -103,16 +119,21 @@ def get(self, code): if code_is_valid: msg = "Successfully verified user. You are now free to log in." color = "black" - r_client.zadd('qiita-usernames', {email: 0}) + r_client.zadd("qiita-usernames", {email: 0}) else: color = "red" - self.render("user_verified.html", msg=msg, color=color, - email=self.get_argument("email").strip()) + self.render( + "user_verified.html", + msg=msg, + color=color, + email=self.get_argument("email").strip(), + ) class AuthLoginHandler(BaseHandler): """user login, no page necessary""" + def get(self): self.redirect("%s/" % qiita_config.portal_dir) @@ -122,8 +143,8 @@ def post(self): passwd = self.get_argument("password", "") nextpage = self.get_argument("next", None) if nextpage is None: - if "auth/" not in self.request.headers['Referer']: - nextpage = self.request.headers['Referer'] + if "auth/" not in self.request.headers["Referer"]: + nextpage = self.request.headers["Referer"] else: nextpage = "%s/" % qiita_config.portal_dir @@ -132,18 +153,21 @@ def post(self): try: if User(username).level == "unverified": # email not verified so dont log in - msg = ("Email not verified. Please check your email and click " - "the verify link. You may need to check your spam " - "folder to find the email.
If a verification email" - " has not arrived in 15 minutes, please email %s") % (qiita_config.help_email, - qiita_config.help_email) + msg = ( + "Email not verified. Please check your email and click " + "the verify link. You may need to check your spam " + "folder to find the email.
If a verification email" + " has not arrived in 15 minutes, please email %s" + ) % (qiita_config.help_email, qiita_config.help_email) except QiitaDBUnknownIDError: msg = "Unknown user" except RuntimeError: # means DB not available, so set maintenance mode and failover - r_client.set("maintenance", "Database connection unavailable, " - "please try again later.") + r_client.set( + "maintenance", + "Database connection unavailable, please try again later.", + ) self.redirect("%s/" % qiita_config.portal_dir) return @@ -163,7 +187,7 @@ def post(self): self.set_current_user(username) self.redirect(nextpage) else: - self.render("index.html", message=msg, level='danger') + self.render("index.html", message=msg, level="danger") def set_current_user(self, user): if user: @@ -174,6 +198,7 @@ def set_current_user(self, user): class AuthLogoutHandler(BaseHandler): """Logout handler, no page necessary""" + def get(self): self.clear_cookie("user") self.redirect("%s/" % qiita_config.portal_dir) diff --git a/qiita_pet/handlers/base_handlers.py b/qiita_pet/handlers/base_handlers.py index 4639ab15d..965d6d215 100644 --- a/qiita_pet/handlers/base_handlers.py +++ b/qiita_pet/handlers/base_handlers.py @@ -15,18 +15,18 @@ class BaseHandler(RequestHandler): def get_current_user(self): - '''Overrides default method of returning user curently connected''' + """Overrides default method of returning user curently connected""" username = self.get_secure_cookie("user") if username is not None: # strip off quotes added by get_secure_cookie - username = username.decode('ascii').strip("\"' ") + username = username.decode("ascii").strip("\"' ") return User(username) else: self.clear_cookie("user") return None def write_error(self, status_code, **kwargs): - '''Overrides the error page created by Tornado''' + """Overrides the error page created by Tornado""" if status_code == 404: # just use the 404 page as the error self.render("404.html") @@ -40,36 +40,42 @@ def write_error(self, status_code, **kwargs): user = self.get_current_user() if user: try: - is_admin = user.level == 'admin' + is_admin = user.level == "admin" except Exception: # Any issue with this check leaves default as not admin pass # log the error from traceback import format_exception + exc_info = kwargs["exc_info"] - trace_info = ''.join(["%s\n" % line for line in - format_exception(*exc_info)]) + trace_info = "".join(["%s\n" % line for line in format_exception(*exc_info)]) req_dict = self.request.__dict__ # must trim body to 1024 chars to prevent huge error messages - req_dict['body'] = req_dict.get('body', '')[:1024] - request_info = ''.join(["%s: %s\n" % - (k, req_dict[k]) for k in - req_dict.keys() if k != 'files']) - error = str(exc_info[1]).split(':', 1) + req_dict["body"] = req_dict.get("body", "")[:1024] + request_info = "".join( + [ + "%s: %s\n" % (k, req_dict[k]) + for k in req_dict.keys() + if k != "files" + ] + ) + error = str(exc_info[1]).split(":", 1) if len(error) > 1: error = error[1] else: error = error[0] # render error page - self.render('error.html', status_code=status_code, is_admin=is_admin, - error=error) + self.render( + "error.html", status_code=status_code, is_admin=is_admin, error=error + ) LogEntry.create( - 'Runtime', - 'ERROR:\n%s\nTRACE:\n%s\nHTTP INFO:\n%s\n' % - (error, trace_info, request_info)) + "Runtime", + "ERROR:\n%s\nTRACE:\n%s\nHTTP INFO:\n%s\n" + % (error, trace_info, request_info), + ) def head(self): """Adds proper response for head requests""" @@ -77,21 +83,23 @@ def head(self): class MainHandler(BaseHandler): - 
'''Index page''' + """Index page""" + def get(self): - msg = self.get_argument('message', '') + msg = self.get_argument("message", "") msg = convert_text_html(msg) - lvl = self.get_argument('level', '') + lvl = self.get_argument("level", "") self.render("index.html", message=msg, level=lvl) class IFrame(BaseHandler): - '''Open one of the IFrame pages''' + """Open one of the IFrame pages""" + def get(self): - msg = self.get_argument('message', '') + msg = self.get_argument("message", "") msg = convert_text_html(msg) - lvl = self.get_argument('level', '') - iframe = self.get_argument('iframe', '') + lvl = self.get_argument("level", "") + iframe = self.get_argument("iframe", "") self.render("iframe.html", iframe=iframe, message=msg, level=lvl) diff --git a/qiita_pet/handlers/cloud_handlers/__init__.py b/qiita_pet/handlers/cloud_handlers/__init__.py index d10344a05..ce43dccae 100644 --- a/qiita_pet/handlers/cloud_handlers/__init__.py +++ b/qiita_pet/handlers/cloud_handlers/__init__.py @@ -1,9 +1,11 @@ -from .file_transfer_handlers import (FetchFileFromCentralHandler, - PushFileToCentralHandler) +from .file_transfer_handlers import ( + FetchFileFromCentralHandler, + PushFileToCentralHandler, +) -__all__ = ['FetchFileFromCentralHandler'] +__all__ = ["FetchFileFromCentralHandler"] ENDPOINTS = [ (r"/cloud/fetch_file_from_central/(.*)", FetchFileFromCentralHandler), - (r"/cloud/push_file_to_central/", PushFileToCentralHandler) + (r"/cloud/push_file_to_central/", PushFileToCentralHandler), ] diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 2287c5c31..4b17a6fa5 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -1,11 +1,11 @@ import os -from tornado.web import HTTPError, RequestHandler from tornado.gen import coroutine +from tornado.web import HTTPError, RequestHandler +from qiita_core.qiita_settings import qiita_config from qiita_core.util import execute_as_transaction from qiita_db.handlers.oauth2 import authenticate_oauth -from qiita_core.qiita_settings import qiita_config class FetchFileFromCentralHandler(RequestHandler): @@ -29,28 +29,34 @@ def get(self, requested_filepath): # attempt to access files outside of the BASE_DATA_DIR # intentionally NOT reporting the actual location to avoid exposing # instance internal information - raise HTTPError(403, reason=( - "You cannot access files outside of " - "the BASE_DATA_DIR of Qiita!")) + raise HTTPError( + 403, + reason=( + "You cannot access files outside of the BASE_DATA_DIR of Qiita!" 
+ ), + ) if not os.path.exists(filepath): - raise HTTPError(403, reason=( - "The requested file is not present in Qiita's BASE_DATA_DIR!")) + raise HTTPError( + 403, + reason=("The requested file is not present in Qiita's BASE_DATA_DIR!"), + ) # delivery of the file via nginx requires replacing the basedatadir # with the prefix defined in the nginx configuration for the # base_data_dir, '/protected/' by default - protected_filepath = filepath.replace(basedatadir, '/protected') - - self.set_header('Content-Type', 'application/octet-stream') - self.set_header('Content-Transfer-Encoding', 'binary') - self.set_header('X-Accel-Redirect', protected_filepath) - self.set_header('Content-Description', 'File Transfer') - self.set_header('Expires', '0') - self.set_header('Cache-Control', 'no-cache') - self.set_header('Content-Disposition', - 'attachment; filename=%s' % os.path.basename( - protected_filepath)) + protected_filepath = filepath.replace(basedatadir, "/protected") + + self.set_header("Content-Type", "application/octet-stream") + self.set_header("Content-Transfer-Encoding", "binary") + self.set_header("X-Accel-Redirect", protected_filepath) + self.set_header("Content-Description", "File Transfer") + self.set_header("Expires", "0") + self.set_header("Cache-Control", "no-cache") + self.set_header( + "Content-Disposition", + "attachment; filename=%s" % os.path.basename(protected_filepath), + ) self.finish() @@ -60,7 +66,7 @@ class PushFileToCentralHandler(RequestHandler): @execute_as_transaction def post(self): if not self.request.files: - raise HTTPError(400, reason='No files to upload defined!') + raise HTTPError(400, reason="No files to upload defined!") # canonic version of base_data_dir basedatadir = os.path.abspath(qiita_config.base_data_dir) @@ -68,27 +74,32 @@ def post(self): for filespath, filelist in self.request.files.items(): if filespath.startswith(basedatadir): - filespath = filespath[len(basedatadir):] + filespath = filespath[len(basedatadir) :] for file in filelist: - filepath = os.path.join(filespath, file['filename']) + filepath = os.path.join(filespath, file["filename"]) # remove leading / if filepath.startswith(os.sep): - filepath = filepath[len(os.sep):] + filepath = filepath[len(os.sep) :] filepath = os.path.abspath(os.path.join(basedatadir, filepath)) if os.path.exists(filepath): - raise HTTPError(403, reason=( - "The requested file is already " - "present in Qiita's BASE_DATA_DIR!")) + raise HTTPError( + 403, + reason=( + "The requested file is already " + "present in Qiita's BASE_DATA_DIR!" 
+ ), + ) os.makedirs(os.path.dirname(filepath), exist_ok=True) with open(filepath, "wb") as f: - f.write(file['body']) + f.write(file["body"]) stored_files.append(filepath) - self.write("Stored %i files into BASE_DATA_DIR of Qiita:\n%s\n" % ( - len(stored_files), - '\n'.join(map(lambda x: ' - %s' % x, stored_files)))) + self.write( + "Stored %i files into BASE_DATA_DIR of Qiita:\n%s\n" + % (len(stored_files), "\n".join(map(lambda x: " - %s" % x, stored_files))) + ) self.finish() diff --git a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py index 12dfac9b9..11dfd3ca0 100644 --- a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py @@ -1,10 +1,10 @@ -from unittest import main -from os.path import exists, basename -from os import remove import filecmp +from os import remove +from os.path import basename, exists +from unittest import main -from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb +from qiita_db.handlers.tests.oauthbase import OauthTestingBase class FetchFileFromCentralHandlerTests(OauthTestingBase): @@ -12,23 +12,22 @@ def setUp(self): super(FetchFileFromCentralHandlerTests, self).setUp() def test_get(self): - endpoint = '/cloud/fetch_file_from_central/' + endpoint = "/cloud/fetch_file_from_central/" base_data_dir = qdb.util.get_db_files_base_dir() - obs = self.get_authed(endpoint + 'nonexistingfile') + obs = self.get_authed(endpoint + "nonexistingfile") self.assertEqual(obs.status_code, 403) - self.assertIn('outside of the BASE_DATA_DIR', obs.reason) + self.assertIn("outside of the BASE_DATA_DIR", obs.reason) - obs = self.get_authed( - endpoint + base_data_dir[1:] + '/nonexistingfile') + obs = self.get_authed(endpoint + base_data_dir[1:] + "/nonexistingfile") self.assertEqual(obs.status_code, 403) - self.assertIn('The requested file is not present', obs.reason) + self.assertIn("The requested file is not present", obs.reason) obs = self.get_authed( - endpoint + base_data_dir[1:] + - '/raw_data/FASTA_QUAL_preprocessing.fna') + endpoint + base_data_dir[1:] + "/raw_data/FASTA_QUAL_preprocessing.fna" + ) self.assertEqual(obs.status_code, 200) - self.assertIn('FLP3FBN01ELBSX length=250 xy=1766_01', str(obs.content)) + self.assertIn("FLP3FBN01ELBSX length=250 xy=1766_01", str(obs.content)) class PushFileToCentralHandlerTests(OauthTestingBase): @@ -36,25 +35,25 @@ def setUp(self): super(PushFileToCentralHandlerTests, self).setUp() def test_post(self): - endpoint = '/cloud/push_file_to_central/' + endpoint = "/cloud/push_file_to_central/" base_data_dir = qdb.util.get_db_files_base_dir() # create a test file "locally", i.e. 
in current working directory - fp_source = 'foo.bar' - with open(fp_source, 'w') as f: + fp_source = "foo.bar" + with open(fp_source, "w") as f: f.write("this is a test\n") self._files_to_remove.append(fp_source) # if successful, expected location of the file in BASE_DATA_DIR - fp_target = base_data_dir + '/bar/' + basename(fp_source) + fp_target = base_data_dir + "/bar/" + basename(fp_source) self._files_to_remove.append(fp_target) # create a second test file - fp_source2 = 'foo_two.bar' - with open(fp_source2, 'w') as f: + fp_source2 = "foo_two.bar" + with open(fp_source2, "w") as f: f.write("this is another test\n") self._files_to_remove.append(fp_source2) - fp_target2 = base_data_dir + '/barr/' + basename(fp_source2) + fp_target2 = base_data_dir + "/barr/" + basename(fp_source2) self._files_to_remove.append(fp_target2) # test raise error if no file is given @@ -62,31 +61,29 @@ def test_post(self): self.assertEqual(obs.reason, "No files to upload defined!") # test correct mechanism - with open(fp_source, 'rb') as fh: - obs = self.post_authed(endpoint, files={'bar/': fh}) - self.assertIn('Stored 1 files into BASE_DATA_DIR of Qiita', - str(obs.content)) + with open(fp_source, "rb") as fh: + obs = self.post_authed(endpoint, files={"bar/": fh}) + self.assertIn( + "Stored 1 files into BASE_DATA_DIR of Qiita", str(obs.content) + ) self.assertTrue(filecmp.cmp(fp_source, fp_target, shallow=False)) # check if error is raised, if file already exists - with open(fp_source, 'rb') as fh: - obs = self.post_authed(endpoint, files={'bar/': fh}) - self.assertIn("already present in Qiita's BASE_DATA_DIR!", - obs.reason) + with open(fp_source, "rb") as fh: + obs = self.post_authed(endpoint, files={"bar/": fh}) + self.assertIn("already present in Qiita's BASE_DATA_DIR!", obs.reason) # test transfer of multiple files if exists(fp_target): remove(fp_target) - with open(fp_source, 'rb') as fh1: - with open(fp_source2, 'rb') as fh2: - obs = self.post_authed( - endpoint, files={'bar/': fh1, 'barr/': fh2}) - self.assertIn('Stored 2 files into BASE_DATA_DIR of Qiita', - str(obs.content)) - self.assertTrue(filecmp.cmp(fp_source, fp_target, - shallow=False)) - self.assertTrue(filecmp.cmp(fp_source2, fp_target2, - shallow=False)) + with open(fp_source, "rb") as fh1: + with open(fp_source2, "rb") as fh2: + obs = self.post_authed(endpoint, files={"bar/": fh1, "barr/": fh2}) + self.assertIn( + "Stored 2 files into BASE_DATA_DIR of Qiita", str(obs.content) + ) + self.assertTrue(filecmp.cmp(fp_source, fp_target, shallow=False)) + self.assertTrue(filecmp.cmp(fp_source2, fp_target2, shallow=False)) if __name__ == "__main__": diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index ca8437e3d..7a828de0c 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -6,38 +6,43 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from tornado.web import authenticated, HTTPError -from tornado.gen import coroutine - -from os.path import basename, getsize, join, isdir, getctime +from base64 import b64encode +from datetime import datetime, timedelta, timezone +from io import BytesIO from os import walk +from os.path import basename, getctime, getsize, isdir, join +from tempfile import mkdtemp +from uuid import uuid4 +from zipfile import ZipFile -from .base_handlers import BaseHandler -from qiita_pet.handlers.api_proxy.util import check_access -from qiita_pet.handlers.artifact_handlers.base_handlers \ - import check_artifact_access -from qiita_db.study import Study +from jose import jwt as jose_jwt +from tornado.gen import coroutine +from tornado.web import HTTPError, authenticated + +from qiita_core.qiita_settings import qiita_config +from qiita_core.util import execute_as_transaction, get_release_info from qiita_db.artifact import Artifact -from qiita_db.user import User from qiita_db.download_link import DownloadLink -from qiita_db.util import (filepath_id_to_rel_path, get_db_files_base_dir, - get_filepath_information, get_mountpoint, - filepath_id_to_object_id, get_data_types, - retrieve_filepaths, get_work_base_dir) +from qiita_db.exceptions import QiitaDBUnknownIDError from qiita_db.meta_util import validate_filepath_access_by_user -from qiita_db.metadata_template.sample_template import SampleTemplate from qiita_db.metadata_template.prep_template import PrepTemplate -from qiita_db.exceptions import QiitaDBUnknownIDError -from qiita_core.util import execute_as_transaction, get_release_info -from qiita_core.qiita_settings import qiita_config +from qiita_db.metadata_template.sample_template import SampleTemplate +from qiita_db.study import Study +from qiita_db.user import User +from qiita_db.util import ( + filepath_id_to_object_id, + filepath_id_to_rel_path, + get_data_types, + get_db_files_base_dir, + get_filepath_information, + get_mountpoint, + get_work_base_dir, + retrieve_filepaths, +) +from qiita_pet.handlers.api_proxy.util import check_access +from qiita_pet.handlers.artifact_handlers.base_handlers import check_artifact_access -from jose import jwt as jose_jwt -from uuid import uuid4 -from base64 import b64encode -from datetime import datetime, timedelta, timezone -from tempfile import mkdtemp -from zipfile import ZipFile -from io import BytesIO +from .base_handlers import BaseHandler class BaseHandlerDownload(BaseHandler): @@ -45,22 +50,26 @@ def _check_permissions(self, sid): # Check general access to study study_info = check_access(sid, self.current_user.id) if study_info: - raise HTTPError(405, reason="%s: %s, %s" % ( - study_info['message'], self.current_user.email, sid)) + raise HTTPError( + 405, + reason="%s: %s, %s" + % (study_info["message"], self.current_user.email, sid), + ) return Study(sid) def _finish_generate_files(self, filename, text): - self.set_header('Content-Description', 'text/csv') - self.set_header('Expires', '0') - self.set_header('Cache-Control', 'no-cache') - self.set_header('Content-Disposition', 'attachment; ' - 'filename=%s' % filename) + self.set_header("Content-Description", "text/csv") + self.set_header("Expires", "0") + self.set_header("Cache-Control", "no-cache") + self.set_header("Content-Disposition", "attachment; filename=%s" % filename) self.write(text) self.finish() def _generate_files(self, header_name, accessions, filename): - text = "sample_name\t%s\n%s" % (header_name, '\n'.join( - 
["%s\t%s" % (k, v) for k, v in accessions.items()])) + text = "sample_name\t%s\n%s" % ( + header_name, + "\n".join(["%s\t%s" % (k, v) for k, v in accessions.items()]), + ) self._finish_generate_files(filename, text) @@ -87,7 +96,7 @@ def _list_dir_files_nginx(self, dirpath): spath = fullpath if fullpath.startswith(basedir): spath = fullpath[basedir_len:] - to_download.append((spath, spath, '-', str(getsize(fullpath)))) + to_download.append((spath, spath, "-", str(getsize(fullpath)))) return to_download def _list_artifact_files_nginx(self, artifact): @@ -109,20 +118,18 @@ def _list_artifact_files_nginx(self, artifact): for i, x in enumerate(artifact.filepaths): # ignore if tgz as they could create problems and the # raw data is in the folder - if x['fp_type'] == 'tgz': + if x["fp_type"] == "tgz": continue - if isdir(x['fp']): + if isdir(x["fp"]): # If we have a directory, we actually need to list all the # files from the directory so NGINX can actually download all # of them - to_download.extend(self._list_dir_files_nginx(x['fp'])) - elif x['fp'].startswith(basedir): - spath = x['fp'][basedir_len:] - to_download.append( - (spath, spath, '-', str(x['fp_size']))) + to_download.extend(self._list_dir_files_nginx(x["fp"])) + elif x["fp"].startswith(basedir): + spath = x["fp"][basedir_len:] + to_download.append((spath, spath, "-", str(x["fp_size"]))) else: - to_download.append( - (x['fp'], x['fp'], '-', str(x['fp_size']))) + to_download.append((x["fp"], x["fp"], "-", str(x["fp_size"]))) for pt in artifact.prep_templates: # the latest prep template file is always the first [0] tuple and @@ -133,8 +140,8 @@ def _list_artifact_files_nginx(self, artifact): spt_fp = pt_fp if pt_fp.startswith(basedir): spt_fp = pt_fp[basedir_len:] - fname = 'mapping_files/%s_mapping_file.txt' % artifact.id - to_download.append((spt_fp, fname, '-', str(getsize(pt_fp)))) + fname = "mapping_files/%s_mapping_file.txt" % artifact.id + to_download.append((spt_fp, fname, "-", str(getsize(pt_fp)))) return to_download def _write_nginx_file_list(self, to_download): @@ -145,11 +152,14 @@ def _write_nginx_file_list(self, to_download): to_download : list of (str, str, str, str) The file list information """ - all_files = '\n'.join( - ["%s %s /protected/%s %s" % (fp_checksum, fp_size, fp, fp_name) - for fp, fp_name, fp_checksum, fp_size in to_download]) + all_files = "\n".join( + [ + "%s %s /protected/%s %s" % (fp_checksum, fp_size, fp, fp_name) + for fp, fp_name, fp_checksum, fp_size in to_download + ] + ) - self.set_header('X-Archive-Files', 'zip') + self.set_header("X-Archive-Files", "zip") self.write("%s\n" % all_files) def _set_nginx_headers(self, fname): @@ -160,11 +170,10 @@ def _set_nginx_headers(self, fname): fname : str Nginx's output filename """ - self.set_header('Content-Description', 'File Transfer') - self.set_header('Expires', '0') - self.set_header('Cache-Control', 'no-cache') - self.set_header('Content-Disposition', - 'attachment; filename=%s' % fname) + self.set_header("Content-Description", "File Transfer") + self.set_header("Expires", "0") + self.set_header("Cache-Control", "no-cache") + self.set_header("Content-Disposition", "attachment; filename=%s" % fname) def _write_nginx_placeholder_file(self, fp): """Writes nginx placeholder file in case that nginx is not set up @@ -175,9 +184,11 @@ def _write_nginx_placeholder_file(self, fp): The path to be downloaded through nginx """ # If we don't have nginx, write a file that indicates this - self.write("This installation of Qiita was not equipped with " - "nginx, so 
it is incapable of serving files. The file " - "you attempted to download is located at %s" % fp) + self.write( + "This installation of Qiita was not equipped with " + "nginx, so it is incapable of serving files. The file " + "you attempted to download is located at %s" % fp + ) class DownloadHandler(BaseHandlerDownload): @@ -189,27 +200,29 @@ def get(self, filepath_id): if not validate_filepath_access_by_user(self.current_user, fid): raise HTTPError( - 403, "%s doesn't have access to " - "filepath_id: %s" % (self.current_user.email, str(fid))) + 403, + "%s doesn't have access to " + "filepath_id: %s" % (self.current_user.email, str(fid)), + ) relpath = filepath_id_to_rel_path(fid) fp_info = get_filepath_information(fid) fname = basename(relpath) - if fp_info['filepath_type'] in ('directory', 'html_summary_dir'): + if fp_info["filepath_type"] in ("directory", "html_summary_dir"): # This is a directory, we need to list all the files so NGINX # can download all of them - to_download = self._list_dir_files_nginx(fp_info['fullpath']) + to_download = self._list_dir_files_nginx(fp_info["fullpath"]) self._write_nginx_file_list(to_download) - fname = '%s.zip' % fname + fname = "%s.zip" % fname else: self._write_nginx_placeholder_file(relpath) - self.set_header('Content-Type', 'application/octet-stream') - self.set_header('Content-Transfer-Encoding', 'binary') - self.set_header('X-Accel-Redirect', '/protected/' + relpath) + self.set_header("Content-Type", "application/octet-stream") + self.set_header("Content-Transfer-Encoding", "binary") + self.set_header("X-Accel-Redirect", "/protected/" + relpath) aid = filepath_id_to_object_id(fid) if aid is not None: - fname = '%d_%s' % (aid, fname) + fname = "%d_%s" % (aid, fname) self._set_nginx_headers(fname) self.finish() @@ -232,19 +245,24 @@ def get(self, study_id): # or the study is shared with him, then the user doesn't have full # access to the study data full_access = ( - (self.current_user.level == 'admin') | - (study.status != 'public') | - ((self.current_user == study.owner) | - (self.current_user in study.shared_with))) + (self.current_user.level == "admin") + | (study.status != "public") + | ( + (self.current_user == study.owner) + | (self.current_user in study.shared_with) + ) + ) - for a in study.artifacts(artifact_type='BIOM'): - if full_access or (a.visibility == 'public' and not a.has_human): + for a in study.artifacts(artifact_type="BIOM"): + if full_access or (a.visibility == "public" and not a.has_human): to_download.extend(self._list_artifact_files_nginx(a)) self._write_nginx_file_list(to_download) - zip_fn = 'study_%d_%s.zip' % ( - study_id, datetime.now().strftime('%m%d%y-%H%M%S')) + zip_fn = "study_%d_%s.zip" % ( + study_id, + datetime.now().strftime("%m%d%y-%H%M%S"), + ) self._set_nginx_headers(zip_fn) self.finish() @@ -254,7 +272,7 @@ class DownloadRelease(BaseHandlerDownload): @coroutine def get(self, extras): biom_metadata_release, archive_release = get_release_info() - if extras == 'archive': + if extras == "archive": relpath = archive_release[1] else: relpath = biom_metadata_release[1] @@ -266,10 +284,9 @@ def get(self, extras): self._set_nginx_headers(basename(relpath)) - self.set_header('Content-Type', 'application/octet-stream') - self.set_header('Content-Transfer-Encoding', 'binary') - self.set_header('X-Accel-Redirect', - f'/protected-working_dir/{relpath}') + self.set_header("Content-Type", "application/octet-stream") + self.set_header("Content-Transfer-Encoding", "binary") + self.set_header("X-Accel-Redirect", 
f"/protected-working_dir/{relpath}") self.finish() @@ -285,21 +302,26 @@ def get(self, study_id): is_owner = study.has_access(user, True) public_raw_download = study.public_raw_download if not is_owner and not public_raw_download: - raise HTTPError(405, reason="%s: %s, %s" % ( - 'No raw data access', self.current_user.email, str(study_id))) + raise HTTPError( + 405, + reason="%s: %s, %s" + % ("No raw data access", self.current_user.email, str(study_id)), + ) # loop over artifacts and retrieve raw data (no parents) to_download = [] for a in study.artifacts(): if not a.parents: - if not is_owner and (a.visibility != 'public' or a.has_human): + if not is_owner and (a.visibility != "public" or a.has_human): continue to_download.extend(self._list_artifact_files_nginx(a)) self._write_nginx_file_list(to_download) - zip_fn = 'study_raw_data_%d_%s.zip' % ( - study_id, datetime.now().strftime('%m%d%y-%H%M%S')) + zip_fn = "study_raw_data_%d_%s.zip" % ( + study_id, + datetime.now().strftime("%m%d%y-%H%M%S"), + ) self._set_nginx_headers(zip_fn) self.finish() @@ -314,8 +336,10 @@ def get(self, study_id): self._check_permissions(sid) self._generate_files( - 'sample_accession', SampleTemplate(sid).ebi_sample_accessions, - 'ebi_sample_accessions_study_%s.tsv' % sid) + "sample_accession", + SampleTemplate(sid).ebi_sample_accessions, + "ebi_sample_accessions_study_%s.tsv" % sid, + ) class DownloadEBIPrepAccessions(BaseHandlerDownload): @@ -330,8 +354,10 @@ def get(self, prep_template_id): self._check_permissions(sid) self._generate_files( - 'experiment_accession', pt.ebi_experiment_accessions, - 'ebi_experiment_accessions_study_%s_prep_%s.tsv' % (sid, pid)) + "experiment_accession", + pt.ebi_experiment_accessions, + "ebi_experiment_accessions_study_%s_prep_%s.tsv" % (sid, pid), + ) class DownloadSampleInfoPerPrep(BaseHandlerDownload): @@ -347,10 +373,9 @@ def get(self, prep_template_id): st = SampleTemplate(sid) - text = st.to_dataframe(samples=list(pt)).to_csv(None, sep='\t') + text = st.to_dataframe(samples=list(pt)).to_csv(None, sep="\t") - self._finish_generate_files( - 'sample_information_from_prep_%s.tsv' % pid, text) + self._finish_generate_files("sample_information_from_prep_%s.tsv" % pid, text) class DownloadUpload(BaseHandlerDownload): @@ -359,20 +384,21 @@ class DownloadUpload(BaseHandlerDownload): @execute_as_transaction def get(self, path): user = self.current_user - if user.level != 'admin': - raise HTTPError(403, reason="%s doesn't have access to download " - "uploaded files" % user.email) + if user.level != "admin": + raise HTTPError( + 403, + reason="%s doesn't have access to download uploaded files" % user.email, + ) # [0] because it returns a list # [1] we only need the filepath - filepath = get_mountpoint("uploads")[0][1][ - len(get_db_files_base_dir()):] + filepath = get_mountpoint("uploads")[0][1][len(get_db_files_base_dir()) :] relpath = join(filepath, path) self._write_nginx_placeholder_file(relpath) - self.set_header('Content-Type', 'application/octet-stream') - self.set_header('Content-Transfer-Encoding', 'binary') - self.set_header('X-Accel-Redirect', '/protected/' + relpath) + self.set_header("Content-Type", "application/octet-stream") + self.set_header("Content-Transfer-Encoding", "binary") + self.set_header("X-Accel-Redirect", "/protected/" + relpath) self._set_nginx_headers(basename(relpath)) self.finish() @@ -382,97 +408,100 @@ class DownloadDataReleaseFromPrep(BaseHandlerDownload): @coroutine @execute_as_transaction def get(self, prep_template_id): - """ This method 
constructs an on the fly ZIP with all the files - required for a data-prep release/data-delivery. Mainly sample, prep - info, bioms and coverage + """This method constructs an on the fly ZIP with all the files + required for a data-prep release/data-delivery. Mainly sample, prep + info, bioms and coverage """ user = self.current_user - if user.level not in ('admin', 'wet-lab admin'): - raise HTTPError(403, reason="%s doesn't have access to download " - "the data release files" % user.email) + if user.level not in ("admin", "wet-lab admin"): + raise HTTPError( + 403, + reason="%s doesn't have access to download " + "the data release files" % user.email, + ) pid = int(prep_template_id) pt = PrepTemplate(pid) sid = pt.study_id st = SampleTemplate(sid) - date = datetime.now().strftime('%m%d%y-%H%M%S') + date = datetime.now().strftime("%m%d%y-%H%M%S") td = mkdtemp(dir=get_work_base_dir()) files = [] readme = [ - f'Delivery created on {date}', - '', - f'Host (human) removal: {pt.artifact.human_reads_filter_method}', - '', + f"Delivery created on {date}", + "", + f"Host (human) removal: {pt.artifact.human_reads_filter_method}", + "", # this is not changing in the near future so just leaving # hardcoded for now - 'Main woltka reference: WoLr2, more info visit: ' - 'https://ftp.microbio.me/pub/wol2/', - '', + "Main woltka reference: WoLr2, more info visit: " + "https://ftp.microbio.me/pub/wol2/", + "", f"Qiita's prep: https://qiita.ucsd.edu/study/description/{sid}" f"?prep_id={pid}", - '', + "", ] # helper dict to add "user/human" friendly names to the bioms human_names = { - 'ec.biom': 'KEGG Enzyme (EC)', - 'per-gene.biom': 'Per gene Predictions', - 'none.biom': 'Per genome Predictions', - 'cell_counts.biom': 'Cell counts', - 'pathway.biom': 'KEGG Pathway', - 'ko.biom': 'KEGG Ontology (KO)', - 'rna_copy_counts.biom': 'RNA copy counts' + "ec.biom": "KEGG Enzyme (EC)", + "per-gene.biom": "Per gene Predictions", + "none.biom": "Per genome Predictions", + "cell_counts.biom": "Cell counts", + "pathway.biom": "KEGG Pathway", + "ko.biom": "KEGG Ontology (KO)", + "rna_copy_counts.biom": "RNA copy counts", } # sample-info creation - fn = join(td, f'sample_information_from_prep_{pid}.tsv') - readme.append(f'Sample information: {basename(fn)}') + fn = join(td, f"sample_information_from_prep_{pid}.tsv") + readme.append(f"Sample information: {basename(fn)}") files.append([fn, basename(fn)]) - st.to_dataframe(samples=list(pt)).to_csv(fn, sep='\t') + st.to_dataframe(samples=list(pt)).to_csv(fn, sep="\t") # prep-info creation - fn = join(td, f'prep_information_{pid}.tsv') - readme.append(f'Prep information: {basename(fn)}') + fn = join(td, f"prep_information_{pid}.tsv") + readme.append(f"Prep information: {basename(fn)}") files.append([fn, basename(fn)]) - pt.to_dataframe().to_csv(fn, sep='\t') + pt.to_dataframe().to_csv(fn, sep="\t") - readme.append('') + readme.append("") # finding the bioms to be added bioms = dict() coverages = None - for a in Study(sid).artifacts(artifact_type='BIOM'): + for a in Study(sid).artifacts(artifact_type="BIOM"): if a.prep_templates[0].id != pid: continue biom = None for fp in a.filepaths: - if fp['fp_type'] == 'biom': + if fp["fp_type"] == "biom": biom = fp - if coverages is None and 'coverages.tgz' == basename(fp['fp']): - coverages = fp['fp'] + if coverages is None and "coverages.tgz" == basename(fp["fp"]): + coverages = fp["fp"] if biom is None: continue - biom_fn = basename(biom['fp']) + biom_fn = basename(biom["fp"]) # there is a small but real chance that the same prep has 
the same # artifacts so using the latests if biom_fn not in bioms: bioms[biom_fn] = [a, biom] else: - if getctime(biom['fp']) > getctime(bioms[biom_fn][1]['fp']): + if getctime(biom["fp"]) > getctime(bioms[biom_fn][1]["fp"]): bioms[biom_fn] = [a, biom] # once we have all the bioms, we can add them to the list of zips # and to the readme the biom details and all the processing for fn, (a, fp) in bioms.items(): aname = basename(fp["fp"]) - nname = f'{a.id}_{aname}' - files.append([fp['fp'], nname]) + nname = f"{a.id}_{aname}" + files.append([fp["fp"], nname]) - hname = '' + hname = "" if aname in human_names: hname = human_names[aname] - readme.append(f'{nname}\t{hname}') + readme.append(f"{nname}\t{hname}") for an in set(a.ancestors.nodes()): p = an.processing_parameters @@ -483,26 +512,26 @@ def get(self, prep_template_id): sn = s.name sv = s.version pd = p.dump() - readme.append(f'\t{cn}\t{sn}\t{sv}\t{pd}') + readme.append(f"\t{cn}\t{sn}\t{sv}\t{pd}") # if a coverage was found, add it to the list of files if coverages is not None: fn = basename(coverages) - readme.append(f'{fn}\tcoverage files') + readme.append(f"{fn}\tcoverage files") files.append([coverages, fn]) - fn = join(td, 'README.txt') - with open(fn, 'w') as fp: - fp.write('\n'.join(readme)) + fn = join(td, "README.txt") + with open(fn, "w") as fp: + fp.write("\n".join(readme)) files.append([fn, basename(fn)]) - zp_fn = f'data_release_{pid}_{date}.zip' + zp_fn = f"data_release_{pid}_{date}.zip" zp = BytesIO() - with ZipFile(zp, 'w') as zipf: + with ZipFile(zp, "w") as zipf: for fp, fn in files: zipf.write(fp, fn) - self.set_header('Content-Type', 'application/zip') + self.set_header("Content-Type", "application/zip") self.set_header("Content-Disposition", f"attachment; filename={zp_fn}") self.write(zp.getvalue()) zp.close() @@ -514,70 +543,87 @@ class DownloadPublicHandler(BaseHandlerDownload): @execute_as_transaction def get(self): data = self.get_argument("data", None) - study_id = self.get_argument("study_id", None) - prep_id = self.get_argument("prep_id", None) - data_type = self.get_argument("data_type", None) + study_id = self.get_argument("study_id", None) + prep_id = self.get_argument("prep_id", None) + data_type = self.get_argument("data_type", None) dtypes = get_data_types().keys() - templates = ['sample_information', 'prep_information'] - valid_data = ['raw', 'biom'] + templates + templates = ["sample_information", "prep_information"] + valid_data = ["raw", "biom"] + templates to_download = [] # for this block to work we need 3 main inputs: prep_id/sample_id, data # and data_type - if one is missing raise an error, if both # prep_id/sample_id are defined or data_type doesn't exist in qiita # we should error - if data is None or (study_id is None and prep_id is None) or \ - data not in valid_data: - raise HTTPError(422, reason='You need to specify both data (the ' - 'data type you want to download - %s) and ' - 'study_id or prep_id' % '/'.join(valid_data)) + if ( + data is None + or (study_id is None and prep_id is None) + or data not in valid_data + ): + raise HTTPError( + 422, + reason="You need to specify both data (the " + "data type you want to download - %s) and " + "study_id or prep_id" % "/".join(valid_data), + ) if data_type is not None and data_type not in dtypes: - raise HTTPError(422, reason='Not a valid data_type. Valid types ' - 'are: %s' % ', '.join(dtypes)) + raise HTTPError( + 422, + reason="Not a valid data_type. 
Valid types are: %s" % ", ".join(dtypes), + ) if data in templates and prep_id is None and study_id is None: - raise HTTPError(422, reason='If downloading a sample or ' - 'preparation file you need to define study_id or' - ' prep_id') + raise HTTPError( + 422, + reason="If downloading a sample or " + "preparation file you need to define study_id or" + " prep_id", + ) # if we get here, then we have two main options: templates or raw/biom; # however, for raw/biom we need to retrieve the data via the study_id # or the prep_id so splitting the next block in study_id/pre_id if data in templates: if data_type is not None: - raise HTTPError(422, reason='If requesting an information ' - 'file you cannot specify the data_type') - elif prep_id is not None and data == 'prep_information': - fname = 'preparation_information_%s' % prep_id + raise HTTPError( + 422, + reason="If requesting an information " + "file you cannot specify the data_type", + ) + elif prep_id is not None and data == "prep_information": + fname = "preparation_information_%s" % prep_id prep_id = int(prep_id) try: infofile = PrepTemplate(prep_id) except QiitaDBUnknownIDError: raise HTTPError( - 422, reason='Preparation information does not exist') - elif study_id is not None and data == 'sample_information': - fname = 'sample_information_%s' % study_id + 422, reason="Preparation information does not exist" + ) + elif study_id is not None and data == "sample_information": + fname = "sample_information_%s" % study_id study_id = int(study_id) try: infofile = SampleTemplate(study_id) except QiitaDBUnknownIDError: - raise HTTPError( - 422, reason='Sample information does not exist') + raise HTTPError(422, reason="Sample information does not exist") else: - raise HTTPError(422, reason='Review your parameters, not a ' - 'valid combination') + raise HTTPError( + 422, reason="Review your parameters, not a valid combination" + ) x = retrieve_filepaths( - infofile._filepath_table, infofile._id_column, infofile.id, - sort='descending')[0] + infofile._filepath_table, + infofile._id_column, + infofile.id, + sort="descending", + )[0] basedir = get_db_files_base_dir() basedir_len = len(basedir) + 1 - fp = x['fp'][basedir_len:] - to_download.append((fp, fp, '-', str(x['fp_size']))) + fp = x["fp"][basedir_len:] + to_download.append((fp, fp, "-", str(x["fp_size"]))) self._write_nginx_file_list(to_download) - zip_fn = '%s_%s.zip' % ( - fname, datetime.now().strftime('%m%d%y-%H%M%S')) + zip_fn = "%s_%s.zip" % (fname, datetime.now().strftime("%m%d%y-%H%M%S")) self._set_nginx_headers(zip_fn) else: # depending on if we have a study_id or a prep_id, instantiate @@ -587,61 +633,73 @@ def get(self): try: study = Study(study_id) except QiitaDBUnknownIDError: - raise HTTPError(422, reason='Study does not exist') - zip_fn = 'study_%d_%s_%s.zip' % ( - study_id, data, datetime.now().strftime( - '%m%d%y-%H%M%S')) + raise HTTPError(422, reason="Study does not exist") + zip_fn = "study_%d_%s_%s.zip" % ( + study_id, + data, + datetime.now().strftime("%m%d%y-%H%M%S"), + ) else: prep_id = int(prep_id) try: prep = PrepTemplate(prep_id) except QiitaDBUnknownIDError: - raise HTTPError(422, reason='Prep does not exist') + raise HTTPError(422, reason="Prep does not exist") study = Study(prep.study_id) - zip_fn = 'prep_%d_%s_%s.zip' % ( - prep_id, data, datetime.now().strftime( - '%m%d%y-%H%M%S')) + zip_fn = "prep_%d_%s_%s.zip" % ( + prep_id, + data, + datetime.now().strftime("%m%d%y-%H%M%S"), + ) public_raw_download = study.public_raw_download # just to be 100% that the data 
is public, let's start # with checking that the study is actually public - if study.status != 'public': - raise HTTPError(404, reason='Study is not public. If this ' - 'is a mistake contact: %s' % - qiita_config.help_email) + if study.status != "public": + raise HTTPError( + 404, + reason="Study is not public. If this " + "is a mistake contact: %s" % qiita_config.help_email, + ) # now let's check that if the data is raw, the study's # public_raw_download flag is on - if data == 'raw': + if data == "raw": if not public_raw_download: - raise HTTPError(422, reason='No raw data access. If this ' - 'is a mistake contact: %s' - % qiita_config.help_email) + raise HTTPError( + 422, + reason="No raw data access. If this " + "is a mistake contact: %s" % qiita_config.help_email, + ) if study_id is not None: - artifacts = [a for a in study.artifacts(dtype=data_type) - if not a.parents] + artifacts = [ + a for a in study.artifacts(dtype=data_type) if not a.parents + ] else: artifacts = [prep.artifact] else: # this is biom if study_id is not None: - artifacts = study.artifacts( - dtype=data_type, artifact_type='BIOM') + artifacts = study.artifacts(dtype=data_type, artifact_type="BIOM") else: - artifacts = [a for a in - prep.artifact.descendants.nodes() - if a.artifact_type == 'BIOM'] + artifacts = [ + a + for a in prep.artifact.descendants.nodes() + if a.artifact_type == "BIOM" + ] # at this point artifacts has all the available artifact # so we need to make sure they are public and have no has_human # to be added to_download for a in artifacts: - if a.visibility != 'public' or a.has_human: + if a.visibility != "public" or a.has_human: continue to_download.extend(self._list_artifact_files_nginx(a)) if not to_download: - raise HTTPError(422, reason='Nothing to download. If ' - 'this is a mistake contact: %s' - % qiita_config.help_email) + raise HTTPError( + 422, + reason="Nothing to download. If " + "this is a mistake contact: %s" % qiita_config.help_email, + ) self._write_nginx_file_list(to_download) self._set_nginx_headers(zip_fn) @@ -655,33 +713,41 @@ def get(self): artifact_id = self.get_argument("artifact_id", None) if artifact_id is None: - raise HTTPError(422, reason='You need to specify an artifact id') + raise HTTPError(422, reason="You need to specify an artifact id") else: try: artifact = Artifact(artifact_id) except QiitaDBUnknownIDError: - raise HTTPError(404, reason='Artifact does not exist') + raise HTTPError(404, reason="Artifact does not exist") else: - if artifact.visibility != 'public': - raise HTTPError(404, reason='Artifact is not public. If ' - 'this is a mistake contact: %s' - % qiita_config.help_email) + if artifact.visibility != "public": + raise HTTPError( + 404, + reason="Artifact is not public. If " + "this is a mistake contact: %s" % qiita_config.help_email, + ) elif artifact.has_human: - raise HTTPError(404, reason='Artifact has possible human ' - 'sequences. If this is a mistake contact: ' - '%s' % qiita_config.help_email) + raise HTTPError( + 404, + reason="Artifact has possible human " + "sequences. If this is a mistake contact: " + "%s" % qiita_config.help_email, + ) else: to_download = self._list_artifact_files_nginx(artifact) if not to_download: - raise HTTPError(422, reason='Nothing to download. If ' - 'this is a mistake contact: %s' - % qiita_config.help_email) + raise HTTPError( + 422, + reason="Nothing to download. 
If " + "this is a mistake contact: %s" % qiita_config.help_email, + ) else: self._write_nginx_file_list(to_download) - zip_fn = 'artifact_%s_%s.zip' % ( - artifact_id, datetime.now().strftime( - '%m%d%y-%H%M%S')) + zip_fn = "artifact_%s_%s.zip" % ( + artifact_id, + datetime.now().strftime("%m%d%y-%H%M%S"), + ) self._set_nginx_headers(zip_fn) self.finish() @@ -709,25 +775,27 @@ def post(self, artifact_id): jti = b64encode(uuid4().bytes).decode("utf-8") # Sign a jwt allowing access utcnow = datetime.now(timezone.utc) - jwt = jose_jwt.encode({ + jwt = jose_jwt.encode( + { "artifactId": str(artifact_id), "perm": "download", "sub": str(user._id), "email": str(user.email), "iat": int(utcnow.timestamp() * 1000), "exp": int((utcnow + timedelta(days=7)).timestamp() * 1000), - "jti": jti + "jti": jti, }, qiita_config.jwt_secret, - algorithm='HS256' + algorithm="HS256", ) # Save the jwt to the database DownloadLink.create(jwt) - url = qiita_config.base_url + '/private_download/' + jti - user_msg = "This link will expire in 7 days on: " + \ - (utcnow + timedelta(days=7)).strftime('%Y-%m-%d') + url = qiita_config.base_url + "/private_download/" + jti + user_msg = "This link will expire in 7 days on: " + ( + utcnow + timedelta(days=7) + ).strftime("%Y-%m-%d") self.set_status(200) self.finish({"url": url, "msg": user_msg}) @@ -740,14 +808,12 @@ def get(self, jti): # If no jwt, error response if jwt is None: - raise HTTPError( - 404, - reason='Download Not Found. Link may have expired.') + raise HTTPError(404, reason="Download Not Found. Link may have expired.") # If jwt doesn't validate, error response - jwt_data = jose_jwt.decode(jwt, qiita_config.jwt_secret, 'HS256') + jwt_data = jose_jwt.decode(jwt, qiita_config.jwt_secret, "HS256") if jwt_data is None: - raise HTTPError(403, reason='Invalid JWT') + raise HTTPError(403, reason="Invalid JWT") # Triple check expiration and user permissions user = User(jwt_data["sub"]) @@ -767,15 +833,18 @@ def get(self, jti): # All checks out, let's give them the files then! to_download = self._list_artifact_files_nginx(artifact) if not to_download: - raise HTTPError(422, reason='Nothing to download. If ' - 'this is a mistake contact: %s' % - qiita_config.help_email) + raise HTTPError( + 422, + reason="Nothing to download. If " + "this is a mistake contact: %s" % qiita_config.help_email, + ) else: self._write_nginx_file_list(to_download) - zip_fn = 'artifact_%s_%s.zip' % ( - jwt_data["artifactId"], datetime.now().strftime( - '%m%d%y-%H%M%S')) + zip_fn = "artifact_%s_%s.zip" % ( + jwt_data["artifactId"], + datetime.now().strftime("%m%d%y-%H%M%S"), + ) self._set_nginx_headers(zip_fn) self.finish() diff --git a/qiita_pet/handlers/logger_handlers.py b/qiita_pet/handlers/logger_handlers.py index 3701cb08a..19357bca8 100644 --- a/qiita_pet/handlers/logger_handlers.py +++ b/qiita_pet/handlers/logger_handlers.py @@ -5,20 +5,22 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from tornado.web import authenticated +from tornado.web import HTTPError, authenticated -from .base_handlers import BaseHandler -from qiita_db.logger import LogEntry from qiita_core.util import execute_as_transaction -from tornado.web import HTTPError +from qiita_db.logger import LogEntry + +from .base_handlers import BaseHandler class LogEntryViewerHandler(BaseHandler): def check_access(self): - if self.current_user.level not in {'admin', 'dev'}: - raise HTTPError(403, reason="User %s doesn't have sufficient " - "privileges to view error page" % - self.current_user.email) + if self.current_user.level not in {"admin", "dev"}: + raise HTTPError( + 403, + reason="User %s doesn't have sufficient " + "privileges to view error page" % self.current_user.email, + ) @authenticated @execute_as_transaction diff --git a/qiita_pet/handlers/ontology.py b/qiita_pet/handlers/ontology.py index ef4c14b53..f1f11d5a1 100644 --- a/qiita_pet/handlers/ontology.py +++ b/qiita_pet/handlers/ontology.py @@ -8,8 +8,8 @@ from tornado.web import authenticated -from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.api_proxy import ontology_patch_handler +from qiita_pet.handlers.base_handlers import BaseHandler class OntologyHandler(BaseHandler): @@ -20,11 +20,10 @@ def patch(self): Follows the JSON PATCH specification: https://tools.ietf.org/html/rfc6902 """ - req_op = self.get_argument('op') - req_path = self.get_argument('path') - req_value = self.get_argument('value', None) - req_from = self.get_argument('from', None) + req_op = self.get_argument("op") + req_path = self.get_argument("path") + req_value = self.get_argument("value", None) + req_from = self.get_argument("from", None) - response = ontology_patch_handler(req_op, req_path, req_value, - req_from) + response = ontology_patch_handler(req_op, req_path, req_value, req_from) self.write(response) diff --git a/qiita_pet/handlers/portal.py b/qiita_pet/handlers/portal.py index ababb1f15..f05515fec 100644 --- a/qiita_pet/handlers/portal.py +++ b/qiita_pet/handlers/portal.py @@ -7,25 +7,29 @@ # ----------------------------------------------------------------------------- import warnings -from json import dumps from copy import deepcopy +from json import dumps -from tornado.web import authenticated, HTTPError +from tornado.web import HTTPError, authenticated from qiita_core.util import execute_as_transaction -from qiita_db.study import Study -from qiita_db.portal import Portal from qiita_db.exceptions import QiitaDBError +from qiita_db.portal import Portal +from qiita_db.study import Study + from .base_handlers import BaseHandler class PortalEditBase(BaseHandler): - study_cols = ['study_id', 'study_title', 'study_alias'] + study_cols = ["study_id", "study_title", "study_alias"] def check_admin(self): if self.current_user.level != "admin": - raise HTTPError(403, reason="%s does not have access to portal " - "editing!" % self.current_user.id) + raise HTTPError( + 403, + reason="%s does not have access to portal " + "editing!" 
% self.current_user.id, + ) @execute_as_transaction def get_info(self, portal="QIITA"): @@ -39,7 +43,7 @@ def get_info(self, portal="QIITA"): for s in study_info: # Make sure in correct order hold = dict(s) - hold['portals'] = ', '.join(sorted(Study(s['study_id'])._portals)) + hold["portals"] = ", ".join(sorted(Study(s["study_id"])._portals)) info.append(hold) return info @@ -53,16 +57,21 @@ def get(self): portals = Portal.list_portals() headers = deepcopy(self.study_cols) headers.insert(0, "portals") - self.render('portals_edit.html', headers=headers, info=info, - portals=portals, submit_url="/admin/portals/studies/") + self.render( + "portals_edit.html", + headers=headers, + info=info, + portals=portals, + submit_url="/admin/portals/studies/", + ) @authenticated @execute_as_transaction def post(self): self.check_admin() - portal = self.get_argument('portal') - studies = map(int, self.get_arguments('selected')) - action = self.get_argument('action') + portal = self.get_argument("portal") + studies = map(int, self.get_arguments("selected")) + action = self.get_argument("action") try: portal = Portal(portal) @@ -80,7 +89,7 @@ def post(self): self.write(action.upper() + " ERROR:
" + str(e)) return - msg = '; '.join([str(w.message) for w in warns]) + msg = "; ".join([str(w.message) for w in warns]) self.write(action + " completed successfully
" + msg) @@ -89,16 +98,16 @@ class StudyPortalAJAXHandler(PortalEditBase): @execute_as_transaction def get(self): self.check_admin() - portal = self.get_argument('view-portal') - echo = self.get_argument('sEcho') + portal = self.get_argument("view-portal") + echo = self.get_argument("sEcho") info = self.get_info(portal=portal) # build the table json results = { "sEcho": echo, "iTotalRecords": len(info), "iTotalDisplayRecords": len(info), - "aaData": info + "aaData": info, } # return the json in compact form to save transmit size - self.write(dumps(results, separators=(',', ':'))) + self.write(dumps(results, separators=(",", ":"))) diff --git a/qiita_pet/handlers/prep_template.py b/qiita_pet/handlers/prep_template.py index e187944a7..7b3968e67 100644 --- a/qiita_pet/handlers/prep_template.py +++ b/qiita_pet/handlers/prep_template.py @@ -8,27 +8,38 @@ from tornado.web import authenticated -from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.api_proxy import ( - prep_template_post_req, prep_template_patch_req, prep_template_delete_req, - prep_template_graph_get_req, prep_template_jobs_get_req) + prep_template_delete_req, + prep_template_graph_get_req, + prep_template_jobs_get_req, + prep_template_patch_req, + prep_template_post_req, +) +from qiita_pet.handlers.base_handlers import BaseHandler class PrepTemplateHandler(BaseHandler): @authenticated def post(self): """Creates a prep template""" - study_id = self.get_argument('study_id') - data_type = self.get_argument('data-type') - ena_ontology = self.get_argument('ena-ontology', None) - user_ontology = self.get_argument('user-ontology', None) - new_ontology = self.get_argument('new-ontology', None) - prep_fp = self.get_argument('prep-file') - name = self.get_argument('name', None) + study_id = self.get_argument("study_id") + data_type = self.get_argument("data-type") + ena_ontology = self.get_argument("ena-ontology", None) + user_ontology = self.get_argument("user-ontology", None) + new_ontology = self.get_argument("new-ontology", None) + prep_fp = self.get_argument("prep-file") + name = self.get_argument("name", None) response = prep_template_post_req( - study_id, self.get_current_user().id, prep_fp, data_type, - ena_ontology, user_ontology, new_ontology, name=name) + study_id, + self.get_current_user().id, + prep_fp, + data_type, + ena_ontology, + user_ontology, + new_ontology, + name=name, + ) self.write(response) @@ -39,28 +50,28 @@ def patch(self): Follows the JSON PATCH specification: https://tools.ietf.org/html/rfc6902 """ - req_op = self.get_argument('op') - req_path = self.get_argument('path') - req_value = self.get_argument('value', None) - req_from = self.get_argument('from', None) + req_op = self.get_argument("op") + req_path = self.get_argument("path") + req_value = self.get_argument("value", None) + req_from = self.get_argument("from", None) response = prep_template_patch_req( - self.current_user.id, req_op, req_path, req_value, req_from) + self.current_user.id, req_op, req_path, req_value, req_from + ) self.write(response) @authenticated def delete(self): """Deletes a prep template from the system""" - prep_id = self.get_argument('prep-template-id') + prep_id = self.get_argument("prep-template-id") self.write(prep_template_delete_req(prep_id, self.current_user.id)) class PrepTemplateGraphHandler(BaseHandler): @authenticated def get(self, prep_id): - self.write( - prep_template_graph_get_req(prep_id, self.current_user.id)) + self.write(prep_template_graph_get_req(prep_id, self.current_user.id)) class 
PrepTemplateJobHandler(BaseHandler): diff --git a/qiita_pet/handlers/public.py b/qiita_pet/handlers/public.py index ae49a6c47..87a6057c3 100644 --- a/qiita_pet/handlers/public.py +++ b/qiita_pet/handlers/public.py @@ -6,95 +6,98 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from tornado.web import HTTPError from tornado.gen import coroutine +from tornado.web import HTTPError -from .base_handlers import BaseHandler -from qiita_db.study import Study +from qiita_core.util import execute_as_transaction from qiita_db.artifact import Artifact -from qiita_db.util import get_artifacts_information from qiita_db.exceptions import QiitaDBUnknownIDError -from qiita_core.util import execute_as_transaction -from qiita_pet.util import EBI_LINKIFIER +from qiita_db.study import Study +from qiita_db.util import get_artifacts_information from qiita_pet.handlers.util import doi_linkifier, pubmed_linkifier +from qiita_pet.util import EBI_LINKIFIER + +from .base_handlers import BaseHandler class PublicHandler(BaseHandler): @coroutine @execute_as_transaction def get(self): - study_id = self.get_argument("study_id", None) - artifact_id = self.get_argument("artifact_id", None) + study_id = self.get_argument("study_id", None) + artifact_id = self.get_argument("artifact_id", None) if study_id is None and artifact_id is None: - raise HTTPError( - 422, reason='You need to specify study_id or artifact_id') + raise HTTPError(422, reason="You need to specify study_id or artifact_id") self.finish() elif study_id is not None: try: study = Study(int(study_id)) except QiitaDBUnknownIDError: - raise HTTPError( - 422, reason="Study %s doesn't exist" % study_id) + raise HTTPError(422, reason="Study %s doesn't exist" % study_id) self.finish() - artifact_ids = [a.id for a in study.artifacts() - if a.visibility == 'public'] + artifact_ids = [a.id for a in study.artifacts() if a.visibility == "public"] else: try: artifact = Artifact(int(artifact_id)) except QiitaDBUnknownIDError: - raise HTTPError( - 422, reason="Artifact %s doesn't exist" % artifact_id) + raise HTTPError(422, reason="Artifact %s doesn't exist" % artifact_id) self.finish() - if artifact.visibility != 'public': - raise HTTPError( - 422, reason="Artifact %s is not public" % artifact_id) + if artifact.visibility != "public": + raise HTTPError(422, reason="Artifact %s is not public" % artifact_id) self.finish() study = artifact.study if study is None: - raise HTTPError(422, reason="Artifact %s doesn't belong to " - "a study" % artifact_id) + raise HTTPError( + 422, reason="Artifact %s doesn't belong to a study" % artifact_id + ) self.finish() artifact_ids = [artifact.id] - if study.status != 'public': - raise HTTPError( - 422, reason='Not a public study') + if study.status != "public": + raise HTTPError(422, reason="Not a public study") self.finish() study_info = study.info - study_info['study_id'] = study.id - study_info['study_title'] = study.title - study_info['shared_with'] = [s.id for s in study.shared_with] - study_info['status'] = study.status - study_info['ebi_study_accession'] = study.ebi_study_accession - study_info['ebi_submission_status'] = study.ebi_submission_status + study_info["study_id"] = study.id + study_info["study_title"] = study.title + study_info["shared_with"] = [s.id for s in study.shared_with] + study_info["status"] = study.status + study_info["ebi_study_accession"] = study.ebi_study_accession + study_info["ebi_submission_status"] = 
study.ebi_submission_status # Clean up StudyPerson objects to string for display email = '{name} ({affiliation})' - pi = study.info['principal_investigator'] - study_info['principal_investigator'] = email.format(**{ - 'name': pi.name, - 'email': pi.email, - 'affiliation': pi.affiliation}) + pi = study.info["principal_investigator"] + study_info["principal_investigator"] = email.format( + **{"name": pi.name, "email": pi.email, "affiliation": pi.affiliation} + ) - study_info['owner'] = study.owner.id + study_info["owner"] = study.owner.id # Add needed info that is not part of the initial info pull - study_info['publications'] = [] + study_info["publications"] = [] for pub, is_doi in study.publications: if is_doi: - study_info['publications'].append(pubmed_linkifier([pub])) + study_info["publications"].append(pubmed_linkifier([pub])) else: - study_info['publications'].append(doi_linkifier([pub])) - study_info['publications'] = ', '.join(study_info['publications']) + study_info["publications"].append(doi_linkifier([pub])) + study_info["publications"] = ", ".join(study_info["publications"]) - if study_info['ebi_study_accession']: - links = ''.join([EBI_LINKIFIER.format(a) for a in study_info[ - 'ebi_study_accession'].split(',')]) - study_info['ebi_study_accession'] = '%s (%s)' % ( - links, study_info['ebi_submission_status']) + if study_info["ebi_study_accession"]: + links = "".join( + [ + EBI_LINKIFIER.format(a) + for a in study_info["ebi_study_accession"].split(",") + ] + ) + study_info["ebi_study_accession"] = "%s (%s)" % ( + links, + study_info["ebi_submission_status"], + ) - self.render("public.html", study_info=study_info, - artifacts_info=get_artifacts_information( - artifact_ids, False)) + self.render( + "public.html", + study_info=study_info, + artifacts_info=get_artifacts_information(artifact_ids, False), + ) diff --git a/qiita_pet/handlers/qiita_redbiom.py b/qiita_pet/handlers/qiita_redbiom.py index 62157b89b..7ad7c759e 100644 --- a/qiita_pet/handlers/qiita_redbiom.py +++ b/qiita_pet/handlers/qiita_redbiom.py @@ -6,17 +6,18 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from requests import ConnectionError from collections import defaultdict -import redbiom.summarize -import redbiom.search + import redbiom._requests -import redbiom.util -import redbiom.fetch import redbiom.admin -from tornado.gen import coroutine, Task -from tornado.web import HTTPError +import redbiom.fetch +import redbiom.search +import redbiom.summarize +import redbiom.util +from requests import ConnectionError from requests.exceptions import HTTPError as rHTTPError +from tornado.gen import Task, coroutine +from tornado.web import HTTPError from qiita_core.util import execute_as_transaction from qiita_db.util import generate_study_list_without_artifacts @@ -31,31 +32,32 @@ def get(self, search): # we go to the main portal try: timestamps = redbiom.admin.get_timestamps() - except (rHTTPError): + except rHTTPError: timestamps = [] if timestamps: latest_release = timestamps[0] else: - latest_release = 'Not reported' - if self.request.uri != '/redbiom/': - self.redirect('/redbiom/') - self.render('redbiom.html', latest_release=latest_release) + latest_release = "Not reported" + if self.request.uri != "/redbiom/": + self.redirect("/redbiom/") + self.render("redbiom.html", latest_release=latest_release) def _redbiom_metadata_search(self, query, contexts): study_artifacts = defaultdict(lambda: defaultdict(list)) - message = '' + message = "" try: redbiom_samples = redbiom.search.metadata_full(query, False) except ValueError: message = ( 'Not a valid search: "%s", your query is too small ' - '(too few letters), try a longer query' % query) + "(too few letters), try a longer query" % query + ) except Exception: message = ( 'The query ("%s") did not work and may be malformed. Please ' - 'check the search help for more information on the queries.' - % query) + "check the search help for more information on the queries." 
% query + ) if not message and redbiom_samples: study_artifacts = defaultdict(lambda: defaultdict(list)) for ctx in contexts: @@ -64,27 +66,26 @@ def _redbiom_metadata_search(self, query, contexts): try: # if redbiom can't find a valid sample in the context it # will raise a ValueError: max() arg is an empty sequence - _, data = redbiom.fetch.data_from_samples( - ctx, redbiom_samples) + _, data = redbiom.fetch.data_from_samples(ctx, redbiom_samples) except ValueError: continue for idx in data.keys(): - sample_id, aid = idx.rsplit('.', 1) - sid = sample_id.split('.', 1)[0] + sample_id, aid = idx.rsplit(".", 1) + sid = sample_id.split(".", 1)[0] study_artifacts[sid][aid].append(sample_id) return message, study_artifacts def _redbiom_feature_search(self, query, contexts): study_artifacts = defaultdict(lambda: defaultdict(list)) - query = [f for f in query.split(' ')] + query = [f for f in query.split(" ")] for ctx in contexts: - for idx in redbiom.util.ids_from(query, False, 'feature', ctx): - aid, sample_id = idx.split('_', 1) - sid = sample_id.split('.', 1)[0] + for idx in redbiom.util.ids_from(query, False, "feature", ctx): + aid, sample_id = idx.split("_", 1) + sid = sample_id.split(".", 1)[0] study_artifacts[sid][aid].append(sample_id) - return '', study_artifacts + return "", study_artifacts def _redbiom_taxon_search(self, query, contexts): study_artifacts = defaultdict(lambda: defaultdict(list)) @@ -97,26 +98,28 @@ def _redbiom_taxon_search(self, query, contexts): # workers and raise this error quickly if len(features) > 600: raise HTTPError(504) - for idx in redbiom.util.ids_from(features, False, 'feature', ctx): - aid, sample_id = idx.split('_', 1) - sid = sample_id.split('.', 1)[0] + for idx in redbiom.util.ids_from(features, False, "feature", ctx): + aid, sample_id = idx.split("_", 1) + sid = sample_id.split(".", 1)[0] study_artifacts[sid][aid].append(sample_id) - return '', study_artifacts + return "", study_artifacts @execute_as_transaction def _redbiom_search(self, query, search_on, callback): - search_f = {'metadata': self._redbiom_metadata_search, - 'feature': self._redbiom_feature_search, - 'taxon': self._redbiom_taxon_search} + search_f = { + "metadata": self._redbiom_metadata_search, + "feature": self._redbiom_feature_search, + "taxon": self._redbiom_taxon_search, + } - message = '' + message = "" results = [] try: df = redbiom.summarize.contexts() except ConnectionError: - message = 'Redbiom is down - contact admin, thanks!' + message = "Redbiom is down - contact admin, thanks!" else: contexts = df.ContextName.values if search_on in search_f: @@ -124,26 +127,28 @@ def _redbiom_search(self, query, search_on, callback): if not message: studies = study_artifacts.keys() if studies: - results = generate_study_list_without_artifacts( - studies) + results = generate_study_list_without_artifacts(studies) # inserting the artifact_biom_ids to the results for i in range(len(results)): - results[i]['artifact_biom_ids'] = study_artifacts[ - str(results[i]['study_id'])] + results[i]["artifact_biom_ids"] = study_artifacts[ + str(results[i]["study_id"]) + ] else: message = "No samples were found! Try again ..." 
else: - message = ('Incorrect search by: you can use metadata, ' - 'features or taxon and you passed: %s' % search_on) + message = ( + "Incorrect search by: you can use metadata, " + "features or taxon and you passed: %s" % search_on + ) callback((results, message)) @coroutine @execute_as_transaction def post(self, search): - search = self.get_argument('search') - search_on = self.get_argument('search_on') + search = self.get_argument("search") + search_on = self.get_argument("search_on") data, msg = yield Task(self._redbiom_search, search, search_on) - self.write({'status': 'success', 'message': msg, 'data': data}) + self.write({"status": "success", "message": msg, "data": data}) diff --git a/qiita_pet/handlers/resources.py b/qiita_pet/handlers/resources.py index 0b5873997..dc5e687e8 100644 --- a/qiita_pet/handlers/resources.py +++ b/qiita_pet/handlers/resources.py @@ -1,34 +1,45 @@ -from tornado.gen import coroutine, Task -from tornado.web import authenticated, HTTPError -import json import ast -from .base_handlers import BaseHandler +import json + +from tornado.gen import Task, coroutine +from tornado.web import HTTPError, authenticated + from qiita_core.qiita_settings import r_client from qiita_core.util import execute_as_transaction -commands = 'resources:commands' +from .base_handlers import BaseHandler + +commands = "resources:commands" default_col_name = "samples * columns" class ResourcesHandler(BaseHandler): def check_admin(self): if self.current_user.level != "admin": - raise HTTPError(403, reason="%s does not have access to portal " - "editing!" % self.current_user.id) + raise HTTPError( + 403, + reason="%s does not have access to portal " + "editing!" % self.current_user.id, + ) @execute_as_transaction def _get_resources(self, cname, sname, version, col_name, callback): resources = {} vals = [ - ('img_mem', r_client.get), - ('img_time', r_client.get), - ('time', r_client.get), - ('title_mem', r_client.get), - ('title_time', r_client.get) + ("img_mem", r_client.get), + ("img_time", r_client.get), + ("time", r_client.get), + ("title_mem", r_client.get), + ("title_time", r_client.get), ] for k, f in vals: - redis_key = 'resources$#%s$#%s$#%s$#%s:%s' % (cname, sname, - version, col_name, k) + redis_key = "resources$#%s$#%s$#%s$#%s:%s" % ( + cname, + sname, + version, + col_name, + k, + ) resources[k] = f(redis_key) callback(resources) @@ -44,21 +55,32 @@ def get(self): self.check_admin() commands = yield Task(self._get_commands) - commands_str = commands.decode('utf-8') + commands_str = commands.decode("utf-8") commands_dict = ast.literal_eval(commands_str) commands_json = json.dumps(commands_dict) - self.render('resources.html', - img_mem=None, img_time=None, - time=None, - mk=None, ma=None, mb=None, - mmodel=None, mreal=None, - mcalc=None, mfail=None, - tk=None, ta=None, tb=None, - tmodel=None, treal=None, - tcalc=None, tfail=None, - commands=commands_json, - initial_load=True) + self.render( + "resources.html", + img_mem=None, + img_time=None, + time=None, + mk=None, + ma=None, + mb=None, + mmodel=None, + mreal=None, + mcalc=None, + mfail=None, + tk=None, + ta=None, + tb=None, + tmodel=None, + treal=None, + tcalc=None, + tfail=None, + commands=commands_json, + initial_load=True, + ) @authenticated @coroutine @@ -66,43 +88,58 @@ def get(self): def post(self): try: data = json.loads(self.request.body) - software = data.get('software') - version = data.get('version') - command = data.get('command') + software = data.get("software") + version = data.get("version") + command = 
data.get("command") - resources = yield Task(self._get_resources, command, software, - version, default_col_name) + resources = yield Task( + self._get_resources, command, software, version, default_col_name + ) mcof, mmodel, mreal, mcalc, mfail = list( - map(lambda x: x.split(b": ")[1].strip().decode('utf-8'), - resources['title_mem'].split(b"\n"))) + map( + lambda x: x.split(b": ")[1].strip().decode("utf-8"), + resources["title_mem"].split(b"\n"), + ) + ) tcof, tmodel, treal, tcalc, tfail = list( - map(lambda x: x.split(b": ")[1].strip().decode('utf-8'), - resources['title_time'].split(b"\n"))) + map( + lambda x: x.split(b": ")[1].strip().decode("utf-8"), + resources["title_time"].split(b"\n"), + ) + ) mk, ma, mb = mcof.split("||") tk, ta, tb = tcof.split("||") response_data = { "status": "success", - "img_mem": resources[ - 'img_mem'].decode('utf-8') if isinstance( - resources['img_mem'], bytes) else resources['img_mem'], - "img_time": resources[ - 'img_time'].decode('utf-8') if isinstance( - resources['img_time'], bytes) else resources['img_time'], - "time": resources[ - 'time'].decode('utf-8') if isinstance( - resources['time'], bytes) else resources['time'], - "mk": mk, "ma": ma, "mb": mb, - "tk": tk, "ta": ta, "tb": tb, - "mmodel": mmodel, "mreal": mreal, - "mcalc": mcalc, "mfail": mfail, + "img_mem": resources["img_mem"].decode("utf-8") + if isinstance(resources["img_mem"], bytes) + else resources["img_mem"], + "img_time": resources["img_time"].decode("utf-8") + if isinstance(resources["img_time"], bytes) + else resources["img_time"], + "time": resources["time"].decode("utf-8") + if isinstance(resources["time"], bytes) + else resources["time"], + "mk": mk, + "ma": ma, + "mb": mb, + "tk": tk, + "ta": ta, + "tb": tb, + "mmodel": mmodel, + "mreal": mreal, + "mcalc": mcalc, + "mfail": mfail, "tcof": tcof, - "tmodel": tmodel, "treal": treal, - "tcalc": tcalc, "tfail": tfail, - "initial_load": False + "tmodel": tmodel, + "treal": treal, + "tcalc": tcalc, + "tfail": tfail, + "initial_load": False, } self.write(json.dumps(response_data) + "\n") @@ -111,21 +148,30 @@ def post(self): self.finish({"error": "Invalid JSON data"}) except Exception as e: import traceback + print(traceback.format_exc()) - if resources['title_mem'] is None: + if resources["title_mem"] is None: response_data = { "status": "no_data", "img_mem": None, "img_time": None, "time": None, - "mk": None, "ma": None, "mb": None, - "tk": None, "ta": None, "tb": None, - "mmodel": None, "mreal": None, - "mcalc": None, "mfail": None, + "mk": None, + "ma": None, + "mb": None, + "tk": None, + "ta": None, + "tb": None, + "mmodel": None, + "mreal": None, + "mcalc": None, + "mfail": None, "tcof": None, - "tmodel": None, "treal": None, - "tcalc": None, "tfail": None, - "initial_load": False + "tmodel": None, + "treal": None, + "tcalc": None, + "tfail": None, + "initial_load": False, } self.set_status(200) self.write(json.dumps(response_data) + "\n") diff --git a/qiita_pet/handlers/rest/__init__.py b/qiita_pet/handlers/rest/__init__.py index 913758457..0ca053ebc 100644 --- a/qiita_pet/handlers/rest/__init__.py +++ b/qiita_pet/handlers/rest/__init__.py @@ -6,37 +6,51 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from .study import StudyHandler, StudyCreatorHandler, StudyStatusHandler +from .study import StudyCreatorHandler, StudyHandler, StudyStatusHandler from .study_association import StudyAssociationHandler -from .study_samples import (StudySamplesHandler, StudySamplesInfoHandler, - StudySamplesCategoriesHandler, - StudySamplesDetailHandler, - StudySampleDetailHandler) from .study_person import StudyPersonHandler -from .study_preparation import (StudyPrepCreatorHandler, - StudyPrepArtifactCreatorHandler) - +from .study_preparation import StudyPrepArtifactCreatorHandler, StudyPrepCreatorHandler +from .study_samples import ( + StudySampleDetailHandler, + StudySamplesCategoriesHandler, + StudySamplesDetailHandler, + StudySamplesHandler, + StudySamplesInfoHandler, +) -__all__ = ['StudyHandler', 'StudySamplesHandler', 'StudySamplesInfoHandler', - 'StudySamplesCategoriesHandler', 'StudyPersonHandler', - 'StudyCreatorHandler', 'StudyPrepCreatorHandler', - 'StudyPrepArtifactCreatorHandler', 'StudyStatusHandler'] +__all__ = [ + "StudyHandler", + "StudySamplesHandler", + "StudySamplesInfoHandler", + "StudySamplesCategoriesHandler", + "StudyPersonHandler", + "StudyCreatorHandler", + "StudyPrepCreatorHandler", + "StudyPrepArtifactCreatorHandler", + "StudyStatusHandler", +] ENDPOINTS = ( (r"/api/v1/study$", StudyCreatorHandler), (r"/api/v1/study/([0-9]+)$", StudyHandler), (r"/api/v1/study/([0-9]+)/associations$", StudyAssociationHandler), - (r"/api/v1/study/([0-9]+)/samples/categories=([a-zA-Z\-0-9\.:,_]*)", - StudySamplesCategoriesHandler), + ( + r"/api/v1/study/([0-9]+)/samples/categories=([a-zA-Z\-0-9\.:,_]*)", + StudySamplesCategoriesHandler, + ), (r"/api/v1/study/([0-9]+)/samples", StudySamplesHandler), (r"/api/v1/study/([0-9]+)/samples/status", StudySamplesDetailHandler), - (r"/api/v1/study/([0-9]+)/sample/([a-zA-Z\-0-9\.]+)/status", - StudySampleDetailHandler), + ( + r"/api/v1/study/([0-9]+)/sample/([a-zA-Z\-0-9\.]+)/status", + StudySampleDetailHandler, + ), (r"/api/v1/study/([0-9]+)/samples/info", StudySamplesInfoHandler), (r"/api/v1/person(.*)", StudyPersonHandler), - (r"/api/v1/study/([0-9]+)/preparation/([0-9]+)/artifact", - StudyPrepArtifactCreatorHandler), + ( + r"/api/v1/study/([0-9]+)/preparation/([0-9]+)/artifact", + StudyPrepArtifactCreatorHandler, + ), (r"/api/v1/study/([0-9]+)/preparation(.*)", StudyPrepCreatorHandler), - (r"/api/v1/study/([0-9]+)/status$", StudyStatusHandler) + (r"/api/v1/study/([0-9]+)/status$", StudyStatusHandler), ) diff --git a/qiita_pet/handlers/rest/rest_handler.py b/qiita_pet/handlers/rest/rest_handler.py index 8aa281f41..f087d7a81 100644 --- a/qiita_pet/handlers/rest/rest_handler.py +++ b/qiita_pet/handlers/rest/rest_handler.py @@ -5,15 +5,15 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from qiita_db.study import Study from qiita_db.exceptions import QiitaDBUnknownIDError -from qiita_pet.handlers.util import to_int +from qiita_db.study import Study from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.handlers.util import to_int class RESTHandler(BaseHandler): def fail(self, msg, status, **kwargs): - out = {'message': msg} + out = {"message": msg} out.update(kwargs) self.write(out) @@ -26,6 +26,6 @@ def safe_get_study(self, study_id): try: s = Study(study_id) except QiitaDBUnknownIDError: - self.fail('Study not found', 404) + self.fail("Study not found", 404) finally: return s diff --git a/qiita_pet/handlers/rest/study.py b/qiita_pet/handlers/rest/study.py index 05db26b73..49e1f2b45 100644 --- a/qiita_pet/handlers/rest/study.py +++ b/qiita_pet/handlers/rest/study.py @@ -10,14 +10,14 @@ from tornado.escape import json_decode from qiita_db.handlers.oauth2 import authenticate_oauth -from qiita_db.study import StudyPerson, Study +from qiita_db.metadata_template.constants import SAMPLE_TEMPLATE_COLUMNS +from qiita_db.study import Study, StudyPerson from qiita_db.user import User + from .rest_handler import RESTHandler -from qiita_db.metadata_template.constants import SAMPLE_TEMPLATE_COLUMNS class StudyHandler(RESTHandler): - @authenticate_oauth def get(self, study_id): study = self.safe_get_study(study_id) @@ -25,92 +25,99 @@ def get(self, study_id): return info = study.info - pi = info['principal_investigator'] - lp = info['lab_person'] - self.write({'title': study.title, - 'contacts': {'principal_investigator': [ - pi.name, - pi.affiliation, - pi.email], - 'lab_person': [ - lp.name, - lp.affiliation, - lp.email]}, - 'study_abstract': info['study_abstract'], - 'study_description': info['study_description'], - 'study_alias': info['study_alias']}) + pi = info["principal_investigator"] + lp = info["lab_person"] + self.write( + { + "title": study.title, + "contacts": { + "principal_investigator": [pi.name, pi.affiliation, pi.email], + "lab_person": [lp.name, lp.affiliation, lp.email], + }, + "study_abstract": info["study_abstract"], + "study_description": info["study_description"], + "study_alias": info["study_alias"], + } + ) self.finish() class StudyCreatorHandler(RESTHandler): - @authenticate_oauth def post(self): try: payload = json_decode(self.request.body) except ValueError: - self.fail('Could not parse body', 400) + self.fail("Could not parse body", 400) return - required = {'title', 'study_abstract', 'study_description', - 'study_alias', 'owner', 'contacts'} + required = { + "title", + "study_abstract", + "study_description", + "study_alias", + "owner", + "contacts", + } if not required.issubset(payload): - self.fail('Not all required arguments provided', 400) + self.fail("Not all required arguments provided", 400) return - title = payload['title'] - study_abstract = payload['study_abstract'] - study_desc = payload['study_description'] - study_alias = payload['study_alias'] - notes = payload['notes'] + title = payload["title"] + study_abstract = payload["study_abstract"] + study_desc = payload["study_description"] + study_alias = payload["study_alias"] + notes = payload["notes"] - owner = payload['owner'] + owner = payload["owner"] if not User.exists(owner): - self.fail('Unknown user', 403) + self.fail("Unknown user", 403) return else: owner = User(owner) - contacts = payload['contacts'] + contacts = payload["contacts"] if Study.exists(title): - self.fail('Study title 
already exists', 409) + self.fail("Study title already exists", 409) return - pi_name = contacts['principal_investigator'][0] - pi_aff = contacts['principal_investigator'][1] + pi_name = contacts["principal_investigator"][0] + pi_aff = contacts["principal_investigator"][1] if not StudyPerson.exists(pi_name, pi_aff): - self.fail('Unknown principal investigator', 403) + self.fail("Unknown principal investigator", 403) return else: pi = StudyPerson.from_name_and_affiliation(pi_name, pi_aff) - lp_name = contacts['lab_person'][0] - lp_aff = contacts['lab_person'][1] + lp_name = contacts["lab_person"][0] + lp_aff = contacts["lab_person"][1] if not StudyPerson.exists(lp_name, lp_aff): - self.fail('Unknown lab person', 403) + self.fail("Unknown lab person", 403) return else: lp = StudyPerson.from_name_and_affiliation(lp_name, lp_aff) - info = {'lab_person_id': lp, - 'principal_investigator_id': pi, - 'study_abstract': study_abstract, - 'study_description': study_desc, - 'study_alias': study_alias, - 'notes': notes, - # TODO: we believe it is accurate that mixs is false and - # metadata completion is false as these cannot be known - # at study creation here no matter what. - # we do not know what should be done with the timeseries. - 'mixs_compliant': False, - 'metadata_complete': False, - 'timeseries_type_id': 1} + info = { + "lab_person_id": lp, + "principal_investigator_id": pi, + "study_abstract": study_abstract, + "study_description": study_desc, + "study_alias": study_alias, + "notes": notes, + # TODO: we believe it is accurate that mixs is false and + # metadata completion is false as these cannot be known + # at study creation here no matter what. + # we do not know what should be done with the timeseries. + "mixs_compliant": False, + "metadata_complete": False, + "timeseries_type_id": 1, + } study = Study.create(owner, title, info) self.set_status(201) - self.write({'id': study.id}) + self.write({"id": study.id}) self.finish() @@ -121,7 +128,7 @@ def get(self, study_id): if study is None: return - public = study.status == 'public' + public = study.status == "public" st = study.sample_template sample_information = st is not None if sample_information: @@ -144,12 +151,15 @@ def get(self, study_id): # to tease this out, but it replicates code present in # PrepTemplate.create, see: # https://github.com/biocore/qiita/issues/2096 - preparations.append({'id': pid, 'has_artifact': art}) - - self.write({'is_public': public, - 'has_sample_information': sample_information, - 'sample_information_has_warnings': - sample_information_warnings, - 'preparations': preparations}) + preparations.append({"id": pid, "has_artifact": art}) + + self.write( + { + "is_public": public, + "has_sample_information": sample_information, + "sample_information_has_warnings": sample_information_warnings, + "preparations": preparations, + } + ) self.set_status(200) self.finish() diff --git a/qiita_pet/handlers/rest/study_association.py b/qiita_pet/handlers/rest/study_association.py index 855d33d42..3cd84e9fc 100644 --- a/qiita_pet/handlers/rest/study_association.py +++ b/qiita_pet/handlers/rest/study_association.py @@ -8,43 +8,43 @@ from qiita_db.handlers.oauth2 import authenticate_oauth -from .rest_handler import RESTHandler +from .rest_handler import RESTHandler # terms used more than once -_STUDY = 'study' -_PREP = 'prep' -_FILEPATH = 'filepath' -_STATUS = 'status' -_ARTIFACT = 'artifact' -_SAMPLE = 'sample' -_METADATA = 'metadata' -_TEMPLATE = 'template' -_ID = 'id' -_PROCESSING = 'processing' -_TYPE = 'type' +_STUDY = 
"study" +_PREP = "prep" +_FILEPATH = "filepath" +_STATUS = "status" +_ARTIFACT = "artifact" +_SAMPLE = "sample" +_METADATA = "metadata" +_TEMPLATE = "template" +_ID = "id" +_PROCESSING = "processing" +_TYPE = "type" # payload keys -STUDY_ID = f'{_STUDY}_{_ID}' -STUDY_SAMPLE_METADATA_FILEPATH = f'{_STUDY}_{_SAMPLE}_{_METADATA}_{_FILEPATH}' -PREP_TEMPLATES = f'{_PREP}_{_TEMPLATE}s' -PREP_ID = f'{_PREP}_{_ID}' -PREP_STATUS = f'{_PREP}_{_STATUS}' -PREP_SAMPLE_METADATA_FILEPATH = f'{_PREP}_{_SAMPLE}_{_METADATA}_{_FILEPATH}' -PREP_DATA_TYPE = f'{_PREP}_data_{_TYPE}' -PREP_HUMAN_FILTERING = f'{_PREP}_human_filtering' -PREP_ARTIFACTS = f'{_PREP}_{_ARTIFACT}s' -ARTIFACT_ID = f'{_ARTIFACT}_{_ID}' -ARTIFACT_STATUS = f'{_ARTIFACT}_{_STATUS}' -ARTIFACT_PARENT_IDS = f'{_ARTIFACT}_parent_{_ID}s' -ARTIFACT_BASAL_ID = f'{_ARTIFACT}_basal_{_ID}' -ARTIFACT_PROCESSING_ID = f'{_ARTIFACT}_{_PROCESSING}_{_ID}' -ARTIFACT_PROCESSING_NAME = f'{_ARTIFACT}_{_PROCESSING}_name' -ARTIFACT_PROCESSING_ARGUMENTS = f'{_ARTIFACT}_{_PROCESSING}_arguments' -ARTIFACT_FILEPATHS = f'{_ARTIFACT}_{_FILEPATH}s' -ARTIFACT_FILEPATH = f'{_ARTIFACT}_{_FILEPATH}' -ARTIFACT_FILEPATH_TYPE = f'{_ARTIFACT}_{_FILEPATH}_{_TYPE}' -ARTIFACT_FILEPATH_ID = f'{_ARTIFACT}_{_FILEPATH}_{_ID}' +STUDY_ID = f"{_STUDY}_{_ID}" +STUDY_SAMPLE_METADATA_FILEPATH = f"{_STUDY}_{_SAMPLE}_{_METADATA}_{_FILEPATH}" +PREP_TEMPLATES = f"{_PREP}_{_TEMPLATE}s" +PREP_ID = f"{_PREP}_{_ID}" +PREP_STATUS = f"{_PREP}_{_STATUS}" +PREP_SAMPLE_METADATA_FILEPATH = f"{_PREP}_{_SAMPLE}_{_METADATA}_{_FILEPATH}" +PREP_DATA_TYPE = f"{_PREP}_data_{_TYPE}" +PREP_HUMAN_FILTERING = f"{_PREP}_human_filtering" +PREP_ARTIFACTS = f"{_PREP}_{_ARTIFACT}s" +ARTIFACT_ID = f"{_ARTIFACT}_{_ID}" +ARTIFACT_STATUS = f"{_ARTIFACT}_{_STATUS}" +ARTIFACT_PARENT_IDS = f"{_ARTIFACT}_parent_{_ID}s" +ARTIFACT_BASAL_ID = f"{_ARTIFACT}_basal_{_ID}" +ARTIFACT_PROCESSING_ID = f"{_ARTIFACT}_{_PROCESSING}_{_ID}" +ARTIFACT_PROCESSING_NAME = f"{_ARTIFACT}_{_PROCESSING}_name" +ARTIFACT_PROCESSING_ARGUMENTS = f"{_ARTIFACT}_{_PROCESSING}_arguments" +ARTIFACT_FILEPATHS = f"{_ARTIFACT}_{_FILEPATH}s" +ARTIFACT_FILEPATH = f"{_ARTIFACT}_{_FILEPATH}" +ARTIFACT_FILEPATH_TYPE = f"{_ARTIFACT}_{_FILEPATH}_{_TYPE}" +ARTIFACT_FILEPATH_ID = f"{_ARTIFACT}_{_FILEPATH}_{_ID}" def _most_recent_template_path(template): @@ -93,7 +93,7 @@ def _set_prep_template(template_payload, prep_template): current_template[PREP_STATUS] = prep_template.status current_template[PREP_SAMPLE_METADATA_FILEPATH] = filepath current_template[PREP_DATA_TYPE] = prep_template.data_type() - current_template[PREP_HUMAN_FILTERING] = _get_human_filtering(prep_template) # noqa + current_template[PREP_HUMAN_FILTERING] = _get_human_filtering(prep_template) _set_artifacts(current_template, prep_template) @@ -105,7 +105,9 @@ def _get_artifacts(prep_template): if prep_template.artifact is None: return [] - pending_artifact_objects = [prep_template.artifact, ] + pending_artifact_objects = [ + prep_template.artifact, + ] all_artifact_objects = set(pending_artifact_objects[:]) while pending_artifact_objects: @@ -164,7 +166,7 @@ def _set_artifact_processing(artifact_payload, artifact): artifact_payload[ARTIFACT_PROCESSING_ID] = artifact_processing_id artifact_payload[ARTIFACT_PROCESSING_NAME] = artifact_processing_name - artifact_payload[ARTIFACT_PROCESSING_ARGUMENTS] = artifact_processing_arguments # noqa + artifact_payload[ARTIFACT_PROCESSING_ARGUMENTS] = artifact_processing_arguments def _set_artifact_filepaths(artifact_payload, artifact): @@ -172,9 +174,9 @@ def 
_set_artifact_filepaths(artifact_payload, artifact): artifact_filepaths = [] for filepath_data in artifact.filepaths: local_payload = {} - local_payload[ARTIFACT_FILEPATH] = filepath_data['fp'] - local_payload[ARTIFACT_FILEPATH_ID] = filepath_data['fp_id'] - local_payload[ARTIFACT_FILEPATH_TYPE] = filepath_data['fp_type'] + local_payload[ARTIFACT_FILEPATH] = filepath_data["fp"] + local_payload[ARTIFACT_FILEPATH_ID] = filepath_data["fp_id"] + local_payload[ARTIFACT_FILEPATH_TYPE] = filepath_data["fp_type"] artifact_filepaths.append(local_payload) # the test study includes an artifact which does not have filepaths diff --git a/qiita_pet/handlers/rest/study_person.py b/qiita_pet/handlers/rest/study_person.py index b8466f07d..1bc71178f 100644 --- a/qiita_pet/handlers/rest/study_person.py +++ b/qiita_pet/handlers/rest/study_person.py @@ -9,53 +9,62 @@ from tornado.escape import json_encode from tornado.web import MissingArgumentError +from qiita_db.exceptions import QiitaDBLookupError from qiita_db.handlers.oauth2 import authenticate_oauth from qiita_db.study import StudyPerson -from qiita_db.exceptions import QiitaDBLookupError + from .rest_handler import RESTHandler class StudyPersonHandler(RESTHandler): @authenticate_oauth def get(self, *args, **kwargs): - name = self.get_argument('name', None) - affiliation = self.get_argument('affiliation', None) + name = self.get_argument("name", None) + affiliation = self.get_argument("affiliation", None) if name is None and affiliation is None: # Retrieve the list of all the StudyPerson - sp = [{'name': p.name, 'affiliation': p.affiliation} - for p in StudyPerson.iter()] + sp = [ + {"name": p.name, "affiliation": p.affiliation} + for p in StudyPerson.iter() + ] self.write(json_encode(sp)) self.finish() elif name is not None and affiliation is not None: try: p = StudyPerson.from_name_and_affiliation(name, affiliation) except QiitaDBLookupError: - self.fail('Person not found', 404) + self.fail("Person not found", 404) return - self.write({'address': p.address, 'phone': p.phone, - 'email': p.email, 'id': p.id}) + self.write( + {"address": p.address, "phone": p.phone, "email": p.email, "id": p.id} + ) self.finish() else: - arg_name = 'name' if name is None else 'affiliation' + arg_name = "name" if name is None else "affiliation" raise MissingArgumentError(arg_name) @authenticate_oauth def post(self, *args, **kwargs): - name = self.get_argument('name') - affiliation = self.get_argument('affiliation') - email = self.get_argument('email') + name = self.get_argument("name") + affiliation = self.get_argument("affiliation") + email = self.get_argument("email") - phone = self.get_argument('phone', None) - address = self.get_argument('address', None) + phone = self.get_argument("phone", None) + address = self.get_argument("address", None) if StudyPerson.exists(name, affiliation): - self.fail('Person already exists', 409) + self.fail("Person already exists", 409) return - p = StudyPerson.create(name=name, affiliation=affiliation, email=email, - phone=phone, address=address) + p = StudyPerson.create( + name=name, + affiliation=affiliation, + email=email, + phone=phone, + address=address, + ) self.set_status(201) - self.write({'id': p.id}) + self.write({"id": p.id}) self.finish() diff --git a/qiita_pet/handlers/rest/study_preparation.py b/qiita_pet/handlers/rest/study_preparation.py index 0afc1702a..2494f4190 100644 --- a/qiita_pet/handlers/rest/study_preparation.py +++ b/qiita_pet/handlers/rest/study_preparation.py @@ -11,12 +11,13 @@ import pandas as pd from 
tornado.escape import json_decode -from qiita_db.util import get_mountpoint from qiita_db.artifact import Artifact -from qiita_pet.handlers.util import to_int from qiita_db.exceptions import QiitaDBUnknownIDError, QiitaError -from qiita_db.metadata_template.prep_template import PrepTemplate from qiita_db.handlers.oauth2 import authenticate_oauth +from qiita_db.metadata_template.prep_template import PrepTemplate +from qiita_db.util import get_mountpoint +from qiita_pet.handlers.util import to_int + from .rest_handler import RESTHandler @@ -27,30 +28,27 @@ class StudyPrepCreatorHandler(RESTHandler): @authenticate_oauth def post(self, study_id, *args, **kwargs): - data_type = self.get_argument('data_type') - investigation_type = self.get_argument('investigation_type', None) + data_type = self.get_argument("data_type") + investigation_type = self.get_argument("investigation_type", None) study_id = self.safe_get_study(study_id) if study_id is None: return - data = pd.DataFrame.from_dict(json_decode(self.request.body), - orient='index') + data = pd.DataFrame.from_dict(json_decode(self.request.body), orient="index") try: - p = PrepTemplate.create(data, study_id, data_type, - investigation_type) + p = PrepTemplate.create(data, study_id, data_type, investigation_type) except QiitaError as e: self.fail(str(e), 406) return - self.write({'id': p.id}) + self.write({"id": p.id}) self.set_status(201) self.finish() class StudyPrepArtifactCreatorHandler(RESTHandler): - @authenticate_oauth def post(self, study_id, prep_id): study = self.safe_get_study(study_id) @@ -61,28 +59,32 @@ def post(self, study_id, prep_id): try: p = PrepTemplate(prep_id) except QiitaDBUnknownIDError: - self.fail('Preparation not found', 404) + self.fail("Preparation not found", 404) return if p.study_id != study.id: - self.fail('Preparation ID not associated with the study', 409) + self.fail("Preparation ID not associated with the study", 409) return artifact_deets = json_decode(self.request.body) - _, upload = get_mountpoint('uploads')[0] + _, upload = get_mountpoint("uploads")[0] base = os.path.join(upload, study_id) - filepaths = [(os.path.join(base, fp), fp_type) - for fp, fp_type in artifact_deets['filepaths']] + filepaths = [ + (os.path.join(base, fp), fp_type) + for fp, fp_type in artifact_deets["filepaths"] + ] try: - art = Artifact.create(filepaths, - artifact_deets['artifact_type'], - artifact_deets['artifact_name'], - p) + art = Artifact.create( + filepaths, + artifact_deets["artifact_type"], + artifact_deets["artifact_name"], + p, + ) except QiitaError as e: self.fail(str(e), 406) return - self.write({'id': art.id}) + self.write({"id": art.id}) self.set_status(201) self.finish() diff --git a/qiita_pet/handlers/rest/study_samples.py b/qiita_pet/handlers/rest/study_samples.py index 77f3c3667..700a8873b 100644 --- a/qiita_pet/handlers/rest/study_samples.py +++ b/qiita_pet/handlers/rest/study_samples.py @@ -5,26 +5,29 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from collections import defaultdict import io -from qiita_db.metadata_template.util import load_template_to_dataframe +from collections import defaultdict -from tornado.escape import json_encode, json_decode import pandas as pd +from tornado.escape import json_decode, json_encode from qiita_db.handlers.oauth2 import authenticate_oauth +from qiita_db.metadata_template.util import load_template_to_dataframe + from .rest_handler import RESTHandler def _sample_details(study, samples): def detail_maker(**kwargs): - base = {'sample_id': None, - 'sample_found': False, - 'ebi_sample_accession': None, - 'preparation_id': None, - 'ebi_experiment_accession': None, - 'preparation_visibility': None, - 'preparation_type': None} + base = { + "sample_id": None, + "sample_found": False, + "ebi_sample_accession": None, + "preparation_id": None, + "ebi_experiment_accession": None, + "preparation_visibility": None, + "preparation_type": None, + } assert set(kwargs).issubset(set(base)), "Unexpected key to set" @@ -54,8 +57,7 @@ def detail_maker(**kwargs): overlap_accessions = {i: accessions[i] for i in overlap} # store the detail we need - pt_light.append((pt.id, overlap_accessions, - pt.status, pt.data_type())) + pt_light.append((pt.id, overlap_accessions, pt.status, pt.data_type())) # only care about mapping the incoming samples for ptsample in overlap: @@ -75,23 +77,28 @@ def detail_maker(**kwargs): for pt_idx in sample_prep_mapping[sample]: ptid, ptacc, ptstatus, ptdtype = pt_light[pt_idx] - details.append(detail_maker( + details.append( + detail_maker( + sample_id=sample, + sample_found=True, + ebi_sample_accession=sample_acc, + preparation_id=ptid, + ebi_experiment_accession=ptacc.get(sample), + preparation_visibility=ptstatus, + preparation_type=ptdtype, + ) + ) + else: + # the sample is not present on any preparations + details.append( + detail_maker( sample_id=sample, sample_found=True, + # it would be weird to have an EBI sample accession + # but not be present on a preparation...? ebi_sample_accession=sample_acc, - preparation_id=ptid, - ebi_experiment_accession=ptacc.get(sample), - preparation_visibility=ptstatus, - preparation_type=ptdtype)) - else: - # the sample is not present on any preparations - details.append(detail_maker( - sample_id=sample, - sample_found=True, - - # it would be weird to have an EBI sample accession - # but not be present on a preparation...? 
- ebi_sample_accession=sample_acc)) + ) + ) else: # the is not present, let's note and move ona details.append(detail_maker(sample_id=sample)) @@ -103,7 +110,12 @@ class StudySampleDetailHandler(RESTHandler): @authenticate_oauth def get(self, study_id, sample_id): study = self.safe_get_study(study_id) - sample_detail = _sample_details(study, [sample_id, ]) + sample_detail = _sample_details( + study, + [ + sample_id, + ], + ) self.write(json_encode(sample_detail)) self.finish() @@ -113,19 +125,18 @@ class StudySamplesDetailHandler(RESTHandler): def post(self, study_id): samples = json_decode(self.request.body) - if 'sample_ids' not in samples: - self.fail('Missing sample_id key', 400) + if "sample_ids" not in samples: + self.fail("Missing sample_id key", 400) return study = self.safe_get_study(study_id) - samples_detail = _sample_details(study, samples['sample_ids']) + samples_detail = _sample_details(study, samples["sample_ids"]) self.write(json_encode(samples_detail)) self.finish() class StudySamplesHandler(RESTHandler): - @authenticate_oauth def get(self, study_id): study = self.safe_get_study(study_id) @@ -147,21 +158,20 @@ def patch(self, study_id): return if study.sample_template is None: - self.fail('No sample information found', 404) + self.fail("No sample information found", 404) return else: sample_info = study.sample_template.to_dataframe() # convert from json into a format that qiita can validate - rawdata = pd.DataFrame.from_dict(json_decode(self.request.body), - orient='index') - rawdata.index.name = 'sample_name' + rawdata = pd.DataFrame.from_dict(json_decode(self.request.body), orient="index") + rawdata.index.name = "sample_name" if len(rawdata.index) == 0: - self.fail('No samples provided', 400) + self.fail("No samples provided", 400) return buffer = io.StringIO() - rawdata.to_csv(buffer, sep='\t', index=True, header=True) + rawdata.to_csv(buffer, sep="\t", index=True, header=True) buffer.seek(0) # validate on load data = load_template_to_dataframe(buffer) @@ -172,8 +182,7 @@ def patch(self, study_id): # In either case, keep processing. Subsets of categories remain # invalid, however. 
if not set(data.columns).issuperset(categories): - self.fail('Not all sample information categories provided', - 400) + self.fail("Not all sample information categories provided", 400) return existing_samples = set(sample_info.index) @@ -204,42 +213,38 @@ def patch(self, study_id): class StudySamplesCategoriesHandler(RESTHandler): - @authenticate_oauth def get(self, study_id, categories): if not categories: - self.fail('No categories specified', 405) + self.fail("No categories specified", 405) return study = self.safe_get_study(study_id) if study is None: return - categories = categories.split(',') + categories = categories.split(",") if study.sample_template is None: - self.fail('Study does not have sample information', 404) + self.fail("Study does not have sample information", 404) return available_categories = set(study.sample_template.categories) not_found = set(categories) - available_categories if not_found: - self.fail('Category not found', 404, - categories_not_found=sorted(not_found)) + self.fail("Category not found", 404, categories_not_found=sorted(not_found)) return - blob = {'header': categories, - 'samples': {}} + blob = {"header": categories, "samples": {}} df = study.sample_template.to_dataframe() for idx, row in df[categories].iterrows(): - blob['samples'][idx] = list(row) + blob["samples"][idx] = list(row) self.write(json_encode(blob)) self.finish() class StudySamplesInfoHandler(RESTHandler): - @authenticate_oauth def get(self, study_id): study = self.safe_get_study(study_id) @@ -248,11 +253,9 @@ def get(self, study_id): st = study.sample_template if st is None: - info = {'number-of-samples': 0, - 'categories': []} + info = {"number-of-samples": 0, "categories": []} else: - info = {'number-of-samples': len(st), - 'categories': st.categories} + info = {"number-of-samples": len(st), "categories": st.categories} self.write(json_encode(info)) self.finish() diff --git a/qiita_pet/handlers/software.py b/qiita_pet/handlers/software.py index 67030c9e0..152398177 100644 --- a/qiita_pet/handlers/software.py +++ b/qiita_pet/handlers/software.py @@ -5,12 +5,14 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- +from copy import deepcopy + from tornado.gen import coroutine from qiita_core.util import execute_as_transaction -from qiita_db.software import Software, DefaultWorkflow +from qiita_db.software import DefaultWorkflow, Software + from .base_handlers import BaseHandler -from copy import deepcopy class SoftwareHandler(BaseHandler): @@ -20,7 +22,7 @@ def get(self): # active True will only show active software active = True user = self.current_user - if user is not None and user.level in {'admin', 'dev'}: + if user is not None and user.level in {"admin", "dev"}: active = False software = Software.iter(active=active) @@ -32,7 +34,7 @@ def _retrive_workflows(active): def _default_parameters_parsing(node): dp = node.default_parameter cmd = dp.command - cmd_name = 'params_%d' % node.id + cmd_name = "params_%d" % node.id rp = deepcopy(cmd.required_parameters) op = deepcopy(cmd.optional_parameters) params = dict() @@ -46,15 +48,13 @@ def _default_parameters_parsing(node): inputs = [] outputs = [] for input in rp.values(): - accepted_values = ' | '.join(input[1]) + accepted_values = " | ".join(input[1]) inputs.append([cmd.id, accepted_values]) for output in cmd.outputs: - outputs.append([cmd.id, ' | '.join(output)]) - fcmd_name = cmd.name if not cmd.naming_order else \ - f'{cmd.name} | {dp.name}' + outputs.append([cmd.id, " | ".join(output)]) + fcmd_name = cmd.name if not cmd.naming_order else f"{cmd.name} | {dp.name}" - return ([cmd_name, cmd.id, fcmd_name, dp.name, params], - inputs, outputs) + return ([cmd_name, cmd.id, fcmd_name, dp.name, params], inputs, outputs) workflows = [] for w in DefaultWorkflow.iter(active=active): @@ -84,7 +84,7 @@ def _default_parameters_parsing(node): vals_y, input_y, output_y = _default_parameters_parsing(y) connections = [] - for a, _, c in graph[x][y]['connections'].connections: + for a, _, c in graph[x][y]["connections"].connections: connections.append("%s | %s" % (a, c)) if i == 0: @@ -93,13 +93,13 @@ def _default_parameters_parsing(node): if input_x and at in input_x[0][1]: input_x[0][1] = at elif input_x: - input_x[0][1] = '** WARNING, NOT DEFINED **' + input_x[0][1] = "** WARNING, NOT DEFINED **" else: # if we get to this point it means that the workflow has a # multiple commands starting from the main single input, # thus is fine to link them to the same raw data standalone_input = vals_x[0] - input_x = [['', at]] + input_x = [["", at]] name_x = vals_x[0] name_y = vals_y[0] @@ -108,11 +108,11 @@ def _default_parameters_parsing(node): if name_x not in main_nodes: main_nodes[name_x] = dict() for a, b in input_x: - name = 'input_%s_%s' % (name_x, b) + name = "input_%s_%s" % (name_x, b) if b in inputs: name = inputs[b] else: - name = 'input_%s_%s' % (name_x, b) + name = "input_%s_%s" % (name_x, b) if standalone_input is not None: standalone_input = name vals = [name, a, b] @@ -121,7 +121,7 @@ def _default_parameters_parsing(node): nodes.append(vals) edges.append([name, vals_x[0]]) for a, b in output_x: - name = 'output_%s_%s' % (name_x, b) + name = "output_%s_%s" % (name_x, b) vals = [name, a, b] if vals not in nodes: nodes.append(vals) @@ -143,13 +143,13 @@ def _default_parameters_parsing(node): if b in main_nodes[name_x]: name = main_nodes[name_x][b] else: - name = 'input_%s_%s' % (name_y, b) + name = "input_%s_%s" % (name_y, b) vals = [name, a, b] if vals not in nodes: nodes.append(vals) edges.append([name, name_y]) for a, b in output_y: - name = 'output_%s_%s' % (name_y, b) + name 
= "output_%s_%s" % (name_y, b) vals = [name, a, b] if vals not in nodes: nodes.append(vals) @@ -170,13 +170,13 @@ def _default_parameters_parsing(node): if input_x and at in input_x[0][1]: input_x[0][1] = at elif input_x: - input_x[0][1] = '** WARNING, NOT DEFINED **' + input_x[0][1] = "** WARNING, NOT DEFINED **" else: # if we get to this point it means that these are "standalone" # commands, thus is fine to link them to the same raw data if standalone_input is None: standalone_input = vals_x[0] - input_x = [['', at]] + input_x = [["", at]] name_x = vals_x[0] if vals_x not in (nodes): @@ -185,7 +185,7 @@ def _default_parameters_parsing(node): if b in inputs: name = inputs[b] else: - name = 'input_%s_%s' % (name_x, b) + name = "input_%s_%s" % (name_x, b) # if standalone_input == name_x then this is the first time # we are processing a standalone command so we need to add # the node and store the name of the node for future usage @@ -198,16 +198,23 @@ def _default_parameters_parsing(node): name = standalone_input edges.append([name, vals_x[0]]) for a, b in output_x: - name = 'output_%s_%s' % (name_x, b) + name = "output_%s_%s" % (name_x, b) nodes.append([name, a, b]) edges.append([name_x, name]) workflows.append( - {'name': w.name, 'id': w.id, 'data_types': w.data_type, - 'description': w.description, 'active': w.active, - 'parameters_sample': wparams['sample'], - 'parameters_prep': wparams['prep'], - 'nodes': nodes, 'edges': edges}) + { + "name": w.name, + "id": w.id, + "data_types": w.data_type, + "description": w.description, + "active": w.active, + "parameters_sample": wparams["sample"], + "parameters_prep": wparams["prep"], + "nodes": nodes, + "edges": edges, + } + ) return workflows @@ -219,7 +226,7 @@ def get(self): # active True will only show active workflows active = True user = self.current_user - if user is not None and user.level in {'admin', 'dev'}: + if user is not None and user.level in {"admin", "dev"}: active = False workflows = _retrive_workflows(active) diff --git a/qiita_pet/handlers/stats.py b/qiita_pet/handlers/stats.py index c984f8a3f..e09d39d6e 100644 --- a/qiita_pet/handlers/stats.py +++ b/qiita_pet/handlers/stats.py @@ -7,11 +7,12 @@ # ----------------------------------------------------------------------------- from random import choice -from tornado.gen import coroutine, Task +from tornado.gen import Task, coroutine -from qiita_core.util import execute_as_transaction from qiita_core.qiita_settings import qiita_config, r_client +from qiita_core.util import execute_as_transaction from qiita_db.study import Study + from .base_handlers import BaseHandler @@ -22,19 +23,20 @@ def _get_stats(self, callback): # checking values from redis portal = qiita_config.portal vals = [ - ('number_studies', r_client.hgetall), - ('number_of_samples', r_client.hgetall), - ('num_users', r_client.get), - ('per_data_type_stats', r_client.hgetall), - ('lat_longs', r_client.get), - ('num_studies_ebi', r_client.get), - ('num_samples_ebi', r_client.get), - ('number_samples_ebi_prep', r_client.get), - ('img', r_client.get), - ('time', r_client.get), - ('num_processing_jobs', r_client.get)] + ("number_studies", r_client.hgetall), + ("number_of_samples", r_client.hgetall), + ("num_users", r_client.get), + ("per_data_type_stats", r_client.hgetall), + ("lat_longs", r_client.get), + ("num_studies_ebi", r_client.get), + ("num_samples_ebi", r_client.get), + ("number_samples_ebi_prep", r_client.get), + ("img", r_client.get), + ("time", r_client.get), + ("num_processing_jobs", r_client.get), + ] for 
k, f in vals: - redis_key = '%s:stats:%s' % (portal, k) + redis_key = "%s:stats:%s" % (portal, k) stats[k] = f(redis_key) callback(stats) @@ -45,7 +47,7 @@ def get(self): stats = yield Task(self._get_stats) # Pull a random public study from the database - public_studies = Study.get_by_status('public') + public_studies = Study.get_by_status("public") study = choice(list(public_studies)) if public_studies else None if study is None: @@ -57,18 +59,20 @@ def get(self): random_study_title = study.title random_study_id = study.id - self.render('stats.html', - number_studies=stats['number_studies'], - number_of_samples=stats['number_of_samples'], - num_users=stats['num_users'], - per_data_type_stats=stats['per_data_type_stats'], - lat_longs=eval( - stats['lat_longs']) if stats['lat_longs'] else [], - num_studies_ebi=stats['num_studies_ebi'], - num_samples_ebi=stats['num_samples_ebi'], - number_samples_ebi_prep=stats['number_samples_ebi_prep'], - img=stats['img'], time=stats['time'], - num_processing_jobs=stats['num_processing_jobs'], - random_study_info=random_study_info, - random_study_title=random_study_title, - random_study_id=random_study_id) + self.render( + "stats.html", + number_studies=stats["number_studies"], + number_of_samples=stats["number_of_samples"], + num_users=stats["num_users"], + per_data_type_stats=stats["per_data_type_stats"], + lat_longs=eval(stats["lat_longs"]) if stats["lat_longs"] else [], + num_studies_ebi=stats["num_studies_ebi"], + num_samples_ebi=stats["num_samples_ebi"], + number_samples_ebi_prep=stats["number_samples_ebi_prep"], + img=stats["img"], + time=stats["time"], + num_processing_jobs=stats["num_processing_jobs"], + random_study_info=random_study_info, + random_study_title=random_study_title, + random_study_id=random_study_id, + ) diff --git a/qiita_pet/handlers/study_handlers/__init__.py b/qiita_pet/handlers/study_handlers/__init__.py index 25a58aba1..d66c11c34 100644 --- a/qiita_pet/handlers/study_handlers/__init__.py +++ b/qiita_pet/handlers/study_handlers/__init__.py @@ -6,36 +6,91 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from .listing_handlers import (ListStudiesHandler, StudyApprovalList, - ShareStudyAJAX, ListStudiesAJAX, - AutocompleteHandler) -from .edit_handlers import StudyEditHandler, CreateStudyAJAX +from .artifact import ( + ArtifactAdminAJAX, + ArtifactGetInfo, + ArtifactGetSamples, + ArtifactGraphAJAX, + NewArtifactHandler, +) +from .base import ( + DataTypesMenuAJAX, + Study, + StudyBaseInfoAJAX, + StudyDeleteAjax, + StudyFilesAJAX, + StudyGetTags, + StudyIndexHandler, + StudyTags, +) from .ebi_handlers import EBISubmitHandler -from .vamps_handlers import VAMPSHandler -from .base import (StudyIndexHandler, StudyBaseInfoAJAX, StudyDeleteAjax, - DataTypesMenuAJAX, StudyFilesAJAX, StudyGetTags, StudyTags, - Study) +from .edit_handlers import CreateStudyAJAX, StudyEditHandler +from .listing_handlers import ( + AutocompleteHandler, + ListStudiesAJAX, + ListStudiesHandler, + ShareStudyAJAX, + StudyApprovalList, +) from .prep_template import ( - PrepTemplateAJAX, PrepFilesHandler, AddDefaultWorkflowHandler, - NewPrepTemplateAjax, PrepTemplateSummaryAJAX) -from .processing import (ListCommandsHandler, ListOptionsHandler, - WorkflowHandler, WorkflowRunHandler, JobAJAX) -from .artifact import (ArtifactGraphAJAX, NewArtifactHandler, - ArtifactAdminAJAX, ArtifactGetSamples, ArtifactGetInfo) + AddDefaultWorkflowHandler, + NewPrepTemplateAjax, + PrepFilesHandler, + PrepTemplateAJAX, + PrepTemplateSummaryAJAX, +) +from .processing import ( + JobAJAX, + ListCommandsHandler, + ListOptionsHandler, + WorkflowHandler, + WorkflowRunHandler, +) from .sample_template import ( - SampleTemplateHandler, SampleTemplateOverviewHandler, AnalysesAjax, - SampleTemplateColumnsHandler, SampleAJAX) + AnalysesAjax, + SampleAJAX, + SampleTemplateColumnsHandler, + SampleTemplateHandler, + SampleTemplateOverviewHandler, +) +from .vamps_handlers import VAMPSHandler -__all__ = ['ListStudiesHandler', 'StudyApprovalList', 'ShareStudyAJAX', - 'StudyEditHandler', 'CreateStudyAJAX', 'EBISubmitHandler', - 'VAMPSHandler', 'ListStudiesAJAX', 'ArtifactGraphAJAX', - 'ArtifactAdminAJAX', 'StudyIndexHandler', 'StudyBaseInfoAJAX', - 'SampleTemplateHandler', 'SampleTemplateOverviewHandler', - 'SampleTemplateColumnsHandler', 'AddDefaultWorkflowHandler', - 'PrepTemplateAJAX', 'NewArtifactHandler', 'PrepFilesHandler', - 'ListCommandsHandler', 'ListOptionsHandler', 'SampleAJAX', - 'StudyDeleteAjax', 'NewPrepTemplateAjax', - 'DataTypesMenuAJAX', 'StudyFilesAJAX', 'PrepTemplateSummaryAJAX', - 'WorkflowHandler', 'WorkflowRunHandler', 'AnalysesAjax', - 'JobAJAX', 'AutocompleteHandler', 'StudyGetTags', 'StudyTags', - 'Study', 'ArtifactGetSamples', 'ArtifactGetInfo'] +__all__ = [ + "ListStudiesHandler", + "StudyApprovalList", + "ShareStudyAJAX", + "StudyEditHandler", + "CreateStudyAJAX", + "EBISubmitHandler", + "VAMPSHandler", + "ListStudiesAJAX", + "ArtifactGraphAJAX", + "ArtifactAdminAJAX", + "StudyIndexHandler", + "StudyBaseInfoAJAX", + "SampleTemplateHandler", + "SampleTemplateOverviewHandler", + "SampleTemplateColumnsHandler", + "AddDefaultWorkflowHandler", + "PrepTemplateAJAX", + "NewArtifactHandler", + "PrepFilesHandler", + "ListCommandsHandler", + "ListOptionsHandler", + "SampleAJAX", + "StudyDeleteAjax", + "NewPrepTemplateAjax", + "DataTypesMenuAJAX", + "StudyFilesAJAX", + "PrepTemplateSummaryAJAX", + "WorkflowHandler", + "WorkflowRunHandler", + "AnalysesAjax", + "JobAJAX", + "AutocompleteHandler", + "StudyGetTags", + "StudyTags", + "Study", + "ArtifactGetSamples", + 
"ArtifactGetInfo", +] diff --git a/qiita_pet/handlers/study_handlers/artifact.py b/qiita_pet/handlers/study_handlers/artifact.py index 42734743d..9509e3945 100644 --- a/qiita_pet/handlers/study_handlers/artifact.py +++ b/qiita_pet/handlers/study_handlers/artifact.py @@ -7,24 +7,27 @@ # ----------------------------------------------------------------------------- from tornado.web import authenticated - -from qiita_pet.handlers.util import to_int -from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_pet.handlers.api_proxy import ( - artifact_graph_get_req, artifact_types_get_req, artifact_post_req, - artifact_status_put_req, artifact_get_req, artifact_get_prep_req, - artifact_get_info) -from qiita_core.util import execute_as_transaction from qiita_core.qiita_settings import qiita_config +from qiita_core.util import execute_as_transaction +from qiita_pet.handlers.api_proxy import ( + artifact_get_info, + artifact_get_prep_req, + artifact_get_req, + artifact_graph_get_req, + artifact_post_req, + artifact_status_put_req, + artifact_types_get_req, +) +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.handlers.util import to_int class ArtifactGraphAJAX(BaseHandler): @authenticated def get(self): - direction = self.get_argument('direction') - artifact = to_int(self.get_argument('artifact_id')) - self.write(artifact_graph_get_req(artifact, direction, - self.current_user.id)) + direction = self.get_argument("direction") + artifact = to_int(self.get_argument("artifact_id")) + self.write(artifact_graph_get_req(artifact, direction, self.current_user.id)) class NewArtifactHandler(BaseHandler): @@ -32,44 +35,53 @@ class NewArtifactHandler(BaseHandler): def get(self): study_id = self.get_argument("study_id") prep_id = self.get_argument("prep_template_id") - artifact_types = [(at, desc) for at, desc, _, _, is_user_uploadable in - artifact_types_get_req()['types'] - if is_user_uploadable] - - self.render("study_ajax/add_artifact.html", - study_id=study_id, prep_id=prep_id, - artifact_types=artifact_types) + artifact_types = [ + (at, desc) + for at, desc, _, _, is_user_uploadable in artifact_types_get_req()["types"] + if is_user_uploadable + ] + + self.render( + "study_ajax/add_artifact.html", + study_id=study_id, + prep_id=prep_id, + artifact_types=artifact_types, + ) @authenticated @execute_as_transaction def post(self): - artifact_type = self.get_argument('artifact-type') - name = self.get_argument('name') - prep_id = self.get_argument('prep-template-id') - artifact_id = self.get_argument('import-artifact') + artifact_type = self.get_argument("artifact-type") + name = self.get_argument("name") + prep_id = self.get_argument("prep-template-id") + artifact_id = self.get_argument("import-artifact") # Request the rest of the arguments, which will be the files files = dict() for arg in self.request.arguments: - if arg not in ['name', 'prep-template-id', 'artifact-type', - 'import-artifact']: + if arg not in [ + "name", + "prep-template-id", + "artifact-type", + "import-artifact", + ]: arg_name = arg # removing ending [], in case they exist, necessary for JS # array transformation - if arg_name.endswith('[]'): + if arg_name.endswith("[]"): arg_name = arg_name[:-2] files[arg_name] = self.get_argument(arg) artifact = artifact_post_req( - self.current_user.id, files, artifact_type, name, prep_id, - artifact_id) + self.current_user.id, files, artifact_type, name, prep_id, artifact_id + ) self.write(artifact) class ArtifactGetSamples(BaseHandler): @authenticated def get(self): - 
aids = map(int, self.request.arguments.get('ids[]', [])) + aids = map(int, self.request.arguments.get("ids[]", [])) response = artifact_get_prep_req(self.current_user.id, aids) @@ -79,8 +91,8 @@ def get(self): class ArtifactGetInfo(BaseHandler): @authenticated def post(self): - aids = map(int, self.request.arguments.get('ids[]', [])) - only_biom = self.get_argument('only_biom', 'True') == 'True' + aids = map(int, self.request.arguments.get("ids[]", [])) + only_biom = self.get_argument("only_biom", "True") == "True" response = artifact_get_info(self.current_user.id, aids, only_biom) @@ -90,58 +102,63 @@ def post(self): class ArtifactAdminAJAX(BaseHandler): @authenticated def get(self): - artifact_id = to_int(self.get_argument('artifact_id')) + artifact_id = to_int(self.get_argument("artifact_id")) info = artifact_get_req(self.current_user.id, artifact_id) - status = info['visibility'] + status = info["visibility"] buttons = [] - btn_base = ('').format(artifact_id) + btn_base = ( + "' + ).format(artifact_id) if qiita_config.require_approval: - if status == 'sandbox': + if status == "sandbox": # The request approval button only appears if the processed # data issandboxed and the qiita_config specifies that the # approval should be requested - buttons.append( - btn_base % ('awaiting_approval', 'Request approval')) - elif self.current_user.level == 'admin' and \ - status == 'awaiting_approval': + buttons.append(btn_base % ("awaiting_approval", "Request approval")) + elif self.current_user.level == "admin" and status == "awaiting_approval": # The approve processed data button only appears if the user is # an admin, the processed data is waiting to be approved and # the qiita config requires processed data approval - buttons.append(btn_base % ('private', 'Approve artifact')) - if status == 'private': + buttons.append(btn_base % ("private", "Approve artifact")) + if status == "private": # The make public button only appears if the status is private - buttons.append(btn_base % ('public', 'Make public')) + buttons.append(btn_base % ("public", "Make public")) # The revert to sandbox button only appears if the processed data is # not sandboxed or public - if status not in {'sandbox', 'public'}: - buttons.append(btn_base % ('sandbox', 'Revert to sandbox')) + if status not in {"sandbox", "public"}: + buttons.append(btn_base % ("sandbox", "Revert to sandbox")) # Add EBI and VAMPS submission buttons if allowed - if not info['ebi_run_accessions'] and info['can_submit_ebi']: - buttons.append(' Submit to EBI') - if not info['is_submitted_vamps'] and \ - info['can_submit_vamps']: - buttons.append(' Submit to VAMPS') + if not info["ebi_run_accessions"] and info["can_submit_ebi"]: + buttons.append( + ' Submit to EBI' + ) + if not info["is_submitted_vamps"] and info["can_submit_vamps"]: + buttons.append( + ' Submit to VAMPS' + ) # Add delete button if in sandbox status - if status == 'sandbox': - buttons = ['' - % (artifact_id)] + if status == "sandbox": + buttons = [ + '' % (artifact_id) + ] - self.write(' '.join(buttons)) + self.write(" ".join(buttons)) @authenticated def post(self): - visibility = self.get_argument('visibility') - artifact_id = int(self.get_argument('artifact_id')) - response = artifact_status_put_req(artifact_id, self.current_user.id, - visibility) + visibility = self.get_argument("visibility") + artifact_id = int(self.get_argument("artifact_id")) + response = artifact_status_put_req( + artifact_id, self.current_user.id, visibility + ) self.write(response) diff --git 
a/qiita_pet/handlers/study_handlers/base.py b/qiita_pet/handlers/study_handlers/base.py index 6a8d65241..1d253f22b 100644 --- a/qiita_pet/handlers/study_handlers/base.py +++ b/qiita_pet/handlers/study_handlers/base.py @@ -5,52 +5,60 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from tornado.web import authenticated, HTTPError -from tornado.escape import json_decode from markdown2 import Markdown +from tornado.escape import json_decode +from tornado.web import HTTPError, authenticated -from qiita_pet.util import EBI_LINKIFIER -from qiita_pet.handlers.util import to_int, doi_linkifier, pubmed_linkifier -from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.api_proxy import ( - study_prep_get_req, study_get_req, study_delete_req, study_tags_request, - study_patch_request, study_get_tags_request, study_files_get_req) + study_delete_req, + study_files_get_req, + study_get_req, + study_get_tags_request, + study_patch_request, + study_prep_get_req, + study_tags_request, +) +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.handlers.util import doi_linkifier, pubmed_linkifier, to_int +from qiita_pet.util import EBI_LINKIFIER class StudyIndexHandler(BaseHandler): @authenticated def get(self, study_id): study = to_int(study_id) - level = self.get_argument('level', '') - message = self.get_argument('message', '') - prep_id = self.get_argument('prep_id', default=None) + level = self.get_argument("level", "") + message = self.get_argument("message", "") + prep_id = self.get_argument("prep_id", default=None) study_info = study_get_req(study, self.current_user.id) - if study_info['status'] != 'success': - raise HTTPError(404, reason=study_info['message']) + if study_info["status"] != "success": + raise HTTPError(404, reason=study_info["message"]) - if message != '' and level != '': - study_info['level'] = level - study_info['message'] = message + if message != "" and level != "": + study_info["level"] = level + study_info["message"] = message if prep_id: msg = f"'{prep_id}' is not a valid preparation for this study" - study_info['study_info']['prep_id'] = to_int(prep_id, msg) + study_info["study_info"]["prep_id"] = to_int(prep_id, msg) # prep_id is an integer - confirm that it's a valid prep_id. prep_info = study_prep_get_req(study, self.current_user.id) - if prep_info['status'] != 'success': - raise HTTPError(404, reason=prep_info['message']) + if prep_info["status"] != "success": + raise HTTPError(404, reason=prep_info["message"]) prep_ids = [] - for prep_type in prep_info['info']: + for prep_type in prep_info["info"]: # prep_type will be either '18S', '16S', or similarly named. # generate a list of prep-ids from the preps in each list. 
- prep_ids += [x['id'] for x in prep_info['info'][prep_type]] + prep_ids += [x["id"] for x in prep_info["info"][prep_type]] - if study_info['study_info']['prep_id'] not in prep_ids: - raise HTTPError(400, reason=(f"'{prep_id}' is not a valid " - "preparation for this study")) + if study_info["study_info"]["prep_id"] not in prep_ids: + raise HTTPError( + 400, + reason=(f"'{prep_id}' is not a valid preparation for this study"), + ) self.render("study_base.html", **study_info) @@ -58,71 +66,81 @@ def get(self, study_id): class StudyBaseInfoAJAX(BaseHandler): @authenticated def get(self): - study_id = self.get_argument('study_id') + study_id = self.get_argument("study_id") study = to_int(study_id) res = study_get_req(study, self.current_user.id) - study_info = res['study_info'] - pdoi = [doi_linkifier([p]) for p in study_info['publication_doi']] - ppid = [pubmed_linkifier([p]) for p in study_info['publication_pid']] + study_info = res["study_info"] + pdoi = [doi_linkifier([p]) for p in study_info["publication_doi"]] + ppid = [pubmed_linkifier([p]) for p in study_info["publication_pid"]] email = '{name} ({affiliation})' - pi = email.format(**study_info['principal_investigator']) - if study_info['lab_person']: - contact = email.format(**study_info['lab_person']) + pi = email.format(**study_info["principal_investigator"]) + if study_info["lab_person"]: + contact = email.format(**study_info["lab_person"]) else: contact = None - share_access = (self.current_user.id in study_info['shared_with'] or - self.current_user.id == study_info['owner']) + share_access = ( + self.current_user.id in study_info["shared_with"] + or self.current_user.id == study_info["owner"] + ) - ebi_info = study_info['ebi_submission_status'] - ebi_study_accession = study_info['ebi_study_accession'] + ebi_info = study_info["ebi_submission_status"] + ebi_study_accession = study_info["ebi_study_accession"] if ebi_study_accession: - links = ''.join([EBI_LINKIFIER.format(a) - for a in ebi_study_accession.split(',')]) - ebi_info = '%s (%s)' % (links, study_info['ebi_submission_status']) + links = "".join( + [EBI_LINKIFIER.format(a) for a in ebi_study_accession.split(",")] + ) + ebi_info = "%s (%s)" % (links, study_info["ebi_submission_status"]) markdowner = Markdown() - study_info['notes'] = markdowner.convert(study_info['notes']) + study_info["notes"] = markdowner.convert(study_info["notes"]) - self.render('study_ajax/base_info.html', - study_info=study_info, publications=', '.join(pdoi + ppid), - pi=pi, contact=contact, editable=res['editable'], - share_access=share_access, ebi_info=ebi_info) + self.render( + "study_ajax/base_info.html", + study_info=study_info, + publications=", ".join(pdoi + ppid), + pi=pi, + contact=contact, + editable=res["editable"], + share_access=share_access, + ebi_info=ebi_info, + ) class StudyDeleteAjax(BaseHandler): @authenticated def post(self): - study_id = self.get_argument('study_id') + study_id = self.get_argument("study_id") self.write(study_delete_req(int(study_id), self.current_user.id)) class DataTypesMenuAJAX(BaseHandler): @authenticated def get(self): - study_id = to_int(self.get_argument('study_id')) + study_id = to_int(self.get_argument("study_id")) # Retrieve the prep template information for the menu prep_info = study_prep_get_req(study_id, self.current_user.id) # Make sure study exists - if prep_info['status'] != 'success': - raise HTTPError(404, reason=prep_info['message']) + if prep_info["status"] != "success": + raise HTTPError(404, reason=prep_info["message"]) - prep_info = 
prep_info['info'] + prep_info = prep_info["info"] - self.render('study_ajax/data_type_menu.html', prep_info=prep_info, - study_id=study_id) + self.render( + "study_ajax/data_type_menu.html", prep_info=prep_info, study_id=study_id + ) class StudyFilesAJAX(BaseHandler): @authenticated def get(self): - study_id = to_int(self.get_argument('study_id')) - atype = self.get_argument('artifact_type') - pt_id = self.get_argument('prep_template_id') + study_id = to_int(self.get_argument("study_id")) + atype = self.get_argument("artifact_type") + pt_id = self.get_argument("prep_template_id") res = study_files_get_req(self.current_user.id, study_id, pt_id, atype) - self.render('study_ajax/artifact_file_selector.html', **res) + self.render("study_ajax/artifact_file_selector.html", **res) class StudyGetTags(BaseHandler): @@ -152,11 +170,12 @@ def patch(self, study_id): study_id = to_int(study_id) data = json_decode(self.request.body) - req_op = data.get('op') - req_path = data.get('path') - req_value = data.get('value') - req_from = data.get('from', None) + req_op = data.get("op") + req_path = data.get("path") + req_value = data.get("value") + req_from = data.get("from", None) - response = study_patch_request(self.current_user.id, study_id, - req_op, req_path, req_value, req_from) + response = study_patch_request( + self.current_user.id, study_id, req_op, req_path, req_value, req_from + ) self.write(response) diff --git a/qiita_pet/handlers/study_handlers/ebi_handlers.py b/qiita_pet/handlers/study_handlers/ebi_handlers.py index 878961695..8d5626d4f 100644 --- a/qiita_pet/handlers/study_handlers/ebi_handlers.py +++ b/qiita_pet/handlers/study_handlers/ebi_handlers.py @@ -5,24 +5,25 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from tornado.web import authenticated, HTTPError -from tornado.escape import url_escape from json import dumps from qiita_files.demux import stats as demux_stats +from tornado.escape import url_escape +from tornado.web import HTTPError, authenticated -from qiita_core.qiita_settings import r_client, qiita_config +from qiita_core.qiita_settings import qiita_config, r_client from qiita_core.util import execute_as_transaction -from qiita_db.metadata_template.constants import (SAMPLE_TEMPLATE_COLUMNS, - PREP_TEMPLATE_COLUMNS) -from qiita_db.exceptions import QiitaDBUnknownIDError from qiita_db.artifact import Artifact +from qiita_db.exceptions import QiitaDBUnknownIDError +from qiita_db.metadata_template.constants import ( + PREP_TEMPLATE_COLUMNS, + SAMPLE_TEMPLATE_COLUMNS, +) from qiita_db.processing_job import ProcessingJob -from qiita_db.software import Software, Parameters +from qiita_db.software import Parameters, Software from qiita_pet.handlers.base_handlers import BaseHandler - -VALID_SUBMISSION_TYPES = ['Demultiplexed', 'per_sample_FASTQ'] +VALID_SUBMISSION_TYPES = ["Demultiplexed", "per_sample_FASTQ"] class EBISubmitHandler(BaseHandler): @@ -33,20 +34,23 @@ def display_template(self, artifact_id, msg, msg_level): try: artifact = Artifact(artifact_id) except QiitaDBUnknownIDError: - raise HTTPError(404, reason="Artifact %d does not exist!" % - artifact_id) + raise HTTPError(404, reason="Artifact %d does not exist!" % artifact_id) else: user = self.current_user - if user.level != 'admin': - raise HTTPError(403, reason="No permissions of admin, " - "get/EBISubmitHandler: %s!" 
% user.id) + if user.level != "admin": + raise HTTPError( + 403, + reason="No permissions of admin, " + "get/EBISubmitHandler: %s!" % user.id, + ) prep_templates = artifact.prep_templates allow_submission = len(prep_templates) == 1 msg_list = ["Submission to EBI disabled:"] if not allow_submission: msg_list.append( - "Only artifacts with a single prep template can be submitted") + "Only artifacts with a single prep template can be submitted" + ) # If allow_submission is already false, we technically don't need to # do the following work. However, there is no clean way to fix this # using the current structure, so we perform the work as we @@ -57,68 +61,84 @@ def display_template(self, artifact_id, msg, msg_level): study = artifact.study sample_template = study.sample_template stats = { - 'Number of samples': len(prep_template), - 'Number of metadata headers': len(sample_template.categories), - 'Number of sequences': 'N/A', - 'Total forward': 'N/A', - 'Total reverse': 'N/A' + "Number of samples": len(prep_template), + "Number of metadata headers": len(sample_template.categories), + "Number of sequences": "N/A", + "Total forward": "N/A", + "Total reverse": "N/A", } artifact_type = artifact.artifact_type if artifact_type not in VALID_SUBMISSION_TYPES: msg = "You can only submit: '%s' and this artifact is '%s'" % ( - ', '.join(VALID_SUBMISSION_TYPES), artifact_type) - msg_level = 'danger' - elif artifact_type == 'Demultiplexed': - demux = [x['fp'] for x in artifact.filepaths - if x['fp_type'] == 'preprocessed_demux'] + ", ".join(VALID_SUBMISSION_TYPES), + artifact_type, + ) + msg_level = "danger" + elif artifact_type == "Demultiplexed": + demux = [ + x["fp"] + for x in artifact.filepaths + if x["fp_type"] == "preprocessed_demux" + ] demux_length = len(demux) if demux_length > 1: msg = "Study appears to have multiple demultiplexed files!" 
- msg_level = 'danger' + msg_level = "danger" else: demux_file = demux[0] demux_file_stats = demux_stats(demux_file) - stats['Number of sequences'] = demux_file_stats.n - msg_level = 'success' - elif artifact_type == 'per_sample_FASTQ': + stats["Number of sequences"] = demux_file_stats.n + msg_level = "success" + elif artifact_type == "per_sample_FASTQ": raw_forward_seqs = [] raw_reverse_seqs = [] for x in artifact.filepaths: - if x['fp_type'] == 'raw_forward_seqs': - raw_forward_seqs.append(x['fp']) - elif x['fp_type'] == 'raw_reverse_seqs': - raw_reverse_seqs.append(x['fp']) - stats['Total forward'] = len(raw_forward_seqs) - stats['Total reverse'] = len(raw_reverse_seqs) - msg_level = 'success' + if x["fp_type"] == "raw_forward_seqs": + raw_forward_seqs.append(x["fp"]) + elif x["fp_type"] == "raw_reverse_seqs": + raw_reverse_seqs.append(x["fp"]) + stats["Total forward"] = len(raw_forward_seqs) + stats["Total reverse"] = len(raw_reverse_seqs) + msg_level = "success" # Check if the templates have all the required columns for EBI pt_missing_cols = prep_template.check_restrictions( - [PREP_TEMPLATE_COLUMNS['EBI']]) + [PREP_TEMPLATE_COLUMNS["EBI"]] + ) st_missing_cols = sample_template.check_restrictions( - [SAMPLE_TEMPLATE_COLUMNS['EBI']]) - allow_submission = (len(pt_missing_cols) == 0 and - len(st_missing_cols) == 0 and allow_submission) + [SAMPLE_TEMPLATE_COLUMNS["EBI"]] + ) + allow_submission = ( + len(pt_missing_cols) == 0 and len(st_missing_cols) == 0 and allow_submission + ) if not allow_submission: if len(pt_missing_cols) > 0: - msg_list.append("Columns missing in prep template: %s" - % ', '.join(pt_missing_cols)) + msg_list.append( + "Columns missing in prep template: %s" % ", ".join(pt_missing_cols) + ) if len(st_missing_cols) > 0: - msg_list.append("Columns missing in sample template: %s" - % ', '.join(st_missing_cols)) + msg_list.append( + "Columns missing in sample template: %s" + % ", ".join(st_missing_cols) + ) ebi_disabled_msg = "
".join(msg_list) else: ebi_disabled_msg = None - self.render('ebi_submission.html', - study_title=study.title, stats=stats.items(), message=msg, - study_id=study.id, level=msg_level, - preprocessed_data_id=artifact_id, - investigation_type=prep_template.investigation_type, - allow_submission=allow_submission, - ebi_disabled_msg=ebi_disabled_msg) + self.render( + "ebi_submission.html", + study_title=study.title, + stats=stats.items(), + message=msg, + study_id=study.id, + level=msg_level, + preprocessed_data_id=artifact_id, + investigation_type=prep_template.investigation_type, + allow_submission=allow_submission, + ebi_disabled_msg=ebi_disabled_msg, + ) @authenticated def get(self, preprocessed_data_id): @@ -129,34 +149,44 @@ def get(self, preprocessed_data_id): def post(self, preprocessed_data_id): user = self.current_user # make sure user is admin and can therefore actually submit to EBI - if user.level != 'admin': - raise HTTPError(403, reason="User %s cannot submit to EBI!" % - user.id) - submission_type = self.get_argument('submission_type') + if user.level != "admin": + raise HTTPError(403, reason="User %s cannot submit to EBI!" % user.id) + submission_type = self.get_argument("submission_type") - if submission_type not in ['ADD', 'MODIFY']: - raise HTTPError(403, reason="User: %s, %s is not a recognized " - "submission type" % (user.id, submission_type)) + if submission_type not in ["ADD", "MODIFY"]: + raise HTTPError( + 403, + reason="User: %s, %s is not a recognized " + "submission type" % (user.id, submission_type), + ) study = Artifact(preprocessed_data_id).study state = study.ebi_submission_status - if state == 'submitting': + if state == "submitting": message = "Cannot resubmit! Current state is: %s" % state - self.display_template(preprocessed_data_id, message, 'danger') + self.display_template(preprocessed_data_id, message, "danger") else: - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = qiita_plugin.get_command('submit_to_EBI') + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("submit_to_EBI") params = Parameters.load( - cmd, values_dict={'artifact': preprocessed_data_id, - 'submission_type': submission_type}) + cmd, + values_dict={ + "artifact": preprocessed_data_id, + "submission_type": submission_type, + }, + ) job = ProcessingJob.create(user, params, True) - r_client.set('ebi_submission_%s' % preprocessed_data_id, - dumps({'job_id': job.id, 'is_qiita_job': True})) + r_client.set( + "ebi_submission_%s" % preprocessed_data_id, + dumps({"job_id": job.id, "is_qiita_job": True}), + ) job.submit() - level = 'success' - message = 'EBI submission started. Job id: %s' % job.id + level = "success" + message = "EBI submission started. Job id: %s" % job.id - self.redirect("%s/study/description/%d?level=%s&message=%s" % ( - qiita_config.portal_dir, study.id, level, url_escape(message))) + self.redirect( + "%s/study/description/%d?level=%s&message=%s" + % (qiita_config.portal_dir, study.id, level, url_escape(message)) + ) diff --git a/qiita_pet/handlers/study_handlers/edit_handlers.py b/qiita_pet/handlers/study_handlers/edit_handlers.py index 707e2b9c8..8b953ef99 100644 --- a/qiita_pet/handlers/study_handlers/edit_handlers.py +++ b/qiita_pet/handlers/study_handlers/edit_handlers.py @@ -5,17 +5,23 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from tornado.web import authenticated, HTTPError -from wtforms import (Form, StringField, SelectField, SelectMultipleField, - TextAreaField, validators) +from tornado.web import HTTPError, authenticated +from wtforms import ( + Form, + SelectField, + SelectMultipleField, + StringField, + TextAreaField, + validators, +) from qiita_core.qiita_settings import qiita_config -from qiita_db.study import Study, StudyPerson -from qiita_db.util import get_timeseries_types, get_environmental_packages +from qiita_core.util import execute_as_transaction from qiita_db.exceptions import QiitaDBUnknownIDError +from qiita_db.study import Study, StudyPerson +from qiita_db.util import get_environmental_packages, get_timeseries_types from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.util import check_access -from qiita_core.util import execute_as_transaction class StudyEditorForm(Form): @@ -45,26 +51,31 @@ class StudyEditorForm(Form): StudyEditorExtendedForm wtforms.Form """ - study_title = StringField('Study Title', [validators.Required()]) - study_alias = StringField('Study Alias', [validators.Required()]) + + study_title = StringField("Study Title", [validators.Required()]) + study_alias = StringField("Study Alias", [validators.Required()]) publication_doi = StringField( - 'DOI', description=('Just values, no links, comma separated values')) + "DOI", description=("Just values, no links, comma separated values") + ) publication_pid = StringField( - 'PUBMED ID', description=('Just values, no links, comma ' - 'separated values')) - study_abstract = TextAreaField('Study Abstract', [validators.Required()]) - study_description = StringField('Study Description', - [validators.Required()]) + "PUBMED ID", description=("Just values, no links, comma separated values") + ) + study_abstract = TextAreaField("Study Abstract", [validators.Required()]) + study_description = StringField("Study Description", [validators.Required()]) # The choices for these "people" fields will be filled from the database - principal_investigator = SelectField('Principal Investigator', - [validators.Required()], - coerce=lambda x: x) - - lab_person = SelectField('Lab Person', coerce=lambda x: x) - notes = TextAreaField('Analytical Notes', description=( - 'Any relevant information about the samples or the processing that ' - 'other users should be aware of (e.g. problematic samples, ' - 'explaining certain metadata columns, etc) - renders as markdown')) + principal_investigator = SelectField( + "Principal Investigator", [validators.Required()], coerce=lambda x: x + ) + + lab_person = SelectField("Lab Person", coerce=lambda x: x) + notes = TextAreaField( + "Analytical Notes", + description=( + "Any relevant information about the samples or the processing that " + "other users should be aware of (e.g. 
problematic samples, " + "explaining certain metadata columns, etc) - renders as markdown" + ), + ) @execute_as_transaction def __init__(self, study=None, **kwargs): @@ -72,9 +83,10 @@ def __init__(self, study=None, **kwargs): # Get people from the study_person table to populate the PI and # lab_person fields - choices = [(sp.id, u"%s, %s" % (sp.name, sp.affiliation)) - for sp in StudyPerson.iter()] - choices.insert(0, ('', '')) + choices = [ + (sp.id, "%s, %s" % (sp.name, sp.affiliation)) for sp in StudyPerson.iter() + ] + choices.insert(0, ("", "")) self.lab_person.choices = choices self.principal_investigator.choices = choices @@ -84,7 +96,7 @@ def __init__(self, study=None, **kwargs): study_info = study.info self.study_title.data = study.title - self.study_alias.data = study_info['study_alias'] + self.study_alias.data = study_info["study_alias"] dois = [] pids = [] for p, is_doi in study.publications: @@ -94,12 +106,12 @@ def __init__(self, study=None, **kwargs): pids.append(p) self.publication_doi.data = ",".join(dois) self.publication_pid.data = ",".join(pids) - self.study_abstract.data = study_info['study_abstract'] - self.study_description.data = study_info['study_description'] - self.principal_investigator.data = study_info[ - 'principal_investigator'].id - self.lab_person.data = (study_info['lab_person'].id - if study_info['lab_person'] else None) + self.study_abstract.data = study_info["study_abstract"] + self.study_description.data = study_info["study_description"] + self.principal_investigator.data = study_info["principal_investigator"].id + self.lab_person.data = ( + study_info["lab_person"].id if study_info["lab_person"] else None + ) self.notes.data = study.notes @@ -124,9 +136,11 @@ class StudyEditorExtendedForm(StudyEditorForm): StudyEditorForm wtforms.Form """ - environmental_packages = SelectMultipleField('Environmental Packages', - [validators.Required()]) - timeseries = SelectField('Event-Based Data', coerce=lambda x: x) + + environmental_packages = SelectMultipleField( + "Environmental Packages", [validators.Required()] + ) + timeseries = SelectField("Event-Based Data", coerce=lambda x: x) @execute_as_transaction def __init__(self, study=None, **kwargs): @@ -138,13 +152,16 @@ def __init__(self, study=None, **kwargs): # (table name, env package name) so the actual environmental package # name is displayed on the GUI self.environmental_packages.choices = [ - (name, name) for name, table in get_environmental_packages()] + (name, name) for name, table in get_environmental_packages() + ] # Get the available timeseries types to populate the timeseries field - choices = [[time_id, '%s, %s' % (int_t, time_t)] - for time_id, time_t, int_t in get_timeseries_types()] + choices = [ + [time_id, "%s, %s" % (int_t, time_t)] + for time_id, time_t, int_t in get_timeseries_types() + ] # Change None, None to 'No timeseries', just for GUI purposes - choices[0][1] = 'No timeseries' + choices[0][1] = "No timeseries" self.timeseries.choices = choices # If a study is provided, put its values in the form @@ -152,7 +169,7 @@ def __init__(self, study=None, **kwargs): study_info = study.info self.environmental_packages.data = study.environmental_packages - self.timeseries.data = study_info['timeseries_type_id'] + self.timeseries.data = study_info["timeseries_type_id"] class StudyEditHandler(BaseHandler): @@ -203,13 +220,12 @@ def get(self, study_id=None): study = self._check_study_exists_and_user_access(study_id) # If the study is not sandboxed, we use the short # version of the form - if 
study.status != 'sandbox': + if study.status != "sandbox": form_factory = StudyEditorForm creation_form = form_factory(study=study) - self.render('edit_study.html', - creation_form=creation_form, study=study) + self.render("edit_study.html", creation_form=creation_form, study=study) @authenticated @execute_as_transaction @@ -220,7 +236,7 @@ def post(self, study=None): # Check study and user access the_study = self._check_study_exists_and_user_access(study) # If the study is not sandbox, we use the short version - if the_study.status != 'sandbox': + if the_study.status != "sandbox": form_factory = StudyEditorForm # Get the form data from the request arguments @@ -232,22 +248,24 @@ def post(self, study=None): # values instead of empty strings new_people_info = [ (name, email, affiliation, phone or None, address or None) - for name, email, affiliation, phone, address in - zip(self.get_arguments('new_people_names'), - self.get_arguments('new_people_emails'), - self.get_arguments('new_people_affiliations'), - self.get_arguments('new_people_phones'), - self.get_arguments('new_people_addresses'))] + for name, email, affiliation, phone, address in zip( + self.get_arguments("new_people_names"), + self.get_arguments("new_people_emails"), + self.get_arguments("new_people_affiliations"), + self.get_arguments("new_people_phones"), + self.get_arguments("new_people_addresses"), + ) + ] # New people will be indexed with negative numbers, so we reverse # the list here new_people_info.reverse() - index = int(form_data.data['principal_investigator'][0]) + index = int(form_data.data["principal_investigator"][0]) PI = self._get_study_person_id(index, new_people_info) - if form_data.data['lab_person'][0]: - index = int(form_data.data['lab_person'][0]) + if form_data.data["lab_person"][0]: + index = int(form_data.data["lab_person"][0]) lab_person = self._get_study_person_id(index, new_people_info) else: lab_person = None @@ -255,67 +273,71 @@ def post(self, study=None): # TODO: MIXS compliant? Always true, right? 
fd = form_data.data info = { - 'lab_person_id': lab_person, - 'principal_investigator_id': PI, - 'metadata_complete': False, - 'mixs_compliant': True, - 'study_description': fd['study_description'][0].decode('utf-8'), - 'study_alias': fd['study_alias'][0].decode('utf-8'), - 'study_abstract': fd['study_abstract'][0].decode('utf-8'), - 'notes': fd['notes'][0].decode('utf-8')} + "lab_person_id": lab_person, + "principal_investigator_id": PI, + "metadata_complete": False, + "mixs_compliant": True, + "study_description": fd["study_description"][0].decode("utf-8"), + "study_alias": fd["study_alias"][0].decode("utf-8"), + "study_abstract": fd["study_abstract"][0].decode("utf-8"), + "notes": fd["notes"][0].decode("utf-8"), + } - if 'timeseries' in fd and fd['timeseries']: - info['timeseries_type_id'] = fd['timeseries'][0].decode('utf-8') + if "timeseries" in fd and fd["timeseries"]: + info["timeseries_type_id"] = fd["timeseries"][0].decode("utf-8") - study_title = fd['study_title'][0].decode('utf-8') + study_title = fd["study_title"][0].decode("utf-8") if the_study: # We are under editing, so just update the values the_study.title = study_title the_study.info = info - msg = ('Study %s ' - 'successfully updated' % - (qiita_config.portal_dir, the_study.id, study_title)) + msg = ( + 'Study %s ' + "successfully updated" + % (qiita_config.portal_dir, the_study.id, study_title) + ) else: # create the study # TODO: Fix this EFO once ontology stuff from emily is added the_study = Study.create(self.current_user, study_title, info=info) - msg = ('Study %s ' - 'successfully created' % - (qiita_config.portal_dir, the_study.id, study_title)) + msg = ( + 'Study %s ' + "successfully created" + % (qiita_config.portal_dir, the_study.id, study_title) + ) # Add the environmental packages, this attribute can only be edited # if the study is not public, otherwise this cannot be changed if isinstance(form_data, StudyEditorExtendedForm): - vals = [ - eval(v).decode('utf-8') for v in fd['environmental_packages']] + vals = [eval(v).decode("utf-8") for v in fd["environmental_packages"]] the_study.environmental_packages = vals pubs = [] - dois = fd['publication_doi'] + dois = fd["publication_doi"] if dois and dois[0]: # The user can provide a comma-seprated list - dois = dois[0].decode('utf-8').split(',') + dois = dois[0].decode("utf-8").split(",") # Make sure that we strip the spaces from the pubmed ids pubs.extend([(doi.strip(), True) for doi in dois]) - pids = fd['publication_pid'] + pids = fd["publication_pid"] if pids and pids[0]: # The user can provide a comma-seprated list - pids = pids[0].decode('utf-8').split(',') + pids = pids[0].decode("utf-8").split(",") # Make sure that we strip the spaces from the pubmed ids pubs.extend([(pid.strip(), False) for pid in pids]) the_study.publications = pubs - self.render('index.html', message=msg, level='success') + self.render("index.html", message=msg, level="success") class CreateStudyAJAX(BaseHandler): @authenticated def get(self): - study_title = self.get_argument('study_title', None) - old_study_title = self.get_argument('old_study_title', None) + study_title = self.get_argument("study_title", None) + old_study_title = self.get_argument("old_study_title", None) if study_title is None: to_write = False diff --git a/qiita_pet/handlers/study_handlers/listing_handlers.py b/qiita_pet/handlers/study_handlers/listing_handlers.py index 91b83a562..a59339870 100644 --- a/qiita_pet/handlers/study_handlers/listing_handlers.py +++ b/qiita_pet/handlers/study_handlers/listing_handlers.py 
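In the StudyEditHandler.post hunk above, Tornado hands the wtforms values over as lists of raw bytes, so each field is read as fd[name][0] and decoded as UTF-8 before the study info dict is built, and the DOI / PubMed fields accept comma-separated values that are split and stripped. A rough, self-contained sketch of that parsing step (the helper name is invented; the field names mirror the form above):

    def parse_publications(fd):
        # fd maps field names to lists of bytes, as Tornado request
        # arguments arrive; returns a list of (value, is_doi) tuples.
        pubs = []
        dois = fd.get("publication_doi") or []
        if dois and dois[0]:
            pubs.extend((d.strip(), True)
                        for d in dois[0].decode("utf-8").split(","))
        pids = fd.get("publication_pid") or []
        if pids and pids[0]:
            pubs.extend((p.strip(), False)
                        for p in pids[0].decode("utf-8").split(","))
        return pubs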
@@ -5,23 +5,27 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from json import dumps from collections import defaultdict +from json import dumps -from tornado.web import authenticated, HTTPError -from tornado.gen import coroutine, Task +from tornado.gen import Task, coroutine +from tornado.web import HTTPError, authenticated -from qiita_core.util import execute_as_transaction from qiita_core.qiita_settings import qiita_config, r_client +from qiita_core.util import execute_as_transaction from qiita_db.artifact import Artifact -from qiita_db.user import User from qiita_db.study import Study -from qiita_db.util import (add_message, generate_study_list) -from qiita_pet.util import EBI_LINKIFIER +from qiita_db.user import User +from qiita_db.util import add_message, generate_study_list from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.util import ( - study_person_linkifier, doi_linkifier, pubmed_linkifier, check_access, - get_shared_links) + check_access, + doi_linkifier, + get_shared_links, + pubmed_linkifier, + study_person_linkifier, +) +from qiita_pet.util import EBI_LINKIFIER class ListStudiesHandler(BaseHandler): @@ -29,7 +33,7 @@ class ListStudiesHandler(BaseHandler): @coroutine @execute_as_transaction def get(self, message="", msg_level=None): - self.render('list_studies.html', message=message, msg_level=msg_level) + self.render("list_studies.html", message=message, msg_level=msg_level) def _get_all_emails(self, callback): callback([email for email, name in User.iter()]) @@ -40,28 +44,33 @@ class StudyApprovalList(BaseHandler): @execute_as_transaction def get(self): user = self.current_user - if user.level != 'admin': - raise HTTPError(403, - reason='User %s is not admin' % self.current_user) + if user.level != "admin": + raise HTTPError(403, reason="User %s is not admin" % self.current_user) studies = defaultdict(list) - for artifact in Artifact.iter_by_visibility('awaiting_approval'): + for artifact in Artifact.iter_by_visibility("awaiting_approval"): studies[artifact.study].append(artifact.id) - parsed_studies = [(s.id, s.title, s.owner.email, pds) - for s, pds in studies.items()] + parsed_studies = [ + (s.id, s.title, s.owner.email, pds) for s, pds in studies.items() + ] - self.render('admin_approval.html', - study_info=parsed_studies) + self.render("admin_approval.html", study_info=parsed_studies) class AutocompleteHandler(BaseHandler): @authenticated def get(self): - text = self.get_argument('text') - vals = r_client.execute_command('zrangebylex', 'qiita-usernames', - u'[%s' % text, u'[%s\xff' % text) - self.write({'results': [{'id': s.decode('utf-8'), - 'text': s.decode('utf-8')} for s in vals]}) + text = self.get_argument("text") + vals = r_client.execute_command( + "zrangebylex", "qiita-usernames", "[%s" % text, "[%s\xff" % text + ) + self.write( + { + "results": [ + {"id": s.decode("utf-8"), "text": s.decode("utf-8")} for s in vals + ] + } + ) class ShareStudyAJAX(BaseHandler): @@ -74,29 +83,30 @@ def _get_shared_for_study(self, study, callback): @execute_as_transaction def _share(self, study, user, callback): user = User(user) - add_message('Study \'%s\' ' - 'has been shared with you.' % - (qiita_config.portal_dir, study.id, study.title), [user]) + add_message( + "Study '%s' " + "has been shared with you." 
+ % (qiita_config.portal_dir, study.id, study.title), + [user], + ) callback(study.share(user)) @execute_as_transaction def _unshare(self, study, user, callback): user = User(user) - add_message('Study \'%s\' has been unshared from you.' % - study.title, [user]) + add_message("Study '%s' has been unshared from you." % study.title, [user]) callback(study.unshare(user)) @authenticated @coroutine @execute_as_transaction def get(self): - study_id = int(self.get_argument('id')) + study_id = int(self.get_argument("id")) study = Study(study_id) - check_access(self.current_user, study, no_public=True, - raise_error=True) + check_access(self.current_user, study, no_public=True, raise_error=True) - selected = self.get_argument('selected', None) - deselected = self.get_argument('deselected', None) + selected = self.get_argument("selected", None) + deselected = self.get_argument("deselected", None) if selected is not None: yield Task(self._share, study, selected) @@ -105,51 +115,52 @@ def get(self): users, links = yield Task(self._get_shared_for_study, study) - self.write(dumps({'users': users, 'links': links})) + self.write(dumps({"users": users, "links": links})) class ListStudiesAJAX(BaseHandler): @authenticated @execute_as_transaction def get(self, ignore): - user = self.get_argument('user') - visibility = self.get_argument('visibility') - echo = int(self.get_argument('sEcho')) + user = self.get_argument("user") + visibility = self.get_argument("visibility") + echo = int(self.get_argument("sEcho")) if user != self.current_user.id: - raise HTTPError(403, reason='Unauthorized search!') - if visibility not in ['user', 'public']: - raise HTTPError(400, reason='Not a valid visibility') + raise HTTPError(403, reason="Unauthorized search!") + if visibility not in ["user", "public"]: + raise HTTPError(400, reason="Not a valid visibility") info = generate_study_list(self.current_user, visibility) # linkifying data len_info = len(info) for i in range(len_info): - info[i]['shared'] = ", ".join([study_person_linkifier(element) - for element in info[i]['shared']]) + info[i]["shared"] = ", ".join( + [study_person_linkifier(element) for element in info[i]["shared"]] + ) - ppid = [pubmed_linkifier([p]) for p in info[i]['publication_pid']] - pdoi = [doi_linkifier([p]) for p in info[i]['publication_doi']] - del info[i]['publication_pid'] - del info[i]['publication_doi'] - info[i]['pubs'] = ', '.join(ppid + pdoi) + ppid = [pubmed_linkifier([p]) for p in info[i]["publication_pid"]] + pdoi = [doi_linkifier([p]) for p in info[i]["publication_doi"]] + del info[i]["publication_pid"] + del info[i]["publication_doi"] + info[i]["pubs"] = ", ".join(ppid + pdoi) - info[i]['pi'] = study_person_linkifier(info[i]['pi']) + info[i]["pi"] = study_person_linkifier(info[i]["pi"]) - info[i]['ebi_info'] = '' - ebi_study_accession = info[i]['ebi_study_accession'] + info[i]["ebi_info"] = "" + ebi_study_accession = info[i]["ebi_study_accession"] if ebi_study_accession: - info[i]['ebi_info'] = ''.join([ - EBI_LINKIFIER.format(a) - for a in ebi_study_accession.split(',')]) + info[i]["ebi_info"] = "".join( + [EBI_LINKIFIER.format(a) for a in ebi_study_accession.split(",")] + ) # build the table json results = { "sEcho": echo, "iTotalRecords": len_info, "iTotalDisplayRecords": len_info, - "aaData": info + "aaData": info, } # return the json in compact form to save transmit size - self.write(dumps(results, separators=(',', ':'))) + self.write(dumps(results, separators=(",", ":"))) diff --git a/qiita_pet/handlers/study_handlers/prep_template.py 
b/qiita_pet/handlers/study_handlers/prep_template.py index 6afcde59c..43067d8ea 100644 --- a/qiita_pet/handlers/study_handlers/prep_template.py +++ b/qiita_pet/handlers/study_handlers/prep_template.py @@ -7,108 +7,117 @@ # ----------------------------------------------------------------------------- from os.path import join, relpath -from tornado.web import authenticated -from tornado.escape import url_escape import pandas as pd +from tornado.escape import url_escape +from tornado.web import authenticated from qiita_core.qiita_settings import qiita_config -from qiita_pet.handlers.util import to_int -from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_db.util import (get_files_from_uploads_folders, get_mountpoint, - supported_filepath_types) from qiita_db.metadata_template.prep_template import PrepTemplate +from qiita_db.util import ( + get_files_from_uploads_folders, + get_mountpoint, + supported_filepath_types, +) from qiita_pet.handlers.api_proxy import ( - prep_template_ajax_get_req, new_prep_template_get_req, - prep_template_summary_get_req) + new_prep_template_get_req, + prep_template_ajax_get_req, + prep_template_summary_get_req, +) +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.handlers.util import to_int class NewPrepTemplateAjax(BaseHandler): @authenticated def get(self): - study_id = to_int(self.get_argument('study_id')) + study_id = to_int(self.get_argument("study_id")) result = new_prep_template_get_req(study_id) - self.render('study_ajax/add_prep_template.html', - prep_files=result['prep_files'], - data_types=result['data_types'], - ontology=result['ontology'], - study_id=study_id) + self.render( + "study_ajax/add_prep_template.html", + prep_files=result["prep_files"], + data_types=result["data_types"], + ontology=result["ontology"], + study_id=study_id, + ) class AddDefaultWorkflowHandler(BaseHandler): @authenticated def post(self): - prep_id = self.get_argument('prep_id') + prep_id = self.get_argument("prep_id") msg_error = None data = None try: - workflow = PrepTemplate(prep_id).add_default_workflow( - self.current_user) + workflow = PrepTemplate(prep_id).add_default_workflow(self.current_user) data = workflow.id except Exception as error: msg_error = str(error) - self.write({'data': data, 'msg_error': msg_error}) + self.write({"data": data, "msg_error": msg_error}) class PrepTemplateSummaryAJAX(BaseHandler): @authenticated def get(self): - prep_id = to_int(self.get_argument('prep_id')) + prep_id = to_int(self.get_argument("prep_id")) res = prep_template_summary_get_req(prep_id, self.current_user.id) - self.render('study_ajax/prep_summary_table.html', pid=prep_id, - stats=res['summary'], editable=res['editable'], - num_samples=res['num_samples']) + self.render( + "study_ajax/prep_summary_table.html", + pid=prep_id, + stats=res["summary"], + editable=res["editable"], + num_samples=res["num_samples"], + ) class PrepTemplateAJAX(BaseHandler): @authenticated def get(self): """Send formatted summary page of prep template""" - prep_id = to_int(self.get_argument('prep_id')) - row_id = self.get_argument('row_id', '0') + prep_id = to_int(self.get_argument("prep_id")) + row_id = self.get_argument("row_id", "0") current_user = self.current_user res = prep_template_ajax_get_req(current_user.id, prep_id) - res['prep_id'] = prep_id - res['row_id'] = row_id + res["prep_id"] = prep_id + res["row_id"] = row_id # Escape the message just in case javascript breaking characters in it - res['alert_message'] = url_escape(res['alert_message']) - 
res['user_level'] = current_user.level - if res['creation_job'] is not None: - params = res['creation_job'].parameters.values + res["alert_message"] = url_escape(res["alert_message"]) + res["user_level"] = current_user.level + if res["creation_job"] is not None: + params = res["creation_job"].parameters.values summary = None - if 'sample_sheet' in params: - fp = params['sample_sheet'] - res['creation_job_filename'] = fp['filename'] - res['creation_job_filename_body'] = fp['body'] - if res['creation_job'].status == 'success': - if res['creation_job'].outputs: + if "sample_sheet" in params: + fp = params["sample_sheet"] + res["creation_job_filename"] = fp["filename"] + res["creation_job_filename_body"] = fp["body"] + if res["creation_job"].status == "success": + if res["creation_job"].outputs: # [0] is the id, [1] is the filepath - _file = res['creation_job'].outputs[ - 'output'].html_summary_fp[1] + _file = res["creation_job"].outputs["output"].html_summary_fp[1] summary = relpath(_file, qiita_config.base_data_dir) else: - res['creation_job_filename'] = None - res['creation_job_filename_body'] = None - res['creation_job_artifact_summary'] = summary - res['human_reads_filter_method'] = None + res["creation_job_filename"] = None + res["creation_job_filename_body"] = None + res["creation_job_artifact_summary"] = summary + res["human_reads_filter_method"] = None a = PrepTemplate(prep_id).artifact if a is not None: hrfm = a.human_reads_filter_method if hrfm is not None: - res['human_reads_filter_method'] = hrfm + res["human_reads_filter_method"] = hrfm - self.render('study_ajax/prep_summary.html', **res) + self.render("study_ajax/prep_summary.html", **res) class PrepFilesHandler(BaseHandler): @authenticated def get(self): - study_id = self.get_argument('study_id') - prep_file = self.get_argument('prep_file') - prep_type = self.get_argument('type') + study_id = self.get_argument("study_id") + prep_file = self.get_argument("prep_file") + prep_type = self.get_argument("type") # TODO: Get file types for the artifact type # FILE TYPE IN POSTION 0 MUST BE DEFAULT FOR SELECTED @@ -118,12 +127,12 @@ def get(self): not_selected = [] _, base = get_mountpoint("uploads")[0] uploaded = get_files_from_uploads_folders(study_id) - prep = pd.read_table(join(base, study_id, prep_file), sep='\t') - if 'run_prefix' in prep.columns: + prep = pd.read_table(join(base, study_id, prep_file), sep="\t") + if "run_prefix" in prep.columns: # Use run_prefix column of prep template to auto-select # per-prefix uploaded files if available. per_prefix = True - prep_prefixes = set(prep['run_prefix']) + prep_prefixes = set(prep["run_prefix"]) for _, filename in uploaded: for prefix in prep_prefixes: if filename.startswith(prefix): @@ -136,8 +145,11 @@ def get(self): # Write out if this prep template supports per-prefix files, and the # as well as pre-selected and remaining files - self.write({ - 'per_prefix': per_prefix, - 'file_types': file_types, - 'selected': selected, - 'remaining': not_selected}) + self.write( + { + "per_prefix": per_prefix, + "file_types": file_types, + "selected": selected, + "remaining": not_selected, + } + ) diff --git a/qiita_pet/handlers/study_handlers/processing.py b/qiita_pet/handlers/study_handlers/processing.py index 3eea270fd..753cd1554 100644 --- a/qiita_pet/handlers/study_handlers/processing.py +++ b/qiita_pet/handlers/study_handlers/processing.py @@ -5,14 +5,20 @@ # # The full license is in the file LICENSE, distributed with this software. 
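The PrepFilesHandler hunk above auto-selects uploaded files by matching filenames against the run_prefix column of the prep template read with pandas. A condensed sketch of just that matching step (assuming prep is a pandas DataFrame and uploaded_filenames an iterable of file names; the helper name is invented):

    import pandas as pd

    def split_by_run_prefix(prep, uploaded_filenames):
        # Pre-select uploads whose names start with any run_prefix in the
        # prep template; everything else stays in the "remaining" list.
        prefixes = set(prep["run_prefix"]) if "run_prefix" in prep.columns else set()
        selected, remaining = [], []
        for filename in uploaded_filenames:
            if any(filename.startswith(p) for p in prefixes):
                selected.append(filename)
            else:
                remaining.append(filename)
        return selected, remaining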
# ----------------------------------------------------------------------------- +from json import dumps, loads + from tornado.web import authenticated -from json import loads, dumps -from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.api_proxy import ( - list_commands_handler_get_req, list_options_handler_get_req, - workflow_handler_post_req, workflow_handler_patch_req, job_ajax_get_req, - workflow_run_post_req, job_ajax_patch_req) + job_ajax_get_req, + job_ajax_patch_req, + list_commands_handler_get_req, + list_options_handler_get_req, + workflow_handler_patch_req, + workflow_handler_post_req, + workflow_run_post_req, +) +from qiita_pet.handlers.base_handlers import BaseHandler class ListCommandsHandler(BaseHandler): @@ -21,9 +27,8 @@ def get(self): # Fun fact - if the argument is a list, JS adds '[]' to the # argument name artifact_id = self.get_argument("artifact_id") - exclude_analysis = self.get_argument('include_analysis') == 'false' - self.write( - list_commands_handler_get_req(artifact_id, exclude_analysis)) + exclude_analysis = self.get_argument("include_analysis") == "false" + self.write(list_commands_handler_get_req(artifact_id, exclude_analysis)) class ListOptionsHandler(BaseHandler): @@ -32,7 +37,7 @@ def get(self): command_id = self.get_argument("command_id") artifact_id = self.get_argument("artifact_id", None) # if the artifact id has ':' it means that it's a job in construction - if artifact_id is not None and ':' in artifact_id: + if artifact_id is not None and ":" in artifact_id: artifact_id = None self.write(list_options_handler_get_req(command_id, artifact_id)) @@ -40,60 +45,59 @@ def get(self): class WorkflowRunHandler(BaseHandler): @authenticated def post(self): - w_id = self.get_argument('workflow_id') + w_id = self.get_argument("workflow_id") self.write(workflow_run_post_req(w_id)) class WorkflowHandler(BaseHandler): @authenticated def post(self): - command_id = self.get_argument('command_id') - params = self.get_argument('params') + command_id = self.get_argument("command_id") + params = self.get_argument("params") if self.request.files: parameters = loads(params) for k, v in self.request.files.items(): # [0] there is only one file -- this block is needed because # 'body' is a byte and JSON doesn't know how to translate it - parameters[k] = {'body': v[0]['body'].decode("utf-8"), - 'filename': v[0]['filename'], - 'content_type': v[0]['content_type']} + parameters[k] = { + "body": v[0]["body"].decode("utf-8"), + "filename": v[0]["filename"], + "content_type": v[0]["content_type"], + } params = dumps(parameters) - self.write(workflow_handler_post_req( - self.current_user.id, command_id, params)) + self.write(workflow_handler_post_req(self.current_user.id, command_id, params)) @authenticated def patch(self): - req_op = self.get_argument('op') - req_path = self.get_argument('path') - req_value = self.get_argument('value', None) - req_from = self.get_argument('from', None) + req_op = self.get_argument("op") + req_path = self.get_argument("path") + req_value = self.get_argument("value", None) + req_from = self.get_argument("from", None) try: - res = workflow_handler_patch_req( - req_op, req_path, req_value, req_from) + res = workflow_handler_patch_req(req_op, req_path, req_value, req_from) self.write(res) except Exception as e: - self.write({'status': 'error', - 'message': str(e)}) + self.write({"status": "error", "message": str(e)}) class JobAJAX(BaseHandler): @authenticated def get(self): - job_id = self.get_argument('job_id') + job_id = 
self.get_argument("job_id") self.write(job_ajax_get_req(job_id)) @authenticated def patch(self): - req_op = self.get_argument('op') - req_path = self.get_argument('path') - req_value = self.get_argument('value', None) - req_from = self.get_argument('from', None) + req_op = self.get_argument("op") + req_path = self.get_argument("path") + req_value = self.get_argument("value", None) + req_from = self.get_argument("from", None) try: res = job_ajax_patch_req(req_op, req_path, req_value, req_from) self.write(res) except Exception as e: - self.write({'status': 'error', 'message': str(e)}) + self.write({"status": "error", "message": str(e)}) diff --git a/qiita_pet/handlers/study_handlers/sample_template.py b/qiita_pet/handlers/study_handlers/sample_template.py index 2c3373f87..3a7c31c61 100644 --- a/qiita_pet/handlers/study_handlers/sample_template.py +++ b/qiita_pet/handlers/study_handlers/sample_template.py @@ -6,36 +6,38 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +from json import dumps, loads from os.path import basename, exists -from json import loads, dumps from tempfile import NamedTemporaryFile -from tornado.web import authenticated, HTTPError +from tornado.web import HTTPError, authenticated -from qiita_core.qiita_settings import r_client, qiita_config -from qiita_pet.handlers.util import to_int -from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_db.util import get_files_from_uploads_folders -from qiita_db.study import Study +from qiita_core.qiita_settings import qiita_config, r_client +from qiita_db.exceptions import QiitaDBUnknownIDError from qiita_db.metadata_template.sample_template import SampleTemplate from qiita_db.metadata_template.util import looks_like_qiime_mapping_file -from qiita_db.software import Software, Parameters from qiita_db.processing_job import ProcessingJob -from qiita_db.exceptions import QiitaDBUnknownIDError - +from qiita_db.software import Parameters, Software +from qiita_db.study import Study +from qiita_db.util import get_files_from_uploads_folders from qiita_pet.handlers.api_proxy import ( - data_types_get_req, sample_template_samples_get_req, - prep_template_samples_get_req, study_prep_get_req, - sample_template_meta_cats_get_req, sample_template_category_get_req, - get_sample_template_processing_status, analyses_associated_with_study, - check_fp) - + analyses_associated_with_study, + check_fp, + data_types_get_req, + get_sample_template_processing_status, + prep_template_samples_get_req, + sample_template_category_get_req, + sample_template_meta_cats_get_req, + sample_template_samples_get_req, + study_prep_get_req, +) +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.handlers.util import to_int -SAMPLE_TEMPLATE_KEY_FORMAT = 'sample_template_%s' +SAMPLE_TEMPLATE_KEY_FORMAT = "sample_template_%s" -def sample_template_checks(study_id, user, check_exists=False, - no_public=False): +def sample_template_checks(study_id, user, check_exists=False, no_public=False): """Performs different checks and raises errors if any of the checks fail Parameters @@ -59,18 +61,20 @@ def sample_template_checks(study_id, user, check_exists=False, try: study = Study(int(study_id)) except QiitaDBUnknownIDError: - raise HTTPError(404, reason='Study does not exist') + raise HTTPError(404, reason="Study does not exist") if not study.has_access(user, no_public=no_public): - raise HTTPError(403, reason='User has insufficient 
permissions') + raise HTTPError(403, reason="User has insufficient permissions") # Check if the sample template exists if check_exists and not SampleTemplate.exists(study_id): - raise HTTPError(404, reason="Study %s doesn't have sample information" - % study_id) + raise HTTPError( + 404, reason="Study %s doesn't have sample information" % study_id + ) -def sample_template_handler_post_request(study_id, user, filepath, - data_type=None, direct_upload=False): +def sample_template_handler_post_request( + study_id, user, filepath, data_type=None, direct_upload=False +): """Creates a new sample template Parameters @@ -104,31 +108,36 @@ def sample_template_handler_post_request(study_id, user, filepath, # Check if the file exists if not direct_upload: fp_rsp = check_fp(study_id, filepath) - if fp_rsp['status'] != 'success': - raise HTTPError(404, reason='Filepath not found') - filepath = fp_rsp['file'] + if fp_rsp["status"] != "success": + raise HTTPError(404, reason="Filepath not found") + filepath = fp_rsp["file"] is_mapping_file = looks_like_qiime_mapping_file(filepath) if is_mapping_file and not data_type: - raise HTTPError(400, reason='Please, choose a data type if uploading ' - 'a QIIME mapping file') + raise HTTPError( + 400, reason="Please, choose a data type if uploading a QIIME mapping file" + ) - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = qiita_plugin.get_command('create_sample_template') + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("create_sample_template") params = Parameters.load( - cmd, values_dict={'fp': filepath, 'study_id': study_id, - 'is_mapping_file': is_mapping_file, - 'data_type': data_type}) + cmd, + values_dict={ + "fp": filepath, + "study_id": study_id, + "is_mapping_file": is_mapping_file, + "data_type": data_type, + }, + ) job = ProcessingJob.create(user, params, True) - r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % study_id, - dumps({'job_id': job.id})) + r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % study_id, dumps({"job_id": job.id})) job.submit() - return {'job': job.id} + return {"job": job.id} -def sample_template_handler_patch_request(user, req_op, req_path, - req_value=None, req_from=None, - direct_upload=False): +def sample_template_handler_patch_request( + user, req_op, req_path, req_value=None, req_from=None, direct_upload=False +): """Patches the sample template Parameters @@ -155,40 +164,43 @@ def sample_template_handler_patch_request(user, req_op, req_path, 400 If the path parameter doens't follow the expected format 400 If the given operation is not supported """ - req_path = [v for v in req_path.split('/') if v] + req_path = [v for v in req_path.split("/") if v] # At this point we know the path should be at least length 2 if len(req_path) < 2: - raise HTTPError(400, reason='Incorrect path parameter') + raise HTTPError(400, reason="Incorrect path parameter") study_id = int(req_path[0]) # Check if the current user has access to the study and if the sample # template exists sample_template_checks(study_id, user, check_exists=True, no_public=True) - if req_op == 'remove': + if req_op == "remove": # Path format # column: study_id/columns/column_name # sample: study_id/samples/sample_id if len(req_path) != 3: - raise HTTPError(400, reason='Incorrect path parameter') + raise HTTPError(400, reason="Incorrect path parameter") attribute = req_path[1] attr_id = req_path[2] - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = 
qiita_plugin.get_command('delete_sample_or_column') + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("delete_sample_or_column") params = Parameters.load( - cmd, values_dict={'obj_class': 'SampleTemplate', - 'obj_id': study_id, - 'sample_or_col': attribute, - 'name': attr_id}) + cmd, + values_dict={ + "obj_class": "SampleTemplate", + "obj_id": study_id, + "sample_or_col": attribute, + "name": attr_id, + }, + ) job = ProcessingJob.create(user, params, True) # Store the job id attaching it to the sample template id - r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % study_id, - dumps({'job_id': job.id})) + r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % study_id, dumps({"job_id": job.id})) job.submit() - return {'job': job.id} - elif req_op == 'replace': + return {"job": job.id} + elif req_op == "replace": # WARNING: Although the patch operation is a replace, is not a full # true replace. A replace is in theory equivalent to a remove + add. # In this case, the replace operation doesn't necessarily removes @@ -199,50 +211,57 @@ def sample_template_handler_patch_request(user, req_op, req_path, # to use this function to replace other elements of the sample # information if len(req_path) != 2: - raise HTTPError(400, reason='Incorrect path parameter') + raise HTTPError(400, reason="Incorrect path parameter") attribute = req_path[1] - if attribute == 'data': + if attribute == "data": # Update the sample information if req_value is None: - raise HTTPError(400, reason="Value is required when updating " - "sample information") + raise HTTPError( + 400, reason="Value is required when updating sample information" + ) if direct_upload: # We can assume that the file exist as it was generated by # the system filepath = req_value if not exists(filepath): - reason = ('Upload file not found (%s), please report to %s' - % (filepath, qiita_config.help_email)) + reason = "Upload file not found (%s), please report to %s" % ( + filepath, + qiita_config.help_email, + ) raise HTTPError(404, reason=reason) else: # Check if the file exists fp_rsp = check_fp(study_id, req_value) - if fp_rsp['status'] != 'success': - raise HTTPError(404, reason='Filepath not found') - filepath = fp_rsp['file'] + if fp_rsp["status"] != "success": + raise HTTPError(404, reason="Filepath not found") + filepath = fp_rsp["file"] - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = qiita_plugin.get_command('update_sample_template') + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("update_sample_template") params = Parameters.load( - cmd, values_dict={'study': study_id, - 'template_fp': filepath}) + cmd, values_dict={"study": study_id, "template_fp": filepath} + ) job = ProcessingJob.create(user, params, True) # Store the job id attaching it to the sample template id - r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % study_id, - dumps({'job_id': job.id})) + r_client.set( + SAMPLE_TEMPLATE_KEY_FORMAT % study_id, dumps({"job_id": job.id}) + ) job.submit() - return {'job': job.id} + return {"job": job.id} else: - raise HTTPError(404, reason='Attribute %s not found' % attribute) + raise HTTPError(404, reason="Attribute %s not found" % attribute) else: - raise HTTPError(400, reason='Operation %s not supported. Current ' - 'supported operations: remove, replace' % req_op) + raise HTTPError( + 400, + reason="Operation %s not supported. 
Current " + "supported operations: remove, replace" % req_op, + ) def sample_template_handler_delete_request(study_id, user): @@ -269,72 +288,78 @@ def sample_template_handler_delete_request(study_id, user): # template exists sample_template_checks(study_id, user, check_exists=True) - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = qiita_plugin.get_command('delete_sample_template') - params = Parameters.load(cmd, values_dict={'study': int(study_id)}) + qiita_plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = qiita_plugin.get_command("delete_sample_template") + params = Parameters.load(cmd, values_dict={"study": int(study_id)}) job = ProcessingJob.create(user, params, True) # Store the job if deleteing the sample template - r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % study_id, - dumps({'job_id': job.id})) + r_client.set(SAMPLE_TEMPLATE_KEY_FORMAT % study_id, dumps({"job_id": job.id})) job.submit() - return {'job': job.id} + return {"job": job.id} class SampleTemplateHandler(BaseHandler): @authenticated def get(self): - study_id = self.get_argument('study_id') + study_id = self.get_argument("study_id") # Check if the current user has access to the study sample_template_checks(study_id, self.current_user) - self.render('study_ajax/sample_summary.html', study_id=study_id) + self.render("study_ajax/sample_summary.html", study_id=study_id) @authenticated def post(self): - study_id = int(self.get_argument('study_id')) - filepath = self.get_argument('filepath') - data_type = self.get_argument('data_type') - direct_upload = self.get_argument('direct_upload', False) + study_id = int(self.get_argument("study_id")) + filepath = self.get_argument("filepath") + data_type = self.get_argument("data_type") + direct_upload = self.get_argument("direct_upload", False) - if direct_upload and direct_upload == 'true': + if direct_upload and direct_upload == "true": direct_upload = True - with NamedTemporaryFile(suffix='.txt', delete=False) as fp: - fp.write(self.request.files['theFile'][0]['body']) + with NamedTemporaryFile(suffix=".txt", delete=False) as fp: + fp.write(self.request.files["theFile"][0]["body"]) filepath = fp.name - self.write(sample_template_handler_post_request( - study_id, self.current_user, filepath, data_type=data_type, - direct_upload=direct_upload)) + self.write( + sample_template_handler_post_request( + study_id, + self.current_user, + filepath, + data_type=data_type, + direct_upload=direct_upload, + ) + ) @authenticated def patch(self): - req_op = self.get_argument('op') - req_path = self.get_argument('path') - req_value = self.get_argument('value', None) - req_from = self.get_argument('from', None) - direct_upload = self.get_argument('direct_upload', False) + req_op = self.get_argument("op") + req_path = self.get_argument("path") + req_value = self.get_argument("value", None) + req_from = self.get_argument("from", None) + direct_upload = self.get_argument("direct_upload", False) - if direct_upload and direct_upload == 'true': + if direct_upload and direct_upload == "true": direct_upload = True - with NamedTemporaryFile(suffix='.txt', delete=False) as fp: - fp.write(self.request.files['value'][0]['body']) + with NamedTemporaryFile(suffix=".txt", delete=False) as fp: + fp.write(self.request.files["value"][0]["body"]) req_value = fp.name - self.write(sample_template_handler_patch_request( - self.current_user, req_op, req_path, req_value, req_from, - direct_upload)) + self.write( + sample_template_handler_patch_request( + self.current_user, req_op, 
req_path, req_value, req_from, direct_upload + ) + ) @authenticated def delete(self): - study_id = int(self.get_argument('study_id')) - self.write(sample_template_handler_delete_request( - study_id, self.current_user)) + study_id = int(self.get_argument("study_id")) + self.write(sample_template_handler_delete_request(study_id, self.current_user)) def sample_template_overview_handler_get_request(study_id, user): @@ -347,13 +372,16 @@ def sample_template_overview_handler_get_request(study_id, user): # The following information should always be provided: # The files that have been uploaded to the system and can be a # sample template file - files = [f for _, f, _ in get_files_from_uploads_folders(study_id) - if f.endswith(('txt', 'tsv', 'xlsx'))] + files = [ + f + for _, f, _ in get_files_from_uploads_folders(study_id) + if f.endswith(("txt", "tsv", "xlsx")) + ] # If there is a job associated with the sample information, the job id job = None job_info = r_client.get(SAMPLE_TEMPLATE_KEY_FORMAT % study_id) if job_info: - job = loads(job_info)['job_id'] + job = loads(job_info)["job_id"] # Specific information if it exists or not: data_types = [] @@ -362,7 +390,7 @@ def sample_template_overview_handler_get_request(study_id, user): num_samples = 0 num_cols = 0 columns = [] - sample_restrictions = '' + sample_restrictions = "" if exists: # If it exists we need to provide: # The id of the sample template file so the user can download it and @@ -383,28 +411,30 @@ def sample_template_overview_handler_get_request(study_id, user): else: # It doesn't exist, we also need to provide the data_types in case # the user uploads a QIIME mapping file - data_types = sorted(data_types_get_req()['data_types']) - - return {'exists': exists, - 'uploaded_files': files, - 'data_types': data_types, - 'user_can_edit': Study(study_id).can_edit(user), - 'job': job, - 'download_id': st_fp_id, - 'st_files': st_files, - 'num_samples': num_samples, - 'num_columns': num_cols, - 'columns': columns, - 'sample_restrictions': sample_restrictions} + data_types = sorted(data_types_get_req()["data_types"]) + + return { + "exists": exists, + "uploaded_files": files, + "data_types": data_types, + "user_can_edit": Study(study_id).can_edit(user), + "job": job, + "download_id": st_fp_id, + "st_files": st_files, + "num_samples": num_samples, + "num_columns": num_cols, + "columns": columns, + "sample_restrictions": sample_restrictions, + } class SampleTemplateOverviewHandler(BaseHandler): @authenticated def get(self): - study_id = int(self.get_argument('study_id')) + study_id = int(self.get_argument("study_id")) self.write( - sample_template_overview_handler_get_request( - study_id, self.current_user)) + sample_template_overview_handler_get_request(study_id, self.current_user) + ) def sample_template_columns_get_req(study_id, column, user): @@ -445,13 +475,13 @@ class SampleTemplateColumnsHandler(BaseHandler): @authenticated def get(self): """Send formatted summary page of sample template""" - sid = int(self.get_argument('study_id')) - column = self.get_argument('column', None) + sid = int(self.get_argument("study_id")) + column = self.get_argument("column", None) reply = sample_template_columns_get_req(sid, column, self.current_user) # we reply with {'values': reply} because tornado expectes a dict - self.write({'values': reply}) + self.write({"values": reply}) def _build_sample_summary(study_id, user_id): @@ -472,8 +502,10 @@ def _build_sample_summary(study_id, user_id): [ {field_1: 'value', ...}, ...] 
""" # Load all samples available into dictionary and set - rows = {s: {'sample': s} for s in sample_template_samples_get_req( - study_id, user_id)['samples']} + rows = { + s: {"sample": s} + for s in sample_template_samples_get_req(study_id, user_id)["samples"] + } samples = rows.keys() # Add one column per prep template highlighting what samples exist preps = study_prep_get_req(study_id, user_id)["info"] @@ -483,10 +515,9 @@ def _build_sample_summary(study_id, user_id): field = "prep%d" % prep["id"] name = "%s (%d)" % (prep["name"], prep["id"]) columns[field] = name - prep_samples = prep_template_samples_get_req( - prep['id'], user_id)['samples'] + prep_samples = prep_template_samples_get_req(prep["id"], user_id)["samples"] for s in samples: - rows[s][field] = 'X' if s in prep_samples else '' + rows[s][field] = "X" if s in prep_samples else "" return columns, rows @@ -495,50 +526,51 @@ class SampleAJAX(BaseHandler): @authenticated def get(self): """Show the sample summary page""" - study_id = int(self.get_argument('study_id')) + study_id = int(self.get_argument("study_id")) email = self.current_user.id res = sample_template_meta_cats_get_req(study_id, email) - if res['status'] == 'error': - if 'does not exist' in res['message']: - raise HTTPError(404, reason=res['message']) - elif 'User has insufficient permissions' in res['message']: - raise HTTPError(403, reason=res['message']) + if res["status"] == "error": + if "does not exist" in res["message"]: + raise HTTPError(404, reason=res["message"]) + elif "User has insufficient permissions" in res["message"]: + raise HTTPError(403, reason=res["message"]) else: - raise HTTPError(500, reason=res['message']) - categories = res['categories'] + raise HTTPError(500, reason=res["message"]) + categories = res["categories"] columns, rows = _build_sample_summary(study_id, email) - _, alert_type, alert_msg = get_sample_template_processing_status( - study_id) + _, alert_type, alert_msg = get_sample_template_processing_status(study_id) - self.render('study_ajax/sample_prep_summary.html', - rows=rows, columns=columns, categories=categories, - study_id=study_id, alert_type=alert_type, - alert_message=alert_msg, - user_can_edit=Study(study_id).can_edit(self.current_user)) + self.render( + "study_ajax/sample_prep_summary.html", + rows=rows, + columns=columns, + categories=categories, + study_id=study_id, + alert_type=alert_type, + alert_message=alert_msg, + user_can_edit=Study(study_id).can_edit(self.current_user), + ) @authenticated def post(self): - study_id = int(self.get_argument('study_id')) - meta_col = self.get_argument('meta_col') - values = sample_template_category_get_req(meta_col, study_id, - self.current_user.id) - if values['status'] != 'success': + study_id = int(self.get_argument("study_id")) + meta_col = self.get_argument("meta_col") + values = sample_template_category_get_req( + meta_col, study_id, self.current_user.id + ) + if values["status"] != "success": self.write(values) else: - self.write({'status': 'success', - 'message': '', - 'values': values['values'] - }) + self.write({"status": "success", "message": "", "values": values["values"]}) class AnalysesAjax(BaseHandler): @authenticated def get(self): user_id = self.current_user.id - study_id = to_int(self.get_argument('study_id')) + study_id = to_int(self.get_argument("study_id")) result = analyses_associated_with_study(study_id, user_id) - self.render('study_ajax/study_analyses.html', - analyses=result['values']) + self.render("study_ajax/study_analyses.html", 
analyses=result["values"]) diff --git a/qiita_pet/handlers/study_handlers/tests/test_artifact.py b/qiita_pet/handlers/study_handlers/tests/test_artifact.py index d965d150c..84c66d66b 100644 --- a/qiita_pet/handlers/study_handlers/tests/test_artifact.py +++ b/qiita_pet/handlers/study_handlers/tests/test_artifact.py @@ -6,94 +6,103 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main +from json import loads +from os import close, remove from os.path import exists, join -from os import remove, close from tempfile import mkstemp -from json import loads +from unittest import main -import pandas as pd import numpy.testing as npt +import pandas as pd from qiita_core.testing import wait_for_prep_information_job -from qiita_pet.test.tornado_test_base import TestHandlerBase from qiita_db.artifact import Artifact +from qiita_db.exceptions import QiitaDBWarning +from qiita_db.metadata_template.prep_template import PrepTemplate from qiita_db.study import Study from qiita_db.util import get_mountpoint -from qiita_db.metadata_template.prep_template import PrepTemplate -from qiita_db.exceptions import QiitaDBWarning +from qiita_pet.test.tornado_test_base import TestHandlerBase class ArtifactGraphAJAXTests(TestHandlerBase): def test_get_ancestors(self): - response = self.get('/artifact/graph/', {'direction': 'ancestors', - 'artifact_id': 1}) - exp = {'status': 'success', - 'message': '', - 'node_labels': [[1, 'Raw data 1 - FASTQ']], - 'edge_list': []} + response = self.get( + "/artifact/graph/", {"direction": "ancestors", "artifact_id": 1} + ) + exp = { + "status": "success", + "message": "", + "node_labels": [[1, "Raw data 1 - FASTQ"]], + "edge_list": [], + } self.assertEqual(response.code, 200) self.assertEqual(loads(response.body), exp) def test_get_descendants(self): - response = self.get('/artifact/graph/', {'direction': 'descendants', - 'artifact_id': 1}) - exp = {'status': 'success', - 'message': '', - 'node_labels': [[1, 'Raw data 1 - FASTQ'], - [3, 'Demultiplexed 2 - Demultiplexed'], - [2, 'Demultiplexed 1 - Demultiplexed'], - [4, 'BIOM - BIOM'], - [5, 'BIOM - BIOM'], - [6, 'BIOM - BIOM']], - 'edge_list': [[1, 3], [1, 2], [2, 4], [2, 5], [2, 6]]} + response = self.get( + "/artifact/graph/", {"direction": "descendants", "artifact_id": 1} + ) + exp = { + "status": "success", + "message": "", + "node_labels": [ + [1, "Raw data 1 - FASTQ"], + [3, "Demultiplexed 2 - Demultiplexed"], + [2, "Demultiplexed 1 - Demultiplexed"], + [4, "BIOM - BIOM"], + [5, "BIOM - BIOM"], + [6, "BIOM - BIOM"], + ], + "edge_list": [[1, 3], [1, 2], [2, 4], [2, 5], [2, 6]], + } self.assertEqual(response.code, 200) obs = loads(response.body) - self.assertEqual(obs['status'], exp['status']) - self.assertEqual(obs['message'], exp['message']) - self.assertCountEqual(obs['node_labels'], exp['node_labels']) - self.assertCountEqual(obs['edge_list'], exp['edge_list']) + self.assertEqual(obs["status"], exp["status"]) + self.assertEqual(obs["message"], exp["message"]) + self.assertCountEqual(obs["node_labels"], exp["node_labels"]) + self.assertCountEqual(obs["edge_list"], exp["edge_list"]) def test_get_unknown(self): - response = self.get('/artifact/graph/', {'direction': 'BAD', - 'artifact_id': 1}) - exp = {'status': 'error', - 'message': 'Unknown directon BAD'} + response = self.get("/artifact/graph/", {"direction": "BAD", "artifact_id": 1}) + exp = {"status": "error", "message": "Unknown directon BAD"} 
self.assertEqual(response.code, 200) self.assertEqual(loads(response.body), exp) class NewArtifactHandlerTestsReadOnly(TestHandlerBase): def test_get(self): - args = {'study_id': 1, 'prep_template_id': 1} - response = self.get('/study/new_artifact/', args) + args = {"study_id": 1, "prep_template_id": 1} + response = self.get("/study/new_artifact/", args) self.assertEqual(response.code, 200) self.assertNotEqual(response.body, "") class NewArtifactHandlerTests(TestHandlerBase): - def setUp(self): super(NewArtifactHandlerTests, self).setUp() - tmp_dir = join(get_mountpoint('uploads')[0][1], '1') + tmp_dir = join(get_mountpoint("uploads")[0][1], "1") # Create prep test file to point at - fd, prep_fp = mkstemp(dir=tmp_dir, suffix='.txt') + fd, prep_fp = mkstemp(dir=tmp_dir, suffix=".txt") close(fd) - with open(prep_fp, 'w') as f: + with open(prep_fp, "w") as f: f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""") self.prep = npt.assert_warns( - QiitaDBWarning, PrepTemplate.create, - pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), "16S") + QiitaDBWarning, + PrepTemplate.create, + pd.DataFrame({"new_col": {"1.SKD6.640190": 1}}), + Study(1), + "16S", + ) fd, self.fwd_fp = mkstemp(dir=tmp_dir, suffix=".fastq") close(fd) - with open(self.fwd_fp, 'w') as f: + with open(self.fwd_fp, "w") as f: f.write("@seq\nTACGA\n+ABBBB\n") fd, self.barcodes_fp = mkstemp(dir=tmp_dir, suffix=".fastq") close(fd) - with open(self.barcodes_fp, 'w') as f: + with open(self.barcodes_fp, "w") as f: f.write("@seq\nTACGA\n+ABBBB\n") self._files_to_remove = [prep_fp, self.fwd_fp, self.barcodes_fp] @@ -106,25 +115,25 @@ def tearDown(self): remove(fp) # Replace file if removed as part of function testing - uploads_path = get_mountpoint('uploads')[0][1] - fp = join(uploads_path, '1', 'uploaded_file.txt') + uploads_path = get_mountpoint("uploads")[0][1] + fp = join(uploads_path, "1", "uploaded_file.txt") if not exists(fp): - with open(fp, 'w') as f: - f.write('') + with open(fp, "w") as f: + f.write("") def test_post_artifact(self): args = { - 'artifact-type': 'FASTQ', - 'name': 'New Artifact Handler test', - 'prep-template-id': self.prep.id, - 'raw_forward_seqs': [self.fwd_fp], - 'raw_barcodes': [self.barcodes_fp], - 'raw_reverse_seqs': [], - 'import-artifact': ''} - response = self.post('/study/new_artifact/', args) + "artifact-type": "FASTQ", + "name": "New Artifact Handler test", + "prep-template-id": self.prep.id, + "raw_forward_seqs": [self.fwd_fp], + "raw_barcodes": [self.barcodes_fp], + "raw_reverse_seqs": [], + "import-artifact": "", + } + response = self.post("/study/new_artifact/", args) self.assertEqual(response.code, 200) - self.assertEqual(loads(response.body), - {'status': 'success', 'message': ''}) + self.assertEqual(loads(response.body), {"status": "success", "message": ""}) # make sure new artifact created wait_for_prep_information_job(self.prep.id) @@ -132,67 +141,133 @@ def test_post_artifact(self): class ArtifactGetSamplesTest(TestHandlerBase): def test_get(self): - response = self.get('/artifact/samples/', {'ids[]': [4, 5]}) + response = self.get("/artifact/samples/", {"ids[]": [4, 5]}) self.assertEqual(response.code, 200) - exp = {'status': 'success', 'msg': '', 'data': { - '4': ['1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195', - '1.SKB4.640189', '1.SKB5.640181', '1.SKB6.640176', - '1.SKB7.640196', '1.SKB8.640193', '1.SKB9.640200', - '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198', - '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190', - '1.SKD7.640191', '1.SKD8.640184', 
'1.SKD9.640182', - '1.SKM1.640183', '1.SKM2.640199', '1.SKM3.640197', - '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187', - '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192'], - '5': ['1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195', - '1.SKB4.640189', '1.SKB5.640181', '1.SKB6.640176', - '1.SKB7.640196', '1.SKB8.640193', '1.SKB9.640200', - '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198', - '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190', - '1.SKD7.640191', '1.SKD8.640184', '1.SKD9.640182', - '1.SKM1.640183', '1.SKM2.640199', '1.SKM3.640197', - '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187', - '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192']}} + exp = { + "status": "success", + "msg": "", + "data": { + "4": [ + "1.SKB1.640202", + "1.SKB2.640194", + "1.SKB3.640195", + "1.SKB4.640189", + "1.SKB5.640181", + "1.SKB6.640176", + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKB9.640200", + "1.SKD1.640179", + "1.SKD2.640178", + "1.SKD3.640198", + "1.SKD4.640185", + "1.SKD5.640186", + "1.SKD6.640190", + "1.SKD7.640191", + "1.SKD8.640184", + "1.SKD9.640182", + "1.SKM1.640183", + "1.SKM2.640199", + "1.SKM3.640197", + "1.SKM4.640180", + "1.SKM5.640177", + "1.SKM6.640187", + "1.SKM7.640188", + "1.SKM8.640201", + "1.SKM9.640192", + ], + "5": [ + "1.SKB1.640202", + "1.SKB2.640194", + "1.SKB3.640195", + "1.SKB4.640189", + "1.SKB5.640181", + "1.SKB6.640176", + "1.SKB7.640196", + "1.SKB8.640193", + "1.SKB9.640200", + "1.SKD1.640179", + "1.SKD2.640178", + "1.SKD3.640198", + "1.SKD4.640185", + "1.SKD5.640186", + "1.SKD6.640190", + "1.SKD7.640191", + "1.SKD8.640184", + "1.SKD9.640182", + "1.SKM1.640183", + "1.SKM2.640199", + "1.SKM3.640197", + "1.SKM4.640180", + "1.SKM5.640177", + "1.SKM6.640187", + "1.SKM7.640188", + "1.SKM8.640201", + "1.SKM9.640192", + ], + }, + } self.assertDictEqual(loads(response.body), exp) class ArtifactGetInfoTest(TestHandlerBase): def test_post(self): - response = self.post('/artifact/info/', {'ids[]': [6, 7]}) + response = self.post("/artifact/info/", {"ids[]": [6, 7]}) self.assertEqual(response.code, 200) data = [ - {'files': ['1_study_1001_closed_reference_otu_table_Silva.biom'], - 'target_subfragment': ['V4'], 'artifact_id': 6, - 'data_type': '16S', 'timestamp': '2012-10-02 17:30:00', - 'prep_samples': 27, 'platform': 'Illumina', - 'algorithm_az': 'd480799a0a7a2fbe0e9022bc9c602018', - 'deprecated': False, 'active': True, - 'algorithm': 'Pick closed-reference OTUs | Split libraries FASTQ', - 'parameters': { - 'reference': '2', 'similarity': '0.97', - 'sortmerna_e_value': '1', 'sortmerna_max_pos': '10000', - 'threads': '1', 'sortmerna_coverage': '0.97'}, - 'target_gene': '16S rRNA', 'name': 'BIOM'}, - {'files': ['biom_table.biom'], 'target_subfragment': ['V4'], - 'artifact_id': 7, - 'data_type': '16S', 'timestamp': '2012-10-02 17:30:00', - 'prep_samples': 27, 'platform': 'Illumina', 'algorithm_az': '', - 'deprecated': False, 'active': True, 'algorithm': '', - 'parameters': {}, 'target_gene': '16S rRNA', 'name': 'BIOM'}] - exp = {'status': 'success', 'msg': '', 'data': data} + { + "files": ["1_study_1001_closed_reference_otu_table_Silva.biom"], + "target_subfragment": ["V4"], + "artifact_id": 6, + "data_type": "16S", + "timestamp": "2012-10-02 17:30:00", + "prep_samples": 27, + "platform": "Illumina", + "algorithm_az": "d480799a0a7a2fbe0e9022bc9c602018", + "deprecated": False, + "active": True, + "algorithm": "Pick closed-reference OTUs | Split libraries FASTQ", + "parameters": { + "reference": "2", + "similarity": "0.97", + "sortmerna_e_value": "1", + "sortmerna_max_pos": 
"10000", + "threads": "1", + "sortmerna_coverage": "0.97", + }, + "target_gene": "16S rRNA", + "name": "BIOM", + }, + { + "files": ["biom_table.biom"], + "target_subfragment": ["V4"], + "artifact_id": 7, + "data_type": "16S", + "timestamp": "2012-10-02 17:30:00", + "prep_samples": 27, + "platform": "Illumina", + "algorithm_az": "", + "deprecated": False, + "active": True, + "algorithm": "", + "parameters": {}, + "target_gene": "16S rRNA", + "name": "BIOM", + }, + ] + exp = {"status": "success", "msg": "", "data": data} obs = loads(response.body) self.assertCountEqual(list(obs.keys()), exp.keys()) - self.assertEqual(obs['status'], exp['status']) - self.assertEqual(obs['msg'], exp['msg']) - self.assertCountEqual(obs['data'], exp['data']) + self.assertEqual(obs["status"], exp["status"]) + self.assertEqual(obs["msg"], exp["msg"]) + self.assertCountEqual(obs["data"], exp["data"]) class ArtifactAdminAJAXTestsReadOnly(TestHandlerBase): def test_get_admin(self): - response = self.get('/admin/artifact/', - {'artifact_id': 3}) + response = self.get("/admin/artifact/", {"artifact_id": 3}) self.assertEqual(response.code, 200) - body = response.body.decode('ascii') + body = response.body.decode("ascii") # checking that proper actions shown self.assertIn("Make public", body) @@ -202,19 +277,19 @@ def test_get_admin(self): class ArtifactAdminAJAXTests(TestHandlerBase): - def test_post_admin(self): - response = self.post('/admin/artifact/', - {'artifact_id': 3, - 'visibility': 'sandbox'}) + response = self.post( + "/admin/artifact/", {"artifact_id": 3, "visibility": "sandbox"} + ) self.assertEqual(response.code, 200) # checking that proper actions shown - self.assertEqual({"status": "success", - "message": "Artifact visibility changed to sandbox"}, - loads(response.body)) + self.assertEqual( + {"status": "success", "message": "Artifact visibility changed to sandbox"}, + loads(response.body), + ) - self.assertEqual(Artifact(3).visibility, 'sandbox') + self.assertEqual(Artifact(3).visibility, "sandbox") if __name__ == "__main__": diff --git a/qiita_pet/handlers/study_handlers/tests/test_base.py b/qiita_pet/handlers/study_handlers/tests/test_base.py index 0630409a6..d7b1c4422 100644 --- a/qiita_pet/handlers/study_handlers/tests/test_base.py +++ b/qiita_pet/handlers/study_handlers/tests/test_base.py @@ -9,25 +9,24 @@ from tornado.escape import json_decode -from qiita_pet.test.tornado_test_base import TestHandlerBase from qiita_db.handlers.tests.oauthbase import OauthTestingBase +from qiita_pet.test.tornado_test_base import TestHandlerBase class StudyIndexHandlerTests(TestHandlerBase): def test_get_exists(self): - response = self.get('/study/description/1') + response = self.get("/study/description/1") self.assertEqual(response.code, 200) - self.assertTrue('study/description/baseinfo' in str(response.body)) + self.assertTrue("study/description/baseinfo" in str(response.body)) def test_get_no_exists(self): - response = self.get('/study/description/245') + response = self.get("/study/description/245") self.assertEqual(response.code, 404) def test_get_prep_page(self): - response = self.get('/study/description/1?prep_id=1') + response = self.get("/study/description/1?prep_id=1") self.assertEqual(response.code, 200) - self.assertTrue('study/description/prep_template' in - str(response.body)) + self.assertTrue("study/description/prep_template" in str(response.body)) class StudyBaseInfoAJAX(TestHandlerBase): @@ -37,78 +36,73 @@ class StudyBaseInfoAJAX(TestHandlerBase): class DataTypesMenuAJAXTests(TestHandlerBase): 
def test_get(self): - response = self.get('/study/description/data_type_menu/', - {'study_id': '1'}) + response = self.get("/study/description/data_type_menu/", {"study_id": "1"}) self.assertEqual(response.code, 200) self.assertNotEqual(response.body, "") def test_get_no_exists(self): - response = self.get('/study/description/data_type_menu/', - {'study_id': '245'}) + response = self.get("/study/description/data_type_menu/", {"study_id": "245"}) self.assertEqual(response.code, 404) class StudyFilesAJAXTests(TestHandlerBase): def test_get(self): - args = {'study_id': 1, 'artifact_type': 'FASTQ', 'prep_template_id': 1} - response = self.get('/study/files/', args) + args = {"study_id": 1, "artifact_type": "FASTQ", "prep_template_id": 1} + response = self.get("/study/files/", args) self.assertEqual(response.code, 200) self.assertNotEqual(response.body, "") class TestStudyGetTags(TestHandlerBase): def test_get(self): - response = self.get('/study/get_tags/') - exp = ('{"status": "success", "message": "", "tags": ' - '{"admin": [], "user": []}}') + response = self.get("/study/get_tags/") + exp = '{"status": "success", "message": "", "tags": {"admin": [], "user": []}}' self.assertEqual(response.code, 200) - self.assertEqual(response.body.decode('ascii'), exp) + self.assertEqual(response.body.decode("ascii"), exp) class TestStudyTags(OauthTestingBase): def test_get(self): - response = self.get('/study/tags/1') - exp = ('{"status": "success", "message": "", "tags": []}') + response = self.get("/study/tags/1") + exp = '{"status": "success", "message": "", "tags": []}' self.assertEqual(response.code, 200) - self.assertEqual(response.body.decode('ascii'), exp) + self.assertEqual(response.body.decode("ascii"), exp) # test error - response = self.get('/study/tags/bla') + response = self.get("/study/tags/bla") self.assertEqual(response.code, 400) class TestStudy(OauthTestingBase): def test_patch_tags(self): - arguments = {'op': 'replace', 'path': '/tags', - 'value': ['testA', 'testB']} - obs = self.patch('/study/1', headers=self.header, - data=arguments, asjson=True) + arguments = {"op": "replace", "path": "/tags", "value": ["testA", "testB"]} + obs = self.patch("/study/1", headers=self.header, data=arguments, asjson=True) self.assertEqual(obs.code, 200) - self.assertEqual(obs.body.decode('ascii'), - '{"status": "success", "message": ""}') + self.assertEqual( + obs.body.decode("ascii"), '{"status": "success", "message": ""}' + ) # checking the tags were added - response = self.get('/study/tags/1') - exp = ({"status": "success", "message": "", - "tags": ['testA', 'testB']}) + response = self.get("/study/tags/1") + exp = {"status": "success", "message": "", "tags": ["testA", "testB"]} self.assertEqual(response.code, 200) self.assertEqual(json_decode(response.body), exp) def test_patch_tags_not_found(self): - arguments = {'op': 'replace', 'path': '/tags', - 'value': ['testA', 'testB']} - obs = self.patch('/study/100000000000', headers=self.header, - data=arguments, asjson=True) - self.assertEqual(json_decode(obs.body), {'status': 'error', 'message': - 'Study does not exist'}) + arguments = {"op": "replace", "path": "/tags", "value": ["testA", "testB"]} + obs = self.patch( + "/study/100000000000", headers=self.header, data=arguments, asjson=True + ) + self.assertEqual( + json_decode(obs.body), + {"status": "error", "message": "Study does not exist"}, + ) self.assertEqual(obs.code, 200) def test_patch_not_allowed(self): - arguments = {'op': 'replace', 'path': '/tags', - 'value': ['testA', 'testB']} - obs = 
self.patch('/study/b', headers=self.header, - data=arguments, asjson=True) + arguments = {"op": "replace", "path": "/tags", "value": ["testA", "testB"]} + obs = self.patch("/study/b", headers=self.header, data=arguments, asjson=True) self.assertEqual(obs.code, 405) diff --git a/qiita_pet/handlers/study_handlers/tests/test_ebi_handlers.py b/qiita_pet/handlers/study_handlers/tests/test_ebi_handlers.py index 0ba8da1b4..283bdac28 100644 --- a/qiita_pet/handlers/study_handlers/tests/test_ebi_handlers.py +++ b/qiita_pet/handlers/study_handlers/tests/test_ebi_handlers.py @@ -5,19 +5,19 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main -from os import remove, close +from os import close, remove from os.path import exists from tempfile import mkstemp +from unittest import main from h5py import File from mock import Mock from qiita_files.demux import to_hdf5 -from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_pet.test.tornado_test_base import TestHandlerBase from qiita_db.artifact import Artifact from qiita_db.user import User +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.test.tornado_test_base import TestHandlerBase class TestEBISubmitHandler(TestHandlerBase): @@ -32,13 +32,16 @@ def tearDown(self): remove(fp) def test_get(self): - demux_fp = [x['fp'] for x in Artifact(2).filepaths - if x['fp_type'] == 'preprocessed_demux'][0] - fd, fna_fp = mkstemp(suffix='_seqs.fna') + demux_fp = [ + x["fp"] + for x in Artifact(2).filepaths + if x["fp_type"] == "preprocessed_demux" + ][0] + fd, fna_fp = mkstemp(suffix="_seqs.fna") close(fd) self._clean_up_files.extend([fna_fp, demux_fp]) - with open(fna_fp, 'w') as f: - f.write('>a_1 X orig_bc=X new_bc=X bc_diffs=0\nCCC') + with open(fna_fp, "w") as f: + f.write(">a_1 X orig_bc=X new_bc=X bc_diffs=0\nCCC") with File(demux_fp, "w") as f: to_hdf5(fna_fp, f) BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) @@ -50,7 +53,7 @@ def test_get_no_admin(self): self.assertEqual(response.code, 403) def test_get_no_exist(self): - response = self.get('/ebi_submission/100') + response = self.get("/ebi_submission/100") self.assertEqual(response.code, 404) diff --git a/qiita_pet/handlers/study_handlers/tests/test_edit_handlers.py b/qiita_pet/handlers/study_handlers/tests/test_edit_handlers.py index e1b35f10f..ecfcaf433 100644 --- a/qiita_pet/handlers/study_handlers/tests/test_edit_handlers.py +++ b/qiita_pet/handlers/study_handlers/tests/test_edit_handlers.py @@ -7,13 +7,14 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- from unittest import main + from mock import Mock +from qiita_db.study import Study, StudyPerson +from qiita_db.user import User +from qiita_db.util import check_count, get_count from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.test.tornado_test_base import TestHandlerBase -from qiita_db.study import StudyPerson, Study -from qiita_db.user import User -from qiita_db.util import get_count, check_count class TestStudyEditorForm(TestHandlerBase): @@ -29,13 +30,12 @@ class TestStudyEditorExtendedForm(TestHandlerBase): class TestStudyEditHandlerReadOnly(TestHandlerBase): def test_get(self): """Make sure the page loads when no arguments are passed""" - response = self.get('/study/create/') + response = self.get("/study/create/") self.assertEqual(response.code, 200) self.assertNotEqual(str(response.body), "") class TestStudyEditHandler(TestHandlerBase): - def test_get_edit_utf8(self): """Make sure the page loads when utf8 characters are present""" study = Study(1) @@ -43,109 +43,111 @@ def test_get_edit_utf8(self): study.alias = "TEST_ø" study.description = "TEST_ø" study.abstract = "TEST_ø" - response = self.get('/study/edit/1') + response = self.get("/study/edit/1") self.assertEqual(response.code, 200) self.assertNotEqual(str(response.body), "") def test_post(self): - person_count_before = get_count('qiita.study_person') - study_count_before = get_count('qiita.study') - - post_data = {'new_people_names': ['Adam', 'Ethan'], - 'new_people_emails': ['a@mail.com', 'e@mail.com'], - 'new_people_affiliations': ['CU Boulder', 'NYU'], - 'new_people_addresses': ['Some St., Boulder, CO 80305', - ''], - 'new_people_phones': ['', ''], - 'study_title': 'dummy title', - 'study_alias': 'dummy alias', - 'pubmed_id': 'dummy pmid', - 'environmental_packages': ['air'], - 'timeseries': '1', - 'study_abstract': "dummy abstract", - 'study_description': 'dummy description', - 'principal_investigator': '-2', - 'notes': '', - 'lab_person': '1'} - - self.post('/study/create/', post_data) + person_count_before = get_count("qiita.study_person") + study_count_before = get_count("qiita.study") + + post_data = { + "new_people_names": ["Adam", "Ethan"], + "new_people_emails": ["a@mail.com", "e@mail.com"], + "new_people_affiliations": ["CU Boulder", "NYU"], + "new_people_addresses": ["Some St., Boulder, CO 80305", ""], + "new_people_phones": ["", ""], + "study_title": "dummy title", + "study_alias": "dummy alias", + "pubmed_id": "dummy pmid", + "environmental_packages": ["air"], + "timeseries": "1", + "study_abstract": "dummy abstract", + "study_description": "dummy description", + "principal_investigator": "-2", + "notes": "", + "lab_person": "1", + } + + self.post("/study/create/", post_data) # Check that the new person was created expected_id = person_count_before + 1 - self.assertTrue(check_count('qiita.study_person', expected_id)) + self.assertTrue(check_count("qiita.study_person", expected_id)) new_person = StudyPerson(expected_id) - self.assertTrue(new_person.name == 'Ethan') - self.assertTrue(new_person.email == 'e@mail.com') - self.assertTrue(new_person.affiliation == 'NYU') + self.assertTrue(new_person.name == "Ethan") + self.assertTrue(new_person.email == "e@mail.com") + self.assertTrue(new_person.affiliation == "NYU") self.assertTrue(new_person.address is None) self.assertTrue(new_person.phone is None) # Check the study was created expected_id = study_count_before + 1 - self.assertTrue(check_count('qiita.study', 
expected_id)) + self.assertTrue(check_count("qiita.study", expected_id)) def test_post_edit(self): - study_count_before = get_count('qiita.study') + study_count_before = get_count("qiita.study") study = Study(1) study_info = study.info post_data = { - 'new_people_names': [], - 'new_people_emails': [], - 'new_people_affiliations': [], - 'new_people_addresses': [], - 'new_people_phones': [], - 'study_title': 'New title - test post edit', - 'study_alias': study_info['study_alias'], - 'publications_doi': ','.join( - [doi for doi, _ in study.publications]), - 'study_abstract': study_info['study_abstract'], - 'study_description': study_info['study_description'], - 'notes': '', - 'principal_investigator': study_info['principal_investigator'].id, - 'lab_person': study_info['lab_person'].id} - - self.post('/study/edit/1', post_data) + "new_people_names": [], + "new_people_emails": [], + "new_people_affiliations": [], + "new_people_addresses": [], + "new_people_phones": [], + "study_title": "New title - test post edit", + "study_alias": study_info["study_alias"], + "publications_doi": ",".join([doi for doi, _ in study.publications]), + "study_abstract": study_info["study_abstract"], + "study_description": study_info["study_description"], + "notes": "", + "principal_investigator": study_info["principal_investigator"].id, + "lab_person": study_info["lab_person"].id, + } + + self.post("/study/edit/1", post_data) # Check that the study was updated - self.assertTrue(check_count('qiita.study', study_count_before)) - self.assertEqual(study.title, 'New title - test post edit') + self.assertTrue(check_count("qiita.study", study_count_before)) + self.assertEqual(study.title, "New title - test post edit") def test_post_edit_blank_doi(self): - study_count_before = get_count('qiita.study') + study_count_before = get_count("qiita.study") study = Study(1) study_info = study.info post_data = { - 'new_people_names': [], - 'new_people_emails': [], - 'new_people_affiliations': [], - 'new_people_addresses': [], - 'new_people_phones': [], - 'study_title': 'New title - test post edit', - 'study_alias': study_info['study_alias'], - 'publications_doi': '', - 'study_abstract': study_info['study_abstract'], - 'study_description': study_info['study_description'], - 'notes': '', - 'principal_investigator': study_info['principal_investigator'].id, - 'lab_person': study_info['lab_person'].id} - - response = self.post('/study/edit/1', post_data) + "new_people_names": [], + "new_people_emails": [], + "new_people_affiliations": [], + "new_people_addresses": [], + "new_people_phones": [], + "study_title": "New title - test post edit", + "study_alias": study_info["study_alias"], + "publications_doi": "", + "study_abstract": study_info["study_abstract"], + "study_description": study_info["study_description"], + "notes": "", + "principal_investigator": study_info["principal_investigator"].id, + "lab_person": study_info["lab_person"].id, + } + + response = self.post("/study/edit/1", post_data) self.assertEqual(response.code, 200) # Check that the study was updated - self.assertTrue(check_count('qiita.study', study_count_before)) - self.assertEqual(study.title, 'New title - test post edit') + self.assertTrue(check_count("qiita.study", study_count_before)) + self.assertEqual(study.title, "New title - test post edit") self.assertEqual(study.publications, []) # check for failure - old_title = post_data['study_title'] - post_data['study_title'] = 'My new title!' 
- shared = User('shared@foo.bar') + old_title = post_data["study_title"] + post_data["study_title"] = "My new title!" + shared = User("shared@foo.bar") study.unshare(shared) BaseHandler.get_current_user = Mock(return_value=shared) - response = self.post('/study/edit/1', post_data) + response = self.post("/study/edit/1", post_data) self.assertEqual(response.code, 403) # Check that the study wasn't updated self.assertEqual(study.title, old_title) @@ -156,23 +158,23 @@ def test_post_edit_blank_doi(self): class TestCreateStudyAJAX(TestHandlerBase): def test_get(self): - response = self.get('/check_study/', {'study_title': 'notreal'}) + response = self.get("/check_study/", {"study_title": "notreal"}) self.assertEqual(response.code, 200) # make sure responds properly - self.assertEqual(response.body.decode('ascii'), 'True') + self.assertEqual(response.body.decode("ascii"), "True") - response = self.get('/check_study/') + response = self.get("/check_study/") self.assertEqual(response.code, 200) # make sure responds properly - self.assertEqual(response.body.decode('ascii'), 'False') + self.assertEqual(response.body.decode("ascii"), "False") response = self.get( - '/check_study/', - {'study_title': - 'Identification of the Microbiomes for Cannabis Soils'}) + "/check_study/", + {"study_title": "Identification of the Microbiomes for Cannabis Soils"}, + ) self.assertEqual(response.code, 200) # make sure responds properly - self.assertEqual(response.body.decode('ascii'), 'False') + self.assertEqual(response.body.decode("ascii"), "False") if __name__ == "__main__": diff --git a/qiita_pet/handlers/study_handlers/tests/test_listing_handlers.py b/qiita_pet/handlers/study_handlers/tests/test_listing_handlers.py index a63256f96..b7dcef3c7 100644 --- a/qiita_pet/handlers/study_handlers/tests/test_listing_handlers.py +++ b/qiita_pet/handlers/study_handlers/tests/test_listing_handlers.py @@ -5,8 +5,8 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main from json import loads +from unittest import main from mock import Mock @@ -14,17 +14,23 @@ from qiita_db.artifact import Artifact from qiita_db.study import Study from qiita_db.user import User -from qiita_pet.test.tornado_test_base import TestHandlerBase from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.test.tornado_test_base import TestHandlerBase -GPARAMS = {'similarity': 0.97, 'reference_name': 'Greengenes', - 'sortmerna_e_value': 1, 'sortmerna_max_pos': 10000, 'threads': 1, - 'sortmerna_coverage': 0.97, 'reference_version': u'13_8'} +GPARAMS = { + "similarity": 0.97, + "reference_name": "Greengenes", + "sortmerna_e_value": 1, + "sortmerna_max_pos": 10000, + "threads": 1, + "sortmerna_coverage": 0.97, + "reference_version": "13_8", +} class TestListStudiesHandler(TestHandlerBase): def test_get(self): - response = self.get('/study/list/') + response = self.get("/study/list/") self.assertEqual(response.code, 200) @@ -32,148 +38,167 @@ class TestStudyApprovalList(TestHandlerBase): def test_get(self): BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) Artifact(4).visibility = "awaiting_approval" - response = self.get('/admin/approval/') + response = self.get("/admin/approval/") self.assertEqual(response.code, 200) - self.assertIn("test@foo.bar", response.body.decode('ascii')) + self.assertIn("test@foo.bar", response.body.decode("ascii")) class TestAutocompleteHandler(TestHandlerBase): def test_get(self): - base_url = '/study/sharing/autocomplete/?text=%s' + base_url = "/study/sharing/autocomplete/?text=%s" - r_client.zadd('qiita-usernames', {e: 0 for e, n in User.iter()}) - response = self.get(base_url % 't') + r_client.zadd("qiita-usernames", {e: 0 for e, n in User.iter()}) + response = self.get(base_url % "t") self.assertEqual(response.code, 200) - self.assertEqual(loads(response.body), - {'results': [{"id": "test@foo.bar", - "text": "test@foo.bar"}]}) + self.assertEqual( + loads(response.body), + {"results": [{"id": "test@foo.bar", "text": "test@foo.bar"}]}, + ) - response = self.get(base_url % 'admi') + response = self.get(base_url % "admi") self.assertEqual(response.code, 200) - self.assertEqual(loads(response.body), - {'results': [{"id": "admin@foo.bar", - "text": "admin@foo.bar"}]}) + self.assertEqual( + loads(response.body), + {"results": [{"id": "admin@foo.bar", "text": "admin@foo.bar"}]}, + ) - response = self.get(base_url % 'tesq') + response = self.get(base_url % "tesq") self.assertEqual(response.code, 200) - self.assertEqual(loads(response.body), - {'results': []}) + self.assertEqual(loads(response.body), {"results": []}) - r_client.delete('qiita-usernames') + r_client.delete("qiita-usernames") class TestShareStudyAjax(TestHandlerBase): - def test_get_deselected(self): s = Study(1) - u = User('shared@foo.bar') - args = {'deselected': u.id, 'id': s.id} + u = User("shared@foo.bar") + args = {"deselected": u.id, "id": s.id} self.assertEqual(s.shared_with, [u]) - response = self.get('/study/sharing/', args) + response = self.get("/study/sharing/", args) self.assertEqual(response.code, 200) - exp = {'users': [], 'links': ''} + exp = {"users": [], "links": ""} self.assertEqual(loads(response.body), exp) self.assertEqual(s.shared_with, []) # Make sure unshared message added to the system - self.assertEqual('Study \'Identification of the Microbiomes for ' - 'Cannabis Soils\' has been unshared from you.', - u.messages()[0][1]) + 
self.assertEqual( + "Study 'Identification of the Microbiomes for " + "Cannabis Soils' has been unshared from you.", + u.messages()[0][1], + ) # Share the study back with the user s.share(u) def test_get_selected(self): s = Study(1) - u = User('admin@foo.bar') - args = {'selected': u.id, 'id': s.id} - response = self.get('/study/sharing/', args) + u = User("admin@foo.bar") + args = {"selected": u.id, "id": s.id} + response = self.get("/study/sharing/", args) self.assertEqual(response.code, 200) exp = { - 'users': ['shared@foo.bar', u.id], - 'links': - ('Shared, ' - 'Admin')} + "users": ["shared@foo.bar", u.id], + "links": ( + 'Shared, ' + 'Admin' + ), + } self.assertEqual(loads(response.body), exp) - self.assertEqual(s.shared_with, [User('shared@foo.bar'), u]) + self.assertEqual(s.shared_with, [User("shared@foo.bar"), u]) # Make sure shared message added to the system - self.assertEqual('Study ' - '\'Identification of the Microbiomes for Cannabis ' - 'Soils\' has been shared with you.', - u.messages()[0][1]) + self.assertEqual( + 'Study ' + "'Identification of the Microbiomes for Cannabis " + "Soils' has been shared with you.", + u.messages()[0][1], + ) def test_get_no_access(self): # Create a new study belonging to the 'shared' user, so 'test' doesn't # have access info = { - 'timeseries_type_id': 1, - 'lab_person_id': None, - 'principal_investigator_id': 3, - 'metadata_complete': False, - 'mixs_compliant': True, - 'study_description': 'desc', - 'study_alias': 'alias', - 'study_abstract': 'abstract'} - u = User('shared@foo.bar') - s = Study.create(u, 'test_study', info=info) + "timeseries_type_id": 1, + "lab_person_id": None, + "principal_investigator_id": 3, + "metadata_complete": False, + "mixs_compliant": True, + "study_description": "desc", + "study_alias": "alias", + "study_abstract": "abstract", + } + u = User("shared@foo.bar") + s = Study.create(u, "test_study", info=info) self.assertEqual(s.shared_with, []) - args = {'selected': 'test@foo.bar', 'id': s.id} - response = self.get('/study/sharing/', args) + args = {"selected": "test@foo.bar", "id": s.id} + response = self.get("/study/sharing/", args) self.assertEqual(response.code, 403) self.assertEqual(s.shared_with, []) class TestListStudiesAJAX(TestHandlerBase): - def setUp(self): super(TestListStudiesAJAX, self).setUp() self.json = { - 'iTotalRecords': 1, - 'aaData': [{ - 'status': 'private', - 'ebi_info': ('EBI123456-BB' - ' (submitted)'), - 'study_title': ('Identification of the Microbiomes for ' - 'Cannabis Soils'), - 'metadata_complete': True, - 'study_id': 1, - 'study_alias': 'Cannabis Soils', - 'owner': 'Dude', - 'autoloaded': False, - 'ebi_study_accession': 'EBI123456-BB', - 'shared': ('' - 'Shared'), - 'pubs': ( - '123456, ' - '7891011, 10.100/123456, ' - '10.100/7891011'), - 'pi': ('' - 'PIDude'), - 'study_abstract': ( - 'This is a preliminary study to examine the microbiota ' - 'associated with the Cannabis plant. Soils samples from ' - 'the bulk soil, soil associated with the roots, and the ' - 'rhizosphere were extracted and the DNA sequenced. Roots ' - 'from three independent plants of different strains were ' - 'examined. These roots were obtained November 11, 2011 ' - 'from plants that had been harvested in the summer. 
' - 'Future studies will attempt to analyze the soils and ' - 'rhizospheres from the same location at different time ' - 'points in the plant lifecycle.'), - 'artifact_biom_ids': [4, 5, 6, 7], - 'study_tags': None}], - 'sEcho': 1021, - 'iTotalDisplayRecords': 1} - self.empty = {'aaData': [], - 'iTotalDisplayRecords': 0, - 'iTotalRecords': 0, - 'sEcho': 1021} + "iTotalRecords": 1, + "aaData": [ + { + "status": "private", + "ebi_info": ( + 'EBI123456-BB' + " (submitted)" + ), + "study_title": ( + "Identification of the Microbiomes for Cannabis Soils" + ), + "metadata_complete": True, + "study_id": 1, + "study_alias": "Cannabis Soils", + "owner": "Dude", + "autoloaded": False, + "ebi_study_accession": "EBI123456-BB", + "shared": ( + 'Shared' + ), + "pubs": ( + '123456, ' + '7891011, 10.100/123456, ' + "10.100/7891011" + ), + "pi": ( + 'PIDude' + ), + "study_abstract": ( + "This is a preliminary study to examine the microbiota " + "associated with the Cannabis plant. Soils samples from " + "the bulk soil, soil associated with the roots, and the " + "rhizosphere were extracted and the DNA sequenced. Roots " + "from three independent plants of different strains were " + "examined. These roots were obtained November 11, 2011 " + "from plants that had been harvested in the summer. " + "Future studies will attempt to analyze the soils and " + "rhizospheres from the same location at different time " + "points in the plant lifecycle." + ), + "artifact_biom_ids": [4, 5, 6, 7], + "study_tags": None, + } + ], + "sEcho": 1021, + "iTotalDisplayRecords": 1, + } + self.empty = { + "aaData": [], + "iTotalDisplayRecords": 0, + "iTotalRecords": 0, + "sEcho": 1021, + } self.portal = qiita_config.portal def tearDown(self): diff --git a/qiita_pet/handlers/study_handlers/tests/test_prep_template.py b/qiita_pet/handlers/study_handlers/tests/test_prep_template.py index dd316013b..6435b4e88 100644 --- a/qiita_pet/handlers/study_handlers/tests/test_prep_template.py +++ b/qiita_pet/handlers/study_handlers/tests/test_prep_template.py @@ -12,23 +12,25 @@ class TestNewPrepTemplateAjax(TestHandlerBase): def test_get(self): - response = self.get('/study/new_prep_template/', {'study_id': '1'}) + response = self.get("/study/new_prep_template/", {"study_id": "1"}) self.assertEqual(response.code, 200) class TestPrepTemplateAJAXReadOnly(TestHandlerBase): def test_get(self): - response = self.get('/study/description/prep_template/', - {'prep_id': 1, 'study_id': 1}) + response = self.get( + "/study/description/prep_template/", {"prep_id": 1, "study_id": 1} + ) self.assertEqual(response.code, 200) - self.assertNotEqual(response.body, '') + self.assertNotEqual(response.body, "") class TestPrepFilesHandler(TestHandlerBase): def test_get_files_not_allowed(self): response = self.post( - '/study/prep_files/', - {'type': 'BIOM', 'prep_file': 'uploaded_file.txt', 'study_id': 1}) + "/study/prep_files/", + {"type": "BIOM", "prep_file": "uploaded_file.txt", "study_id": 1}, + ) self.assertEqual(response.code, 405) diff --git a/qiita_pet/handlers/study_handlers/tests/test_processing.py b/qiita_pet/handlers/study_handlers/tests/test_processing.py index d064f0848..71c1959c6 100644 --- a/qiita_pet/handlers/study_handlers/tests/test_processing.py +++ b/qiita_pet/handlers/study_handlers/tests/test_processing.py @@ -5,102 +5,150 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main from json import loads +from unittest import main -from qiita_pet.test.tornado_test_base import TestHandlerBase +from qiita_db.processing_job import ProcessingJob, ProcessingWorkflow from qiita_db.software import Command, Parameters from qiita_db.user import User -from qiita_db.processing_job import ProcessingWorkflow, ProcessingJob +from qiita_pet.test.tornado_test_base import TestHandlerBase class TestListCommandsHandler(TestHandlerBase): def test_get(self): - response = self.get('/study/process/commands/', - {'artifact_id': '8', 'include_analysis': 'true'}) + response = self.get( + "/study/process/commands/", {"artifact_id": "8", "include_analysis": "true"} + ) self.assertEqual(response.code, 200) - exp = {'status': 'success', 'message': '', 'commands': [ - {'id': 9, 'command': 'Summarize Taxa', - 'output': [['taxa_summary', 'taxa_summary']]}, - {'id': 10, 'command': 'Beta Diversity', 'output': [ - ['distance_matrix', 'beta_div_plots']]}, - {'id': 11, 'command': 'Alpha Rarefaction', 'output': [ - ['rarefaction_curves', 'rarefaction_curves']]}, - {'id': 12, 'command': 'Single Rarefaction', 'output': [ - ['rarefied_table', 'BIOM']]}]} - - response = self.get('/study/process/commands/', - {'artifact_id': '3', 'include_analysis': 'false'}) + exp = { + "status": "success", + "message": "", + "commands": [ + { + "id": 9, + "command": "Summarize Taxa", + "output": [["taxa_summary", "taxa_summary"]], + }, + { + "id": 10, + "command": "Beta Diversity", + "output": [["distance_matrix", "beta_div_plots"]], + }, + { + "id": 11, + "command": "Alpha Rarefaction", + "output": [["rarefaction_curves", "rarefaction_curves"]], + }, + { + "id": 12, + "command": "Single Rarefaction", + "output": [["rarefied_table", "BIOM"]], + }, + ], + } + + response = self.get( + "/study/process/commands/", + {"artifact_id": "3", "include_analysis": "false"}, + ) self.assertEqual(response.code, 200) - exp = {'status': 'success', 'message': '', 'commands': [ - {'id': 3, 'command': 'Pick closed-reference OTUs', 'output': [ - ['OTU table', 'BIOM']]}]} + exp = { + "status": "success", + "message": "", + "commands": [ + { + "id": 3, + "command": "Pick closed-reference OTUs", + "output": [["OTU table", "BIOM"]], + } + ], + } self.assertEqual(loads(response.body), exp) class TestListOptionsHandler(TestHandlerBase): def test_get(self): - response = self.get('/study/process/commands/options/', - {'command_id': '3', 'artifact_id': '8'}) + response = self.get( + "/study/process/commands/options/", {"command_id": "3", "artifact_id": "8"} + ) self.assertEqual(response.code, 200) - exp = {'status': 'success', 'message': '', 'options': [ - {'id': 10, 'name': 'Defaults', 'values': - {'reference': 1, 'sortmerna_e_value': 1, - 'sortmerna_max_pos': 10000, 'similarity': 0.97, - 'sortmerna_coverage': 0.97, 'threads': 1}}], - 'req_options': {'input_data': ['artifact', ['Demultiplexed']]}, - 'opt_options': {'reference': ['reference', '1'], - 'sortmerna_e_value': ['float', '1'], - 'sortmerna_max_pos': ['integer', '10000'], - 'similarity': ['float', '0.97'], - 'sortmerna_coverage': ['float', '0.97'], - 'threads': ['integer', '1']}, - 'extra_artifacts': {}} + exp = { + "status": "success", + "message": "", + "options": [ + { + "id": 10, + "name": "Defaults", + "values": { + "reference": 1, + "sortmerna_e_value": 1, + "sortmerna_max_pos": 10000, + "similarity": 0.97, + "sortmerna_coverage": 0.97, + "threads": 1, + }, + } + ], + "req_options": 
{"input_data": ["artifact", ["Demultiplexed"]]}, + "opt_options": { + "reference": ["reference", "1"], + "sortmerna_e_value": ["float", "1"], + "sortmerna_max_pos": ["integer", "10000"], + "similarity": ["float", "0.97"], + "sortmerna_coverage": ["float", "0.97"], + "threads": ["integer", "1"], + }, + "extra_artifacts": {}, + } self.assertEqual(loads(response.body), exp) # test that it works fine with a job_id:artifact_type - response = self.get('/study/process/commands/options/', - {'command_id': '3', - 'artifact_id': 'job_id:artifact_type'}) + response = self.get( + "/study/process/commands/options/", + {"command_id": "3", "artifact_id": "job_id:artifact_type"}, + ) self.assertEqual(response.code, 200) self.assertEqual(loads(response.body), exp) # test that it works fine with no artifact_id - response = self.get('/study/process/commands/options/', - {'command_id': '3'}) + response = self.get("/study/process/commands/options/", {"command_id": "3"}) self.assertEqual(response.code, 200) self.assertEqual(loads(response.body), exp) class TestJobAJAX(TestHandlerBase): def test_get(self): - response = self.get('/study/process/job/', - {'job_id': '063e553b-327c-4818-ab4a-adfe58e49860'}) + response = self.get( + "/study/process/job/", {"job_id": "063e553b-327c-4818-ab4a-adfe58e49860"} + ) self.assertEqual(response.code, 200) exp = { - 'status': 'success', - 'message': '', - 'job_id': "063e553b-327c-4818-ab4a-adfe58e49860", - 'job_external_id': 'Not Available', - 'job_status': "queued", - 'job_step': None, - 'job_error': None, - 'job_parameters': {'barcode_type': u'golay_12', - 'input_data': 1, - 'max_bad_run_length': 3, - 'max_barcode_errors': 1.5, - 'min_per_read_length_fraction': 0.75, - 'phred_quality_threshold': 3, - 'rev_comp': False, - 'rev_comp_barcode': False, - 'rev_comp_mapping_barcodes': False, - 'sequence_max_n': 0, - 'phred_offset': 'auto'}, - 'command': 'Split libraries FASTQ', - 'command_description': 'Demultiplexes and applies quality ' - 'control to FASTQ data', - 'software': 'QIIMEq2', - 'software_version': '1.9.1'} + "status": "success", + "message": "", + "job_id": "063e553b-327c-4818-ab4a-adfe58e49860", + "job_external_id": "Not Available", + "job_status": "queued", + "job_step": None, + "job_error": None, + "job_parameters": { + "barcode_type": "golay_12", + "input_data": 1, + "max_bad_run_length": 3, + "max_barcode_errors": 1.5, + "min_per_read_length_fraction": 0.75, + "phred_quality_threshold": 3, + "rev_comp": False, + "rev_comp_barcode": False, + "rev_comp_mapping_barcodes": False, + "sequence_max_n": 0, + "phred_offset": "auto", + }, + "command": "Split libraries FASTQ", + "command_description": "Demultiplexes and applies quality " + "control to FASTQ data", + "software": "QIIMEq2", + "software_version": "1.9.1", + } self.assertEqual(loads(response.body), exp) def test_patch(self): @@ -112,77 +160,93 @@ def test_patch(self): '"barcode_type": "golay_12", "max_bad_run_length": 3, ' '"rev_comp": false, "phred_quality_threshold": 3, ' '"rev_comp_barcode": false, "rev_comp_mapping_barcodes": false, ' - '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0}') + '"min_per_read_length_fraction": 0.75, "sequence_max_n": 0}' + ) exp_params = Parameters.load(exp_command, json_str=json_str) - exp_user = User('test@foo.bar') + exp_user = User("test@foo.bar") name = "Test processing workflow" # tests success wf = ProcessingWorkflow.from_scratch( - exp_user, exp_params, name=name, force=True) + exp_user, exp_params, name=name, force=True + ) graph = wf.graph nodes = 
list(graph.nodes()) job_id = nodes[0].id - response = self.patch('/study/process/job/', - {'op': 'remove', 'path': job_id}) + response = self.patch("/study/process/job/", {"op": "remove", "path": job_id}) self.assertEqual(response.code, 200) - exp = {'status': 'error', - 'message': "Can't delete job %s. It is 'in_construction' " - "status. Please use /study/process/workflow/" - % job_id} + exp = { + "status": "error", + "message": "Can't delete job %s. It is 'in_construction' " + "status. Please use /study/process/workflow/" % job_id, + } self.assertEqual(loads(response.body), exp) # Test success - ProcessingJob(job_id)._set_error('Killed for testing') - response = self.patch('/study/process/job/', - {'op': 'remove', 'path': job_id}) + ProcessingJob(job_id)._set_error("Killed for testing") + response = self.patch("/study/process/job/", {"op": "remove", "path": job_id}) self.assertEqual(response.code, 200) - exp = {'status': 'success', - 'message': ''} + exp = {"status": "success", "message": ""} self.assertEqual(loads(response.body), exp) class TestWorkflowHandler(TestHandlerBase): def test_post(self): # test error - response = self.post('/study/process/workflow/', - {'command_id': '3', 'params': '{}'}) + response = self.post( + "/study/process/workflow/", {"command_id": "3", "params": "{}"} + ) self.assertEqual(response.code, 200) - exp = {'status': 'error', 'workflow_id': None, 'job': None, - 'message': "The provided JSON string doesn't encode a parameter" - " set for command 'Pick closed-reference OTUs " - "(ID: 3)'. Missing required parameter: " - "input_data"} + exp = { + "status": "error", + "workflow_id": None, + "job": None, + "message": "The provided JSON string doesn't encode a parameter" + " set for command 'Pick closed-reference OTUs " + "(ID: 3)'. 
Missing required parameter: " + "input_data", + } self.assertDictEqual(loads(response.body), exp) # test success - response = self.post('/study/process/workflow/', - {'command_id': '3', - 'params': '{"input_data": 1}'}) + response = self.post( + "/study/process/workflow/", + {"command_id": "3", "params": '{"input_data": 1}'}, + ) self.assertEqual(response.code, 200) obs = loads(response.body) # we are going to copy the workflow_id/job information because we only # care about the reply - exp = {'status': 'success', 'workflow_id': obs['workflow_id'], - 'job': obs['job'], 'message': ''} + exp = { + "status": "success", + "workflow_id": obs["workflow_id"], + "job": obs["job"], + "message": "", + } self.assertEqual(obs, exp) # test with files - response = self.post('/study/process/workflow/', - {'command_id': '3', 'params': '{"input_data": 3}', - 'files': '{"template": {"body": b""}}', - 'headers': { - 'Content-Type': 'application/json', - 'Origin': 'localhost' - }, }) + response = self.post( + "/study/process/workflow/", + { + "command_id": "3", + "params": '{"input_data": 3}', + "files": '{"template": {"body": b""}}', + "headers": {"Content-Type": "application/json", "Origin": "localhost"}, + }, + ) self.assertEqual(response.code, 200) obs = loads(response.body) # we are going to copy the workflow_id/job information because we only # care about the reply - exp = {'status': 'success', 'workflow_id': obs['workflow_id'], - 'job': obs['job'], 'message': ''} + exp = { + "status": "success", + "workflow_id": obs["workflow_id"], + "job": obs["job"], + "message": "", + } self.assertEqual(obs, exp) diff --git a/qiita_pet/handlers/study_handlers/tests/test_sample_template.py b/qiita_pet/handlers/study_handlers/tests/test_sample_template.py index f2388138d..ebea78f02 100644 --- a/qiita_pet/handlers/study_handlers/tests/test_sample_template.py +++ b/qiita_pet/handlers/study_handlers/tests/test_sample_template.py @@ -5,31 +5,34 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main from json import loads -from os import remove, close +from os import close, remove from os.path import exists, join from tempfile import mkstemp +from unittest import main -from tornado.web import HTTPError -from mock import Mock import pandas as pd +from mock import Mock +from tornado.web import HTTPError -from qiita_pet.handlers.base_handlers import BaseHandler from qiita_core.qiita_settings import r_client from qiita_core.testing import wait_for_processing_job -from qiita_db.user import User -from qiita_db.study import Study, StudyPerson -from qiita_db.util import get_mountpoint from qiita_db.exceptions import QiitaDBColumnError from qiita_db.metadata_template.sample_template import SampleTemplate -from qiita_pet.test.tornado_test_base import TestHandlerBase +from qiita_db.study import Study, StudyPerson +from qiita_db.user import User +from qiita_db.util import get_mountpoint +from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.study_handlers.sample_template import ( - _build_sample_summary, sample_template_checks, + _build_sample_summary, + sample_template_checks, + sample_template_columns_get_req, + sample_template_handler_delete_request, + sample_template_handler_patch_request, sample_template_handler_post_request, sample_template_overview_handler_get_request, - sample_template_handler_delete_request, - sample_template_handler_patch_request, sample_template_columns_get_req) +) +from qiita_pet.test.tornado_test_base import TestHandlerBase class TestHelpers(TestHandlerBase): @@ -65,12 +68,12 @@ def _create_study(self, study_title): "study_description": "DESC", "study_abstract": "ABS", "principal_investigator_id": StudyPerson(3), - "lab_person_id": StudyPerson(1) + "lab_person_id": StudyPerson(1), } - return Study.create(User('test@foo.bar'), study_title, info) + return Study.create(User("test@foo.bar"), study_title, info) def test_sample_template_checks(self): - user = User('test@foo.bar') + user = User("test@foo.bar") # If the user has access, this should not raise anything, so it will # keep the execution @@ -78,528 +81,644 @@ def test_sample_template_checks(self): sample_template_checks(1, user, check_exists=True) # Test study doesn't exist - with self.assertRaisesRegex(HTTPError, 'Study does not exist'): + with self.assertRaisesRegex(HTTPError, "Study does not exist"): sample_template_checks(1000000, user) # Test user doesn't have access to the study - with self.assertRaisesRegex(HTTPError, - 'User has insufficient permissions'): - sample_template_checks(1, User('demo@microbio.me')) + with self.assertRaisesRegex(HTTPError, "User has insufficient permissions"): + sample_template_checks(1, User("demo@microbio.me")) # Test sample template doesn't exist - new_study = self._create_study('Test Sample Template Checks') - with self.assertRaisesRegex(HTTPError, - "Study %s doesn't have sample information" - % new_study.id): + new_study = self._create_study("Test Sample Template Checks") + with self.assertRaisesRegex( + HTTPError, "Study %s doesn't have sample information" % new_study.id + ): sample_template_checks(new_study.id, user, check_exists=True) def test_sample_template_handler_post_request(self): # Test user doesn't have access - with self.assertRaisesRegex(HTTPError, - 'User has insufficient permissions'): - sample_template_handler_post_request( - 1, User('demo@microbio.me'), 'ignored') + with self.assertRaisesRegex(HTTPError, "User has 
insufficient permissions"): + sample_template_handler_post_request(1, User("demo@microbio.me"), "ignored") # Test study doesn't exist - user = User('test@foo.bar') - with self.assertRaisesRegex(HTTPError, 'Study does not exist'): - sample_template_handler_post_request(1000000, user, 'ignored') + user = User("test@foo.bar") + with self.assertRaisesRegex(HTTPError, "Study does not exist"): + sample_template_handler_post_request(1000000, user, "ignored") # Test file doesn't exist - with self.assertRaisesRegex(HTTPError, 'Filepath not found'): - sample_template_handler_post_request(1, user, 'DoesNotExist.txt') + with self.assertRaisesRegex(HTTPError, "Filepath not found"): + sample_template_handler_post_request(1, user, "DoesNotExist.txt") # Test looks like mapping file and no data_type provided - uploads_dir = join(get_mountpoint('uploads')[0][1], '1') - fd, fp = mkstemp(suffix='.txt', dir=uploads_dir) + uploads_dir = join(get_mountpoint("uploads")[0][1], "1") + fd, fp = mkstemp(suffix=".txt", dir=uploads_dir) self._clean_up_files.append(fp) close(fd) - with open(fp, 'w') as f: - f.write('#SampleID\tCol1\nSample1\tVal1') + with open(fp, "w") as f: + f.write("#SampleID\tCol1\nSample1\tVal1") with self.assertRaisesRegex( - HTTPError, 'Please, choose a data type if uploading a QIIME ' - 'mapping file'): + HTTPError, "Please, choose a data type if uploading a QIIME mapping file" + ): sample_template_handler_post_request(1, user, fp) # Test success - obs = sample_template_handler_post_request( - 1, user, 'uploaded_file.txt') - self.assertCountEqual(obs.keys(), ['job']) - job_info = r_client.get('sample_template_1') + obs = sample_template_handler_post_request(1, user, "uploaded_file.txt") + self.assertCountEqual(obs.keys(), ["job"]) + job_info = r_client.get("sample_template_1") self.assertIsNotNone(job_info) # Wait until the job is done - wait_for_processing_job(loads(job_info)['job_id']) + wait_for_processing_job(loads(job_info)["job_id"]) # Test direct upload obs = sample_template_handler_post_request( - 1, user, fp, data_type='16S', direct_upload=True) - self.assertCountEqual(obs.keys(), ['job']) - job_info = r_client.get('sample_template_1') + 1, user, fp, data_type="16S", direct_upload=True + ) + self.assertCountEqual(obs.keys(), ["job"]) + job_info = r_client.get("sample_template_1") self.assertIsNotNone(job_info) # Wait until the job is done - wait_for_processing_job(loads(job_info)['job_id']) + wait_for_processing_job(loads(job_info)["job_id"]) def test_sample_template_handler_patch_request(self): - user = User('test@foo.bar') + user = User("test@foo.bar") # Test user doesn't have access - with self.assertRaisesRegex(HTTPError, - 'User has insufficient permissions'): + with self.assertRaisesRegex(HTTPError, "User has insufficient permissions"): sample_template_handler_patch_request( - User('demo@microbio.me'), "remove", - "/1/columns/season_environment/") + User("demo@microbio.me"), "remove", "/1/columns/season_environment/" + ) # Test study doesn't exist - with self.assertRaisesRegex(HTTPError, 'Study does not exist'): + with self.assertRaisesRegex(HTTPError, "Study does not exist"): sample_template_handler_patch_request( - user, "remove", "/10000/columns/season_environment/") + user, "remove", "/10000/columns/season_environment/" + ) # Test sample template doesn't exist - new_study = self._create_study('Patching test') - with self.assertRaisesRegex(HTTPError, - "Study %s doesn't have sample information" - % new_study.id): + new_study = self._create_study("Patching test") + with 
self.assertRaisesRegex( + HTTPError, "Study %s doesn't have sample information" % new_study.id + ): sample_template_handler_patch_request( - user, "remove", "/%s/columns/season_environment/" - % new_study.id) + user, "remove", "/%s/columns/season_environment/" % new_study.id + ) # Test wrong operation value with self.assertRaisesRegex( - HTTPError, 'Operation add not supported. Current supported ' - 'operations: remove.'): + HTTPError, + "Operation add not supported. Current supported operations: remove.", + ): sample_template_handler_patch_request( - user, 'add', '/1/columns/season_environment') + user, "add", "/1/columns/season_environment" + ) # Test wrong path parameter < 2 - with self.assertRaisesRegex(HTTPError, 'Incorrect path parameter'): - sample_template_handler_patch_request(user, 'ignored', '1') + with self.assertRaisesRegex(HTTPError, "Incorrect path parameter"): + sample_template_handler_patch_request(user, "ignored", "1") # TESTS FOR OPERATION: remove # Test wrong path parameter - with self.assertRaisesRegex(HTTPError, 'Incorrect path parameter'): + with self.assertRaisesRegex(HTTPError, "Incorrect path parameter"): sample_template_handler_patch_request( - user, 'remove', '/1/season_environment/') + user, "remove", "/1/season_environment/" + ) # Add sample information to the new study so we can delete one column # without affecting the other tests md = pd.DataFrame.from_dict( - {'Sample1': {'col1': 'val1', 'col2': 'val2'}}, - orient='index', dtype=str) + {"Sample1": {"col1": "val1", "col2": "val2"}}, orient="index", dtype=str + ) st = SampleTemplate.create(md, new_study) # Test success obs = sample_template_handler_patch_request( - user, "remove", "/%s/columns/col2/" - % new_study.id) - self.assertCountEqual(obs.keys(), ['job']) - job_info = r_client.get('sample_template_%s' % new_study.id) + user, "remove", "/%s/columns/col2/" % new_study.id + ) + self.assertCountEqual(obs.keys(), ["job"]) + job_info = r_client.get("sample_template_%s" % new_study.id) self.assertIsNotNone(job_info) # Wait until the job is done - wait_for_processing_job(loads(job_info)['job_id']) - self.assertNotIn('col2', st.categories) + wait_for_processing_job(loads(job_info)["job_id"]) + self.assertNotIn("col2", st.categories) # TESTS FOR OPERATION: replace # Test incorrect path parameter with replace - with self.assertRaisesRegex(HTTPError, 'Incorrect path parameter'): + with self.assertRaisesRegex(HTTPError, "Incorrect path parameter"): sample_template_handler_patch_request(user, "replace", "/1/") # Test attribute not found - with self.assertRaisesRegex(HTTPError, 'Attribute name not found'): + with self.assertRaisesRegex(HTTPError, "Attribute name not found"): sample_template_handler_patch_request(user, "replace", "/1/name") # Test missing value - with self.assertRaisesRegex(HTTPError, - 'Value is required when updating sample ' - 'information'): + with self.assertRaisesRegex( + HTTPError, "Value is required when updating sample information" + ): sample_template_handler_patch_request(user, "replace", "/1/data") # Test file doesn't exist - with self.assertRaisesRegex(HTTPError, 'Filepath not found'): - sample_template_handler_patch_request(user, "replace", "/1/data", - req_value='DoesNotExist') + with self.assertRaisesRegex(HTTPError, "Filepath not found"): + sample_template_handler_patch_request( + user, "replace", "/1/data", req_value="DoesNotExist" + ) # Test success obs = sample_template_handler_patch_request( - user, "replace", "/1/data", req_value='uploaded_file.txt') - 
self.assertCountEqual(obs.keys(), ['job']) - job_info = r_client.get('sample_template_1') + user, "replace", "/1/data", req_value="uploaded_file.txt" + ) + self.assertCountEqual(obs.keys(), ["job"]) + job_info = r_client.get("sample_template_1") self.assertIsNotNone(job_info) # Wait until the job is done - wait_for_processing_job(loads(job_info)['job_id']) + wait_for_processing_job(loads(job_info)["job_id"]) def test_sample_template_handler_delete_request(self): # Test user doesn't have access - with self.assertRaisesRegex(HTTPError, - 'User has insufficient permissions'): - sample_template_handler_delete_request( - 1, User('demo@microbio.me')) + with self.assertRaisesRegex(HTTPError, "User has insufficient permissions"): + sample_template_handler_delete_request(1, User("demo@microbio.me")) # Test study doesn't exist - user = User('test@foo.bar') - with self.assertRaisesRegex(HTTPError, 'Study does not exist'): + user = User("test@foo.bar") + with self.assertRaisesRegex(HTTPError, "Study does not exist"): sample_template_handler_delete_request(1000000, user) # Test sample information doesn't exist - new_study = self._create_study('Study for deleting test') - with self.assertRaisesRegex(HTTPError, "Study %s doesn't have sample " - "information" % new_study.id): + new_study = self._create_study("Study for deleting test") + with self.assertRaisesRegex( + HTTPError, "Study %s doesn't have sample information" % new_study.id + ): sample_template_handler_delete_request(new_study.id, user) # Test success - user = User('test@foo.bar') + user = User("test@foo.bar") obs = sample_template_handler_delete_request(1, user) - self.assertCountEqual(obs.keys(), ['job']) - job_info = r_client.get('sample_template_1') + self.assertCountEqual(obs.keys(), ["job"]) + job_info = r_client.get("sample_template_1") self.assertIsNotNone(job_info) # Wait until the job is done - wait_for_processing_job(loads(job_info)['job_id']) + wait_for_processing_job(loads(job_info)["job_id"]) def test_sample_template_overview_handler_get_request(self): # Test user doesn't have access - with self.assertRaisesRegex(HTTPError, - 'User has insufficient permissions'): - sample_template_overview_handler_get_request( - 1, User('demo@microbio.me')) + with self.assertRaisesRegex(HTTPError, "User has insufficient permissions"): + sample_template_overview_handler_get_request(1, User("demo@microbio.me")) # Test study doesn't exist - user = User('test@foo.bar') - with self.assertRaisesRegex(HTTPError, 'Study does not exist'): + user = User("test@foo.bar") + with self.assertRaisesRegex(HTTPError, "Study does not exist"): sample_template_overview_handler_get_request(1000000, user) # Test sample template exist obs = sample_template_overview_handler_get_request(1, user) - exp = {'exists': True, - 'uploaded_files': ['uploaded_file.txt'], - 'data_types': [], - 'user_can_edit': True, - 'job': None, - 'download_id': 23, - 'st_files': ['1_19700101-000000.txt'], - 'num_samples': 27, - 'num_columns': 31, - 'columns': sorted( - ['season_environment', 'assigned_from_geo', - 'texture', 'taxon_id', 'depth', 'host_taxid', - 'common_name', 'water_content_soil', 'elevation', - 'temp', 'tot_nitro', 'samp_salinity', 'altitude', - 'env_biome', 'country', 'ph', 'anonymized_name', - 'tot_org_carb', 'description_duplicate', - 'env_feature', 'physical_specimen_location', - 'physical_specimen_remaining', 'dna_extracted', - 'sample_type', 'collection_timestamp', - 'host_subject_id', 'description', 'latitude', - 'longitude', 'scientific_name', 'env_package']), - 
'sample_restrictions': ''} + exp = { + "exists": True, + "uploaded_files": ["uploaded_file.txt"], + "data_types": [], + "user_can_edit": True, + "job": None, + "download_id": 23, + "st_files": ["1_19700101-000000.txt"], + "num_samples": 27, + "num_columns": 31, + "columns": sorted( + [ + "season_environment", + "assigned_from_geo", + "texture", + "taxon_id", + "depth", + "host_taxid", + "common_name", + "water_content_soil", + "elevation", + "temp", + "tot_nitro", + "samp_salinity", + "altitude", + "env_biome", + "country", + "ph", + "anonymized_name", + "tot_org_carb", + "description_duplicate", + "env_feature", + "physical_specimen_location", + "physical_specimen_remaining", + "dna_extracted", + "sample_type", + "collection_timestamp", + "host_subject_id", + "description", + "latitude", + "longitude", + "scientific_name", + "env_package", + ] + ), + "sample_restrictions": "", + } # the first element is the current fp and the name is based on the # last update so just removing to easy test - obs['st_files'].pop(0) + obs["st_files"].pop(0) self.assertEqual(obs, exp) # Test sample template doesn't exist - new_study = self._create_study('Some New Study') + new_study = self._create_study("Some New Study") obs = sample_template_overview_handler_get_request(new_study.id, user) - exp = {'exists': False, - 'uploaded_files': [], - 'data_types': ['16S', '18S', 'Genomics', 'ITS', - 'Job Output Folder', 'Metabolomic', - 'Metagenomic', 'Metatranscriptomics', - 'Multiomic', 'Proteomic', 'Transcriptomics', - 'Viromics'], - 'user_can_edit': True, - 'job': None, - 'download_id': None, - 'st_files': [], - 'num_samples': 0, - 'num_columns': 0, - 'columns': [], - 'sample_restrictions': ''} + exp = { + "exists": False, + "uploaded_files": [], + "data_types": [ + "16S", + "18S", + "Genomics", + "ITS", + "Job Output Folder", + "Metabolomic", + "Metagenomic", + "Metatranscriptomics", + "Multiomic", + "Proteomic", + "Transcriptomics", + "Viromics", + ], + "user_can_edit": True, + "job": None, + "download_id": None, + "st_files": [], + "num_samples": 0, + "num_columns": 0, + "columns": [], + "sample_restrictions": "", + } self.assertEqual(obs, exp) def test_sample_template_columns_get_req(self): # Test user doesn't have access - with self.assertRaisesRegex(HTTPError, - 'User has insufficient permissions'): - sample_template_columns_get_req(1, None, User('demo@microbio.me')) + with self.assertRaisesRegex(HTTPError, "User has insufficient permissions"): + sample_template_columns_get_req(1, None, User("demo@microbio.me")) # Test study doesn't exist - user = User('test@foo.bar') - with self.assertRaisesRegex(HTTPError, 'Study does not exist'): + user = User("test@foo.bar") + with self.assertRaisesRegex(HTTPError, "Study does not exist"): sample_template_columns_get_req(1000000, None, user) # Test sample template doesn't exist - new_study = self._create_study('New Study - Summary') - with self.assertRaisesRegex(HTTPError, "Study %s doesn't have sample " - "information" % new_study.id): + new_study = self._create_study("New Study - Summary") + with self.assertRaisesRegex( + HTTPError, "Study %s doesn't have sample information" % new_study.id + ): sample_template_columns_get_req(new_study.id, None, user) # Test that if the column doesn't exist it raises an error - with self.assertRaisesRegex(QiitaDBColumnError, 'should-fail'): - sample_template_columns_get_req(1, 'should-fail', user) + with self.assertRaisesRegex(QiitaDBColumnError, "should-fail"): + sample_template_columns_get_req(1, "should-fail", user) # Test success 
obs = sample_template_columns_get_req(1, None, user) - exp = sorted([ - 'season_environment', 'assigned_from_geo', 'texture', 'taxon_id', - 'depth', 'host_taxid', 'common_name', 'water_content_soil', - 'elevation', 'temp', 'tot_nitro', 'samp_salinity', 'altitude', - 'env_biome', 'country', 'ph', 'anonymized_name', 'tot_org_carb', - 'description_duplicate', 'env_feature', 'env_package', - 'physical_specimen_location', 'physical_specimen_remaining', - 'dna_extracted', 'sample_type', 'collection_timestamp', - 'host_subject_id', 'description', 'latitude', 'longitude', - 'scientific_name']) + exp = sorted( + [ + "season_environment", + "assigned_from_geo", + "texture", + "taxon_id", + "depth", + "host_taxid", + "common_name", + "water_content_soil", + "elevation", + "temp", + "tot_nitro", + "samp_salinity", + "altitude", + "env_biome", + "country", + "ph", + "anonymized_name", + "tot_org_carb", + "description_duplicate", + "env_feature", + "env_package", + "physical_specimen_location", + "physical_specimen_remaining", + "dna_extracted", + "sample_type", + "collection_timestamp", + "host_subject_id", + "description", + "latitude", + "longitude", + "scientific_name", + ] + ) self.assertEqual(obs, exp) - obs = sample_template_columns_get_req(1, 'season_environment', user) - exp = ['winter', 'winter', 'winter', 'winter', 'winter', 'winter', - 'winter', 'winter', 'winter', 'winter', 'winter', 'winter', - 'winter', 'winter', 'winter', 'winter', 'winter', 'winter', - 'winter', 'winter', 'winter', 'winter', 'winter', 'winter', - 'winter', 'winter', 'winter'] + obs = sample_template_columns_get_req(1, "season_environment", user) + exp = [ + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + "winter", + ] self.assertCountEqual(obs, exp) def test_build_sample_summary(self): - cols, rows = _build_sample_summary(1, 'test@foo.bar') + cols, rows = _build_sample_summary(1, "test@foo.bar") cols_exp = { - 'prep2': 'Prep information 2 (2)', - 'prep1': 'Prep information 1 (1)'} + "prep2": "Prep information 2 (2)", + "prep1": "Prep information 1 (1)", + } rows_exp = { - '1.SKB2.640194': { - 'sample': '1.SKB2.640194', 'prep2': 'X', 'prep1': 'X'}, - '1.SKM4.640180': { - 'sample': '1.SKM4.640180', 'prep2': 'X', 'prep1': 'X'}, - '1.SKB3.640195': { - 'sample': '1.SKB3.640195', 'prep2': 'X', 'prep1': 'X'}, - '1.SKB6.640176': { - 'sample': '1.SKB6.640176', 'prep2': 'X', 'prep1': 'X'}, - '1.SKD6.640190': { - 'sample': '1.SKD6.640190', 'prep2': 'X', 'prep1': 'X'}, - '1.SKM6.640187': { - 'sample': '1.SKM6.640187', 'prep2': 'X', 'prep1': 'X'}, - '1.SKD9.640182': { - 'sample': '1.SKD9.640182', 'prep2': 'X', 'prep1': 'X'}, - '1.SKM8.640201': { - 'sample': '1.SKM8.640201', 'prep2': 'X', 'prep1': 'X'}, - '1.SKM2.640199': { - 'sample': '1.SKM2.640199', 'prep2': 'X', 'prep1': 'X'}, - '1.SKD2.640178': { - 'sample': '1.SKD2.640178', 'prep2': 'X', 'prep1': 'X'}, - '1.SKB7.640196': { - 'sample': '1.SKB7.640196', 'prep2': 'X', 'prep1': 'X'}, - '1.SKD4.640185': { - 'sample': '1.SKD4.640185', 'prep2': 'X', 'prep1': 'X'}, - '1.SKB8.640193': { - 'sample': '1.SKB8.640193', 'prep2': 'X', 'prep1': 'X'}, - '1.SKM3.640197': { - 'sample': '1.SKM3.640197', 'prep2': 'X', 'prep1': 'X'}, - '1.SKD5.640186': { - 'sample': '1.SKD5.640186', 'prep2': 'X', 'prep1': 'X'}, - '1.SKB1.640202': { - 
'sample': '1.SKB1.640202', 'prep2': 'X', 'prep1': 'X'}, - '1.SKM1.640183': { - 'sample': '1.SKM1.640183', 'prep2': 'X', 'prep1': 'X'}, - '1.SKD1.640179': { - 'sample': '1.SKD1.640179', 'prep2': 'X', 'prep1': 'X'}, - '1.SKD3.640198': { - 'sample': '1.SKD3.640198', 'prep2': 'X', 'prep1': 'X'}, - '1.SKB5.640181': { - 'sample': '1.SKB5.640181', 'prep2': 'X', 'prep1': 'X'}, - '1.SKB4.640189': { - 'sample': '1.SKB4.640189', 'prep2': 'X', 'prep1': 'X'}, - '1.SKB9.640200': { - 'sample': '1.SKB9.640200', 'prep2': 'X', 'prep1': 'X'}, - '1.SKM9.640192': { - 'sample': '1.SKM9.640192', 'prep2': 'X', 'prep1': 'X'}, - '1.SKD8.640184': { - 'sample': '1.SKD8.640184', 'prep2': 'X', 'prep1': 'X'}, - '1.SKM5.640177': { - 'sample': '1.SKM5.640177', 'prep2': 'X', 'prep1': 'X'}, - '1.SKM7.640188': { - 'sample': '1.SKM7.640188', 'prep2': 'X', 'prep1': 'X'}, - '1.SKD7.640191': { - 'sample': '1.SKD7.640191', 'prep2': 'X', 'prep1': 'X'}} + "1.SKB2.640194": {"sample": "1.SKB2.640194", "prep2": "X", "prep1": "X"}, + "1.SKM4.640180": {"sample": "1.SKM4.640180", "prep2": "X", "prep1": "X"}, + "1.SKB3.640195": {"sample": "1.SKB3.640195", "prep2": "X", "prep1": "X"}, + "1.SKB6.640176": {"sample": "1.SKB6.640176", "prep2": "X", "prep1": "X"}, + "1.SKD6.640190": {"sample": "1.SKD6.640190", "prep2": "X", "prep1": "X"}, + "1.SKM6.640187": {"sample": "1.SKM6.640187", "prep2": "X", "prep1": "X"}, + "1.SKD9.640182": {"sample": "1.SKD9.640182", "prep2": "X", "prep1": "X"}, + "1.SKM8.640201": {"sample": "1.SKM8.640201", "prep2": "X", "prep1": "X"}, + "1.SKM2.640199": {"sample": "1.SKM2.640199", "prep2": "X", "prep1": "X"}, + "1.SKD2.640178": {"sample": "1.SKD2.640178", "prep2": "X", "prep1": "X"}, + "1.SKB7.640196": {"sample": "1.SKB7.640196", "prep2": "X", "prep1": "X"}, + "1.SKD4.640185": {"sample": "1.SKD4.640185", "prep2": "X", "prep1": "X"}, + "1.SKB8.640193": {"sample": "1.SKB8.640193", "prep2": "X", "prep1": "X"}, + "1.SKM3.640197": {"sample": "1.SKM3.640197", "prep2": "X", "prep1": "X"}, + "1.SKD5.640186": {"sample": "1.SKD5.640186", "prep2": "X", "prep1": "X"}, + "1.SKB1.640202": {"sample": "1.SKB1.640202", "prep2": "X", "prep1": "X"}, + "1.SKM1.640183": {"sample": "1.SKM1.640183", "prep2": "X", "prep1": "X"}, + "1.SKD1.640179": {"sample": "1.SKD1.640179", "prep2": "X", "prep1": "X"}, + "1.SKD3.640198": {"sample": "1.SKD3.640198", "prep2": "X", "prep1": "X"}, + "1.SKB5.640181": {"sample": "1.SKB5.640181", "prep2": "X", "prep1": "X"}, + "1.SKB4.640189": {"sample": "1.SKB4.640189", "prep2": "X", "prep1": "X"}, + "1.SKB9.640200": {"sample": "1.SKB9.640200", "prep2": "X", "prep1": "X"}, + "1.SKM9.640192": {"sample": "1.SKM9.640192", "prep2": "X", "prep1": "X"}, + "1.SKD8.640184": {"sample": "1.SKD8.640184", "prep2": "X", "prep1": "X"}, + "1.SKM5.640177": {"sample": "1.SKM5.640177", "prep2": "X", "prep1": "X"}, + "1.SKM7.640188": {"sample": "1.SKM7.640188", "prep2": "X", "prep1": "X"}, + "1.SKD7.640191": {"sample": "1.SKD7.640191", "prep2": "X", "prep1": "X"}, + } self.assertEqual(cols, cols_exp) self.assertEqual(rows, rows_exp) class TestSampleTemplateHandler(TestHandlerBase): def test_get(self): - response = self.get('/study/description/sample_template/', - {'study_id': 1}) + response = self.get("/study/description/sample_template/", {"study_id": 1}) self.assertEqual(response.code, 200) self.assertNotEqual(response.body, "") # Study doesn't exist - response = self.get('/study/description/sample_template/', - {'study_id': 10000}) + response = self.get("/study/description/sample_template/", {"study_id": 10000}) 
self.assertEqual(response.code, 404) # User doesn't have access - BaseHandler.get_current_user = Mock( - return_value=User('demo@microbio.me')) - response = self.get('/study/description/sample_template/', - {'study_id': 1}) + BaseHandler.get_current_user = Mock(return_value=User("demo@microbio.me")) + response = self.get("/study/description/sample_template/", {"study_id": 1}) self.assertEqual(response.code, 403) def test_post(self): - response = self.post('/study/description/sample_template/', - {'study_id': 1, - 'filepath': 'uploaded_file.txt', - 'data_type': ''}) + response = self.post( + "/study/description/sample_template/", + {"study_id": 1, "filepath": "uploaded_file.txt", "data_type": ""}, + ) self.assertEqual(response.code, 200) self.assertIsNotNone(response.body) obs = loads(response.body) - self.assertCountEqual(obs.keys(), ['job']) + self.assertCountEqual(obs.keys(), ["job"]) # Wait until the job is done - wait_for_processing_job(obs['job']) + wait_for_processing_job(obs["job"]) def test_patch(self): - response = self.patch('/study/description/sample_template/', - {'op': 'replace', - 'path': '/1/data', - 'value': 'uploaded_file.txt'}) + response = self.patch( + "/study/description/sample_template/", + {"op": "replace", "path": "/1/data", "value": "uploaded_file.txt"}, + ) self.assertEqual(response.code, 200) self.assertIsNotNone(response.body) obs = loads(response.body) - self.assertCountEqual(obs.keys(), ['job']) + self.assertCountEqual(obs.keys(), ["job"]) # Wait until the job is done - wait_for_processing_job(obs['job']) + wait_for_processing_job(obs["job"]) def test_delete(self): - response = self.delete('/study/description/sample_template/', - {'study_id': 1}) + response = self.delete("/study/description/sample_template/", {"study_id": 1}) self.assertEqual(response.code, 200) self.assertIsNotNone(response.body) obs = loads(response.body) - self.assertCountEqual(obs.keys(), ['job']) + self.assertCountEqual(obs.keys(), ["job"]) # Wait until the job is done - wait_for_processing_job(obs['job']) + wait_for_processing_job(obs["job"]) class TestSampleTemplateOverviewHandler(TestHandlerBase): def test_get(self): - response = self.get('/study/description/sample_template/overview/', - {'study_id': 1}) + response = self.get( + "/study/description/sample_template/overview/", {"study_id": 1} + ) self.assertEqual(response.code, 200) self.assertIsNotNone(response.body) obs = loads(response.body) - exp = {'exists': True, - 'uploaded_files': ['uploaded_file.txt'], - 'data_types': [], - 'user_can_edit': True, - 'job': None, - 'download_id': 23, - 'st_files': ['1_19700101-000000.txt'], - 'num_samples': 27, - 'num_columns': 31, - 'columns': sorted( - ['season_environment', 'assigned_from_geo', - 'texture', 'taxon_id', 'depth', 'host_taxid', - 'common_name', 'water_content_soil', 'elevation', - 'temp', 'tot_nitro', 'samp_salinity', 'altitude', - 'env_biome', 'country', 'ph', 'anonymized_name', - 'tot_org_carb', 'description_duplicate', - 'env_feature', 'physical_specimen_location', - 'physical_specimen_remaining', 'dna_extracted', - 'sample_type', 'collection_timestamp', - 'host_subject_id', 'description', 'latitude', - 'longitude', 'scientific_name', 'env_package']), - 'sample_restrictions': ''} + exp = { + "exists": True, + "uploaded_files": ["uploaded_file.txt"], + "data_types": [], + "user_can_edit": True, + "job": None, + "download_id": 23, + "st_files": ["1_19700101-000000.txt"], + "num_samples": 27, + "num_columns": 31, + "columns": sorted( + [ + "season_environment", + 
"assigned_from_geo", + "texture", + "taxon_id", + "depth", + "host_taxid", + "common_name", + "water_content_soil", + "elevation", + "temp", + "tot_nitro", + "samp_salinity", + "altitude", + "env_biome", + "country", + "ph", + "anonymized_name", + "tot_org_carb", + "description_duplicate", + "env_feature", + "physical_specimen_location", + "physical_specimen_remaining", + "dna_extracted", + "sample_type", + "collection_timestamp", + "host_subject_id", + "description", + "latitude", + "longitude", + "scientific_name", + "env_package", + ] + ), + "sample_restrictions": "", + } # the first element is the current fp and the name is based on the # last update so just removing to easy test - obs['st_files'].pop(0) + obs["st_files"].pop(0) self.assertDictEqual(obs, exp) class TestSampleTemplateColumnsHandler(TestHandlerBase): def test_get(self): - response = self.get('/study/description/sample_template/columns/', - {'study_id': 1}) + response = self.get( + "/study/description/sample_template/columns/", {"study_id": 1} + ) self.assertEqual(response.code, 200) self.assertIsNotNone(response.body) obs = loads(response.body) - exp = {'values': sorted([ - 'season_environment', 'assigned_from_geo', 'texture', 'taxon_id', - 'depth', 'host_taxid', 'common_name', 'water_content_soil', - 'elevation', 'temp', 'tot_nitro', 'samp_salinity', 'altitude', - 'env_biome', 'country', 'ph', 'anonymized_name', 'tot_org_carb', - 'description_duplicate', 'env_feature', 'env_package', - 'physical_specimen_location', 'physical_specimen_remaining', - 'dna_extracted', 'sample_type', 'collection_timestamp', - 'host_subject_id', 'description', 'latitude', 'longitude', - 'scientific_name'])} + exp = { + "values": sorted( + [ + "season_environment", + "assigned_from_geo", + "texture", + "taxon_id", + "depth", + "host_taxid", + "common_name", + "water_content_soil", + "elevation", + "temp", + "tot_nitro", + "samp_salinity", + "altitude", + "env_biome", + "country", + "ph", + "anonymized_name", + "tot_org_carb", + "description_duplicate", + "env_feature", + "env_package", + "physical_specimen_location", + "physical_specimen_remaining", + "dna_extracted", + "sample_type", + "collection_timestamp", + "host_subject_id", + "description", + "latitude", + "longitude", + "scientific_name", + ] + ) + } self.assertEqual(obs, exp) class TestSampleAJAXReadOnly(TestHandlerBase): def test_get(self): - res = self.get("/study/description/sample_summary/", {'study_id': 1}) + res = self.get("/study/description/sample_summary/", {"study_id": 1}) self.assertEqual(res.code, 200) # Make sure metadata read properly line = '' - self.assertIn(line, res.body.decode('ascii')) + self.assertIn(line, res.body.decode("ascii")) class TestAnalysesAjax(TestHandlerBase): def test_get(self): - res = self.get("/study/analyses/", {'study_id': 1}) + res = self.get("/study/analyses/", {"study_id": 1}) self.assertEqual(res.code, 200) # making sure at least one analysis is in the page - line = '/analysis/description/1/' - self.assertIn(line, res.body.decode('ascii')) + line = "/analysis/description/1/" + self.assertIn(line, res.body.decode("ascii")) class TestSampleAJAX(TestHandlerBase): - def test_post(self): - res = self.post("/study/description/sample_summary/", { - 'study_id': 1, 'meta_col': 'latitude'}) + res = self.post( + "/study/description/sample_summary/", + {"study_id": 1, "meta_col": "latitude"}, + ) self.assertEqual(res.code, 200) - exp = {"status": "success", - "message": "", - "values": {'1.SKB2.640194': '35.2374368957', - '1.SKM4.640180': "Not 
applicable", - '1.SKB3.640195': '95.2060749748', - '1.SKB6.640176': '78.3634273709', - '1.SKD6.640190': '29.1499460692', - '1.SKM6.640187': '0.291867635913', - '1.SKD9.640182': '23.1218032799', - '1.SKM8.640201': '3.21190859967', - '1.SKM2.640199': '82.8302905615', - '1.SKD2.640178': '53.5050692395', - '1.SKB7.640196': '13.089194595', - '1.SKD4.640185': '40.8623799474', - '1.SKB8.640193': '74.0894932572', - '1.SKM3.640197': "Not applicable", - '1.SKD5.640186': '85.4121476399', - '1.SKB1.640202': '4.59216095574', - '1.SKM1.640183': '38.2627021402', - '1.SKD1.640179': '68.0991287718', - '1.SKD3.640198': '84.0030227585', - '1.SKB5.640181': '10.6655599093', - '1.SKB4.640189': '43.9614715197', - '1.SKB9.640200': '12.6245524972', - '1.SKM9.640192': '12.7065957714', - '1.SKD8.640184': '57.571893782', - '1.SKM5.640177': '44.9725384282', - '1.SKM7.640188': '60.1102854322', - '1.SKD7.640191': '68.51099627'}} + exp = { + "status": "success", + "message": "", + "values": { + "1.SKB2.640194": "35.2374368957", + "1.SKM4.640180": "Not applicable", + "1.SKB3.640195": "95.2060749748", + "1.SKB6.640176": "78.3634273709", + "1.SKD6.640190": "29.1499460692", + "1.SKM6.640187": "0.291867635913", + "1.SKD9.640182": "23.1218032799", + "1.SKM8.640201": "3.21190859967", + "1.SKM2.640199": "82.8302905615", + "1.SKD2.640178": "53.5050692395", + "1.SKB7.640196": "13.089194595", + "1.SKD4.640185": "40.8623799474", + "1.SKB8.640193": "74.0894932572", + "1.SKM3.640197": "Not applicable", + "1.SKD5.640186": "85.4121476399", + "1.SKB1.640202": "4.59216095574", + "1.SKM1.640183": "38.2627021402", + "1.SKD1.640179": "68.0991287718", + "1.SKD3.640198": "84.0030227585", + "1.SKB5.640181": "10.6655599093", + "1.SKB4.640189": "43.9614715197", + "1.SKB9.640200": "12.6245524972", + "1.SKM9.640192": "12.7065957714", + "1.SKD8.640184": "57.571893782", + "1.SKM5.640177": "44.9725384282", + "1.SKM7.640188": "60.1102854322", + "1.SKD7.640191": "68.51099627", + }, + } self.assertEqual(loads(res.body), exp) def test_post_error(self): - res = self.post("/study/description/sample_summary/", { - 'study_id': 1, 'meta_col': 'NOEXIST'}) + res = self.post( + "/study/description/sample_summary/", {"study_id": 1, "meta_col": "NOEXIST"} + ) self.assertEqual(res.code, 200) - exp = {"status": "error", - "message": "Category NOEXIST does not exist in sample template"} + exp = { + "status": "error", + "message": "Category NOEXIST does not exist in sample template", + } self.assertEqual(loads(res.body), exp) diff --git a/qiita_pet/handlers/study_handlers/vamps_handlers.py b/qiita_pet/handlers/study_handlers/vamps_handlers.py index 85c30fe56..94846a6ac 100644 --- a/qiita_pet/handlers/study_handlers/vamps_handlers.py +++ b/qiita_pet/handlers/study_handlers/vamps_handlers.py @@ -5,15 +5,15 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from tornado.web import authenticated, HTTPError from qiita_files.demux import stats as demux_stats +from tornado.web import HTTPError, authenticated -from qiita_db.exceptions import QiitaDBUnknownIDError +from qiita_core.util import execute_as_transaction from qiita_db.artifact import Artifact -from qiita_db.software import Software, Parameters +from qiita_db.exceptions import QiitaDBUnknownIDError from qiita_db.processing_job import ProcessingJob +from qiita_db.software import Parameters, Software from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_core.util import execute_as_transaction class VAMPSHandler(BaseHandler): @@ -24,19 +24,24 @@ def display_template(self, preprocessed_data_id, msg, msg_level): try: preprocessed_data = Artifact(preprocessed_data_id) except QiitaDBUnknownIDError: - raise HTTPError(404, reason="Artifact %d does not exist!" % - preprocessed_data_id) + raise HTTPError( + 404, reason="Artifact %d does not exist!" % preprocessed_data_id + ) else: user = self.current_user - if user.level != 'admin': - raise HTTPError(403, reason="No permissions of admin, " - "get/VAMPSSubmitHandler: %s!" % user.id) + if user.level != "admin": + raise HTTPError( + 403, + reason="No permissions of admin, " + "get/VAMPSSubmitHandler: %s!" % user.id, + ) prep_templates = preprocessed_data.prep_templates allow_submission = len(prep_templates) == 1 msg_list = ["Submission to EBI disabled:"] if not allow_submission: msg_list.append( - "Only artifacts with a single prep template can be submitted") + "Only artifacts with a single prep template can be submitted" + ) # If allow_submission is already false, we technically don't need to # do the following work. However, there is no clean way to fix this # using the current structure, so we perform the work as we @@ -46,26 +51,29 @@ def display_template(self, preprocessed_data_id, msg, msg_level): prep_template = prep_templates[0] study = preprocessed_data.study sample_template = study.sample_template - stats = [('Number of samples', len(prep_template)), - ('Number of metadata headers', - len(sample_template.categories))] + stats = [ + ("Number of samples", len(prep_template)), + ("Number of metadata headers", len(sample_template.categories)), + ] - demux = [x['fp'] for x in preprocessed_data.filepaths - if x['fp_type'] == 'preprocessed_demux'] + demux = [ + x["fp"] + for x in preprocessed_data.filepaths + if x["fp_type"] == "preprocessed_demux" + ] demux_length = len(demux) if not demux_length: - msg = ("Study does not appear to have demultiplexed " - "sequences associated") - msg_level = 'danger' + msg = "Study does not appear to have demultiplexed sequences associated" + msg_level = "danger" elif demux_length > 1: - msg = ("Study appears to have multiple demultiplexed files!") - msg_level = 'danger' + msg = "Study appears to have multiple demultiplexed files!" 
+ msg_level = "danger" elif demux_length == 1: demux_file = demux[0] demux_file_stats = demux_stats(demux_file) - stats.append(('Number of sequences', demux_file_stats.n)) - msg_level = 'success' + stats.append(("Number of sequences", demux_file_stats.n)) + msg_level = "success" # In EBI here we check that we have the required field for submission, # however for VAMPS we don't need that @@ -75,13 +83,18 @@ def display_template(self, preprocessed_data_id, msg, msg_level): else: disabled_msg = None - self.render('vamps_submission.html', - study_title=study.title, stats=stats, message=msg, - study_id=study.id, level=msg_level, - preprocessed_data_id=preprocessed_data_id, - investigation_type=prep_template.investigation_type, - allow_submission=allow_submission, - disabled_msg=disabled_msg) + self.render( + "vamps_submission.html", + study_title=study.title, + stats=stats, + message=msg, + study_id=study.id, + level=msg_level, + preprocessed_data_id=preprocessed_data_id, + investigation_type=prep_template.investigation_type, + allow_submission=allow_submission, + disabled_msg=disabled_msg, + ) @authenticated def get(self, preprocessed_data_id): @@ -92,28 +105,28 @@ def get(self, preprocessed_data_id): def post(self, preprocessed_data_id): user = self.current_user # make sure user is admin and can therefore actually submit to VAMPS - if user.level != 'admin': - raise HTTPError(403, reason="User %s cannot submit to VAMPS!" % - user.id) - msg = '' - msg_level = 'success' + if user.level != "admin": + raise HTTPError(403, reason="User %s cannot submit to VAMPS!" % user.id) + msg = "" + msg_level = "success" - plugin = Software.from_name_and_version('Qiita', 'alpha') - cmd = plugin.get_command('submit_to_VAMPS') + plugin = Software.from_name_and_version("Qiita", "alpha") + cmd = plugin.get_command("submit_to_VAMPS") artifact = Artifact(preprocessed_data_id) # Check if the artifact is already being submitted to VAMPS is_being_submitted = any( - [j.status in ('queued', 'running') - for j in artifact.jobs(cmd=cmd)]) + [j.status in ("queued", "running") for j in artifact.jobs(cmd=cmd)] + ) - if is_being_submitted == 'submitting': + if is_being_submitted == "submitting": msg = "Cannot resubmit! Data is already being submitted" - msg_level = 'danger' + msg_level = "danger" self.display_template(preprocessed_data_id, msg, msg_level) else: params = Parameters.load( - cmd, values_dict={'artifact': preprocessed_data_id}) + cmd, values_dict={"artifact": preprocessed_data_id} + ) job = ProcessingJob.create(user, params, True) job.submit() - self.redirect('/study/description/%s' % artifact.study.study_id) + self.redirect("/study/description/%s" % artifact.study.study_id) diff --git a/qiita_pet/handlers/upload.py b/qiita_pet/handlers/upload.py index a3595fa6d..5c97f8acd 100644 --- a/qiita_pet/handlers/upload.py +++ b/qiita_pet/handlers/upload.py @@ -6,30 +6,31 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from tornado.web import authenticated, HTTPError - -from os.path import join, exists -from os import remove, chmod -from json import loads, dumps - from collections import defaultdict -from shutil import rmtree, move +from json import dumps, loads +from os import chmod, remove +from os.path import exists, join +from shutil import move, rmtree -from .util import check_access -from .base_handlers import BaseHandler +from tornado.web import HTTPError, authenticated from qiita_core.qiita_settings import qiita_config, r_client from qiita_core.util import execute_as_transaction -from qiita_db.util import (get_files_from_uploads_folders, - get_mountpoint, move_upload_files_to_trash) -from qiita_db.study import Study -from qiita_db.processing_job import ProcessingJob -from qiita_db.software import Software, Parameters from qiita_db.exceptions import QiitaDBUnknownIDError -from qiita_db.util import create_nested_path +from qiita_db.processing_job import ProcessingJob +from qiita_db.software import Parameters, Software +from qiita_db.study import Study +from qiita_db.util import ( + create_nested_path, + get_files_from_uploads_folders, + get_mountpoint, + move_upload_files_to_trash, +) +from .base_handlers import BaseHandler +from .util import check_access -UPLOAD_STUDY_FORMAT = 'upload_study_%s' +UPLOAD_STUDY_FORMAT = "upload_study_%s" class StudyUploadFileHandler(BaseHandler): @@ -40,46 +41,52 @@ def display_template(self, study_id, msg): study_id = int(study_id) study = Study(study_id) user = self.current_user - level = 'info' - message = '' - remote_url = '' + level = "info" + message = "" + remote_url = "" remote_files = [] check_access(user, study, no_public=True, raise_error=True) job_info = r_client.get(UPLOAD_STUDY_FORMAT % study_id) if job_info: - job_info = defaultdict(lambda: '', loads(job_info)) - job_id = job_info['job_id'] + job_info = defaultdict(lambda: "", loads(job_info)) + job_id = job_info["job_id"] job = ProcessingJob(job_id) job_status = job.status - processing = job_status not in ('success', 'error') - url = job.parameters.values['url'] + processing = job_status not in ("success", "error") + url = job.parameters.values["url"] if processing: - if job.command.name == 'list_remote_files': - message = 'Retrieving remote files: listing %s' % url + if job.command.name == "list_remote_files": + message = "Retrieving remote files: listing %s" % url else: - message = 'Retrieving remote files: download %s' % url - elif job_status == 'error': - level = 'danger' - message = job.log.msg.replace('\n', '
<br/>') + message = "Retrieving remote files: download %s" % url + elif job_status == "error": + level = "danger" + message = job.log.msg.replace("\n", "<br/>
") # making errors nicer for users - if 'No such file' in message: - message = 'URL not valid: %s, please review.' % url + if "No such file" in message: + message = "URL not valid: %s, please review." % url else: - remote_url = job_info['url'] - remote_files = job_info['files'] - level = job_info['alert_type'] - message = job_info['alert_msg'].replace('\n', '
<br/>') + remote_url = job_info["url"] + remote_files = job_info["files"] + level = job_info["alert_type"] + message = job_info["alert_msg"].replace("\n", "<br/>
") # getting the ontologies - self.render('upload.html', - study_title=study.title, study_info=study.info, - study_id=study_id, is_admin=user.level == 'admin', - extensions=','.join(qiita_config.valid_upload_extension), - max_upload_size=qiita_config.max_upload_size, level=level, - message=message, remote_url=remote_url, - remote_files=remote_files, - files=get_files_from_uploads_folders(str(study_id))) + self.render( + "upload.html", + study_title=study.title, + study_info=study.info, + study_id=study_id, + is_admin=user.level == "admin", + extensions=",".join(qiita_config.valid_upload_extension), + max_upload_size=qiita_config.max_upload_size, + level=level, + message=message, + remote_url=remote_url, + remote_files=remote_files, + files=get_files_from_uploads_folders(str(study_id)), + ) @authenticated @execute_as_transaction @@ -88,8 +95,7 @@ def get(self, study_id): study = Study(int(study_id)) except QiitaDBUnknownIDError: raise HTTPError(404, reason="Study %s does not exist" % study_id) - check_access(self.current_user, study, no_public=True, - raise_error=True) + check_access(self.current_user, study, no_public=True, raise_error=True) self.display_template(study_id, "") @authenticated @@ -99,15 +105,14 @@ def post(self, study_id): study = Study(int(study_id)) except QiitaDBUnknownIDError: raise HTTPError(404, reason="Study %s does not exist" % study_id) - check_access(self.current_user, study, no_public=True, - raise_error=True) + check_access(self.current_user, study, no_public=True, raise_error=True) files_to_move = [] - for v in self.get_arguments('files_to_erase', strip=True): - v = v.split('-', 1) + for v in self.get_arguments("files_to_erase", strip=True): + v = v.split("-", 1) # if the file was just uploaded JS will not know which id the # current upload folder has so we need to retrieve it - if v[0] == 'undefined': + if v[0] == "undefined": v[0], _ = get_mountpoint("uploads")[0] files_to_move.append((int(v[0]), v[1])) @@ -121,50 +126,59 @@ class StudyUploadViaRemote(BaseHandler): @authenticated @execute_as_transaction def post(self, study_id): - method = self.get_argument('remote-request-type') - url = self.get_argument('inputURL') - ssh_key = self.request.files['ssh-key'][0]['body'] - status = 'success' - message = '' + method = self.get_argument("remote-request-type") + url = self.get_argument("inputURL") + ssh_key = self.request.files["ssh-key"][0]["body"] + status = "success" + message = "" try: study = Study(int(study_id)) except QiitaDBUnknownIDError: raise HTTPError(404, reason="Study %s does not exist" % study_id) - check_access( - self.current_user, study, no_public=True, raise_error=True) + check_access(self.current_user, study, no_public=True, raise_error=True) _, upload_folder = get_mountpoint("uploads")[0] upload_folder = join(upload_folder, study_id) - ssh_key_fp = join(upload_folder, '.key.txt') + ssh_key_fp = join(upload_folder, ".key.txt") create_nested_path(upload_folder) - with open(ssh_key_fp, 'wb') as f: + with open(ssh_key_fp, "wb") as f: f.write(ssh_key) chmod(ssh_key_fp, 0o600) - qiita_plugin = Software.from_name_and_version('Qiita', 'alpha') - if method == 'list': - cmd = qiita_plugin.get_command('list_remote_files') - params = Parameters.load(cmd, values_dict={ - 'url': url, 'private_key': ssh_key_fp, 'study_id': study_id}) - elif method == 'transfer': - cmd = qiita_plugin.get_command('download_remote_files') - params = Parameters.load(cmd, values_dict={ - 'url': url, 'private_key': ssh_key_fp, - 'destination': upload_folder}) + qiita_plugin = 
Software.from_name_and_version("Qiita", "alpha") + if method == "list": + cmd = qiita_plugin.get_command("list_remote_files") + params = Parameters.load( + cmd, + values_dict={ + "url": url, + "private_key": ssh_key_fp, + "study_id": study_id, + }, + ) + elif method == "transfer": + cmd = qiita_plugin.get_command("download_remote_files") + params = Parameters.load( + cmd, + values_dict={ + "url": url, + "private_key": ssh_key_fp, + "destination": upload_folder, + }, + ) else: - status = 'error' - message = 'Not a valid method' + status = "error" + message = "Not a valid method" - if status == 'success': + if status == "success": job = ProcessingJob.create(self.current_user, params, True) job.submit() - r_client.set( - UPLOAD_STUDY_FORMAT % study_id, dumps({'job_id': job.id})) + r_client.set(UPLOAD_STUDY_FORMAT % study_id, dumps({"job_id": job.id})) - self.write({'status': status, 'message': message}) + self.write({"status": status, "message": message}) class UploadFileHandler(BaseHandler): @@ -180,21 +194,25 @@ def validate_file_extension(self, filename): """ if not filename.endswith(tuple(qiita_config.valid_upload_extension)): self.set_status(415) - raise HTTPError(415, reason="User %s is trying to upload %s" % - (self.current_user, str(filename))) + raise HTTPError( + 415, + reason="User %s is trying to upload %s" + % (self.current_user, str(filename)), + ) @authenticated @execute_as_transaction def post(self): - resumable_identifier = self.get_argument('resumableIdentifier') - resumable_filename = self.get_argument('resumableFilename') - resumable_chunk_number = int(self.get_argument('resumableChunkNumber')) - resumable_total_chunks = int(self.get_argument('resumableTotalChunks')) - study_id = self.get_argument('study_id') - data = self.request.files['file'][0]['body'] + resumable_identifier = self.get_argument("resumableIdentifier") + resumable_filename = self.get_argument("resumableFilename") + resumable_chunk_number = int(self.get_argument("resumableChunkNumber")) + resumable_total_chunks = int(self.get_argument("resumableTotalChunks")) + study_id = self.get_argument("study_id") + data = self.request.files["file"][0]["body"] - check_access(self.current_user, Study(int(study_id)), - no_public=True, raise_error=True) + check_access( + self.current_user, Study(int(study_id)), no_public=True, raise_error=True + ) self.validate_file_extension(resumable_filename) @@ -212,7 +230,7 @@ def post(self): remove(temporary_location) # append every transmitted chunk - with open(temporary_location, 'ab') as tmp_file: + with open(temporary_location, "ab") as tmp_file: tmp_file.write(bytes(data)) if resumable_chunk_number == resumable_total_chunks: @@ -228,16 +246,17 @@ def post(self): @authenticated @execute_as_transaction def get(self): - """ this is the first point of entry into the upload service + """this is the first point of entry into the upload service this should either set the status as 400 (error) so the file/chunk is sent via post or 200 (valid) to not send the file """ - study_id = self.get_argument('study_id') - resumable_filename = self.get_argument('resumableFilename') + study_id = self.get_argument("study_id") + resumable_filename = self.get_argument("resumableFilename") - check_access(self.current_user, Study(int(study_id)), - no_public=True, raise_error=True) + check_access( + self.current_user, Study(int(study_id)), no_public=True, raise_error=True + ) self.validate_file_extension(resumable_filename) diff --git a/qiita_pet/handlers/user_handlers.py 
b/qiita_pet/handlers/user_handlers.py index 1783fba37..3a8954e83 100644 --- a/qiita_pet/handlers/user_handlers.py +++ b/qiita_pet/handlers/user_handlers.py @@ -7,24 +7,24 @@ # ----------------------------------------------------------------------------- import re -from json import dumps import warnings +from json import dumps -from tornado.web import authenticated, HTTPError -from wtforms import Form, StringField, BooleanField, validators +from tornado.web import HTTPError, authenticated +from wtforms import BooleanField, Form, StringField, validators from wtforms.validators import ValidationError -from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_pet.handlers.api_proxy import user_jobs_get_req -from qiita_pet.handlers.portal import PortalEditBase import qiita_db as qdb -from qiita_db.util import send_email -from qiita_db.user import User -from qiita_db.logger import LogEntry -from qiita_db.exceptions import QiitaDBUnknownIDError, QiitaDBError from qiita_core.exceptions import IncorrectPasswordError -from qiita_core.util import execute_as_transaction from qiita_core.qiita_settings import qiita_config +from qiita_core.util import execute_as_transaction +from qiita_db.exceptions import QiitaDBError, QiitaDBUnknownIDError +from qiita_db.logger import LogEntry +from qiita_db.user import User +from qiita_db.util import send_email +from qiita_pet.handlers.api_proxy import user_jobs_get_req +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.handlers.portal import PortalEditBase class UserProfile(Form): @@ -58,15 +58,17 @@ def validate_general(value: str, infomsg: str, url_prefix: str): if value != value.strip(): raise ValidationError( - 'Please remove all leading and trailing whitespaces from your ' - 'input.
%s' % infomsg) + "Please remove all leading and trailing whitespaces from your " + "input.
%s" % infomsg + ) if len(url_prefix) > 0: isPrefix = re.search("^%s" % url_prefix, value) if isPrefix is not None: raise ValidationError( - 'Please remove the "%s" part from your input.
<br/>%s' % ( - isPrefix[0], infomsg)) + 'Please remove the "%s" part from your input.<br/>
%s' + % (isPrefix[0], infomsg) + ) # if there is still no error raised, we return the actual value of the # user input @@ -90,18 +92,19 @@ def validator_orcid_id(form: Form, field: StringField): ------ ValidationError if user input is not valid """ - infomsg = ('Enter only your 16 digit numerical ORCID identifier, where' - ' every four digits are separated with a dash "-". An ' - 'example is: 0000-0002-0975-9019') - value = UserProfile.validate_general( - field.data, infomsg, 'https://orcid.org') + infomsg = ( + "Enter only your 16 digit numerical ORCID identifier, where" + ' every four digits are separated with a dash "-". An ' + "example is: 0000-0002-0975-9019" + ) + value = UserProfile.validate_general(field.data, infomsg, "https://orcid.org") if value is None: return True if re.search(r"^\d{4}-\d{4}-\d{4}-\d{4}$", value) is None: raise ValidationError( - "Your input does not follow the required format.
%s" % - infomsg) + "Your input does not follow the required format.
%s" % infomsg + ) def validator_gscholar_id(form, field): """A WTForm validator to check if user input follows google scholar ID @@ -122,33 +125,38 @@ def validator_gscholar_id(form, field): ------ ValidationError if user input is not valid """ - infomsg = ('To retrieve your google scholar ID, surf to your profile ' - 'and copy the URL in your browser. It might read like ' - 'https://scholar.google.com/citations?user=_e3QL94AAAAJ&' - 'hl=en
<br/>Ignore everything left of the "?". The right ' - 'part is a set of key=value pairs, separated by "&" ' - 'characters. Find the key "user=", the right part up to ' - 'the next "&" is your google scholar ID, in the example: ' - '"_e3QL94AAAAJ"') + infomsg = ( + "To retrieve your google scholar ID, surf to your profile " + "and copy the URL in your browser. It might read like " + "https://scholar.google.com/citations?user=_e3QL94AAAAJ&" + 'hl=en<br/>
Ignore everything left of the "?". The right ' + 'part is a set of key=value pairs, separated by "&" ' + 'characters. Find the key "user=", the right part up to ' + 'the next "&" is your google scholar ID, in the example: ' + '"_e3QL94AAAAJ"' + ) # we need a regex here, since we don't know the TLD the user is # presenting to us value = UserProfile.validate_general( - field.data, infomsg, r'https://scholar.google.\w{1,3}/citations\?') + field.data, infomsg, r"https://scholar.google.\w{1,3}/citations\?" + ) if value is None: return True - if '&' in value: + if "&" in value: raise ValidationError( - 'Your input contains multiple key=value pairs (we found at ' - 'least one "&" character).
<br/>%s' % infomsg) - if 'user=' in value: + "Your input contains multiple key=value pairs (we found at " + 'least one "&" character).<br/>
%s' % infomsg + ) + if "user=" in value: raise ValidationError( 'Please remove the key "user" and the "=" character from ' - 'your input.
<br/>%s' % infomsg) - if value.startswith('='): + "your input.<br/>
%s" % infomsg + ) + if value.startswith("="): raise ValidationError( - 'Please remove leading "=" characters from your input.' - '
<br/>%s' % infomsg) + 'Please remove leading "=" characters from your input.<br/>
%s' % infomsg + ) def validator_rgate_id(form, field): """A WTForm validator to check if user input follows ResearchGate @@ -169,13 +177,16 @@ def validator_rgate_id(form, field): ------ ValidationError if user input is not valid """ - infomsg = ('To retrieve your ResearchGate ID, surf to your profile ' - 'and copy the URL in your browser. It might read like ' - 'https://www.researchgate.net/profile/Rob-Knight
<br/>' - 'Your ID is the part right of the last "/", in the example:' - ' "Rob-Knight"') + infomsg = ( + "To retrieve your ResearchGate ID, surf to your profile " + "and copy the URL in your browser. It might read like " + "https://www.researchgate.net/profile/Rob-Knight<br/>
" + 'Your ID is the part right of the last "/", in the example:' + ' "Rob-Knight"' + ) value = UserProfile.validate_general( - field.data, infomsg, 'https://www.researchgate.net/profile/') + field.data, infomsg, "https://www.researchgate.net/profile/" + ) if value is None: return True @@ -183,26 +194,31 @@ def validator_rgate_id(form, field): affiliation = StringField("Affiliation") address = StringField("Address") phone = StringField("Phone") - receive_processing_job_emails = BooleanField( - "Receive Processing Job Emails?") + receive_processing_job_emails = BooleanField("Receive Processing Job Emails?") - social_orcid = StringField( - "ORCID", [validator_orcid_id], description="") + social_orcid = StringField("ORCID", [validator_orcid_id], description="") social_googlescholar = StringField( - "Google Scholar", [validator_gscholar_id], description="") + "Google Scholar", [validator_gscholar_id], description="" + ) social_researchgate = StringField( - "ResearchGate", [validator_rgate_id], description="") + "ResearchGate", [validator_rgate_id], description="" + ) class UserProfileHandler(BaseHandler): """Displays user profile page and handles profile updates""" + @authenticated def get(self): profile = UserProfile() profile.process(data=self.current_user.info) - self.render("user_profile.html", profile=profile, msg="", passmsg="", - creation_timestamp=self.current_user.info[ - 'creation_timestamp']) + self.render( + "user_profile.html", + profile=profile, + msg="", + passmsg="", + creation_timestamp=self.current_user.info["creation_timestamp"], + ) @authenticated @execute_as_transaction @@ -215,28 +231,33 @@ def post(self): if action == "profile": # tuple of columns available for profile # FORM INPUT NAMES MUST MATCH DB COLUMN NAMES - not_str_fields = ('receive_processing_job_emails') + not_str_fields = "receive_processing_job_emails" form_data.process(data=self.request.arguments) - profile = {name: data[0].decode('ascii') - if name not in not_str_fields else - data - for name, data in form_data.data.items()} + profile = { + name: data[0].decode("ascii") if name not in not_str_fields else data + for name, data in form_data.data.items() + } # Turn default value as list into default strings for field in form_data: if field.name not in not_str_fields: - field.data = field.data[0].decode('ascii') + field.data = field.data[0].decode("ascii") if form_data.validate() is False: - msg = ("ERROR: profile could not be updated" - " as some of your above inputs must be corrected.") + msg = ( + "ERROR: profile could not be updated" + " as some of your above inputs must be corrected." 
+ ) else: try: user.info = profile msg = "Profile updated successfully" except Exception as e: msg = "ERROR: profile could not be updated" - LogEntry.create('Runtime', "Cound not update profile: %s" % - str(e), info={'User': user.id}) + LogEntry.create( + "Runtime", + "Cound not update profile: %s" % str(e), + info={"User": user.id}, + ) elif action == "password": form_data.process(data=user.info) @@ -246,21 +267,29 @@ def post(self): changed = user.change_password(oldpass, newpass) except Exception as e: passmsg = "ERROR: could not change password" - LogEntry.create('Runtime', "Could not change password: %s" % - str(e), info={'User': user.id}) + LogEntry.create( + "Runtime", + "Could not change password: %s" % str(e), + info={"User": user.id}, + ) else: if changed: passmsg = "Password changed successfully" else: passmsg = "Incorrect old password" - self.render("user_profile.html", user=user.id, profile=form_data, - msg=msg, passmsg=passmsg, - creation_timestamp=self.current_user.info[ - 'creation_timestamp']) + self.render( + "user_profile.html", + user=user.id, + profile=form_data, + msg=msg, + passmsg=passmsg, + creation_timestamp=self.current_user.info["creation_timestamp"], + ) class ForgotPasswordHandler(BaseHandler): """Displays forgot password page and generates code for lost passwords""" + def get(self): self.render("lost_pass.html", user=None, message="", level="") @@ -285,31 +314,40 @@ def post(self): # qiita_config.portal_dir has it at the beginning but not at # the end. This constructs the correct URL url = qiita_config.base_url + qiita_config.portal_dir - send_email(user.id, "Qiita: Password Reset", "Please go to " - "the following URL to reset your password: \n" - "%s/auth/reset/%s \nYou " - "have 30 minutes from the time you requested a " - "reset to change your password. After this period, " - "you will have to request another reset." % - (url, info["pass_reset_code"])) - message = ("Check your email for the reset code.") + send_email( + user.id, + "Qiita: Password Reset", + "Please go to " + "the following URL to reset your password: \n" + "%s/auth/reset/%s \nYou " + "have 30 minutes from the time you requested a " + "reset to change your password. After this period, " + "you will have to request another reset." + % (url, info["pass_reset_code"]), + ) + message = "Check your email for the reset code." level = "success" page = "index.html" except Exception as e: - message = ("Unable to send email. Error has been registered. " - "Your password has not been reset.") + message = ( + "Unable to send email. Error has been registered. " + "Your password has not been reset." + ) level = "danger" - LogEntry.create('Runtime', "Unable to send forgot password " - "email: %s" % str(e), info={'User': user.id}) + LogEntry.create( + "Runtime", + "Unable to send forgot password email: %s" % str(e), + info={"User": user.id}, + ) self.render(page, user=user_id, message=message, level=level) class ChangeForgotPasswordHandler(BaseHandler): """Displays change password page and handles password reset""" + def get(self, code): - self.render("change_lost_pass.html", user=None, message="", - level="", code=code) + self.render("change_lost_pass.html", user=None, message="", level="", code=code) @execute_as_transaction def post(self, code): @@ -335,15 +373,16 @@ def post(self, code): changed = False if changed: - message = ("Password reset successful. Please log in to " - "continue.") + message = "Password reset successful. Please log in to continue." 
level = "success" page = "index.html" else: if message != "": - message = ("Unable to reset password. Most likely your " - "email is incorrect or your reset window has " - "timed out.") + message = ( + "Unable to reset password. Most likely your " + "email is incorrect or your reset window has " + "timed out." + ) level = "danger" self.render(page, message=message, level=level, code=code) @@ -352,8 +391,7 @@ def post(self, code): class UserMessagesHander(BaseHandler): @authenticated def get(self): - self.render("user_messages.html", - messages=self.current_user.messages()) + self.render("user_messages.html", messages=self.current_user.messages()) def post(self): action = self.get_argument("action") @@ -370,8 +408,7 @@ def post(self): else: raise HTTPError(400, reason="Unknown action: %s" % action) - self.render("user_messages.html", - messages=self.current_user.messages()) + self.render("user_messages.html", messages=self.current_user.messages()) class UserJobs(BaseHandler): @@ -383,8 +420,7 @@ def get(self): class PurgeUsersAJAXHandler(PortalEditBase): # define columns besides email that will be displayed on website - FIELDS = ['name', 'affiliation', 'address', 'phone', - 'creation_timestamp'] + FIELDS = ["name", "affiliation", "address", "phone", "creation_timestamp"] @authenticated @execute_as_transaction @@ -396,7 +432,7 @@ def get(self): FROM qiita.qiita_user WHERE (user_level_id=5) AND (creation_timestamp < (NOW() - INTERVAL '30 DAY')) - """.format(','.join(self.FIELDS)) + """.format(",".join(self.FIELDS)) qdb.sql_connection.TRN.add(sql) users = qdb.sql_connection.TRN.execute()[1:] @@ -405,13 +441,13 @@ def get(self): for list in users: for user in list: usermail = user[0] - user_unit = {'email': usermail} + user_unit = {"email": usermail} user_infos = User(usermail).info for col in self.FIELDS: user_unit[col] = str(user_infos[col]) result.append(user_unit) # returning information as JSON - self.write(dumps(result, separators=(',', ':'))) + self.write(dumps(result, separators=(",", ":"))) class PurgeUsersHandler(PortalEditBase): @@ -420,16 +456,18 @@ class PurgeUsersHandler(PortalEditBase): def get(self): # render page and transfer headers to be included for the table self.check_admin() - self.render('admin_purge_users.html', - headers=['email'] + PurgeUsersAJAXHandler.FIELDS, - submit_url="/admin/purge_users/") + self.render( + "admin_purge_users.html", + headers=["email"] + PurgeUsersAJAXHandler.FIELDS, + submit_url="/admin/purge_users/", + ) def post(self): # check if logged in user is admin and fetch all checked boxes as well # as the action self.check_admin() - users = map(str, self.get_arguments('selected')) - action = self.get_argument('action') + users = map(str, self.get_arguments("selected")) + action = self.get_argument("action") # depending on the action delete user from db (remove) num_deleted_user = 0 @@ -441,11 +479,12 @@ def post(self): user_to_delete.delete(user) num_deleted_user += 1 else: - raise HTTPError( - 400, reason="Unknown action: %s" % action) + raise HTTPError(400, reason="Unknown action: %s" % action) except QiitaDBError as e: self.write(action.upper() + " ERROR:
" + str(e)) return - msg = '; '.join([str(w.message) for w in warns]) - self.write(("%i non-validated user(s) successfully removed from " - "database
%s") % (num_deleted_user, msg)) + msg = "; ".join([str(w.message) for w in warns]) + self.write( + ("%i non-validated user(s) successfully removed from database
%s") + % (num_deleted_user, msg) + ) diff --git a/qiita_pet/handlers/util.py b/qiita_pet/handlers/util.py index c1b52702b..3048e84e9 100644 --- a/qiita_pet/handlers/util.py +++ b/qiita_pet/handlers/util.py @@ -5,14 +5,14 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from functools import partial from contextlib import contextmanager +from functools import partial from tornado.web import HTTPError -from qiita_pet.util import linkify -from qiita_pet.exceptions import QiitaHTTPError from qiita_core.util import execute_as_transaction +from qiita_pet.exceptions import QiitaHTTPError +from qiita_pet.util import linkify @contextmanager @@ -34,8 +34,10 @@ def check_access(user, study, no_public=False, raise_error=False): """make sure user has access to the study requested""" if not study.has_access(user, no_public): if raise_error: - raise HTTPError(403, reason="User %s does not have access to " - "study %d" % (user.id, study.id)) + raise HTTPError( + 403, + reason="User %s does not have access to study %d" % (user.id, study.id), + ) else: return False return True @@ -64,21 +66,24 @@ def download_link_or_path(is_local_request, filepath, fp_id, label): if is_local_request: resp = "%s: %s" % (label, filepath) else: - resp = (' %s' - % (fp_id, label)) + resp = ( + ' %s' % (fp_id, label) + ) return resp study_person_linkifier = partial( - linkify, "{1}") + linkify, '{1}' +) pubmed_linkifier = partial( - linkify, "{0}") + linkify, '{0}' +) doi_linkifier = partial( - linkify, "{0}") + linkify, '{0}' +) def to_int(value, reason=None): @@ -102,8 +107,7 @@ def to_int(value, reason=None): try: res = int(value) except ValueError: - msg = f"{value} cannot be converted to an integer" if reason is None \ - else reason + msg = f"{value} cannot be converted to an integer" if reason is None else reason raise HTTPError(400, reason=msg) return res @@ -125,13 +129,11 @@ def get_shared_links(obj): """ shared = [] for person in obj.shared_with: - name = person.info['name'] + name = person.info["name"] email = person.email # Name is optional, so default to email if non existant if name: - shared.append(study_person_linkifier( - (email, name))) + shared.append(study_person_linkifier((email, name))) else: - shared.append(study_person_linkifier( - (email, email))) + shared.append(study_person_linkifier((email, email))) return ", ".join(shared) diff --git a/qiita_pet/handlers/websocket_handlers.py b/qiita_pet/handlers/websocket_handlers.py index 74fd853d6..90c3e5160 100644 --- a/qiita_pet/handlers/websocket_handlers.py +++ b/qiita_pet/handlers/websocket_handlers.py @@ -8,18 +8,18 @@ # adapted from # https://github.com/leporo/tornado-redis/blob/master/demos/websockets -from json import loads, dumps from itertools import chain +from json import dumps, loads import toredis +from tornado.gen import Task, engine from tornado.web import authenticated from tornado.websocket import WebSocketHandler -from tornado.gen import engine, Task from qiita_core.qiita_settings import r_client -from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_db.artifact import Artifact from qiita_core.util import execute_as_transaction +from qiita_db.artifact import Artifact +from qiita_pet.handlers.base_handlers import BaseHandler class MessageHandler(WebSocketHandler): @@ -46,7 +46,7 @@ def get_current_user(self): # pylint: disable=W0221 @authenticated def open(self): - self.write_message('hello') + 
self.write_message("hello") @authenticated def on_message(self, msg): @@ -56,10 +56,10 @@ def on_message(self, msg): # Determine which Redis communication channel the server needs to # listen on - self.channel = msginfo.get('user', None) + self.channel = msginfo.get("user", None) if self.channel is not None: - self.channel_messages = '%s:messages' % self.channel + self.channel_messages = "%s:messages" % self.channel self.listen() def listen(self): @@ -80,18 +80,19 @@ def callback(self, msg): # if a compute process wrote to the Redis channel that we are # listening too, and if it is actually a message, send the payload to # the javascript client via the websocket - if channel == self.channel and message_type == 'message': + if channel == self.channel and message_type == "message": self.write_message(payload) @engine def on_close(self): yield Task(self.toredis.unsubscribe, self.channel) - self.r_client.delete('%s:messages' % self.channel) + self.r_client.delete("%s:messages" % self.channel) self.redis.disconnect() class SelectedSocketHandler(WebSocketHandler, BaseHandler): """Websocket for removing samples on default analysis display page""" + @authenticated @execute_as_transaction def on_message(self, msg): @@ -100,16 +101,16 @@ def on_message(self, msg): msginfo = loads(msg) default = self.current_user.default_analysis - if 'remove_sample' in msginfo: - data = msginfo['remove_sample'] - artifact = Artifact(data['proc_data']) - default.remove_samples([artifact], data['samples']) - elif 'remove_pd' in msginfo: - data = msginfo['remove_pd'] - default.remove_samples([Artifact(data['proc_data'])]) - elif 'clear' in msginfo: - data = msginfo['clear'] - artifacts = [Artifact(_id) for _id in data['pids']] + if "remove_sample" in msginfo: + data = msginfo["remove_sample"] + artifact = Artifact(data["proc_data"]) + default.remove_samples([artifact], data["samples"]) + elif "remove_pd" in msginfo: + data = msginfo["remove_pd"] + default.remove_samples([Artifact(data["proc_data"])]) + elif "clear" in msginfo: + data = msginfo["clear"] + artifacts = [Artifact(_id) for _id in data["pids"]] default.remove_samples(artifacts) self.write_message(msg) @@ -118,11 +119,12 @@ def on_message(self, msg): @authenticated @execute_as_transaction def open(self): - self.write_message('hello') + self.write_message("hello") class SelectSamplesHandler(WebSocketHandler, BaseHandler): """Websocket for selecting and deselecting samples on list studies page""" + @authenticated @execute_as_transaction def on_message(self, msg): @@ -136,9 +138,14 @@ def on_message(self, msg): """ msginfo = loads(msg) default = self.current_user.default_analysis - default.add_samples(msginfo['sel']) + default.add_samples(msginfo["sel"]) # Count total number of unique samples selected and return - self.write_message(dumps({ - 'sel': len(set( - chain.from_iterable(s for s in msginfo['sel'].values()))) - })) + self.write_message( + dumps( + { + "sel": len( + set(chain.from_iterable(s for s in msginfo["sel"].values())) + ) + } + ) + ) diff --git a/qiita_pet/nginx_example.conf b/qiita_pet/nginx_example.conf index 36c3d7809..d0b4ab71b 100644 --- a/qiita_pet/nginx_example.conf +++ b/qiita_pet/nginx_example.conf @@ -3,7 +3,7 @@ events { } http { - client_max_body_size 7M; # increase maximum body size from default 1M to match https://github.com/qiita-spots/qiita/blob/ac62aba5333f537c32e213855edc39c273aa9871/qiita_pet/static/vendor/js/resumable-uploader.js#L51 (which is 3M). Note that resumable-uploader.js's last chunk can be max. 
twice as large as chunk size, see: https://github.com/23/resumable.js/issues/51 + client_max_body_size 7M; # increase maximum body size from default 1M to match https://github.com/qiita-spots/qiita/blob/ac62aba5333f537c32e213855edc39c273aa9871/qiita_pet/static/vendor/js/resumable-uploader.js#L51 (which is 3M). Note that resumable-uploader.js's last chunk can be max. twice as large as chunk size, see: https://github.com/23/resumable.js/issues/51 # ports to redirect for mainqiita upstream mainqiita { diff --git a/qiita_pet/portal.py b/qiita_pet/portal.py index eb7f961a0..5cfa058ec 100644 --- a/qiita_pet/portal.py +++ b/qiita_pet/portal.py @@ -5,13 +5,12 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from os.path import join, dirname, abspath +from configparser import ConfigParser +from os.path import abspath, dirname, join from qiita_core.exceptions import MissingConfigSection from qiita_core.qiita_settings import qiita_config -from configparser import ConfigParser - class PortalStyleManager(object): """Holds the portal style information @@ -40,26 +39,28 @@ class PortalStyleManager(object): css_fp : str The filepath to the portal styling custom CSS """ + def __init__(self): if qiita_config.portal_fp: self.conf_fp = qiita_config.portal_fp else: - self.conf_fp = join(dirname(abspath(__file__)), - 'support_files/config_portal.cfg') + self.conf_fp = join( + dirname(abspath(__file__)), "support_files/config_portal.cfg" + ) # Parse the configuration file config = ConfigParser() with open(self.conf_fp, newline=None) as conf_file: config.read_file(conf_file) - _required_sections = {'sitebase', 'index', 'study_list'} + _required_sections = {"sitebase", "index", "study_list"} if not _required_sections.issubset(set(config.sections())): missing = _required_sections - set(config.sections()) - raise MissingConfigSection(', '.join(missing)) + raise MissingConfigSection(", ".join(missing)) - self.css_fp = config.get('sitebase', 'CSS_FP') + self.css_fp = config.get("sitebase", "CSS_FP") # Load the custom CSS if needed - self.custom_css = '' + self.custom_css = "" if self.css_fp: with open(self.css_fp, newline=None) as f: self.custom_css = f.read() @@ -70,17 +71,17 @@ def __init__(self): def _get_sitebase(self, config): """Get the configuration of the sitebase section""" - self.logo = config.get('sitebase', 'LOGO') - self.title = config.get('sitebase', 'TITLE') + self.logo = config.get("sitebase", "LOGO") + self.title = config.get("sitebase", "TITLE") def _get_index(self, config): """Get the configuration of the index section""" - self.index_header = config.get('index', 'HEADER') - self.index_text = config.get('index', 'TEXT') + self.index_header = config.get("index", "HEADER") + self.index_text = config.get("index", "TEXT") def _get_study_list(self, config): """Get the configuration of the study_list section""" - self.example_search = config.get('study_list', 'EXAMPLE_SEARCH') + self.example_search = config.get("study_list", "EXAMPLE_SEARCH") portal_styling = PortalStyleManager() diff --git a/qiita_pet/test/rest/test_base.py b/qiita_pet/test/rest/test_base.py index 569a4e692..b9d189e61 100644 --- a/qiita_pet/test/rest/test_base.py +++ b/qiita_pet/test/rest/test_base.py @@ -6,17 +6,16 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- from qiita_core.qiita_settings import r_client - from qiita_pet.test.tornado_test_base import TestHandlerBase class RESTHandlerTestCase(TestHandlerBase): def setUp(self): - self.client_token = 'SOMEAUTHTESTINGTOKENHERE2122' - r_client.hset(self.client_token, 'timestamp', '12/12/12 12:12:00') - r_client.hset(self.client_token, 'client_id', 'test123123123') - r_client.hset(self.client_token, 'grant_type', 'client') + self.client_token = "SOMEAUTHTESTINGTOKENHERE2122" + r_client.hset(self.client_token, "timestamp", "12/12/12 12:12:00") + r_client.hset(self.client_token, "client_id", "test123123123") + r_client.hset(self.client_token, "grant_type", "client") r_client.expire(self.client_token, 5) - self.headers = {'Authorization': 'Bearer ' + self.client_token} + self.headers = {"Authorization": "Bearer " + self.client_token} super(RESTHandlerTestCase, self).setUp() diff --git a/qiita_pet/test/rest/test_sample_detail.py b/qiita_pet/test/rest/test_sample_detail.py index 8d5d03e85..5cad897f3 100644 --- a/qiita_pet/test/rest/test_sample_detail.py +++ b/qiita_pet/test/rest/test_sample_detail.py @@ -6,136 +6,173 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main, TestCase +from unittest import TestCase, main from tornado.escape import json_decode import qiita_db - -from qiita_pet.test.rest.test_base import RESTHandlerTestCase from qiita_pet.handlers.rest.study_samples import _sample_details +from qiita_pet.test.rest.test_base import RESTHandlerTestCase class SupportTests(TestCase): def test_samples_detail(self): - exp = [{'sample_id': '1.SKD7.640191', - 'sample_found': True, - 'ebi_sample_accession': 'ERS000021', - 'preparation_id': 1, - 'ebi_experiment_accession': 'ERX0000021', - 'preparation_visibility': 'private', - 'preparation_type': '18S'}, - {'sample_id': '1.SKD7.640191', - 'sample_found': True, - 'ebi_sample_accession': 'ERS000021', - 'preparation_id': 2, - 'ebi_experiment_accession': 'ERX0000021', - 'preparation_visibility': 'private', - 'preparation_type': '18S'}, - {'sample_id': 'doesnotexist', - 'sample_found': False, - 'ebi_sample_accession': None, - 'preparation_id': None, - 'ebi_experiment_accession': None, - 'preparation_visibility': None, - 'preparation_type': None}] - obs = _sample_details(qiita_db.study.Study(1), - ['1.SKD7.640191', 'doesnotexist']) + exp = [ + { + "sample_id": "1.SKD7.640191", + "sample_found": True, + "ebi_sample_accession": "ERS000021", + "preparation_id": 1, + "ebi_experiment_accession": "ERX0000021", + "preparation_visibility": "private", + "preparation_type": "18S", + }, + { + "sample_id": "1.SKD7.640191", + "sample_found": True, + "ebi_sample_accession": "ERS000021", + "preparation_id": 2, + "ebi_experiment_accession": "ERX0000021", + "preparation_visibility": "private", + "preparation_type": "18S", + }, + { + "sample_id": "doesnotexist", + "sample_found": False, + "ebi_sample_accession": None, + "preparation_id": None, + "ebi_experiment_accession": None, + "preparation_visibility": None, + "preparation_type": None, + }, + ] + obs = _sample_details( + qiita_db.study.Study(1), ["1.SKD7.640191", "doesnotexist"] + ) self.assertEqual(len(obs), len(exp)) self.assertCountEqual(obs, exp) class SampleDetailHandlerTests(RESTHandlerTestCase): def test_get_missing_sample(self): - exp = [{'sample_id': 'doesnotexist', - 'sample_found': False, - 
'ebi_sample_accession': None, - 'preparation_id': None, - 'ebi_experiment_accession': None, - 'preparation_visibility': None, - 'preparation_type': None}, ] - - response = self.get('/api/v1/study/1/sample/doesnotexist/status', - headers=self.headers) + exp = [ + { + "sample_id": "doesnotexist", + "sample_found": False, + "ebi_sample_accession": None, + "preparation_id": None, + "ebi_experiment_accession": None, + "preparation_visibility": None, + "preparation_type": None, + }, + ] + + response = self.get( + "/api/v1/study/1/sample/doesnotexist/status", headers=self.headers + ) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_get_valid_sample(self): - exp = [{'sample_id': '1.SKD7.640191', - 'sample_found': True, - 'ebi_sample_accession': 'ERS000021', - 'preparation_id': 1, - 'ebi_experiment_accession': 'ERX0000021', - 'preparation_visibility': 'private', - 'preparation_type': '18S'}, - {'sample_id': '1.SKD7.640191', - 'sample_found': True, - 'ebi_sample_accession': 'ERS000021', - 'preparation_id': 2, - 'ebi_experiment_accession': 'ERX0000021', - 'preparation_visibility': 'private', - 'preparation_type': '18S'}] - - response = self.get('/api/v1/study/1/sample/1.SKD7.640191/status', - headers=self.headers) + exp = [ + { + "sample_id": "1.SKD7.640191", + "sample_found": True, + "ebi_sample_accession": "ERS000021", + "preparation_id": 1, + "ebi_experiment_accession": "ERX0000021", + "preparation_visibility": "private", + "preparation_type": "18S", + }, + { + "sample_id": "1.SKD7.640191", + "sample_found": True, + "ebi_sample_accession": "ERS000021", + "preparation_id": 2, + "ebi_experiment_accession": "ERX0000021", + "preparation_visibility": "private", + "preparation_type": "18S", + }, + ] + + response = self.get( + "/api/v1/study/1/sample/1.SKD7.640191/status", headers=self.headers + ) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertCountEqual(obs, exp) def test_post_samples_status_bad_request(self): - body = {'malformed': 'with garbage'} - response = self.post('/api/v1/study/1/samples/status', - headers=self.headers, - data=body, asjson=True) + body = {"malformed": "with garbage"} + response = self.post( + "/api/v1/study/1/samples/status", + headers=self.headers, + data=body, + asjson=True, + ) self.assertEqual(response.code, 400) def test_post_samples_status(self): - exp = [{'sample_id': '1.SKD7.640191', - 'sample_found': True, - 'ebi_sample_accession': 'ERS000021', - 'preparation_id': 1, - 'ebi_experiment_accession': 'ERX0000021', - 'preparation_visibility': 'private', - 'preparation_type': '18S'}, - {'sample_id': '1.SKD7.640191', - 'sample_found': True, - 'ebi_sample_accession': 'ERS000021', - 'preparation_id': 2, - 'ebi_experiment_accession': 'ERX0000021', - 'preparation_visibility': 'private', - 'preparation_type': '18S'}, - {'sample_id': 'doesnotexist', - 'sample_found': False, - 'ebi_sample_accession': None, - 'preparation_id': None, - 'ebi_experiment_accession': None, - 'preparation_visibility': None, - 'preparation_type': None}, - {'sample_id': '1.SKM5.640177', - 'sample_found': True, - 'ebi_sample_accession': 'ERS000005', - 'preparation_id': 1, - 'ebi_experiment_accession': 'ERX0000005', - 'preparation_visibility': 'private', - 'preparation_type': '18S'}, - {'sample_id': '1.SKM5.640177', - 'sample_found': True, - 'ebi_sample_accession': 'ERS000005', - 'preparation_id': 2, - 'ebi_experiment_accession': 'ERX0000005', - 'preparation_visibility': 'private', - 'preparation_type': '18S'}] - - body = 
{'sample_ids': ['1.SKD7.640191', 'doesnotexist', - '1.SKM5.640177']} - response = self.post('/api/v1/study/1/samples/status', - headers=self.headers, - data=body, asjson=True) + exp = [ + { + "sample_id": "1.SKD7.640191", + "sample_found": True, + "ebi_sample_accession": "ERS000021", + "preparation_id": 1, + "ebi_experiment_accession": "ERX0000021", + "preparation_visibility": "private", + "preparation_type": "18S", + }, + { + "sample_id": "1.SKD7.640191", + "sample_found": True, + "ebi_sample_accession": "ERS000021", + "preparation_id": 2, + "ebi_experiment_accession": "ERX0000021", + "preparation_visibility": "private", + "preparation_type": "18S", + }, + { + "sample_id": "doesnotexist", + "sample_found": False, + "ebi_sample_accession": None, + "preparation_id": None, + "ebi_experiment_accession": None, + "preparation_visibility": None, + "preparation_type": None, + }, + { + "sample_id": "1.SKM5.640177", + "sample_found": True, + "ebi_sample_accession": "ERS000005", + "preparation_id": 1, + "ebi_experiment_accession": "ERX0000005", + "preparation_visibility": "private", + "preparation_type": "18S", + }, + { + "sample_id": "1.SKM5.640177", + "sample_found": True, + "ebi_sample_accession": "ERS000005", + "preparation_id": 2, + "ebi_experiment_accession": "ERX0000005", + "preparation_visibility": "private", + "preparation_type": "18S", + }, + ] + + body = {"sample_ids": ["1.SKD7.640191", "doesnotexist", "1.SKM5.640177"]} + response = self.post( + "/api/v1/study/1/samples/status", + headers=self.headers, + data=body, + asjson=True, + ) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertCountEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/test/rest/test_study.py b/qiita_pet/test/rest/test_study.py index 1806fcc14..516e70401 100644 --- a/qiita_pet/test/rest/test_study.py +++ b/qiita_pet/test/rest/test_study.py @@ -16,50 +16,48 @@ class StudyHandlerTests(RESTHandlerTestCase): def test_get_valid(self): - exp = {u'title': u'Identification of the Microbiomes for Cannabis ' - u'Soils', - u'contacts': {'principal_investigator': [u'PIDude', - u'Wash U', - u'PI_dude@foo.bar'], - 'lab_person': [u'LabDude', - u'knight lab', - u'lab_dude@foo.bar']}, - u'study_abstract': - (u'This is a preliminary study to examine the ' - u'microbiota associated with the Cannabis plant. ' - u'Soils samples from the bulk soil, soil ' - u'associated with the roots, and the rhizosphere ' - u'were extracted and the DNA sequenced. Roots ' - u'from three independent plants of different ' - u'strains were examined. These roots were ' - u'obtained November 11, 2011 from plants that ' - u'had been harvested in the summer. Future ' - u'studies will attempt to analyze the soils and ' - u'rhizospheres from the same location at ' - u'different time points in the plant lifecycle.'), - u'study_description': (u'Analysis of the Cannabis Plant ' - u'Microbiome'), - u'study_alias': 'Cannabis Soils'} - - response = self.get('/api/v1/study/1', headers=self.headers) + exp = { + "title": "Identification of the Microbiomes for Cannabis Soils", + "contacts": { + "principal_investigator": ["PIDude", "Wash U", "PI_dude@foo.bar"], + "lab_person": ["LabDude", "knight lab", "lab_dude@foo.bar"], + }, + "study_abstract": ( + "This is a preliminary study to examine the " + "microbiota associated with the Cannabis plant. " + "Soils samples from the bulk soil, soil " + "associated with the roots, and the rhizosphere " + "were extracted and the DNA sequenced. 
Roots " + "from three independent plants of different " + "strains were examined. These roots were " + "obtained November 11, 2011 from plants that " + "had been harvested in the summer. Future " + "studies will attempt to analyze the soils and " + "rhizospheres from the same location at " + "different time points in the plant lifecycle." + ), + "study_description": ("Analysis of the Cannabis Plant Microbiome"), + "study_alias": "Cannabis Soils", + } + + response = self.get("/api/v1/study/1", headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_get_invalid(self): - response = self.get('/api/v1/study/0', headers=self.headers) + response = self.get("/api/v1/study/0", headers=self.headers) self.assertEqual(response.code, 404) - self.assertEqual(json_decode(response.body), - {'message': 'Study not found'}) + self.assertEqual(json_decode(response.body), {"message": "Study not found"}) def test_get_invalid_negative(self): - response = self.get('/api/v1/study/-1', headers=self.headers) + response = self.get("/api/v1/study/-1", headers=self.headers) self.assertEqual(response.code, 404) # not asserting the body content as this is not a valid URI according # to the regex associating the handler to the webserver def test_get_invalid_namespace(self): - response = self.get('/api/v1/study/1.11111', headers=self.headers) + response = self.get("/api/v1/study/1.11111", headers=self.headers) self.assertEqual(response.code, 404) # not asserting the body content as this is not a valid URI according # to the regex associating the handler to the webserver @@ -67,100 +65,115 @@ def test_get_invalid_namespace(self): class StudyCreatorTests(RESTHandlerTestCase): def test_post_malformed_study(self): - response = self.post('/api/v1/study', data={'foo': 'bar'}, - headers=self.headers, asjson=True) + response = self.post( + "/api/v1/study", data={"foo": "bar"}, headers=self.headers, asjson=True + ) self.assertEqual(response.code, 400) def test_post_already_exists(self): - payload = {'title': 'Identification of the Microbiomes for Cannabis ' - 'Soils', - 'study_abstract': 'stuff', - 'study_description': 'asdasd', - 'owner': 'admin@foo.bar', - 'study_alias': 'blah', - 'notes': '', - 'contacts': {'principal_investigator': [u'PIDude', - u'PI_dude@foo.bar'], - 'lab_person': [u'LabDude', - u'lab_dude@foo.bar']}} - response = self.post('/api/v1/study', data=payload, asjson=True, - headers=self.headers) + payload = { + "title": "Identification of the Microbiomes for Cannabis Soils", + "study_abstract": "stuff", + "study_description": "asdasd", + "owner": "admin@foo.bar", + "study_alias": "blah", + "notes": "", + "contacts": { + "principal_investigator": ["PIDude", "PI_dude@foo.bar"], + "lab_person": ["LabDude", "lab_dude@foo.bar"], + }, + } + response = self.post( + "/api/v1/study", data=payload, asjson=True, headers=self.headers + ) self.assertEqual(response.code, 409) obs = json_decode(response.body) - self.assertEqual(obs, - {'message': 'Study title already exists'}) + self.assertEqual(obs, {"message": "Study title already exists"}) def test_post_valid(self): - payload = {'title': 'foo', - 'study_abstract': 'stuff', - 'study_description': 'asdasd', - 'owner': 'admin@foo.bar', - 'study_alias': 'blah', - 'notes': '', - 'contacts': {'principal_investigator': [u'PIDude', - u'Wash U'], - 'lab_person': [u'LabDude', - u'knight lab']}} - response = self.post('/api/v1/study', data=payload, - headers=self.headers, asjson=True) + payload = { + "title": "foo", 
+ "study_abstract": "stuff", + "study_description": "asdasd", + "owner": "admin@foo.bar", + "study_alias": "blah", + "notes": "", + "contacts": { + "principal_investigator": ["PIDude", "Wash U"], + "lab_person": ["LabDude", "knight lab"], + }, + } + response = self.post( + "/api/v1/study", data=payload, headers=self.headers, asjson=True + ) self.assertEqual(response.code, 201) - study_id = json_decode(response.body)['id'] + study_id = json_decode(response.body)["id"] study = Study(int(study_id)) - self.assertEqual(study.title, payload['title']) - self.assertEqual(study.info['study_abstract'], - payload['study_abstract']) - self.assertEqual(study.info['study_description'], - payload['study_description']) - self.assertEqual(study.info['study_alias'], payload['study_alias']) - self.assertEqual(study.owner.email, payload['owner']) - self.assertEqual(study.info['principal_investigator'].name, - payload['contacts']['principal_investigator'][0]) - self.assertEqual(study.info['principal_investigator'].affiliation, - payload['contacts']['principal_investigator'][1]) - self.assertEqual(study.info['lab_person'].name, - payload['contacts']['lab_person'][0]) - self.assertEqual(study.info['lab_person'].affiliation, - payload['contacts']['lab_person'][1]) + self.assertEqual(study.title, payload["title"]) + self.assertEqual(study.info["study_abstract"], payload["study_abstract"]) + self.assertEqual(study.info["study_description"], payload["study_description"]) + self.assertEqual(study.info["study_alias"], payload["study_alias"]) + self.assertEqual(study.owner.email, payload["owner"]) + self.assertEqual( + study.info["principal_investigator"].name, + payload["contacts"]["principal_investigator"][0], + ) + self.assertEqual( + study.info["principal_investigator"].affiliation, + payload["contacts"]["principal_investigator"][1], + ) + self.assertEqual( + study.info["lab_person"].name, payload["contacts"]["lab_person"][0] + ) + self.assertEqual( + study.info["lab_person"].affiliation, payload["contacts"]["lab_person"][1] + ) def test_post_invalid_user(self): - payload = {'title': 'foo', - 'study_abstract': 'stuff', - 'study_description': 'asdasd', - 'owner': 'doesnotexist@foo.bar', - 'study_alias': 'blah', - 'notes': '', - 'contacts': {'principal_investigator': [u'PIDude', - u'Wash U'], - 'lab_person': [u'LabDude', - u'knight lab']}} - response = self.post('/api/v1/study', data=payload, - headers=self.headers, asjson=True) + payload = { + "title": "foo", + "study_abstract": "stuff", + "study_description": "asdasd", + "owner": "doesnotexist@foo.bar", + "study_alias": "blah", + "notes": "", + "contacts": { + "principal_investigator": ["PIDude", "Wash U"], + "lab_person": ["LabDude", "knight lab"], + }, + } + response = self.post( + "/api/v1/study", data=payload, headers=self.headers, asjson=True + ) self.assertEqual(response.code, 403) obs = json_decode(response.body) - self.assertEqual(obs, {'message': 'Unknown user'}) + self.assertEqual(obs, {"message": "Unknown user"}) class StudyStatusHandlerTests(RESTHandlerTestCase): def test_get_no_study(self): - response = self.get('/api/v1/study/0/status', headers=self.headers) + response = self.get("/api/v1/study/0/status", headers=self.headers) self.assertEqual(response.code, 404) obs = json_decode(response.body) - exp = {'message': 'Study not found'} + exp = {"message": "Study not found"} self.assertEqual(obs, exp) def test_get_valid(self): - response = self.get('/api/v1/study/1/status', headers=self.headers) + response = self.get("/api/v1/study/1/status", 
headers=self.headers) self.assertEqual(response.code, 200) - exp = {'is_public': False, - 'has_sample_information': True, - 'sample_information_has_warnings': False, - 'preparations': [{'id': 1, 'has_artifact': True}, - {'id': 2, 'has_artifact': True}] - } + exp = { + "is_public": False, + "has_sample_information": True, + "sample_information_has_warnings": False, + "preparations": [ + {"id": 1, "has_artifact": True}, + {"id": 2, "has_artifact": True}, + ], + } obs = json_decode(response.body) self.assertEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py index 43df423b3..3436062ac 100644 --- a/qiita_pet/test/rest/test_study_associations.py +++ b/qiita_pet/test/rest/test_study_associations.py @@ -15,151 +15,202 @@ class StudyAssociationTests(RESTHandlerTestCase): def test_get_valid(self): - IGNORE = 'IGNORE' - exp = {'study_id': 1, - 'study_sample_metadata_filepath': IGNORE, - 'prep_templates': [ - {'prep_id': 1, - 'prep_status': 'private', - 'prep_sample_metadata_filepath': IGNORE, - 'prep_data_type': '18S', - 'prep_human_filtering': 'The greatest human filtering method', # noqa - 'prep_artifacts': [ - {'artifact_id': 1, - 'artifact_status': 'private', - 'artifact_parent_ids': None, - 'artifact_basal_id': 1, - 'artifact_processing_id': None, - 'artifact_processing_name': None, - 'artifact_processing_arguments': None, - 'artifact_filepaths': [ - {'artifact_filepath_id': 1, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'raw_forward_seqs'}, - {'artifact_filepath_id': 2, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'raw_barcodes'}]}, - {'artifact_id': 2, - 'artifact_status': 'private', - 'artifact_parent_ids': [1], - 'artifact_basal_id': 1, - 'artifact_processing_id': 1, - 'artifact_processing_name': 'Split libraries FASTQ', - 'artifact_processing_arguments': { - 'input_data': '1', - 'max_bad_run_length': '3', - 'min_per_read_length_fraction': '0.75', - 'sequence_max_n': '0', - 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'False', - 'rev_comp': 'False', - 'phred_quality_threshold': '3', - 'barcode_type': 'golay_12', - 'max_barcode_errors': '1.5', - 'phred_offset': 'auto'}, - 'artifact_filepaths': [ - {'artifact_filepath_id': 3, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'preprocessed_fasta'}, - {'artifact_filepath': IGNORE, - 'artifact_filepath_id': 4, - 'artifact_filepath_type': 'preprocessed_fastq'}, - {'artifact_filepath': IGNORE, - 'artifact_filepath_id': 5, - 'artifact_filepath_type': 'preprocessed_demux'}]}, - {'artifact_id': 3, - 'artifact_status': 'private', - 'artifact_parent_ids': [1], - 'artifact_basal_id': 1, - 'artifact_processing_id': 1, - 'artifact_processing_name': 'Split libraries FASTQ', - 'artifact_processing_arguments': { - 'input_data': '1', - 'max_bad_run_length': '3', - 'min_per_read_length_fraction': '0.75', - 'sequence_max_n': '0', - 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'True', - 'rev_comp': 'False', - 'phred_quality_threshold': '3', - 'barcode_type': 'golay_12', - 'max_barcode_errors': '1.5', - 'phred_offset': 'auto'}, - 'artifact_filepaths': None}, - {'artifact_id': 4, - 'artifact_status': 'private', - 'artifact_parent_ids': [2], - 'artifact_basal_id': 1, - 'artifact_processing_id': 3, - 'artifact_processing_name': 'Pick closed-reference OTUs', - 'artifact_processing_arguments': { - 'input_data': '2', - 'reference': '1', - 'sortmerna_e_value': '1', - 
'sortmerna_max_pos': '10000', - 'similarity': '0.97', - 'sortmerna_coverage': '0.97', - 'threads': '1'}, - 'artifact_filepaths': [{ - 'artifact_filepath_id': 9, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}, - {'artifact_id': 5, - 'artifact_status': 'private', - 'artifact_parent_ids': [2], - 'artifact_basal_id': 1, - 'artifact_processing_id': 3, - 'artifact_processing_name': 'Pick closed-reference OTUs', - 'artifact_processing_arguments': { - 'input_data': '2', - 'reference': '1', - 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', - 'similarity': '0.97', - 'sortmerna_coverage': '0.97', - 'threads': '1'}, - 'artifact_filepaths': [{ - 'artifact_filepath_id': 9, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}, - {'artifact_id': 6, - 'artifact_status': 'private', - 'artifact_parent_ids': [2], - 'artifact_basal_id': 1, - 'artifact_processing_id': 3, - 'artifact_processing_name': 'Pick closed-reference OTUs', - 'artifact_processing_arguments': { - 'input_data': '2', - 'reference': '2', - 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', - 'similarity': '0.97', - 'sortmerna_coverage': '0.97', - 'threads': '1'}, - 'artifact_filepaths': [{ - 'artifact_filepath_id': 12, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}]}, - {'prep_id': 2, - 'prep_status': 'private', - 'prep_sample_metadata_filepath': IGNORE, - 'prep_data_type': '18S', - 'prep_human_filtering': None, - 'prep_artifacts': [{ - 'artifact_id': 7, - 'artifact_parent_ids': None, - 'artifact_basal_id': 7, - 'artifact_status': 'private', - 'artifact_processing_id': None, - 'artifact_processing_name': None, - 'artifact_processing_arguments': None, - 'artifact_filepaths': [{ - 'artifact_filepath_id': 22, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}]}]} + IGNORE = "IGNORE" + exp = { + "study_id": 1, + "study_sample_metadata_filepath": IGNORE, + "prep_templates": [ + { + "prep_id": 1, + "prep_status": "private", + "prep_sample_metadata_filepath": IGNORE, + "prep_data_type": "18S", + "prep_human_filtering": "The greatest human filtering method", + "prep_artifacts": [ + { + "artifact_id": 1, + "artifact_status": "private", + "artifact_parent_ids": None, + "artifact_basal_id": 1, + "artifact_processing_id": None, + "artifact_processing_name": None, + "artifact_processing_arguments": None, + "artifact_filepaths": [ + { + "artifact_filepath_id": 1, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "raw_forward_seqs", + }, + { + "artifact_filepath_id": 2, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "raw_barcodes", + }, + ], + }, + { + "artifact_id": 2, + "artifact_status": "private", + "artifact_parent_ids": [1], + "artifact_basal_id": 1, + "artifact_processing_id": 1, + "artifact_processing_name": "Split libraries FASTQ", + "artifact_processing_arguments": { + "input_data": "1", + "max_bad_run_length": "3", + "min_per_read_length_fraction": "0.75", + "sequence_max_n": "0", + "rev_comp_barcode": "False", + "rev_comp_mapping_barcodes": "False", + "rev_comp": "False", + "phred_quality_threshold": "3", + "barcode_type": "golay_12", + "max_barcode_errors": "1.5", + "phred_offset": "auto", + }, + "artifact_filepaths": [ + { + "artifact_filepath_id": 3, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "preprocessed_fasta", + }, + { + "artifact_filepath": IGNORE, + "artifact_filepath_id": 4, + "artifact_filepath_type": "preprocessed_fastq", + }, + { + "artifact_filepath": IGNORE, + "artifact_filepath_id": 5, + 
"artifact_filepath_type": "preprocessed_demux", + }, + ], + }, + { + "artifact_id": 3, + "artifact_status": "private", + "artifact_parent_ids": [1], + "artifact_basal_id": 1, + "artifact_processing_id": 1, + "artifact_processing_name": "Split libraries FASTQ", + "artifact_processing_arguments": { + "input_data": "1", + "max_bad_run_length": "3", + "min_per_read_length_fraction": "0.75", + "sequence_max_n": "0", + "rev_comp_barcode": "False", + "rev_comp_mapping_barcodes": "True", + "rev_comp": "False", + "phred_quality_threshold": "3", + "barcode_type": "golay_12", + "max_barcode_errors": "1.5", + "phred_offset": "auto", + }, + "artifact_filepaths": None, + }, + { + "artifact_id": 4, + "artifact_status": "private", + "artifact_parent_ids": [2], + "artifact_basal_id": 1, + "artifact_processing_id": 3, + "artifact_processing_name": "Pick closed-reference OTUs", + "artifact_processing_arguments": { + "input_data": "2", + "reference": "1", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "similarity": "0.97", + "sortmerna_coverage": "0.97", + "threads": "1", + }, + "artifact_filepaths": [ + { + "artifact_filepath_id": 9, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "biom", + } + ], + }, + { + "artifact_id": 5, + "artifact_status": "private", + "artifact_parent_ids": [2], + "artifact_basal_id": 1, + "artifact_processing_id": 3, + "artifact_processing_name": "Pick closed-reference OTUs", + "artifact_processing_arguments": { + "input_data": "2", + "reference": "1", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "similarity": "0.97", + "sortmerna_coverage": "0.97", + "threads": "1", + }, + "artifact_filepaths": [ + { + "artifact_filepath_id": 9, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "biom", + } + ], + }, + { + "artifact_id": 6, + "artifact_status": "private", + "artifact_parent_ids": [2], + "artifact_basal_id": 1, + "artifact_processing_id": 3, + "artifact_processing_name": "Pick closed-reference OTUs", + "artifact_processing_arguments": { + "input_data": "2", + "reference": "2", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "similarity": "0.97", + "sortmerna_coverage": "0.97", + "threads": "1", + }, + "artifact_filepaths": [ + { + "artifact_filepath_id": 12, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "biom", + } + ], + }, + ], + }, + { + "prep_id": 2, + "prep_status": "private", + "prep_sample_metadata_filepath": IGNORE, + "prep_data_type": "18S", + "prep_human_filtering": None, + "prep_artifacts": [ + { + "artifact_id": 7, + "artifact_parent_ids": None, + "artifact_basal_id": 7, + "artifact_status": "private", + "artifact_processing_id": None, + "artifact_processing_name": None, + "artifact_processing_arguments": None, + "artifact_filepaths": [ + { + "artifact_filepath_id": 22, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "biom", + } + ], + } + ], + }, + ], + } - response = self.get('/api/v1/study/1/associations', - headers=self.headers) + response = self.get("/api/v1/study/1/associations", headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) @@ -170,28 +221,26 @@ def _process_list(list_): if list_ is None: return [] - return [dk for d in list_ - for dk in _process_dict(d)] + return [dk for d in list_ for dk in _process_dict(d)] stack = _process_dict(obs) while stack: (d, k) = stack.pop() - if k.endswith('filepath'): + if k.endswith("filepath"): d[k] = IGNORE - elif k.endswith('filepaths'): + elif k.endswith("filepaths"): 
stack.extend(_process_list(d[k])) - elif k.endswith('templates'): + elif k.endswith("templates"): stack.extend(_process_list(d[k])) - elif k.endswith('artifacts'): + elif k.endswith("artifacts"): stack.extend(_process_list(d[k])) self.assertEqual(obs, exp) def test_get_invalid(self): - response = self.get('/api/v1/study/0/associations', - headers=self.headers) + response = self.get("/api/v1/study/0/associations", headers=self.headers) self.assertEqual(response.code, 404) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/test/rest/test_study_person.py b/qiita_pet/test/rest/test_study_person.py index f3d384bd4..5a3596142 100644 --- a/qiita_pet/test/rest/test_study_person.py +++ b/qiita_pet/test/rest/test_study_person.py @@ -16,87 +16,114 @@ class StudyPersonHandlerTests(RESTHandlerTestCase): def test_get_list(self): - exp = [{'name': 'LabDude', 'affiliation': 'knight lab'}, - {'name': 'empDude', 'affiliation': 'broad'}, - {'name': 'PIDude', 'affiliation': 'Wash U'}] - response = self.get('/api/v1/person', headers=self.headers) + exp = [ + {"name": "LabDude", "affiliation": "knight lab"}, + {"name": "empDude", "affiliation": "broad"}, + {"name": "PIDude", "affiliation": "Wash U"}, + ] + response = self.get("/api/v1/person", headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertCountEqual(obs, exp) def test_exist(self): - exp = {'email': 'lab_dude@foo.bar', 'phone': '121-222-3333', - 'address': '123 lab street', 'id': 1} - response = self.get('/api/v1/person?name=LabDude&' - 'affiliation=knight%20lab', headers=self.headers) + exp = { + "email": "lab_dude@foo.bar", + "phone": "121-222-3333", + "address": "123 lab street", + "id": 1, + } + response = self.get( + "/api/v1/person?name=LabDude&affiliation=knight%20lab", headers=self.headers + ) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_get_does_not_exist(self): - exp = {'message': 'Person not found'} - response = self.get('/api/v1/person?name=Boaty%20McBoatFace&' - 'affiliation=UCSD', headers=self.headers) + exp = {"message": "Person not found"} + response = self.get( + "/api/v1/person?name=Boaty%20McBoatFace&affiliation=UCSD", + headers=self.headers, + ) self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_get_does_not_exist_affiliation(self): - exp = {'message': 'Person not found'} - response = self.get('/api/v1/person?name=LabDude%20&affiliation=UCSD', - headers=self.headers) + exp = {"message": "Person not found"} + response = self.get( + "/api/v1/person?name=LabDude%20&affiliation=UCSD", headers=self.headers + ) self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_get_invalid_query_string(self): - response = self.get('/api/v1/person?name=LabDude', - headers=self.headers) + response = self.get("/api/v1/person?name=LabDude", headers=self.headers) self.assertEqual(response.code, 400) def test_get_invalid_query_string_2(self): - response = self.get('/api/v1/person?affiliation=knight%20lab', - headers=self.headers) + response = self.get( + "/api/v1/person?affiliation=knight%20lab", headers=self.headers + ) self.assertEqual(response.code, 400) def test_get_valid_extra_arguments(self): - exp = {'email': 'lab_dude@foo.bar', 'phone': '121-222-3333', - 'address': '123 lab street', 'id': 1} - response = self.get('/api/v1/person?name=LabDude&' - 'affiliation=knight%20lab&foo=bar', - headers=self.headers) 
+ exp = { + "email": "lab_dude@foo.bar", + "phone": "121-222-3333", + "address": "123 lab street", + "id": 1, + } + response = self.get( + "/api/v1/person?name=LabDude&affiliation=knight%20lab&foo=bar", + headers=self.headers, + ) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_post_new_person(self): - body = {'name': 'Boaty McBoatFace', 'affiliation': 'UCSD', - 'email': 'boat@ucsd.edu', 'phone': '720-876-5309'} - - response = self.post('/api/v1/person', data=body, headers=self.headers) + body = { + "name": "Boaty McBoatFace", + "affiliation": "UCSD", + "email": "boat@ucsd.edu", + "phone": "720-876-5309", + } + + response = self.post("/api/v1/person", data=body, headers=self.headers) self.assertEqual(response.code, 201) obs = json_decode(response.body) - exp = StudyPerson.from_name_and_affiliation(body['name'], - body['affiliation']).id - self.assertEqual(exp, obs['id']) + exp = StudyPerson.from_name_and_affiliation( + body["name"], body["affiliation"] + ).id + self.assertEqual(exp, obs["id"]) def test_post_existing(self): - body = {'name': 'LabDude', 'affiliation': 'knight lab', - 'email': 'lab_dude@foo.bar', 'phone': '121-222-3333'} - - response = self.post('/api/v1/person', data=body, headers=self.headers) + body = { + "name": "LabDude", + "affiliation": "knight lab", + "email": "lab_dude@foo.bar", + "phone": "121-222-3333", + } + + response = self.post("/api/v1/person", data=body, headers=self.headers) self.assertEqual(response.code, 409) obs = json_decode(response.body) - exp = {'message': 'Person already exists'} + exp = {"message": "Person already exists"} self.assertEqual(exp, obs) def test_post_incomplete_details(self): - body = {'affiliation': 'knight lab', - 'email': 'lab_dude@foo.bar', 'phone': '121-222-3333'} + body = { + "affiliation": "knight lab", + "email": "lab_dude@foo.bar", + "phone": "121-222-3333", + } - response = self.post('/api/v1/person', data=body, headers=self.headers) + response = self.post("/api/v1/person", data=body, headers=self.headers) self.assertEqual(response.code, 400) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/test/rest/test_study_preparation.py b/qiita_pet/test/rest/test_study_preparation.py index 9de0e7b77..91fa5e977 100644 --- a/qiita_pet/test/rest/test_study_preparation.py +++ b/qiita_pet/test/rest/test_study_preparation.py @@ -6,18 +6,17 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main -from io import StringIO import os +from io import StringIO +from unittest import main import pandas as pd - from tornado.escape import json_decode -from qiita_db.metadata_template.util import load_template_to_dataframe from qiita_db.metadata_template.prep_template import PrepTemplate -from qiita_pet.test.rest.test_base import RESTHandlerTestCase +from qiita_db.metadata_template.util import load_template_to_dataframe from qiita_db.util import get_mountpoint +from qiita_pet.test.rest.test_base import RESTHandlerTestCase class StudyPrepCreatorTests(RESTHandlerTestCase): @@ -26,135 +25,156 @@ def test_post_non_existant_study(self): prep = StringIO(EXP_PREP_TEMPLATE.format(0)) prep_table = load_template_to_dataframe(prep) - response = self.post('/api/v1/study/0/preparation?' 
- '&data_type=16S', - data=prep_table.T.to_dict(), - headers=self.headers, asjson=True) + response = self.post( + "/api/v1/study/0/preparation?&data_type=16S", + data=prep_table.T.to_dict(), + headers=self.headers, + asjson=True, + ) self.assertEqual(response.code, 404) def test_post_non_matching_identifiers(self): prep = StringIO(EXP_PREP_TEMPLATE.format(100)) prep_table = load_template_to_dataframe(prep) - response = self.post('/api/v1/study/1/preparation?' - 'data_type=16S', - data=prep_table.T.to_dict(), - headers=self.headers, asjson=True) + response = self.post( + "/api/v1/study/1/preparation?data_type=16S", + data=prep_table.T.to_dict(), + headers=self.headers, + asjson=True, + ) self.assertEqual(response.code, 406) obs = json_decode(response.body) - self.assertCountEqual(obs.keys(), ['message']) - self.assertGreater(len(obs['message']), 0) + self.assertCountEqual(obs.keys(), ["message"]) + self.assertGreater(len(obs["message"]), 0) def test_post_valid_study(self): prep = StringIO(EXP_PREP_TEMPLATE.format(1)) prep_table = load_template_to_dataframe(prep) - response = self.post('/api/v1/study/1/preparation?data_type=16S', - data=prep_table.T.to_dict(), - headers=self.headers, asjson=True) + response = self.post( + "/api/v1/study/1/preparation?data_type=16S", + data=prep_table.T.to_dict(), + headers=self.headers, + asjson=True, + ) self.assertEqual(response.code, 201) exp = json_decode(response.body) - exp_prep = PrepTemplate(exp['id']).to_dataframe() + exp_prep = PrepTemplate(exp["id"]).to_dataframe() - prep_table.index.name = 'sample_name' + prep_table.index.name = "sample_name" # sort columns to be comparable prep_table = prep_table[sorted(prep_table.columns.tolist())] exp_prep = exp_prep[sorted(exp_prep.columns.tolist())] - exp_prep.drop('qiita_prep_id', axis=1, inplace=True) + exp_prep.drop("qiita_prep_id", axis=1, inplace=True) pd.testing.assert_frame_equal(prep_table, exp_prep) class StudyPrepArtifactCreatorTests(RESTHandlerTestCase): def test_post_non_existant_study(self): - uri = '/api/v1/study/0/preparation/0/artifact' - body = {'artifact_type': 'foo', 'filepaths': [['foo.txt', 1], - ['bar.txt', 1]], - 'artifact_name': 'a name is a name'} + uri = "/api/v1/study/0/preparation/0/artifact" + body = { + "artifact_type": "foo", + "filepaths": [["foo.txt", 1], ["bar.txt", 1]], + "artifact_name": "a name is a name", + } response = self.post(uri, data=body, headers=self.headers, asjson=True) - exp = {'message': 'Study not found'} + exp = {"message": "Study not found"} self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_post_non_existant_prep(self): - uri = '/api/v1/study/1/preparation/1337/artifact' - body = {'artifact_type': 'foo', 'filepaths': [['foo.txt', 1], - ['bar.txt', 1]], - 'artifact_name': 'a name is a name'} + uri = "/api/v1/study/1/preparation/1337/artifact" + body = { + "artifact_type": "foo", + "filepaths": [["foo.txt", 1], ["bar.txt", 1]], + "artifact_name": "a name is a name", + } response = self.post(uri, data=body, headers=self.headers, asjson=True) - exp = {'message': 'Preparation not found'} + exp = {"message": "Preparation not found"} self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_post_unknown_artifact_type(self): - uri = '/api/v1/study/1/preparation/1/artifact' - body = {'artifact_type': 'foo', 'filepaths': [['foo.txt', 1], - ['bar.txt', 1]], - 'artifact_name': 'a name is a name'} + uri = "/api/v1/study/1/preparation/1/artifact" + body = { + 
"artifact_type": "foo", + "filepaths": [["foo.txt", 1], ["bar.txt", 1]], + "artifact_name": "a name is a name", + } response = self.post(uri, data=body, headers=self.headers, asjson=True) self.assertEqual(response.code, 406) obs = json_decode(response.body) - self.assertCountEqual(obs.keys(), ['message']) - self.assertGreater(len(obs['message']), 0) + self.assertCountEqual(obs.keys(), ["message"]) + self.assertGreater(len(obs["message"]), 0) def test_post_unknown_filepath_type_id(self): - uri = '/api/v1/study/1/preparation/1/artifact' - body = {'artifact_type': 'foo', 'filepaths': [['foo.txt', 123123], - ['bar.txt', 1]], - 'artifact_name': 'a name is a name'} + uri = "/api/v1/study/1/preparation/1/artifact" + body = { + "artifact_type": "foo", + "filepaths": [["foo.txt", 123123], ["bar.txt", 1]], + "artifact_name": "a name is a name", + } response = self.post(uri, data=body, headers=self.headers, asjson=True) self.assertEqual(response.code, 406) obs = json_decode(response.body) - self.assertCountEqual(obs.keys(), ['message']) - self.assertGreater(len(obs['message']), 0) + self.assertCountEqual(obs.keys(), ["message"]) + self.assertGreater(len(obs["message"]), 0) def test_post_files_notfound(self): - uri = '/api/v1/study/1/preparation/1/artifact' - body = {'artifact_type': 'foo', 'filepaths': [['foo.txt', 1], - ['bar.txt', 1]], - 'artifact_name': 'a name is a name'} + uri = "/api/v1/study/1/preparation/1/artifact" + body = { + "artifact_type": "foo", + "filepaths": [["foo.txt", 1], ["bar.txt", 1]], + "artifact_name": "a name is a name", + } response = self.post(uri, data=body, headers=self.headers, asjson=True) self.assertEqual(response.code, 406) obs = json_decode(response.body) - self.assertCountEqual(obs.keys(), ['message']) - self.assertGreater(len(obs['message']), 0) + self.assertCountEqual(obs.keys(), ["message"]) + self.assertGreater(len(obs["message"]), 0) def test_post_valid(self): - dontcare, uploads_dir = get_mountpoint('uploads')[0] - foo_fp = os.path.join(uploads_dir, '1', 'foo.txt') - bar_fp = os.path.join(uploads_dir, '1', 'bar.txt') - with open(foo_fp, 'w') as fp: + dontcare, uploads_dir = get_mountpoint("uploads")[0] + foo_fp = os.path.join(uploads_dir, "1", "foo.txt") + bar_fp = os.path.join(uploads_dir, "1", "bar.txt") + with open(foo_fp, "w") as fp: fp.write("@x\nATGC\n+\nHHHH\n") - with open(bar_fp, 'w') as fp: + with open(bar_fp, "w") as fp: fp.write("@x\nATGC\n+\nHHHH\n") prep = StringIO(EXP_PREP_TEMPLATE.format(1)) prep_table = load_template_to_dataframe(prep) - response = self.post('/api/v1/study/1/preparation?data_type=16S', - data=prep_table.T.to_dict(), - headers=self.headers, asjson=True) - prepid = json_decode(response.body)['id'] + response = self.post( + "/api/v1/study/1/preparation?data_type=16S", + data=prep_table.T.to_dict(), + headers=self.headers, + asjson=True, + ) + prepid = json_decode(response.body)["id"] - uri = '/api/v1/study/1/preparation/%d/artifact' % prepid + uri = "/api/v1/study/1/preparation/%d/artifact" % prepid # 1 -> fwd or rev sequences in fastq # 3 -> barcodes - body = {'artifact_type': 'FASTQ', 'filepaths': [['foo.txt', 1], - ['bar.txt', - 'raw_barcodes']], - 'artifact_name': 'a name is a name'} + body = { + "artifact_type": "FASTQ", + "filepaths": [["foo.txt", 1], ["bar.txt", "raw_barcodes"]], + "artifact_name": "a name is a name", + } response = self.post(uri, data=body, headers=self.headers, asjson=True) self.assertEqual(response.code, 201) - obs = json_decode(response.body)['id'] + obs = json_decode(response.body)["id"] 
prep_instance = PrepTemplate(prepid) exp = prep_instance.artifact.id @@ -162,20 +182,21 @@ def test_post_valid(self): EXP_PREP_TEMPLATE = ( - 'sample_name\tbarcode\tcenter_name\tcenter_project_name\t' - 'ebi_submission_accession\temp_status\texperiment_design_description\t' - 'instrument_model\tlibrary_construction_protocol\tplatform\tprimer\t' - 'bar\trun_prefix\tstr_column\n' - '{0}.SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\t\tEMP\tBBBB\t' - 'Illumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\tfoo\t' - 's_G1_L002_sequences\tValue for sample 3\n' - '{0}.SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\t\tEMP\tBBBB\t' - 'Illumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\tfoo\t' - 's_G1_L001_sequences\tValue for sample 1\n' - '{0}.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\t\tEMP\tBBBB\t' - 'Illumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\tfoo\t' - 's_G1_L001_sequences\tValue for sample 2\n') - - -if __name__ == '__main__': + "sample_name\tbarcode\tcenter_name\tcenter_project_name\t" + "ebi_submission_accession\temp_status\texperiment_design_description\t" + "instrument_model\tlibrary_construction_protocol\tplatform\tprimer\t" + "bar\trun_prefix\tstr_column\n" + "{0}.SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\t\tEMP\tBBBB\t" + "Illumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\tfoo\t" + "s_G1_L002_sequences\tValue for sample 3\n" + "{0}.SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\t\tEMP\tBBBB\t" + "Illumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\tfoo\t" + "s_G1_L001_sequences\tValue for sample 1\n" + "{0}.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\t\tEMP\tBBBB\t" + "Illumina MiSeq\tAAAA\tIllumina\tGTGCCAGCMGCCGCGGTAA\tfoo\t" + "s_G1_L001_sequences\tValue for sample 2\n" +) + + +if __name__ == "__main__": main() diff --git a/qiita_pet/test/rest/test_study_samples.py b/qiita_pet/test/rest/test_study_samples.py index 6d246bd6c..1551a60c5 100644 --- a/qiita_pet/test/rest/test_study_samples.py +++ b/qiita_pet/test/rest/test_study_samples.py @@ -6,8 +6,8 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- import json -from unittest import main from datetime import datetime +from unittest import main from tornado.escape import json_decode @@ -18,155 +18,187 @@ def _sample_creator(ids, categories=None): if categories is None: - categories = ['season_environment', 'env_package', - 'assigned_from_geo', 'texture', 'taxon_id', - 'depth', 'host_taxid', 'common_name', - 'water_content_soil', 'elevation', 'temp', - 'tot_nitro', 'samp_salinity', 'altitude', - 'env_biome', 'country', 'ph', 'anonymized_name', - 'tot_org_carb', 'description_duplicate', - 'env_feature', 'physical_specimen_location', - 'physical_specimen_remaining', 'dna_extracted', - 'sample_type', 'collection_timestamp', - 'host_subject_id', 'description', - 'latitude', 'longitude', 'scientific_name'] + categories = [ + "season_environment", + "env_package", + "assigned_from_geo", + "texture", + "taxon_id", + "depth", + "host_taxid", + "common_name", + "water_content_soil", + "elevation", + "temp", + "tot_nitro", + "samp_salinity", + "altitude", + "env_biome", + "country", + "ph", + "anonymized_name", + "tot_org_carb", + "description_duplicate", + "env_feature", + "physical_specimen_location", + "physical_specimen_remaining", + "dna_extracted", + "sample_type", + "collection_timestamp", + "host_subject_id", + "description", + "latitude", + "longitude", + "scientific_name", + ] return {i: {c: 1 for c in categories} for i in ids} class StudySamplesHandlerTests(RESTHandlerTestCase): def _get_sample_categories(self, study_id): - response = self.get('/api/v1/study/1/samples/info', - headers=self.headers) + response = self.get("/api/v1/study/1/samples/info", headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) - return obs['categories'] + return obs["categories"] def test_patch_accept_new_categories(self): - body = {'1.SKM1.999998': {'dna_extracted': 'foo', - 'host_taxid': 'foo', - 'altitude': 'foo', - 'description_duplicate': 'foo', - 'temp': 'foo', - 'country': 'foo', - 'texture': 'foo', - 'latitude': '32.7157', - 'assigned_from_geo': 'foo', - 'tot_org_carb': 'foo', - 'env_feature': 'foo', - 'depth': 'foo', - 'tot_nitro': 'foo', - 'anonymized_name': 'foo', - 'scientific_name': 'foo', - 'samp_salinity': 'foo', - 'ph': 'foo', - 'taxon_id': '9999', - 'season_environment': 'foo', - 'physical_specimen_remaining': 'foo', - 'host_subject_id': 'foo', - 'water_content_soil': 'foo', - 'env_biome': 'foo', - 'env_package': 'foo', - 'elevation': 'foo', - 'collection_timestamp': ('2014-05-29 ' - '12:24:15'), - 'sample_type': 'foo', - 'physical_specimen_location': 'foo', - 'longitude': '117.1611', - 'common_name': 'foo', - 'description': 'foo'}} + body = { + "1.SKM1.999998": { + "dna_extracted": "foo", + "host_taxid": "foo", + "altitude": "foo", + "description_duplicate": "foo", + "temp": "foo", + "country": "foo", + "texture": "foo", + "latitude": "32.7157", + "assigned_from_geo": "foo", + "tot_org_carb": "foo", + "env_feature": "foo", + "depth": "foo", + "tot_nitro": "foo", + "anonymized_name": "foo", + "scientific_name": "foo", + "samp_salinity": "foo", + "ph": "foo", + "taxon_id": "9999", + "season_environment": "foo", + "physical_specimen_remaining": "foo", + "host_subject_id": "foo", + "water_content_soil": "foo", + "env_biome": "foo", + "env_package": "foo", + "elevation": "foo", + "collection_timestamp": ("2014-05-29 12:24:15"), + "sample_type": "foo", + "physical_specimen_location": "foo", + "longitude": "117.1611", + 
"common_name": "foo", + "description": "foo", + } + } # first, confirm this should patch successfully: all fields present # note that response is 201 if using patch to add new samples, 200 if # updating existing samples. - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 201) - body = {'1.SKM1.999999': {'dna_extracted': 'foo', - 'host_taxid': 'foo', - 'altitude': 'foo', - 'description_duplicate': 'foo', - 'temp': 'foo', - 'country': 'foo', - 'texture': 'foo', - 'latitude': '32.7157', - 'assigned_from_geo': 'foo', - 'tot_org_carb': 'foo', - 'env_feature': 'foo', - 'depth': 'foo', - 'tot_nitro': 'foo', - 'anonymized_name': 'foo', - 'scientific_name': 'foo', - 'samp_salinity': 'foo', - 'ph': 'foo', - 'taxon_id': '9999', - 'season_environment': 'foo', - 'physical_specimen_remaining': 'foo', - 'host_subject_id': 'foo', - 'water_content_soil': 'foo', - 'env_biome': 'foo', - 'env_package': 'foo', - 'elevation': 'foo', - 'collection_timestamp': ('2014-05-29 ' - '12:24:15'), - 'sample_type': 'foo', - 'physical_specimen_location': 'foo', - 'longitude': '117.1611', - 'common_name': 'foo', - 'description': 'foo'}} + body = { + "1.SKM1.999999": { + "dna_extracted": "foo", + "host_taxid": "foo", + "altitude": "foo", + "description_duplicate": "foo", + "temp": "foo", + "country": "foo", + "texture": "foo", + "latitude": "32.7157", + "assigned_from_geo": "foo", + "tot_org_carb": "foo", + "env_feature": "foo", + "depth": "foo", + "tot_nitro": "foo", + "anonymized_name": "foo", + "scientific_name": "foo", + "samp_salinity": "foo", + "ph": "foo", + "taxon_id": "9999", + "season_environment": "foo", + "physical_specimen_remaining": "foo", + "host_subject_id": "foo", + "water_content_soil": "foo", + "env_biome": "foo", + "env_package": "foo", + "elevation": "foo", + "collection_timestamp": ("2014-05-29 12:24:15"), + "sample_type": "foo", + "physical_specimen_location": "foo", + "longitude": "117.1611", + "common_name": "foo", + "description": "foo", + } + } # add a new field to one sample_id, making body a superset of values. - body['1.SKM1.999999']['new_field1'] = 'some_value' - body['1.SKM1.999999']['new_field2'] = 'another_value' + body["1.SKM1.999999"]["new_field1"] = "some_value" + body["1.SKM1.999999"]["new_field2"] = "another_value" # this test should pass. - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 201) # confirm new samples were added. - response = self.get('/api/v1/study/1/samples', headers=self.headers) + response = self.get("/api/v1/study/1/samples", headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) - self.assertIn('1.SKM1.999998', obs) - self.assertIn('1.SKM1.999999', obs) + self.assertIn("1.SKM1.999998", obs) + self.assertIn("1.SKM1.999999", obs) # confirm new categories are a part of the samples. 
- response = self.get('/api/v1/study/1/samples/info', - headers=self.headers) + response = self.get("/api/v1/study/1/samples/info", headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) - self.assertIn('new_field1', obs['categories']) - self.assertIn('new_field2', obs['categories']) + self.assertIn("new_field1", obs["categories"]) + self.assertIn("new_field2", obs["categories"]) # remove a few existing fields, representing retired fields. for sample_id in body: - del (body[sample_id]['ph']) - del (body[sample_id]['water_content_soil']) + del body[sample_id]["ph"] + del body[sample_id]["water_content_soil"] - exp = {'message': 'Not all sample information categories provided'} - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + exp = {"message": "Not all sample information categories provided"} + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 400) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_patch_no_study(self): - body = {'sampleid1': {'category_a': 'value_a'}, - 'sampleid2': {'category_b': 'value_b'}} + body = { + "sampleid1": {"category_a": "value_a"}, + "sampleid2": {"category_b": "value_b"}, + } - exp = {'message': 'Study not found'} - response = self.patch('/api/v1/study/0/samples', headers=self.headers, - data=body, asjson=True) + exp = {"message": "Study not found"} + response = self.patch( + "/api/v1/study/0/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_patch_no_samples(self): body = {} - exp = {'message': 'No samples provided'} - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + exp = {"message": "No samples provided"} + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 400) obs = json_decode(response.body) self.assertEqual(obs, exp) @@ -180,30 +212,40 @@ def test_patch_no_sample_template(self): "study_description": "DESC", "study_abstract": "ABS", "principal_investigator_id": StudyPerson(3), - 'first_contact': datetime(2015, 5, 19, 16, 10), - 'most_recent_contact': datetime(2015, 5, 19, 16, 11), + "first_contact": datetime(2015, 5, 19, 16, 10), + "most_recent_contact": datetime(2015, 5, 19, 16, 11), } - new_study = Study.create(User('test@foo.bar'), - "Some New Study for test jr", info) + new_study = Study.create( + User("test@foo.bar"), "Some New Study for test jr", info + ) - body = {'sampleid1': {'category_a': 'value_a'}, - 'sampleid2': {'category_b': 'value_b'}} + body = { + "sampleid1": {"category_a": "value_a"}, + "sampleid2": {"category_b": "value_b"}, + } - exp = {'message': 'No sample information found'} - response = self.patch('/api/v1/study/%d/samples' % new_study.id, - headers=self.headers, data=body, asjson=True) + exp = {"message": "No sample information found"} + response = self.patch( + "/api/v1/study/%d/samples" % new_study.id, + headers=self.headers, + data=body, + asjson=True, + ) self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_patch_sample_ids_exist_incomplete_metadata(self): - body = {'1.SKM3.640197': {'elevation': 'xyz'}, - '1.SKM1.640183': {'elevation': 'foo'}} + body = { + "1.SKM3.640197": {"elevation": "xyz"}, + "1.SKM1.640183": {"elevation": "foo"}, + } - exp 
= {'message': 'Not all sample information categories provided'} - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + exp = {"message": "Not all sample information categories provided"} + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 400) obs = json_decode(response.body) self.assertEqual(obs, exp) @@ -212,126 +254,152 @@ def test_patch_sample_ids_complete_metadata_and_unknown_metadata(self): current = self._get_sample_categories(1) # If no new categories, both new and existing samples should succeed. # 640201 is an existing sample. blank.a1 is a new sample - body = _sample_creator(['1.SKM8.640201', - 'blank.a1'], categories=current) - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + body = _sample_creator(["1.SKM8.640201", "blank.a1"], categories=current) + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 201) # successful response should be empty string - self.assertEqual(response.body, b'') + self.assertEqual(response.body, b"") # If new categories are added, patch() should succeed. # New and existing samples should have new categories. # 640201 is an existing sample. blank.a2 is a new sample - body = _sample_creator(['1.SKM8.640201', - 'blank.a2'], categories=current) + body = _sample_creator(["1.SKM8.640201", "blank.a2"], categories=current) # body['blank.a2']['DOES_NOT_EXIST'] will be '', not None. # body['1.SKM8.640201']['WHAT'] will be '', not None. - body['1.SKM8.640201']['DOES_NOT_EXIST'] = 'foo' - body['blank.a2']['WHAT'] = 'bar' + body["1.SKM8.640201"]["DOES_NOT_EXIST"] = "foo" + body["blank.a2"]["WHAT"] = "bar" - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 201) # successful response should be empty string - self.assertEqual(response.body, b'') + self.assertEqual(response.body, b"") - response = self.get(('/api/v1/study/1/samples/categories=' - 'does_not_exist,what'), headers=self.headers) + response = self.get( + ("/api/v1/study/1/samples/categories=does_not_exist,what"), + headers=self.headers, + ) self.assertEqual(response.code, 200) # decode results manually from bytes, replacing non-JSON-spec 'NaN' # values with JSON-spec 'null'. These will convert to Python None # values when load()ed. - obs = response.body.decode("utf-8").replace('NaN', 'null') + obs = response.body.decode("utf-8").replace("NaN", "null") obs = json.loads(obs) - self.assertEqual(obs['header'], ['does_not_exist', 'what']) + self.assertEqual(obs["header"], ["does_not_exist", "what"]) - self.assertEqual(obs['samples']['1.blank.a2'], ['', 'bar']) - self.assertEqual(obs['samples']['1.SKM8.640201'], ['foo', '']) + self.assertEqual(obs["samples"]["1.blank.a2"], ["", "bar"]) + self.assertEqual(obs["samples"]["1.SKM8.640201"], ["foo", ""]) # as the number and names of samples is dynamic, simply confirm the # other samples are unchanged. 
- for sample in obs['samples']: - if sample not in ['1.blank.a2', '1.SKM8.640201']: + for sample in obs["samples"]: + if sample not in ["1.blank.a2", "1.SKM8.640201"]: print(sample) - self.assertEqual(obs['samples'][sample], [None, None]) + self.assertEqual(obs["samples"][sample], [None, None]) # If categories were removed, both existing and new samples should # fail. # 640201 is an existing sample. blank.a3 is a new sample current = self._get_sample_categories(1) - body = _sample_creator(['1.SKM8.640201', - 'blank.a3'], categories=current) - del (body['1.SKM8.640201']['env_biome']) - del (body['blank.a3']['env_biome']) - - exp = {'message': 'Not all sample information categories provided'} - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + body = _sample_creator(["1.SKM8.640201", "blank.a3"], categories=current) + del body["1.SKM8.640201"]["env_biome"] + del body["blank.a3"]["env_biome"] + + exp = {"message": "Not all sample information categories provided"} + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 400) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_patch_sample_ids_already_exist(self): current = self._get_sample_categories(1) - body = _sample_creator(['1.SKM8.640201', - '1.SKM3.640197'], categories=current) - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + body = _sample_creator(["1.SKM8.640201", "1.SKM3.640197"], categories=current) + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 200) df = Study(1).sample_template.to_dataframe() - self.assertEqual(df.loc['1.SKM8.640201']['elevation'], '1') - self.assertEqual(df.loc['1.SKM3.640197']['elevation'], '1') + self.assertEqual(df.loc["1.SKM8.640201"]["elevation"], "1") + self.assertEqual(df.loc["1.SKM3.640197"]["elevation"], "1") # make sure we didn't touch other samples - self.assertNotEqual(df.loc['1.SKM4.640180']['elevation'], '1') + self.assertNotEqual(df.loc["1.SKM4.640180"]["elevation"], "1") def test_patch_sample_ids_do_not_exist(self): current = self._get_sample_categories(1) - body = _sample_creator(['blank.a1'], categories=current) - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + body = _sample_creator(["blank.a1"], categories=current) + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 201) df = Study(1).sample_template.to_dataframe() - self.assertNotEqual(df.loc['1.SKD7.640191']['elevation'], '1') - self.assertEqual(df.loc['1.blank.a1']['elevation'], '1') + self.assertNotEqual(df.loc["1.SKD7.640191"]["elevation"], "1") + self.assertEqual(df.loc["1.blank.a1"]["elevation"], "1") def test_patch_sample_ids_partially_exist(self): current = self._get_sample_categories(1) - body = _sample_creator(['blank.b1', - '1.SKM5.640177', - '1.SKB9.640200'], categories=current) - response = self.patch('/api/v1/study/1/samples', headers=self.headers, - data=body, asjson=True) + body = _sample_creator( + ["blank.b1", "1.SKM5.640177", "1.SKB9.640200"], categories=current + ) + response = self.patch( + "/api/v1/study/1/samples", headers=self.headers, data=body, asjson=True + ) self.assertEqual(response.code, 201) df = Study(1).sample_template.to_dataframe() - self.assertEqual(df.loc['1.blank.b1']['elevation'], '1') - 
self.assertEqual(df.loc['1.SKM5.640177']['elevation'], '1') - self.assertEqual(df.loc['1.SKB9.640200']['elevation'], '1') + self.assertEqual(df.loc["1.blank.b1"]["elevation"], "1") + self.assertEqual(df.loc["1.SKM5.640177"]["elevation"], "1") + self.assertEqual(df.loc["1.SKB9.640200"]["elevation"], "1") # make sure we didn't touch other samples - self.assertNotEqual(df.loc['1.SKD3.640198']['elevation'], '1') + self.assertNotEqual(df.loc["1.SKD3.640198"]["elevation"], "1") def test_get_valid(self): - exp = sorted(['1.SKB2.640194', '1.SKM4.640180', '1.SKB3.640195', - '1.SKB6.640176', '1.SKD6.640190', '1.SKM6.640187', - '1.SKD9.640182', '1.SKM8.640201', '1.SKM2.640199', - '1.SKD2.640178', '1.SKB7.640196', '1.SKD4.640185', - '1.SKB8.640193', '1.SKM3.640197', '1.SKD5.640186', - '1.SKB1.640202', '1.SKM1.640183', '1.SKD1.640179', - '1.SKD3.640198', '1.SKB5.640181', '1.SKB4.640189', - '1.SKB9.640200', '1.SKM9.640192', '1.SKD8.640184', - '1.SKM5.640177', '1.SKM7.640188', '1.SKD7.640191']) - response = self.get('/api/v1/study/1/samples', headers=self.headers) + exp = sorted( + [ + "1.SKB2.640194", + "1.SKM4.640180", + "1.SKB3.640195", + "1.SKB6.640176", + "1.SKD6.640190", + "1.SKM6.640187", + "1.SKD9.640182", + "1.SKM8.640201", + "1.SKM2.640199", + "1.SKD2.640178", + "1.SKB7.640196", + "1.SKD4.640185", + "1.SKB8.640193", + "1.SKM3.640197", + "1.SKD5.640186", + "1.SKB1.640202", + "1.SKM1.640183", + "1.SKD1.640179", + "1.SKD3.640198", + "1.SKB5.640181", + "1.SKB4.640189", + "1.SKB9.640200", + "1.SKM9.640192", + "1.SKD8.640184", + "1.SKM5.640177", + "1.SKM7.640188", + "1.SKD7.640191", + ] + ) + response = self.get("/api/v1/study/1/samples", headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertEqual(sorted(obs), exp) def test_get_invalid_no_study(self): - exp = {'message': 'Study not found'} - response = self.get('/api/v1/study/0/samples', headers=self.headers) + exp = {"message": "Study not found"} + response = self.get("/api/v1/study/0/samples", headers=self.headers) self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) @@ -345,16 +413,16 @@ def test_get_study_no_samples(self): "study_description": "DESC", "study_abstract": "ABS", "principal_investigator_id": StudyPerson(3), - 'first_contact': datetime(2015, 5, 19, 16, 10), - 'most_recent_contact': datetime(2015, 5, 19, 16, 11), + "first_contact": datetime(2015, 5, 19, 16, 10), + "most_recent_contact": datetime(2015, 5, 19, 16, 11), } - new_study = Study.create(User('test@foo.bar'), - "Some New Study for test", info) + new_study = Study.create(User("test@foo.bar"), "Some New Study for test", info) exp = [] - response = self.get('/api/v1/study/%d/samples' % new_study.id, - headers=self.headers) + response = self.get( + "/api/v1/study/%d/samples" % new_study.id, headers=self.headers + ) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertEqual(obs, exp) @@ -362,31 +430,52 @@ def test_get_study_no_samples(self): class StudySamplesInfoHandlerTests(RESTHandlerTestCase): def test_get_valid(self): - exp = {'number-of-samples': 27, - 'categories': ['season_environment', - 'assigned_from_geo', 'texture', 'taxon_id', - 'depth', 'host_taxid', 'common_name', - 'water_content_soil', 'elevation', 'temp', - 'tot_nitro', 'samp_salinity', 'altitude', - 'env_biome', 'country', 'ph', 'anonymized_name', - 'tot_org_carb', 'description_duplicate', - 'env_feature', 'physical_specimen_location', - 'physical_specimen_remaining', 'dna_extracted', - 
'sample_type', 'collection_timestamp', - 'host_subject_id', 'description', 'env_package', - 'latitude', 'longitude', 'scientific_name']} - response = self.get('/api/v1/study/1/samples/info', - headers=self.headers) + exp = { + "number-of-samples": 27, + "categories": [ + "season_environment", + "assigned_from_geo", + "texture", + "taxon_id", + "depth", + "host_taxid", + "common_name", + "water_content_soil", + "elevation", + "temp", + "tot_nitro", + "samp_salinity", + "altitude", + "env_biome", + "country", + "ph", + "anonymized_name", + "tot_org_carb", + "description_duplicate", + "env_feature", + "physical_specimen_location", + "physical_specimen_remaining", + "dna_extracted", + "sample_type", + "collection_timestamp", + "host_subject_id", + "description", + "env_package", + "latitude", + "longitude", + "scientific_name", + ], + } + response = self.get("/api/v1/study/1/samples/info", headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertEqual(obs.keys(), exp.keys()) - self.assertEqual(obs['number-of-samples'], exp['number-of-samples']) - self.assertCountEqual(obs['categories'], exp['categories']) + self.assertEqual(obs["number-of-samples"], exp["number-of-samples"]) + self.assertCountEqual(obs["categories"], exp["categories"]) def test_get_study_does_not_exist(self): - exp = {'message': 'Study not found'} - response = self.get('/api/v1/study/0/samples/info', - headers=self.headers) + exp = {"message": "Study not found"} + response = self.get("/api/v1/study/0/samples/info", headers=self.headers) self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) @@ -402,15 +491,15 @@ def test_get_no_samples(self): "study_description": "DESC", "study_abstract": "ABS", "principal_investigator_id": StudyPerson(3), - 'first_contact': datetime(2015, 5, 19, 16, 10), - 'most_recent_contact': datetime(2015, 5, 19, 16, 11), + "first_contact": datetime(2015, 5, 19, 16, 10), + "most_recent_contact": datetime(2015, 5, 19, 16, 11), } - new_study = Study.create(User('test@foo.bar'), - "Some New Study for test", info) - exp = {'number-of-samples': 0, 'categories': []} - response = self.get('/api/v1/study/%d/samples/info' % new_study.id, - headers=self.headers) + new_study = Study.create(User("test@foo.bar"), "Some New Study for test", info) + exp = {"number-of-samples": 0, "categories": []} + response = self.get( + "/api/v1/study/%d/samples/info" % new_study.id, headers=self.headers + ) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertEqual(obs, exp) @@ -419,59 +508,60 @@ def test_get_no_samples(self): class StudySamplesCategoriesHandlerTests(RESTHandlerTestCase): def test_get_valid_two_arg(self): df = Study(1).sample_template.to_dataframe() - df = df[['ph', 'country']] - df = {idx: [row['ph'], row['country']] for idx, row in df.iterrows()} - exp = {'header': ['ph', 'country'], - 'samples': df} + df = df[["ph", "country"]] + df = {idx: [row["ph"], row["country"]] for idx, row in df.iterrows()} + exp = {"header": ["ph", "country"], "samples": df} - response = self.get('/api/v1/study/1/samples/categories=ph,country', - headers=self.headers) + response = self.get( + "/api/v1/study/1/samples/categories=ph,country", headers=self.headers + ) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_get_valid_one_arg(self): df = Study(1).sample_template.to_dataframe() - df = df[['ph', 'country']] - df = {idx: [row['country']] for idx, row in 
df.iterrows()} - exp = {'header': ['country'], 'samples': df} + df = df[["ph", "country"]] + df = {idx: [row["country"]] for idx, row in df.iterrows()} + exp = {"header": ["country"], "samples": df} - response = self.get('/api/v1/study/1/samples/categories=country', - headers=self.headers) + response = self.get( + "/api/v1/study/1/samples/categories=country", headers=self.headers + ) self.assertEqual(response.code, 200) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_get_valid_two_arg_one_bad(self): - exp = {'message': 'Category not found', - 'categories_not_found': ['foo']} - response = self.get('/api/v1/study/1/samples/categories=country,foo', - headers=self.headers) + exp = {"message": "Category not found", "categories_not_found": ["foo"]} + response = self.get( + "/api/v1/study/1/samples/categories=country,foo", headers=self.headers + ) self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_get_study_does_not_exist(self): - exp = {'message': 'Study not found'} - response = self.get('/api/v1/study/0/samples/categories=foo', - headers=self.headers) + exp = {"message": "Study not found"} + response = self.get( + "/api/v1/study/0/samples/categories=foo", headers=self.headers + ) self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_get_bad_category(self): - exp = {'message': 'Category not found', - 'categories_not_found': ['foo']} - response = self.get('/api/v1/study/1/samples/categories=foo', - headers=self.headers) + exp = {"message": "Category not found", "categories_not_found": ["foo"]} + response = self.get( + "/api/v1/study/1/samples/categories=foo", headers=self.headers + ) self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) def test_get_no_category(self): - exp = {'message': 'No categories specified'} - response = self.get('/api/v1/study/1/samples/categories=', - headers=self.headers) + exp = {"message": "No categories specified"} + response = self.get("/api/v1/study/1/samples/categories=", headers=self.headers) self.assertEqual(response.code, 405) obs = json_decode(response.body) self.assertEqual(obs, exp) @@ -487,20 +577,21 @@ def test_get_no_samples(self): "study_description": "DESC", "study_abstract": "ABS", "principal_investigator_id": StudyPerson(3), - 'first_contact': datetime(2015, 5, 19, 16, 10), - 'most_recent_contact': datetime(2015, 5, 19, 16, 11), + "first_contact": datetime(2015, 5, 19, 16, 10), + "most_recent_contact": datetime(2015, 5, 19, 16, 11), } - new_study = Study.create(User('test@foo.bar'), - "Some New Study for test", info) + new_study = Study.create(User("test@foo.bar"), "Some New Study for test", info) - exp = {'message': 'Study does not have sample information'} - response = self.get('/api/v1/study/%d/samples/categories=foo' % - new_study.id, headers=self.headers) + exp = {"message": "Study does not have sample information"} + response = self.get( + "/api/v1/study/%d/samples/categories=foo" % new_study.id, + headers=self.headers, + ) self.assertEqual(response.code, 404) obs = json_decode(response.body) self.assertEqual(obs, exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/test/test_admin_processing_job_handlers.py b/qiita_pet/test/test_admin_processing_job_handlers.py index 4e45daa9e..7a16766b6 100644 --- a/qiita_pet/test/test_admin_processing_job_handlers.py +++ b/qiita_pet/test/test_admin_processing_job_handlers.py @@ -6,13 +6,14 @@ # The full 
license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main from json import loads +from unittest import main + import pandas as pd from mock import Mock -from qiita_db.user import User from qiita_db.metadata_template.sample_template import SampleTemplate as ST +from qiita_db.user import User from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.test.tornado_test_base import TestHandlerBase @@ -25,68 +26,57 @@ def setUp(self): class TestAdminProcessingJob(BaseAdminTests): def test_get(self): - response = self.get('/admin/processing_jobs/') + response = self.get("/admin/processing_jobs/") self.assertEqual(response.code, 200) - self.assertIn("Available Commands", response.body.decode('ascii')) + self.assertIn("Available Commands", response.body.decode("ascii")) class TestAJAXAdminProcessingJobListing(BaseAdminTests): def test_get(self): - response = self.get('/admin/processing_jobs/list?sEcho=3&commandId=2') + response = self.get("/admin/processing_jobs/list?sEcho=3&commandId=2") self.assertEqual(response.code, 200) - exp = {'sEcho': '3', 'recordsTotal': 0, 'recordsFiltered': 0, - 'data': []} + exp = {"sEcho": "3", "recordsTotal": 0, "recordsFiltered": 0, "data": []} self.assertEqual(loads(response.body), exp) def test_get_missing_argument(self): - response = self.get('/admin/processing_jobs/list?sEcho=1') + response = self.get("/admin/processing_jobs/list?sEcho=1") self.assertEqual(response.code, 400) - self.assertIn("Missing argument commandId", - response.body.decode('ascii')) + self.assertIn("Missing argument commandId", response.body.decode("ascii")) class TestSampleValidation(BaseAdminTests): def test_get(self): - response = self.get('/admin/sample_validation/') + response = self.get("/admin/sample_validation/") self.assertEqual(response.code, 200) def test_post(self): # Check success - post_args = { - 'qid': 1, - 'snames': 'SKB1.640202 SKB2.640194 BLANK.1A BLANK.1B' - } - response = self.post('/admin/sample_validation/', post_args) + post_args = {"qid": 1, "snames": "SKB1.640202 SKB2.640194 BLANK.1A BLANK.1B"} + response = self.post("/admin/sample_validation/", post_args) self.assertEqual(response.code, 200) - snames = ['SKB1.640202', 'SKB2.640194', 'BLANK.1A', 'BLANK.1B'] - body = response.body.decode('ascii') + snames = ["SKB1.640202", "SKB2.640194", "BLANK.1A", "BLANK.1B"] + body = response.body.decode("ascii") for name in snames: self.assertIn(name, body) # Check success with tube_id - md_dict = {'SKB1.640202': {'tube_id': '12345'}} - md_ext = pd.DataFrame.from_dict(md_dict, orient='index', dtype=str) + md_dict = {"SKB1.640202": {"tube_id": "12345"}} + md_ext = pd.DataFrame.from_dict(md_dict, orient="index", dtype=str) ST(1).extend(md_ext) - post_args = { - 'qid': 1, - 'snames': '12345 SKB2.640194 BLANK.1A BLANK.1B' - } - response = self.post('/admin/sample_validation/', post_args) + post_args = {"qid": 1, "snames": "12345 SKB2.640194 BLANK.1A BLANK.1B"} + response = self.post("/admin/sample_validation/", post_args) self.assertEqual(response.code, 200) - snames = ['SKB2.640194', 'SKB1.640202, tube_id: 12345'] - body = response.body.decode('ascii') + snames = ["SKB2.640194", "SKB1.640202, tube_id: 12345"] + body = response.body.decode("ascii") for name in snames: self.assertIn(name, body) # Check failure: invalid qiita id - post_args = { - 'qid': 2, - 'snames': 'SKB1.640202 SKB2.640194 BLANK.1A BLANK.1B' - } - response = 
self.post('/admin/sample_validation/', post_args) + post_args = {"qid": 2, "snames": "SKB1.640202 SKB2.640194 BLANK.1A BLANK.1B"} + response = self.post("/admin/sample_validation/", post_args) self.assertEqual(response.code, 200) - self.assertIn('Study 2 does not exist', response.body.decode('ascii')) + self.assertIn("Study 2 does not exist", response.body.decode("ascii")) if __name__ == "__main__": diff --git a/qiita_pet/test/test_auth_handlers.py b/qiita_pet/test/test_auth_handlers.py index 1188c79bd..85b1b6da0 100644 --- a/qiita_pet/test/test_auth_handlers.py +++ b/qiita_pet/test/test_auth_handlers.py @@ -7,61 +7,49 @@ # ----------------------------------------------------------------------------- from unittest import main -from qiita_pet.test.tornado_test_base import TestHandlerBase + from qiita_db.user import User +from qiita_pet.test.tornado_test_base import TestHandlerBase class TestAuthCreateHandler(TestHandlerBase): - def test_get(self): - response = self.get('/auth/create/') + response = self.get("/auth/create/") self.assertEqual(response.code, 200) def test_post(self): - post_args = { - 'email': 'newuser@foo.bar', - 'newpass': 'password' - } - response = self.post('/auth/create/', post_args) + post_args = {"email": "newuser@foo.bar", "newpass": "password"} + response = self.post("/auth/create/", post_args) # Make sure page response loaded sucessfully self.assertEqual(response.code, 200) class TestAuthVerifyHandler(TestHandlerBase): - def test_get(self): - response = self.get('/auth/verify/SOMETHINGHERE?email=test%40foo.bar') + response = self.get("/auth/verify/SOMETHINGHERE?email=test%40foo.bar") self.assertEqual(response.code, 200) - User.create('new@test.com', 'Somesortofpass') - response = self.get('/auth/verify/SOMETHINGHERE?email=new%40test.bar') + User.create("new@test.com", "Somesortofpass") + response = self.get("/auth/verify/SOMETHINGHERE?email=new%40test.bar") self.assertEqual(response.code, 200) class TestAuthLoginHandler(TestHandlerBase): def test_get(self): - response = self.get('/auth/login/') + response = self.get("/auth/login/") self.assertEqual(response.code, 200) # make sure redirect happened properly port = self.get_http_port() - self.assertEqual(response.effective_url, 'http://127.0.0.1:%d/' % port) + self.assertEqual(response.effective_url, "http://127.0.0.1:%d/" % port) def test_post_correct_pass(self): - post_args = { - 'username': 'test@foo.bar', - 'passwd': 'password', - 'next': '/' - } - response = self.post('/auth/login/', post_args) + post_args = {"username": "test@foo.bar", "passwd": "password", "next": "/"} + response = self.post("/auth/login/", post_args) self.assertEqual(response.code, 200) def test_post_wrong_pass(self): - post_args = { - 'username': 'test@foo.bar', - 'passwd': 'wrongpass', - 'next': '/' - } - response = self.post('/auth/login/', post_args) + post_args = {"username": "test@foo.bar", "passwd": "wrongpass", "next": "/"} + response = self.post("/auth/login/", post_args) self.assertEqual(response.code, 200) def test_set_current_user(self): @@ -71,11 +59,11 @@ def test_set_current_user(self): class TestAuthLogoutHandler(TestHandlerBase): def test_get(self): - response = self.get('/auth/login/') + response = self.get("/auth/login/") self.assertEqual(response.code, 200) # make sure redirect happened properly port = self.get_http_port() - self.assertEqual(response.effective_url, 'http://127.0.0.1:%d/' % port) + self.assertEqual(response.effective_url, "http://127.0.0.1:%d/" % port) if __name__ == "__main__": diff --git 
a/qiita_pet/test/test_base_handlers.py b/qiita_pet/test/test_base_handlers.py index a8df18f07..777719eee 100644 --- a/qiita_pet/test/test_base_handlers.py +++ b/qiita_pet/test/test_base_handlers.py @@ -7,31 +7,33 @@ # ----------------------------------------------------------------------------- from unittest import main + from qiita_pet.test.tornado_test_base import TestHandlerBase class TestMainHandler(TestHandlerBase): def test_get(self): - response = self.get('/') + response = self.get("/") self.assertEqual(response.code, 200) class TestNoPageHandler(TestHandlerBase): def test_get(self): - response = self.get('/THISPAGENOEXIST') + response = self.get("/THISPAGENOEXIST") self.assertEqual(response.code, 404) class TestIFrame(TestHandlerBase): def test_get(self): - response = self.get('/iframe/') + response = self.get("/iframe/") self.assertEqual(response.code, 200) - self.assertIn("No content", response.body.decode('ascii')) + self.assertIn("No content", response.body.decode("ascii")) - response = self.get('/iframe/?iframe=qiita-terms') + response = self.get("/iframe/?iframe=qiita-terms") self.assertEqual(response.code, 200) - self.assertIn('src="/static/qiita_data_terms_of_use.html"', - response.body.decode('ascii')) + self.assertIn( + 'src="/static/qiita_data_terms_of_use.html"', response.body.decode("ascii") + ) if __name__ == "__main__": diff --git a/qiita_pet/test/test_download.py b/qiita_pet/test/test_download.py index 8d123e918..9d2b6f1c4 100644 --- a/qiita_pet/test/test_download.py +++ b/qiita_pet/test/test_download.py @@ -6,31 +6,29 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -import pandas as pd -from unittest import main -from mock import Mock -from os.path import exists, isdir, join, basename -from os import remove, makedirs, close +import json +from io import StringIO +from os import close, makedirs, remove +from os.path import basename, exists, isdir, join from shutil import rmtree from tempfile import mkdtemp, mkstemp -from io import StringIO +from unittest import main +from urllib.parse import urlparse -from biom.util import biom_open +import pandas as pd from biom import example_table as et +from biom.util import biom_open +from mock import Mock -from qiita_pet.test.tornado_test_base import TestHandlerBase -from qiita_pet.handlers.base_handlers import BaseHandler -from qiita_db.user import User -from qiita_db.study import Study from qiita_db.artifact import Artifact -from qiita_db.software import Parameters, Command - -from urllib.parse import urlparse -import json +from qiita_db.software import Command, Parameters +from qiita_db.study import Study +from qiita_db.user import User +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.test.tornado_test_base import TestHandlerBase class TestDownloadHandler(TestHandlerBase): - def setUp(self): super(TestDownloadHandler, self).setUp() self._clean_up_files = [] @@ -46,66 +44,75 @@ def tearDown(self): def test_download(self): # check success - response = self.get('/download/1') + response = self.get("/download/1") self.assertEqual(response.code, 200) - self.assertEqual(response.body.decode('ascii'), ( - "This installation of Qiita was not equipped with nginx, so it " - "is incapable of serving files. 
The file you attempted to " - "download is located at raw_data/1_s_G1_L001_sequences.fastq.gz")) self.assertEqual( - response.headers['Content-Disposition'], - "attachment; filename=1_1_s_G1_L001_sequences.fastq.gz") + response.body.decode("ascii"), + ( + "This installation of Qiita was not equipped with nginx, so it " + "is incapable of serving files. The file you attempted to " + "download is located at raw_data/1_s_G1_L001_sequences.fastq.gz" + ), + ) + self.assertEqual( + response.headers["Content-Disposition"], + "attachment; filename=1_1_s_G1_L001_sequences.fastq.gz", + ) # other tests to validate the filename - response = self.get('/download/2') + response = self.get("/download/2") self.assertEqual( - response.headers['Content-Disposition'], - "attachment; filename=1_1_s_G1_L001_sequences_barcodes.fastq.gz") - response = self.get('/download/3') + response.headers["Content-Disposition"], + "attachment; filename=1_1_s_G1_L001_sequences_barcodes.fastq.gz", + ) + response = self.get("/download/3") self.assertEqual( - response.headers['Content-Disposition'], - "attachment; filename=2_1_seqs.fna") - response = self.get('/download/18') + response.headers["Content-Disposition"], "attachment; filename=2_1_seqs.fna" + ) + response = self.get("/download/18") self.assertEqual( - response.headers['Content-Disposition'], - "attachment; filename=1_prep_1_19700101-000000.txt") - response = self.get('/download/22') + response.headers["Content-Disposition"], + "attachment; filename=1_prep_1_19700101-000000.txt", + ) + response = self.get("/download/22") self.assertEqual( - response.headers['Content-Disposition'], - "attachment; filename=7_biom_table.biom") + response.headers["Content-Disposition"], + "attachment; filename=7_biom_table.biom", + ) # failure - response = self.get('/download/1000') + response = self.get("/download/1000") self.assertEqual(response.code, 403) # directory a = Artifact(1) - fd, fp = mkstemp(suffix='.html') + fd, fp = mkstemp(suffix=".html") close(fd) - with open(fp, 'w') as f: - f.write('\n') + with open(fp, "w") as f: + f.write("\n") self._clean_up_files.append(fp) dirpath = mkdtemp() - fd, fp2 = mkstemp(suffix='.txt', dir=dirpath) + fd, fp2 = mkstemp(suffix=".txt", dir=dirpath) close(fd) - with open(fp2, 'w') as f: - f.write('\n') + with open(fp2, "w") as f: + f.write("\n") self._clean_up_files.append(dirpath) a.set_html_summary(fp, support_dir=dirpath) for x in a.filepaths: - if x['fp_type'] == 'html_summary_dir': + if x["fp_type"] == "html_summary_dir": break - response = self.get('/download/%d' % x['fp_id']) + response = self.get("/download/%d" % x["fp_id"]) self.assertEqual(response.code, 200) fp_name = basename(fp2) dirname = basename(dirpath) - self.assertEqual(response.body.decode('ascii'), - "- 1 /protected/FASTQ/1/%s/%s FASTQ/1/%s/%s\n" % ( - dirname, fp_name, dirname, fp_name)) + self.assertEqual( + response.body.decode("ascii"), + "- 1 /protected/FASTQ/1/%s/%s FASTQ/1/%s/%s\n" + % (dirname, fp_name, dirname, fp_name), + ) class TestDownloadStudyBIOMSHandler(TestHandlerBase): - def setUp(self): super(TestDownloadStudyBIOMSHandler, self).setUp() self._clean_up_files = [] @@ -123,90 +130,92 @@ def test_download_study(self): tmp_dir = mkdtemp() self._clean_up_files.append(tmp_dir) - biom_fp = join(tmp_dir, 'otu_table.biom') - smr_dir = join(tmp_dir, 'sortmerna_picked_otus') - log_dir = join(smr_dir, 'seqs_otus.log') - tgz = join(tmp_dir, 'sortmerna_picked_otus.tgz') + biom_fp = join(tmp_dir, "otu_table.biom") + smr_dir = join(tmp_dir, "sortmerna_picked_otus") + 
log_dir = join(smr_dir, "seqs_otus.log") + tgz = join(tmp_dir, "sortmerna_picked_otus.tgz") - with biom_open(biom_fp, 'w') as f: + with biom_open(biom_fp, "w") as f: et.to_hdf5(f, "test") makedirs(smr_dir) - with open(log_dir, 'w') as f: - f.write('\n') - with open(tgz, 'w') as f: - f.write('\n') + with open(log_dir, "w") as f: + f.write("\n") + with open(tgz, "w") as f: + f.write("\n") - files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')] + files_biom = [(biom_fp, "biom"), (smr_dir, "directory"), (tgz, "tgz")] params = Parameters.from_default_params( - next(Command(3).default_parameter_sets), {'input_data': 1}) - a = Artifact.create(files_biom, "BIOM", parents=[Artifact(2)], - processing_parameters=params) + next(Command(3).default_parameter_sets), {"input_data": 1} + ) + a = Artifact.create( + files_biom, "BIOM", parents=[Artifact(2)], processing_parameters=params + ) for x in a.filepaths: - self._clean_up_files.append(x['fp']) + self._clean_up_files.append(x["fp"]) - response = self.get('/download_study_bioms/1') + response = self.get("/download_study_bioms/1") self.assertEqual(response.code, 200) exp = ( - '- \\d+ /protected/processed_data/1_study_1001_closed_reference_' - 'otu_table.biom processed_data/1_study_1001_closed_reference_otu' - '_table.biom\n' - '- \\d+ /protected/templates/1_prep_1_qiime_19700101-000000.txt ' - 'mapping_files/4_mapping_file.txt\n' - '- \\d+ /protected/processed_data/1_study_1001_closed_reference_' - 'otu_table.biom processed_data/1_study_1001_closed_reference_otu' - '_table.biom\n' - '- \\d+ /protected/templates/1_prep_1_qiime_19700101-000000.txt ' - 'mapping_files/5_mapping_file.txt\n' - '- \\d+ /protected/processed_data/1_study_1001_closed_reference_' - 'otu_table_Silva.biom processed_data/1_study_1001_closed_' - 'reference_otu_table_Silva.biom\n' - '- \\d+ /protected/templates/1_prep_1_qiime_19700101-000000.txt ' - 'mapping_files/6_mapping_file.txt\n' - '- \\d+ /protected/BIOM/7/biom_table.biom BIOM/7/biom_table.biom\n' - '- \\d+ /protected/BIOM/10/otu_table.biom BIOM/10/otu_table.biom\n' - '- \\d+ /protected/BIOM/10/sortmerna_picked_otus/seqs_otus.log ' - 'BIOM/10/sortmerna_picked_otus/seqs_otus.log\n' - '- \\d+ /protected/templates/1_prep_1_qiime_19700101-000000.txt ' - 'mapping_files/10_mapping_file.txt\n') - self.assertRegex(response.body.decode('ascii'), exp) - - response = self.get('/download_study_bioms/200') + "- \\d+ /protected/processed_data/1_study_1001_closed_reference_" + "otu_table.biom processed_data/1_study_1001_closed_reference_otu" + "_table.biom\n" + "- \\d+ /protected/templates/1_prep_1_qiime_19700101-000000.txt " + "mapping_files/4_mapping_file.txt\n" + "- \\d+ /protected/processed_data/1_study_1001_closed_reference_" + "otu_table.biom processed_data/1_study_1001_closed_reference_otu" + "_table.biom\n" + "- \\d+ /protected/templates/1_prep_1_qiime_19700101-000000.txt " + "mapping_files/5_mapping_file.txt\n" + "- \\d+ /protected/processed_data/1_study_1001_closed_reference_" + "otu_table_Silva.biom processed_data/1_study_1001_closed_" + "reference_otu_table_Silva.biom\n" + "- \\d+ /protected/templates/1_prep_1_qiime_19700101-000000.txt " + "mapping_files/6_mapping_file.txt\n" + "- \\d+ /protected/BIOM/7/biom_table.biom BIOM/7/biom_table.biom\n" + "- \\d+ /protected/BIOM/10/otu_table.biom BIOM/10/otu_table.biom\n" + "- \\d+ /protected/BIOM/10/sortmerna_picked_otus/seqs_otus.log " + "BIOM/10/sortmerna_picked_otus/seqs_otus.log\n" + "- \\d+ /protected/templates/1_prep_1_qiime_19700101-000000.txt " + 
"mapping_files/10_mapping_file.txt\n" + ) + self.assertRegex(response.body.decode("ascii"), exp) + + response = self.get("/download_study_bioms/200") self.assertEqual(response.code, 405) # changing user so we can test the failures - BaseHandler.get_current_user = Mock( - return_value=User("demo@microbio.me")) - response = self.get('/download_study_bioms/1') + BaseHandler.get_current_user = Mock(return_value=User("demo@microbio.me")) + response = self.get("/download_study_bioms/1") self.assertEqual(response.code, 405) - a.visibility = 'public' - response = self.get('/download_study_bioms/1') + a.visibility = "public" + response = self.get("/download_study_bioms/1") # returning visibility - a.visibility = 'private' + a.visibility = "private" self.assertEqual(response.code, 200) # we should have the same files than the previous test, except artifact # and mapping file 7: position 6; thus removing 6 - exp = exp.split('\n') + exp = exp.split("\n") exp.pop(6) - exp = '\n'.join(exp) - self.assertRegex(response.body.decode('ascii'), exp) + exp = "\n".join(exp) + self.assertRegex(response.body.decode("ascii"), exp) class TestDownloadRelease(TestHandlerBase): - def test_download(self): # check success - response = self.get('/release/download/1') + response = self.get("/release/download/1") self.assertEqual(response.code, 200) self.assertIn( "This installation of Qiita was not equipped with nginx, so it is " "incapable of serving files. The file you attempted to download " - "is located at", response.body.decode('ascii')) + "is located at", + response.body.decode("ascii"), + ) class TestDownloadRawData(TestHandlerBase): - def setUp(self): super(TestDownloadRawData, self).setUp() self._clean_up_files = [] @@ -224,348 +233,356 @@ def test_download_raw_data(self): # it's possible that one of the tests is deleting the raw data # so we will make sure that the files exists so this test passes study = Study(1) - all_files = [x['fp'] for a in study.artifacts() - for x in a.filepaths] + all_files = [x["fp"] for a in study.artifacts() for x in a.filepaths] for fp in all_files: if not exists(fp): - with open(fp, 'w') as f: - f.write('') - response = self.get('/download_raw_data/1') + with open(fp, "w") as f: + f.write("") + response = self.get("/download_raw_data/1") self.assertEqual(response.code, 200) exp = ( - '- 58 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz ' - 'raw_data/1_s_G1_L001_sequences.fastq.gz\n' - '- 58 /protected/raw_data/' - '1_s_G1_L001_sequences_barcodes.fastq.gz ' - 'raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n' - '- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt ' - 'mapping_files/1_mapping_file.txt\n' - '- 1093210 /protected/BIOM/7/biom_table.biom ' - 'BIOM/7/biom_table.biom\n') - self.assertRegex(response.body.decode('ascii'), exp) - - response = self.get('/download_study_bioms/200') + "- 58 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz " + "raw_data/1_s_G1_L001_sequences.fastq.gz\n" + "- 58 /protected/raw_data/" + "1_s_G1_L001_sequences_barcodes.fastq.gz " + "raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n" + "- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt " + "mapping_files/1_mapping_file.txt\n" + "- 1093210 /protected/BIOM/7/biom_table.biom " + "BIOM/7/biom_table.biom\n" + ) + self.assertRegex(response.body.decode("ascii"), exp) + + response = self.get("/download_study_bioms/200") self.assertEqual(response.code, 405) # changing user so we can test the failures - BaseHandler.get_current_user = Mock( - 
return_value=User("demo@microbio.me")) - response = self.get('/download_study_bioms/1') + BaseHandler.get_current_user = Mock(return_value=User("demo@microbio.me")) + response = self.get("/download_study_bioms/1") self.assertEqual(response.code, 405) # now, let's make sure that when artifacts are public AND the # public_raw_download any user can download the files study.public_raw_download = True - BaseHandler.get_current_user = Mock( - return_value=User("demo@microbio.me")) - response = self.get('/download_study_bioms/1') + BaseHandler.get_current_user = Mock(return_value=User("demo@microbio.me")) + response = self.get("/download_study_bioms/1") self.assertEqual(response.code, 405) # 7 is an uploaded biom, which should now be available but as it's a # biom, only the prep info file will be retrieved - Artifact(7).visibility = 'public' - BaseHandler.get_current_user = Mock( - return_value=User("demo@microbio.me")) - response = self.get('/download_study_bioms/1') + Artifact(7).visibility = "public" + BaseHandler.get_current_user = Mock(return_value=User("demo@microbio.me")) + response = self.get("/download_study_bioms/1") self.assertEqual(response.code, 200) - exp = ('- [0-9]* /protected/BIOM/7/biom_table.biom ' - 'BIOM/7/biom_table.biom\n') - self.assertRegex(response.body.decode('ascii'), exp) + exp = "- [0-9]* /protected/BIOM/7/biom_table.biom BIOM/7/biom_table.biom\n" + self.assertRegex(response.body.decode("ascii"), exp) class TestDownloadEBISampleAccessions(TestHandlerBase): - def test_download(self): # check success - response = self.get('/download_ebi_accessions/samples/1') - exp = ("sample_name\tsample_accession\n1.SKB2.640194\tERS000008\n" - "1.SKM4.640180\tERS000004\n1.SKB3.640195\tERS000024\n" - "1.SKB6.640176\tERS000025\n1.SKD6.640190\tERS000007\n" - "1.SKM6.640187\tERS000022\n1.SKD9.640182\tERS000019\n" - "1.SKM8.640201\tERS000014\n1.SKM2.640199\tERS000015\n" - "1.SKD2.640178\tERS000009\n1.SKB7.640196\tERS000002\n" - "1.SKD4.640185\tERS000023\n1.SKB8.640193\tERS000000\n" - "1.SKM3.640197\tERS000018\n1.SKD5.640186\tERS000017\n" - "1.SKB1.640202\tERS000011\n1.SKM1.640183\tERS000025\n" - "1.SKD1.640179\tERS000012\n1.SKD3.640198\tERS000013\n" - "1.SKB5.640181\tERS000006\n1.SKB4.640189\tERS000020\n" - "1.SKB9.640200\tERS000016\n1.SKM9.640192\tERS000003\n" - "1.SKD8.640184\tERS000001\n1.SKM5.640177\tERS000005\n" - "1.SKM7.640188\tERS000010\n1.SKD7.640191\tERS000021") + response = self.get("/download_ebi_accessions/samples/1") + exp = ( + "sample_name\tsample_accession\n1.SKB2.640194\tERS000008\n" + "1.SKM4.640180\tERS000004\n1.SKB3.640195\tERS000024\n" + "1.SKB6.640176\tERS000025\n1.SKD6.640190\tERS000007\n" + "1.SKM6.640187\tERS000022\n1.SKD9.640182\tERS000019\n" + "1.SKM8.640201\tERS000014\n1.SKM2.640199\tERS000015\n" + "1.SKD2.640178\tERS000009\n1.SKB7.640196\tERS000002\n" + "1.SKD4.640185\tERS000023\n1.SKB8.640193\tERS000000\n" + "1.SKM3.640197\tERS000018\n1.SKD5.640186\tERS000017\n" + "1.SKB1.640202\tERS000011\n1.SKM1.640183\tERS000025\n" + "1.SKD1.640179\tERS000012\n1.SKD3.640198\tERS000013\n" + "1.SKB5.640181\tERS000006\n1.SKB4.640189\tERS000020\n" + "1.SKB9.640200\tERS000016\n1.SKM9.640192\tERS000003\n" + "1.SKD8.640184\tERS000001\n1.SKM5.640177\tERS000005\n" + "1.SKM7.640188\tERS000010\n1.SKD7.640191\tERS000021" + ) self.assertEqual(response.code, 200) # testing as lists so we ignore order - obs = response.body.decode('ascii').split('\n') - exp = exp.split('\n') + obs = response.body.decode("ascii").split("\n") + exp = exp.split("\n") self.assertCountEqual(obs, exp) # 
changing user so we can test the failures - BaseHandler.get_current_user = Mock( - return_value=User("demo@microbio.me")) - response = self.get('/download_ebi_accessions/samples/1') + BaseHandler.get_current_user = Mock(return_value=User("demo@microbio.me")) + response = self.get("/download_ebi_accessions/samples/1") self.assertEqual(response.code, 405) class TestDownloadEBIPrepAccessions(TestHandlerBase): - def test_download(self): # check success - response = self.get('/download_ebi_accessions/experiments/1') - exp = ("sample_name\texperiment_accession\n1.SKB2.640194\tERX0000008\n" - "1.SKM4.640180\tERX0000004\n1.SKB3.640195\tERX0000024\n" - "1.SKB6.640176\tERX0000025\n1.SKD6.640190\tERX0000007\n" - "1.SKM6.640187\tERX0000022\n1.SKD9.640182\tERX0000019\n" - "1.SKM8.640201\tERX0000014\n1.SKM2.640199\tERX0000015\n" - "1.SKD2.640178\tERX0000009\n1.SKB7.640196\tERX0000002\n" - "1.SKD4.640185\tERX0000023\n1.SKB8.640193\tERX0000000\n" - "1.SKM3.640197\tERX0000018\n1.SKD5.640186\tERX0000017\n" - "1.SKB1.640202\tERX0000011\n1.SKM1.640183\tERX0000026\n" - "1.SKD1.640179\tERX0000012\n1.SKD3.640198\tERX0000013\n" - "1.SKB5.640181\tERX0000006\n1.SKB4.640189\tERX0000020\n" - "1.SKB9.640200\tERX0000016\n1.SKM9.640192\tERX0000003\n" - "1.SKD8.640184\tERX0000001\n1.SKM5.640177\tERX0000005\n" - "1.SKM7.640188\tERX0000010\n1.SKD7.640191\tERX0000021") + response = self.get("/download_ebi_accessions/experiments/1") + exp = ( + "sample_name\texperiment_accession\n1.SKB2.640194\tERX0000008\n" + "1.SKM4.640180\tERX0000004\n1.SKB3.640195\tERX0000024\n" + "1.SKB6.640176\tERX0000025\n1.SKD6.640190\tERX0000007\n" + "1.SKM6.640187\tERX0000022\n1.SKD9.640182\tERX0000019\n" + "1.SKM8.640201\tERX0000014\n1.SKM2.640199\tERX0000015\n" + "1.SKD2.640178\tERX0000009\n1.SKB7.640196\tERX0000002\n" + "1.SKD4.640185\tERX0000023\n1.SKB8.640193\tERX0000000\n" + "1.SKM3.640197\tERX0000018\n1.SKD5.640186\tERX0000017\n" + "1.SKB1.640202\tERX0000011\n1.SKM1.640183\tERX0000026\n" + "1.SKD1.640179\tERX0000012\n1.SKD3.640198\tERX0000013\n" + "1.SKB5.640181\tERX0000006\n1.SKB4.640189\tERX0000020\n" + "1.SKB9.640200\tERX0000016\n1.SKM9.640192\tERX0000003\n" + "1.SKD8.640184\tERX0000001\n1.SKM5.640177\tERX0000005\n" + "1.SKM7.640188\tERX0000010\n1.SKD7.640191\tERX0000021" + ) self.assertEqual(response.code, 200) # testing as lists so we ignore order - obs = response.body.decode('ascii').split('\n') - exp = exp.split('\n') + obs = response.body.decode("ascii").split("\n") + exp = exp.split("\n") self.assertCountEqual(obs, exp) # changing user so we can test the failures - BaseHandler.get_current_user = Mock( - return_value=User("demo@microbio.me")) - response = self.get('/download_ebi_accessions/experiments/1') + BaseHandler.get_current_user = Mock(return_value=User("demo@microbio.me")) + response = self.get("/download_ebi_accessions/experiments/1") self.assertEqual(response.code, 405) class TestDownloadSampleInfoPerPrep(TestHandlerBase): - def test_download(self): # check success - response = self.get('/download_sample_info_per_prep/1') + response = self.get("/download_sample_info_per_prep/1") self.assertEqual(response.code, 200) - df = pd.read_csv(StringIO(response.body.decode('ascii')), sep='\t') + df = pd.read_csv(StringIO(response.body.decode("ascii")), sep="\t") # just testing shape as the actual content is tested in the dataframe # generation self.assertEqual(df.shape, (27, 33)) # changing user so we can test the failures - BaseHandler.get_current_user = Mock( - return_value=User("demo@microbio.me")) - response = 
self.get('/download_sample_info_per_prep/1') + BaseHandler.get_current_user = Mock(return_value=User("demo@microbio.me")) + response = self.get("/download_sample_info_per_prep/1") self.assertEqual(response.code, 405) class TestDownloadUpload(TestHandlerBase): - def test_download(self): # check failure - response = self.get('/download_upload/1/uploaded_file.txt') + response = self.get("/download_upload/1/uploaded_file.txt") self.assertEqual(response.code, 403) # check success BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) - response = self.get('/download_upload/1/uploaded_file.txt') + response = self.get("/download_upload/1/uploaded_file.txt") self.assertEqual(response.code, 200) class TestDownloadPublicHandler(TestHandlerBase): - def test_download(self): # check failures - response = self.get('/public_download/') + response = self.get("/public_download/") self.assertEqual(response.code, 422) - self.assertEqual(response.reason, 'You need to specify ' - 'both data (the data type you want to download - ' - 'raw/biom/sample_information/prep_information) and ' - 'study_id or prep_id') + self.assertEqual( + response.reason, + "You need to specify " + "both data (the data type you want to download - " + "raw/biom/sample_information/prep_information) and " + "study_id or prep_id", + ) - response = self.get('/public_download/?data=raw&study_id=10000') + response = self.get("/public_download/?data=raw&study_id=10000") self.assertEqual(response.code, 422) - self.assertEqual(response.reason, 'Study does not exist') + self.assertEqual(response.reason, "Study does not exist") - response = self.get('/public_download/?data=raw&study_id=1') + response = self.get("/public_download/?data=raw&study_id=1") self.assertEqual(response.code, 404) - self.assertEqual(response.reason, 'Study is not public. ' - 'If this is a mistake contact: foo@bar.com') + self.assertEqual( + response.reason, + "Study is not public. If this is a mistake contact: foo@bar.com", + ) # 7 is an uploaded biom, which should now be available but as it's a # biom, only the prep info file will be retrieved - Artifact(7).visibility = 'public' - response = self.get('/public_download/?data=raw&study_id=1') + Artifact(7).visibility = "public" + response = self.get("/public_download/?data=raw&study_id=1") self.assertEqual(response.code, 422) - self.assertEqual(response.reason, 'No raw data access. ' - 'If this is a mistake contact: foo@bar.com') + self.assertEqual( + response.reason, + "No raw data access. 
If this is a mistake contact: foo@bar.com", + ) # check success - response = self.get('/public_download/?data=biom&study_id=1') + response = self.get("/public_download/?data=biom&study_id=1") self.assertEqual(response.code, 200) - exp = ('- [0-9]* /protected/BIOM/7/biom_table.biom' - ' BIOM/7/biom_table.biom\n') - self.assertRegex(response.body.decode('ascii'), exp) + exp = "- [0-9]* /protected/BIOM/7/biom_table.biom BIOM/7/biom_table.biom\n" + self.assertRegex(response.body.decode("ascii"), exp) Study(1).public_raw_download = True # check success - response = self.get('/public_download/?data=raw&study_id=1') + response = self.get("/public_download/?data=raw&study_id=1") self.assertEqual(response.code, 200) - exp = ('- [0-9]* /protected/BIOM/7/biom_table.biom' - ' BIOM/7/biom_table.biom\n') - self.assertRegex(response.body.decode('ascii'), exp) + exp = "- [0-9]* /protected/BIOM/7/biom_table.biom BIOM/7/biom_table.biom\n" + self.assertRegex(response.body.decode("ascii"), exp) # testing data_type - response = self.get( - '/public_download/?data=raw&study_id=1&data_type=X') + response = self.get("/public_download/?data=raw&study_id=1&data_type=X") self.assertEqual(response.code, 422) - self.assertEqual(response.reason, 'Not a valid data_type. Valid types ' - 'are: 16S, 18S, ITS, Proteomic, Metabolomic, ' - 'Metagenomic, Multiomic, Metatranscriptomics, ' - 'Viromics, Genomics, Transcriptomics, ' - 'Job Output Folder') + self.assertEqual( + response.reason, + "Not a valid data_type. Valid types " + "are: 16S, 18S, ITS, Proteomic, Metabolomic, " + "Metagenomic, Multiomic, Metatranscriptomics, " + "Viromics, Genomics, Transcriptomics, " + "Job Output Folder", + ) - response = self.get( - '/public_download/?data=raw&study_id=1&data_type=Genomics') + response = self.get("/public_download/?data=raw&study_id=1&data_type=Genomics") self.assertEqual(response.code, 422) - self.assertEqual(response.reason, 'Nothing to download. If this is a ' - 'mistake contact: foo@bar.com') - response = self.get( - '/public_download/?data=biom&study_id=1&data_type=Genomics') + self.assertEqual( + response.reason, + "Nothing to download. If this is a mistake contact: foo@bar.com", + ) + response = self.get("/public_download/?data=biom&study_id=1&data_type=Genomics") self.assertEqual(response.code, 422) - self.assertEqual(response.reason, 'Nothing to download. If this is a ' - 'mistake contact: foo@bar.com') + self.assertEqual( + response.reason, + "Nothing to download. 
If this is a mistake contact: foo@bar.com", + ) # check success - Artifact(5).visibility = 'public' - response = self.get( - '/public_download/?data=raw&study_id=1&data_type=18S') + Artifact(5).visibility = "public" + response = self.get("/public_download/?data=raw&study_id=1&data_type=18S") self.assertEqual(response.code, 200) exp = ( - '[0-9]* [0-9]* /protected/raw_data/1_s_G1_L001_sequences_barcodes' - '.fastq.gz raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n' - '- [0-9]* /protected/templates/1_prep_1_qiime_19700101-000000.txt ' - 'mapping_files/1_mapping_file.txt') - self.assertRegex(response.body.decode('ascii'), exp) + "[0-9]* [0-9]* /protected/raw_data/1_s_G1_L001_sequences_barcodes" + ".fastq.gz raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n" + "- [0-9]* /protected/templates/1_prep_1_qiime_19700101-000000.txt " + "mapping_files/1_mapping_file.txt" + ) + self.assertRegex(response.body.decode("ascii"), exp) - response = self.get( - '/public_download/?data=biom&study_id=1&data_type=18S') + response = self.get("/public_download/?data=biom&study_id=1&data_type=18S") self.assertEqual(response.code, 200) exp = ( - '- [0-9]* /protected/processed_data/1_study_1001_closed_' - 'reference_otu_table.biom processed_data/1_study_1001_closed_' - 'reference_otu_table.biom\n- [0-9]* /protected/templates/1_prep_' - '1_qiime_19700101-000000.txt mapping_files/4_mapping_file.txt\n' - '- [0-9]* /protected/processed_data/1_study_1001_closed_' - 'reference_otu_table.biom processed_data/1_study_1001_closed_' - 'reference_otu_table.biom\n- [0-9]* /protected/templates/1_prep_' - '1_qiime_19700101-000000.txt mapping_files/5_mapping_file.txt\n') + "- [0-9]* /protected/processed_data/1_study_1001_closed_" + "reference_otu_table.biom processed_data/1_study_1001_closed_" + "reference_otu_table.biom\n- [0-9]* /protected/templates/1_prep_" + "1_qiime_19700101-000000.txt mapping_files/4_mapping_file.txt\n" + "- [0-9]* /protected/processed_data/1_study_1001_closed_" + "reference_otu_table.biom processed_data/1_study_1001_closed_" + "reference_otu_table.biom\n- [0-9]* /protected/templates/1_prep_" + "1_qiime_19700101-000000.txt mapping_files/5_mapping_file.txt\n" + ) - self.assertRegex(response.body.decode('ascii'), exp) + self.assertRegex(response.body.decode("ascii"), exp) def test_download_sample_information(self): - response = self.get('/public_download/?data=sample_information') + response = self.get("/public_download/?data=sample_information") self.assertEqual(response.code, 422) self.assertEqual( - response.reason, 'You need to specify both data (the data type ' - 'you want to download - raw/biom/sample_information/' - 'prep_information) and study_id or prep_id') + response.reason, + "You need to specify both data (the data type " + "you want to download - raw/biom/sample_information/" + "prep_information) and study_id or prep_id", + ) - response = self.get('/public_download/?data=sample_information&' - 'data_type=16S&study_id=1') + response = self.get( + "/public_download/?data=sample_information&data_type=16S&study_id=1" + ) self.assertEqual(response.code, 422) - self.assertEqual(response.reason, 'If requesting an information file ' - 'you cannot specify the data_type') + self.assertEqual( + response.reason, + "If requesting an information file you cannot specify the data_type", + ) - response = self.get( - '/public_download/?data=sample_information&prep_id=1') + response = self.get("/public_download/?data=sample_information&prep_id=1") self.assertEqual(response.code, 422) - 
self.assertEqual(response.reason, 'Review your parameters, not a ' - 'valid combination') + self.assertEqual( + response.reason, "Review your parameters, not a valid combination" + ) - response = self.get( - '/public_download/?data=sample_information&study_id=10000') + response = self.get("/public_download/?data=sample_information&study_id=10000") self.assertEqual(response.code, 422) - self.assertEqual(response.reason, 'Sample information does not exist') + self.assertEqual(response.reason, "Sample information does not exist") - response = self.get( - '/public_download/?data=prep_information&prep_id=10000') + response = self.get("/public_download/?data=prep_information&prep_id=10000") self.assertEqual(response.code, 422) - self.assertEqual( - response.reason, 'Preparation information does not exist') + self.assertEqual(response.reason, "Preparation information does not exist") - response = self.get( - '/public_download/?data=sample_information&study_id=1') + response = self.get("/public_download/?data=sample_information&study_id=1") self.assertEqual(response.code, 200) - exp = ('[0-9]* [0-9]* /protected/templates/1_[0-9]*-[0-9]*.txt ' - 'templates/1_[0-9]*-[0-9]*.txt\n') - self.assertRegex(response.body.decode('ascii'), exp) + exp = ( + "[0-9]* [0-9]* /protected/templates/1_[0-9]*-[0-9]*.txt " + "templates/1_[0-9]*-[0-9]*.txt\n" + ) + self.assertRegex(response.body.decode("ascii"), exp) - response = self.get( - '/public_download/?data=prep_information&prep_id=1') + response = self.get("/public_download/?data=prep_information&prep_id=1") self.assertEqual(response.code, 200) - exp = ('- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]' - '*.txt templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt\n') - self.assertRegex(response.body.decode('ascii'), exp) + exp = ( + "- [0-9]* /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]" + "*.txt templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt\n" + ) + self.assertRegex(response.body.decode("ascii"), exp) class TestDownloadPublicArtifactHandler(TestHandlerBase): - def test_download(self): # check failures - response = self.get('/public_artifact_download/') + response = self.get("/public_artifact_download/") self.assertEqual(response.code, 422) - self.assertEqual(response.reason, 'You need to specify an artifact id') + self.assertEqual(response.reason, "You need to specify an artifact id") - response = self.get('/public_artifact_download/?artifact_id=10000') + response = self.get("/public_artifact_download/?artifact_id=10000") self.assertEqual(response.code, 404) - self.assertEqual(response.reason, 'Artifact does not exist') + self.assertEqual(response.reason, "Artifact does not exist") - response = self.get('/public_artifact_download/?artifact_id=3') + response = self.get("/public_artifact_download/?artifact_id=3") self.assertEqual(response.code, 404) - self.assertEqual(response.reason, 'Artifact is not public. If this is ' - 'a mistake contact: foo@bar.com') + self.assertEqual( + response.reason, + "Artifact is not public. 
If this is a mistake contact: foo@bar.com", + ) # check success - Artifact(5).visibility = 'public' - response = self.get('/public_artifact_download/?artifact_id=5') + Artifact(5).visibility = "public" + response = self.get("/public_artifact_download/?artifact_id=5") self.assertEqual(response.code, 200) exp = ( - '- [0-9]* /protected/processed_data/' - '1_study_1001_closed_reference_otu_table.biom ' - 'processed_data/1_study_1001_closed_reference_otu_table.biom\n' - '- [0-9]* /protected/templates/1_prep_1_qiime_19700101-000000.txt ' - 'mapping_files/5_mapping_file.txt') - self.assertRegex(response.body.decode('ascii'), exp) + "- [0-9]* /protected/processed_data/" + "1_study_1001_closed_reference_otu_table.biom " + "processed_data/1_study_1001_closed_reference_otu_table.biom\n" + "- [0-9]* /protected/templates/1_prep_1_qiime_19700101-000000.txt " + "mapping_files/5_mapping_file.txt" + ) + self.assertRegex(response.body.decode("ascii"), exp) # Now let's check download prep with no raw data access - response = self.get('/public_download/?data=raw&prep_id=1') - self.assertTrue(response.reason.startswith('No raw data access.')) + response = self.get("/public_download/?data=raw&prep_id=1") + self.assertTrue(response.reason.startswith("No raw data access.")) # Now success Study(1).public_raw_download = True - response = self.get('/public_download/?data=raw&prep_id=1') + response = self.get("/public_download/?data=raw&prep_id=1") self.assertEqual(response.code, 200) - exp = ('- [0-9]* /protected/raw_data/1_s_G1_L001_sequences.fastq.gz ' - 'raw_data/1_s_G1_L001_sequences.fastq.gz\n- [0-9]* /protected' - '/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz raw_data/' - '1_s_G1_L001_sequences_barcodes.fastq.gz\n- [0-9]* /protected/' - 'templates/1_prep_1_qiime_19700101-000000.txt mapping_files/' - '1_mapping_file.txt\n') - self.assertRegex(response.body.decode('ascii'), exp) + exp = ( + "- [0-9]* /protected/raw_data/1_s_G1_L001_sequences.fastq.gz " + "raw_data/1_s_G1_L001_sequences.fastq.gz\n- [0-9]* /protected" + "/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz raw_data/" + "1_s_G1_L001_sequences_barcodes.fastq.gz\n- [0-9]* /protected/" + "templates/1_prep_1_qiime_19700101-000000.txt mapping_files/" + "1_mapping_file.txt\n" + ) + self.assertRegex(response.body.decode("ascii"), exp) # for simplicity, let's just check respose.code - response = self.get('/public_download/?data=biom&prep_id=1') + response = self.get("/public_download/?data=biom&prep_id=1") self.assertEqual(response.code, 200) def test_download_sample_information(self): - response = self.get('/public_artifact_download/') + response = self.get("/public_artifact_download/") self.assertEqual(response.code, 422) - self.assertEqual(response.reason, 'You need to specify an artifact id') + self.assertEqual(response.reason, "You need to specify an artifact id") class TestDownloadPrivateArtifactHandler(TestHandlerBase): - def test_download(self): # you can't post None, you must post an empty byte array - response = self.post('/private_download/1', b'') + response = self.post("/private_download/1", b"") self.assertEqual(response.code, 200) resp_dict = json.loads(response.body) @@ -573,15 +590,15 @@ def test_download(self): response_file = self.get(o.path) self.assertEqual(response_file.code, 200) exp = ( - '- 58 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz ' - 'raw_data/1_s_G1_L001_sequences.fastq.gz\n' - '- 58 /protected/raw_data/1_s_G1_L001_sequences_barcodes.' 
- 'fastq.gz raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n' - '- [0-9]* /protected/templates/1_prep_1_qiime_19700101-000000.txt ' - 'mapping_files/1_mapping_file.txt\n' + "- 58 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz " + "raw_data/1_s_G1_L001_sequences.fastq.gz\n" + "- 58 /protected/raw_data/1_s_G1_L001_sequences_barcodes." + "fastq.gz raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n" + "- [0-9]* /protected/templates/1_prep_1_qiime_19700101-000000.txt " + "mapping_files/1_mapping_file.txt\n" ) - self.assertRegex(response_file.body.decode('ascii'), exp) + self.assertRegex(response_file.body.decode("ascii"), exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/test/test_logger.py b/qiita_pet/test/test_logger.py index 803f521d3..770e9fd11 100644 --- a/qiita_pet/test/test_logger.py +++ b/qiita_pet/test/test_logger.py @@ -7,19 +7,20 @@ # ----------------------------------------------------------------------------- from unittest import main + from qiita_pet.test.tornado_test_base import TestHandlerBase class TestLogEntryViewerHandler(TestHandlerBase): def test_get(self): - response = self.get('/admin/error/') + response = self.get("/admin/error/") self.assertEqual(response.code, 403) def test_post(self): - response = self.post('/admin/error/', {'numrecords': -5}) + response = self.post("/admin/error/", {"numrecords": -5}) self.assertEqual(response.code, 403) - response = self.post('/admin/error/', {'numrecords': 20}) + response = self.post("/admin/error/", {"numrecords": 20}) self.assertEqual(response.code, 403) diff --git a/qiita_pet/test/test_ontology.py b/qiita_pet/test/test_ontology.py index fcc60027b..0ab719e2d 100644 --- a/qiita_pet/test/test_ontology.py +++ b/qiita_pet/test/test_ontology.py @@ -6,20 +6,20 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import main from json import loads +from unittest import main from qiita_pet.test.tornado_test_base import TestHandlerBase class TestOntologyHandler(TestHandlerBase): def test_patch(self): - arguments = {'op': 'add', 'path': 'ENA', 'value': 'new-term'} - response = self.patch('/ontology/', data=arguments) + arguments = {"op": "add", "path": "ENA", "value": "new-term"} + response = self.patch("/ontology/", data=arguments) self.assertEqual(response.code, 200) - exp = {'status': 'success', 'message': ''} + exp = {"status": "success", "message": ""} self.assertEqual(loads(response.body), exp) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/test/test_portal.py b/qiita_pet/test/test_portal.py index 3bed950d0..c62b049ac 100644 --- a/qiita_pet/test/test_portal.py +++ b/qiita_pet/test/test_portal.py @@ -7,73 +7,78 @@ # ----------------------------------------------------------------------------- from unittest import main -from qiita_pet.test.tornado_test_base import TestHandlerBase from mock import Mock from qiita_db.user import User from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.test.tornado_test_base import TestHandlerBase class TestPortal(TestHandlerBase): def test_get(self): BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) - response = self.get('/admin/portals/studies/') + response = self.get("/admin/portals/studies/") self.assertEqual(response.code, 200) self.assertNotEqual(response.body, "") def test_post_add(self): BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) - response = self.post('/admin/portals/studies/', {'portal': 'EMP', - 'selected': [1], - 'action': 'Add'}) + response = self.post( + "/admin/portals/studies/", + {"portal": "EMP", "selected": [1], "action": "Add"}, + ) self.assertEqual(response.code, 200) self.assertNotEqual(response.body, "") def test_post_remove(self): BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) - response = self.post('/admin/portals/studies/', {'portal': 'EMP', - 'selected': [1], - 'action': 'Remove'}) + response = self.post( + "/admin/portals/studies/", + {"portal": "EMP", "selected": [1], "action": "Remove"}, + ) self.assertEqual(response.code, 200) self.assertNotEqual(response.body, "") def test_get_not_valid_user(self): - response = self.get('/admin/portals/studies/') + response = self.get("/admin/portals/studies/") self.assertEqual(response.code, 403) def test_post_not_valid_user(self): - response = self.post('/admin/portals/studies/', {'portal': 'EMP', - 'selected': [1], - 'action': 'Add'}) + response = self.post( + "/admin/portals/studies/", + {"portal": "EMP", "selected": [1], "action": "Add"}, + ) self.assertEqual(response.code, 403) def test_post_not_valid_portal(self): BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) - response = self.post('/admin/portals/studies/', {'portal': 'not-valid', - 'selected': [1], - 'action': 'Add'}) + response = self.post( + "/admin/portals/studies/", + {"portal": "not-valid", "selected": [1], "action": "Add"}, + ) self.assertEqual(response.code, 400) def test_post_not_valid_action(self): BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) - response = self.post('/admin/portals/studies/', {'portal': 'EMP', - 'selected': [1], - 'action': 'Error'}) + response = self.post( + "/admin/portals/studies/", + {"portal": "EMP", "selected": [1], "action": 
"Error"}, + ) self.assertEqual(response.code, 400) def test_get_AJAX(self): BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) - page = '/admin/portals/studiesAJAX/' - response = self.get(page, {'sEcho': '1001', 'view-portal': 'QIITA'}) + page = "/admin/portals/studiesAJAX/" + response = self.get(page, {"sEcho": "1001", "view-portal": "QIITA"}) self.assertEqual(response.code, 200) exp = "Identification of the Microbiomes for Cannabis Soils" - self.assertIn(exp, response.body.decode('ascii')) + self.assertIn(exp, response.body.decode("ascii")) def test_get_AJAX_not_valid_user(self): - page = '/admin/portals/studiesAJAX/' - response = self.get(page, {'sEcho': '1001', 'view-portal': 'QIITA'}) + page = "/admin/portals/studiesAJAX/" + response = self.get(page, {"sEcho": "1001", "view-portal": "QIITA"}) self.assertEqual(response.code, 403) diff --git a/qiita_pet/test/test_prep_template.py b/qiita_pet/test/test_prep_template.py index 2a19a9768..ecd3cce82 100644 --- a/qiita_pet/test/test_prep_template.py +++ b/qiita_pet/test/test_prep_template.py @@ -6,27 +6,26 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main -from os.path import join, exists -from os import remove from json import loads +from os import remove +from os.path import exists, join +from unittest import main -from qiita_pet.test.tornado_test_base import TestHandlerBase -from qiita_db.util import get_count, get_mountpoint from qiita_db.metadata_template.prep_template import PrepTemplate +from qiita_db.util import get_count, get_mountpoint +from qiita_pet.test.tornado_test_base import TestHandlerBase class TestPrepTemplateHandler(TestHandlerBase): - def setUp(self): super(TestPrepTemplateHandler, self).setUp() - uploads_dp = get_mountpoint('uploads')[0][1] - self.new_prep = join(uploads_dp, '1', 'new_template.txt') - with open(self.new_prep, 'w') as f: + uploads_dp = get_mountpoint("uploads")[0][1] + self.new_prep = join(uploads_dp, "1", "new_template.txt") + with open(self.new_prep, "w") as f: f.write("sample_name\tnew_col\nSKD6.640190\tnew_value\n") - self.broken_prep = join(uploads_dp, '1', 'broke_template.txt') - with open(self.broken_prep, 'w') as f: + self.broken_prep = join(uploads_dp, "1", "broke_template.txt") + with open(self.broken_prep, "w") as f: f.write("sample_name\tbroke |col\nSKD6.640190\tnew_value\n") def tearDown(self): @@ -37,89 +36,119 @@ def tearDown(self): remove(self.broken_prep) def test_post(self): - new_prep_id = get_count('qiita.prep_template') + 1 - arguments = {'study_id': '1', - 'data-type': '16S', - 'prep-file': 'new_template.txt'} - response = self.post('/prep_template/', arguments) + new_prep_id = get_count("qiita.prep_template") + 1 + arguments = { + "study_id": "1", + "data-type": "16S", + "prep-file": "new_template.txt", + } + response = self.post("/prep_template/", arguments) self.assertEqual(response.code, 200) # Check that the new prep template has been created self.assertTrue(PrepTemplate.exists(new_prep_id)) def test_post_broken_header(self): - arguments = {'study_id': '1', - 'data-type': '16S', - 'prep-file': 'broke_template.txt'} - response = self.post('/prep_template/', arguments) + arguments = { + "study_id": "1", + "data-type": "16S", + "prep-file": "broke_template.txt", + } + response = self.post("/prep_template/", arguments) self.assertEqual(response.code, 200) - self.assertIn('broke |col', response.body.decode('ascii')) + 
self.assertIn("broke |col", response.body.decode("ascii")) def test_patch(self): - arguments = {'op': 'replace', - 'path': '/1/investigation_type/', - 'value': 'RNA-Seq'} - response = self.patch('/prep_template/', data=arguments) + arguments = { + "op": "replace", + "path": "/1/investigation_type/", + "value": "RNA-Seq", + } + response = self.patch("/prep_template/", data=arguments) self.assertEqual(response.code, 200) - exp = {'status': 'success', 'message': ''} + exp = {"status": "success", "message": ""} self.assertEqual(loads(response.body), exp) def test_delete(self): # Create a new prep template so we can delete it - response = self.delete('/prep_template/', data={'prep-template-id': 1}) + response = self.delete("/prep_template/", data={"prep-template-id": 1}) self.assertEqual(response.code, 200) exp = { "status": "error", "message": "Cannot remove prep template 1 because it has an " - "artifact associated with it"} + "artifact associated with it", + } self.assertEqual(loads(response.body), exp) class TestPrepTemplateGraphHandler(TestHandlerBase): def test_get(self): - response = self.get('/prep_template/1/graph/') + response = self.get("/prep_template/1/graph/") self.assertEqual(response.code, 200) # job ids are generated by random so testing composition obs = loads(response.body) - self.assertEqual(obs['message'], '') - self.assertEqual(obs['status'], 'success') + self.assertEqual(obs["message"], "") + self.assertEqual(obs["status"], "success") - self.assertEqual(11, len(obs['nodes'])) + self.assertEqual(11, len(obs["nodes"])) self.assertIn( - ['artifact', 'FASTQ', 1, 'Raw data 1\n(FASTQ)', 'artifact'], - obs['nodes']) + ["artifact", "FASTQ", 1, "Raw data 1\n(FASTQ)", "artifact"], obs["nodes"] + ) self.assertIn( - ['artifact', 'Demultiplexed', 2, - 'Demultiplexed 1\n(Demultiplexed)', 'artifact'], - obs['nodes']) + [ + "artifact", + "Demultiplexed", + 2, + "Demultiplexed 1\n(Demultiplexed)", + "artifact", + ], + obs["nodes"], + ) self.assertIn( - ['artifact', 'Demultiplexed', 3, - 'Demultiplexed 2\n(Demultiplexed)', 'artifact'], - obs['nodes']) - self.assertIn(['artifact', 'BIOM', 4, 'BIOM\n(BIOM)', 'artifact'], - obs['nodes']) - self.assertIn(['artifact', 'BIOM', 5, 'BIOM\n(BIOM)', 'artifact'], - obs['nodes']) - self.assertIn(['artifact', 'BIOM', 6, 'BIOM\n(BIOM)', 'artifact'], - obs['nodes']) - self.assertEqual(3, len([n for dt, _, _, n, _ in obs['nodes'] - if n == 'Pick closed-reference OTUs' and - dt == 'job'])) - self.assertEqual(2, len([n for dt, _, _, n, _ in obs['nodes'] - if n == 'Split libraries FASTQ' and - dt == 'job'])) - - self.assertEqual(10, len(obs['edges'])) - self.assertEqual(2, len([x for x, y in obs['edges'] if x == 1])) - self.assertEqual(3, len([x for x, y in obs['edges'] if x == 2])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 2])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 3])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 4])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 5])) - self.assertEqual(1, len([x for x, y in obs['edges'] if y == 6])) - - self.assertIsNone(obs['workflow']) - - -if __name__ == '__main__': + [ + "artifact", + "Demultiplexed", + 3, + "Demultiplexed 2\n(Demultiplexed)", + "artifact", + ], + obs["nodes"], + ) + self.assertIn(["artifact", "BIOM", 4, "BIOM\n(BIOM)", "artifact"], obs["nodes"]) + self.assertIn(["artifact", "BIOM", 5, "BIOM\n(BIOM)", "artifact"], obs["nodes"]) + self.assertIn(["artifact", "BIOM", 6, "BIOM\n(BIOM)", "artifact"], obs["nodes"]) + 
self.assertEqual( + 3, + len( + [ + n + for dt, _, _, n, _ in obs["nodes"] + if n == "Pick closed-reference OTUs" and dt == "job" + ] + ), + ) + self.assertEqual( + 2, + len( + [ + n + for dt, _, _, n, _ in obs["nodes"] + if n == "Split libraries FASTQ" and dt == "job" + ] + ), + ) + + self.assertEqual(10, len(obs["edges"])) + self.assertEqual(2, len([x for x, y in obs["edges"] if x == 1])) + self.assertEqual(3, len([x for x, y in obs["edges"] if x == 2])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 2])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 3])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 4])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 5])) + self.assertEqual(1, len([x for x, y in obs["edges"] if y == 6])) + + self.assertIsNone(obs["workflow"]) + + +if __name__ == "__main__": main() diff --git a/qiita_pet/test/test_public.py b/qiita_pet/test/test_public.py index 9dbd1b842..c663a9497 100644 --- a/qiita_pet/test/test_public.py +++ b/qiita_pet/test/test_public.py @@ -8,59 +8,56 @@ from unittest import main -from qiita_pet.test.tornado_test_base import TestHandlerBase from qiita_db.artifact import Artifact +from qiita_pet.test.tornado_test_base import TestHandlerBase class TestPublicHandler(TestHandlerBase): def test_public(self): - response = self.get('/public/') + response = self.get("/public/") self.assertEqual(response.code, 422) - self.assertIn("You need to specify study_id or artifact_id", - response.body.decode('ascii')) + self.assertIn( + "You need to specify study_id or artifact_id", response.body.decode("ascii") + ) - response = self.get('/public/?study_id=100') + response = self.get("/public/?study_id=100") self.assertEqual(response.code, 422) - self.assertIn("Study 100 doesn't exist", - response.body.decode('ascii')) + self.assertIn("Study 100 doesn't exist", response.body.decode("ascii")) - response = self.get('/public/?artifact_id=100') + response = self.get("/public/?artifact_id=100") self.assertEqual(response.code, 422) - self.assertIn("Artifact 100 doesn't exist", - response.body.decode('ascii')) + self.assertIn("Artifact 100 doesn't exist", response.body.decode("ascii")) - response = self.get('/public/?artifact_id=1') + response = self.get("/public/?artifact_id=1") self.assertEqual(response.code, 422) - self.assertIn("Artifact 1 is not public", - response.body.decode('ascii')) + self.assertIn("Artifact 1 is not public", response.body.decode("ascii")) - response = self.get('/public/?study_id=1') + response = self.get("/public/?study_id=1") self.assertEqual(response.code, 422) - self.assertIn("Not a public study", - response.body.decode('ascii')) + self.assertIn("Not a public study", response.body.decode("ascii")) # artifact 1 is the first artifact within Study 1 - Artifact(1).visibility = 'public' + Artifact(1).visibility = "public" - response = self.get('/public/?study_id=1') + response = self.get("/public/?study_id=1") self.assertEqual(response.code, 200) - response = self.get('/public/?artifact_id=1') + response = self.get("/public/?artifact_id=1") self.assertEqual(response.code, 200) - response = self.get('/public/?artifact_id=7') + response = self.get("/public/?artifact_id=7") self.assertEqual(response.code, 422) - self.assertIn("Artifact 7 is not public", - response.body.decode('ascii')) + self.assertIn("Artifact 7 is not public", response.body.decode("ascii")) # artifact 8 is part of an analysis - Artifact(8).visibility = 'public' + Artifact(8).visibility = "public" - response = 
self.get('/public/?artifact_id=8') + response = self.get("/public/?artifact_id=8") self.assertEqual(response.code, 422) - self.assertIn("Artifact 8 doesn't belong to a study", - response.body.decode('ascii')) + self.assertIn( + "Artifact 8 doesn't belong to a study", response.body.decode("ascii") + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_pet/test/test_qiita_redbiom.py b/qiita_pet/test/test_qiita_redbiom.py index 559858112..af6191bb0 100644 --- a/qiita_pet/test/test_qiita_redbiom.py +++ b/qiita_pet/test/test_qiita_redbiom.py @@ -6,152 +6,152 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main from copy import deepcopy from json import loads +from unittest import main from qiita_pet.test.tornado_test_base import TestHandlerBase class TestRedbiom(TestHandlerBase): - def test_get(self): - response = self.get('/redbiom/') + response = self.get("/redbiom/") self.assertEqual(response.code, 200) def test_post_metadata(self): - post_args = { - 'search': 'Diesel', - 'search_on': 'metadata' - } - response = self.post('/redbiom/', post_args) + post_args = {"search": "Diesel", "search_on": "metadata"} + response = self.post("/redbiom/", post_args) self.assertEqual(response.code, 200) exp_artifact_biom_ids = { - '5': ['1.SKD2.640178'], - '4': sorted(['1.SKD2.640178', '1.SKD8.640184'])} + "5": ["1.SKD2.640178"], + "4": sorted(["1.SKD2.640178", "1.SKD8.640184"]), + } response_body = loads(response.body) - obs_artifact_biom_ids = response_body['data'][0].pop( - 'artifact_biom_ids') + obs_artifact_biom_ids = response_body["data"][0].pop("artifact_biom_ids") # making sure they are in the same order - obs_artifact_biom_ids['4'] = sorted(obs_artifact_biom_ids['4']) + obs_artifact_biom_ids["4"] = sorted(obs_artifact_biom_ids["4"]) self.assertDictEqual(obs_artifact_biom_ids, exp_artifact_biom_ids) - exp = {'status': 'success', 'message': '', 'data': DATA} + exp = {"status": "success", "message": "", "data": DATA} self.assertEqual(response_body, exp) - post_args = { - 'search': 'inf', - 'search_on': 'metadata' - } - response = self.post('/redbiom/', post_args) + post_args = {"search": "inf", "search_on": "metadata"} + response = self.post("/redbiom/", post_args) self.assertEqual(response.code, 200) - exp = {'status': 'success', - 'message': 'No samples were found! Try again ...', 'data': []} + exp = { + "status": "success", + "message": "No samples were found! Try again ...", + "data": [], + } self.assertEqual(loads(response.body), exp) - post_args = { - 'search': '4353076', - 'search_on': 'metadata' - } - response = self.post('/redbiom/', post_args) + post_args = {"search": "4353076", "search_on": "metadata"} + response = self.post("/redbiom/", post_args) self.assertEqual(response.code, 200) - exp = {'status': 'success', - 'message': ('The query ("4353076") did not work and may be ' - 'malformed. Please check the search help for more ' - 'information on the queries.'), 'data': []} + exp = { + "status": "success", + "message": ( + 'The query ("4353076") did not work and may be ' + "malformed. Please check the search help for more " + "information on the queries." 
+ ), + "data": [], + } self.assertEqual(loads(response.body), exp) def test_post_features(self): - post_args = { - 'search': '4479944', - 'search_on': 'feature' - } - response = self.post('/redbiom/', post_args) + post_args = {"search": "4479944", "search_on": "feature"} + response = self.post("/redbiom/", post_args) data = deepcopy(DATA) - data[0]['artifact_biom_ids'] = { - '5': ['1.SKM3.640197'], '4': ['1.SKM3.640197']} - exp = {'status': 'success', 'message': '', 'data': data} + data[0]["artifact_biom_ids"] = {"5": ["1.SKM3.640197"], "4": ["1.SKM3.640197"]} + exp = {"status": "success", "message": "", "data": data} self.assertEqual(response.code, 200) self.assertEqual(loads(response.body), exp) - post_args = { - 'search': 'TT', - 'search_on': 'feature' + post_args = {"search": "TT", "search_on": "feature"} + response = self.post("/redbiom/", post_args) + exp = { + "status": "success", + "message": "No samples were found! Try again ...", + "data": [], } - response = self.post('/redbiom/', post_args) - exp = {'status': 'success', - 'message': 'No samples were found! Try again ...', 'data': []} self.assertEqual(response.code, 200) self.assertEqual(loads(response.body), exp) def test_post_taxon(self): - post_args = { - 'search': 'o__0319-7L14', - 'search_on': 'taxon' - } + post_args = {"search": "o__0319-7L14", "search_on": "taxon"} data = deepcopy(DATA) - data[0]['artifact_biom_ids'] = { - '5': sorted(['1.SKD2.640178', '1.SKM3.640197']), - '4': sorted(['1.SKM3.640197', '1.SKD2.640178'])} - response = self.post('/redbiom/', post_args) - exp = {'status': 'success', 'message': '', 'data': data} + data[0]["artifact_biom_ids"] = { + "5": sorted(["1.SKD2.640178", "1.SKM3.640197"]), + "4": sorted(["1.SKM3.640197", "1.SKD2.640178"]), + } + response = self.post("/redbiom/", post_args) + exp = {"status": "success", "message": "", "data": data} # making sure they are in the same order obs = loads(response.body) - obs['data'][0]['artifact_biom_ids'] = { - '4': sorted(obs['data'][0]['artifact_biom_ids']['4']), - '5': sorted(obs['data'][0]['artifact_biom_ids']['5'])} + obs["data"][0]["artifact_biom_ids"] = { + "4": sorted(obs["data"][0]["artifact_biom_ids"]["4"]), + "5": sorted(obs["data"][0]["artifact_biom_ids"]["5"]), + } self.assertEqual(response.code, 200) self.assertEqual(obs, exp) - post_args = { - 'search': 'o_0319-7L14', - 'search_on': 'taxon' + post_args = {"search": "o_0319-7L14", "search_on": "taxon"} + response = self.post("/redbiom/", post_args) + exp = { + "status": "success", + "message": "No samples were found! Try again ...", + "data": [], } - response = self.post('/redbiom/', post_args) - exp = {'status': 'success', - 'message': 'No samples were found! 
Try again ...', 'data': []} self.assertEqual(response.code, 200) self.assertEqual(loads(response.body), exp) def test_post_errors(self): - post_args = { - 'search_on': 'metadata' - } - response = self.post('/redbiom/', post_args) + post_args = {"search_on": "metadata"} + response = self.post("/redbiom/", post_args) self.assertEqual(response.code, 400) - post_args = { - 'search': 'infant', - 'search_on': 'error' - } - response = self.post('/redbiom/', post_args) + post_args = {"search": "infant", "search_on": "error"} + response = self.post("/redbiom/", post_args) self.assertEqual(response.code, 200) - exp = {'status': 'success', - 'message': ('Incorrect search by: you can use metadata, ' - 'features or taxon and you passed: error'), - 'data': []} + exp = { + "status": "success", + "message": ( + "Incorrect search by: you can use metadata, " + "features or taxon and you passed: error" + ), + "data": [], + } self.assertEqual(loads(response.body), exp) DATA = [ - {'study_title': 'Identification of the Microbiomes for Cannabis Soils', - 'metadata_complete': True, 'publication_pid': ['123456', '7891011'], - 'autoloaded': False, - 'study_id': 1, 'ebi_study_accession': 'EBI123456-BB', - 'study_abstract': ('This is a preliminary study to examine the ' - 'microbiota associated with the Cannabis plant. Soils ' - 'samples from the bulk soil, soil associated with the ' - 'roots, and the rhizosphere were extracted and the ' - 'DNA sequenced. Roots from three independent plants ' - 'of different strains were examined. These roots were ' - 'obtained November 11, 2011 from plants that had been ' - 'harvested in the summer. Future studies will attempt ' - 'to analyze the soils and rhizospheres from the same ' - 'location at different time points in the plant ' - 'lifecycle.'), - 'pi': ['PI_dude@foo.bar', 'PIDude'], - 'publication_doi': ['10.100/123456', '10.100/7891011'], - 'study_alias': 'Cannabis Soils', 'number_samples_collected': 27}] + { + "study_title": "Identification of the Microbiomes for Cannabis Soils", + "metadata_complete": True, + "publication_pid": ["123456", "7891011"], + "autoloaded": False, + "study_id": 1, + "ebi_study_accession": "EBI123456-BB", + "study_abstract": ( + "This is a preliminary study to examine the " + "microbiota associated with the Cannabis plant. Soils " + "samples from the bulk soil, soil associated with the " + "roots, and the rhizosphere were extracted and the " + "DNA sequenced. Roots from three independent plants " + "of different strains were examined. These roots were " + "obtained November 11, 2011 from plants that had been " + "harvested in the summer. Future studies will attempt " + "to analyze the soils and rhizospheres from the same " + "location at different time points in the plant " + "lifecycle." + ), + "pi": ["PI_dude@foo.bar", "PIDude"], + "publication_doi": ["10.100/123456", "10.100/7891011"], + "study_alias": "Cannabis Soils", + "number_samples_collected": 27, + } +] if __name__ == "__main__": diff --git a/qiita_pet/test/test_software.py b/qiita_pet/test/test_software.py index cc3fd01f3..980cb2907 100644 --- a/qiita_pet/test/test_software.py +++ b/qiita_pet/test/test_software.py @@ -6,55 +6,54 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- +from copy import deepcopy from unittest import main -from qiita_pet.test.tornado_test_base import TestHandlerBase from mock import Mock -from copy import deepcopy -from qiita_db.sql_connection import TRN -from qiita_db.user import User from qiita_db.software import DefaultWorkflow -from qiita_db.sql_connection import perform_as_transaction +from qiita_db.sql_connection import TRN, perform_as_transaction +from qiita_db.user import User from qiita_pet.handlers.base_handlers import BaseHandler from qiita_pet.handlers.software import _retrive_workflows +from qiita_pet.test.tornado_test_base import TestHandlerBase class TestSoftware(TestHandlerBase): def test_get(self): - response = self.get('/software/') + response = self.get("/software/") self.assertEqual(response.code, 200) - body = response.body.decode('ascii') + body = response.body.decode("ascii") self.assertNotEqual(body, "") # checking that this software is not displayed - self.assertNotIn('Target Gene', body) + self.assertNotIn("Target Gene", body) BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) - response = self.get('/software/') + response = self.get("/software/") self.assertEqual(response.code, 200) - body = response.body.decode('ascii') + body = response.body.decode("ascii") self.assertNotEqual(body, "") # checking that this software is displayed - self.assertIn('Target Gene', body) + self.assertIn("Target Gene", body) class TestWorkflowsHandler(TestHandlerBase): def test_get(self): DefaultWorkflow(2).active = False - response = self.get('/workflows/') + response = self.get("/workflows/") self.assertEqual(response.code, 200) - body = response.body.decode('ascii') + body = response.body.decode("ascii") self.assertNotEqual(body, "") # checking that this software is not displayed - self.assertNotIn('FASTA upstream workflow', body) + self.assertNotIn("FASTA upstream workflow", body) BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) - response = self.get('/workflows/') + response = self.get("/workflows/") self.assertEqual(response.code, 200) - body = response.body.decode('ascii') + body = response.body.decode("ascii") self.assertNotEqual(body, "") # checking that this software is displayed - self.assertIn('FASTA upstream workflow', body) + self.assertIn("FASTA upstream workflow", body) DefaultWorkflow(2).active = True def test_retrive_workflows_standalone(self): @@ -81,11 +80,9 @@ def test_retrive_workflows_standalone(self): # here we expect 1 input node and 1 edge obs = _retrive_workflows(True)[-1] - exp_value = f'input_params_{nid[0]}_per_sample_FASTQ' - self.assertEqual(1, len( - [x for x in obs['nodes'] if x[0] == exp_value])) - self.assertEqual(1, len( - [x for x in obs['edges'] if x[0] == exp_value])) + exp_value = f"input_params_{nid[0]}_per_sample_FASTQ" + self.assertEqual(1, len([x for x in obs["nodes"] if x[0] == exp_value])) + self.assertEqual(1, len([x for x in obs["edges"] if x[0] == exp_value])) # now let's insert another command using the same input with TRN: @@ -98,10 +95,8 @@ def test_retrive_workflows_standalone(self): # we should still have 1 node but now with 2 edges obs = _retrive_workflows(True)[-1] - self.assertEqual(1, len( - [x for x in obs['nodes'] if x[0] == exp_value])) - self.assertEqual(2, len( - [x for x in obs['edges'] if x[0] == exp_value])) + self.assertEqual(1, len([x for x in obs["nodes"] if x[0] == exp_value])) + self.assertEqual(2, len([x for x in obs["edges"] if x[0] == 
exp_value])) def test_retrive_workflows(self): # we should see all 3 workflows @@ -111,8 +106,9 @@ def test_retrive_workflows(self): # validating that the params_name is not being used self.assertNotIn( - 'Split libraries | Defaults with Golay 12 barcodes', - [x[2] for x in _retrive_workflows(False)[1]['nodes']]) + "Split libraries | Defaults with Golay 12 barcodes", + [x[2] for x in _retrive_workflows(False)[1]["nodes"]], + ) # now it should be there with TRN: # Hard-coded values; 19 -> barcode_type @@ -122,8 +118,9 @@ def test_retrive_workflows(self): TRN.add(sql) TRN.execute() self.assertIn( - 'Split libraries | Defaults with Golay 12 barcodes', - [x[2] for x in _retrive_workflows(False)[1]['nodes']]) + "Split libraries | Defaults with Golay 12 barcodes", + [x[2] for x in _retrive_workflows(False)[1]["nodes"]], + ) # and gone again with TRN: sql = """UPDATE qiita.command_parameter @@ -132,8 +129,9 @@ def test_retrive_workflows(self): TRN.add(sql) TRN.execute() self.assertNotIn( - 'Split libraries | Defaults with Golay 12 barcodes', - [x[2] for x in _retrive_workflows(False)[1]['nodes']]) + "Split libraries | Defaults with Golay 12 barcodes", + [x[2] for x in _retrive_workflows(False)[1]["nodes"]], + ) # we should not see the middle one del exp[1] @@ -181,119 +179,269 @@ def test_retrive_workflows(self): # adding new expected values exp = deepcopy(WORKFLOWS) obs = _retrive_workflows(False) - exp[0]['nodes'].extend([ - ['params_7', 1, 'Split libraries FASTQ', 'Defaults with reverse ' - 'complement mapping file barcodes', { - 'max_bad_run_length': '3', - 'min_per_read_length_fraction': '0.75', - 'sequence_max_n': '0', 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'True', 'rev_comp': 'False', - 'phred_quality_threshold': '3', 'barcode_type': 'golay_12', - 'max_barcode_errors': '1.5', 'phred_offset': 'auto'}], - ['input_params_7_FASTQ | per_sample_FASTQ', 1, - 'FASTQ | per_sample_FASTQ'], - ['output_params_7_demultiplexed | Demultiplexed', 1, - 'demultiplexed | Demultiplexed'], - ['params_8', 3, 'Pick closed-reference OTUs', 'Defaults', { - 'reference': '1', 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', 'similarity': '0.97', - 'sortmerna_coverage': '0.97', 'threads': '1'}], - ['output_params_8_OTU table | BIOM', 3, 'OTU table | BIOM']]) - exp[0]['edges'].extend([ - ['input_params_7_FASTQ | per_sample_FASTQ', 'params_7'], - ['params_7', 'output_params_7_demultiplexed | Demultiplexed'], - ['output_params_7_demultiplexed | Demultiplexed', 'params_8'], - ['params_8', 'output_params_8_OTU table | BIOM']]) - exp[1]['nodes'].extend([ - ['params_9', 3, 'Pick closed-reference OTUs', '100%', { - 'reference': '1', 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', 'similarity': '1.0', - 'sortmerna_coverage': '1.0', 'threads': '1'}], - ['output_params_9_OTU table | BIOM', 3, 'OTU table | BIOM']]) - exp[1]['edges'].extend([ - ['output_params_3_demultiplexed | Demultiplexed', 'params_9'], - ['params_9', 'output_params_9_OTU table | BIOM'] - ]) + exp[0]["nodes"].extend( + [ + [ + "params_7", + 1, + "Split libraries FASTQ", + "Defaults with reverse complement mapping file barcodes", + { + "max_bad_run_length": "3", + "min_per_read_length_fraction": "0.75", + "sequence_max_n": "0", + "rev_comp_barcode": "False", + "rev_comp_mapping_barcodes": "True", + "rev_comp": "False", + "phred_quality_threshold": "3", + "barcode_type": "golay_12", + "max_barcode_errors": "1.5", + "phred_offset": "auto", + }, + ], + [ + "input_params_7_FASTQ | per_sample_FASTQ", + 1, + "FASTQ | 
per_sample_FASTQ", + ], + [ + "output_params_7_demultiplexed | Demultiplexed", + 1, + "demultiplexed | Demultiplexed", + ], + [ + "params_8", + 3, + "Pick closed-reference OTUs", + "Defaults", + { + "reference": "1", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "similarity": "0.97", + "sortmerna_coverage": "0.97", + "threads": "1", + }, + ], + ["output_params_8_OTU table | BIOM", 3, "OTU table | BIOM"], + ] + ) + exp[0]["edges"].extend( + [ + ["input_params_7_FASTQ | per_sample_FASTQ", "params_7"], + ["params_7", "output_params_7_demultiplexed | Demultiplexed"], + ["output_params_7_demultiplexed | Demultiplexed", "params_8"], + ["params_8", "output_params_8_OTU table | BIOM"], + ] + ) + exp[1]["nodes"].extend( + [ + [ + "params_9", + 3, + "Pick closed-reference OTUs", + "100%", + { + "reference": "1", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "similarity": "1.0", + "sortmerna_coverage": "1.0", + "threads": "1", + }, + ], + ["output_params_9_OTU table | BIOM", 3, "OTU table | BIOM"], + ] + ) + exp[1]["edges"].extend( + [ + ["output_params_3_demultiplexed | Demultiplexed", "params_9"], + ["params_9", "output_params_9_OTU table | BIOM"], + ] + ) self.assertCountEqual(obs, exp) WORKFLOWS = [ - {'name': 'FASTQ upstream workflow', 'id': 1, 'data_types': ['16S', '18S'], - 'description': 'This accepts html Qiita!' - '
BYE!', - 'active': True, 'parameters_sample': {}, 'parameters_prep': {}, - 'nodes': [ - ['params_1', 1, 'Split libraries FASTQ', 'Defaults', { - 'max_bad_run_length': '3', 'min_per_read_length_fraction': '0.75', - 'sequence_max_n': '0', 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'False', 'rev_comp': 'False', - 'phred_quality_threshold': '3', 'barcode_type': 'golay_12', - 'max_barcode_errors': '1.5', 'phred_offset': 'auto'}], - ['input_params_1_FASTQ', 1, - 'FASTQ'], - ['output_params_1_demultiplexed | Demultiplexed', 1, - 'demultiplexed | Demultiplexed'], - ['params_2', 3, 'Pick closed-reference OTUs', 'Defaults', { - 'reference': '1', 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', 'similarity': '0.97', - 'sortmerna_coverage': '0.97', 'threads': '1'}], - ['output_params_2_OTU table | BIOM', 3, 'OTU table | BIOM']], - 'edges': [ - ['input_params_1_FASTQ', 'params_1'], - ['params_1', 'output_params_1_demultiplexed | Demultiplexed'], - ['output_params_1_demultiplexed | Demultiplexed', 'params_2'], - ['params_2', 'output_params_2_OTU table | BIOM']]}, - {'name': 'FASTA upstream workflow', 'id': 2, 'data_types': ['18S'], - 'description': 'This is another description', - 'active': False, 'parameters_sample': {}, 'parameters_prep': {}, - 'nodes': [ - ['params_3', 2, 'Split libraries', 'Defaults with Golay 12 barcodes', { - 'min_seq_len': '200', 'max_seq_len': '1000', - 'trim_seq_length': 'False', 'min_qual_score': '25', - 'max_ambig': '6', 'max_homopolymer': '6', - 'max_primer_mismatch': '0', 'barcode_type': 'golay_12', - 'max_barcode_errors': '1.5', 'disable_bc_correction': 'False', - 'qual_score_window': '0', 'disable_primers': 'False', - 'reverse_primers': 'disable', 'reverse_primer_mismatches': '0', - 'truncate_ambi_bases': 'False'}], - ['input_params_3_** WARNING, NOT DEFINED **', 2, - '** WARNING, NOT DEFINED **'], - ['output_params_3_demultiplexed | Demultiplexed', 2, - 'demultiplexed | Demultiplexed'], - ['params_4', 3, 'Pick closed-reference OTUs', 'Defaults', { - 'reference': '1', 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', 'similarity': '0.97', - 'sortmerna_coverage': '0.97', 'threads': '1'}], - ['output_params_4_OTU table | BIOM', 3, 'OTU table | BIOM']], - 'edges': [ - ['input_params_3_** WARNING, NOT DEFINED **', 'params_3'], - ['params_3', 'output_params_3_demultiplexed | Demultiplexed'], - ['output_params_3_demultiplexed | Demultiplexed', 'params_4'], - ['params_4', 'output_params_4_OTU table | BIOM']]}, - {'name': 'Per sample FASTQ upstream workflow', 'id': 3, - 'data_types': ['ITS'], 'description': None, - 'active': True, 'parameters_sample': {}, 'parameters_prep': {}, - 'nodes': [ - ['params_5', 1, 'Split libraries FASTQ', 'per sample FASTQ defaults', { - 'max_bad_run_length': '3', 'min_per_read_length_fraction': '0.75', - 'sequence_max_n': '0', 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'False', 'rev_comp': 'False', - 'phred_quality_threshold': '3', 'barcode_type': 'not-barcoded', - 'max_barcode_errors': '1.5', 'phred_offset': 'auto'}], - ['input_params_5_FASTQ', 1, - 'FASTQ'], - ['output_params_5_demultiplexed | Demultiplexed', 1, - 'demultiplexed | Demultiplexed'], - ['params_6', 3, 'Pick closed-reference OTUs', 'Defaults', { - 'reference': '1', 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', 'similarity': '0.97', - 'sortmerna_coverage': '0.97', 'threads': '1'}], - ['output_params_6_OTU table | BIOM', 3, 'OTU table | BIOM']], - 'edges': [ - ['input_params_5_FASTQ', 'params_5'], - ['params_5', 
'output_params_5_demultiplexed | Demultiplexed'], - ['output_params_5_demultiplexed | Demultiplexed', 'params_6'], - ['params_6', 'output_params_6_OTU table | BIOM']]}] + { + "name": "FASTQ upstream workflow", + "id": 1, + "data_types": ["16S", "18S"], + "description": 'This accepts html Qiita!' + "
BYE!", + "active": True, + "parameters_sample": {}, + "parameters_prep": {}, + "nodes": [ + [ + "params_1", + 1, + "Split libraries FASTQ", + "Defaults", + { + "max_bad_run_length": "3", + "min_per_read_length_fraction": "0.75", + "sequence_max_n": "0", + "rev_comp_barcode": "False", + "rev_comp_mapping_barcodes": "False", + "rev_comp": "False", + "phred_quality_threshold": "3", + "barcode_type": "golay_12", + "max_barcode_errors": "1.5", + "phred_offset": "auto", + }, + ], + ["input_params_1_FASTQ", 1, "FASTQ"], + [ + "output_params_1_demultiplexed | Demultiplexed", + 1, + "demultiplexed | Demultiplexed", + ], + [ + "params_2", + 3, + "Pick closed-reference OTUs", + "Defaults", + { + "reference": "1", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "similarity": "0.97", + "sortmerna_coverage": "0.97", + "threads": "1", + }, + ], + ["output_params_2_OTU table | BIOM", 3, "OTU table | BIOM"], + ], + "edges": [ + ["input_params_1_FASTQ", "params_1"], + ["params_1", "output_params_1_demultiplexed | Demultiplexed"], + ["output_params_1_demultiplexed | Demultiplexed", "params_2"], + ["params_2", "output_params_2_OTU table | BIOM"], + ], + }, + { + "name": "FASTA upstream workflow", + "id": 2, + "data_types": ["18S"], + "description": "This is another description", + "active": False, + "parameters_sample": {}, + "parameters_prep": {}, + "nodes": [ + [ + "params_3", + 2, + "Split libraries", + "Defaults with Golay 12 barcodes", + { + "min_seq_len": "200", + "max_seq_len": "1000", + "trim_seq_length": "False", + "min_qual_score": "25", + "max_ambig": "6", + "max_homopolymer": "6", + "max_primer_mismatch": "0", + "barcode_type": "golay_12", + "max_barcode_errors": "1.5", + "disable_bc_correction": "False", + "qual_score_window": "0", + "disable_primers": "False", + "reverse_primers": "disable", + "reverse_primer_mismatches": "0", + "truncate_ambi_bases": "False", + }, + ], + [ + "input_params_3_** WARNING, NOT DEFINED **", + 2, + "** WARNING, NOT DEFINED **", + ], + [ + "output_params_3_demultiplexed | Demultiplexed", + 2, + "demultiplexed | Demultiplexed", + ], + [ + "params_4", + 3, + "Pick closed-reference OTUs", + "Defaults", + { + "reference": "1", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "similarity": "0.97", + "sortmerna_coverage": "0.97", + "threads": "1", + }, + ], + ["output_params_4_OTU table | BIOM", 3, "OTU table | BIOM"], + ], + "edges": [ + ["input_params_3_** WARNING, NOT DEFINED **", "params_3"], + ["params_3", "output_params_3_demultiplexed | Demultiplexed"], + ["output_params_3_demultiplexed | Demultiplexed", "params_4"], + ["params_4", "output_params_4_OTU table | BIOM"], + ], + }, + { + "name": "Per sample FASTQ upstream workflow", + "id": 3, + "data_types": ["ITS"], + "description": None, + "active": True, + "parameters_sample": {}, + "parameters_prep": {}, + "nodes": [ + [ + "params_5", + 1, + "Split libraries FASTQ", + "per sample FASTQ defaults", + { + "max_bad_run_length": "3", + "min_per_read_length_fraction": "0.75", + "sequence_max_n": "0", + "rev_comp_barcode": "False", + "rev_comp_mapping_barcodes": "False", + "rev_comp": "False", + "phred_quality_threshold": "3", + "barcode_type": "not-barcoded", + "max_barcode_errors": "1.5", + "phred_offset": "auto", + }, + ], + ["input_params_5_FASTQ", 1, "FASTQ"], + [ + "output_params_5_demultiplexed | Demultiplexed", + 1, + "demultiplexed | Demultiplexed", + ], + [ + "params_6", + 3, + "Pick closed-reference OTUs", + "Defaults", + { + "reference": "1", + "sortmerna_e_value": "1", + 
"sortmerna_max_pos": "10000", + "similarity": "0.97", + "sortmerna_coverage": "0.97", + "threads": "1", + }, + ], + ["output_params_6_OTU table | BIOM", 3, "OTU table | BIOM"], + ], + "edges": [ + ["input_params_5_FASTQ", "params_5"], + ["params_5", "output_params_5_demultiplexed | Demultiplexed"], + ["output_params_5_demultiplexed | Demultiplexed", "params_6"], + ["params_6", "output_params_6_OTU table | BIOM"], + ], + }, +] if __name__ == "__main__": diff --git a/qiita_pet/test/test_upload.py b/qiita_pet/test/test_upload.py index d4ee24a36..fa7cdc4fa 100644 --- a/qiita_pet/test/test_upload.py +++ b/qiita_pet/test/test_upload.py @@ -6,27 +6,28 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +from time import sleep from unittest import main + from requests import Request from six import StringIO -from time import sleep from qiita_pet.test.tornado_test_base import TestHandlerBase class TestStudyUploadFileHandler(TestHandlerBase): def test_get_exists(self): - response = self.get('/study/upload/1') + response = self.get("/study/upload/1") self.assertEqual(response.code, 200) def test_get_no_exists(self): - response = self.get('/study/upload/245') + response = self.get("/study/upload/245") self.assertEqual(response.code, 404) class TestUploadFileHandler(TestHandlerBase): def test_get(self): - response = self.get('/upload/') + response = self.get("/upload/") self.assertEqual(response.code, 400) @@ -35,48 +36,50 @@ def _setup_request(self, data): # setting up things to test by sending POST variables and a file # taken from: https://bit.ly/2CpZiZn prepare = Request( - url='https://localhost/', - files={'ssh-key': StringIO('Test key.')}, data=data).prepare() - headers = {"Content-Type": prepare.headers.get('Content-Type')} + url="https://localhost/", + files={"ssh-key": StringIO("Test key.")}, + data=data, + ).prepare() + headers = {"Content-Type": prepare.headers.get("Content-Type")} body = prepare.body return headers, body def test_post(self): - data = {'remote-request-type': 'list', 'inputURL': 'scp-url'} + data = {"remote-request-type": "list", "inputURL": "scp-url"} headers, body = self._setup_request(data) # study doesn't exist - response = self.post( - '/study/upload/remote/100', data=body, headers=headers) + response = self.post("/study/upload/remote/100", data=body, headers=headers) self.assertEqual(response.code, 404) # create a successful list job - response = self.post( - '/study/upload/remote/1', data=body, headers=headers) + response = self.post("/study/upload/remote/1", data=body, headers=headers) self.assertEqual(response.code, 200) - self.assertEqual(response.body.decode('ascii'), - '{"status": "success", "message": ""}') + self.assertEqual( + response.body.decode("ascii"), '{"status": "success", "message": ""}' + ) # create a successful list job - data = {'remote-request-type': 'transfer', 'inputURL': 'scp-url'} + data = {"remote-request-type": "transfer", "inputURL": "scp-url"} headers, body = self._setup_request(data) - response = self.post( - '/study/upload/remote/1', data=body, headers=headers) + response = self.post("/study/upload/remote/1", data=body, headers=headers) self.assertEqual(response.code, 200) - self.assertEqual(response.body.decode('ascii'), - '{"status": "success", "message": ""}') + self.assertEqual( + response.body.decode("ascii"), '{"status": "success", "message": ""}' + ) # sleep to wait for jobs to finish, no need to check for it's status sleep(5) 
# jobs with bad Parameters - data = {'remote-request-type': 'error', 'inputURL': 'scp-url'} + data = {"remote-request-type": "error", "inputURL": "scp-url"} headers, body = self._setup_request(data) - response = self.post( - '/study/upload/remote/1', data=body, headers=headers) + response = self.post("/study/upload/remote/1", data=body, headers=headers) self.assertEqual(response.code, 200) - self.assertEqual(response.body.decode('ascii'), '{"status": "error", ' - '"message": "Not a valid method"}') + self.assertEqual( + response.body.decode("ascii"), + '{"status": "error", "message": "Not a valid method"}', + ) if __name__ == "__main__": diff --git a/qiita_pet/test/test_user_handlers.py b/qiita_pet/test/test_user_handlers.py index a47729ad7..8ccfbac69 100644 --- a/qiita_pet/test/test_user_handlers.py +++ b/qiita_pet/test/test_user_handlers.py @@ -6,16 +6,17 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +from json import loads from unittest import main -from wtforms.validators import ValidationError -from wtforms import StringField + from mock import Mock -from json import loads +from wtforms import StringField +from wtforms.validators import ValidationError -from qiita_pet.test.tornado_test_base import TestHandlerBase -from qiita_pet.handlers.user_handlers import UserProfile -from qiita_pet.handlers.base_handlers import BaseHandler from qiita_db.user import User +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.handlers.user_handlers import UserProfile +from qiita_pet.test.tornado_test_base import TestHandlerBase class TestUserProfile(TestHandlerBase): @@ -25,29 +26,26 @@ class TestUserProfile(TestHandlerBase): class TestUserProfileHandler(TestHandlerBase): def test_get(self): - response = self.get('/profile/') + response = self.get("/profile/") self.assertEqual(response.code, 200) def test_post_password(self): - post_args = { - 'action': 'password', - 'oldpass': 'password', - 'newpass': 'newpass' - } - response = self.post('/profile/', post_args) + post_args = {"action": "password", "oldpass": "password", "newpass": "newpass"} + response = self.post("/profile/", post_args) self.assertEqual(response.code, 200) def test_post_profile(self): post_args = { - 'action': ['profile'], - 'affiliation': ['NEWNAME'], - 'address': ['ADDRESS'], - 'name': ['TESTDUDE'], - 'phone': ['111-222-3333'], - 'social_orcid': [''], - 'social_googlescholar': [''], - 'social_researchgate': ['']} - response = self.post('/profile/', post_args) + "action": ["profile"], + "affiliation": ["NEWNAME"], + "address": ["ADDRESS"], + "name": ["TESTDUDE"], + "phone": ["111-222-3333"], + "social_orcid": [""], + "social_googlescholar": [""], + "social_researchgate": [""], + } + response = self.post("/profile/", post_args) self.assertEqual(response.code, 200) def test_validators_social(self): @@ -65,11 +63,12 @@ def test_validators_social(self): with self.assertRaises(ValidationError): obs = UserProfile.validate_general(" infix ", "", "") obs = UserProfile.validate_general("infix", "", "") - self.assertEqual(obs, 'infix') + self.assertEqual(obs, "infix") with self.assertRaises(ValidationError): obs = UserProfile.validate_general( - "http://kurt.com/id1234", "msg", r"http://kurt.\w{1,3}/") + "http://kurt.com/id1234", "msg", r"http://kurt.\w{1,3}/" + ) def test_validator_orcid_id(self): field = StringField("testfield") @@ -93,20 +92,19 @@ def test_validator_gscholar_id(self): obs = 
UserProfile.validator_gscholar_id(None, field) self.assertEqual(obs, None) - field.data = ('https://scholar.google.com/citations?user=_e3QL94AAAAJ&' - 'hl=en') + field.data = "https://scholar.google.com/citations?user=_e3QL94AAAAJ&hl=en" with self.assertRaises(ValidationError): obs = UserProfile.validator_gscholar_id(None, field) - field.data = 'user=_e3QL94AAAAJ&hl=en' + field.data = "user=_e3QL94AAAAJ&hl=en" with self.assertRaises(ValidationError): obs = UserProfile.validator_gscholar_id(None, field) - field.data = 'user=_e3QL94AAAAJ' + field.data = "user=_e3QL94AAAAJ" with self.assertRaises(ValidationError): obs = UserProfile.validator_gscholar_id(None, field) - field.data = '=_e3QL94AAAAJ' + field.data = "=_e3QL94AAAAJ" with self.assertRaises(ValidationError): obs = UserProfile.validator_gscholar_id(None, field) @@ -117,14 +115,14 @@ def test_validator_rgate_id(self): obs = UserProfile.validator_rgate_id(None, field) self.assertEqual(obs, None) - field.data = 'https://www.researchgate.net/profile/Rob-Knight' + field.data = "https://www.researchgate.net/profile/Rob-Knight" with self.assertRaises(ValidationError): obs = UserProfile.validator_rgate_id(None, field) class TestUserJobsHandler(TestHandlerBase): def test_get(self): - response = self.get('/user/jobs/') + response = self.get("/user/jobs/") self.assertEqual(response.code, 200) @@ -134,24 +132,27 @@ def setUp(self): BaseHandler.get_current_user = Mock(return_value=User("admin@foo.bar")) def test_get(self): - response = self.get('/admin/purge_usersAjax/?_=1718805487494') - obs_users_table = loads(response.body.decode('ascii')) - obs_users = {user['email'] for user in obs_users_table} - self.assertIn('ayearago@nonvalidat.ed', obs_users) - self.assertIn('3Xdays@nonvalidat.ed', obs_users) - self.assertNotIn('justnow@nonvalidat.ed', obs_users) + response = self.get("/admin/purge_usersAjax/?_=1718805487494") + obs_users_table = loads(response.body.decode("ascii")) + obs_users = {user["email"] for user in obs_users_table} + self.assertIn("ayearago@nonvalidat.ed", obs_users) + self.assertIn("3Xdays@nonvalidat.ed", obs_users) + self.assertNotIn("justnow@nonvalidat.ed", obs_users) def test_post_removeBoth(self): # remove both users - response = self.post('/admin/purge_users/', - {'action': 'Remove', - 'selected': ['ayearago@nonvalidat.ed', - '3Xdays@nonvalidat.ed']}) + response = self.post( + "/admin/purge_users/", + { + "action": "Remove", + "selected": ["ayearago@nonvalidat.ed", "3Xdays@nonvalidat.ed"], + }, + ) self.assertEqual(response.code, 200) # test that zero users are listed now - response = self.get('/admin/purge_usersAjax/?_=1718805487495') - obs_users_table = loads(response.body.decode('ascii')) + response = self.get("/admin/purge_usersAjax/?_=1718805487495") + obs_users_table = loads(response.body.decode("ascii")) self.assertEqual(obs_users_table, []) diff --git a/qiita_pet/test/test_util.py b/qiita_pet/test/test_util.py index 7392c6fd5..6348b9142 100644 --- a/qiita_pet/test/test_util.py +++ b/qiita_pet/test/test_util.py @@ -8,10 +8,15 @@ from unittest import TestCase, main -from qiita_pet.util import (clean_str, generate_param_str, is_localhost, - convert_text_html, get_network_nodes_edges) -from qiita_db.software import DefaultParameters from qiita_db.artifact import Artifact +from qiita_db.software import DefaultParameters +from qiita_pet.util import ( + clean_str, + convert_text_html, + generate_param_str, + get_network_nodes_edges, + is_localhost, +) class TestUtil(TestCase): @@ -22,27 +27,30 @@ def test_clean_str(self): 
    def test_generate_param_str(self):
        params = DefaultParameters(10)
        obs = generate_param_str(params)
-        exp = ('Reference: Greengenes 13_8<br/>'
-               'sortmerna_e_value: 1<br/>'
-               'sortmerna_max_pos: 10000<br/>'
-               'similarity: 0.97<br/>'
-               'sortmerna_coverage: 0.97<br/>'
-               'threads: 1')
+        exp = (
+            "Reference: Greengenes 13_8<br/>"
+            "sortmerna_e_value: 1<br/>"
+            "sortmerna_max_pos: 10000<br/>"
+            "similarity: 0.97<br/>"
+            "sortmerna_coverage: 0.97<br/>
" + "threads: 1" + ) self.assertEqual(obs, exp) def test_is_localhost(self): - self.assertTrue(is_localhost('127.0.0.1')) - self.assertTrue(is_localhost('localhost')) - self.assertTrue(is_localhost('127.0.0.1:21174')) + self.assertTrue(is_localhost("127.0.0.1")) + self.assertTrue(is_localhost("localhost")) + self.assertTrue(is_localhost("127.0.0.1:21174")) - self.assertFalse(is_localhost('10.0.0.1')) - self.assertFalse(is_localhost('10.0.0.1:21174')) + self.assertFalse(is_localhost("10.0.0.1")) + self.assertFalse(is_localhost("10.0.0.1:21174")) def test_convert_text_html(self): - test = ('This is line\nThis is another\n' - 'This is a link: http://test.com') - exp = ('This is line
<br/>This is another<br/>'
-               'This is a link: http://test.com')
+        test = "This is line\nThis is another\nThis is a link: http://test.com"
+        exp = (
+            "This is line<br/>This is another<br/>
" + 'This is a link: http://test.com' + ) obs = convert_text_html(test) self.assertEqual(obs, exp) @@ -50,9 +58,9 @@ def test_get_network_nodes_edges(self): graph = Artifact(1).descendants_with_jobs obs_nodes, obs_edges, obs_wf = get_network_nodes_edges(graph, True) self.assertEqual(len(obs_nodes), 11) - self.assertEqual(len([x for x in obs_nodes if x[0] == 'job']), 5) - self.assertEqual(len([x for x in obs_nodes if x[0] == 'artifact']), 6) - self.assertEqual(len([x for x in obs_nodes if x[0] == 'type']), 0) + self.assertEqual(len([x for x in obs_nodes if x[0] == "job"]), 5) + self.assertEqual(len([x for x in obs_nodes if x[0] == "artifact"]), 6) + self.assertEqual(len([x for x in obs_nodes if x[0] == "type"]), 0) self.assertEqual(len(obs_edges), 10) self.assertIsNone(obs_wf) @@ -60,11 +68,12 @@ def test_get_network_nodes_edges(self): # the graph gets extended accordingly graph = Artifact(6).descendants_with_jobs obs_nodes, obs_edges, obs_wf = get_network_nodes_edges( - graph, True, nodes=obs_nodes, edges=obs_edges) + graph, True, nodes=obs_nodes, edges=obs_edges + ) self.assertEqual(len(obs_nodes), 12) - self.assertEqual(len([x for x in obs_nodes if x[0] == 'job']), 5) - self.assertEqual(len([x for x in obs_nodes if x[0] == 'artifact']), 7) - self.assertEqual(len([x for x in obs_nodes if x[0] == 'type']), 0) + self.assertEqual(len([x for x in obs_nodes if x[0] == "job"]), 5) + self.assertEqual(len([x for x in obs_nodes if x[0] == "artifact"]), 7) + self.assertEqual(len([x for x in obs_nodes if x[0] == "type"]), 0) self.assertEqual(len(obs_edges), 10) self.assertIsNone(obs_wf) diff --git a/qiita_pet/test/test_websocket_handlers.py b/qiita_pet/test/test_websocket_handlers.py index 57d2a6245..3c870d8db 100644 --- a/qiita_pet/test/test_websocket_handlers.py +++ b/qiita_pet/test/test_websocket_handlers.py @@ -6,18 +6,17 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from unittest import main - -from qiita_pet.test.tornado_test_base import TestHandlerWebSocketBase from json import dumps, loads +from unittest import main +from tornado.gen import Return, coroutine from tornado.testing import gen_test -from tornado.gen import coroutine, Return + +from qiita_pet.test.tornado_test_base import TestHandlerWebSocketBase # adapted from: https://gist.github.com/crodjer/1e9989ab30fdc32db926 class TestSelectedSocketHandler(TestHandlerWebSocketBase): - @coroutine def _mk_client(self): c = yield self._mk_connection() @@ -34,22 +33,22 @@ def test_socket(self): # A client with the hello taken care of. 
c = yield self._mk_client() - msg = {'remove_sample': {'proc_data': 2, 'samples': ['A', 'B']}} + msg = {"remove_sample": {"proc_data": 2, "samples": ["A", "B"]}} c.write_message(dumps(msg)) response = yield c.read_message() self.assertEqual(loads(response), msg) - msg = {'remove_pd': {'proc_data': 2}} + msg = {"remove_pd": {"proc_data": 2}} c.write_message(dumps(msg)) response = yield c.read_message() self.assertEqual(loads(response), msg) - msg = {'clear': {'pids': [2]}} + msg = {"clear": {"pids": [2]}} c.write_message(dumps(msg)) response = yield c.read_message() self.assertEqual(loads(response), msg) - msg = {'clear': {'pids': [2, 3, 4]}} + msg = {"clear": {"pids": [2, 3, 4]}} c.write_message(dumps(msg)) response = yield c.read_message() self.assertEqual(loads(response), msg) diff --git a/qiita_pet/test/tornado_test_base.py b/qiita_pet/test/tornado_test_base.py index 050b68246..b49a3335b 100644 --- a/qiita_pet/test/tornado_test_base.py +++ b/qiita_pet/test/tornado_test_base.py @@ -7,19 +7,21 @@ # ----------------------------------------------------------------------------- from mock import Mock + try: from urllib import urlencode except ImportError: # py3 from urllib.parse import urlencode -from tornado.testing import AsyncHTTPTestCase, bind_unused_port from tornado.escape import json_encode +from tornado.testing import AsyncHTTPTestCase, bind_unused_port from tornado.websocket import websocket_connect -from qiita_pet.webserver import Application -from qiita_pet.handlers.base_handlers import BaseHandler + +from qiita_core.qiita_settings import r_client from qiita_db.environment_manager import clean_test_environment from qiita_db.user import User -from qiita_core.qiita_settings import r_client +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_pet.webserver import Application class TestHandlerBase(AsyncHTTPTestCase): @@ -28,7 +30,7 @@ class TestHandlerBase(AsyncHTTPTestCase): def get_app(self): BaseHandler.get_current_user = Mock(return_value=User("test@foo.bar")) - self.app.settings['debug'] = False + self.app.settings["debug"] = False return self.app @classmethod @@ -41,11 +43,11 @@ def get(self, url, data=None, headers=None, doseq=True): if data is not None: if isinstance(data, dict): data = urlencode(data, doseq=doseq) - if '?' in url: - url += '&%s' % data + if "?" in url: + url += "&%s" % data else: - url += '?%s' % data - return self._fetch(url, 'GET', headers=headers) + url += "?%s" % data + return self._fetch(url, "GET", headers=headers) def post(self, url, data, headers=None, doseq=True, asjson=False): if data is not None: @@ -53,7 +55,7 @@ def post(self, url, data, headers=None, doseq=True, asjson=False): data = json_encode(data) elif isinstance(data, dict): data = urlencode(data, doseq=doseq) - return self._fetch(url, 'POST', data, headers) + return self._fetch(url, "POST", data, headers) def patch(self, url, data, headers=None, doseq=True, asjson=False): if asjson: @@ -61,25 +63,26 @@ def patch(self, url, data, headers=None, doseq=True, asjson=False): else: if isinstance(data, dict): data = urlencode(data, doseq=doseq) - if '?' in url: - url += '&%s' % data + if "?" in url: + url += "&%s" % data else: - url += '?%s' % data - return self._fetch(url, 'PATCH', data=data, headers=headers) + url += "?%s" % data + return self._fetch(url, "PATCH", data=data, headers=headers) def delete(self, url, data=None, headers=None, doseq=True): if data is not None: if isinstance(data, dict): data = urlencode(data, doseq=doseq) - if '?' in url: - url += '&%s' % data + if "?" 
in url: + url += "&%s" % data else: - url += '?%s' % data - return self._fetch(url, 'DELETE', headers=headers) + url += "?%s" % data + return self._fetch(url, "DELETE", headers=headers) def _fetch(self, url, method, data=None, headers=None): - self.http_client.fetch(self.get_url(url), self.stop, method=method, - body=data, headers=headers) + self.http_client.fetch( + self.get_url(url), self.stop, method=method, body=data, headers=headers + ) # there is a random error in travis where a test takes longer than # expected thus using 25 seconds return self.wait(timeout=25) @@ -94,5 +97,5 @@ def setUp(self): def _mk_connection(self): return websocket_connect( - 'ws://localhost:{}/analysis/selected/socket/'.format(self.port) + "ws://localhost:{}/analysis/selected/socket/".format(self.port) ) diff --git a/qiita_pet/util.py b/qiita_pet/util.py index d09f6728c..fde68df51 100644 --- a/qiita_pet/util.py +++ b/qiita_pet/util.py @@ -14,6 +14,7 @@ clean_str """ + # ----------------------------------------------------------------------------- # Copyright (c) 2014--, The Qiita Development Team. # @@ -21,25 +22,26 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from tornado.escape import linkify as tornado_linkify, xhtml_unescape +from tornado.escape import linkify as tornado_linkify +from tornado.escape import xhtml_unescape from qiita_core.util import execute_as_transaction from qiita_db.reference import Reference - STATUS_STYLER = { - 'sandbox': - ('glyphicon glyphicon-eye-close', 'glyphicon glyphicon-lock', 'gray'), - 'awaiting_approval': - ('glyphicon glyphicon-eye-open', 'glyphicon glyphicon-lock', 'peru'), - 'private': - ('glyphicon glyphicon-eye-open', 'glyphicon glyphicon-lock', - '#3599FD'), - 'public': - ('glyphicon glyphicon-eye-open', 'glyphicon glyphicon-globe', 'green')} + "sandbox": ("glyphicon glyphicon-eye-close", "glyphicon glyphicon-lock", "gray"), + "awaiting_approval": ( + "glyphicon glyphicon-eye-open", + "glyphicon glyphicon-lock", + "peru", + ), + "private": ("glyphicon glyphicon-eye-open", "glyphicon glyphicon-lock", "#3599FD"), + "public": ("glyphicon glyphicon-eye-open", "glyphicon glyphicon-globe", "green"), +} -EBI_LINKIFIER = ('{0}') +EBI_LINKIFIER = ( + '{0}' +) def linkify(link_template, item): @@ -74,7 +76,7 @@ def clean_str(item): def convert_text_html(message): """Linkify URLs and turn newlines into
<br/> for HTML"""
    html = xhtml_unescape(tornado_linkify(message))
-    return html.replace('\n', '<br/>')
+    return html.replace("\n", "<br/>
") @execute_as_transaction @@ -92,11 +94,13 @@ def generate_param_str(param): The html string with the parameter set values """ values = param.values - ref = Reference(values['reference']) + ref = Reference(values["reference"]) result = ["Reference: %s %s" % (ref.name, ref.version)] - result.extend("%s: %s" % (name, value) - for name, value in values.items() - if name != 'reference') + result.extend( + "%s: %s" % (name, value) + for name, value in values.items() + if name != "reference" + ) return "
".join(result) @@ -113,7 +117,7 @@ def is_localhost(host): bool True if local request """ - localhost = ('localhost', '127.0.0.1') + localhost = ("localhost", "127.0.0.1") return host.startswith(localhost) @@ -132,29 +136,29 @@ def get_artifact_processing_status(artifact): 'Not processed'} A summary of the jobs attached to the artifact """ - preprocessing_status = 'Not processed' + preprocessing_status = "Not processed" preprocessing_status_msg = [] for job in artifact.jobs(): job_status = job.status - if job_status == 'error': - if preprocessing_status != 'success': - preprocessing_status = 'failed' + if job_status == "error": + if preprocessing_status != "success": + preprocessing_status = "failed" preprocessing_status_msg.append( - "Job %s: failed - %s" - % (job.id, job.log.msg)) - elif job_status == 'success': - preprocessing_status = 'success' + "Job %s: failed - %s" % (job.id, job.log.msg) + ) + elif job_status == "success": + preprocessing_status = "success" else: - if preprocessing_status != 'success': - preprocessing_status = 'processing' - preprocessing_status_msg.append( - "Job %s: %s" % (job.id, job_status)) + if preprocessing_status != "success": + preprocessing_status = "processing" + preprocessing_status_msg.append("Job %s: %s" % (job.id, job_status)) if not preprocessing_status_msg: - preprocessing_status_msg = 'Not processed' + preprocessing_status_msg = "Not processed" else: preprocessing_status_msg = convert_text_html( - '
<br/>'.join(preprocessing_status_msg))
+            "<br/>
".join(preprocessing_status_msg) + ) return preprocessing_status, preprocessing_status_msg @@ -186,37 +190,37 @@ def get_network_nodes_edges(graph, full_access, nodes=None, edges=None): # n[0] is the data type: job/artifact/type # n[1] is the object for n in graph.nodes(): - if n[0] == 'job': + if n[0] == "job": # ignoring internal Jobs - if n[1].command.software.name == 'Qiita': + if n[1].command.software.name == "Qiita": continue - atype = 'job' + atype = "job" name = n[1].command.name status = n[1].status wkflow = n[1].processing_job_workflow - if status == 'in_construction' and wkflow is not None: + if status == "in_construction" and wkflow is not None: workflow_id = wkflow.id - elif n[0] == 'artifact': + elif n[0] == "artifact": atype = n[1].artifact_type - status = 'artifact' + status = "artifact" pp = n[1].processing_parameters if pp is not None: cmd = pp.command if cmd.software.deprecated: - status = 'deprecated' + status = "deprecated" elif not cmd.active: - status = 'outdated' - if full_access or n[1].visibility == 'public': - name = '%s\n(%s)' % (n[1].name, n[1].artifact_type) + status = "outdated" + if full_access or n[1].visibility == "public": + name = "%s\n(%s)" % (n[1].name, n[1].artifact_type) else: continue - elif n[0] == 'type': + elif n[0] == "type": atype = n[1].type - name = '%s\n(%s)' % (n[1].name, n[1].type) - status = 'type' + name = "%s\n(%s)" % (n[1].name, n[1].type) + status = "type" else: # this should never happen but let's add it just in case - raise ValueError('not valid node type: %s' % n[0]) + raise ValueError("not valid node type: %s" % n[0]) nodes.append((n[0], atype, n[1].id, name, status)) edges.extend([(n[1].id, m[1].id) for n, m in graph.edges()]) diff --git a/qiita_pet/webserver.py b/qiita_pet/webserver.py index 412667213..33edca142 100644 --- a/qiita_pet/webserver.py +++ b/qiita_pet/webserver.py @@ -7,100 +7,178 @@ # ----------------------------------------------------------------------------- # login code modified from https://gist.github.com/guillaumevincent/4771570 +from base64 import b64encode +from os.path import dirname, join +from uuid import uuid4 + import tornado.auth import tornado.escape import tornado.web import tornado.websocket -from os.path import dirname, join -from base64 import b64encode -from uuid import uuid4 from qiita_core.qiita_settings import qiita_config from qiita_core.util import is_test_environment -from qiita_pet.handlers.base_handlers import ( - MainHandler, NoPageHandler, IFrame) -from qiita_pet.handlers.auth_handlers import ( - AuthCreateHandler, AuthLoginHandler, AuthLogoutHandler, AuthVerifyHandler) -from qiita_pet.handlers.user_handlers import ( - ChangeForgotPasswordHandler, ForgotPasswordHandler, UserProfileHandler, - UserMessagesHander, UserJobs, PurgeUsersAJAXHandler, PurgeUsersHandler) -from qiita_pet.handlers.admin_processing_job import ( - AdminProcessingJob, AJAXAdminProcessingJobListing, SampleValidation) -from qiita_pet.handlers.analysis_handlers import ( - ListAnalysesHandler, AnalysisSummaryAJAX, SelectedSamplesHandler, - AnalysisHandler, AnalysisGraphHandler, CreateAnalysisHandler, - AnalysisJobsHandler, ShareAnalysisAJAX) -from qiita_pet.handlers.study_handlers import ( - StudyIndexHandler, StudyBaseInfoAJAX, SampleTemplateHandler, - SampleTemplateOverviewHandler, SampleTemplateColumnsHandler, - StudyEditHandler, ListStudiesHandler, ListStudiesAJAX, EBISubmitHandler, - CreateStudyAJAX, ShareStudyAJAX, StudyApprovalList, ArtifactGraphAJAX, - VAMPSHandler, Study, StudyTags, StudyGetTags, - 
ListCommandsHandler, ListOptionsHandler, PrepTemplateSummaryAJAX, - PrepTemplateAJAX, NewArtifactHandler, SampleAJAX, StudyDeleteAjax, - ArtifactAdminAJAX, NewPrepTemplateAjax, DataTypesMenuAJAX, StudyFilesAJAX, - ArtifactGetSamples, ArtifactGetInfo, WorkflowHandler, AnalysesAjax, - WorkflowRunHandler, AddDefaultWorkflowHandler, JobAJAX, - AutocompleteHandler) -from qiita_pet.handlers.artifact_handlers import ( - ArtifactSummaryAJAX, ArtifactAJAX, ArtifactSummaryHandler) -from qiita_pet.handlers.websocket_handlers import ( - MessageHandler, SelectedSocketHandler, SelectSamplesHandler) -from qiita_pet.handlers.logger_handlers import LogEntryViewerHandler -from qiita_pet.handlers.upload import ( - UploadFileHandler, StudyUploadFileHandler, StudyUploadViaRemote) -from qiita_pet.handlers.stats import StatsHandler -from qiita_pet.handlers.resources import ResourcesHandler -from qiita_pet.handlers.download import ( - DownloadHandler, DownloadStudyBIOMSHandler, DownloadRelease, - DownloadRawData, DownloadEBISampleAccessions, DownloadEBIPrepAccessions, - DownloadUpload, DownloadPublicHandler, DownloadPublicArtifactHandler, - DownloadSampleInfoPerPrep, DownloadPrivateArtifactHandler, - DownloadDataReleaseFromPrep) -from qiita_pet.handlers.prep_template import ( - PrepTemplateHandler, PrepTemplateGraphHandler, PrepTemplateJobHandler) -from qiita_pet.handlers.ontology import OntologyHandler -from qiita_pet.handlers.software import SoftwareHandler, WorkflowsHandler -from qiita_db.handlers.processing_job import ( - JobHandler, HeartbeatHandler, ActiveStepHandler, CompleteHandler, - ProcessingJobAPItestHandler) +from qiita_db.handlers.analysis import APIAnalysisMetadataHandler +from qiita_db.handlers.archive import APIArchiveObservations from qiita_db.handlers.artifact import ( - ArtifactHandler, ArtifactAPItestHandler, ArtifactTypeHandler, - APIArtifactHandler) -from qiita_db.handlers.sample_information import SampleInfoDBHandler -from qiita_db.handlers.user import UserInfoDBHandler, UsersListDBHandler -from qiita_db.handlers.prep_template import ( - PrepTemplateDataHandler, PrepTemplateAPItestHandler, - PrepTemplateAPIHandler, PrepTemplateDBHandler) -from qiita_db.handlers.oauth2 import TokenAuthHandler -from qiita_db.handlers.reference import ReferenceHandler + APIArtifactHandler, + ArtifactAPItestHandler, + ArtifactHandler, + ArtifactTypeHandler, +) from qiita_db.handlers.core import ResetAPItestHandler +from qiita_db.handlers.oauth2 import TokenAuthHandler from qiita_db.handlers.plugin import ( - PluginHandler, CommandHandler, CommandListHandler, CommandActivateHandler, - ReloadPluginAPItestHandler) -from qiita_db.handlers.analysis import APIAnalysisMetadataHandler -from qiita_db.handlers.archive import APIArchiveObservations + CommandActivateHandler, + CommandHandler, + CommandListHandler, + PluginHandler, + ReloadPluginAPItestHandler, +) +from qiita_db.handlers.prep_template import ( + PrepTemplateAPIHandler, + PrepTemplateAPItestHandler, + PrepTemplateDataHandler, + PrepTemplateDBHandler, +) +from qiita_db.handlers.processing_job import ( + ActiveStepHandler, + CompleteHandler, + HeartbeatHandler, + JobHandler, + ProcessingJobAPItestHandler, +) +from qiita_db.handlers.reference import ReferenceHandler +from qiita_db.handlers.sample_information import SampleInfoDBHandler from qiita_db.handlers.studies import APIStudiesListing +from qiita_db.handlers.user import UserInfoDBHandler, UsersListDBHandler from qiita_db.util import get_mountpoint -from qiita_pet.handlers.rest import ENDPOINTS as 
REST_ENDPOINTS -from qiita_pet.handlers.qiita_redbiom import RedbiomPublicSearch -from qiita_pet.handlers.public import PublicHandler +from qiita_pet.handlers.admin_processing_job import ( + AdminProcessingJob, + AJAXAdminProcessingJobListing, + SampleValidation, +) +from qiita_pet.handlers.analysis_handlers import ( + AnalysisGraphHandler, + AnalysisHandler, + AnalysisJobsHandler, + AnalysisSummaryAJAX, + CreateAnalysisHandler, + ListAnalysesHandler, + SelectedSamplesHandler, + ShareAnalysisAJAX, +) +from qiita_pet.handlers.artifact_handlers import ( + ArtifactAJAX, + ArtifactSummaryAJAX, + ArtifactSummaryHandler, +) +from qiita_pet.handlers.auth_handlers import ( + AuthCreateHandler, + AuthLoginHandler, + AuthLogoutHandler, + AuthVerifyHandler, +) +from qiita_pet.handlers.base_handlers import IFrame, MainHandler, NoPageHandler from qiita_pet.handlers.cloud_handlers import ENDPOINTS as CLOUD_ENDPOINTS +from qiita_pet.handlers.download import ( + DownloadDataReleaseFromPrep, + DownloadEBIPrepAccessions, + DownloadEBISampleAccessions, + DownloadHandler, + DownloadPrivateArtifactHandler, + DownloadPublicArtifactHandler, + DownloadPublicHandler, + DownloadRawData, + DownloadRelease, + DownloadSampleInfoPerPrep, + DownloadStudyBIOMSHandler, + DownloadUpload, +) +from qiita_pet.handlers.logger_handlers import LogEntryViewerHandler +from qiita_pet.handlers.ontology import OntologyHandler +from qiita_pet.handlers.prep_template import ( + PrepTemplateGraphHandler, + PrepTemplateHandler, + PrepTemplateJobHandler, +) +from qiita_pet.handlers.public import PublicHandler +from qiita_pet.handlers.qiita_redbiom import RedbiomPublicSearch +from qiita_pet.handlers.resources import ResourcesHandler +from qiita_pet.handlers.rest import ENDPOINTS as REST_ENDPOINTS +from qiita_pet.handlers.software import SoftwareHandler, WorkflowsHandler +from qiita_pet.handlers.stats import StatsHandler +from qiita_pet.handlers.study_handlers import ( + AddDefaultWorkflowHandler, + AnalysesAjax, + ArtifactAdminAJAX, + ArtifactGetInfo, + ArtifactGetSamples, + ArtifactGraphAJAX, + AutocompleteHandler, + CreateStudyAJAX, + DataTypesMenuAJAX, + EBISubmitHandler, + JobAJAX, + ListCommandsHandler, + ListOptionsHandler, + ListStudiesAJAX, + ListStudiesHandler, + NewArtifactHandler, + NewPrepTemplateAjax, + PrepTemplateAJAX, + PrepTemplateSummaryAJAX, + SampleAJAX, + SampleTemplateColumnsHandler, + SampleTemplateHandler, + SampleTemplateOverviewHandler, + ShareStudyAJAX, + Study, + StudyApprovalList, + StudyBaseInfoAJAX, + StudyDeleteAjax, + StudyEditHandler, + StudyFilesAJAX, + StudyGetTags, + StudyIndexHandler, + StudyTags, + VAMPSHandler, + WorkflowHandler, + WorkflowRunHandler, +) +from qiita_pet.handlers.upload import ( + StudyUploadFileHandler, + StudyUploadViaRemote, + UploadFileHandler, +) +from qiita_pet.handlers.user_handlers import ( + ChangeForgotPasswordHandler, + ForgotPasswordHandler, + PurgeUsersAJAXHandler, + PurgeUsersHandler, + UserJobs, + UserMessagesHander, + UserProfileHandler, +) +from qiita_pet.handlers.websocket_handlers import ( + MessageHandler, + SelectedSocketHandler, + SelectSamplesHandler, +) if qiita_config.portal == "QIITA": - from qiita_pet.handlers.portal import ( - StudyPortalHandler, StudyPortalAJAXHandler) + from qiita_pet.handlers.portal import StudyPortalAJAXHandler, StudyPortalHandler DIRNAME = dirname(__file__) STATIC_PATH = join(DIRNAME, "static") TEMPLATE_PATH = join(DIRNAME, "templates") # base folder for webpages -_, RES_PATH = get_mountpoint('job')[0] +_, RES_PATH = 
get_mountpoint("job")[0] COOKIE_SECRET = b64encode(uuid4().bytes + uuid4().bytes) DEBUG = qiita_config.test_environment -_vendor_js = join(STATIC_PATH, 'vendor', 'js') +_vendor_js = join(STATIC_PATH, "vendor", "js") class Application(tornado.web.Application): @@ -116,8 +194,7 @@ def __init__(self): (r"/profile/", UserProfileHandler), (r"/user/messages/", UserMessagesHander), (r"/user/jobs/", UserJobs), - (r"/static/(.*)", tornado.web.StaticFileHandler, - {"path": STATIC_PATH}), + (r"/static/(.*)", tornado.web.StaticFileHandler, {"path": STATIC_PATH}), # Analysis handlers (r"/analysis/list/", ListAnalysesHandler), (r"/analysis/dflt/sumary/", AnalysisSummaryAJAX), @@ -165,21 +242,27 @@ def __init__(self): # Artifact handlers (r"/artifact/graph/", ArtifactGraphAJAX), (r"/artifact/(.*)/summary/", ArtifactSummaryAJAX), - (r"/artifact/html_summary/(.*)", ArtifactSummaryHandler, - {"path": qiita_config.base_data_dir}), + ( + r"/artifact/html_summary/(.*)", + ArtifactSummaryHandler, + {"path": qiita_config.base_data_dir}, + ), (r"/artifact/(.*)/", ArtifactAJAX), # Prep template handlers (r"/prep_template/", PrepTemplateHandler), (r"/prep_template/(.*)/graph/", PrepTemplateGraphHandler), (r"/prep_template/(.*)/jobs/", PrepTemplateJobHandler), (r"/ontology/", OntologyHandler), - # ORDER FOR /study/description/ SUBPAGES HERE MATTERS. # Same reasoning as below. /study/description/(.*) should be last. - (r"/study/description/sample_template/overview/", - SampleTemplateOverviewHandler), - (r"/study/description/sample_template/columns/", - SampleTemplateColumnsHandler), + ( + r"/study/description/sample_template/overview/", + SampleTemplateOverviewHandler, + ), + ( + r"/study/description/sample_template/columns/", + SampleTemplateColumnsHandler, + ), (r"/study/description/sample_template/", SampleTemplateHandler), (r"/study/description/sample_summary/", SampleAJAX), (r"/study/description/prep_summary/", PrepTemplateSummaryAJAX), @@ -196,16 +279,12 @@ def __init__(self): (r"/software/", SoftwareHandler), (r"/workflows/", WorkflowsHandler), (r"/download/(.*)", DownloadHandler), - (r"/download_data_release_from_prep/(.*)", - DownloadDataReleaseFromPrep), + (r"/download_data_release_from_prep/(.*)", DownloadDataReleaseFromPrep), (r"/download_study_bioms/(.*)", DownloadStudyBIOMSHandler), (r"/download_raw_data/(.*)", DownloadRawData), - (r"/download_ebi_accessions/samples/(.*)", - DownloadEBISampleAccessions), - (r"/download_sample_info_per_prep/(.*)", - DownloadSampleInfoPerPrep), - (r"/download_ebi_accessions/experiments/(.*)", - DownloadEBIPrepAccessions), + (r"/download_ebi_accessions/samples/(.*)", DownloadEBISampleAccessions), + (r"/download_sample_info_per_prep/(.*)", DownloadSampleInfoPerPrep), + (r"/download_ebi_accessions/experiments/(.*)", DownloadEBIPrepAccessions), (r"/download_upload/(.*)", DownloadUpload), (r"/release/download/(.*)", DownloadRelease), (r"/public_download/", DownloadPublicHandler), @@ -235,14 +314,16 @@ def __init__(self): (r"/qiita_db/prep_template/(.*)/", PrepTemplateDBHandler), (r"/qiita_db/prep_template/", PrepTemplateAPIHandler), (r"/qiita_db/references/(.*)/", ReferenceHandler), - (r"/qiita_db/plugins/(.*)/(.*)/commands/(.*)/activate/", - CommandActivateHandler), + ( + r"/qiita_db/plugins/(.*)/(.*)/commands/(.*)/activate/", + CommandActivateHandler, + ), (r"/qiita_db/plugins/(.*)/(.*)/commands/(.*)/", CommandHandler), (r"/qiita_db/plugins/(.*)/(.*)/commands/", CommandListHandler), (r"/qiita_db/plugins/(.*)/(.*)/", PluginHandler), (r"/qiita_db/analysis/(.*)/metadata/", 
APIAnalysisMetadataHandler), (r"/qiita_db/archive/observations/", APIArchiveObservations), - (r"/qiita_db/studies/(.*)", APIStudiesListing) + (r"/qiita_db/studies/(.*)", APIStudiesListing), ] # expose endpoints necessary for https file communication between @@ -257,7 +338,7 @@ def __init__(self): # Add portals editing pages only on main portal portals = [ (r"/admin/portals/studies/", StudyPortalHandler), - (r"/admin/portals/studiesAJAX/", StudyPortalAJAXHandler) + (r"/admin/portals/studiesAJAX/", StudyPortalAJAXHandler), ] handlers.extend(portals) @@ -268,7 +349,7 @@ def __init__(self): (r"/apitest/reset/", ResetAPItestHandler), (r"/apitest/prep_template/", PrepTemplateAPItestHandler), (r"/apitest/artifact/", ArtifactAPItestHandler), - (r"/apitest/reload_plugins/", ReloadPluginAPItestHandler) + (r"/apitest/reload_plugins/", ReloadPluginAPItestHandler), ] handlers.extend(test_handlers) diff --git a/qiita_ware/commands.py b/qiita_ware/commands.py index 249864ebe..a77910cdd 100644 --- a/qiita_ware/commands.py +++ b/qiita_ware/commands.py @@ -6,22 +6,23 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from os.path import basename, isdir, join, exists +from functools import partial +from os import environ, remove, stat +from os.path import basename, exists, isdir, join from shutil import rmtree from tarfile import open as taropen from tempfile import mkdtemp -from os import environ, stat, remove from traceback import format_exc -from paramiko import AutoAddPolicy, RSAKey, SSHClient -from scp import SCPClient from urllib.parse import urlparse -from functools import partial + import pandas as pd +from paramiko import AutoAddPolicy, RSAKey, SSHClient +from scp import SCPClient +from qiita_core.qiita_settings import qiita_config from qiita_db.artifact import Artifact from qiita_db.logger import LogEntry from qiita_db.processing_job import _system_call as system_call -from qiita_core.qiita_settings import qiita_config from qiita_ware.ebi import EBISubmission from qiita_ware.exceptions import ComputeError, EBISubmissionError @@ -47,7 +48,7 @@ def _ssh_session(p_url, private_key): port = p_url.port username = p_url.username - if scheme == 'scp': + if scheme == "scp": # if port not specified, use default 22 as port if port is None: port = 22 @@ -59,12 +60,12 @@ def _ssh_session(p_url, private_key): # step 2: connect to fileserver key = RSAKey.from_private_key_file(private_key) - ssh.connect(hostname, port=port, username=username, - pkey=key, look_for_keys=False) + ssh.connect( + hostname, port=port, username=username, pkey=key, look_for_keys=False + ) return ssh else: - raise ValueError( - 'Not valid scheme. Valid options is scp.') + raise ValueError("Not valid scheme. 
Valid options is scp.") def _list_valid_files(ssh, directory): @@ -84,11 +85,11 @@ def _list_valid_files(ssh, directory): """ valid_file_extensions = tuple(qiita_config.valid_upload_extension) - stdin, stdout, stderr = ssh.exec_command('ls %s' % directory) + stdin, stdout, stderr = ssh.exec_command("ls %s" % directory) stderr = stderr.read().decode("utf-8") if stderr: raise ValueError(stderr) - files = stdout.read().decode("utf-8").split('\n') + files = stdout.read().decode("utf-8").split("\n") valid_files = [f for f in files if f.endswith(valid_file_extensions)] @@ -154,11 +155,10 @@ def download_remote(URL, private_key, destination): # step 2: download files scheme = p_url.scheme - if scheme == 'scp': + if scheme == "scp": scp = SCPClient(ssh.get_transport()) for f in file_paths: - download = partial( - scp.get, local_path=join(destination, basename(f))) + download = partial(scp.get, local_path=join(destination, basename(f))) download(f) # step 3: close the connection @@ -195,142 +195,171 @@ def submit_EBI(artifact_id, action, send, test=False, test_size=False): error_msg = format_exc() if isdir(ebi_submission.full_ebi_dir): rmtree(ebi_submission.full_ebi_dir) - LogEntry.create('Runtime', error_msg, - info={'ebi_submission': artifact_id}) + LogEntry.create("Runtime", error_msg, info={"ebi_submission": artifact_id}) raise # step 3: generate and write xml files ebi_submission.generate_xml_files() # before we continue let's check the size of the submission - to_review = [ebi_submission.study_xml_fp, - ebi_submission.sample_xml_fp, - ebi_submission.experiment_xml_fp, - ebi_submission.run_xml_fp, - ebi_submission.submission_xml_fp] + to_review = [ + ebi_submission.study_xml_fp, + ebi_submission.sample_xml_fp, + ebi_submission.experiment_xml_fp, + ebi_submission.run_xml_fp, + ebi_submission.submission_xml_fp, + ] total_size = sum([stat(tr).st_size for tr in to_review if tr is not None]) # note that the max for EBI is 10M but let's play it safe - max_size = 10e+6 if not test_size else 5000 + max_size = 10e6 if not test_size else 5000 if total_size > max_size: LogEntry.create( - 'Runtime', 'The submission: %d is larger than allowed (%d), will ' - 'try to fix: %d' % (artifact_id, max_size, total_size)) + "Runtime", + "The submission: %d is larger than allowed (%d), will " + "try to fix: %d" % (artifact_id, max_size, total_size), + ) def _reduce_metadata(low=0.01, high=0.5): # helper function to # transform current metadata to dataframe for easier curation rows = {k: dict(v) for k, v in ebi_submission.samples.items()} - df = pd.DataFrame.from_dict(rows, orient='index') + df = pd.DataFrame.from_dict(rows, orient="index") # remove unique columns and same value in all columns nunique = df.apply(pd.Series.nunique) nsamples = len(df.index) - cols_to_drop = set( - nunique[(nunique == 1) | (nunique == nsamples)].index) + cols_to_drop = set(nunique[(nunique == 1) | (nunique == nsamples)].index) # maximize deletion by removing also columns that are almost all # the same or almost all unique cols_to_drop = set( - nunique[(nunique <= int(nsamples * low)) | - (nunique >= int(nsamples * high))].index) - cols_to_drop = cols_to_drop - {'taxon_id', 'scientific_name', - 'description', 'country', - 'collection_date'} + nunique[ + (nunique <= int(nsamples * low)) | (nunique >= int(nsamples * high)) + ].index + ) + cols_to_drop = cols_to_drop - { + "taxon_id", + "scientific_name", + "description", + "country", + "collection_date", + } all_samples = ebi_submission.sample_template.ebi_sample_accessions - if action == 
'ADD': - samples = [k for k in ebi_submission.samples - if all_samples[k] is None] + if action == "ADD": + samples = [k for k in ebi_submission.samples if all_samples[k] is None] else: - samples = [k for k in ebi_submission.samples - if all_samples[k] is not None] + samples = [ + k for k in ebi_submission.samples if all_samples[k] is not None + ] if samples: ebi_submission.write_xml_file( ebi_submission.generate_sample_xml(samples, cols_to_drop), - ebi_submission.sample_xml_fp) + ebi_submission.sample_xml_fp, + ) # let's try with the default pameters _reduce_metadata() # now let's recalculate the size to make sure it's fine - new_total_size = sum([stat(tr).st_size - for tr in to_review if tr is not None]) + new_total_size = sum([stat(tr).st_size for tr in to_review if tr is not None]) LogEntry.create( - 'Runtime', - 'The submission: %d after defaul cleaning is %d and was %d' % ( - artifact_id, total_size, new_total_size)) + "Runtime", + "The submission: %d after defaul cleaning is %d and was %d" + % (artifact_id, total_size, new_total_size), + ) if new_total_size > max_size: LogEntry.create( - 'Runtime', 'Submission %d still too big, will try more ' - 'stringent parameters' % (artifact_id)) + "Runtime", + "Submission %d still too big, will try more " + "stringent parameters" % (artifact_id), + ) _reduce_metadata(0.05, 0.4) - new_total_size = sum([stat(tr).st_size - for tr in to_review if tr is not None]) + new_total_size = sum( + [stat(tr).st_size for tr in to_review if tr is not None] + ) LogEntry.create( - 'Runtime', - 'The submission: %d after defaul cleaning is %d and was %d' % ( - artifact_id, total_size, new_total_size)) + "Runtime", + "The submission: %d after defaul cleaning is %d and was %d" + % (artifact_id, total_size, new_total_size), + ) if new_total_size > max_size: raise ComputeError( - 'Even after cleaning the submission: %d is too large. ' - 'Before cleaning: %d, after: %d' % ( - artifact_id, total_size, new_total_size)) + "Even after cleaning the submission: %d is too large. 
" + "Before cleaning: %d, after: %d" + % (artifact_id, total_size, new_total_size) + ) st_acc, sa_acc, bio_acc, ex_acc, run_acc = None, None, None, None, None if send: # getting aspera's password - old_ascp_pass = environ.get('ASPERA_SCP_PASS', '') - if old_ascp_pass == '': - environ['ASPERA_SCP_PASS'] = qiita_config.ebi_seq_xfer_pass - ascp_passwd = environ['ASPERA_SCP_PASS'] - LogEntry.create('Runtime', - ('Submission of sequences of pre_processed_id: ' - '%d completed successfully' % artifact_id)) + old_ascp_pass = environ.get("ASPERA_SCP_PASS", "") + if old_ascp_pass == "": + environ["ASPERA_SCP_PASS"] = qiita_config.ebi_seq_xfer_pass + ascp_passwd = environ["ASPERA_SCP_PASS"] + LogEntry.create( + "Runtime", + ( + "Submission of sequences of pre_processed_id: " + "%d completed successfully" % artifact_id + ), + ) # step 4: sending sequences - if action != 'MODIFY': - LogEntry.create('Runtime', - ("Submitting sequences for pre_processed_id: " - "%d" % artifact_id)) + if action != "MODIFY": + LogEntry.create( + "Runtime", + ("Submitting sequences for pre_processed_id: %d" % artifact_id), + ) for cmd in ebi_submission.generate_send_sequences_cmd(): stdout, stderr, rv = system_call(cmd) if rv != 0: - error_msg = ("ASCP Error:\nStd output:%s\nStd error:%s" % ( - stdout, stderr)) - environ['ASPERA_SCP_PASS'] = old_ascp_pass + error_msg = "ASCP Error:\nStd output:%s\nStd error:%s" % ( + stdout, + stderr, + ) + environ["ASPERA_SCP_PASS"] = old_ascp_pass raise ComputeError(error_msg) - open(ebi_submission.ascp_reply, 'a').write( - 'stdout:\n%s\n\nstderr: %s' % (stdout, stderr)) - environ['ASPERA_SCP_PASS'] = old_ascp_pass + open(ebi_submission.ascp_reply, "a").write( + "stdout:\n%s\n\nstderr: %s" % (stdout, stderr) + ) + environ["ASPERA_SCP_PASS"] = old_ascp_pass # step 5: sending xml - xmls_cmds = ebi_submission.generate_curl_command( - ebi_seq_xfer_pass=ascp_passwd) - LogEntry.create('Runtime', - ("Submitting XMLs for pre_processed_id: " - "%d" % artifact_id)) + xmls_cmds = ebi_submission.generate_curl_command(ebi_seq_xfer_pass=ascp_passwd) + LogEntry.create( + "Runtime", ("Submitting XMLs for pre_processed_id: %d" % artifact_id) + ) xml_content, stderr, rv = system_call(xmls_cmds) if rv != 0: - error_msg = ("Error:\nStd output:%s\nStd error:%s" % ( - xml_content, stderr)) + error_msg = "Error:\nStd output:%s\nStd error:%s" % (xml_content, stderr) raise ComputeError(error_msg) else: - LogEntry.create('Runtime', - ('Submission of sequences of pre_processed_id: ' - '%d completed successfully' % artifact_id)) - open(ebi_submission.curl_reply, 'w').write( - 'stdout:\n%s\n\nstderr: %s' % (xml_content, stderr)) + LogEntry.create( + "Runtime", + ( + "Submission of sequences of pre_processed_id: " + "%d completed successfully" % artifact_id + ), + ) + open(ebi_submission.curl_reply, "w").write( + "stdout:\n%s\n\nstderr: %s" % (xml_content, stderr) + ) # parsing answer / only if adding - if action == 'ADD' or test: + if action == "ADD" or test: try: - st_acc, sa_acc, bio_acc, ex_acc, run_acc = \ + st_acc, sa_acc, bio_acc, ex_acc, run_acc = ( ebi_submission.parse_EBI_reply(xml_content, test=test) + ) except EBISubmissionError as e: error = str(e) le = LogEntry.create( - 'Fatal', "Command: %s\nError: %s\n" % (xml_content, error), - info={'ebi_submission': artifact_id}) + "Fatal", + "Command: %s\nError: %s\n" % (xml_content, error), + info={"ebi_submission": artifact_id}, + ) raise ComputeError( - "EBI Submission failed! Log id: %d\n%s" % (le.id, error)) + "EBI Submission failed! 
Log id: %d\n%s" % (le.id, error) + ) if st_acc: ebi_submission.study.ebi_study_accession = st_acc @@ -361,15 +390,14 @@ def submit_VAMPS(artifact_id): """ artifact = Artifact(artifact_id) if not artifact.can_be_submitted_to_vamps: - raise ComputeError("Artifact %d cannot be submitted to VAMPS" - % artifact_id) + raise ComputeError("Artifact %d cannot be submitted to VAMPS" % artifact_id) study = artifact.study sample_template = study.sample_template prep_templates = artifact.prep_templates if len(prep_templates) > 1: raise ComputeError( - "Multiple prep templates associated with the artifact: %s" - % artifact_id) + "Multiple prep templates associated with the artifact: %s" % artifact_id + ) prep_template = prep_templates[0] # Also need to check that is not submitting (see item in #1523) @@ -378,39 +406,42 @@ def submit_VAMPS(artifact_id): # Generating a tgz targz_folder = mkdtemp(prefix=qiita_config.working_dir) - targz_fp = join(targz_folder, '%d_%d_%d.tgz' % (study.id, - prep_template.id, - artifact_id)) - targz = taropen(targz_fp, mode='w:gz') + targz_fp = join( + targz_folder, "%d_%d_%d.tgz" % (study.id, prep_template.id, artifact_id) + ) + targz = taropen(targz_fp, mode="w:gz") # adding sample/prep - samp_fp = join(targz_folder, 'sample_metadata.txt') + samp_fp = join(targz_folder, "sample_metadata.txt") sample_template.to_file(samp_fp) - targz.add(samp_fp, arcname='sample_metadata.txt') - prep_fp = join(targz_folder, 'prep_metadata.txt') + targz.add(samp_fp, arcname="sample_metadata.txt") + prep_fp = join(targz_folder, "prep_metadata.txt") prep_template.to_file(prep_fp) - targz.add(prep_fp, arcname='prep_metadata.txt') + targz.add(prep_fp, arcname="prep_metadata.txt") # adding preprocessed data for x in artifact.filepaths: - if x['fp_type'] == 'preprocessed_fasta': - targz.add(x['fp'], arcname='preprocessed_fasta.fna') + if x["fp_type"] == "preprocessed_fasta": + targz.add(x["fp"], arcname="preprocessed_fasta.fna") targz.close() # submitting - cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F " - "press=UploadFile %s" % (qiita_config.vamps_user, - qiita_config.vamps_pass, - targz_fp, - qiita_config.vamps_url)) + cmd = "curl -F user=%s -F pass='%s' -F uploadFile=@%s -F press=UploadFile %s" % ( + qiita_config.vamps_user, + qiita_config.vamps_pass, + targz_fp, + qiita_config.vamps_url, + ) obs, stderr, rv = system_call(cmd) if rv != 0: - error_msg = ("Error:\nStd output:%s\nStd error:%s" % (obs, stderr)) + error_msg = "Error:\nStd output:%s\nStd error:%s" % (obs, stderr) raise ComputeError(error_msg) - exp = ("\n\nProcess Uploaded File\n\n" - "\n\n") + exp = ( + "\n\nProcess Uploaded File\n\n" + "\n\n" + ) if obs != exp: return False diff --git a/qiita_ware/ebi.py b/qiita_ware/ebi.py index 9c1411fbf..ac04bcf2e 100644 --- a/qiita_ware/ebi.py +++ b/qiita_ware/ebi.py @@ -7,35 +7,36 @@ # ----------------------------------------------------------------------------- import hashlib -from os.path import basename, join, isdir, isfile, exists -from shutil import copyfile, rmtree -from os import remove, listdir, makedirs from datetime import date, timedelta -from urllib.parse import quote +from functools import partial +from gzip import GzipFile from itertools import zip_longest +from os import listdir, makedirs, remove +from os.path import basename, exists, isdir, isfile, join +from shutil import copyfile, rmtree +from urllib.parse import quote from xml.etree import ElementTree as ET from xml.etree.ElementTree import ParseError from xml.sax.saxutils import escape -from gzip import 
GzipFile -from functools import partial + from h5py import File from qiita_files.demux import to_per_sample_ascii from qiita_core.qiita_settings import qiita_config -from qiita_ware.exceptions import EBISubmissionError -from qiita_db.util import create_nested_path -from qiita_db.logger import LogEntry -from qiita_db.ontology import Ontology -from qiita_db.util import convert_to_id, get_mountpoint, open_file from qiita_db.artifact import Artifact +from qiita_db.logger import LogEntry from qiita_db.metadata_template.constants import ( - TARGET_GENE_DATA_TYPES, PREP_TEMPLATE_COLUMNS_TARGET_GENE) + PREP_TEMPLATE_COLUMNS_TARGET_GENE, + TARGET_GENE_DATA_TYPES, +) +from qiita_db.ontology import Ontology from qiita_db.processing_job import _system_call as system_call - +from qiita_db.util import convert_to_id, create_nested_path, get_mountpoint, open_file +from qiita_ware.exceptions import EBISubmissionError ENA_COLS_TO_FIX = { - 'country': 'geographic location (country and/or sea)', - 'collection_date': 'collection date' + "country": "geographic location (country and/or sea)", + "collection_date": "collection date", } @@ -52,7 +53,7 @@ def clean_whitespace(text): str fixed text """ - return ' '.join(str(text).split()) + return " ".join(str(text).split()) class EBISubmission(object): @@ -90,45 +91,62 @@ class EBISubmission(object): - If the sample preparation metadata doesn't have a platform field or it isn't a EBISubmission.valid_platforms """ - FWD_READ_SUFFIX = '.R1.fastq.gz' - REV_READ_SUFFIX = '.R2.fastq.gz' - valid_ebi_actions = ('ADD', 'VALIDATE', 'MODIFY') - valid_ebi_submission_states = ('submitting') + FWD_READ_SUFFIX = ".R1.fastq.gz" + REV_READ_SUFFIX = ".R2.fastq.gz" + + valid_ebi_actions = ("ADD", "VALIDATE", "MODIFY") + valid_ebi_submission_states = "submitting" # valid_platforms dict of 'platform': ['valid_instrument_models'] - valid_platforms = {'DNBSEQ': ['DNBSEQ-G400', 'DNBSEQ-T7', 'DNBSEQ-G800'], - 'LS454': ['454 GS', '454 GS 20', '454 GS FLX', - '454 GS FLX+', '454 GS FLX TITANIUM', - '454 GS JUNIOR', 'UNSPECIFIED'], - 'ION_TORRENT': ['ION TORRENT PGM', 'ION TORRENT PROTON', - 'ION TORRENT S5', 'ION TORRENT S5 XL'], - 'ILLUMINA': ['HISEQ X FIVE', - 'HISEQ X TEN', - 'ILLUMINA GENOME ANALYZER', - 'ILLUMINA GENOME ANALYZER II', - 'ILLUMINA GENOME ANALYZER IIX', - 'ILLUMINA HISCANSQ', - 'ILLUMINA HISEQ 1000', - 'ILLUMINA HISEQ 1500', - 'ILLUMINA HISEQ 2000', - 'ILLUMINA HISEQ 2500', - 'ILLUMINA HISEQ 3000', - 'ILLUMINA HISEQ 4000', - 'ILLUMINA MISEQ', - 'ILLUMINA MINISEQ', - 'ILLUMINA NOVASEQ 6000', - 'ILLUMINA NOVASEQ X', - 'NEXTSEQ 500', - 'NEXTSEQ 550', - 'UNSPECIFIED'], - 'OXFORD_NANOPORE': ['GRIDION'], - 'PACBIO_SMRT': ['PACBIO RS', - 'PACBIO RS II', - 'SEQUEL', - 'ONSO', - 'REVIO', - 'SEQUEL IIE', - 'SEQUEL II']} + valid_platforms = { + "DNBSEQ": ["DNBSEQ-G400", "DNBSEQ-T7", "DNBSEQ-G800"], + "LS454": [ + "454 GS", + "454 GS 20", + "454 GS FLX", + "454 GS FLX+", + "454 GS FLX TITANIUM", + "454 GS JUNIOR", + "UNSPECIFIED", + ], + "ION_TORRENT": [ + "ION TORRENT PGM", + "ION TORRENT PROTON", + "ION TORRENT S5", + "ION TORRENT S5 XL", + ], + "ILLUMINA": [ + "HISEQ X FIVE", + "HISEQ X TEN", + "ILLUMINA GENOME ANALYZER", + "ILLUMINA GENOME ANALYZER II", + "ILLUMINA GENOME ANALYZER IIX", + "ILLUMINA HISCANSQ", + "ILLUMINA HISEQ 1000", + "ILLUMINA HISEQ 1500", + "ILLUMINA HISEQ 2000", + "ILLUMINA HISEQ 2500", + "ILLUMINA HISEQ 3000", + "ILLUMINA HISEQ 4000", + "ILLUMINA MISEQ", + "ILLUMINA MINISEQ", + "ILLUMINA NOVASEQ 6000", + "ILLUMINA NOVASEQ X", + "NEXTSEQ 500", + "NEXTSEQ 550", 
+ "UNSPECIFIED", + ], + "OXFORD_NANOPORE": ["GRIDION"], + "PACBIO_SMRT": [ + "PACBIO RS", + "PACBIO RS II", + "SEQUEL", + "ONSO", + "REVIO", + "SEQUEL IIE", + "SEQUEL II", + ], + } xmlns_xsi = "http://www.w3.org/2001/XMLSchema-instance" xsi_noNSL = "ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_3/SRA.%s.xsd" @@ -137,19 +155,19 @@ def __init__(self, artifact_id, action): error_msgs = [] if action not in self.valid_ebi_actions: - error_msg = ("%s is not a valid EBI submission action, valid " - "actions are: %s" % - (action, ', '.join(self.valid_ebi_actions))) - LogEntry.create('Runtime', error_msg) + error_msg = ( + "%s is not a valid EBI submission action, valid " + "actions are: %s" % (action, ", ".join(self.valid_ebi_actions)) + ) + LogEntry.create("Runtime", error_msg) raise EBISubmissionError(error_msg) - ena_ontology = Ontology(convert_to_id('ENA', 'ontology')) + ena_ontology = Ontology(convert_to_id("ENA", "ontology")) self.action = action self.artifact = Artifact(artifact_id) if not self.artifact.can_be_submitted_to_ebi: - error_msg = ("Artifact %d cannot be submitted to EBI" - % self.artifact.id) - LogEntry.create('Runtime', error_msg) + error_msg = "Artifact %d cannot be submitted to EBI" % self.artifact.id + LogEntry.create("Runtime", error_msg) raise EBISubmissionError(error_msg) self.study = self.artifact.study @@ -161,35 +179,39 @@ def __init__(self, artifact_id, action): # be set to false, which is checked in the previous if statement self.prep_template = self.artifact.prep_templates[0] - if self.artifact.is_submitted_to_ebi and action != 'MODIFY': - error_msg = ("Cannot resubmit! Artifact %d has already " - "been submitted to EBI." % artifact_id) - LogEntry.create('Runtime', error_msg) + if self.artifact.is_submitted_to_ebi and action != "MODIFY": + error_msg = ( + "Cannot resubmit! Artifact %d has already " + "been submitted to EBI." % artifact_id + ) + LogEntry.create("Runtime", error_msg) raise EBISubmissionError(error_msg) self.artifact_id = artifact_id self.study_title = self.study.title - self.study_abstract = self.study.info['study_abstract'] + self.study_abstract = self.study.info["study_abstract"] it = self.prep_template.investigation_type if it in ena_ontology.terms: self.investigation_type = it self.new_investigation_type = None elif it in ena_ontology.user_defined_terms: - self.investigation_type = 'Other' + self.investigation_type = "Other" self.new_investigation_type = it else: # This should never happen - error_msgs.append("Unrecognized investigation type: '%s'. This " - "term is neither one of the official terms nor " - "one of the user-defined terms in the ENA " - "ontology." % it) + error_msgs.append( + "Unrecognized investigation type: '%s'. This " + "term is neither one of the official terms nor " + "one of the user-defined terms in the ENA " + "ontology." 
% it + ) _, base_fp = get_mountpoint("preprocessed_data")[0] - self.ebi_dir = '%d_ebi_submission' % artifact_id + self.ebi_dir = "%d_ebi_submission" % artifact_id self.full_ebi_dir = join(base_fp, self.ebi_dir) - self.ascp_reply = join(self.full_ebi_dir, 'ascp_reply.txt') - self.curl_reply = join(self.full_ebi_dir, 'curl_reply.xml') - self.xml_dir = join(self.full_ebi_dir, 'xml_dir') + self.ascp_reply = join(self.full_ebi_dir, "ascp_reply.txt") + self.curl_reply = join(self.full_ebi_dir, "curl_reply.xml") + self.xml_dir = join(self.full_ebi_dir, "xml_dir") self.study_xml_fp = None self.sample_xml_fp = None self.experiment_xml_fp = None @@ -199,22 +221,24 @@ def __init__(self, artifact_id, action): self.publications = self.study.publications # getting the restrictions - st_restrictions = [self.sample_template.columns_restrictions['EBI']] - pt_restrictions = [self.prep_template.columns_restrictions['EBI']] + st_restrictions = [self.sample_template.columns_restrictions["EBI"]] + pt_restrictions = [self.prep_template.columns_restrictions["EBI"]] if self.artifact.data_type in TARGET_GENE_DATA_TYPES: # adding restrictions on primer and barcode as these are # conditionally requiered for target gene - pt_restrictions.append( - PREP_TEMPLATE_COLUMNS_TARGET_GENE['demultiplex']) + pt_restrictions.append(PREP_TEMPLATE_COLUMNS_TARGET_GENE["demultiplex"]) st_missing = self.sample_template.check_restrictions(st_restrictions) pt_missing = self.prep_template.check_restrictions(pt_restrictions) # testing if there are any missing columns if st_missing: - error_msgs.append("Missing column in the sample template: %s" % - ', '.join(list(st_missing))) + error_msgs.append( + "Missing column in the sample template: %s" + % ", ".join(list(st_missing)) + ) if pt_missing: - error_msgs.append("Missing column in the prep template: %s" % - ', '.join(list(pt_missing))) + error_msgs.append( + "Missing column in the prep template: %s" % ", ".join(list(pt_missing)) + ) # generating all samples from sample template self.samples = {} @@ -225,19 +249,20 @@ def __init__(self, artifact_id, action): nvim = [] for k, sample_prep in self.prep_template.items(): # validating required fields - if ('platform' not in sample_prep or - sample_prep['platform'] is None): + if "platform" not in sample_prep or sample_prep["platform"] is None: nvp.append(k) else: - platform = sample_prep['platform'].upper() + platform = sample_prep["platform"].upper() if platform not in self.valid_platforms: nvp.append(k) else: - if ('instrument_model' not in sample_prep or - sample_prep['instrument_model'] is None): + if ( + "instrument_model" not in sample_prep + or sample_prep["instrument_model"] is None + ): nvim.append(k) else: - im = sample_prep['instrument_model'].upper() + im = sample_prep["instrument_model"].upper() if im not in self.valid_platforms[platform]: nvim.append(k) @@ -250,40 +275,50 @@ def __init__(self, artifact_id, action): self.sample_demux_fps[k] = get_output_fp(k) if nvp: - error_msgs.append("These samples do not have a valid platform " - "(instrumet model wasn't checked): %s" % ( - ', '.join(nvp))) + error_msgs.append( + "These samples do not have a valid platform " + "(instrumet model wasn't checked): %s" % (", ".join(nvp)) + ) if nvim: - error_msgs.append("These samples do not have a valid instrument " - "model: %s" % (', '.join(nvim))) + error_msgs.append( + "These samples do not have a valid instrument " + "model: %s" % (", ".join(nvim)) + ) if error_msgs: - error_msgs = ("Errors found during EBI submission for study #%d, " - 
"artifact #%d and prep template #%d:\n%s" - % (self.study.id, artifact_id, - self.prep_template.id, '\n'.join(error_msgs))) - LogEntry.create('Runtime', error_msgs) + error_msgs = ( + "Errors found during EBI submission for study #%d, " + "artifact #%d and prep template #%d:\n%s" + % ( + self.study.id, + artifact_id, + self.prep_template.id, + "\n".join(error_msgs), + ) + ) + LogEntry.create("Runtime", error_msgs) raise EBISubmissionError(error_msgs) self._sample_aliases = {} self._experiment_aliases = {} self._run_aliases = {} - self._ebi_sample_accessions = \ - self.sample_template.ebi_sample_accessions - self._ebi_experiment_accessions = \ - self.prep_template.ebi_experiment_accessions + self._ebi_sample_accessions = self.sample_template.ebi_sample_accessions + self._ebi_experiment_accessions = self.prep_template.ebi_experiment_accessions def _get_study_alias(self): """Format alias using ``self.study_id``""" - study_alias_format = '%s_sid_%s' + study_alias_format = "%s_sid_%s" return study_alias_format % ( qiita_config.ebi_organization_prefix, - escape(clean_whitespace(str(self.study.id)))) + escape(clean_whitespace(str(self.study.id))), + ) def _get_sample_alias(self, sample_name): """Format alias using ``self.study_id``, `sample_name`""" - alias = "%s:%s" % (self._get_study_alias(), - escape(clean_whitespace(str(sample_name)))) + alias = "%s:%s" % ( + self._get_study_alias(), + escape(clean_whitespace(str(sample_name))), + ) self._sample_aliases[alias] = sample_name return alias @@ -293,58 +328,59 @@ def _get_experiment_alias(self, sample_name): Currently, this is identical to _get_sample_alias above, since we are only going to allow submission of one prep for each sample """ - exp_alias_format = '%s_ptid_%s:%s' + exp_alias_format = "%s_ptid_%s:%s" alias = exp_alias_format % ( qiita_config.ebi_organization_prefix, escape(clean_whitespace(str(self.prep_template.id))), - escape(clean_whitespace(str(sample_name)))) + escape(clean_whitespace(str(sample_name))), + ) self._experiment_aliases[alias] = sample_name return alias def _get_submission_alias(self): """Format alias using ``self.artifact_id``""" - safe_artifact_id = escape( - clean_whitespace(str(self.artifact_id))) - submission_alias_format = '%s_submission_%s' - return submission_alias_format % (qiita_config.ebi_organization_prefix, - safe_artifact_id) + safe_artifact_id = escape(clean_whitespace(str(self.artifact_id))) + submission_alias_format = "%s_submission_%s" + return submission_alias_format % ( + qiita_config.ebi_organization_prefix, + safe_artifact_id, + ) def _get_run_alias(self, sample_name): - """Format alias using `sample_name` - """ - alias = '%s_ppdid_%s:%s' % ( + """Format alias using `sample_name`""" + alias = "%s_ppdid_%s:%s" % ( qiita_config.ebi_organization_prefix, escape(clean_whitespace(str(self.artifact_id))), - sample_name) + sample_name, + ) self._run_aliases[alias] = sample_name return alias def _get_library_name(self, sample_name): - """Format alias using `sample_name` - """ + """Format alias using `sample_name`""" return escape(clean_whitespace(sample_name)) - def _add_dict_as_tags_and_values(self, parent_node, attribute_element_name, - data_dict): + def _add_dict_as_tags_and_values( + self, parent_node, attribute_element_name, data_dict + ): """Format key/value data using a common EBI XML motif""" for attr, val in sorted(data_dict.items()): if val is None: val = "Unknown" - attribute_element = ET.SubElement(parent_node, - attribute_element_name) - tag = ET.SubElement(attribute_element, 'TAG') + 
attribute_element = ET.SubElement(parent_node, attribute_element_name) + tag = ET.SubElement(attribute_element, "TAG") tag.text = clean_whitespace(attr) - value = ET.SubElement(attribute_element, 'VALUE') + value = ET.SubElement(attribute_element, "VALUE") value.text = clean_whitespace(val) def _get_publication_element(self, study_links, pmid, db_name): - study_link = ET.SubElement(study_links, 'STUDY_LINK') - xref_link = ET.SubElement(study_link, 'XREF_LINK') + study_link = ET.SubElement(study_links, "STUDY_LINK") + xref_link = ET.SubElement(study_link, "XREF_LINK") - db = ET.SubElement(xref_link, 'DB') + db = ET.SubElement(xref_link, "DB") db.text = db_name - _id = ET.SubElement(xref_link, 'ID') + _id = ET.SubElement(xref_link, "ID") _id.text = str(pmid) def generate_study_xml(self): @@ -355,35 +391,42 @@ def generate_study_xml(self): ET.Element Object with study XML values """ - study_set = ET.Element('STUDY_SET', { - 'xmlns:xsi': self.xmlns_xsi, - 'xsi:noNamespaceSchemaLocation': self.xsi_noNSL % "study"}) + study_set = ET.Element( + "STUDY_SET", + { + "xmlns:xsi": self.xmlns_xsi, + "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "study", + }, + ) - study = ET.SubElement(study_set, 'STUDY', { - 'alias': self._get_study_alias(), - 'center_name': qiita_config.ebi_center_name} + study = ET.SubElement( + study_set, + "STUDY", + { + "alias": self._get_study_alias(), + "center_name": qiita_config.ebi_center_name, + }, ) - descriptor = ET.SubElement(study, 'DESCRIPTOR') - study_title = ET.SubElement(descriptor, 'STUDY_TITLE') + descriptor = ET.SubElement(study, "DESCRIPTOR") + study_title = ET.SubElement(descriptor, "STUDY_TITLE") study_title.text = escape(clean_whitespace(self.study_title)) # study type is deprecated and not displayed anywhere on EBI-ENA; # however it's required for submission so just injecting with Other - ET.SubElement( - descriptor, 'STUDY_TYPE', {'existing_study_type': 'Other'}) + ET.SubElement(descriptor, "STUDY_TYPE", {"existing_study_type": "Other"}) - study_abstract = ET.SubElement(descriptor, 'STUDY_ABSTRACT') + study_abstract = ET.SubElement(descriptor, "STUDY_ABSTRACT") study_abstract.text = clean_whitespace(escape(self.study_abstract)) # Add pubmed IDs if self.publications: - study_links = ET.SubElement(study, 'STUDY_LINKS') + study_links = ET.SubElement(study, "STUDY_LINKS") for pub, is_doi in self.publications: if is_doi: - self._get_publication_element(study_links, pub, 'DOI') + self._get_publication_element(study_links, pub, "DOI") else: - self._get_publication_element(study_links, pub, 'PUBMED') + self._get_publication_element(study_links, pub, "PUBMED") return study_set @@ -404,9 +447,13 @@ def generate_sample_xml(self, samples=None, ignore_columns=None): ET.Element Object with sample XML values """ - sample_set = ET.Element('SAMPLE_SET', { - 'xmlns:xsi': self.xmlns_xsi, - "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "sample"}) + sample_set = ET.Element( + "SAMPLE_SET", + { + "xmlns:xsi": self.xmlns_xsi, + "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "sample", + }, + ) if not samples: samples = self.samples.keys() @@ -418,45 +465,52 @@ def generate_sample_xml(self, samples=None, ignore_columns=None): sample_info[ename] = sample_info[qname] sample_accession = self._ebi_sample_accessions[sample_name] - if self.action in ('ADD', 'VALIDATE'): + if self.action in ("ADD", "VALIDATE"): if sample_accession is not None: continue else: - sample = ET.SubElement(sample_set, 'SAMPLE', { - 'alias': self._get_sample_alias(sample_name), - 'center_name': 
qiita_config.ebi_center_name} + sample = ET.SubElement( + sample_set, + "SAMPLE", + { + "alias": self._get_sample_alias(sample_name), + "center_name": qiita_config.ebi_center_name, + }, ) else: - sample = ET.SubElement(sample_set, 'SAMPLE', { - 'accession': sample_accession, - 'center_name': qiita_config.ebi_center_name} + sample = ET.SubElement( + sample_set, + "SAMPLE", + { + "accession": sample_accession, + "center_name": qiita_config.ebi_center_name, + }, ) - sample_title = ET.SubElement(sample, 'TITLE') + sample_title = ET.SubElement(sample, "TITLE") sample_title.text = escape(clean_whitespace(sample_name)) - sample_sample_name = ET.SubElement(sample, 'SAMPLE_NAME') - taxon_id = ET.SubElement(sample_sample_name, 'TAXON_ID') - text = sample_info.pop('taxon_id') + sample_sample_name = ET.SubElement(sample, "SAMPLE_NAME") + taxon_id = ET.SubElement(sample_sample_name, "TAXON_ID") + text = sample_info.pop("taxon_id") taxon_id.text = escape(clean_whitespace(text)) - scientific_name = ET.SubElement( - sample_sample_name, 'SCIENTIFIC_NAME') - text = sample_info.pop('scientific_name') + scientific_name = ET.SubElement(sample_sample_name, "SCIENTIFIC_NAME") + text = sample_info.pop("scientific_name") scientific_name.text = escape(clean_whitespace(text)) - description = ET.SubElement(sample, 'DESCRIPTION') - text = sample_info.pop('description') + description = ET.SubElement(sample, "DESCRIPTION") + text = sample_info.pop("description") description.text = escape(clean_whitespace(text)) if sample_info: if ignore_columns is not None: for key in ignore_columns: del sample_info[key] - sample_attributes = ET.SubElement(sample, 'SAMPLE_ATTRIBUTES') - self._add_dict_as_tags_and_values(sample_attributes, - 'SAMPLE_ATTRIBUTE', - sample_info) + sample_attributes = ET.SubElement(sample, "SAMPLE_ATTRIBUTES") + self._add_dict_as_tags_and_values( + sample_attributes, "SAMPLE_ATTRIBUTE", sample_info + ) return sample_set @@ -466,23 +520,23 @@ def _generate_spot_descriptor(self, design, platform): Therefore, we can break it out into its own method. """ # This section applies only to the LS454 platform - if platform != 'LS454': + if platform != "LS454": return # There is some hard-coded information in here, but this is what we # have always done in the past... 
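For orientation while reading the re-wrapped ElementTree builders in this file (the TAG/VALUE attribute helper, the study/sample documents, and the LS454 spot descriptor below), here is a minimal, self-contained sketch of the TAG/VALUE motif that _add_dict_as_tags_and_values emits. It is illustrative only and not part of this patch: the demo dictionary and the SAMPLE/SAMPLE_ATTRIBUTES wrapper elements are assumptions for the example, and whitespace is collapsed the same way clean_whitespace does.

import xml.etree.ElementTree as ET


def add_dict_as_tags_and_values(parent_node, attribute_element_name, data_dict):
    """Render each key/value pair as <...><TAG>key</TAG><VALUE>value</VALUE></...>."""
    for attr, val in sorted(data_dict.items()):
        if val is None:
            # the Qiita helper substitutes "Unknown" rather than emitting an empty VALUE
            val = "Unknown"
        attribute_element = ET.SubElement(parent_node, attribute_element_name)
        ET.SubElement(attribute_element, "TAG").text = " ".join(str(attr).split())
        ET.SubElement(attribute_element, "VALUE").text = " ".join(str(val).split())


# demo usage with made-up metadata values
sample = ET.Element("SAMPLE")
attributes = ET.SubElement(sample, "SAMPLE_ATTRIBUTES")
add_dict_as_tags_and_values(
    attributes, "SAMPLE_ATTRIBUTE", {"env_biome": "soil", "elevation": None}
)
print(ET.tostring(sample, encoding="unicode"))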
- spot_descriptor = ET.SubElement(design, 'SPOT_DESCRIPTOR') - ET.SubElement(spot_descriptor, 'SPOT_DECODE_SPEC') - read_spec = ET.SubElement(spot_descriptor, 'READ_SPEC') - - read_index = ET.SubElement(read_spec, 'READ_INDEX') - read_index.text = '0' - read_class = ET.SubElement(read_spec, 'READ_CLASS') - read_class.text = 'Application Read' - read_type = ET.SubElement(read_spec, 'READ_TYPE') - read_type.text = 'Forward' - base_coord = ET.SubElement(read_spec, 'BASE_COORD') - base_coord.text = '1' + spot_descriptor = ET.SubElement(design, "SPOT_DESCRIPTOR") + ET.SubElement(spot_descriptor, "SPOT_DECODE_SPEC") + read_spec = ET.SubElement(spot_descriptor, "READ_SPEC") + + read_index = ET.SubElement(read_spec, "READ_INDEX") + read_index.text = "0" + read_class = ET.SubElement(read_spec, "READ_CLASS") + read_class.text = "Application Read" + read_type = ET.SubElement(read_spec, "READ_TYPE") + read_type.text = "Forward" + base_coord = ET.SubElement(read_spec, "BASE_COORD") + base_coord.text = "1" def generate_experiment_xml(self, samples=None): """Generates the experiment XML file @@ -499,17 +553,21 @@ def generate_experiment_xml(self, samples=None): """ study_accession = self.study.ebi_study_accession if study_accession: - study_ref_dict = {'accession': study_accession} + study_ref_dict = {"accession": study_accession} else: - study_ref_dict = {'refname': self._get_study_alias()} - - experiment_set = ET.Element('EXPERIMENT_SET', { - 'xmlns:xsi': self.xmlns_xsi, - "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "experiment"}) + study_ref_dict = {"refname": self._get_study_alias()} + + experiment_set = ET.Element( + "EXPERIMENT_SET", + { + "xmlns:xsi": self.xmlns_xsi, + "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "experiment", + }, + ) samples = samples if samples is not None else self.samples.keys() - if self.investigation_type == 'Other': + if self.investigation_type == "Other": library_strategy = self.new_investigation_type else: library_strategy = self.investigation_type @@ -519,77 +577,77 @@ def generate_experiment_xml(self, samples=None): sample_prep = dict(self.samples_prep[sample_name]) if self._ebi_sample_accessions[sample_name]: sample_descriptor_dict = { - 'accession': self._ebi_sample_accessions[sample_name]} + "accession": self._ebi_sample_accessions[sample_name] + } else: sample_descriptor_dict = { - 'refname': self._get_sample_alias(sample_name)} - - platform = sample_prep.pop('platform') - experiment = ET.SubElement(experiment_set, 'EXPERIMENT', { - 'alias': experiment_alias, - 'center_name': qiita_config.ebi_center_name} + "refname": self._get_sample_alias(sample_name) + } + + platform = sample_prep.pop("platform") + experiment = ET.SubElement( + experiment_set, + "EXPERIMENT", + { + "alias": experiment_alias, + "center_name": qiita_config.ebi_center_name, + }, ) - title = ET.SubElement(experiment, 'TITLE') + title = ET.SubElement(experiment, "TITLE") title.text = experiment_alias - ET.SubElement(experiment, 'STUDY_REF', study_ref_dict) + ET.SubElement(experiment, "STUDY_REF", study_ref_dict) - design = ET.SubElement(experiment, 'DESIGN') - design_description = ET.SubElement(design, - 'DESIGN_DESCRIPTION') - edd = sample_prep.pop('experiment_design_description') + design = ET.SubElement(experiment, "DESIGN") + design_description = ET.SubElement(design, "DESIGN_DESCRIPTION") + edd = sample_prep.pop("experiment_design_description") design_description.text = escape(clean_whitespace(edd)) - ET.SubElement(design, 'SAMPLE_DESCRIPTOR', sample_descriptor_dict) + 
ET.SubElement(design, "SAMPLE_DESCRIPTOR", sample_descriptor_dict) # this is the library contruction section. The only required fields # is library_construction_protocol, the other are optional - library_descriptor = ET.SubElement(design, 'LIBRARY_DESCRIPTOR') - library_name = ET.SubElement(library_descriptor, 'LIBRARY_NAME') + library_descriptor = ET.SubElement(design, "LIBRARY_DESCRIPTOR") + library_name = ET.SubElement(library_descriptor, "LIBRARY_NAME") library_name.text = self._get_library_name(sample_name) - lg = ET.SubElement(library_descriptor, 'LIBRARY_STRATEGY') + lg = ET.SubElement(library_descriptor, "LIBRARY_STRATEGY") lg.text = escape(clean_whitespace(library_strategy)) # hardcoding some values, # see https://github.com/biocore/qiita/issues/1485 - library_source = ET.SubElement(library_descriptor, - "LIBRARY_SOURCE") + library_source = ET.SubElement(library_descriptor, "LIBRARY_SOURCE") library_source.text = "METAGENOMIC" - library_selection = ET.SubElement(library_descriptor, - "LIBRARY_SELECTION") + library_selection = ET.SubElement(library_descriptor, "LIBRARY_SELECTION") library_selection.text = "PCR" - library_layout = ET.SubElement(library_descriptor, - "LIBRARY_LAYOUT") + library_layout = ET.SubElement(library_descriptor, "LIBRARY_LAYOUT") if self.per_sample_FASTQ_reverse: ET.SubElement(library_layout, "PAIRED") else: ET.SubElement(library_layout, "SINGLE") - lcp = ET.SubElement(library_descriptor, - "LIBRARY_CONSTRUCTION_PROTOCOL") - lcp.text = escape(clean_whitespace( - sample_prep.pop('library_construction_protocol'))) + lcp = ET.SubElement(library_descriptor, "LIBRARY_CONSTRUCTION_PROTOCOL") + lcp.text = escape( + clean_whitespace(sample_prep.pop("library_construction_protocol")) + ) self._generate_spot_descriptor(design, platform) - platform_element = ET.SubElement(experiment, 'PLATFORM') - platform_info = ET.SubElement(platform_element, - platform.upper()) - instrument_model = ET.SubElement(platform_info, 'INSTRUMENT_MODEL') - instrument_model.text = sample_prep.pop('instrument_model') + platform_element = ET.SubElement(experiment, "PLATFORM") + platform_info = ET.SubElement(platform_element, platform.upper()) + instrument_model = ET.SubElement(platform_info, "INSTRUMENT_MODEL") + instrument_model.text = sample_prep.pop("instrument_model") if sample_prep: experiment_attributes = ET.SubElement( - experiment, 'EXPERIMENT_ATTRIBUTES') - self._add_dict_as_tags_and_values(experiment_attributes, - 'EXPERIMENT_ATTRIBUTE', - sample_prep) + experiment, "EXPERIMENT_ATTRIBUTES" + ) + self._add_dict_as_tags_and_values( + experiment_attributes, "EXPERIMENT_ATTRIBUTE", sample_prep + ) return experiment_set - def _add_file_subelement(self, add_file, file_type, sample_name, - is_forward): - """generate_run_xml helper to avoid duplication of code - """ + def _add_file_subelement(self, add_file, file_type, sample_name, is_forward): + """generate_run_xml helper to avoid duplication of code""" if is_forward: suffix = self.FWD_READ_SUFFIX @@ -597,14 +655,16 @@ def _add_file_subelement(self, add_file, file_type, sample_name, suffix = self.REV_READ_SUFFIX file_path = self.sample_demux_fps[sample_name] + suffix - with open(file_path, 'rb') as fp: + with open(file_path, "rb") as fp: md5 = hashlib.md5(fp.read()).hexdigest() - file_details = {'filetype': file_type, - 'quality_scoring_system': 'phred', - 'checksum_method': 'MD5', - 'checksum': md5, - 'filename': join(self.ebi_dir, basename(file_path))} + file_details = { + "filetype": file_type, + "quality_scoring_system": "phred", + 
"checksum_method": "MD5", + "checksum": md5, + "filename": join(self.ebi_dir, basename(file_path)), + } add_file(file_details) @@ -616,32 +676,42 @@ def generate_run_xml(self): ET.Element Object with run XML values """ - run_set = ET.Element('RUN_SET', { - 'xmlns:xsi': self.xmlns_xsi, - "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "run"}) + run_set = ET.Element( + "RUN_SET", + { + "xmlns:xsi": self.xmlns_xsi, + "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "run", + }, + ) for sample_name, sample_prep in sorted(self.samples_prep.items()): sample_prep = dict(sample_prep) if self._ebi_experiment_accessions[sample_name]: experiment_ref_dict = { - 'accession': self._ebi_experiment_accessions[sample_name]} + "accession": self._ebi_experiment_accessions[sample_name] + } else: experiment_alias = self._get_experiment_alias(sample_name) - experiment_ref_dict = {'refname': experiment_alias} + experiment_ref_dict = {"refname": experiment_alias} # We only submit fastq - file_type = 'fastq' - run = ET.SubElement(run_set, 'RUN', { - 'alias': self._get_run_alias(sample_name), - 'center_name': qiita_config.ebi_center_name} + file_type = "fastq" + run = ET.SubElement( + run_set, + "RUN", + { + "alias": self._get_run_alias(sample_name), + "center_name": qiita_config.ebi_center_name, + }, ) - ET.SubElement(run, 'EXPERIMENT_REF', experiment_ref_dict) - data_block = ET.SubElement(run, 'DATA_BLOCK') - files = ET.SubElement(data_block, 'FILES') + ET.SubElement(run, "EXPERIMENT_REF", experiment_ref_dict) + data_block = ET.SubElement(run, "DATA_BLOCK") + files = ET.SubElement(data_block, "FILES") - add_file = partial(ET.SubElement, files, 'FILE') - add_file_subelement = partial(self._add_file_subelement, add_file, - file_type, sample_name) + add_file = partial(ET.SubElement, files, "FILE") + add_file_subelement = partial( + self._add_file_subelement, add_file, file_type, sample_name + ) add_file_subelement(is_forward=True) if self.per_sample_FASTQ_reverse: add_file_subelement(is_forward=False) @@ -667,49 +737,64 @@ def generate_submission_xml(self, submission_date=None): EBI requieres a date when the submission will be automatically made public. This date is generated from the submission date + 365 days. 
""" - submission_set = ET.Element('SUBMISSION_SET', { - 'xmlns:xsi': self.xmlns_xsi, - "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "submission"}) - submission = ET.SubElement(submission_set, 'SUBMISSION', { - 'alias': self._get_submission_alias(), - 'center_name': qiita_config.ebi_center_name} + submission_set = ET.Element( + "SUBMISSION_SET", + { + "xmlns:xsi": self.xmlns_xsi, + "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "submission", + }, + ) + submission = ET.SubElement( + submission_set, + "SUBMISSION", + { + "alias": self._get_submission_alias(), + "center_name": qiita_config.ebi_center_name, + }, ) - actions = ET.SubElement(submission, 'ACTIONS') + actions = ET.SubElement(submission, "ACTIONS") if self.study_xml_fp: - study_action = ET.SubElement(actions, 'ACTION') - ET.SubElement(study_action, self.action, { - 'schema': 'study', - 'source': basename(self.study_xml_fp)} + study_action = ET.SubElement(actions, "ACTION") + ET.SubElement( + study_action, + self.action, + {"schema": "study", "source": basename(self.study_xml_fp)}, ) if self.sample_xml_fp: - sample_action = ET.SubElement(actions, 'ACTION') - ET.SubElement(sample_action, self.action, { - 'schema': 'sample', - 'source': basename(self.sample_xml_fp)} + sample_action = ET.SubElement(actions, "ACTION") + ET.SubElement( + sample_action, + self.action, + {"schema": "sample", "source": basename(self.sample_xml_fp)}, ) if self.experiment_xml_fp: - experiment_action = ET.SubElement(actions, 'ACTION') - ET.SubElement(experiment_action, self.action, { - 'schema': 'experiment', - 'source': basename(self.experiment_xml_fp)} + experiment_action = ET.SubElement(actions, "ACTION") + ET.SubElement( + experiment_action, + self.action, + {"schema": "experiment", "source": basename(self.experiment_xml_fp)}, ) if self.run_xml_fp: - run_action = ET.SubElement(actions, 'ACTION') - ET.SubElement(run_action, self.action, { - 'schema': 'run', 'source': basename(self.run_xml_fp)} + run_action = ET.SubElement(actions, "ACTION") + ET.SubElement( + run_action, + self.action, + {"schema": "run", "source": basename(self.run_xml_fp)}, ) if submission_date is None: submission_date = date.today() - if self.action == 'ADD': - hold_action = ET.SubElement(actions, 'ACTION') - ET.SubElement(hold_action, 'HOLD', { - 'HoldUntilDate': str(submission_date + timedelta(365))} + if self.action == "ADD": + hold_action = ET.SubElement(actions, "ACTION") + ET.SubElement( + hold_action, + "HOLD", + {"HoldUntilDate": str(submission_date + timedelta(365))}, ) return submission_set @@ -727,8 +812,7 @@ def write_xml_file(self, element, fp): """ if not exists(self.xml_dir): makedirs(self.xml_dir) - ET.ElementTree(element).write( - fp, encoding='UTF-8', xml_declaration=True) + ET.ElementTree(element).write(fp, encoding="UTF-8", xml_declaration=True) def generate_xml_files(self): """Generate all the XML files""" @@ -736,45 +820,48 @@ def generate_xml_files(self): # There are really only 2 main cases for EBI submission: ADD and # MODIFY and the only exception is in MODIFY - if self.action != 'MODIFY': + if self.action != "MODIFY": # The study.xml file needs to be generated if and only if the study # does NOT have an ebi_study_accession if not self.study.ebi_study_accession: - self.study_xml_fp = get_output_fp('study.xml') - self.write_xml_file(self.generate_study_xml(), - self.study_xml_fp) + self.study_xml_fp = get_output_fp("study.xml") + self.write_xml_file(self.generate_study_xml(), self.study_xml_fp) # The sample.xml file needs to be generated if and only if 
there # are samples in the current submission that do NOT have an # ebi_sample_accession new_samples = { - sample for sample, accession in - self.sample_template.ebi_sample_accessions.items() - if accession is None} + sample + for sample, accession in self.sample_template.ebi_sample_accessions.items() + if accession is None + } new_samples = new_samples.intersection(self.samples) if new_samples: - self.sample_xml_fp = get_output_fp('sample.xml') - self.write_xml_file(self.generate_sample_xml(new_samples), - self.sample_xml_fp) + self.sample_xml_fp = get_output_fp("sample.xml") + self.write_xml_file( + self.generate_sample_xml(new_samples), self.sample_xml_fp + ) # The experiment.xml needs to be generated if and only if there are # samples in the current submission that do NO have an # ebi_experiment_accession new_samples = { - sample for sample, accession in - self.prep_template.ebi_experiment_accessions.items() - if accession is None} + sample + for sample, accession in self.prep_template.ebi_experiment_accessions.items() + if accession is None + } new_samples = new_samples.intersection(self.samples) if new_samples: - self.experiment_xml_fp = get_output_fp('experiment.xml') - self.write_xml_file(self.generate_experiment_xml(new_samples), - self.experiment_xml_fp) + self.experiment_xml_fp = get_output_fp("experiment.xml") + self.write_xml_file( + self.generate_experiment_xml(new_samples), self.experiment_xml_fp + ) # Generate the run.xml as it should always be generated - self.run_xml_fp = get_output_fp('run.xml') + self.run_xml_fp = get_output_fp("run.xml") self.write_xml_file(self.generate_run_xml(), self.run_xml_fp) - self.submission_xml_fp = get_output_fp('submission.xml') + self.submission_xml_fp = get_output_fp("submission.xml") else: # When MODIFY we can only modify the sample (sample.xml) and prep # (experiment.xml) template. 
The easiest is to generate both and @@ -786,27 +873,27 @@ def generate_xml_files(self): # finding unique name for sample xml i = 0 while True: - self.sample_xml_fp = get_output_fp('sample_%d.xml' % i) + self.sample_xml_fp = get_output_fp("sample_%d.xml" % i) if not exists(self.sample_xml_fp): break i = i + 1 - self.write_xml_file(self.generate_sample_xml(samples), - self.sample_xml_fp) + self.write_xml_file(self.generate_sample_xml(samples), self.sample_xml_fp) # finding unique name for experiment xml i = 0 while True: - self.experiment_xml_fp = get_output_fp('experiment_%d.xml' % i) + self.experiment_xml_fp = get_output_fp("experiment_%d.xml" % i) if not exists(self.experiment_xml_fp): break i = i + 1 - self.write_xml_file(self.generate_experiment_xml(samples), - self.experiment_xml_fp) + self.write_xml_file( + self.generate_experiment_xml(samples), self.experiment_xml_fp + ) # finding unique name for run xml i = 0 while True: - self.submission_xml_fp = get_output_fp('submission_%d.xml' % i) + self.submission_xml_fp = get_output_fp("submission_%d.xml" % i) if not exists(self.submission_xml_fp): break i = i + 1 @@ -814,21 +901,20 @@ def generate_xml_files(self): # just to keep all curl_reply-s we find a new name i = 0 while True: - self.curl_reply = join(self.full_ebi_dir, - 'curl_reply_%d.xml' % i) + self.curl_reply = join(self.full_ebi_dir, "curl_reply_%d.xml" % i) if not exists(self.curl_reply): break i = i + 1 # The submission.xml is always generated - self.write_xml_file(self.generate_submission_xml(), - self.submission_xml_fp) + self.write_xml_file(self.generate_submission_xml(), self.submission_xml_fp) def generate_curl_command( - self, - ebi_seq_xfer_user=qiita_config.ebi_seq_xfer_user, - ebi_seq_xfer_pass=qiita_config.ebi_seq_xfer_pass, - ebi_dropbox_url=qiita_config.ebi_dropbox_url): + self, + ebi_seq_xfer_user=qiita_config.ebi_seq_xfer_user, + ebi_seq_xfer_pass=qiita_config.ebi_seq_xfer_pass, + ebi_dropbox_url=qiita_config.ebi_dropbox_url, + ): """Generates the curl command for submission Parameters @@ -851,9 +937,10 @@ def generate_curl_command( be generated before executing this function """ # make sure that the XML files have been generated - url = '?auth=ENA%20{0}%20{1}'.format(quote(ebi_seq_xfer_user), - quote(ebi_seq_xfer_pass)) - curl_cmd = ['curl -sS -k'] + url = "?auth=ENA%20{0}%20{1}".format( + quote(ebi_seq_xfer_user), quote(ebi_seq_xfer_pass) + ) + curl_cmd = ["curl -sS -k"] if self.submission_xml_fp is not None: curl_cmd.append(' -F "SUBMISSION=@%s"' % self.submission_xml_fp) if self.study_xml_fp is not None: @@ -866,7 +953,7 @@ def generate_curl_command( curl_cmd.append(' -F "EXPERIMENT=@%s"' % self.experiment_xml_fp) curl_cmd.append(' "%s"' % join(ebi_dropbox_url, url)) - return ''.join(curl_cmd) + return "".join(curl_cmd) def generate_send_sequences_cmd(self): """Generate the sequences to EBI via ascp command @@ -891,12 +978,14 @@ def generate_send_sequences_cmd(self): fastqs_div = [fastqs[i::10] for i in range(10) if fastqs[i::10]] ascp_commands = [] for f in fastqs_div: - ascp_commands.append('ascp --ignore-host-key -d -QT -k2 ' - '{0} {1}@{2}:./{3}/'.format( - ' '.join(f), - qiita_config.ebi_seq_xfer_user, - qiita_config.ebi_seq_xfer_url, - self.ebi_dir)) + ascp_commands.append( + "ascp --ignore-host-key -d -QT -k2 {0} {1}@{2}:./{3}/".format( + " ".join(f), + qiita_config.ebi_seq_xfer_user, + qiita_config.ebi_seq_xfer_url, + self.ebi_dir, + ) + ) return ascp_commands @@ -937,87 +1026,99 @@ def parse_EBI_reply(self, curl_result, test=False): try: root = 
ET.fromstring(curl_result) except ParseError: - error_msg = ("The curl result from the EBI submission doesn't " - "look like an XML file:\n%s" % curl_result) - le = LogEntry.create('Runtime', error_msg) + error_msg = ( + "The curl result from the EBI submission doesn't " + "look like an XML file:\n%s" % curl_result + ) + le = LogEntry.create("Runtime", error_msg) raise EBISubmissionError( "The curl result from the EBI submission doesn't look like " "an XML file. Contact and admin for more information. " - "Log id: %d" % le.id) + "Log id: %d" % le.id + ) - success = root.get('success') == 'true' + success = root.get("success") == "true" if not success: # here we want to parse out the errors so the failures are clearer errors = {elem.text for elem in root.iter("ERROR")} - raise EBISubmissionError("The EBI submission failed:\n%s" - % '\n'.join(errors)) + raise EBISubmissionError( + "The EBI submission failed:\n%s" % "\n".join(errors) + ) if test: - study_accession = 'MyStudyAccession' + study_accession = "MyStudyAccession" sample_accessions = {} biosample_accessions = {} experiment_accessions = {} run_accessions = {} - return (study_accession, sample_accessions, biosample_accessions, - experiment_accessions, run_accessions) + return ( + study_accession, + sample_accessions, + biosample_accessions, + experiment_accessions, + run_accessions, + ) study_elem = root.findall("STUDY") if study_elem: if len(study_elem) > 1: raise EBISubmissionError( - "Multiple study tags found in EBI reply: %d" - % len(study_elem)) + "Multiple study tags found in EBI reply: %d" % len(study_elem) + ) study_elem = study_elem[0] - study_accession = study_elem.get('accession') + study_accession = study_elem.get("accession") else: study_accession = None sample_accessions = {} biosample_accessions = {} for elem in root.iter("SAMPLE"): - alias = elem.get('alias') + alias = elem.get("alias") sample_id = self._sample_aliases[alias] - sample_accessions[sample_id] = elem.get('accession') - ext_id = elem.find('EXT_ID') - biosample_accessions[sample_id] = ext_id.get('accession') + sample_accessions[sample_id] = elem.get("accession") + ext_id = elem.find("EXT_ID") + biosample_accessions[sample_id] = ext_id.get("accession") def data_retriever(key, trans_dict): res = {} for elem in root.iter(key): - alias = elem.get('alias') - res[trans_dict[alias]] = elem.get('accession') + alias = elem.get("alias") + res[trans_dict[alias]] = elem.get("accession") return res - experiment_accessions = data_retriever("EXPERIMENT", - self._experiment_aliases) + + experiment_accessions = data_retriever("EXPERIMENT", self._experiment_aliases) run_accessions = data_retriever("RUN", self._run_aliases) - return (study_accession, sample_accessions, biosample_accessions, - experiment_accessions, run_accessions) + return ( + study_accession, + sample_accessions, + biosample_accessions, + experiment_accessions, + run_accessions, + ) def _generate_demultiplexed_fastq_per_sample_FASTQ(self): """Modularity helper""" # helper function to write files in this method def _rename_file(fp, new_fp): - if fp.endswith('.gz'): + if fp.endswith(".gz"): copyfile(fp, new_fp) else: cmd = "gzip -c %s > %s" % (fp, new_fp) stdout, stderr, rv = system_call(cmd) if rv != 0: - error_msg = ( - "Error:\nStd output:%s\nStd error:%s" - % (stdout, stderr)) + error_msg = "Error:\nStd output:%s\nStd error:%s" % (stdout, stderr) raise EBISubmissionError(error_msg) fwd_reads = [] rev_reads = [] for x in self.artifact.filepaths: - if x['fp_type'] == 'raw_forward_seqs': - 
fwd_reads.append((basename(x['fp']), x['fp'])) - elif x['fp_type'] == 'raw_reverse_seqs': - rev_reads.append((basename(x['fp']), x['fp'])) + if x["fp_type"] == "raw_forward_seqs": + fwd_reads.append((basename(x["fp"]), x["fp"])) + elif x["fp_type"] == "raw_reverse_seqs": + rev_reads.append((basename(x["fp"]), x["fp"])) fwd_reads.sort(key=lambda x: x[1]) rev_reads.sort(key=lambda x: x[1]) if rev_reads: @@ -1034,11 +1135,12 @@ def _rename_file(fp, new_fp): rev_read = r[1] if r is not None else None fps.append((sample_name, (fwd_read, rev_read))) - if 'run_prefix' in self.prep_template.categories: - rps = [(k, v) for k, v in - self.prep_template.get_category('run_prefix').items()] + if "run_prefix" in self.prep_template.categories: + rps = [ + (k, v) for k, v in self.prep_template.get_category("run_prefix").items() + ] else: - rps = [(v, v.split('.', 1)[1]) for v in self.prep_template.keys()] + rps = [(v, v.split(".", 1)[1]) for v in self.prep_template.keys()] rps.sort(key=lambda x: x[1]) demux_samples = set() @@ -1050,20 +1152,19 @@ def _rename_file(fp, new_fp): _rename_file(fp[0], new_fp) if fp[1] is not None: - new_fp = self.sample_demux_fps[ - sn] + self.REV_READ_SUFFIX + new_fp = self.sample_demux_fps[sn] + self.REV_READ_SUFFIX _rename_file(fp[1], new_fp) del fps[i] break if fps: error_msg = ( - 'Discrepancy between filepaths and sample names. Extra' - ' filepaths: %s' % ', '.join([fp[0] for fp in fps])) - LogEntry.create('Runtime', error_msg) + "Discrepancy between filepaths and sample names. Extra" + " filepaths: %s" % ", ".join([fp[0] for fp in fps]) + ) + LogEntry.create("Runtime", error_msg) raise EBISubmissionError(error_msg) - return demux_samples, \ - set(self.samples.keys()).difference(set(demux_samples)) + return demux_samples, set(self.samples.keys()).difference(set(demux_samples)) def _generate_demultiplexed_fastq_demux(self, mtime): """Modularity helper""" @@ -1071,22 +1172,21 @@ def _generate_demultiplexed_fastq_demux(self, mtime): # `preprocessed_demux`. 
Thus, we only use the first one # (the only one present) ar = self.artifact - demux = [x['fp'] for x in ar.filepaths - if x['fp_type'] == 'preprocessed_demux'][0] + demux = [x["fp"] for x in ar.filepaths if x["fp_type"] == "preprocessed_demux"][ + 0 + ] demux_samples = set() with open_file(demux) as demux_fh: if not isinstance(demux_fh, File): - error_msg = ( - "'%s' doesn't look like a demux file" % demux) - LogEntry.create('Runtime', error_msg) + error_msg = "'%s' doesn't look like a demux file" % demux + LogEntry.create("Runtime", error_msg) raise EBISubmissionError(error_msg) - for s, i in to_per_sample_ascii(demux_fh, - self.prep_template.keys()): - s = s.decode('ascii') + for s, i in to_per_sample_ascii(demux_fh, self.prep_template.keys()): + s = s.decode("ascii") sample_fp = self.sample_demux_fps[s] + self.FWD_READ_SUFFIX wrote_sequences = False - with GzipFile(sample_fp, mode='w', mtime=mtime) as fh: + with GzipFile(sample_fp, mode="w", mtime=mtime) as fh: for record in i: fh.write(record) wrote_sequences = True @@ -1094,9 +1194,9 @@ def _generate_demultiplexed_fastq_demux(self, mtime): if wrote_sequences: demux_samples.add(s) else: - del (self.samples[s]) - del (self.samples_prep[s]) - del (self.sample_demux_fps[s]) + del self.samples[s] + del self.samples_prep[s] + del self.sample_demux_fps[s] remove(sample_fp) return demux_samples @@ -1144,9 +1244,10 @@ def generate_demultiplexed_fastq(self, rewrite_fastq=False, mtime=None): create_nested_path(self.full_ebi_dir) - if self.artifact.artifact_type == 'per_sample_FASTQ': - demux_samples, missing_samples = \ + if self.artifact.artifact_type == "per_sample_FASTQ": + demux_samples, missing_samples = ( self._generate_demultiplexed_fastq_per_sample_FASTQ() + ) else: demux_samples = self._generate_demultiplexed_fastq_demux(mtime) else: @@ -1173,20 +1274,21 @@ def generate_demultiplexed_fastq(self, rewrite_fastq=False, mtime=None): if missing_files != all_missing_files: self.per_sample_FASTQ_reverse = True - missing_samples = set( - self.samples.keys()).difference(demux_samples) + missing_samples = set(self.samples.keys()).difference(demux_samples) if missing_samples: for ms in missing_samples: - del (self.samples[ms]) - del (self.samples_prep[ms]) - del (self.sample_demux_fps[ms]) + del self.samples[ms] + del self.samples_prep[ms] + del self.sample_demux_fps[ms] if not demux_samples: - error_msg = ("All samples were removed from the submission " - "because the demux file is empty or the sample names " - "do not match.") - LogEntry.create('Runtime', error_msg) + error_msg = ( + "All samples were removed from the submission " + "because the demux file is empty or the sample names " + "do not match." 
+ ) + LogEntry.create("Runtime", error_msg) raise EBISubmissionError(error_msg) return demux_samples diff --git a/qiita_ware/exceptions.py b/qiita_ware/exceptions.py index 6f4107c2b..675a1e355 100644 --- a/qiita_ware/exceptions.py +++ b/qiita_ware/exceptions.py @@ -12,34 +12,41 @@ class QiitaWareError(QiitaError): """Base clase for all Qiita-ware exceptions""" + pass class UserDoesNotExistsError(QiitaWareError): """Error used when a user does not exist""" + pass class AnalysisDoesNotExistsError(QiitaWareError): """Error used when an analysis does not exist""" + pass class JobDoesNotExistsError(QiitaWareError): """Error used when a job does not exist""" + pass class StudyDoesNotExistsError(QiitaWareError): """Error used when a study does not exist""" + pass class ComputeError(QiitaWareError): """A compute error happened""" + pass class EBISubmissionError(QiitaWareError): """Error used when EBI cannot be submitted""" + pass diff --git a/qiita_ware/metadata_pipeline.py b/qiita_ware/metadata_pipeline.py index 33d645338..ab6bdaf85 100644 --- a/qiita_ware/metadata_pipeline.py +++ b/qiita_ware/metadata_pipeline.py @@ -5,12 +5,15 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from qiita_db.metadata_template.util import load_template_to_dataframe -from qiita_db.metadata_template.sample_template import SampleTemplate -from qiita_db.metadata_template.prep_template import PrepTemplate from qiita_db.metadata_template.constants import ( - PREP_TEMPLATE_COLUMNS, PREP_TEMPLATE_COLUMNS_TARGET_GENE, CONTROLLED_COLS, - TARGET_GENE_DATA_TYPES) + CONTROLLED_COLS, + PREP_TEMPLATE_COLUMNS, + PREP_TEMPLATE_COLUMNS_TARGET_GENE, + TARGET_GENE_DATA_TYPES, +) +from qiita_db.metadata_template.prep_template import PrepTemplate +from qiita_db.metadata_template.sample_template import SampleTemplate +from qiita_db.metadata_template.util import load_template_to_dataframe from qiita_db.util import convert_from_id from qiita_ware.exceptions import QiitaWareError @@ -32,30 +35,32 @@ def create_templates_from_qiime_mapping_file(fp, study, data_type): (SampleTemplate, PrepTemplate) The templates created from the QIIME mapping file """ - qiime_map = load_template_to_dataframe(fp, index='#SampleID') + qiime_map = load_template_to_dataframe(fp, index="#SampleID") # There are a few columns in the QIIME mapping file that are special and # we know how to deal with them rename_cols = { - 'BarcodeSequence': 'barcode', - 'LinkerPrimerSequence': 'primer', - 'Description': 'description', + "BarcodeSequence": "barcode", + "LinkerPrimerSequence": "primer", + "Description": "description", } - if 'ReverseLinkerPrimer' in qiime_map: - rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer' + if "ReverseLinkerPrimer" in qiime_map: + rename_cols["ReverseLinkerPrimer"] = "reverselinkerprimer" missing = set(rename_cols).difference(qiime_map.columns) if missing: raise QiitaWareError( "Error generating the templates from the QIIME mapping file. 
" - "Missing QIIME mapping file columns: %s" % ', '.join(missing)) + "Missing QIIME mapping file columns: %s" % ", ".join(missing) + ) qiime_map.rename(columns=rename_cols, inplace=True) # Fix the casing in the columns that we control - qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c - for c in qiime_map.columns] + qiime_map.columns = [ + c.lower() if c.lower() in CONTROLLED_COLS else c for c in qiime_map.columns + ] # Figure out which columns belong to the prep template def _col_iterator(restriction_set): @@ -65,13 +70,15 @@ def _col_iterator(restriction_set): pt_cols = set(col for col in _col_iterator(PREP_TEMPLATE_COLUMNS)) - data_type_str = (convert_from_id(data_type, "data_type") - if isinstance(data_type, int) else data_type) + data_type_str = ( + convert_from_id(data_type, "data_type") + if isinstance(data_type, int) + else data_type + ) if data_type_str in TARGET_GENE_DATA_TYPES: - pt_cols.update( - col for col in _col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE)) - pt_cols.add('reverselinkerprimer') + pt_cols.update(col for col in _col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE)) + pt_cols.add("reverselinkerprimer") qiime_cols = set(qiime_map.columns) pt_cols = qiime_cols.intersection(pt_cols) @@ -80,5 +87,7 @@ def _col_iterator(restriction_set): st_md = qiime_map.loc[:, list(st_cols)] pt_md = qiime_map.loc[:, list(pt_cols)] - return (SampleTemplate.create(st_md, study), - PrepTemplate.create(pt_md, study, data_type)) + return ( + SampleTemplate.create(st_md, study), + PrepTemplate.create(pt_md, study, data_type), + ) diff --git a/qiita_ware/private_plugin.py b/qiita_ware/private_plugin.py index e29badbde..065be525e 100644 --- a/qiita_ware/private_plugin.py +++ b/qiita_ware/private_plugin.py @@ -6,21 +6,19 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- +import traceback +import warnings from json import dumps, loads -from sys import exc_info -from time import sleep from os import remove from os.path import join -import traceback -import warnings +from sys import exc_info +from time import sleep import qiita_db as qdb -from qiita_core.qiita_settings import r_client, qiita_config -from qiita_ware.commands import (download_remote, list_remote, - submit_VAMPS, submit_EBI) -from qiita_ware.metadata_pipeline import ( - create_templates_from_qiime_mapping_file) +from qiita_core.qiita_settings import qiita_config, r_client +from qiita_ware.commands import download_remote, list_remote, submit_EBI, submit_VAMPS from qiita_ware.exceptions import EBISubmissionError +from qiita_ware.metadata_pipeline import create_templates_from_qiime_mapping_file def build_analysis_files(job): @@ -33,30 +31,36 @@ def build_analysis_files(job): """ with qdb.sql_connection.TRN: params = job.parameters.values - analysis_id = params['analysis'] - categories = params['categories'] - merge_duplicated_sample_ids = params['merge_dup_sample_ids'] + analysis_id = params["analysis"] + categories = params["categories"] + merge_duplicated_sample_ids = params["merge_dup_sample_ids"] analysis = qdb.analysis.Analysis(analysis_id) biom_files = analysis.build_files( - merge_duplicated_sample_ids, categories=categories) + merge_duplicated_sample_ids, categories=categories + ) - cmd = qdb.software.Command.get_validator('BIOM') + cmd = qdb.software.Command.get_validator("BIOM") val_jobs = [] for dtype, biom_fp, archive_artifact_fp in biom_files: if archive_artifact_fp is not None: - files = dumps({'biom': [biom_fp], - 'plain_text': [archive_artifact_fp]}) + files = dumps({"biom": [biom_fp], "plain_text": [archive_artifact_fp]}) else: - files = dumps({'biom': [biom_fp]}) + files = dumps({"biom": [biom_fp]}) validate_params = qdb.software.Parameters.load( - cmd, values_dict={'files': files, - 'artifact_type': 'BIOM', - 'provenance': dumps({'job': job.id, - 'data_type': dtype}), - 'analysis': analysis_id, - 'template': None}) - val_jobs.append(qdb.processing_job.ProcessingJob.create( - analysis.owner, validate_params, True)) + cmd, + values_dict={ + "files": files, + "artifact_type": "BIOM", + "provenance": dumps({"job": job.id, "data_type": dtype}), + "analysis": analysis_id, + "template": None, + }, + ) + val_jobs.append( + qdb.processing_job.ProcessingJob.create( + analysis.owner, validate_params, True + ) + ) job._set_validator_jobs(val_jobs) @@ -77,9 +81,8 @@ def release_validators(job): job : qiita_db.processing_job.ProcessingJob The processing job with the information of the parent job """ - qdb.processing_job.ProcessingJob( - job.parameters.values['job']).release_validators() - job._set_status('success') + qdb.processing_job.ProcessingJob(job.parameters.values["job"]).release_validators() + job._set_status("success") def submit_to_VAMPS(job): @@ -91,8 +94,8 @@ def submit_to_VAMPS(job): The processing job performing the task """ with qdb.sql_connection.TRN: - submit_VAMPS(job.parameters.values['artifact']) - job._set_status('success') + submit_VAMPS(job.parameters.values["artifact"]) + job._set_status("success") def submit_to_EBI(job): @@ -105,18 +108,20 @@ def submit_to_EBI(job): """ with qdb.sql_connection.TRN: param_vals = job.parameters.values - artifact_id = int(param_vals['artifact']) - submission_type = param_vals['submission_type'] + artifact_id = int(param_vals["artifact"]) + submission_type = 
param_vals["submission_type"] artifact = qdb.artifact.Artifact(artifact_id) for info in artifact.study._ebi_submission_jobs(): jid, aid, js, cbste, era = info - if js in ('running', 'queued') and jid != job.id: - error_msg = ("Cannot perform parallel EBI submission for " - "the same study. Current job running: %s" % js) + if js in ("running", "queued") and jid != job.id: + error_msg = ( + "Cannot perform parallel EBI submission for " + "the same study. Current job running: %s" % js + ) raise EBISubmissionError(error_msg) submit_EBI(artifact_id, submission_type, True) - job._set_status('success') + job._set_status("success") def copy_artifact(job): @@ -129,11 +134,12 @@ def copy_artifact(job): """ with qdb.sql_connection.TRN: param_vals = job.parameters.values - orig_artifact = qdb.artifact.Artifact(param_vals['artifact']) + orig_artifact = qdb.artifact.Artifact(param_vals["artifact"]) prep_template = qdb.metadata_template.prep_template.PrepTemplate( - param_vals['prep_template']) + param_vals["prep_template"] + ) qdb.artifact.Artifact.copy(orig_artifact, prep_template) - job._set_status('success') + job._set_status("success") def delete_artifact(job): @@ -145,9 +151,9 @@ def delete_artifact(job): The processing job performing the task """ with qdb.sql_connection.TRN: - artifact_id = job.parameters.values['artifact'] + artifact_id = job.parameters.values["artifact"] qdb.artifact.Artifact.delete(artifact_id) - job._set_status('success') + job._set_status("success") def create_sample_template(job): @@ -160,27 +166,30 @@ def create_sample_template(job): """ with qdb.sql_connection.TRN: params = job.parameters.values - fp = params['fp'] - study = qdb.study.Study(int(params['study_id'])) - is_mapping_file = params['is_mapping_file'] - data_type = params['data_type'] + fp = params["fp"] + study = qdb.study.Study(int(params["study_id"])) + is_mapping_file = params["is_mapping_file"] + data_type = params["data_type"] with warnings.catch_warnings(record=True) as warns: if is_mapping_file: create_templates_from_qiime_mapping_file(fp, study, data_type) else: qdb.metadata_template.sample_template.SampleTemplate.create( - qdb.metadata_template.util.load_template_to_dataframe(fp), - study) + qdb.metadata_template.util.load_template_to_dataframe(fp), study + ) remove(fp) if warns: - msg = '\n'.join(set(str(w.message) for w in warns)) - r_client.set("sample_template_%s" % study.id, - dumps({'job_id': job.id, 'alert_type': 'warning', - 'alert_msg': msg})) + msg = "\n".join(set(str(w.message) for w in warns)) + r_client.set( + "sample_template_%s" % study.id, + dumps( + {"job_id": job.id, "alert_type": "warning", "alert_msg": msg} + ), + ) - job._set_status('success') + job._set_status("success") def update_sample_template(job): @@ -193,8 +202,8 @@ def update_sample_template(job): """ with qdb.sql_connection.TRN: param_vals = job.parameters.values - study_id = param_vals['study'] - fp = param_vals['template_fp'] + study_id = param_vals["study"] + fp = param_vals["template_fp"] with warnings.catch_warnings(record=True) as warns: st = qdb.metadata_template.sample_template.SampleTemplate(study_id) df = qdb.metadata_template.util.load_template_to_dataframe(fp) @@ -204,12 +213,15 @@ def update_sample_template(job): # Join all the warning messages into one. 
Note that this info # will be ignored if an exception is raised if warns: - msg = '\n'.join(set(str(w.message) for w in warns)) - r_client.set("sample_template_%s" % study_id, - dumps({'job_id': job.id, 'alert_type': 'warning', - 'alert_msg': msg})) + msg = "\n".join(set(str(w.message) for w in warns)) + r_client.set( + "sample_template_%s" % study_id, + dumps( + {"job_id": job.id, "alert_type": "warning", "alert_msg": msg} + ), + ) - job._set_status('success') + job._set_status("success") def delete_sample_template(job): @@ -222,8 +234,9 @@ def delete_sample_template(job): """ with qdb.sql_connection.TRN: qdb.metadata_template.sample_template.SampleTemplate.delete( - job.parameters.values['study']) - job._set_status('success') + job.parameters.values["study"] + ) + job._set_status("success") def update_prep_template(job): @@ -236,8 +249,8 @@ def update_prep_template(job): """ with qdb.sql_connection.TRN: param_vals = job.parameters.values - prep_id = param_vals['prep_template'] - fp = param_vals['template_fp'] + prep_id = param_vals["prep_template"] + fp = param_vals["template_fp"] prep = qdb.metadata_template.prep_template.PrepTemplate(prep_id) with warnings.catch_warnings(record=True) as warns: @@ -248,12 +261,15 @@ def update_prep_template(job): # Join all the warning messages into one. Note that this info # will be ignored if an exception is raised if warns: - msg = '\n'.join(set(str(w.message) for w in warns)) - r_client.set("prep_template_%s" % prep_id, - dumps({'job_id': job.id, 'alert_type': 'warning', - 'alert_msg': msg})) + msg = "\n".join(set(str(w.message) for w in warns)) + r_client.set( + "prep_template_%s" % prep_id, + dumps( + {"job_id": job.id, "alert_type": "warning", "alert_msg": msg} + ), + ) - job._set_status('success') + job._set_status("success") def delete_sample_or_column(job): @@ -266,30 +282,34 @@ def delete_sample_or_column(job): """ with qdb.sql_connection.TRN: param_vals = job.parameters.values - obj_class = param_vals['obj_class'] - obj_id = param_vals['obj_id'] - sample_or_col = param_vals['sample_or_col'] - name = param_vals['name'].split(',') + obj_class = param_vals["obj_class"] + obj_id = param_vals["obj_id"] + sample_or_col = param_vals["sample_or_col"] + name = param_vals["name"].split(",") - if obj_class == 'SampleTemplate': + if obj_class == "SampleTemplate": constructor = qdb.metadata_template.sample_template.SampleTemplate - elif obj_class == 'PrepTemplate': + elif obj_class == "PrepTemplate": constructor = qdb.metadata_template.prep_template.PrepTemplate else: - raise ValueError('Unknown value "%s". Choose between ' - '"SampleTemplate" and "PrepTemplate"' % obj_class) + raise ValueError( + 'Unknown value "%s". Choose between ' + '"SampleTemplate" and "PrepTemplate"' % obj_class + ) - if sample_or_col == 'columns': + if sample_or_col == "columns": del_func = constructor(obj_id).delete_column name = name[0] - elif sample_or_col == 'samples': + elif sample_or_col == "samples": del_func = constructor(obj_id).delete_samples else: - raise ValueError('Unknown value "%s". Choose between "samples" ' - 'and "columns"' % sample_or_col) + raise ValueError( + 'Unknown value "%s". 
Choose between "samples" ' + 'and "columns"' % sample_or_col + ) del_func(name) - job._set_status('success') + job._set_status("success") def delete_study(job): @@ -302,7 +322,7 @@ def delete_study(job): """ MT = qdb.metadata_template with qdb.sql_connection.TRN: - study_id = job.parameters.values['study'] + study_id = job.parameters.values["study"] study = qdb.study.Study(study_id) # deleting analyses @@ -321,7 +341,7 @@ def delete_study(job): qdb.study.Study.delete(study_id) - job._set_status('success') + job._set_status("success") def complete_job(job): @@ -334,23 +354,23 @@ def complete_job(job): """ with qdb.sql_connection.TRN: param_vals = job.parameters.values - payload = loads(param_vals['payload']) - if payload['success']: - artifacts = payload['artifacts'] + payload = loads(param_vals["payload"]) + if payload["success"]: + artifacts = payload["artifacts"] error = None else: artifacts = None - error = payload['error'] - c_job = qdb.processing_job.ProcessingJob(param_vals['job_id']) - c_job.step = 'Completing via %s [%s]' % (job.id, job.external_id) + error = payload["error"] + c_job = qdb.processing_job.ProcessingJob(param_vals["job_id"]) + c_job.step = "Completing via %s [%s]" % (job.id, job.external_id) try: - c_job.complete(payload['success'], artifacts, error) + c_job.complete(payload["success"], artifacts, error) except Exception: c_job._set_error(traceback.format_exception(*exc_info())) - job._set_status('success') + job._set_status("success") - if 'archive' in payload: + if "archive" in payload: pass # ToDo: Archive # features = payload['archive'] @@ -366,12 +386,12 @@ def delete_analysis(job): The processing job performing the task """ with qdb.sql_connection.TRN: - analysis_id = job.parameters.values['analysis_id'] + analysis_id = job.parameters.values["analysis_id"] qdb.analysis.Analysis.delete_analysis_artifacts(analysis_id) - r_client.delete('analysis_delete_%d' % analysis_id) + r_client.delete("analysis_delete_%d" % analysis_id) - job._set_status('success') + job._set_status("success") def list_remote_files(job): @@ -383,17 +403,19 @@ def list_remote_files(job): The processing job performing the task """ with qdb.sql_connection.TRN: - url = job.parameters.values['url'] - private_key = job.parameters.values['private_key'] - study_id = job.parameters.values['study_id'] + url = job.parameters.values["url"] + private_key = job.parameters.values["private_key"] + study_id = job.parameters.values["study_id"] try: files = list_remote(url, private_key) - r_client.set("upload_study_%s" % study_id, - dumps({'job_id': job.id, 'url': url, 'files': files})) + r_client.set( + "upload_study_%s" % study_id, + dumps({"job_id": job.id, "url": url, "files": files}), + ) except Exception: job._set_error(traceback.format_exception(*exc_info())) else: - job._set_status('success') + job._set_status("success") def download_remote_files(job): @@ -405,15 +427,15 @@ def download_remote_files(job): The processing job performing the task """ with qdb.sql_connection.TRN: - url = job.parameters.values['url'] - destination = job.parameters.values['destination'] - private_key = job.parameters.values['private_key'] + url = job.parameters.values["url"] + destination = job.parameters.values["destination"] + private_key = job.parameters.values["private_key"] try: download_remote(url, private_key, destination) except Exception: job._set_error(traceback.format_exception(*exc_info())) else: - job._set_status('success') + job._set_status("success") def INSDC_download(job): @@ -426,11 +448,11 @@ def 
INSDC_download(job): """ with qdb.sql_connection.TRN: param_vals = job.parameters.values - download_source = param_vals['download_source'] - accession = param_vals['accession'] + download_source = param_vals["download_source"] + accession = param_vals["accession"] - if job.user.level != 'admin': - job._set_error('INSDC_download is only for administrators') + if job.user.level != "admin": + job._set_error("INSDC_download is only for administrators") job_dir = join(qiita_config.working_dir, job.id) qdb.util.create_nested_path(job_dir) @@ -438,26 +460,28 @@ def INSDC_download(job): # code doing something print(download_source, accession) - job._set_status('success') - - -TASK_DICT = {'build_analysis_files': build_analysis_files, - 'release_validators': release_validators, - 'submit_to_VAMPS': submit_to_VAMPS, - 'submit_to_EBI': submit_to_EBI, - 'copy_artifact': copy_artifact, - 'delete_artifact': delete_artifact, - 'create_sample_template': create_sample_template, - 'update_sample_template': update_sample_template, - 'delete_sample_template': delete_sample_template, - 'update_prep_template': update_prep_template, - 'delete_sample_or_column': delete_sample_or_column, - 'delete_study': delete_study, - 'complete_job': complete_job, - 'delete_analysis': delete_analysis, - 'list_remote_files': list_remote_files, - 'download_remote_files': download_remote_files, - 'INSDC_download': INSDC_download} + job._set_status("success") + + +TASK_DICT = { + "build_analysis_files": build_analysis_files, + "release_validators": release_validators, + "submit_to_VAMPS": submit_to_VAMPS, + "submit_to_EBI": submit_to_EBI, + "copy_artifact": copy_artifact, + "delete_artifact": delete_artifact, + "create_sample_template": create_sample_template, + "update_sample_template": update_sample_template, + "delete_sample_template": delete_sample_template, + "update_prep_template": update_prep_template, + "delete_sample_or_column": delete_sample_or_column, + "delete_study": delete_study, + "complete_job": complete_job, + "delete_analysis": delete_analysis, + "list_remote_files": list_remote_files, + "download_remote_files": download_remote_files, + "INSDC_download": INSDC_download, +} def private_task(job_id): @@ -468,7 +492,7 @@ def private_task(job_id): job_id : str The job id """ - if job_id == 'register': + if job_id == "register": # We don't need to do anything here if Qiita is registering plugins return @@ -480,6 +504,8 @@ def private_task(job_id): TASK_DICT[task_name](job) except Exception as e: log_msg = "Error on job %s: %s" % ( - job.id, ''.join(traceback.format_exception(*exc_info()))) - le = qdb.logger.LogEntry.create('Runtime', log_msg) + job.id, + "".join(traceback.format_exception(*exc_info())), + ) + le = qdb.logger.LogEntry.create("Runtime", log_msg) job.complete(False, error="Error (log id: %d): %s" % (le.id, e)) diff --git a/qiita_ware/test/test_commands.py b/qiita_ware/test/test_commands.py index fa1a827aa..16e053fe7 100644 --- a/qiita_ware/test/test_commands.py +++ b/qiita_ware/test/test_commands.py @@ -5,42 +5,41 @@ # # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from unittest import TestCase, main, skipIf -from os.path import join, basename -from tempfile import mkdtemp -import pandas as pd from datetime import datetime -from shutil import rmtree, copyfile -from os import path from glob import glob -from paramiko.ssh_exception import AuthenticationException +from os import path +from os.path import basename, join +from shutil import copyfile, rmtree +from tempfile import mkdtemp +from unittest import TestCase, main, skipIf +import pandas as pd from h5py import File +from paramiko.ssh_exception import AuthenticationException from qiita_files.demux import to_hdf5 -from qiita_ware.exceptions import ComputeError -from qiita_ware.commands import submit_EBI, list_remote, download_remote -from qiita_db.util import get_mountpoint -from qiita_db.study import Study, StudyPerson -from qiita_db.software import DefaultParameters, Parameters +from qiita_core.qiita_settings import qiita_config +from qiita_core.util import qiita_test_checker from qiita_db.artifact import Artifact from qiita_db.metadata_template.prep_template import PrepTemplate from qiita_db.metadata_template.sample_template import SampleTemplate +from qiita_db.software import DefaultParameters, Parameters +from qiita_db.study import Study, StudyPerson from qiita_db.user import User -from qiita_core.util import qiita_test_checker -from qiita_core.qiita_settings import qiita_config +from qiita_db.util import get_mountpoint +from qiita_ware.commands import download_remote, list_remote, submit_EBI +from qiita_ware.exceptions import ComputeError @qiita_test_checker() class SSHTests(TestCase): def setUp(self): self.self_dir_path = path.dirname(path.abspath(__file__)) - self.remote_dir_path = join(self.self_dir_path, - 'test_data/test_remote_dir/') - self.test_ssh_key = join(self.self_dir_path, 'test_data/test_key') - self.test_wrong_key = join(self.self_dir_path, 'test_data/random_key') + self.remote_dir_path = join(self.self_dir_path, "test_data/test_remote_dir/") + self.test_ssh_key = join(self.self_dir_path, "test_data/test_key") + self.test_wrong_key = join(self.self_dir_path, "test_data/random_key") self.temp_local_dir = mkdtemp() - self.exp_files = ['test_0.fastq.gz', 'test_1.txt'] + self.exp_files = ["test_0.fastq.gz", "test_1.txt"] def tearDown(self): rmtree(self.temp_local_dir) @@ -48,16 +47,17 @@ def tearDown(self): def _get_valid_files(self, folder): files = [] for x in qiita_config.valid_upload_extension: - files.extend([basename(f) for f in glob(join(folder, '*.%s' % x))]) + files.extend([basename(f) for f in glob(join(folder, "*.%s" % x))]) return files def test_list_scp_wrong_key(self): with self.assertRaises(AuthenticationException): - list_remote('scp://runner@localhost:'+self.remote_dir_path, - self.test_wrong_key) + list_remote( + "scp://runner@localhost:" + self.remote_dir_path, self.test_wrong_key + ) def test_list_scp(self): - kpath = join(self.temp_local_dir, 'tmp-key') + kpath = join(self.temp_local_dir, "tmp-key") copyfile(self.test_ssh_key, kpath) # 05/22/25: this test requires a scp/ssh connection and github # actions is broken; thus commenting out @@ -67,12 +67,14 @@ def test_list_scp(self): def test_download_remote_nonexist_key(self): with self.assertRaises(IOError): - download_remote('scp://runner@localhost:'+self.remote_dir_path, - join(self.self_dir_path, 'nokey'), - self.temp_local_dir) + download_remote( + "scp://runner@localhost:" + self.remote_dir_path, + join(self.self_dir_path, 
"nokey"), + self.temp_local_dir, + ) def test_download_scp(self): - kpath = join(self.temp_local_dir, 'tmp-key') + kpath = join(self.temp_local_dir, "tmp-key") copyfile(self.test_ssh_key, kpath) # 05/22/25: this test requires a scp/ssh connection and github # actions is broken; thus commenting out @@ -92,27 +94,31 @@ def setUp(self): def write_demux_files(self, prep_template, generate_hdf5=True): """Writes a demux test file to avoid duplication of code""" - fna_fp = join(self.temp_dir, 'seqs.fna') - demux_fp = join(self.temp_dir, 'demux.seqs') + fna_fp = join(self.temp_dir, "seqs.fna") + demux_fp = join(self.temp_dir, "demux.seqs") if generate_hdf5: - with open(fna_fp, 'w') as f: + with open(fna_fp, "w") as f: f.write(FASTA_EXAMPLE) with File(demux_fp, "w") as f: to_hdf5(fna_fp, f) else: - with open(demux_fp, 'w') as f: - f.write('') + with open(demux_fp, "w") as f: + f.write("") if prep_template.artifact is None: ppd = Artifact.create( - [(demux_fp, 6)], "Demultiplexed", prep_template=prep_template) + [(demux_fp, 6)], "Demultiplexed", prep_template=prep_template + ) else: params = Parameters.from_default_params( - DefaultParameters(1), - {'input_data': prep_template.artifact.id}) + DefaultParameters(1), {"input_data": prep_template.artifact.id} + ) ppd = Artifact.create( - [(demux_fp, 6)], "Demultiplexed", - parents=[prep_template.artifact], processing_parameters=params) + [(demux_fp, 6)], + "Demultiplexed", + parents=[prep_template.artifact], + processing_parameters=params, + ) return ppd def generate_new_study_with_preprocessed_data(self): @@ -125,64 +131,73 @@ def generate_new_study_with_preprocessed_data(self): "study_description": "Study for testing EBI", "study_abstract": "Study for testing EBI", "principal_investigator_id": StudyPerson(3), - "lab_person_id": StudyPerson(1) + "lab_person_id": StudyPerson(1), } - study = Study.create(User('test@foo.bar'), "Test EBI study", info) + study = Study.create(User("test@foo.bar"), "Test EBI study", info) metadata_dict = { - 'Sample1': {'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0), - 'physical_specimen_location': 'location1', - 'taxon_id': 9606, - 'scientific_name': 'homo sapiens', - 'Description': 'Test Sample 1'}, - 'Sample2': {'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0), - 'physical_specimen_location': 'location1', - 'taxon_id': 9606, - 'scientific_name': 'homo sapiens', - 'Description': 'Test Sample 2'}, - 'Sample3': {'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0), - 'physical_specimen_location': 'location1', - 'taxon_id': 9606, - 'scientific_name': 'homo sapiens', - 'Description': 'Test Sample 3'} + "Sample1": { + "collection_timestamp": datetime(2015, 6, 1, 7, 0, 0), + "physical_specimen_location": "location1", + "taxon_id": 9606, + "scientific_name": "homo sapiens", + "Description": "Test Sample 1", + }, + "Sample2": { + "collection_timestamp": datetime(2015, 6, 2, 7, 0, 0), + "physical_specimen_location": "location1", + "taxon_id": 9606, + "scientific_name": "homo sapiens", + "Description": "Test Sample 2", + }, + "Sample3": { + "collection_timestamp": datetime(2015, 6, 3, 7, 0, 0), + "physical_specimen_location": "location1", + "taxon_id": 9606, + "scientific_name": "homo sapiens", + "Description": "Test Sample 3", + }, } - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) SampleTemplate.create(metadata, study) metadata_dict = { - 'Sample1': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTC', 
- 'center_name': 'KnightLab', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'Protocol ABC', - 'experiment_design_description': "Random value 1"}, - 'Sample2': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTA', - 'center_name': 'KnightLab', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'Protocol ABC', - 'experiment_design_description': "Random value 2"}, - 'Sample3': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTT', - 'center_name': 'KnightLab', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'Protocol ABC', - 'experiment_design_description': "Random value 3"}, + "Sample1": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTC", + "center_name": "KnightLab", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "Protocol ABC", + "experiment_design_description": "Random value 1", + }, + "Sample2": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTA", + "center_name": "KnightLab", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "Protocol ABC", + "experiment_design_description": "Random value 2", + }, + "Sample3": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTT", + "center_name": "KnightLab", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "Protocol ABC", + "experiment_design_description": "Random value 3", + }, } - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) - pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics') - fna_fp = join(self.temp_dir, 'seqs.fna') - demux_fp = join(self.temp_dir, 'demux.seqs') - with open(fna_fp, 'w') as f: + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) + pt = PrepTemplate.create(metadata, study, "16S", "Metagenomics") + fna_fp = join(self.temp_dir, "seqs.fna") + demux_fp = join(self.temp_dir, "demux.seqs") + with open(fna_fp, "w") as f: f.write(FASTA_EXAMPLE_2.format(study.id)) - with File(demux_fp, 'w') as f: + with File(demux_fp, "w") as f: to_hdf5(fna_fp, f) - ppd = Artifact.create( - [(demux_fp, 6)], "Demultiplexed", prep_template=pt) + ppd = Artifact.create([(demux_fp, 6)], "Demultiplexed", prep_template=pt) return ppd @@ -191,47 +206,43 @@ def test_submit_EBI_step_2_failure(self): pid = ppd.id with self.assertRaises(ComputeError): - submit_EBI(pid, 'VALIDATE', True) + submit_EBI(pid, "VALIDATE", True) - rmtree(join(self.base_fp, '%d_ebi_submission' % pid), True) + rmtree(join(self.base_fp, "%d_ebi_submission" % pid), True) - @skipIf( - qiita_config.ebi_seq_xfer_pass == '', 'skip: ascp not configured') + @skipIf(qiita_config.ebi_seq_xfer_pass == "", "skip: ascp not configured") def test_submit_EBI_parse_EBI_reply_failure(self): ppd = self.write_demux_files(PrepTemplate(1)) pid = ppd.id with self.assertRaises(ComputeError) as error: - submit_EBI(pid, 'VALIDATE', True) + submit_EBI(pid, "VALIDATE", True) error = str(error.exception) - self.assertIn('EBI Submission failed! Log id:', error) - self.assertIn('The EBI submission failed:', error) + self.assertIn("EBI Submission failed! 
Log id:", error) + self.assertIn("The EBI submission failed:", error) - rmtree(join(self.base_fp, '%d_ebi_submission' % pid), True) + rmtree(join(self.base_fp, "%d_ebi_submission" % pid), True) - @skipIf( - qiita_config.ebi_seq_xfer_pass == '', 'skip: ascp not configured') + @skipIf(qiita_config.ebi_seq_xfer_pass == "", "skip: ascp not configured") def test_full_submission(self): artifact = self.generate_new_study_with_preprocessed_data() - self.assertEqual( - artifact.study.ebi_submission_status, 'not submitted') + self.assertEqual(artifact.study.ebi_submission_status, "not submitted") aid = artifact.id - submit_EBI(aid, 'VALIDATE', True, test=True) - self.assertEqual(artifact.study.ebi_submission_status, 'submitted') + submit_EBI(aid, "VALIDATE", True, test=True) + self.assertEqual(artifact.study.ebi_submission_status, "submitted") - rmtree(join(self.base_fp, '%d_ebi_submission' % aid), True) + rmtree(join(self.base_fp, "%d_ebi_submission" % aid), True) def test_max_ebiena_curl_error(self): artifact = self.generate_new_study_with_preprocessed_data() - self.assertEqual( - artifact.study.ebi_submission_status, 'not submitted') + self.assertEqual(artifact.study.ebi_submission_status, "not submitted") aid = artifact.id with self.assertRaises(ComputeError) as error: - submit_EBI(aid, 'VALIDATE', True, test_size=True) + submit_EBI(aid, "VALIDATE", True, test_size=True) error = str(error.exception) - self.assertIn('is too large. Before cleaning:', error) + self.assertIn("is too large. Before cleaning:", error) - rmtree(join(self.base_fp, '%d_ebi_submission' % aid), True) + rmtree(join(self.base_fp, "%d_ebi_submission" % aid), True) FASTA_EXAMPLE = """>1.SKB2.640194_1 X orig_bc=X new_bc=X bc_diffs=0 @@ -281,5 +292,5 @@ def test_max_ebiena_curl_error(self): """ -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/qiita_ware/test/test_ebi.py b/qiita_ware/test/test_ebi.py index e28fd39d3..cd2dc2446 100644 --- a/qiita_ware/test/test_ebi.py +++ b/qiita_ware/test/test_ebi.py @@ -6,31 +6,31 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- +import hashlib +import warnings +from datetime import date +from functools import partial from os import remove -from os.path import join, isdir, exists +from os.path import exists, isdir, join from shutil import rmtree from tempfile import mkdtemp from unittest import TestCase, main from xml.etree import ElementTree as ET -from functools import partial -import pandas as pd -import warnings -from datetime import date -import hashlib +import pandas as pd from h5py import File from qiita_files.demux import to_hdf5 from qiita_core.qiita_settings import qiita_config from qiita_core.util import qiita_test_checker -from qiita_db.util import get_mountpoint, convert_to_id -from qiita_db.study import Study, StudyPerson +from qiita_db.artifact import Artifact from qiita_db.metadata_template.prep_template import PrepTemplate from qiita_db.metadata_template.sample_template import SampleTemplate -from qiita_db.user import User -from qiita_db.artifact import Artifact -from qiita_db.software import Parameters, DefaultParameters from qiita_db.ontology import Ontology +from qiita_db.software import DefaultParameters, Parameters +from qiita_db.study import Study, StudyPerson +from qiita_db.user import User +from qiita_db.util import convert_to_id, get_mountpoint from qiita_ware.ebi import EBISubmission from qiita_ware.exceptions import EBISubmissionError @@ -62,38 +62,49 @@ def tearDown(self): def test_init(self): artifact_id = 3 - action = 'ADD' + action = "ADD" e = EBISubmission(artifact_id, action) self.files_to_remove.append(e.full_ebi_dir) self.assertEqual(e.artifact_id, artifact_id) - self.assertEqual(e.study_title, 'Identification of the Microbiomes ' - 'for Cannabis Soils') - self.assertEqual(e.study_abstract, - ('This is a preliminary study to examine the ' - 'microbiota associated with the Cannabis plant. ' - 'Soils samples from the bulk soil, soil associated ' - 'with the roots, and the rhizosphere were extracted ' - 'and the DNA sequenced. Roots from three ' - 'independent plants of different strains were ' - 'examined. These roots were obtained November 11, ' - '2011 from plants that had been harvested in the ' - 'summer. Future studies will attempt to analyze the ' - 'soils and rhizospheres from the same location at ' - 'different time points in the plant lifecycle.')) - self.assertEqual(e.investigation_type, 'Metagenomics') + self.assertEqual( + e.study_title, "Identification of the Microbiomes for Cannabis Soils" + ) + self.assertEqual( + e.study_abstract, + ( + "This is a preliminary study to examine the " + "microbiota associated with the Cannabis plant. " + "Soils samples from the bulk soil, soil associated " + "with the roots, and the rhizosphere were extracted " + "and the DNA sequenced. Roots from three " + "independent plants of different strains were " + "examined. These roots were obtained November 11, " + "2011 from plants that had been harvested in the " + "summer. Future studies will attempt to analyze the " + "soils and rhizospheres from the same location at " + "different time points in the plant lifecycle." 
+ ), + ) + self.assertEqual(e.investigation_type, "Metagenomics") self.assertIsNone(e.new_investigation_type) self.assertCountEqual(e.sample_template, e.samples) - self.assertCountEqual(e.publications, [ - ['10.100/123456', True], ['123456', False], - ['10.100/7891011', True], ['7891011', False]]) + self.assertCountEqual( + e.publications, + [ + ["10.100/123456", True], + ["123456", False], + ["10.100/7891011", True], + ["7891011", False], + ], + ) self.assertEqual(e.action, action) - self.assertEqual(e.ascp_reply, join(e.full_ebi_dir, 'ascp_reply.txt')) - self.assertEqual(e.curl_reply, join(e.full_ebi_dir, 'curl_reply.xml')) + self.assertEqual(e.ascp_reply, join(e.full_ebi_dir, "ascp_reply.txt")) + self.assertEqual(e.curl_reply, join(e.full_ebi_dir, "curl_reply.xml")) get_output_fp = partial(join, e.full_ebi_dir) - self.assertEqual(e.xml_dir, get_output_fp('xml_dir')) + self.assertEqual(e.xml_dir, get_output_fp("xml_dir")) self.assertIsNone(e.study_xml_fp) self.assertIsNone(e.sample_xml_fp) self.assertIsNone(e.experiment_xml_fp) @@ -106,271 +117,305 @@ def test_init(self): self.assertEqual(e.sample_demux_fps[sample], get_output_fp(sample)) def test_get_study_alias(self): - e = EBISubmission(3, 'ADD') + e = EBISubmission(3, "ADD") self.files_to_remove.append(e.full_ebi_dir) - exp = '%s_sid_1' % qiita_config.ebi_organization_prefix + exp = "%s_sid_1" % qiita_config.ebi_organization_prefix self.assertEqual(e._get_study_alias(), exp) def test_get_sample_alias(self): - e = EBISubmission(3, 'ADD') + e = EBISubmission(3, "ADD") self.files_to_remove.append(e.full_ebi_dir) - exp = '%s_sid_1:foo' % qiita_config.ebi_organization_prefix - self.assertEqual(e._get_sample_alias('foo'), exp) - self.assertEqual(e._sample_aliases, {exp: 'foo'}) + exp = "%s_sid_1:foo" % qiita_config.ebi_organization_prefix + self.assertEqual(e._get_sample_alias("foo"), exp) + self.assertEqual(e._sample_aliases, {exp: "foo"}) def test_get_experiment_alias(self): - e = EBISubmission(3, 'ADD') + e = EBISubmission(3, "ADD") self.files_to_remove.append(e.full_ebi_dir) - exp = '%s_ptid_1:foo' % qiita_config.ebi_organization_prefix - self.assertEqual(e._get_experiment_alias('foo'), exp) - self.assertEqual(e._experiment_aliases, {exp: 'foo'}) + exp = "%s_ptid_1:foo" % qiita_config.ebi_organization_prefix + self.assertEqual(e._get_experiment_alias("foo"), exp) + self.assertEqual(e._experiment_aliases, {exp: "foo"}) def test_get_submission_alias(self): artifact_id = 3 - e = EBISubmission(artifact_id, 'ADD') + e = EBISubmission(artifact_id, "ADD") self.files_to_remove.append(e.full_ebi_dir) obs = e._get_submission_alias() - exp = '%s_submission_%d' % (qiita_config.ebi_organization_prefix, - artifact_id) + exp = "%s_submission_%d" % (qiita_config.ebi_organization_prefix, artifact_id) self.assertEqual(obs, exp) def test_get_run_alias(self): artifact_id = 3 - e = EBISubmission(artifact_id, 'ADD') + e = EBISubmission(artifact_id, "ADD") self.files_to_remove.append(e.full_ebi_dir) - exp = '%s_ppdid_%d:foo' % (qiita_config.ebi_organization_prefix, - artifact_id) - self.assertEqual(e._get_run_alias('foo'), exp) - self.assertEqual(e._run_aliases, {exp: 'foo'}) + exp = "%s_ppdid_%d:foo" % (qiita_config.ebi_organization_prefix, artifact_id) + self.assertEqual(e._get_run_alias("foo"), exp) + self.assertEqual(e._run_aliases, {exp: "foo"}) def test_get_library_name(self): - e = EBISubmission(3, 'ADD') + e = EBISubmission(3, "ADD") self.files_to_remove.append(e.full_ebi_dir) obs = e._get_library_name("nasty") exp = "nasty<business>" 
self.assertEqual(obs, exp) def test_add_dict_as_tags_and_values(self): - e = EBISubmission(3, 'ADD') + e = EBISubmission(3, "ADD") self.files_to_remove.append(e.full_ebi_dir) - elm = ET.Element('TESTING', {'foo': 'bar'}) + elm = ET.Element("TESTING", {"foo": "bar"}) - e._add_dict_as_tags_and_values(elm, 'foo', {'x': 'y', - '>x': 'x": "') - self.assertEqual(obs.decode('ascii'), exp) + exp = ( + '' + ) + self.assertEqual(obs.decode("ascii"), exp) # removing samples so test text is easier to read - keys_to_del = ['1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', - '1.SKM8.640201', '1.SKM2.640199', '1.SKD2.640178', - '1.SKB7.640196', '1.SKD4.640185', '1.SKB8.640193', - '1.SKM3.640197', '1.SKD5.640186', '1.SKB1.640202', - '1.SKM1.640183', '1.SKD1.640179', '1.SKD3.640198', - '1.SKB5.640181', '1.SKB4.640189', '1.SKB9.640200', - '1.SKM9.640192', '1.SKD8.640184', '1.SKM5.640177', - '1.SKM7.640188', '1.SKD7.640191', '1.SKB6.640176', - '1.SKM4.640180'] + keys_to_del = [ + "1.SKD6.640190", + "1.SKM6.640187", + "1.SKD9.640182", + "1.SKM8.640201", + "1.SKM2.640199", + "1.SKD2.640178", + "1.SKB7.640196", + "1.SKD4.640185", + "1.SKB8.640193", + "1.SKM3.640197", + "1.SKD5.640186", + "1.SKB1.640202", + "1.SKM1.640183", + "1.SKD1.640179", + "1.SKD3.640198", + "1.SKB5.640181", + "1.SKB4.640189", + "1.SKB9.640200", + "1.SKM9.640192", + "1.SKD8.640184", + "1.SKM5.640177", + "1.SKM7.640188", + "1.SKD7.640191", + "1.SKB6.640176", + "1.SKM4.640180", + ] for k in keys_to_del: - del (submission.samples[k]) - del (submission.samples_prep[k]) + del submission.samples[k] + del submission.samples_prep[k] obs = ET.tostring(submission.generate_sample_xml()) - self.assertEqual(obs.decode('ascii'), exp) + self.assertEqual(obs.decode("ascii"), exp) obs = ET.tostring(submission.generate_sample_xml(samples=[])) - self.assertEqual(obs.decode('ascii'), exp) + self.assertEqual(obs.decode("ascii"), exp) def test_generate_spot_descriptor(self): - e = EBISubmission(3, 'ADD') + e = EBISubmission(3, "ADD") self.files_to_remove.append(e.full_ebi_dir) - elm = ET.Element('design', {'foo': 'bar'}) + elm = ET.Element("design", {"foo": "bar"}) - e._generate_spot_descriptor(elm, 'LS454') - exp = ''.join([line.strip() for line in GENSPOTDESC.splitlines()]) + e._generate_spot_descriptor(elm, "LS454") + exp = "".join([line.strip() for line in GENSPOTDESC.splitlines()]) obs = ET.tostring(elm) - self.assertEqual(obs.decode('ascii'), exp) + self.assertEqual(obs.decode("ascii"), exp) def test_generate_submission_xml(self): - submission = EBISubmission(3, 'ADD') + submission = EBISubmission(3, "ADD") self.files_to_remove.append(submission.full_ebi_dir) submission.experiment_xml_fp = "/some/path/experiment.xml" submission.run_xml_fp = "/some/path/run.xml" obs = ET.tostring( - submission.generate_submission_xml( - submission_date=date(2015, 9, 3))) + submission.generate_submission_xml(submission_date=date(2015, 9, 3)) + ) exp = SUBMISSIONXML % { - 'submission_alias': submission._get_submission_alias(), - 'center_name': qiita_config.ebi_center_name} - exp = ''.join([line.strip() for line in exp.splitlines()]) - self.assertEqual(obs.decode('ascii'), exp) + "submission_alias": submission._get_submission_alias(), + "center_name": qiita_config.ebi_center_name, + } + exp = "".join([line.strip() for line in exp.splitlines()]) + self.assertEqual(obs.decode("ascii"), exp) submission.study_xml_fp = "/some/path/study.xml" submission.sample_xml_fp = "/some/path/sample.xml" submission.experiment_xml_fp = "/some/path/experiment.xml" submission.run_xml_fp = 
"/some/path/run.xml" obs = ET.tostring( - submission.generate_submission_xml( - submission_date=date(2015, 9, 3))) + submission.generate_submission_xml(submission_date=date(2015, 9, 3)) + ) exp = SUBMISSIONXML_FULL % { - 'submission_alias': submission._get_submission_alias(), - 'center_name': qiita_config.ebi_center_name} - exp = ''.join([line.strip() for line in exp.splitlines()]) - self.assertEqual(obs.decode('ascii'), exp) + "submission_alias": submission._get_submission_alias(), + "center_name": qiita_config.ebi_center_name, + } + exp = "".join([line.strip() for line in exp.splitlines()]) + self.assertEqual(obs.decode("ascii"), exp) def test_write_xml_file(self): - element = ET.Element('TESTING', {'foo': 'bar'}) - e = EBISubmission(3, 'ADD') + element = ET.Element("TESTING", {"foo": "bar"}) + e = EBISubmission(3, "ADD") self.files_to_remove.append(e.full_ebi_dir) - e.write_xml_file(element, 'testfile') - self.files_to_remove.append('testfile') + e.write_xml_file(element, "testfile") + self.files_to_remove.append("testfile") - obs = open('testfile').read() + obs = open("testfile").read() exp = "\n" self.assertEqual(obs, exp) def test_generate_curl_command(self): - submission = EBISubmission(3, 'ADD') + submission = EBISubmission(3, "ADD") self.files_to_remove.append(submission.full_ebi_dir) - test_ebi_seq_xfer_user = 'ebi_seq_xfer_user' - test_ebi_seq_xfer_pass = 'ebi_seq_xfer_pass' - test_ebi_dropbox_url = 'ebi_dropbox_url' + test_ebi_seq_xfer_user = "ebi_seq_xfer_user" + test_ebi_seq_xfer_pass = "ebi_seq_xfer_pass" + test_ebi_dropbox_url = "ebi_dropbox_url" submission.study_xml_fp = "/some/path/study.xml" submission.sample_xml_fp = "/some/path/sample.xml" submission.experiment_xml_fp = "/some/path/experiment.xml" submission.run_xml_fp = "/some/path/run.xml" submission.submission_xml_fp = "/some/path/submission.xml" - obs = submission.generate_curl_command(test_ebi_seq_xfer_user, - test_ebi_seq_xfer_pass, - test_ebi_dropbox_url) - exp = ('curl -sS -k ' - '-F "SUBMISSION=@/some/path/submission.xml" ' - '-F "STUDY=@/some/path/study.xml" ' - '-F "SAMPLE=@/some/path/sample.xml" ' - '-F "RUN=@/some/path/run.xml" ' - '-F "EXPERIMENT=@/some/path/experiment.xml" ' - '"ebi_dropbox_url/?auth=ENA%20ebi_seq_xfer_user' - '%20ebi_seq_xfer_pass"') + obs = submission.generate_curl_command( + test_ebi_seq_xfer_user, test_ebi_seq_xfer_pass, test_ebi_dropbox_url + ) + exp = ( + "curl -sS -k " + '-F "SUBMISSION=@/some/path/submission.xml" ' + '-F "STUDY=@/some/path/study.xml" ' + '-F "SAMPLE=@/some/path/sample.xml" ' + '-F "RUN=@/some/path/run.xml" ' + '-F "EXPERIMENT=@/some/path/experiment.xml" ' + '"ebi_dropbox_url/?auth=ENA%20ebi_seq_xfer_user' + '%20ebi_seq_xfer_pass"' + ) self.assertEqual(obs, exp) - def write_demux_files(self, prep_template, sequences='FASTA-EXAMPLE'): + def write_demux_files(self, prep_template, sequences="FASTA-EXAMPLE"): """Writes a demux test file to avoid duplication of code""" - fna_fp = join(self.temp_dir, 'seqs.fna') - demux_fp = join(self.temp_dir, 'demux.seqs') - if sequences == 'FASTA-EXAMPLE': - with open(fna_fp, 'w') as f: + fna_fp = join(self.temp_dir, "seqs.fna") + demux_fp = join(self.temp_dir, "demux.seqs") + if sequences == "FASTA-EXAMPLE": + with open(fna_fp, "w") as f: f.write(FASTA_EXAMPLE) with File(demux_fp, "w") as f: to_hdf5(fna_fp, f) - elif sequences == 'WRONG-SEQS': - with open(fna_fp, 'w') as f: - f.write('>a_1 X orig_bc=X new_bc=X bc_diffs=0\nCCC') + elif sequences == "WRONG-SEQS": + with open(fna_fp, "w") as f: + f.write(">a_1 X orig_bc=X new_bc=X 
bc_diffs=0\nCCC") with File(demux_fp, "w") as f: to_hdf5(fna_fp, f) - elif sequences == 'EMPTY': - with open(demux_fp, 'w') as f: + elif sequences == "EMPTY": + with open(demux_fp, "w") as f: f.write("") else: - raise ValueError('Wrong sequences values: %s. Valid values: ' - 'FASTA_EXAMPLE, WRONG-SEQS, EMPTY' % sequences) + raise ValueError( + "Wrong sequences values: %s. Valid values: " + "FASTA_EXAMPLE, WRONG-SEQS, EMPTY" % sequences + ) if prep_template.artifact is None: artifact = Artifact.create( - [(demux_fp, 6)], "Demultiplexed", prep_template=prep_template) + [(demux_fp, 6)], "Demultiplexed", prep_template=prep_template + ) else: params = Parameters.from_default_params( - DefaultParameters(1), - {'input_data': prep_template.artifact.id}) + DefaultParameters(1), {"input_data": prep_template.artifact.id} + ) artifact = Artifact.create( - [(demux_fp, 6)], "Demultiplexed", + [(demux_fp, 6)], + "Demultiplexed", parents=[prep_template.artifact], - processing_parameters=params) + processing_parameters=params, + ) return artifact - def generate_new_prep_template_and_write_demux_files(self, - valid_metadata=False): + def generate_new_prep_template_and_write_demux_files(self, valid_metadata=False): """Creates new prep-template/demux-file to avoid duplication of code""" # creating prep template without required EBI submission columns if not valid_metadata: metadata_dict = { - 'SKD6.640190': {'center_name': 'ANL', 'barcode': 'AAA', - 'center_project_name': 'Test Project'}, - 'SKM6.640187': {'center_name': 'ANL', 'barcode': 'AAA', - 'center_project_name': 'Test Project', - 'platform': 'Illumina', - 'instrument_model': 'Not valid'}, - 'SKD9.640182': {'center_name': 'ANL', 'barcode': 'AAA', - 'center_project_name': 'Test Project', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'experiment_design_description': - 'microbiome of soil and rhizosphere', - 'library_construction_protocol': - 'PMID: 22402401'} + "SKD6.640190": { + "center_name": "ANL", + "barcode": "AAA", + "center_project_name": "Test Project", + }, + "SKM6.640187": { + "center_name": "ANL", + "barcode": "AAA", + "center_project_name": "Test Project", + "platform": "Illumina", + "instrument_model": "Not valid", + }, + "SKD9.640182": { + "center_name": "ANL", + "barcode": "AAA", + "center_project_name": "Test Project", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "primer": "GTGCCAGCMGCCGCGGTAA", + "experiment_design_description": "microbiome of soil and rhizosphere", + "library_construction_protocol": "PMID: 22402401", + }, } investigation_type = None else: metadata_dict = { - 'SKD6.640190': {'center_name': 'ANL', 'barcode': 'AAA', - 'center_project_name': 'Test Project', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'experiment_design_description': - 'microbiome of soil and rhizosphere', - 'library_construction_protocol': - 'PMID: 22402401'}, - 'SKM6.640187': {'center_name': 'ANL', 'barcode': 'AAA', - 'center_project_name': 'Test Project', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 'experiment_design_description': - 'microbiome of soil and rhizosphere', - 'library_construction_protocol': - 'PMID: 22402401', - 'extra_value': 1.2}, - 'SKD9.640182': {'center_name': 'ANL', 'barcode': 'AAA', - 'center_project_name': 'Test Project', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'primer': 'GTGCCAGCMGCCGCGGTAA', - 
'experiment_design_description': - 'microbiome of soil and rhizosphere', - 'library_construction_protocol': - 'PMID: 22402401', - 'extra_value': 'Unspecified'} + "SKD6.640190": { + "center_name": "ANL", + "barcode": "AAA", + "center_project_name": "Test Project", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "primer": "GTGCCAGCMGCCGCGGTAA", + "experiment_design_description": "microbiome of soil and rhizosphere", + "library_construction_protocol": "PMID: 22402401", + }, + "SKM6.640187": { + "center_name": "ANL", + "barcode": "AAA", + "center_project_name": "Test Project", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "primer": "GTGCCAGCMGCCGCGGTAA", + "experiment_design_description": "microbiome of soil and rhizosphere", + "library_construction_protocol": "PMID: 22402401", + "extra_value": 1.2, + }, + "SKD9.640182": { + "center_name": "ANL", + "barcode": "AAA", + "center_project_name": "Test Project", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "primer": "GTGCCAGCMGCCGCGGTAA", + "experiment_design_description": "microbiome of soil and rhizosphere", + "library_construction_protocol": "PMID: 22402401", + "extra_value": "Unspecified", + }, } investigation_type = "Metagenomics" - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) with warnings.catch_warnings(record=True): - pt = PrepTemplate.create(metadata, Study(1), "18S", - investigation_type=investigation_type) + pt = PrepTemplate.create( + metadata, Study(1), "18S", investigation_type=investigation_type + ) artifact = self.write_demux_files(pt) return artifact @@ -385,96 +430,107 @@ def generate_new_study_with_preprocessed_data(self): "study_description": "Study for testing EBI", "study_abstract": "Study for testing EBI", "principal_investigator_id": StudyPerson(3), - "lab_person_id": StudyPerson(1) + "lab_person_id": StudyPerson(1), } - study = Study.create(User('test@foo.bar'), "Test EBI study", info) + study = Study.create(User("test@foo.bar"), "Test EBI study", info) self.study_id = study.id metadata_dict = { - 'Sample1': {'collection_timestamp': '06/01/15 07:00:00', - 'physical_specimen_location': 'location1', - 'taxon_id': 9606, - 'scientific_name': 'homo sapiens', - 'Description': 'Test Sample 1'}, - 'Sample2': {'collection_timestamp': '06/02/15 07:00:00', - 'physical_specimen_location': 'location1', - 'taxon_id': 9606, - 'scientific_name': 'homo sapiens', - 'Description': 'Test Sample 2'}, - 'Sample3': {'collection_timestamp': '06/03/15 07:00:00', - 'physical_specimen_location': 'location1', - 'taxon_id': 9606, - 'scientific_name': 'homo sapiens', - 'Description': 'Test Sample 3'} + "Sample1": { + "collection_timestamp": "06/01/15 07:00:00", + "physical_specimen_location": "location1", + "taxon_id": 9606, + "scientific_name": "homo sapiens", + "Description": "Test Sample 1", + }, + "Sample2": { + "collection_timestamp": "06/02/15 07:00:00", + "physical_specimen_location": "location1", + "taxon_id": 9606, + "scientific_name": "homo sapiens", + "Description": "Test Sample 2", + }, + "Sample3": { + "collection_timestamp": "06/03/15 07:00:00", + "physical_specimen_location": "location1", + "taxon_id": 9606, + "scientific_name": "homo sapiens", + "Description": "Test Sample 3", + }, } - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) with 
warnings.catch_warnings(record=True): SampleTemplate.create(metadata, study) metadata_dict = { - 'Sample1': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTC', - 'center_name': 'KnightLab', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'Protocol ABC', - 'experiment_design_description': "Random value 1"}, - 'Sample2': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTA', - 'center_name': 'KnightLab', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'Protocol ABC', - 'experiment_design_description': "Random value 2"}, - 'Sample3': {'primer': 'GTGCCAGCMGCCGCGGTAA', - 'barcode': 'CGTAGAGCTCTT', - 'center_name': 'KnightLab', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'library_construction_protocol': 'Protocol ABC', - 'experiment_design_description': "Random value 3"}, + "Sample1": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTC", + "center_name": "KnightLab", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "Protocol ABC", + "experiment_design_description": "Random value 1", + }, + "Sample2": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTA", + "center_name": "KnightLab", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "Protocol ABC", + "experiment_design_description": "Random value 2", + }, + "Sample3": { + "primer": "GTGCCAGCMGCCGCGGTAA", + "barcode": "CGTAGAGCTCTT", + "center_name": "KnightLab", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "library_construction_protocol": "Protocol ABC", + "experiment_design_description": "Random value 3", + }, } - metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', - dtype=str) + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) with warnings.catch_warnings(record=True): - pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics') - fna_fp = join(self.temp_dir, 'seqs.fna') - demux_fp = join(self.temp_dir, 'demux.seqs') - with open(fna_fp, 'w') as f: + pt = PrepTemplate.create(metadata, study, "16S", "Metagenomics") + fna_fp = join(self.temp_dir, "seqs.fna") + demux_fp = join(self.temp_dir, "demux.seqs") + with open(fna_fp, "w") as f: f.write(FASTA_EXAMPLE_2.format(study.id)) - with File(demux_fp, 'w') as f: + with File(demux_fp, "w") as f: to_hdf5(fna_fp, f) # Magic number 6: the id of the preprocessed_demux filepath_type - artifact = Artifact.create( - [(demux_fp, 6)], "Demultiplexed", prep_template=pt) + artifact = Artifact.create([(demux_fp, 6)], "Demultiplexed", prep_template=pt) return artifact def test_init_exceptions(self): # not a valid action with self.assertRaises(EBISubmissionError): - EBISubmission(1, 'This is not a valid action') + EBISubmission(1, "This is not a valid action") # artifact can't be submitted with self.assertRaises(EBISubmissionError): - EBISubmission(1, 'ADD') + EBISubmission(1, "ADD") # artifact has been already submitted with self.assertRaises(EBISubmissionError): - EBISubmission(2, 'ADD') + EBISubmission(2, "ADD") artifact = self.generate_new_prep_template_and_write_demux_files() # raise error as we are missing columns # artifact.prep_templates[0] cause there should only be 1 - exp_text = ("Errors found during EBI submission for study #1, " - "artifact #%d and prep template #%d:\n" - "These samples do not have a valid " - "platform (instrumet model wasn't checked): " - 
"1.SKD6.640190\nThese samples do not have a valid " - "instrument model: 1.SKM6.640187" % ( - artifact.id, artifact.prep_templates[0].id)) + exp_text = ( + "Errors found during EBI submission for study #1, " + "artifact #%d and prep template #%d:\n" + "These samples do not have a valid " + "platform (instrumet model wasn't checked): " + "1.SKD6.640190\nThese samples do not have a valid " + "instrument model: 1.SKM6.640187" + % (artifact.id, artifact.prep_templates[0].id) + ) with self.assertRaises(EBISubmissionError) as e: - EBISubmission(artifact.id, 'ADD') + EBISubmission(artifact.id, "ADD") self.assertEqual(exp_text, str(e.exception)) def test_prep_with_less_samples_than_sample_template(self): @@ -482,74 +538,92 @@ def test_prep_with_less_samples_than_sample_template(self): # the sample template and basically we want to test that # the EBISubmission can be generated artifact = self.generate_new_prep_template_and_write_demux_files(True) - e = EBISubmission(artifact.id, 'ADD') + e = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(e.full_ebi_dir) - exp = ['1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182'] + exp = ["1.SKD6.640190", "1.SKM6.640187", "1.SKD9.640182"] self.assertCountEqual(exp, e.samples) def test_generate_experiment_xml(self): artifact = self.generate_new_study_with_preprocessed_data() - submission = EBISubmission(artifact.id, 'ADD') + submission = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(submission.full_ebi_dir) obs = ET.tostring(submission.generate_experiment_xml()) exp = EXPERIMENTXML_NEWSTUDY % { - 'organization_prefix': qiita_config.ebi_organization_prefix, - 'center_name': qiita_config.ebi_center_name, - 'study_id': artifact.study.id, - 'pt_id': artifact.prep_templates[0].id + "organization_prefix": qiita_config.ebi_organization_prefix, + "center_name": qiita_config.ebi_center_name, + "study_id": artifact.study.id, + "pt_id": artifact.prep_templates[0].id, } - exp = ''.join([line.strip() for line in exp.splitlines()]) - self.assertEqual(obs.decode('ascii'), exp) + exp = "".join([line.strip() for line in exp.splitlines()]) + self.assertEqual(obs.decode("ascii"), exp) artifact_id = 3 - submission = EBISubmission(artifact_id, 'ADD') + submission = EBISubmission(artifact_id, "ADD") self.files_to_remove.append(submission.full_ebi_dir) - samples = ['1.SKB2.640194', '1.SKB3.640195'] + samples = ["1.SKB2.640194", "1.SKB3.640195"] obs = ET.tostring(submission.generate_experiment_xml(samples=samples)) exp = EXPERIMENTXML - exp = ''.join([line.strip() for line in exp.splitlines()]) - self.assertEqual(obs.decode('ascii'), exp) + exp = "".join([line.strip() for line in exp.splitlines()]) + self.assertEqual(obs.decode("ascii"), exp) # removing samples so test text is easier to read - keys_to_del = ['1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', - '1.SKM8.640201', '1.SKM2.640199', '1.SKD2.640178', - '1.SKB7.640196', '1.SKD4.640185', '1.SKB8.640193', - '1.SKM3.640197', '1.SKD5.640186', '1.SKB1.640202', - '1.SKM1.640183', '1.SKD1.640179', '1.SKD3.640198', - '1.SKB5.640181', '1.SKB4.640189', '1.SKB9.640200', - '1.SKM9.640192', '1.SKD8.640184', '1.SKM5.640177', - '1.SKM7.640188', '1.SKD7.640191', '1.SKB6.640176', - '1.SKM4.640180'] + keys_to_del = [ + "1.SKD6.640190", + "1.SKM6.640187", + "1.SKD9.640182", + "1.SKM8.640201", + "1.SKM2.640199", + "1.SKD2.640178", + "1.SKB7.640196", + "1.SKD4.640185", + "1.SKB8.640193", + "1.SKM3.640197", + "1.SKD5.640186", + "1.SKB1.640202", + "1.SKM1.640183", + "1.SKD1.640179", + "1.SKD3.640198", + "1.SKB5.640181", + 
"1.SKB4.640189", + "1.SKB9.640200", + "1.SKM9.640192", + "1.SKD8.640184", + "1.SKM5.640177", + "1.SKM7.640188", + "1.SKD7.640191", + "1.SKB6.640176", + "1.SKM4.640180", + ] for k in keys_to_del: - del (submission.samples[k]) - del (submission.samples_prep[k]) + del submission.samples[k] + del submission.samples_prep[k] obs = ET.tostring(submission.generate_experiment_xml()) - self.assertEqual(obs.decode('ascii'), exp) + self.assertEqual(obs.decode("ascii"), exp) # changing investigation_type to test user defined terms, first let's # create a new term - new_term = 'ULTIMATE TERM' - ena_ontology = Ontology(convert_to_id('ENA', 'ontology')) + new_term = "ULTIMATE TERM" + ena_ontology = Ontology(convert_to_id("ENA", "ontology")) ena_ontology.add_user_defined_term(new_term) # set the preparation with the new term submission.prep_template.investigation_type = new_term # regenerate submission to make sure everything is just fine ... - submission = EBISubmission(artifact_id, 'ADD') - self.assertEqual(submission.investigation_type, 'Other') + submission = EBISubmission(artifact_id, "ADD") + self.assertEqual(submission.investigation_type, "Other") self.assertEqual(submission.new_investigation_type, new_term) obs = ET.tostring(submission.generate_experiment_xml()) - exp = '%s' % new_term - self.assertIn(exp, obs.decode('ascii')) + exp = "%s" % new_term + self.assertIn(exp, obs.decode("ascii")) # returnging investigation_type to it's value - submission.prep_template.investigation_type = 'Metagenomics' + submission.prep_template.investigation_type = "Metagenomics" def test_generate_run_xml(self): artifact = self.generate_new_study_with_preprocessed_data() - submission = EBISubmission(artifact.id, 'ADD') + submission = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(submission.full_ebi_dir) submission.generate_demultiplexed_fastq(mtime=1) obs = ET.tostring(submission.generate_run_xml()) @@ -557,49 +631,56 @@ def test_generate_run_xml(self): md5_sums = {} for s, fp in submission.sample_demux_fps.items(): md5_sums[s] = hashlib.md5( - open(fp + submission.FWD_READ_SUFFIX, 'rb').read()).hexdigest() + open(fp + submission.FWD_READ_SUFFIX, "rb").read() + ).hexdigest() exp = RUNXML_NEWSTUDY % { - 'study_alias': submission._get_study_alias(), - 'ebi_dir': submission.ebi_dir, - 'organization_prefix': qiita_config.ebi_organization_prefix, - 'center_name': qiita_config.ebi_center_name, - 'artifact_id': artifact.id, - 'study_id': artifact.study.id, - 'pt_id': artifact.prep_templates[0].id, - 'sample_1': md5_sums['%d.Sample1' % self.study_id], - 'sample_2': md5_sums['%d.Sample2' % self.study_id], - 'sample_3': md5_sums['%d.Sample3' % self.study_id] + "study_alias": submission._get_study_alias(), + "ebi_dir": submission.ebi_dir, + "organization_prefix": qiita_config.ebi_organization_prefix, + "center_name": qiita_config.ebi_center_name, + "artifact_id": artifact.id, + "study_id": artifact.study.id, + "pt_id": artifact.prep_templates[0].id, + "sample_1": md5_sums["%d.Sample1" % self.study_id], + "sample_2": md5_sums["%d.Sample2" % self.study_id], + "sample_3": md5_sums["%d.Sample3" % self.study_id], } - exp = ''.join([line.strip() for line in exp.splitlines()]) - self.assertEqual(obs.decode('ascii'), exp) + exp = "".join([line.strip() for line in exp.splitlines()]) + self.assertEqual(obs.decode("ascii"), exp) artifact = self.write_demux_files(PrepTemplate(1)) - submission = EBISubmission(artifact.id, 'ADD') + submission = EBISubmission(artifact.id, "ADD") # removing samples so test text is easier to read 
- keys_to_del = ['1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', - '1.SKM8.640201', '1.SKM2.640199'] + keys_to_del = [ + "1.SKD6.640190", + "1.SKM6.640187", + "1.SKD9.640182", + "1.SKM8.640201", + "1.SKM2.640199", + ] for k in keys_to_del: - del (submission.samples[k]) - del (submission.samples_prep[k]) + del submission.samples[k] + del submission.samples_prep[k] submission.generate_demultiplexed_fastq(mtime=1) self.files_to_remove.append(submission.full_ebi_dir) obs = ET.tostring(submission.generate_run_xml()) exp = RUNXML % { - 'study_alias': submission._get_study_alias(), - 'ebi_dir': submission.ebi_dir, - 'organization_prefix': qiita_config.ebi_organization_prefix, - 'center_name': qiita_config.ebi_center_name, - 'artifact_id': artifact.id} - exp = ''.join([line.strip() for line in exp.splitlines()]) - self.assertEqual(obs.decode('ascii'), exp) + "study_alias": submission._get_study_alias(), + "ebi_dir": submission.ebi_dir, + "organization_prefix": qiita_config.ebi_organization_prefix, + "center_name": qiita_config.ebi_center_name, + "artifact_id": artifact.id, + } + exp = "".join([line.strip() for line in exp.splitlines()]) + self.assertEqual(obs.decode("ascii"), exp) def test_generate_xml_files(self): artifact = self.generate_new_study_with_preprocessed_data() - e = EBISubmission(artifact.id, 'ADD') + e = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(e.full_ebi_dir) e.generate_demultiplexed_fastq() self.assertIsNone(e.run_xml_fp) @@ -615,7 +696,7 @@ def test_generate_xml_files(self): self.assertIsNotNone(e.submission_xml_fp) artifact = self.generate_new_prep_template_and_write_demux_files(True) - e = EBISubmission(artifact.id, 'ADD') + e = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(e.full_ebi_dir) e.generate_demultiplexed_fastq() self.assertIsNone(e.run_xml_fp) @@ -631,7 +712,7 @@ def test_generate_xml_files(self): self.assertIsNotNone(e.submission_xml_fp) artifact = self.write_demux_files(PrepTemplate(1)) - e = EBISubmission(artifact.id, 'ADD') + e = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(e.full_ebi_dir) e.generate_demultiplexed_fastq() self.assertIsNone(e.run_xml_fp) @@ -648,15 +729,15 @@ def test_generate_xml_files(self): def test_generate_demultiplexed_fastq_failure(self): # generating demux file for testing - artifact = self.write_demux_files(PrepTemplate(1), 'EMPTY') + artifact = self.write_demux_files(PrepTemplate(1), "EMPTY") - ebi_submission = EBISubmission(artifact.id, 'ADD') + ebi_submission = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(ebi_submission.full_ebi_dir) with self.assertRaises(EBISubmissionError): ebi_submission.generate_demultiplexed_fastq(rewrite_fastq=True) - artifact = self.write_demux_files(PrepTemplate(1), 'WRONG-SEQS') - ebi_submission = EBISubmission(artifact.id, 'ADD') + artifact = self.write_demux_files(PrepTemplate(1), "WRONG-SEQS") + ebi_submission = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(ebi_submission.full_ebi_dir) with self.assertRaises(EBISubmissionError): ebi_submission.generate_demultiplexed_fastq() @@ -664,36 +745,44 @@ def test_generate_demultiplexed_fastq_failure(self): def test_generate_demultiplexed_fastq(self): # generating demux file for testing exp_demux_samples = set( - ['1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', - '1.SKB2.640194', '1.SKM8.640201', '1.SKM4.640180', - '1.SKM2.640199', '1.SKB3.640195', '1.SKB6.640176']) + [ + "1.SKD6.640190", + "1.SKM6.640187", + "1.SKD9.640182", + "1.SKB2.640194", + "1.SKM8.640201", + 
"1.SKM4.640180", + "1.SKM2.640199", + "1.SKB3.640195", + "1.SKB6.640176", + ] + ) artifact = self.write_demux_files(PrepTemplate(1)) # This is testing that only the samples with sequences are going to # be created - ebi_submission = EBISubmission(artifact.id, 'ADD') + ebi_submission = EBISubmission(artifact.id, "ADD") # adding rewrite_fastq=True as it's possible to have duplicated ids # and this will assure to get the right test obs_demux_samples = ebi_submission.generate_demultiplexed_fastq( - rewrite_fastq=True) + rewrite_fastq=True + ) self.files_to_remove.append(ebi_submission.full_ebi_dir) self.assertCountEqual(obs_demux_samples, exp_demux_samples) # testing that the samples/samples_prep and demux_samples are the same self.assertCountEqual(obs_demux_samples, ebi_submission.samples.keys()) - self.assertCountEqual(obs_demux_samples, - ebi_submission.samples_prep.keys()) + self.assertCountEqual(obs_demux_samples, ebi_submission.samples_prep.keys()) # If the last test passed then we can test that the folder already # exists and that we have the same files and ignore not fastq.gz files - ebi_submission = EBISubmission(artifact.id, 'ADD') + ebi_submission = EBISubmission(artifact.id, "ADD") obs_demux_samples = ebi_submission.generate_demultiplexed_fastq() self.files_to_remove.append(ebi_submission.full_ebi_dir) self.assertCountEqual(obs_demux_samples, exp_demux_samples) # testing that the samples/samples_prep and demux_samples are the same self.assertCountEqual(obs_demux_samples, ebi_submission.samples.keys()) - self.assertCountEqual(obs_demux_samples, - ebi_submission.samples_prep.keys()) + self.assertCountEqual(obs_demux_samples, ebi_submission.samples_prep.keys()) def _generate_per_sample_FASTQs(self, prep_template, sequences): # generating a per_sample_FASTQ artifact, adding should_rename so @@ -701,80 +790,85 @@ def _generate_per_sample_FASTQs(self, prep_template, sequences): # copy/gz-generation files = [] for sn, seqs in sequences.items(): - fn = join(self.temp_dir, sn + 'should_rename.fastq') - with open(fn, 'w') as fh: + fn = join(self.temp_dir, sn + "should_rename.fastq") + with open(fn, "w") as fh: fh.write(seqs) files.append(fn) self.files_to_remove.append(fn) if prep_template.artifact is None: artifact = Artifact.create( - [(fp, 1) for fp in files], "per_sample_FASTQ", - prep_template=prep_template) + [(fp, 1) for fp in files], + "per_sample_FASTQ", + prep_template=prep_template, + ) else: params = Parameters.from_default_params( - DefaultParameters(1), - {'input_data': prep_template.artifact.id}) + DefaultParameters(1), {"input_data": prep_template.artifact.id} + ) artifact = Artifact.create( # 1 is raw_forward_seqs - [(fp, 1) for fp in files], "per_sample_FASTQ", + [(fp, 1) for fp in files], + "per_sample_FASTQ", parents=[prep_template.artifact], - processing_parameters=params) + processing_parameters=params, + ) return artifact def test_generate_demultiplexed_per_sample_fastq(self): # testing failure due to "extra" filepaths - artifact = self._generate_per_sample_FASTQs( - PrepTemplate(1), FASTQ_EXAMPLE) - ebi_submission = EBISubmission(artifact.id, 'ADD') + artifact = self._generate_per_sample_FASTQs(PrepTemplate(1), FASTQ_EXAMPLE) + ebi_submission = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(ebi_submission.full_ebi_dir) with self.assertRaises(EBISubmissionError): ebi_submission.generate_demultiplexed_fastq() # testing that we generate the correct samples - exp_samples = ['1.SKM4.640180', '1.SKB2.640194'] + exp_samples = ["1.SKM4.640180", "1.SKB2.640194"] 
metadata_dict = { - 'SKB2.640194': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'experiment_design_description': - 'microbiome of soil and rhizosphere', - 'library_construction_protocol': - 'PMID: 22402401', - 'run_prefix': '1.SKB2.640194'}, - 'SKM4.640180': {'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'experiment_design_description': - 'microbiome of soil and rhizosphere', - 'library_construction_protocol': - 'PMID: 22402401', - 'run_prefix': '1.SKM4.640180'}} - metadata = pd.DataFrame.from_dict( - metadata_dict, orient='index', dtype=str) + "SKB2.640194": { + "center_name": "ANL", + "center_project_name": "Test Project", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "experiment_design_description": "microbiome of soil and rhizosphere", + "library_construction_protocol": "PMID: 22402401", + "run_prefix": "1.SKB2.640194", + }, + "SKM4.640180": { + "center_name": "ANL", + "center_project_name": "Test Project", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "experiment_design_description": "microbiome of soil and rhizosphere", + "library_construction_protocol": "PMID: 22402401", + "run_prefix": "1.SKM4.640180", + }, + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) with warnings.catch_warnings(record=True): - pt = PrepTemplate.create(metadata, Study(1), "18S", - investigation_type="Metagenomics") + pt = PrepTemplate.create( + metadata, Study(1), "18S", investigation_type="Metagenomics" + ) artifact = self._generate_per_sample_FASTQs(pt, FASTQ_EXAMPLE) # this should fail due to missing columns with self.assertRaises(EBISubmissionError) as err: - ebi_submission = EBISubmission(artifact.id, 'ADD') - self.assertIn('Missing column in the prep template: barcode', - str(err.exception)) + ebi_submission = EBISubmission(artifact.id, "ADD") + self.assertIn( + "Missing column in the prep template: barcode", str(err.exception) + ) metadata_dict = { - 'SKB2.640194': {'barcode': 'AAA', 'primer': 'CCCC'}, - 'SKM4.640180': {'barcode': 'CCC', 'primer': 'AAAA'}} - metadata = pd.DataFrame.from_dict( - metadata_dict, orient='index', dtype=str) + "SKB2.640194": {"barcode": "AAA", "primer": "CCCC"}, + "SKM4.640180": {"barcode": "CCC", "primer": "AAAA"}, + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) with warnings.catch_warnings(record=True): pt.extend_and_update(metadata) - ebi_submission = EBISubmission(artifact.id, 'ADD') + ebi_submission = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(ebi_submission.full_ebi_dir) obs_demux_samples = ebi_submission.generate_demultiplexed_fastq() @@ -785,23 +879,23 @@ def test_generate_demultiplexed_per_sample_fastq(self): ebi_submission.generate_xml_files() obs_run_xml = open(ebi_submission.run_xml_fp).read() obs_experiment_xml = open(ebi_submission.experiment_xml_fp).read() - self.assertIn('1.SKB2.640194.R1.fastq.gz', obs_run_xml) - self.assertNotIn('1.SKB2.640194.R2.fastq.gz', obs_run_xml) - self.assertIn('1.SKM4.640180.R1.fastq.gz', obs_run_xml) - self.assertNotIn('1.SKM4.640180.R2.fastq.gz', obs_run_xml) - self.assertNotIn('PAIRED', obs_experiment_xml) - self.assertIn('SINGLE', obs_experiment_xml) + self.assertIn("1.SKB2.640194.R1.fastq.gz", obs_run_xml) + self.assertNotIn("1.SKB2.640194.R2.fastq.gz", obs_run_xml) + self.assertIn("1.SKM4.640180.R1.fastq.gz", obs_run_xml) + 
self.assertNotIn("1.SKM4.640180.R2.fastq.gz", obs_run_xml) + self.assertNotIn("PAIRED", obs_experiment_xml) + self.assertIn("SINGLE", obs_experiment_xml) # generate_send_sequences_cmd returns a list of commands so joining # for easier testing - obs_cmd = '|'.join(ebi_submission.generate_send_sequences_cmd()) - self.assertIn('1.SKB2.640194.R1.fastq.gz', obs_cmd) - self.assertNotIn('1.SKB2.640194.R2.fastq.gz', obs_cmd) - self.assertIn('1.SKM4.640180.R1.fastq.gz', obs_cmd) - self.assertNotIn('1.SKM4.640180.R2.fastq.gz', obs_cmd) + obs_cmd = "|".join(ebi_submission.generate_send_sequences_cmd()) + self.assertIn("1.SKB2.640194.R1.fastq.gz", obs_cmd) + self.assertNotIn("1.SKB2.640194.R2.fastq.gz", obs_cmd) + self.assertIn("1.SKM4.640180.R1.fastq.gz", obs_cmd) + self.assertNotIn("1.SKM4.640180.R2.fastq.gz", obs_cmd) # at this point the full_ebi_dir has been created so we can test that # the ADD actually works without rewriting the files - ebi_submission = EBISubmission(artifact.id, 'ADD') + ebi_submission = EBISubmission(artifact.id, "ADD") obs_demux_samples = ebi_submission.generate_demultiplexed_fastq() self.assertCountEqual(obs_demux_samples, exp_samples) self.assertCountEqual(ebi_submission.samples.keys(), exp_samples) @@ -810,69 +904,69 @@ def test_generate_demultiplexed_per_sample_fastq(self): ebi_submission.generate_xml_files() obs_run_xml = open(ebi_submission.run_xml_fp).read() obs_experiment_xml = open(ebi_submission.experiment_xml_fp).read() - self.assertIn('1.SKB2.640194.R1.fastq.gz', obs_run_xml) - self.assertNotIn('1.SKB2.640194.R2.fastq.gz', obs_run_xml) - self.assertIn('1.SKM4.640180.R1.fastq.gz', obs_run_xml) - self.assertNotIn('1.SKM4.640180.R2.fastq.gz', obs_run_xml) - self.assertNotIn('PAIRED', obs_experiment_xml) - self.assertIn('SINGLE', obs_experiment_xml) + self.assertIn("1.SKB2.640194.R1.fastq.gz", obs_run_xml) + self.assertNotIn("1.SKB2.640194.R2.fastq.gz", obs_run_xml) + self.assertIn("1.SKM4.640180.R1.fastq.gz", obs_run_xml) + self.assertNotIn("1.SKM4.640180.R2.fastq.gz", obs_run_xml) + self.assertNotIn("PAIRED", obs_experiment_xml) + self.assertIn("SINGLE", obs_experiment_xml) # generate_send_sequences_cmd returns a list of commands so joining # for easier testing - obs_cmd = '|'.join(ebi_submission.generate_send_sequences_cmd()) - self.assertIn('1.SKB2.640194.R1.fastq.gz', obs_cmd) - self.assertNotIn('1.SKB2.640194.R2.fastq.gz', obs_cmd) - self.assertIn('1.SKM4.640180.R1.fastq.gz', obs_cmd) - self.assertNotIn('1.SKM4.640180.R2.fastq.gz', obs_cmd) + obs_cmd = "|".join(ebi_submission.generate_send_sequences_cmd()) + self.assertIn("1.SKB2.640194.R1.fastq.gz", obs_cmd) + self.assertNotIn("1.SKB2.640194.R2.fastq.gz", obs_cmd) + self.assertIn("1.SKM4.640180.R1.fastq.gz", obs_cmd) + self.assertNotIn("1.SKM4.640180.R2.fastq.gz", obs_cmd) Artifact.delete(artifact.id) PrepTemplate.delete(pt.id) def test_generate_demultiplexed_per_sample_fastq_reverse(self): metadata_dict = { - 'SKB2.640194': {'barcode': 'AAA', - 'primer': 'CCCC', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'experiment_design_description': - 'microbiome of soil and rhizosphere', - 'library_construction_protocol': - 'PMID: 22402401', - 'run_prefix': '1.SKB2.640194'}, - 'SKM4.640180': {'barcode': 'CCC', - 'primer': 'AAAA', - 'center_name': 'ANL', - 'center_project_name': 'Test Project', - 'platform': 'Illumina', - 'instrument_model': 'Illumina MiSeq', - 'experiment_design_description': - 'microbiome of soil and rhizosphere', 
- 'library_construction_protocol': - 'PMID: 22402401', - 'run_prefix': '1.SKM4.640180'}} - metadata = pd.DataFrame.from_dict( - metadata_dict, orient='index', dtype=str) + "SKB2.640194": { + "barcode": "AAA", + "primer": "CCCC", + "center_name": "ANL", + "center_project_name": "Test Project", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "experiment_design_description": "microbiome of soil and rhizosphere", + "library_construction_protocol": "PMID: 22402401", + "run_prefix": "1.SKB2.640194", + }, + "SKM4.640180": { + "barcode": "CCC", + "primer": "AAAA", + "center_name": "ANL", + "center_project_name": "Test Project", + "platform": "Illumina", + "instrument_model": "Illumina MiSeq", + "experiment_design_description": "microbiome of soil and rhizosphere", + "library_construction_protocol": "PMID: 22402401", + "run_prefix": "1.SKM4.640180", + }, + } + metadata = pd.DataFrame.from_dict(metadata_dict, orient="index", dtype=str) with warnings.catch_warnings(record=True): - pt = PrepTemplate.create(metadata, Study(1), "18S", - investigation_type="Metagenomics") + pt = PrepTemplate.create( + metadata, Study(1), "18S", investigation_type="Metagenomics" + ) filepaths = [] for sn in pt: # 1 is forward, 2 is reverse - filepaths.append((join(self.temp_dir, sn + '_rename.R1.fastq'), 1)) - filepaths.append((join(self.temp_dir, sn + '_rename.R2.fastq'), 2)) + filepaths.append((join(self.temp_dir, sn + "_rename.R1.fastq"), 1)) + filepaths.append((join(self.temp_dir, sn + "_rename.R2.fastq"), 2)) for fn, _ in filepaths: - with open(fn, 'w') as fh: - fh.write('some text') + with open(fn, "w") as fh: + fh.write("some text") self.files_to_remove.append(fn) - artifact = Artifact.create( - filepaths, "per_sample_FASTQ", prep_template=pt) + artifact = Artifact.create(filepaths, "per_sample_FASTQ", prep_template=pt) - ebi_submission = EBISubmission(artifact.id, 'ADD') + ebi_submission = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(ebi_submission.full_ebi_dir) obs_demux_samples = ebi_submission.generate_demultiplexed_fastq() - exp_samples = ['1.SKM4.640180', '1.SKB2.640194'] + exp_samples = ["1.SKM4.640180", "1.SKB2.640194"] self.assertCountEqual(obs_demux_samples, exp_samples) self.assertCountEqual(ebi_submission.samples.keys(), exp_samples) self.assertCountEqual(ebi_submission.samples_prep.keys(), exp_samples) @@ -880,25 +974,25 @@ def test_generate_demultiplexed_per_sample_fastq_reverse(self): ebi_submission.generate_xml_files() obs_run_xml = open(ebi_submission.run_xml_fp).read() obs_experiment_xml = open(ebi_submission.experiment_xml_fp).read() - self.assertIn('1.SKB2.640194.R1.fastq.gz', obs_run_xml) - self.assertIn('1.SKB2.640194.R2.fastq.gz', obs_run_xml) - self.assertIn('1.SKM4.640180.R1.fastq.gz', obs_run_xml) - self.assertIn('1.SKM4.640180.R2.fastq.gz', obs_run_xml) - self.assertIn('PAIRED', obs_experiment_xml) - self.assertNotIn('SINGLE', obs_experiment_xml) + self.assertIn("1.SKB2.640194.R1.fastq.gz", obs_run_xml) + self.assertIn("1.SKB2.640194.R2.fastq.gz", obs_run_xml) + self.assertIn("1.SKM4.640180.R1.fastq.gz", obs_run_xml) + self.assertIn("1.SKM4.640180.R2.fastq.gz", obs_run_xml) + self.assertIn("PAIRED", obs_experiment_xml) + self.assertNotIn("SINGLE", obs_experiment_xml) # generate_send_sequences_cmd returns a list of commands so joining # for easier testing - obs_cmd = '|'.join(ebi_submission.generate_send_sequences_cmd()) - self.assertIn('1.SKB2.640194.R1.fastq.gz', obs_cmd) - self.assertIn('1.SKB2.640194.R2.fastq.gz', obs_cmd) - 
self.assertIn('1.SKM4.640180.R1.fastq.gz', obs_cmd) - self.assertIn('1.SKM4.640180.R2.fastq.gz', obs_cmd) + obs_cmd = "|".join(ebi_submission.generate_send_sequences_cmd()) + self.assertIn("1.SKB2.640194.R1.fastq.gz", obs_cmd) + self.assertIn("1.SKB2.640194.R2.fastq.gz", obs_cmd) + self.assertIn("1.SKM4.640180.R1.fastq.gz", obs_cmd) + self.assertIn("1.SKM4.640180.R2.fastq.gz", obs_cmd) # now we have a full submission so let's test if a new one will create # the correct values without rewriting the fastq files - ebi_submission = EBISubmission(artifact.id, 'ADD') + ebi_submission = EBISubmission(artifact.id, "ADD") obs_demux_samples = ebi_submission.generate_demultiplexed_fastq() - exp_samples = ['1.SKM4.640180', '1.SKB2.640194'] + exp_samples = ["1.SKM4.640180", "1.SKB2.640194"] self.assertCountEqual(obs_demux_samples, exp_samples) self.assertCountEqual(ebi_submission.samples.keys(), exp_samples) self.assertCountEqual(ebi_submission.samples_prep.keys(), exp_samples) @@ -906,101 +1000,120 @@ def test_generate_demultiplexed_per_sample_fastq_reverse(self): ebi_submission.generate_xml_files() obs_run_xml = open(ebi_submission.run_xml_fp).read() obs_experiment_xml = open(ebi_submission.experiment_xml_fp).read() - self.assertIn('1.SKB2.640194.R1.fastq.gz', obs_run_xml) - self.assertIn('1.SKB2.640194.R2.fastq.gz', obs_run_xml) - self.assertIn('1.SKM4.640180.R1.fastq.gz', obs_run_xml) - self.assertIn('1.SKM4.640180.R2.fastq.gz', obs_run_xml) - self.assertIn('PAIRED', obs_experiment_xml) - self.assertNotIn('SINGLE', obs_experiment_xml) + self.assertIn("1.SKB2.640194.R1.fastq.gz", obs_run_xml) + self.assertIn("1.SKB2.640194.R2.fastq.gz", obs_run_xml) + self.assertIn("1.SKM4.640180.R1.fastq.gz", obs_run_xml) + self.assertIn("1.SKM4.640180.R2.fastq.gz", obs_run_xml) + self.assertIn("PAIRED", obs_experiment_xml) + self.assertNotIn("SINGLE", obs_experiment_xml) # generate_send_sequences_cmd returns a list of commands so joining # for easier testing - obs_cmd = '|'.join(ebi_submission.generate_send_sequences_cmd()) - self.assertIn('1.SKB2.640194.R1.fastq.gz', obs_cmd) - self.assertIn('1.SKB2.640194.R2.fastq.gz', obs_cmd) - self.assertIn('1.SKM4.640180.R1.fastq.gz', obs_cmd) - self.assertIn('1.SKM4.640180.R2.fastq.gz', obs_cmd) + obs_cmd = "|".join(ebi_submission.generate_send_sequences_cmd()) + self.assertIn("1.SKB2.640194.R1.fastq.gz", obs_cmd) + self.assertIn("1.SKB2.640194.R2.fastq.gz", obs_cmd) + self.assertIn("1.SKM4.640180.R1.fastq.gz", obs_cmd) + self.assertIn("1.SKM4.640180.R2.fastq.gz", obs_cmd) Artifact.delete(artifact.id) PrepTemplate.delete(pt.id) def test_generate_send_sequences_cmd(self): artifact = self.write_demux_files(PrepTemplate(1)) - e = EBISubmission(artifact.id, 'ADD') + e = EBISubmission(artifact.id, "ADD") e.generate_demultiplexed_fastq() self.files_to_remove.append(e.full_ebi_dir) e.generate_xml_files() obs = e.generate_send_sequences_cmd() _, base_fp = get_mountpoint("preprocessed_data")[0] - exp = ('ascp --ignore-host-key -d -QT -k2 ' - '%(ebi_dir)s/1.SKB2.640194.R1.fastq.gz ' - 'Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n' - 'ascp --ignore-host-key -d -QT -k2 ' - '%(ebi_dir)s/1.SKM4.640180.R1.fastq.gz ' - 'Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n' - 'ascp --ignore-host-key -d -QT -k2 ' - '%(ebi_dir)s/1.SKB3.640195.R1.fastq.gz ' - 'Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n' - 'ascp --ignore-host-key -d -QT -k2 ' - '%(ebi_dir)s/1.SKB6.640176.R1.fastq.gz ' - 'Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n' - 'ascp 
--ignore-host-key -d -QT -k2 ' - '%(ebi_dir)s/1.SKD6.640190.R1.fastq.gz ' - 'Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n' - 'ascp --ignore-host-key -d -QT -k2 ' - '%(ebi_dir)s/1.SKM6.640187.R1.fastq.gz ' - 'Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n' - 'ascp --ignore-host-key -d -QT -k2 ' - '%(ebi_dir)s/1.SKD9.640182.R1.fastq.gz ' - 'Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n' - 'ascp --ignore-host-key -d -QT -k2 ' - '%(ebi_dir)s/1.SKM8.640201.R1.fastq.gz ' - 'Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n' - 'ascp --ignore-host-key -d -QT -k2 ' - '%(ebi_dir)s/1.SKM2.640199.R1.fastq.gz ' - 'Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/' % { - 'ebi_dir': e.full_ebi_dir, 'aid': artifact.id}).split('\n') + exp = ( + "ascp --ignore-host-key -d -QT -k2 " + "%(ebi_dir)s/1.SKB2.640194.R1.fastq.gz " + "Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n" + "ascp --ignore-host-key -d -QT -k2 " + "%(ebi_dir)s/1.SKM4.640180.R1.fastq.gz " + "Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n" + "ascp --ignore-host-key -d -QT -k2 " + "%(ebi_dir)s/1.SKB3.640195.R1.fastq.gz " + "Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n" + "ascp --ignore-host-key -d -QT -k2 " + "%(ebi_dir)s/1.SKB6.640176.R1.fastq.gz " + "Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n" + "ascp --ignore-host-key -d -QT -k2 " + "%(ebi_dir)s/1.SKD6.640190.R1.fastq.gz " + "Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n" + "ascp --ignore-host-key -d -QT -k2 " + "%(ebi_dir)s/1.SKM6.640187.R1.fastq.gz " + "Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n" + "ascp --ignore-host-key -d -QT -k2 " + "%(ebi_dir)s/1.SKD9.640182.R1.fastq.gz " + "Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n" + "ascp --ignore-host-key -d -QT -k2 " + "%(ebi_dir)s/1.SKM8.640201.R1.fastq.gz " + "Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/\n" + "ascp --ignore-host-key -d -QT -k2 " + "%(ebi_dir)s/1.SKM2.640199.R1.fastq.gz " + "Webin-41528@webin.ebi.ac.uk:./%(aid)d_ebi_submission/" + % {"ebi_dir": e.full_ebi_dir, "aid": artifact.id} + ).split("\n") self.assertCountEqual(obs, exp) def test_parse_EBI_reply(self): artifact = self.generate_new_study_with_preprocessed_data() study_id = artifact.study.id - e = EBISubmission(artifact.id, 'ADD') + e = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(e.full_ebi_dir) e.generate_demultiplexed_fastq(mtime=1) e.generate_xml_files() curl_result = CURL_RESULT_FULL.format( - qiita_config.ebi_organization_prefix, artifact.id, study_id, - artifact.prep_templates[0].id) + qiita_config.ebi_organization_prefix, + artifact.id, + study_id, + artifact.prep_templates[0].id, + ) stacc, saacc, bioacc, exacc, runacc = e.parse_EBI_reply(curl_result) - self.assertEqual(stacc, 'ERP000000') + self.assertEqual(stacc, "ERP000000") study_id = artifact.study.id - exp_saacc = {'%d.Sample1' % study_id: 'ERS000000', - '%d.Sample2' % study_id: 'ERS000001', - '%d.Sample3' % study_id: 'ERS000002'} + exp_saacc = { + "%d.Sample1" % study_id: "ERS000000", + "%d.Sample2" % study_id: "ERS000001", + "%d.Sample3" % study_id: "ERS000002", + } self.assertEqual(saacc, exp_saacc) - exp_bioacc = {'%d.Sample1' % study_id: 'SAMEA0000000', - '%d.Sample2' % study_id: 'SAMEA0000001', - '%d.Sample3' % study_id: 'SAMEA0000002'} + exp_bioacc = { + "%d.Sample1" % study_id: "SAMEA0000000", + "%d.Sample2" % study_id: "SAMEA0000001", + "%d.Sample3" % study_id: "SAMEA0000002", + } self.assertEqual(bioacc, exp_bioacc) - exp_exacc = {'%d.Sample1' % study_id: 
'ERX0000000', - '%d.Sample2' % study_id: 'ERX0000001', - '%d.Sample3' % study_id: 'ERX0000002'} + exp_exacc = { + "%d.Sample1" % study_id: "ERX0000000", + "%d.Sample2" % study_id: "ERX0000001", + "%d.Sample3" % study_id: "ERX0000002", + } self.assertEqual(exacc, exp_exacc) - exp_runacc = {'%d.Sample1' % study_id: 'ERR0000000', - '%d.Sample2' % study_id: 'ERR0000001', - '%d.Sample3' % study_id: 'ERR0000002'} + exp_runacc = { + "%d.Sample1" % study_id: "ERR0000000", + "%d.Sample2" % study_id: "ERR0000001", + "%d.Sample3" % study_id: "ERR0000002", + } self.assertEqual(runacc, exp_runacc) artifact = self.write_demux_files(PrepTemplate(1)) - e = EBISubmission(artifact.id, 'ADD') + e = EBISubmission(artifact.id, "ADD") self.files_to_remove.append(e.full_ebi_dir) # removing samples so test text is easier to read - keys_to_del = ['1.SKD6.640190', '1.SKM6.640187', '1.SKD9.640182', - '1.SKM8.640201', '1.SKM2.640199', '1.SKB3.640195'] + keys_to_del = [ + "1.SKD6.640190", + "1.SKM6.640187", + "1.SKD9.640182", + "1.SKM8.640201", + "1.SKM2.640199", + "1.SKB3.640195", + ] for k in keys_to_del: - del (e.samples[k]) - del (e.samples_prep[k]) + del e.samples[k] + del e.samples_prep[k] # Genereate the XML files so the aliases are generated # and stored internally @@ -1015,29 +1128,35 @@ def test_parse_EBI_reply(self): with self.assertRaises(EBISubmissionError): e.parse_EBI_reply(curl_result) - curl_result = ('some general text success="true" more text' - ' ' - 'some othe text' - '' - 'some final text') + curl_result = ( + 'some general text success="true" more text' + ' ' + "some othe text" + '' + "some final text" + ) with self.assertRaises(EBISubmissionError): e.parse_EBI_reply(curl_result) curl_result = CURL_RESULT_2_STUDY.format( - qiita_config.ebi_organization_prefix, artifact.id) + qiita_config.ebi_organization_prefix, artifact.id + ) with self.assertRaises(EBISubmissionError): e.parse_EBI_reply(curl_result) - curl_result = CURL_RESULT.format(qiita_config.ebi_organization_prefix, - artifact.id) + curl_result = CURL_RESULT.format( + qiita_config.ebi_organization_prefix, artifact.id + ) stacc, saacc, bioacc, exacc, runacc = e.parse_EBI_reply(curl_result) self.assertEqual(stacc, None) self.assertEqual(saacc, {}) self.assertEqual(bioacc, {}) self.assertEqual(exacc, {}) - exp_runacc = {'1.SKB2.640194': 'ERR0000000', - '1.SKB6.640176': 'ERR0000001', - '1.SKM4.640180': 'ERR0000002'} + exp_runacc = { + "1.SKB2.640194": "ERR0000000", + "1.SKB6.640176": "ERR0000001", + "1.SKM4.640180": "ERR0000002", + } self.assertEqual(runacc, exp_runacc) @@ -1068,7 +1187,7 @@ def test_parse_EBI_reply(self): """ FASTQ_EXAMPLE = { - '1.SKB2.640194': """@1.SKB2.640194_1 X orig_bc=X new_bc=X bc_diffs=0 + "1.SKB2.640194": """@1.SKB2.640194_1 X orig_bc=X new_bc=X bc_diffs=0 CCACCCAGTAAC + ~~~~~~~~~~~~ @@ -1080,14 +1199,14 @@ def test_parse_EBI_reply(self): + ~~~~~~~~~~~~ CCACCCAGTAAC""", - '1.SKM4.640180': """@1.SKM4.640180_4 X orig_bc=X new_bc=X bc_diffs=0 + "1.SKM4.640180": """@1.SKM4.640180_4 X orig_bc=X new_bc=X bc_diffs=0 CCACCCAGTAAC + ~~~~~~~~~~~~ >1.SKM4.640180_5 X orig_bc=X new_bc=X bc_diffs=0 CCACCCAGTAAC + -~~~~~~~~~~~~""" +~~~~~~~~~~~~""", } FASTA_EXAMPLE_2 = """>{0}.Sample1_1 X orig_bc=X new_bc=X bc_diffs=0 @@ -1302,7 +1421,7 @@ def test_parse_EBI_reply(self): - """ % {'center_name': qiita_config.ebi_center_name} + """ % {"center_name": qiita_config.ebi_center_name} STUDYXML = """ -""" % {'organization_prefix': qiita_config.ebi_organization_prefix, - 'center_name': qiita_config.ebi_center_name} +""" % { + 
"organization_prefix": qiita_config.ebi_organization_prefix, + "center_name": qiita_config.ebi_center_name, +} EXPERIMENTXML_NEWSTUDY = """ -""" % {'organization_prefix': qiita_config.ebi_organization_prefix, - 'center_name': qiita_config.ebi_center_name} +""" % { + "organization_prefix": qiita_config.ebi_organization_prefix, + "center_name": qiita_config.ebi_center_name, +} RUNXML = """ 9 -> 10 -> 12 -> 14 # -> 11 -> 13 - fd, fp10 = mkstemp(suffix='_table.biom') + fd, fp10 = mkstemp(suffix="_table.biom") close(fd) - fd, fp11 = mkstemp(suffix='_table.biom') + fd, fp11 = mkstemp(suffix="_table.biom") close(fd) - fd, fp12 = mkstemp(suffix='_table.biom') + fd, fp12 = mkstemp(suffix="_table.biom") close(fd) - fd, fp13 = mkstemp(suffix='_table.biom') + fd, fp13 = mkstemp(suffix="_table.biom") close(fd) - fd, fp14 = mkstemp(suffix='_table.biom') + fd, fp14 = mkstemp(suffix="_table.biom") close(fd) - with biom_open(fp10, 'w') as f: + with biom_open(fp10, "w") as f: et.to_hdf5(f, "test") - with biom_open(fp11, 'w') as f: + with biom_open(fp11, "w") as f: et.to_hdf5(f, "test") - with biom_open(fp12, 'w') as f: + with biom_open(fp12, "w") as f: et.to_hdf5(f, "test") - with biom_open(fp13, 'w') as f: + with biom_open(fp13, "w") as f: et.to_hdf5(f, "test") - with biom_open(fp14, 'w') as f: + with biom_open(fp14, "w") as f: et.to_hdf5(f, "test") self._clean_up_files.extend([fp10, fp11, fp12, fp13, fp14]) @@ -526,20 +607,21 @@ def test_delete_analysis(self): pp = a9.processing_parameters # 7: BIOM - a10 = Artifact.create([(fp10, 7)], "BIOM", parents=[a9], - processing_parameters=pp) - a11 = Artifact.create([(fp11, 7)], "BIOM", parents=[a9], - processing_parameters=pp) - a12 = Artifact.create([(fp12, 7)], "BIOM", parents=[a10], - processing_parameters=pp) - Artifact.create([(fp13, 7)], "BIOM", parents=[a11], - processing_parameters=pp) - Artifact.create([(fp14, 7)], "BIOM", parents=[a12], - processing_parameters=pp) - - job = self._create_job('delete_analysis', {'analysis_id': 1}) + a10 = Artifact.create( + [(fp10, 7)], "BIOM", parents=[a9], processing_parameters=pp + ) + a11 = Artifact.create( + [(fp11, 7)], "BIOM", parents=[a9], processing_parameters=pp + ) + a12 = Artifact.create( + [(fp12, 7)], "BIOM", parents=[a10], processing_parameters=pp + ) + Artifact.create([(fp13, 7)], "BIOM", parents=[a11], processing_parameters=pp) + Artifact.create([(fp14, 7)], "BIOM", parents=[a12], processing_parameters=pp) + + job = self._create_job("delete_analysis", {"analysis_id": 1}) private_task(job.id) - self.assertEqual(job.status, 'success') + self.assertEqual(job.status, "success") with self.assertRaises(QiitaDBUnknownIDError): Analysis(1) @@ -550,72 +632,121 @@ def test_delete_sample_or_column(self): st = SampleTemplate(1) # Delete a sample template column - job = self._create_job('delete_sample_or_column', - {'obj_class': 'SampleTemplate', 'obj_id': 1, - 'sample_or_col': 'columns', - 'name': 'season_environment'}) + job = self._create_job( + "delete_sample_or_column", + { + "obj_class": "SampleTemplate", + "obj_id": 1, + "sample_or_col": "columns", + "name": "season_environment", + }, + ) private_task(job.id) - self.assertEqual(job.status, 'success') - self.assertNotIn('season_environment', st.categories) + self.assertEqual(job.status, "success") + self.assertNotIn("season_environment", st.categories) # Delete a sample template sample - need to add one # sample that we will remove npt.assert_warns( - QiitaDBWarning, st.extend, - pd.DataFrame.from_dict({'Sample1': {'taxon_id': '9606'}}, - orient='index', 
dtype=str)) - self.assertIn('1.Sample1', st.keys()) - job = self._create_job('delete_sample_or_column', - {'obj_class': 'SampleTemplate', 'obj_id': 1, - 'sample_or_col': 'samples', - 'name': '1.Sample1'}) + QiitaDBWarning, + st.extend, + pd.DataFrame.from_dict( + {"Sample1": {"taxon_id": "9606"}}, orient="index", dtype=str + ), + ) + self.assertIn("1.Sample1", st.keys()) + job = self._create_job( + "delete_sample_or_column", + { + "obj_class": "SampleTemplate", + "obj_id": 1, + "sample_or_col": "samples", + "name": "1.Sample1", + }, + ) private_task(job.id) - self.assertEqual(job.status, 'success') - self.assertNotIn('1.Sample1', st.keys()) + self.assertEqual(job.status, "success") + self.assertNotIn("1.Sample1", st.keys()) # Delete a prep template column pt = PrepTemplate(1) - job = self._create_job('delete_sample_or_column', - {'obj_class': 'PrepTemplate', 'obj_id': 1, - 'sample_or_col': 'columns', - 'name': 'target_subfragment'}) + job = self._create_job( + "delete_sample_or_column", + { + "obj_class": "PrepTemplate", + "obj_id": 1, + "sample_or_col": "columns", + "name": "target_subfragment", + }, + ) private_task(job.id) - self.assertEqual(job.status, 'success') - self.assertNotIn('target_subfragment', pt.categories) + self.assertEqual(job.status, "success") + self.assertNotIn("target_subfragment", pt.categories) # Delete a prep template sample metadata = pd.DataFrame.from_dict( - {'1.SKB8.640193': {'barcode': 'GTCCGCAAGTTA', - 'primer': 'GTGCCAGCMGCCGCGGTAA'}, - '1.SKD8.640184': {'barcode': 'CGTAGAGCTCTC', - 'primer': 'GTGCCAGCMGCCGCGGTAA'}}, - orient='index', dtype=str) - pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata, - Study(1), "16S") - job = self._create_job('delete_sample_or_column', - {'obj_class': 'PrepTemplate', 'obj_id': pt.id, - 'sample_or_col': 'samples', - 'name': '1.SKD8.640184'}) + { + "1.SKB8.640193": { + "barcode": "GTCCGCAAGTTA", + "primer": "GTGCCAGCMGCCGCGGTAA", + }, + "1.SKD8.640184": { + "barcode": "CGTAGAGCTCTC", + "primer": "GTGCCAGCMGCCGCGGTAA", + }, + }, + orient="index", + dtype=str, + ) + pt = npt.assert_warns( + QiitaDBWarning, PrepTemplate.create, metadata, Study(1), "16S" + ) + job = self._create_job( + "delete_sample_or_column", + { + "obj_class": "PrepTemplate", + "obj_id": pt.id, + "sample_or_col": "samples", + "name": "1.SKD8.640184", + }, + ) private_task(job.id) - self.assertNotIn('1.SKD8.640184', pt.keys()) + self.assertNotIn("1.SKD8.640184", pt.keys()) # Test exceptions - job = self._create_job('delete_sample_or_column', - {'obj_class': 'UnknownClass', 'obj_id': 1, - 'sample_or_col': 'columns', 'name': 'column'}) + job = self._create_job( + "delete_sample_or_column", + { + "obj_class": "UnknownClass", + "obj_id": 1, + "sample_or_col": "columns", + "name": "column", + }, + ) private_task(job.id) - self.assertEqual(job.status, 'error') - self.assertIn('Unknown value "UnknownClass". Choose between ' - '"SampleTemplate" and "PrepTemplate"', job.log.msg) + self.assertEqual(job.status, "error") + self.assertIn( + 'Unknown value "UnknownClass". Choose between ' + '"SampleTemplate" and "PrepTemplate"', + job.log.msg, + ) - job = self._create_job('delete_sample_or_column', - {'obj_class': 'SampleTemplate', 'obj_id': 1, - 'sample_or_col': 'unknown', 'name': 'column'}) + job = self._create_job( + "delete_sample_or_column", + { + "obj_class": "SampleTemplate", + "obj_id": 1, + "sample_or_col": "unknown", + "name": "column", + }, + ) private_task(job.id) - self.assertEqual(job.status, 'error') - self.assertIn('Unknown value "unknown". 
Choose between "samples" ' - 'and "columns"', job.log.msg) + self.assertEqual(job.status, "error") + self.assertIn( + 'Unknown value "unknown". Choose between "samples" and "columns"', + job.log.msg, + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/scripts/qiita b/scripts/qiita index 0e182f047..ffe476ed1 100755 --- a/scripts/qiita +++ b/scripts/qiita @@ -8,11 +8,14 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- import errno +import signal import socket +import sys from datetime import datetime, timedelta -from os.path import join, abspath, dirname -from time import ctime from multiprocessing import active_children +from os.path import abspath, dirname, join +from threading import Thread +from time import ctime import click import tornado.httpserver @@ -20,15 +23,10 @@ import tornado.ioloop from psycopg2 import OperationalError import qiita_db as qdb -from qiita_core.util import update_redis_qiita_sha_version from qiita_core.qiita_settings import qiita_config, r_client -from qiita_ware.ebi import EBISubmission +from qiita_core.util import update_redis_qiita_sha_version from qiita_ware.commands import submit_EBI as _submit_EBI - -from threading import Thread - -import signal -import sys +from qiita_ware.ebi import EBISubmission # identify gWatcher variable and signal handler globally. # Only the master process will instantiate a Watcher() process. @@ -47,8 +45,8 @@ try: conn = qdb.sql_connection.TRN except OperationalError: raise qdb.exceptions.QiitaDBConnectionError( - "Database does not exist. See qiita-env make --help for more " - "information") + "Database does not exist. See qiita-env make --help for more information" + ) else: del conn @@ -64,24 +62,29 @@ def pet(): @pet.group(invoke_without_command=True) -@click.option('--build-docs/--no-build-docs', default=True, help="Controls " - "whether the docs are built and moved to the 'doc' folder under " - "the 'static' folder.") +@click.option( + "--build-docs/--no-build-docs", + default=True, + help="Controls " + "whether the docs are built and moved to the 'doc' folder under " + "the 'static' folder.", +) def webserver(build_docs): if build_docs: import qiita_pet base = abspath(dirname(qiita_pet.__file__)) - sphinx_fp = join(base, 'support_files/doc/') - cmd = 'make -C %s html' % sphinx_fp - print('Building documentation ...') + sphinx_fp = join(base, "support_files/doc/") + cmd = "make -C %s html" % sphinx_fp + print("Building documentation ...") stdout, stderr, rv = qdb.processing_job._system_call(cmd) if rv != 0: raise click.ClickException( - 'Could not build documentation:\n' - 'Std output:%s\nStd error:%s' % (stdout, stderr)) + "Could not build documentation:\n" + "Std output:%s\nStd error:%s" % (stdout, stderr) + ) else: - print('Documentation successfully built') + print("Documentation successfully built") @qiita.group() @@ -115,100 +118,166 @@ def ebi(): @db.command() -@click.option('--owner', required=True, - help="The email address of the owner of the study") -@click.option('--title', help="The title of the study", required=True) -@click.option('--info', type=click.File(mode='r'), required=True, - help="filepath of file with study information in python " - "config file format") +@click.option( + "--owner", required=True, help="The email address of the owner of the study" +) +@click.option("--title", help="The title of the study", required=True) +@click.option( + "--info", + 
type=click.File(mode="r"), + required=True, + help="filepath of file with study information in python config file format", +) def load_study(owner, title, info): """Loads a study to the database""" study = qdb.commands.load_study_from_cmd(owner, title, info) - click.echo("Study successfully added to the database with id %s" - % study.id) + click.echo("Study successfully added to the database with id %s" % study.id) @db.command() -@click.option('--fp', required=True, type=click.Path(resolve_path=True, - readable=True, exists=True), multiple=True, - help='Path to the artifact files. This option can be used ' - 'multiple times if there are multiple artifact files.') -@click.option('--fp_type', required=True, multiple=True, help='Describes the ' - 'contents of the file. Pass one fp_type per fp.', - type=click.Choice(qdb.util.get_filepath_types().keys())) -@click.option('--artifact_type', required=True, help="The artifact type", - type=click.Choice(qdb.util.get_artifact_types().keys())) -@click.option('--prep_template', required=False, - help="The prep template to which the artifact is attached", - type=click.IntRange(1)) -@click.option('--parents', required=False, multiple=True, - help="The parent artifacts from which the new artifact has been " - "processed", - type=click.IntRange(1)) -@click.option('--dflt_params', required=False, - help="The default parameter set used to process the new " - "artifact", - type=click.IntRange(1)) -@click.option('--required_params', required=False, - help="JSON string containing the required parameters used to " - "process the new artifact", - type=str) -@click.option('--optional_params', required=False, - help="JSON string containing the optional parameters used to " - "process the new artifact, if different from the " - "defaults", - type=str) -def load_artifact(fp, fp_type, artifact_type, prep_template, parents, - dflt_params, required_params, optional_params): +@click.option( + "--fp", + required=True, + type=click.Path(resolve_path=True, readable=True, exists=True), + multiple=True, + help="Path to the artifact files. This option can be used " + "multiple times if there are multiple artifact files.", +) +@click.option( + "--fp_type", + required=True, + multiple=True, + help="Describes the contents of the file. 
Pass one fp_type per fp.", + type=click.Choice(qdb.util.get_filepath_types().keys()), +) +@click.option( + "--artifact_type", + required=True, + help="The artifact type", + type=click.Choice(qdb.util.get_artifact_types().keys()), +) +@click.option( + "--prep_template", + required=False, + help="The prep template to which the artifact is attached", + type=click.IntRange(1), +) +@click.option( + "--parents", + required=False, + multiple=True, + help="The parent artifacts from which the new artifact has been processed", + type=click.IntRange(1), +) +@click.option( + "--dflt_params", + required=False, + help="The default parameter set used to process the new artifact", + type=click.IntRange(1), +) +@click.option( + "--required_params", + required=False, + help="JSON string containing the required parameters used to " + "process the new artifact", + type=str, +) +@click.option( + "--optional_params", + required=False, + help="JSON string containing the optional parameters used to " + "process the new artifact, if different from the " + "defaults", + type=str, +) +def load_artifact( + fp, + fp_type, + artifact_type, + prep_template, + parents, + dflt_params, + required_params, + optional_params, +): """Loads an artifact to the database""" artifact = qdb.commands.load_artifact_from_cmd( - fp, fp_type, artifact_type, prep_template=prep_template, - parents=parents, dflt_params_id=dflt_params, - required_params=required_params, optional_params=optional_params) + fp, + fp_type, + artifact_type, + prep_template=prep_template, + parents=parents, + dflt_params_id=dflt_params, + required_params=required_params, + optional_params=optional_params, + ) click.echo("Artifact %s successfully created" % artifact.id) @db.command() -@click.argument('fp', required=True, - type=click.Path(resolve_path=True, readable=True, exists=True)) -@click.option('--study', required=True, type=int, - help='Associate the sample template with this study') +@click.argument( + "fp", required=True, type=click.Path(resolve_path=True, readable=True, exists=True) +) +@click.option( + "--study", + required=True, + type=int, + help="Associate the sample template with this study", +) def load_sample_template(fp, study): """Loads a sample template to the database""" sample_temp = qdb.commands.load_sample_template_from_cmd(fp, study) - click.echo("Sample template successfully added to the database with id %s" - % sample_temp.id) + click.echo( + "Sample template successfully added to the database with id %s" % sample_temp.id + ) @db.command() -@click.argument('fp', required=True, - type=click.Path(resolve_path=True, readable=True, exists=True)) -@click.option('--study', required=True, type=int, - help='Associate the prep template with this study') -@click.option('--data_type', required=True, - type=click.Choice(qdb.util.get_data_types()), - help="The data type of data") +@click.argument( + "fp", required=True, type=click.Path(resolve_path=True, readable=True, exists=True) +) +@click.option( + "--study", + required=True, + type=int, + help="Associate the prep template with this study", +) +@click.option( + "--data_type", + required=True, + type=click.Choice(qdb.util.get_data_types()), + help="The data type of data", +) def load_prep_template(fp, study, data_type): """Loads a sample template to the database""" - prep_template = qdb.commands.load_prep_template_from_cmd( - fp, study, data_type) - click.echo("Prep template successfully added to the database with id %s" - % prep_template.id) + prep_template = 
qdb.commands.load_prep_template_from_cmd(fp, study, data_type) + click.echo( + "Prep template successfully added to the database with id %s" % prep_template.id + ) # ############################################################################# # EBI COMMANDS # ############################################################################# + @ebi.command() -@click.option('--artifact-id', required=True, type=int) -@click.option('--action', type=click.Choice(EBISubmission.valid_ebi_actions), - default='submit', help='The generated XML files will specify ' - 'this "action", which controls how the EBI servers handle the ' - 'metadata') -@click.option('--send/--no-send', default=False, help="Controls whether or " - "not sequence files and metadata will actually be sent to EBI " - "(default is to generate all the files, but not to send)") +@click.option("--artifact-id", required=True, type=int) +@click.option( + "--action", + type=click.Choice(EBISubmission.valid_ebi_actions), + default="submit", + help="The generated XML files will specify " + 'this "action", which controls how the EBI servers handle the ' + "metadata", +) +@click.option( + "--send/--no-send", + default=False, + help="Controls whether or " + "not sequence files and metadata will actually be sent to EBI " + "(default is to generate all the files, but not to send)", +) def submit(artifact_id, action, send): _submit_EBI(artifact_id, action, send) @@ -217,16 +286,23 @@ def submit(artifact_id, action, send): # MAINTENANCE COMMANDS # ############################################################################# + @maintenance.command() -@click.option('--time', required=True, type=int, - help='The amount of time to show message, in hours') -@click.option('--message', required=True, type=str, - help="Message to show users") -@click.option('--banner', is_flag=True, - help="If set, show as site banner instead of a standard message") +@click.option( + "--time", + required=True, + type=int, + help="The amount of time to show message, in hours", +) +@click.option("--message", required=True, type=str, help="Message to show users") +@click.option( + "--banner", + is_flag=True, + help="If set, show as site banner instead of a standard message", +) def sysmessage(message, time, banner): if banner: - r_client.setex('sysmessage', time * 60 * 60, message) + r_client.setex("sysmessage", time * 60 * 60, message) else: expires = datetime.now() + timedelta(seconds=(time * 60 * 60)) qdb.util.add_system_message(message, expires) @@ -234,43 +310,47 @@ def sysmessage(message, time, banner): @maintenance.command() def clear_sysmessage(): - r_client.delete('sysmessage') + r_client.delete("sysmessage") qdb.util.clear_system_messages() @maintenance.command() def status(): - sys_status = r_client.get('sysmessage') + sys_status = r_client.get("sysmessage") if sys_status is None: click.echo("Site has no system message") else: click.echo("Site has system message:") click.echo(sys_status) - click.echo(r_client.ttl('sysmessage'), "seconds remaining") + click.echo(r_client.ttl("sysmessage"), "seconds remaining") @maintenance.command() -@click.option('--n', required=False, type=click.IntRange(0, None), default=10, - help="Number of most recent log entries to retrieve.", - show_default=True) +@click.option( + "--n", + required=False, + type=click.IntRange(0, None), + default=10, + help="Number of most recent log entries to retrieve.", + show_default=True, +) def log(n): - width = click.get_terminal_size()[0] - template = width*"=" + "\nTime: {}\nMessage:\n{}\nInfo:{}\n" + 
template = width * "=" + "\nTime: {}\nMessage:\n{}\nInfo:{}\n" lines = [] for e in qdb.logger.LogEntry.newest_records(n): - info = [] for i in e.info: for field, value in i.items(): - info.append('{}:{}'.format(field, value)) + info.append("{}:{}".format(field, value)) + + lines.append(template.format(e.time, e.msg, "\n".join(info))) - lines.append(template.format(e.time, e.msg, '\n'.join(info))) + click.echo_via_pager("\n".join(lines)) - click.echo_via_pager('\n'.join(lines)) # ############################################################################# # WEBSERVER COMMANDS @@ -278,38 +358,43 @@ def log(n): @webserver.command() -@click.option('--port', required=False, type=int, help='Port where the ' - 'webserver will start', default=21174) +@click.option( + "--port", + required=False, + type=int, + help="Port where the webserver will start", + default=21174, +) # (cursive Q)iita = 21174 in 1337sp34k -@click.option('--master', is_flag=True, - help="If set, update available plugins") +@click.option("--master", is_flag=True, help="If set, update available plugins") def start(port, master): - from qiita_pet.webserver import Application - from tornado.options import options, parse_command_line from tornado.ioloop import PeriodicCallback + from tornado.options import options, parse_command_line + + from qiita_pet.webserver import Application def update_database_func(): while True: # blocking call waits on new job info msg = gWatcher.queue.get(True) - if msg == 'QUIT': + if msg == "QUIT": break # TODO: add method to update Qiita of a job's status # beyond completion. Review qclient.update_job_step. # get state of job - job_state = msg['job_state'] + job_state = msg["job_state"] # currently, we are only informing Qiita of when a # job has completed, either successfully or unsuccessfully. - if job_state != 'completed' and job_state != 'DROPPED': + if job_state != "completed" and job_state != "DROPPED": continue - if job_state == 'completed': + if job_state == "completed": # get returned exit_status from job's process # all completed jobs should have an exit_status - job_state = int(msg['exit_status']) == 0 + job_state = int(msg["exit_status"]) == 0 else: # Assume job is validator job that was DROPPED. # Assume DROPPED job does not have an exit_status. @@ -317,12 +402,11 @@ def start(port, master): job_error = None # get error message, if one is present - if 'error_msg' in msg: - job_error = msg['error_msg'] + if "error_msg" in msg: + job_error = msg["error_msg"] try: - qjob = qdb.processing_job.ProcessingJob.by_external_id( - msg['Job_Id']) + qjob = qdb.processing_job.ProcessingJob.by_external_id(msg["Job_Id"]) except qdb.exceptions.QiitaDBUnknownIDError: # ignore any job owned by Qiita, but can't be # mapped to a ProcessJob. @@ -333,7 +417,7 @@ def start(port, master): # register the job's changed status in Qiita. qjob.complete(job_state, error=job_error) - if qiita_config.plugin_launcher == 'qiita-plugin-launcher-slurm': + if qiita_config.plugin_launcher == "qiita-plugin-launcher-slurm": if master: # Only a single Watcher() process is desired gWatcher = qdb.processing_job.Watcher() @@ -363,11 +447,11 @@ def start(port, master): # all workers and possibly hit this error: # redis.exceptions.ResponseError: OOM command not allowed when # used memory > 'maxmemory'. 
- r_client.delete('qiita-usernames') + r_client.delete("qiita-usernames") users = [email for email, name in qdb.user.User.iter()] if users: - r_client.zadd('qiita-usernames', {u: 0 for u in users}) + r_client.zadd("qiita-usernames", {u: 0 for u in users}) # Deactivate all the plugins and only activate those that are currently # available in the config file folder @@ -375,14 +459,14 @@ def start(port, master): qdb.util.activate_or_update_plugins() if qiita_config.log_dir: - options.log_file_prefix = join(qiita_config.log_dir, - 'qiita_%d.log' % port) - options.logging = 'debug' + options.log_file_prefix = join(qiita_config.log_dir, "qiita_%d.log" % port) + options.logging = "debug" parse_command_line() - ssl_options = {"certfile": qiita_config.certificate_file, - "keyfile": qiita_config.key_file} - http_server = tornado.httpserver.HTTPServer( - Application(), ssl_options=ssl_options) + ssl_options = { + "certfile": qiita_config.certificate_file, + "keyfile": qiita_config.key_file, + } + http_server = tornado.httpserver.HTTPServer(Application(), ssl_options=ssl_options) try: http_server.listen(port) @@ -390,42 +474,47 @@ def start(port, master): if e.errno == errno.EADDRINUSE: raise ValueError( "Port %d is already in use. Please choose another port with " - "--port." % port) + "--port." % port + ) else: raise base_url = qiita_config.base_url - click.echo("Qiita started on base_url: %s, port: %d @ %s" % ( - base_url, port, ctime())) + click.echo( + "Qiita started on base_url: %s, port: %d @ %s" % (base_url, port, ctime()) + ) ioloop = tornado.ioloop.IOLoop.instance() if master: + def callback_function(): active_software = list(qdb.software.Software.iter()) - sdefinition = [s for s in active_software - if s.type == 'artifact definition'] + sdefinition = [ + s for s in active_software if s.type == "artifact definition" + ] # this will load the artifact transformation and private plugins - stransformation = [s for s in active_software - if s.type != 'artifact definition'] + stransformation = [ + s for s in active_software if s.type != "artifact definition" + ] for s in sdefinition: - print('Registering: %s...' % (s.name)) + print("Registering: %s..." % (s.name)) try: s.register_commands() except ValueError as e: print(e) - print('Definition artifacts registration complete') + print("Definition artifacts registration complete") for s in stransformation: - print('Registering: %s...' % (s.name)) + print("Registering: %s..." % (s.name)) try: s.register_commands() except ValueError as e: print(e) - print('Command artifacts registration complete') + print("Command artifacts registration complete") ioloop.add_timeout(ioloop.time() + 0.5, callback_function) @@ -440,6 +529,7 @@ def start(port, master): if master: gWatcher.stop() + # ############################################################################# # PLUGIN COMMANDS # ############################################################################# @@ -451,5 +541,5 @@ def update(): qdb.util.activate_or_update_plugins(update=True) -if __name__ == '__main__': +if __name__ == "__main__": qiita() diff --git a/scripts/qiita-auto-processing b/scripts/qiita-auto-processing index bd4605849..123fc1237 100644 --- a/scripts/qiita-auto-processing +++ b/scripts/qiita-auto-processing @@ -7,13 +7,12 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- from qiita_db.handlers.plugin import _get_plugin -from qiita_db.study import Study +from qiita_db.processing_job import ProcessingWorkflow from qiita_db.software import Parameters +from qiita_db.study import Study from qiita_db.user import User -from qiita_db.processing_job import ProcessingWorkflow - -user = User('antoniog@ucsd.edu') +user = User("antoniog@ucsd.edu") # full_pipelines is a list of dict as: { # 'name': str, @@ -38,74 +37,94 @@ user = User('antoniog@ucsd.edu') # 'parameters_names': list of the names of the parameter sets we want to run # } full_pipelines = [ - {'name': 'Full WGS - Shogun', - 'data_type': ['Metagenomic'], - 'artifact_type': 'per_sample_FASTQ', - 'previous-step': None, - 'requirements': dict(), - 'steps': [ - {'previous-step': None, - 'plugin': 'qp-meta', - 'version': '2021.01', - 'cmd_name': 'Atropos v1.1.24', - 'input_name': 'input', - 'ignore_parameters': ['Number of threads used'], - 'parent_artifact_name': None, - 'parameters_names': ['KAPA HyperPlus with iTru']}, - {'previous-step': 'Atropos v1.1.24', - 'plugin': 'qp-shogun', - 'version': '072020', - 'cmd_name': 'Shogun v1.0.8', - 'input_name': 'input', - 'ignore_parameters': ['Number of threads'], - 'parent_artifact_name': 'Adapter trimmed files', - 'parameters_names': ['wol_bowtie2', 'rep200_bowtie2']} - ]}, - {'name': 'Target Gene Processing', - 'data_type': ['16S', '18S', 'ITS'], - 'artifact_type': 'Demultiplexed', - 'previous-step': 'Split libraries FASTQ', - 'requirements': { - 'prep': [ - {'column': 'platform', 'value': ['illumina'], - 'equal': True}, - {'column': 'run_prefix', 'value': ['cmi_workshop_lane1_s1_l001'], - 'equal': False}]}, - 'steps': [ - {'previous-step': None, - 'plugin': 'QIIMEq2', - 'version': '1.9.1', - 'cmd_name': 'Trimming', - 'input_name': 'input_data', - 'ignore_parameters': [], - 'parent_artifact_name': None, - 'parameters_names': ['90 base pairs', - '100 base pairs', - '150 base pairs' - ]}, - {'previous-step': 'Trimming', - 'plugin': 'QIIMEq2', - 'version': '1.9.1', - 'cmd_name': 'Pick closed-reference OTUs', - 'input_name': 'input_data', - 'ignore_parameters': [], - 'parent_artifact_name': 'Trimmed Demultiplexed', - 'parameters_names': ['Defaults - parallel']}, - {'previous-step': 'Trimming', - 'plugin': 'deblur', - 'version': '1.1.0', - 'cmd_name': 'Deblur', - 'input_name': 'Demultiplexed sequences', - 'ignore_parameters': [], - 'parent_artifact_name': 'Trimmed Demultiplexed', - 'parameters_names': ['Defaults']} - ]}, + { + "name": "Full WGS - Shogun", + "data_type": ["Metagenomic"], + "artifact_type": "per_sample_FASTQ", + "previous-step": None, + "requirements": dict(), + "steps": [ + { + "previous-step": None, + "plugin": "qp-meta", + "version": "2021.01", + "cmd_name": "Atropos v1.1.24", + "input_name": "input", + "ignore_parameters": ["Number of threads used"], + "parent_artifact_name": None, + "parameters_names": ["KAPA HyperPlus with iTru"], + }, + { + "previous-step": "Atropos v1.1.24", + "plugin": "qp-shogun", + "version": "072020", + "cmd_name": "Shogun v1.0.8", + "input_name": "input", + "ignore_parameters": ["Number of threads"], + "parent_artifact_name": "Adapter trimmed files", + "parameters_names": ["wol_bowtie2", "rep200_bowtie2"], + }, + ], + }, + { + "name": "Target Gene Processing", + "data_type": ["16S", "18S", "ITS"], + "artifact_type": "Demultiplexed", + "previous-step": "Split libraries FASTQ", + "requirements": { + "prep": [ + {"column": "platform", "value": 
["illumina"], "equal": True}, + { + "column": "run_prefix", + "value": ["cmi_workshop_lane1_s1_l001"], + "equal": False, + }, + ] + }, + "steps": [ + { + "previous-step": None, + "plugin": "QIIMEq2", + "version": "1.9.1", + "cmd_name": "Trimming", + "input_name": "input_data", + "ignore_parameters": [], + "parent_artifact_name": None, + "parameters_names": [ + "90 base pairs", + "100 base pairs", + "150 base pairs", + ], + }, + { + "previous-step": "Trimming", + "plugin": "QIIMEq2", + "version": "1.9.1", + "cmd_name": "Pick closed-reference OTUs", + "input_name": "input_data", + "ignore_parameters": [], + "parent_artifact_name": "Trimmed Demultiplexed", + "parameters_names": ["Defaults - parallel"], + }, + { + "previous-step": "Trimming", + "plugin": "deblur", + "version": "1.1.0", + "cmd_name": "Deblur", + "input_name": "Demultiplexed sequences", + "ignore_parameters": [], + "parent_artifact_name": "Trimmed Demultiplexed", + "parameters_names": ["Defaults"], + }, + ], + }, ] def _check_previous_command(prev_step, pparams): if (prev_step is None and pparams is None) or ( - pparams is not None and prev_step == pparams.command.name): + pparams is not None and prev_step == pparams.command.name + ): return True return False @@ -114,35 +133,42 @@ def _check_requirements(requirements, template): satisfied = True for req in requirements: if satisfied: - if req['column'] not in template.categories: - if req['equal']: + if req["column"] not in template.categories: + if req["equal"]: satisfied = False continue - template_value = list(map(str.lower, set( - template.get_category(req['column']).values()))) - if req['equal'] and template_value != req['value']: + template_value = list( + map(str.lower, set(template.get_category(req["column"]).values())) + ) + if req["equal"] and template_value != req["value"]: satisfied = False continue - elif not req['equal'] and template_value == req['value']: + elif not req["equal"] and template_value == req["value"]: satisfied = False continue return satisfied def _check_parameters(jobs, cmd): - params = [{k: str(v) for k, v in j.parameters.values.items() - if k not in cmd['ignore_parameters']} for j in jobs] + params = [ + { + k: str(v) + for k, v in j.parameters.values.items() + if k not in cmd["ignore_parameters"] + } + for j in jobs + ] return params def _submit_workflows(artifact_process): for artifact in artifact_process: - if artifact['workflow'] is None: + if artifact["workflow"] is None: continue # nodes will return in position [0] the first job created - first_job = list(artifact['workflow'].graph.nodes())[0] - if first_job.status == 'in_construction': - artifact['workflow'].submit() + first_job = list(artifact["workflow"].graph.nodes())[0] + if first_job.status == "in_construction": + artifact["workflow"].submit() # Step 1. Loop over the full_pipelines to process each step @@ -150,30 +176,35 @@ for pipeline in full_pipelines: # Step 2. 
From the steps generate the list of commands to add to the # workflow commands = [] - for step in pipeline['steps']: - plugin = _get_plugin(step['plugin'], step['version']) - cmds = [c for c in plugin.commands if c.name == step['cmd_name']] + for step in pipeline["steps"]: + plugin = _get_plugin(step["plugin"], step["version"]) + cmds = [c for c in plugin.commands if c.name == step["cmd_name"]] if len(cmds) != 1: - raise ValueError('There is more than one command with this ' - 'definition %s' % str(step)) + raise ValueError( + "There is more than one command with this definition %s" % str(step) + ) cmd = cmds[0] parameters = [] for dps in cmd.default_parameter_sets: - if dps.name in step['parameters_names']: + if dps.name in step["parameters_names"]: # note that for simplicity we are converting all values in the # parameters to string - parameters.append({'id': dps.id, 'values': { - k: str(v) for k, v in dps.values.items()}}) - - commands.append({ - 'command': cmd, - 'command-name': cmd.name, - 'previous-step': step['previous-step'], - 'parent_artifact_name': step['parent_artifact_name'], - 'input_name': step['input_name'], - 'ignore_parameters': step['ignore_parameters'], - 'parameters': parameters}) + parameters.append( + {"id": dps.id, "values": {k: str(v) for k, v in dps.values.items()}} + ) + + commands.append( + { + "command": cmd, + "command-name": cmd.name, + "previous-step": step["previous-step"], + "parent_artifact_name": step["parent_artifact_name"], + "input_name": step["input_name"], + "ignore_parameters": step["ignore_parameters"], + "parameters": parameters, + } + ) # Step 2. - for children. Get their commands. We currently only support # processing for 2 levels, like: @@ -181,18 +212,20 @@ for pipeline in full_pipelines: # -> Close reference # which should be fine for now as all our pipelines only # have 2 levels - children_cmds = [c for c in commands[1:] - if c['previous-step'] == commands[0]['command-name']] + children_cmds = [ + c for c in commands[1:] if c["previous-step"] == commands[0]["command-name"] + ] # Step 3. Find all preparations/artifacts that we can add the pipeline # ... as a first pass we will only process study 10317 (AGP) ... # artifacts_all = [a for study in Study.iter() - artifacts_all = [a for study in [Study(10317)] - # loop over all artifacts of artifact_type with in study - for a in study.artifacts( - artifact_type=pipeline['artifact_type']) - if _check_previous_command( - pipeline['previous-step'], a.processing_parameters)] + artifacts_all = [ + a + for study in [Study(10317)] + # loop over all artifacts of artifact_type with in study + for a in study.artifacts(artifact_type=pipeline["artifact_type"]) + if _check_previous_command(pipeline["previous-step"], a.processing_parameters) + ] # Step 4. 
Limit all_artifacts to those within restrictions artifacts_compliant = [] @@ -204,16 +237,16 @@ for pipeline in full_pipelines: pt = pts[0] # {'sandbox', 'awaiting_approval', 'private', 'public'} - if a.visibility in ('sandbox', 'awaiting_approval'): + if a.visibility in ("sandbox", "awaiting_approval"): continue - if pt.data_type() not in pipeline['data_type']: + if pt.data_type() not in pipeline["data_type"]: continue - reqs = pipeline['requirements'] - if 'sample' in reqs and not _check_requirements(reqs['sample'], st): + reqs = pipeline["requirements"] + if "sample" in reqs and not _check_requirements(reqs["sample"], st): continue - if 'prep' in reqs and not _check_requirements(reqs['prep'], pt): + if "prep" in reqs and not _check_requirements(reqs["prep"], pt): continue artifacts_compliant.append(a) @@ -226,23 +259,23 @@ for pipeline in full_pipelines: cmd = commands[0] for a in artifacts_compliant: # getting all jobs, includen hiddens, in case the job failed - jobs = a.jobs(cmd=cmd['command'], show_hidden=True) + jobs = a.jobs(cmd=cmd["command"], show_hidden=True) params = _check_parameters(jobs, cmd) # checking that all required parameters of this command exist missing_parameters = [] - for p in cmd['parameters']: - p = p['values'] - p.update({cmd['input_name']: str(a.id)}) + for p in cmd["parameters"]: + p = p["values"] + p.update({cmd["input_name"]: str(a.id)}) p_to_compare = p.copy() - for k in cmd['ignore_parameters']: + for k in cmd["ignore_parameters"]: del p_to_compare[k] if p_to_compare not in params: missing_parameters.append(p) else: for c in a.children: cpp = c.processing_parameters - if cpp.command.name == cmd['command-name']: + if cpp.command.name == cmd["command-name"]: cparams = _check_parameters([cpp], cmd) if cparams == p_to_compare: children_compliant.append(c) @@ -250,33 +283,42 @@ for pipeline in full_pipelines: # note that we are building a dict for each artifact so we can # save the workflow id, useful for when we run this in a terminal # and we want to follow up on those workflows - artifact_process.append({'workflow': None, 'artifact': a, - 'missing_parameters': missing_parameters, - 'cmd_id': 0}) + artifact_process.append( + { + "workflow": None, + "artifact": a, + "missing_parameters": missing_parameters, + "cmd_id": 0, + } + ) # Step 5b. 
Add workflow/commands for children for a in children_compliant: for cmd_id, cmd in enumerate(children_cmds): # getting all jobs, includen hiddens, in case the job failed - jobs = a.jobs(cmd=cmd['command'], show_hidden=True) + jobs = a.jobs(cmd=cmd["command"], show_hidden=True) params = _check_parameters(jobs, cmd) # checking that all required parameters of this command exist missing_parameters = [] - for p in cmd['parameters']: - p = p['values'] - p.update({cmd['input_name']: str(a.id)}) + for p in cmd["parameters"]: + p = p["values"] + p.update({cmd["input_name"]: str(a.id)}) p_to_compare = p.copy() - for k in cmd['ignore_parameters']: + for k in cmd["ignore_parameters"]: del p_to_compare[k] if p_to_compare not in params: missing_parameters.append(p) if missing_parameters: artifact_process.append( - {'workflow': None, 'artifact': a, - 'missing_parameters': missing_parameters, - 'cmd_id': cmd_id + 1}) + { + "workflow": None, + "artifact": a, + "missing_parameters": missing_parameters, + "cmd_id": cmd_id + 1, + } + ) # Step 6: add workflows and jobs # max processing will be useful for debugging as it allows to stop after @@ -287,42 +329,52 @@ for pipeline in full_pipelines: if i >= max_processing: break - if artifact['workflow'] is not None: + if artifact["workflow"] is not None: continue - a = artifact['artifact'] - cmd_id = artifact['cmd_id'] + a = artifact["artifact"] + cmd_id = artifact["cmd_id"] # create the first-job/workflow with the first command and the first # set of parameters cmd = commands[cmd_id] - params = artifact['missing_parameters'][0] - params.update({cmd['input_name']: str(a.id)}) - job_params = Parameters.load(cmd['command'], values_dict=params) + params = artifact["missing_parameters"][0] + params.update({cmd["input_name"]: str(a.id)}) + job_params = Parameters.load(cmd["command"], values_dict=params) - artifact['workflow'] = ProcessingWorkflow.from_scratch( - user, job_params) + artifact["workflow"] = ProcessingWorkflow.from_scratch(user, job_params) # now we can add the rest of the parameters to the workflow for # the first command - for params in artifact['missing_parameters'][1:]: - job_params = Parameters.load(cmd['command'], values_dict=params) - artifact['workflow'].add( - job_params, req_params={cmd['input_name']: str(a.id)}) + for params in artifact["missing_parameters"][1:]: + job_params = Parameters.load(cmd["command"], values_dict=params) + artifact["workflow"].add( + job_params, req_params={cmd["input_name"]: str(a.id)} + ) - for cmd in commands[cmd_id + 1:]: + for cmd in commands[cmd_id + 1 :]: # get jobs from the workflow to which we can add this new command - previous_jobs = [j for j in artifact['workflow'].graph.nodes() - if j.command.name == cmd['previous-step']] + previous_jobs = [ + j + for j in artifact["workflow"].graph.nodes() + if j.command.name == cmd["previous-step"] + ] for job in previous_jobs: - for params in cmd['parameters']: - params = params['values'] - params.update({cmd['input_name']: '%s%s' % ( - job.id, cmd['parent_artifact_name'])}) - job_params = Parameters.load( - cmd['command'], values_dict=params) - - artifact['workflow'].add(job_params, connections={job: { - cmd['parent_artifact_name']: cmd['input_name']}}) + for params in cmd["parameters"]: + params = params["values"] + params.update( + { + cmd["input_name"]: "%s%s" + % (job.id, cmd["parent_artifact_name"]) + } + ) + job_params = Parameters.load(cmd["command"], values_dict=params) + + artifact["workflow"].add( + job_params, + connections={ + job: 
{cmd["parent_artifact_name"]: cmd["input_name"]} + }, + ) # Step 7. submit the workflows! _submit_workflows(artifact_process) diff --git a/scripts/qiita-cron-job b/scripts/qiita-cron-job index 252252a38..af29f296d 100755 --- a/scripts/qiita-cron-job +++ b/scripts/qiita-cron-job @@ -10,17 +10,20 @@ import click -from qiita_db.util import ( - purge_filepaths as qiita_purge_filepaths, - empty_trash_upload_folder as qiita_empty_trash_upload_folder, - quick_mounts_purge as qiita_quick_mounts_purge) +from qiita_db.download_link import DownloadLink +from qiita_db.meta_util import ( + generate_biom_and_metadata_release as qiita_generate_biom_and_metadata_release, +) +from qiita_db.meta_util import ( + generate_plugin_releases as qiita_generate_plugin_releases, +) +from qiita_db.meta_util import update_redis_stats as qiita_update_redis_stats from qiita_db.meta_util import ( - update_redis_stats as qiita_update_redis_stats, update_resource_allocation_redis as qiita_update_resource_allocation_redis, - generate_biom_and_metadata_release as - qiita_generate_biom_and_metadata_release, - generate_plugin_releases as qiita_generate_plugin_releases) -from qiita_db.download_link import DownloadLink +) +from qiita_db.util import empty_trash_upload_folder as qiita_empty_trash_upload_folder +from qiita_db.util import purge_filepaths as qiita_purge_filepaths +from qiita_db.util import quick_mounts_purge as qiita_quick_mounts_purge @click.group() @@ -29,17 +32,22 @@ def commands(): @commands.command() -@click.option('--remove/--no-remove', default=True, - help='remove any filepaths from the qiita.filepath table that ' - 'are not linked to any other table') +@click.option( + "--remove/--no-remove", + default=True, + help="remove any filepaths from the qiita.filepath table that " + "are not linked to any other table", +) def purge_filepaths(remove): qiita_purge_filepaths(remove) @commands.command() -@click.option('--remove/--no-remove', default=True, - help='remove files from the trash folder within the upload ' - 'folders') +@click.option( + "--remove/--no-remove", + default=True, + help="remove files from the trash folder within the upload folders", +) def empty_trash_upload_folder(remove): qiita_empty_trash_upload_folder(remove) @@ -56,7 +64,7 @@ def update_resource_allocation_redis(): @commands.command() def generate_biom_and_metadata_release(): - qiita_generate_biom_and_metadata_release('public') + qiita_generate_biom_and_metadata_release("public") @commands.command() diff --git a/scripts/qiita-env b/scripts/qiita-env index 4cf284393..9532288a0 100755 --- a/scripts/qiita-env +++ b/scripts/qiita-env @@ -11,8 +11,9 @@ import click import qiita_db as qdb -from qiita_core.environment_manager import test as _test, TEST_RUNNERS from qiita_core.configuration_manager import ConfigurationManager +from qiita_core.environment_manager import TEST_RUNNERS +from qiita_core.environment_manager import test as _test _CONFIG = ConfigurationManager() @@ -23,21 +24,30 @@ def env(): @env.command() -@click.option('--load-ontologies/--no-load-ontologies', - default=True, help='If True, ontologies will be loaded. 
' - 'Cannot be True if this is a test environment.') -@click.option('--download-reference/--no-download-reference', - default=False, help='If True, greengenes reference files will ' - 'be downloaded') -@click.option('--add-demo-user/--no-add-demo-user', - default=False, help='If True, then demo@microbio.me will be ' - 'added to the database with password ' - '"password"') +@click.option( + "--load-ontologies/--no-load-ontologies", + default=True, + help="If True, ontologies will be loaded. " + "Cannot be True if this is a test environment.", +) +@click.option( + "--download-reference/--no-download-reference", + default=False, + help="If True, greengenes reference files will be downloaded", +) +@click.option( + "--add-demo-user/--no-add-demo-user", + default=False, + help="If True, then demo@microbio.me will be " + "added to the database with password " + '"password"', +) def make(load_ontologies, download_reference, add_demo_user): """Creates the database specified in config""" try: qdb.environment_manager.make_environment( - load_ontologies, download_reference, add_demo_user) + load_ontologies, download_reference, add_demo_user + ) except Exception as e: if "Database qiita_test already present on the system." not in str(e): # this will clean our environment so we can try again without @@ -49,9 +59,12 @@ def make(load_ontologies, download_reference, add_demo_user): @env.command() -@click.option('--ask-for-confirmation/--no-ask-for-confirmation', - default=True, help='If True, will ask for confirmation before ' - 'dropping the production environment.') +@click.option( + "--ask-for-confirmation/--no-ask-for-confirmation", + default=True, + help="If True, will ask for confirmation before " + "dropping the production environment.", +) def drop(ask_for_confirmation): """Drops the database specified in config""" try: @@ -82,8 +95,13 @@ def patch(): @env.command() -@click.option('--runner', required=False, type=click.Choice(TEST_RUNNERS), - default='all', help='Test runner to use') +@click.option( + "--runner", + required=False, + type=click.Choice(TEST_RUNNERS), + default="all", + help="Test runner to use", +) def test(runner): """Test the environment @@ -96,8 +114,8 @@ def test(runner): @env.command(name="create-portal") -@click.argument('portal', required=True, type=str) -@click.argument('description', required=True, type=str) +@click.argument("portal", required=True, type=str) +@click.argument("description", required=True, type=str) def add_portal(portal, description): """Creates a new portal on the database""" try: @@ -107,7 +125,7 @@ def add_portal(portal, description): @env.command(name="remove-portal") -@click.argument('portal', required=True, type=str) +@click.argument("portal", required=True, type=str) def rem_portal(portal): """Removes a portal from the database""" try: @@ -118,5 +136,5 @@ def rem_portal(portal): raise click.BadParameter("Portal name does not exist!") -if __name__ == '__main__': +if __name__ == "__main__": env() diff --git a/scripts/qiita-private-launcher b/scripts/qiita-private-launcher index 0e4d3ffb0..c1b191db4 100755 --- a/scripts/qiita-private-launcher +++ b/scripts/qiita-private-launcher @@ -8,8 +8,8 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- -from subprocess import Popen, PIPE from os import close, remove +from subprocess import PIPE, Popen from tempfile import mkstemp import click @@ -26,17 +26,17 @@ source ~/.bash_profile @click.command() -@click.argument('qiita_env', required=True, nargs=1) -@click.argument('command', required=True, nargs=1) -@click.argument('arguments', required=True, nargs=-1) +@click.argument("qiita_env", required=True, nargs=1) +@click.argument("command", required=True, nargs=1) +@click.argument("arguments", required=True, nargs=-1) def start(qiita_env, command, arguments): """Starts the plugin environment""" - cmd = ['qiita-private', command] + cmd = ["qiita-private", command] cmd.extend(["'%s'" % arg for arg in arguments]) fd, fp = mkstemp(suffix=".sh") close(fd) - with open(fp, 'w') as f: - f.write(SCRIPT % (qiita_env, ' '.join(cmd))) + with open(fp, "w") as f: + f.write(SCRIPT % (qiita_env, " ".join(cmd))) cmd = "bash %s" % fp proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) stdout, stderr = proc.communicate() @@ -44,8 +44,9 @@ def start(qiita_env, command, arguments): if proc.returncode and proc.returncode != 0: raise ValueError( "Error launching internal task:\n\tStdout: %s\n\tStderr: %s" - % (stdout, stderr)) + % (stdout, stderr) + ) -if __name__ == '__main__': +if __name__ == "__main__": start() diff --git a/scripts/qiita-private-launcher-slurm b/scripts/qiita-private-launcher-slurm index 282fc4c96..300af484b 100644 --- a/scripts/qiita-private-launcher-slurm +++ b/scripts/qiita-private-launcher-slurm @@ -8,11 +8,11 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from subprocess import Popen, PIPE from datetime import datetime -from tempfile import mkdtemp -from os.path import join from os import environ +from os.path import join +from subprocess import PIPE, Popen +from tempfile import mkdtemp import click @@ -29,37 +29,39 @@ echo $SLURM_JOBID @click.command() -@click.argument('qiita_env', required=True, nargs=1) -@click.argument('command', required=True, nargs=1) -@click.argument('arguments', required=True, nargs=-1) +@click.argument("qiita_env", required=True, nargs=1) +@click.argument("command", required=True, nargs=1) +@click.argument("arguments", required=True, nargs=-1) def start(qiita_env, command, arguments): """Starts the plugin environment""" - cmd = ['qiita-private', command] + cmd = ["qiita-private", command] cmd.extend(["'%s'" % arg for arg in arguments]) # When Popen executes, the shell is not in interactive mode, # so it is not sourcing any of the bash configuration files # We need to source it so the env_script are available - lines = [' '.join(cmd)] + lines = [" ".join(cmd)] datestr = datetime.now().strftime("%Y%m%d_%I%M%S.%f") - dirpath = mkdtemp(prefix=datestr, - dir='/projects/qiita_data/working_dir/private-jobs/') - fp = join(dirpath, 'private') - with open(fp, 'w') as f: + dirpath = mkdtemp( + prefix=datestr, dir="/projects/qiita_data/working_dir/private-jobs/" + ) + fp = join(dirpath, "private") + with open(fp, "w") as f: f.write(SBATCHFILE % (dirpath, dirpath, "\n".join(lines))) cmd = "sbatch %s" % fp - epilogue = environ.get('QIITA_JOB_SCHEDULER_EPILOGUE', '') + epilogue = environ.get("QIITA_JOB_SCHEDULER_EPILOGUE", "") if epilogue: - cmd = f'{cmd} --epilog {epilogue}' + cmd = f"{cmd} --epilog {epilogue}" proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) stdout, stderr = 
proc.communicate() if proc.returncode and proc.returncode != 0: raise ValueError( "Error launching internal task:\n\tStdout: %s\n\tStderr: %s" - % (stdout, stderr)) + % (stdout, stderr) + ) -if __name__ == '__main__': +if __name__ == "__main__": start() diff --git a/scripts/qiita-private-plugin b/scripts/qiita-private-plugin index ce5f8ef6e..94555fea6 100755 --- a/scripts/qiita-private-plugin +++ b/scripts/qiita-private-plugin @@ -14,9 +14,9 @@ from qiita_ware.private_plugin import private_task @click.command() -@click.argument('url', required=True) -@click.argument('job_id', required=True) -@click.argument('output_dir', required=True) +@click.argument("url", required=True) +@click.argument("job_id", required=True) +@click.argument("output_dir", required=True) def execute(url, job_id, output_dir): """Executes the task given by job_id @@ -26,5 +26,5 @@ def execute(url, job_id, output_dir): private_task(job_id) -if __name__ == '__main__': +if __name__ == "__main__": execute() diff --git a/scripts/qiita-recover-jobs b/scripts/qiita-recover-jobs index b9cefe6f4..b97332538 100644 --- a/scripts/qiita-recover-jobs +++ b/scripts/qiita-recover-jobs @@ -7,14 +7,15 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +from io import StringIO +from math import ceil from subprocess import check_output -from qiita_db.sql_connection import TRN -from qiita_db.processing_job import ProcessingJob -import pandas as pd from time import sleep -from math import ceil -from io import StringIO +import pandas as pd + +from qiita_db.processing_job import ProcessingJob +from qiita_db.sql_connection import TRN SLEEP_TIME = 6 CHANCES = 3 @@ -28,30 +29,30 @@ SQL = """SELECT processing_job_id def _submit_jobs(jids_to_recover, recover_type): # we are going to split the SLEEP_TIME by CHANCES so we can ctrl-c # ... 
just in case - st = int(ceil(SLEEP_TIME/CHANCES)) + st = int(ceil(SLEEP_TIME / CHANCES)) len_jids_to_recover = len(jids_to_recover) for i, j in enumerate(jids_to_recover): - print(f'recovering {j} {recover_type}: {len_jids_to_recover}/{i}') + print(f"recovering {j} {recover_type}: {len_jids_to_recover}/{i}") job = ProcessingJob(j) - job._set_status('in_construction') + job._set_status("in_construction") job.submit() for i in range(CHANCES): - print('You can ctrl-c now, iteration %d' % i) + print("You can ctrl-c now, iteration %d" % i) sleep(st) def _retrieve_queue_jobs(): # getting all the jobs in the queues - all_jobs = pd.read_csv(StringIO( - check_output(['squeue', '-o', '%all']).decode('ascii')), sep='|') + all_jobs = pd.read_csv( + StringIO(check_output(["squeue", "-o", "%all"]).decode("ascii")), sep="|" + ) # just keeping the qiita jobs - jobs = all_jobs[all_jobs.GROUP == 'qiita'] + jobs = all_jobs[all_jobs.GROUP == "qiita"] # ignore the merge-jobs and get unique values - qiita_jids = jobs.NAME.str.replace('merge-', '').unique() - qiita_jids = [x.replace( - 'finish-', '').replace('.txt', '') for x in qiita_jids] + qiita_jids = jobs.NAME.str.replace("merge-", "").unique() + qiita_jids = [x.replace("finish-", "").replace(".txt", "") for x in qiita_jids] return set(qiita_jids) @@ -61,7 +62,7 @@ def _get_jids_to_recover(recover_type): TRN.add(SQL, [recover_type]) jids = set(TRN.execute_fetchflatten()) jids_to_recover = list(jids - _retrieve_queue_jobs()) - print('Total %s: %d' % (recover_type, len(jids_to_recover))) + print("Total %s: %d" % (recover_type, len(jids_to_recover))) return jids_to_recover @@ -70,29 +71,57 @@ def _qiita_queue_log_parse(jids_to_recover): for jid in jids_to_recover: job = ProcessingJob(jid) if job.external_id: - bvals = pd.read_csv(StringIO(check_output([ - 'sacct', '-p', - '--format=ExitCode,ReqMem,MaxRSS,CPUTimeRAW,TimelimitRaw', - '-j', f'{job.external_id}.batch']).decode( - 'ascii')), sep='|').iloc[0].to_dict() - vals = pd.read_csv(StringIO(check_output([ - 'sacct', '-p', - '--format=ExitCode,ReqMem,MaxRSS,CPUTimeRAW,TimelimitRaw', - '-j', f'{job.external_id}']).decode( - 'ascii')), sep='|').iloc[0].to_dict() + bvals = ( + pd.read_csv( + StringIO( + check_output( + [ + "sacct", + "-p", + "--format=ExitCode,ReqMem,MaxRSS,CPUTimeRAW,TimelimitRaw", + "-j", + f"{job.external_id}.batch", + ] + ).decode("ascii") + ), + sep="|", + ) + .iloc[0] + .to_dict() + ) + vals = ( + pd.read_csv( + StringIO( + check_output( + [ + "sacct", + "-p", + "--format=ExitCode,ReqMem,MaxRSS,CPUTimeRAW,TimelimitRaw", + "-j", + f"{job.external_id}", + ] + ).decode("ascii") + ), + sep="|", + ) + .iloc[0] + .to_dict() + ) data = { - 'exit-code': bvals['ExitCode'], - 'mem-requested': bvals['ReqMem'], - 'time-requested': vals['TimelimitRaw'], - 'mem-used': bvals['MaxRSS'], - 'time-used': bvals['CPUTimeRAW']} + "exit-code": bvals["ExitCode"], + "mem-requested": bvals["ReqMem"], + "time-requested": vals["TimelimitRaw"], + "mem-used": bvals["MaxRSS"], + "time-used": bvals["CPUTimeRAW"], + } else: data = { - 'exit-code': None, - 'mem-requested': None, - 'time-requested': None, - 'mem-used': None, - 'time-used': None} + "exit-code": None, + "mem-requested": None, + "time-requested": None, + "mem-used": None, + "time-used": None, + } results.append(job, data) return results @@ -113,33 +142,39 @@ def _flush_queues(recover_type): # first start with completing jobs that are not running jids_to_recover = _get_jids_to_recover(recover_type) review_jobs = _qiita_queue_log_parse(jids_to_recover) - 
jids_review_jobs = [j.id for j, r in review_jobs - if {rr['exit-code'] for rr in r} == {'1'}] - _submit_jobs(jids_review_jobs, recover_type + '/queue_log/1') + jids_review_jobs = [ + j.id for j, r in review_jobs if {rr["exit-code"] for rr in r} == {"1"} + ] + _submit_jobs(jids_review_jobs, recover_type + "/queue_log/1") jids_to_recover = _get_jids_to_recover(recover_type) review_jobs = _qiita_queue_log_parse(jids_to_recover) - jids_review_jobs = [j.id for j, r in review_jobs - if {rr['exit-code'] for rr in r} == {'0'}] - _submit_jobs(jids_review_jobs, recover_type + '/queue_log/0') + jids_review_jobs = [ + j.id for j, r in review_jobs if {rr["exit-code"] for rr in r} == {"0"} + ] + _submit_jobs(jids_review_jobs, recover_type + "/queue_log/0") jids_to_recover = _get_jids_to_recover(recover_type) - complete_job = [j for j in jids_to_recover - if ProcessingJob(j).command.name == 'complete_job'] - _submit_jobs(complete_job, recover_type + '/complete_job') + complete_job = [ + j for j in jids_to_recover if ProcessingJob(j).command.name == "complete_job" + ] + _submit_jobs(complete_job, recover_type + "/complete_job") # first start validators that are not running jids_to_recover = _get_jids_to_recover(recover_type) - validate = [j for j in jids_to_recover - if ProcessingJob(j).command.name == 'Validate'] - _submit_jobs(validate, recover_type + '/validate') + validate = [ + j for j in jids_to_recover if ProcessingJob(j).command.name == "Validate" + ] + _submit_jobs(validate, recover_type + "/validate") # then the release validator jids_to_recover = _get_jids_to_recover(recover_type) release_validators = [ - j for j in jids_to_recover - if ProcessingJob(j).command.name == 'release_validators'] - _submit_jobs(release_validators, recover_type + '/release_validators') + j + for j in jids_to_recover + if ProcessingJob(j).command.name == "release_validators" + ] + _submit_jobs(release_validators, recover_type + "/release_validators") def qiita_recover_jobs(): @@ -156,7 +191,7 @@ def qiita_recover_jobs(): # -> complete_job -> create artifact # Step 1: recover jobs that are in queue status - recover_type = 'queued' + recover_type = "queued" _flush_queues(recover_type) # then we recover what's left @@ -167,7 +202,7 @@ def qiita_recover_jobs(): # to recover this group: 2.1. check if they have validators, # 2.2. if so, recover validators, 2. 
recover failed jobs with TRN: - recover_type = 'running' + recover_type = "running" _flush_queues(recover_type) jids_to_recover = _get_jids_to_recover(recover_type) @@ -184,8 +219,9 @@ def qiita_recover_jobs(): # in the next code of block for vj in validators: jobs_with_validators.append(vj.id) - status = set([v.status for v in validators - if v.id not in _retrieve_queue_jobs()]) + status = set( + [v.status for v in validators if v.id not in _retrieve_queue_jobs()] + ) # if there are no status, that means that the validators weren't # created and we should rerun from scratch (Step 4) if not bool(status): @@ -193,31 +229,35 @@ def qiita_recover_jobs(): # it multiple status in the validators, it's a complex behaivor # and needs a case by case solution if len(status) != 1: - print("Job '%s' has too many validators status (%d), check " - "them by hand" % (j, len(status))) + print( + "Job '%s' has too many validators status (%d), check " + "them by hand" % (j, len(status)) + ) continue status = list(status)[0] - if status == 'waiting': + if status == "waiting": print("releasing job validators: %s" % j) try: job.release_validators() except Exception: print("ERROR, releasing %s validators" % j) sleep(SLEEP_TIME) - elif status == 'running': - _submit_jobs(validators, recover_type + ' validator, running') - elif status == 'error': + elif status == "running": + _submit_jobs(validators, recover_type + " validator, running") + elif status == "error": # in this case is the same process than before but we need # to split the set in_construction and submit in 2 steps, # however, we can still submit via _submit_jobs for v in validators: vjob = ProcessingJob(v) - vjob._set_status('in_construction') - _submit_jobs(validators, recover_type + ' validator, error') + vjob._set_status("in_construction") + _submit_jobs(validators, recover_type + " validator, error") else: - print("Check the status of this job %s : %s and validators" - "%s." % (j, status, validators)) + print( + "Check the status of this job %s : %s and validators" + "%s." 
% (j, status, validators) + ) jids_to_recover = set(jids_to_recover) - set(jobs_with_validators) @@ -226,15 +266,17 @@ def qiita_recover_jobs(): job = ProcessingJob(j) status = job.status - if status == 'waiting': + if status == "waiting": print("releasing job validators: %s" % j) job.release_validators() sleep(SLEEP_TIME) - elif 'running' == status: - _submit_jobs([j], 'main_job, running') + elif "running" == status: + _submit_jobs([j], "main_job, running") -if __name__ == '__main__': - raise ValueError('This script should never be called directly but should ' - 'be used as a reference if we need to recover jobs, ' - 'see: qiita_recover_jobs') +if __name__ == "__main__": + raise ValueError( + "This script should never be called directly but should " + "be used as a reference if we need to recover jobs, " + "see: qiita_recover_jobs" + ) diff --git a/scripts/qiita-test-install b/scripts/qiita-test-install index d17742fa1..3f6474c82 100755 --- a/scripts/qiita-test-install +++ b/scripts/qiita-test-install @@ -1,9 +1,10 @@ #!/usr/bin/env python from os import environ -from os.path import join, dirname, abspath, splitext -from sys import platform, version as python_version, exit, executable, stdout -from unittest import TestLoader, TextTestRunner, TestCase +from os.path import abspath, dirname, join, splitext from smtplib import SMTP, SMTP_SSL +from sys import executable, exit, platform, stdout +from sys import version as python_version +from unittest import TestCase, TestLoader, TextTestRunner # ----------------------------------------------------------------------------- # Copyright (c) 2014--, The Qiita Development Team. @@ -15,17 +16,21 @@ from smtplib import SMTP, SMTP_SSL core_dependency_missing_msg = ( '"%s" is missing and is a core requirement, for more information see the ' - 'Qiita Installation Guide: ' - 'https://github.com/biocore/qiita/blob/master/INSTALL.md') + "Qiita Installation Guide: " + "https://github.com/biocore/qiita/blob/master/INSTALL.md" +) -extra_info = (core_dependency_missing_msg + '. It is also possible that you ' - 'have an old version of this package, if so, please update to ' - 'the latest.') +extra_info = ( + core_dependency_missing_msg + ". It is also possible that you " + "have an old version of this package, if so, please update to " + "the latest." 
+) dependency_missing_msg = ( '"%s" is missing but this is _not_ a core requirement, for more ' - 'information see the Qiita Installation Guide: ' - 'https://github.com/biocore/qiita/blob/master/INSTALL.md') + "information see the Qiita Installation Guide: " + "https://github.com/biocore/qiita/blob/master/INSTALL.md" +) missing_deps_errors = [] missing_deps_warnings = [] @@ -33,118 +38,125 @@ missing_deps_warnings = [] try: from click import __version__ as click_lib_version except ImportError as e: - missing_deps_errors.append((e, extra_info % 'click')) + missing_deps_errors.append((e, extra_info % "click")) try: from numpy import __version__ as numpy_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'NumPy')) + missing_deps_errors.append((e, core_dependency_missing_msg % "NumPy")) try: from pandas import __version__ as pandas_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'pandas')) + missing_deps_errors.append((e, core_dependency_missing_msg % "pandas")) try: from tornado import version as tornado_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'tornado')) + missing_deps_errors.append((e, core_dependency_missing_msg % "tornado")) try: from redis import __version__ as redis_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'redis')) + missing_deps_errors.append((e, core_dependency_missing_msg % "redis")) try: from toredis import Client as toredis_client + toredis_client() except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'toredis')) + missing_deps_errors.append((e, core_dependency_missing_msg % "toredis")) try: from redbiom import __version__ as redbiom_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'redbiom')) + missing_deps_errors.append((e, core_dependency_missing_msg % "redbiom")) try: from bcrypt import __version__ as bcrypt_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'bcrypt')) + missing_deps_errors.append((e, core_dependency_missing_msg % "bcrypt")) try: from pyparsing import __version__ as pyparsing_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'pyparsing')) + missing_deps_errors.append((e, core_dependency_missing_msg % "pyparsing")) try: from networkx import __version__ as networkx_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'networkx')) + missing_deps_errors.append((e, core_dependency_missing_msg % "networkx")) try: from wtforms import __version__ as wtforms_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'wtforms')) + missing_deps_errors.append((e, core_dependency_missing_msg % "wtforms")) try: from mock import __version__ as mock_lib_version except ImportError as e: - missing_deps_warnings.append((e, dependency_missing_msg % 'mock')) - mock_lib_version = 'Not installed' + missing_deps_warnings.append((e, dependency_missing_msg % "mock")) + mock_lib_version = "Not installed" try: from psycopg2 import __version__ as psycopg2_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'psycopg2')) + missing_deps_errors.append((e, core_dependency_missing_msg % "psycopg2")) try: from qiita_core import __version__ as qiita_core_lib_version 
except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'qiita_core')) + missing_deps_errors.append((e, core_dependency_missing_msg % "qiita_core")) try: from qiita_db import __version__ as qiita_db_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'qiita_db')) + missing_deps_errors.append((e, core_dependency_missing_msg % "qiita_db")) try: from qiita_pet import __version__ as qiita_pet_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'qiita_pet')) + missing_deps_errors.append((e, core_dependency_missing_msg % "qiita_pet")) try: from qiita_ware import __version__ as qiita_ware_lib_version except ImportError as e: - missing_deps_errors.append((e, core_dependency_missing_msg % 'qiita_ware')) + missing_deps_errors.append((e, core_dependency_missing_msg % "qiita_ware")) try: from qiita_core.configuration_manager import ConfigurationManager + ConfigurationManager() except Exception as e: - missing_deps_errors.append((e, 'You need to add to your enviroment ' - 'the Qiita configuration using ' - 'QIITA_CONFIG_FP')) + missing_deps_errors.append( + ( + e, + "You need to add to your enviroment " + "the Qiita configuration using " + "QIITA_CONFIG_FP", + ) + ) if missing_deps_errors: for e, t in missing_deps_errors: - print('%s\n=============' % (t)) - exit('Missing core dependencies, can not continue.') + print("%s\n=============" % (t)) + exit("Missing core dependencies, can not continue.") if missing_deps_warnings: for e, t in missing_deps_errors: - print('%s\n=============' % (t)) + print("%s\n=============" % (t)) # trick flake8 to not complain about module-level imports not being at the top # of the file. These imports can only really happen if none of the core # dependencies are missing if True: - from qiita_db.sql_connection import TRN from redis import StrictRedis + from qiita_db.sql_connection import TRN + class QiitaConfig(TestCase): - def setUp(self): self.config = ConfigurationManager() try: @@ -158,89 +170,101 @@ class QiitaConfig(TestCase): host=self.config.redis_host, password=self.config.redis_password, port=self.config.redis_port, - db=self.config.redis_db) - self.redis_version = r.info()['redis_version'] + db=self.config.redis_db, + ) + self.redis_version = r.info()["redis_version"] except Exception: self.redis_version = None def test_pandas_library_version(self): acceptable_version = (0, 15) - string_acceptable_version = '.'.join(map(str, acceptable_version)) - version = tuple(map(int, pandas_lib_version.split('.'))) + string_acceptable_version = ".".join(map(str, acceptable_version)) + version = tuple(map(int, pandas_lib_version.split("."))) - self.assertTrue(acceptable_version <= version, - 'Unsupported pandas version. You have %s but the ' - 'minimum required version is %s' - % (pandas_lib_version, string_acceptable_version)) + self.assertTrue( + acceptable_version <= version, + "Unsupported pandas version. You have %s but the " + "minimum required version is %s" + % (pandas_lib_version, string_acceptable_version), + ) def test_torando_library_version(self): acceptable_version = (3, 1, 1) - string_acceptable_version = '.'.join(map(str, acceptable_version)) - version = tuple(map(int, tornado_lib_version.split('.'))) + string_acceptable_version = ".".join(map(str, acceptable_version)) + version = tuple(map(int, tornado_lib_version.split("."))) - self.assertTrue(acceptable_version <= version, - 'Unsupported tornado version. 
You have %s but the ' - 'minimum required version is %s' - % (tornado_lib_version, string_acceptable_version)) + self.assertTrue( + acceptable_version <= version, + "Unsupported tornado version. You have %s but the " + "minimum required version is %s" + % (tornado_lib_version, string_acceptable_version), + ) def test_pyparsing_library_version(self): acceptable_version = (2, 0, 2) - string_acceptable_version = '.'.join(map(str, acceptable_version)) - version = tuple(map(int, pyparsing_lib_version.split('.'))) + string_acceptable_version = ".".join(map(str, acceptable_version)) + version = tuple(map(int, pyparsing_lib_version.split("."))) - self.assertTrue(acceptable_version <= version, - 'Unsupported pyparsing version. You have %s but the ' - 'minimum required version is %s' - % (pyparsing_lib_version, string_acceptable_version)) + self.assertTrue( + acceptable_version <= version, + "Unsupported pyparsing version. You have %s but the " + "minimum required version is %s" + % (pyparsing_lib_version, string_acceptable_version), + ) def test_wtforms_library_version(self): acceptable_version = (2, 0, 1) - string_acceptable_version = '.'.join(map(str, acceptable_version)) - version = tuple(map(int, wtforms_lib_version.split('.'))) + string_acceptable_version = ".".join(map(str, acceptable_version)) + version = tuple(map(int, wtforms_lib_version.split("."))) - self.assertTrue(acceptable_version <= version, - 'Unsupported WTForms version. You have %s but the ' - 'minimum required version is %s' - % (wtforms_lib_version, string_acceptable_version)) + self.assertTrue( + acceptable_version <= version, + "Unsupported WTForms version. You have %s but the " + "minimum required version is %s" + % (wtforms_lib_version, string_acceptable_version), + ) def test_postgresql_version(self): if not self.psql_version: - self.assertTrue(False, 'PostgreSQL not running or configured') + self.assertTrue(False, "PostgreSQL not running or configured") acceptable_version = (9, 3, 0) - string_acceptable_version = '.'.join(map(str, acceptable_version)) - version = tuple(map(int, self.psql_version.split(' ')[1].split('.'))) + string_acceptable_version = ".".join(map(str, acceptable_version)) + version = tuple(map(int, self.psql_version.split(" ")[1].split("."))) - self.assertTrue(acceptable_version <= version, - 'Unsupported PostgreSQL version. You have %s but the ' - 'minimum required version is %s' - % ('.'.join(map(str, version)), - string_acceptable_version)) + self.assertTrue( + acceptable_version <= version, + "Unsupported PostgreSQL version. You have %s but the " + "minimum required version is %s" + % (".".join(map(str, version)), string_acceptable_version), + ) def test_redis_version(self): if not self.redis_version: - self.assertTrue(False, 'redis not running or configured') + self.assertTrue(False, "redis not running or configured") acceptable_version = (2, 8, 17) - string_acceptable_version = '.'.join(map(str, acceptable_version)) - version = tuple(map(int, self.redis_version.split('.'))) + string_acceptable_version = ".".join(map(str, acceptable_version)) + version = tuple(map(int, self.redis_version.split("."))) - self.assertTrue(acceptable_version <= version, - 'Unsupported redis version. You have %s but the ' - 'minimum required version is %s' - % ('.'.join(map(str, version)), - string_acceptable_version)) + self.assertTrue( + acceptable_version <= version, + "Unsupported redis version. 
You have %s but the " + "minimum required version is %s" + % (".".join(map(str, version)), string_acceptable_version), + ) def test_redbiom_version(self): acceptable_version = (0, 3, 8) - string_acceptable_version = '.'.join(map(str, acceptable_version)) - version = tuple(map(int, redbiom_lib_version.split('.'))) + string_acceptable_version = ".".join(map(str, acceptable_version)) + version = tuple(map(int, redbiom_lib_version.split("."))) - self.assertTrue(acceptable_version <= version, - 'Unsupported redbiom version. You have %s but the ' - 'minimum required version is %s' - % ('.'.join(map(str, version)), - string_acceptable_version)) + self.assertTrue( + acceptable_version <= version, + "Unsupported redbiom version. You have %s but the " + "minimum required version is %s" + % (".".join(map(str, version)), string_acceptable_version), + ) system_info_header = """ @@ -274,8 +298,9 @@ Qiita plugins def main(): system_info = [ ("Platform", platform), - ("Python version", python_version.replace('\n', ' ')), - ("Python executable", executable)] + ("Python version", python_version.replace("\n", " ")), + ("Python executable", executable), + ] max_len = max([len(e[0]) for e in system_info]) print(system_info_header) for v in system_info: @@ -287,30 +312,32 @@ def main(): qiita_db_patch_number = splitext(current_patch)[0] # Getting required environment variables - if 'REDBIOM_HOST' in environ: - redbiom_host = environ['REDBIOM_HOST'] + if "REDBIOM_HOST" in environ: + redbiom_host = environ["REDBIOM_HOST"] else: redbiom_host = None version_info = [ - ('click library version', click_lib_version), - ('numpy library version', numpy_lib_version), - ('pandas library version', pandas_lib_version), - ('tornado library version', tornado_lib_version), - ('redis library version', redis_lib_version), - ('redbiom library version', '%s - host: %s' % ( - redbiom_lib_version, redbiom_host)), - ('bcrypt library version', bcrypt_lib_version), - ('pyparsing library version', pyparsing_lib_version), - ('networkX library version', networkx_lib_version), - ('WTForms library version', wtforms_lib_version), - ('mock library version', mock_lib_version), - ('psycopg2 library version', psycopg2_lib_version), - ('Qiita core library version', qiita_core_lib_version), - ('Qiita db library version', qiita_db_lib_version), - ('Qiita db patch number', qiita_db_patch_number), - ('Qiita pet library version', qiita_pet_lib_version), - ('Qiita ware library version', qiita_ware_lib_version) + ("click library version", click_lib_version), + ("numpy library version", numpy_lib_version), + ("pandas library version", pandas_lib_version), + ("tornado library version", tornado_lib_version), + ("redis library version", redis_lib_version), + ( + "redbiom library version", + "%s - host: %s" % (redbiom_lib_version, redbiom_host), + ), + ("bcrypt library version", bcrypt_lib_version), + ("pyparsing library version", pyparsing_lib_version), + ("networkX library version", networkx_lib_version), + ("WTForms library version", wtforms_lib_version), + ("mock library version", mock_lib_version), + ("psycopg2 library version", psycopg2_lib_version), + ("Qiita core library version", qiita_core_lib_version), + ("Qiita db library version", qiita_db_lib_version), + ("Qiita db patch number", qiita_db_patch_number), + ("Qiita pet library version", qiita_pet_lib_version), + ("Qiita ware library version", qiita_ware_lib_version), ] max_len = max([len(e[0]) for e in version_info]) print(dependency_info_header) @@ -320,10 +347,11 @@ def main(): extra_info = None 
qiita_config = ConfigurationManager() try: - qiita_conf_fp = environ['QIITA_CONFIG_FP'] + qiita_conf_fp = environ["QIITA_CONFIG_FP"] except KeyError: - qiita_conf_fp = join(dirname(abspath(__file__)), - 'support_files/config_test.cfg') + qiita_conf_fp = join( + dirname(abspath(__file__)), "support_files/config_test.cfg" + ) smtp = SMTP_SSL() if qiita_config.smtp_ssl else SMTP() smtp.set_debuglevel(False) try: @@ -332,15 +360,19 @@ def main(): send_email = True except Exception: send_email = False - ebi_credentials = (qiita_config.ebi_center_name != '' and - qiita_config.ebi_dropbox_url != '' and - qiita_config.ebi_organization_prefix != '' and - qiita_config.ebi_seq_xfer_pass != '' and - qiita_config.ebi_seq_xfer_url != '' and - qiita_config.ebi_seq_xfer_user != '') - vamps_credentials = (qiita_config.vamps_pass != '' and - qiita_config.vamps_url != '' and - qiita_config.vamps_user != '') + ebi_credentials = ( + qiita_config.ebi_center_name != "" + and qiita_config.ebi_dropbox_url != "" + and qiita_config.ebi_organization_prefix != "" + and qiita_config.ebi_seq_xfer_pass != "" + and qiita_config.ebi_seq_xfer_url != "" + and qiita_config.ebi_seq_xfer_user != "" + ) + vamps_credentials = ( + qiita_config.vamps_pass != "" + and qiita_config.vamps_url != "" + and qiita_config.vamps_user != "" + ) try: with TRN: psql_running = True @@ -351,46 +383,59 @@ def main(): host=qiita_config.redis_host, password=qiita_config.redis_password, port=qiita_config.redis_port, - db=qiita_config.redis_db) + db=qiita_config.redis_db, + ) redis_running = True except Exception: redis_running = False try: StrictRedis( - host=qiita_config.redbiom_redis_host, - password=qiita_config.redbiom_redis_password, - port=qiita_config.redbiom_redis_port, - db=qiita_config.redbiom_redis_db) + host=qiita_config.redbiom_redis_host, + password=qiita_config.redbiom_redis_password, + port=qiita_config.redbiom_redis_port, + db=qiita_config.redbiom_redis_db, + ) redbiom_redis_running = True except Exception: redbiom_redis_running = False print(qiita_config_header) qiita_config_info = [ - ('QIITA_CONFIG_FP filepath', qiita_conf_fp), - ('Test environment', str(qiita_config.test_environment)), - ('Base URL', qiita_config.base_url), - ('EBI credentials exist', ebi_credentials), - ('VAMPS credentials exist', vamps_credentials), - ('Can the system send emails?', str(send_email) + '. When true, ' - 'emails could still not be going out due to your network ' - 'configuration.'), - ('Valid file extensions for upload', ', '.join( - qiita_config.valid_upload_extension)), - ('PostgreSQL is up and configuration can connect?', psql_running), - ('Redis is up and configuracion can connect?', - redis_running if not redis_running else '%s --port %d' % ( - redis_running, qiita_config.redis_port)), - ('Redbiom redis is up and configuracion can connect?', - redbiom_redis_running if not redbiom_redis_running else - '%s --port %d' % (redbiom_redis_running, - qiita_config.redbiom_redis_port)), - ('Extra info', extra_info) + ("QIITA_CONFIG_FP filepath", qiita_conf_fp), + ("Test environment", str(qiita_config.test_environment)), + ("Base URL", qiita_config.base_url), + ("EBI credentials exist", ebi_credentials), + ("VAMPS credentials exist", vamps_credentials), + ( + "Can the system send emails?", + str(send_email) + ". 
When true, " + "emails could still not be going out due to your network " + "configuration.", + ), + ( + "Valid file extensions for upload", + ", ".join(qiita_config.valid_upload_extension), + ), + ("PostgreSQL is up and configuration can connect?", psql_running), + ( + "Redis is up and configuracion can connect?", + redis_running + if not redis_running + else "%s --port %d" % (redis_running, qiita_config.redis_port), + ), + ( + "Redbiom redis is up and configuracion can connect?", + redbiom_redis_running + if not redbiom_redis_running + else "%s --port %d" + % (redbiom_redis_running, qiita_config.redbiom_redis_port), + ), + ("Extra info", extra_info), ] max_len = max([len(e[0]) for e in qiita_config_info]) for v in qiita_config_info: - if v != ('Extra info', None): + if v != ("Extra info", None): print("%*s:\t%s" % (max_len, v[0], v[1])) print(qiita_plugins_header) @@ -399,6 +444,7 @@ def main(): else: try: import qiita_db as qdb + with qdb.sql_connection.TRN: sql = """SELECT name, version, client_id, client_secret FROM qiita.software @@ -412,8 +458,7 @@ def main(): print("\tClient id: %s" % client_id) print("\tClient secret: %s" % client_secret) except Exception as e: - print("An error occurred while retrieving plugin information: %s" - % str(e)) + print("An error occurred while retrieving plugin information: %s" % str(e)) print(qiita_config_tests_header) suite = TestLoader().loadTestsFromTestCase(QiitaConfig) diff --git a/setup.py b/setup.py index 61d080042..05bf24a31 100644 --- a/setup.py +++ b/setup.py @@ -7,9 +7,10 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from setuptools import setup from glob import glob +from setuptools import setup + __version__ = "2025.11" @@ -26,95 +27,128 @@ Operating System :: MacOS :: MacOS X """ -with open('README.rst') as f: +with open("README.rst") as f: long_description = f.read() -classifiers = [s.strip() for s in classes.split('\n') if s] +classifiers = [s.strip() for s in classes.split("\n") if s] -setup(name='qiita-spots', - version=__version__, - long_description=long_description, - license="BSD", - description='Qiita: Spot Patterns', - author="Qiita development team", - author_email="qiita.help@gmail.com", - url='https://github.com/biocore/qiita', - test_suite='nose.collector', - packages=['qiita_core', - 'qiita_db', - 'qiita_db/handlers', - 'qiita_db/metadata_template', - 'qiita_pet', - 'qiita_pet/handlers', - 'qiita_pet/handlers/study_handlers', - 'qiita_pet/handlers/api_proxy', - 'qiita_ware' - ], - include_package_data=True, - package_data={ - 'qiita_core': [ - 'support_files/config_test.cfg' - 'support_files/ci_server.crt', - 'support_files/ci_server.csr', - 'support_files/ci_server.key' - ], - 'qiita_db': [ - 'support_files/*.sql', - 'support_files/patches/*.sql', - 'support_files/patches/python_patches/*.py', - 'support_files/test_data/preprocessed_data/*', - 'support_files/test_data/processed_data/*', - 'support_files/test_data/raw_data/*', - 'support_files/test_data/analysis/*', - 'support_files/test_data/reference/*', - 'support_files/test_data/job/*.txt', - 'support_files/test_data/job/2_test_folder/*', - 'support_files/test_data/uploads/1/a_folder/*.txt', - 'support_files/test_data/uploads/1/.hidden_file.txt', - 'support_files/test_data/uploads/1/uploaded_file.txt', - 'support_files/test_data/templates/*', - 'support_files/work_data/*'], - 'qiita_pet': [ - 'static/css/*.css', 'static/img/*.png', - 'static/img/*.gif', 
'static/img/*.ico', - 'static/js/*.js', 'static/vendor/css/*.css', - 'static/vendor/css/images/*.png', - 'static/vendor/css/*.png', - 'static/vendor/fonts/glyphicons*.*', - 'static/vendor/images/*.png', - 'static/vendor/js/*.js', - 'results/admin/jobname/*.html', - 'templates/*.html', - 'support_files/config_portal.cfg', - 'support_files/doc/Makefile', - 'support_files/doc/README.md', - 'support_files/doc/source/conf.py', - 'support_files/doc/source/*.rst', - 'support_files/doc/source/tutorials/*.rst', - 'support_files/doc/source/admin/*.rst', - 'support_files/doc/source/dev/*.rst', - 'support_files/doc/source/qiita-philosophy/*.rst', - 'support_files/doc/source/admin/images/*.png', - 'support_files/doc/source/tutorials/images/*.png', - 'support_files/doc/source/qiita-philosophy/images/*.png', - 'support_files/doc/source/_static/*.png' - ]}, - scripts=glob('scripts/*'), - # making sure that numpy is installed before biom - setup_requires=['numpy', 'cython'], - install_requires=['psycopg2', 'click', 'bcrypt', 'pandas<2.0', - 'biom-format', 'tornado<6.0', 'toredis', 'redis', - 'scp', 'pyparsing', 'h5py', 'natsort', 'nose', 'pep8', - 'networkx', 'humanize==4.11', 'wtforms<3.0.0', - 'nltk<=3.8.1', - 'openpyxl', 'sphinx-bootstrap-theme', 'Sphinx<3.0', - 'gitpython', 'redbiom', 'pyzmq', 'sphinx_rtd_theme', - 'paramiko', 'seaborn', 'matplotlib', 'scipy<=1.10.1', - 'nose', - 'flake8', 'six', 'qiita-files @ https://github.com/' - 'qiita-spots/qiita-files/archive/master.zip', 'mock', - 'python-jose', 'markdown2', 'iteration_utilities', - 'supervisor @ https://github.com/Supervisor/' - 'supervisor/archive/master.zip', 'joblib'], - classifiers=classifiers - ) +setup( + name="qiita-spots", + version=__version__, + long_description=long_description, + license="BSD", + description="Qiita: Spot Patterns", + author="Qiita development team", + author_email="qiita.help@gmail.com", + url="https://github.com/biocore/qiita", + test_suite="nose.collector", + packages=[ + "qiita_core", + "qiita_db", + "qiita_db/handlers", + "qiita_db/metadata_template", + "qiita_pet", + "qiita_pet/handlers", + "qiita_pet/handlers/study_handlers", + "qiita_pet/handlers/api_proxy", + "qiita_ware", + ], + include_package_data=True, + package_data={ + "qiita_core": [ + "support_files/config_test.cfgsupport_files/ci_server.crt", + "support_files/ci_server.csr", + "support_files/ci_server.key", + ], + "qiita_db": [ + "support_files/*.sql", + "support_files/patches/*.sql", + "support_files/patches/python_patches/*.py", + "support_files/test_data/preprocessed_data/*", + "support_files/test_data/processed_data/*", + "support_files/test_data/raw_data/*", + "support_files/test_data/analysis/*", + "support_files/test_data/reference/*", + "support_files/test_data/job/*.txt", + "support_files/test_data/job/2_test_folder/*", + "support_files/test_data/uploads/1/a_folder/*.txt", + "support_files/test_data/uploads/1/.hidden_file.txt", + "support_files/test_data/uploads/1/uploaded_file.txt", + "support_files/test_data/templates/*", + "support_files/work_data/*", + ], + "qiita_pet": [ + "static/css/*.css", + "static/img/*.png", + "static/img/*.gif", + "static/img/*.ico", + "static/js/*.js", + "static/vendor/css/*.css", + "static/vendor/css/images/*.png", + "static/vendor/css/*.png", + "static/vendor/fonts/glyphicons*.*", + "static/vendor/images/*.png", + "static/vendor/js/*.js", + "results/admin/jobname/*.html", + "templates/*.html", + "support_files/config_portal.cfg", + "support_files/doc/Makefile", + "support_files/doc/README.md", + 
"support_files/doc/source/conf.py", + "support_files/doc/source/*.rst", + "support_files/doc/source/tutorials/*.rst", + "support_files/doc/source/admin/*.rst", + "support_files/doc/source/dev/*.rst", + "support_files/doc/source/qiita-philosophy/*.rst", + "support_files/doc/source/admin/images/*.png", + "support_files/doc/source/tutorials/images/*.png", + "support_files/doc/source/qiita-philosophy/images/*.png", + "support_files/doc/source/_static/*.png", + ], + }, + scripts=glob("scripts/*"), + # making sure that numpy is installed before biom + setup_requires=["numpy", "cython"], + install_requires=[ + "psycopg2", + "click", + "bcrypt", + "pandas<2.0", + "biom-format", + "tornado<6.0", + "toredis", + "redis", + "scp", + "pyparsing", + "h5py", + "natsort", + "nose", + "pep8", + "networkx", + "humanize==4.11", + "wtforms<3.0.0", + "nltk<=3.8.1", + "openpyxl", + "sphinx-bootstrap-theme", + "Sphinx<3.0", + "gitpython", + "redbiom", + "pyzmq", + "sphinx_rtd_theme", + "paramiko", + "seaborn", + "matplotlib", + "scipy<=1.10.1", + "nose", + "ruff", + "six", + "qiita-files @ https://github.com/qiita-spots/qiita-files/archive/master.zip", + "mock", + "python-jose", + "markdown2", + "iteration_utilities", + "supervisor @ https://github.com/Supervisor/supervisor/archive/master.zip", + "joblib", + ], + classifiers=classifiers, +) diff --git a/test_data_studies/studies/895/prep_template_895.txt b/test_data_studies/studies/895/prep_template_895.txt index 259d651d5..4c550982f 100644 --- a/test_data_studies/studies/895/prep_template_895.txt +++ b/test_data_studies/studies/895/prep_template_895.txt @@ -4,4 +4,4 @@ Puhimau.mat.2 CTCATCATGTTC ANL Examination of geothermal sites on Kilauea. One s Puhimau.mat.3 CCAGGGACTTCT ANL Examination of geothermal sites on Kilauea. One site involves a steaming tumulus that supports a stratified biofilm at about 50 C. The site other involves a forest that was destroyed by sub-surface heating. What exists at present are heated soils and biofilms that develop on steaming tree trunks. The latter include anoxygenic chloroflexi plus a variety of CO oxidizers. Gasser_Kilauea_geothermal unknown "This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. " GT Illumina HiSeq 1000 Illumina GTGCCAGCMGCCGCGGTAA 0 ANL 8/14/12 lane5_NoIndex ANL ANL ".1,g" sequencing by synthesis 16S rRNA V4 FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT EMP Puhimau.soil.1 GCAATCCTTGCG ANL Examination of geothermal sites on Kilauea. One site involves a steaming tumulus that supports a stratified biofilm at about 50 C. The site other involves a forest that was destroyed by sub-surface heating. What exists at present are heated soils and biofilms that develop on steaming tree trunks. The latter include anoxygenic chloroflexi plus a variety of CO oxidizers. Gasser_Kilauea_geothermal unknown "This analysis was done as in Caporaso et al 2011 Genome research. 
The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. " GT Illumina HiSeq 1000 Illumina GTGCCAGCMGCCGCGGTAA 0 ANL 8/14/12 lane5_NoIndex ANL ANL ".1,g" sequencing by synthesis 16S rRNA V4 FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT EMP Puhimau.soil.2 CCTGCTTCCTTC ANL Examination of geothermal sites on Kilauea. One site involves a steaming tumulus that supports a stratified biofilm at about 50 C. The site other involves a forest that was destroyed by sub-surface heating. What exists at present are heated soils and biofilms that develop on steaming tree trunks. The latter include anoxygenic chloroflexi plus a variety of CO oxidizers. Gasser_Kilauea_geothermal unknown "This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. " GT Illumina HiSeq 1000 Illumina GTGCCAGCMGCCGCGGTAA 0 ANL 8/14/12 lane5_NoIndex ANL ANL ".1,g" sequencing by synthesis 16S rRNA V4 FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT EMP -Puhimau.soil.3 CAAGGCACAAGG ANL Examination of geothermal sites on Kilauea. One site involves a steaming tumulus that supports a stratified biofilm at about 50 C. The site other involves a forest that was destroyed by sub-surface heating. What exists at present are heated soils and biofilms that develop on steaming tree trunks. The latter include anoxygenic chloroflexi plus a variety of CO oxidizers. Gasser_Kilauea_geothermal unknown "This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. " GT Illumina HiSeq 1000 Illumina GTGCCAGCMGCCGCGGTAA 0 ANL 8/14/12 lane5_NoIndex ANL ANL ".1,g" sequencing by synthesis 16S rRNA V4 FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT EMP \ No newline at end of file +Puhimau.soil.3 CAAGGCACAAGG ANL Examination of geothermal sites on Kilauea. One site involves a steaming tumulus that supports a stratified biofilm at about 50 C. 
The site other involves a forest that was destroyed by sub-surface heating. What exists at present are heated soils and biofilms that develop on steaming tree trunks. The latter include anoxygenic chloroflexi plus a variety of CO oxidizers. Gasser_Kilauea_geothermal unknown "This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. " GT Illumina HiSeq 1000 Illumina GTGCCAGCMGCCGCGGTAA 0 ANL 8/14/12 lane5_NoIndex ANL ANL ".1,g" sequencing by synthesis 16S rRNA V4 FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT EMP From e5372679e57dfc898ef4706b0a47bf1e68b48069 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Fri, 5 Dec 2025 12:48:36 -0700 Subject: [PATCH 2/3] . import --- qiita_db/__init__.py | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/qiita_db/__init__.py b/qiita_db/__init__.py index fed025057..644cc8f6b 100644 --- a/qiita_db/__init__.py +++ b/qiita_db/__init__.py @@ -6,28 +6,27 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from . import ( - analysis, - archive, - artifact, - base, - commands, - environment_manager, - exceptions, - investigation, - logger, - meta_util, - metadata_template, - ontology, - portal, - processing_job, - reference, - software, - sql_connection, - study, - user, - util, -) +from . import base # noqa +from . import util # noqa +from . import sql_connection # noqa +from . import metadata_template # noqa +from . import analysis # noqa +from . import artifact # noqa +from . import archive # noqa +from . import commands # noqa +from . import environment_manager # noqa +from . import exceptions # noqa +from . import investigation # noqa +from . import logger # noqa +from . import meta_util # noqa +from . import ontology # noqa +from . import portal # noqa +from . import reference # noqa +from . import software # noqa +from . import study # noqa +from . import user # noqa +from . import processing_job # noqa + __version__ = "2025.11" From 5dc0d274e04bdcc659b2e48f12571bc294dc067d Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Fri, 5 Dec 2025 13:46:40 -0700 Subject: [PATCH 3/3] check_analysis_access --- qiita_pet/handlers/analysis_handlers/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qiita_pet/handlers/analysis_handlers/__init__.py b/qiita_pet/handlers/analysis_handlers/__init__.py index 0eb4bdd6b..d489b74c9 100644 --- a/qiita_pet/handlers/analysis_handlers/__init__.py +++ b/qiita_pet/handlers/analysis_handlers/__init__.py @@ -6,6 +6,8 @@ # The full license is in the file LICENSE, distributed with this software. 
# ----------------------------------------------------------------------------- +from .util import check_analysis_access # noqa + from .base_handlers import ( AnalysisGraphHandler, AnalysisHandler, @@ -18,7 +20,6 @@ SelectedSamplesHandler, ) from .sharing_handlers import ShareAnalysisAJAX -from .util import check_analysis_access __all__ = [ "CreateAnalysisHandler",