From 3676fc02e2cdd4a7cd8bbcb3cd2fbbe29db23f16 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 15 Dec 2025 09:57:45 -0700 Subject: [PATCH 01/13] TST: study associations endpoint test --- .../test/rest/test_study_associations.py | 343 +++++++----------- 1 file changed, 124 insertions(+), 219 deletions(-) diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py index 3436062ac..ea2053f16 100644 --- a/qiita_pet/test/rest/test_study_associations.py +++ b/qiita_pet/test/rest/test_study_associations.py @@ -10,237 +10,142 @@ from tornado.escape import json_decode +from qiita_db.study import Study from qiita_pet.test.rest.test_base import RESTHandlerTestCase class StudyAssociationTests(RESTHandlerTestCase): def test_get_valid(self): - IGNORE = "IGNORE" - exp = { - "study_id": 1, - "study_sample_metadata_filepath": IGNORE, - "prep_templates": [ - { - "prep_id": 1, - "prep_status": "private", - "prep_sample_metadata_filepath": IGNORE, - "prep_data_type": "18S", - "prep_human_filtering": "The greatest human filtering method", - "prep_artifacts": [ - { - "artifact_id": 1, - "artifact_status": "private", - "artifact_parent_ids": None, - "artifact_basal_id": 1, - "artifact_processing_id": None, - "artifact_processing_name": None, - "artifact_processing_arguments": None, - "artifact_filepaths": [ - { - "artifact_filepath_id": 1, - "artifact_filepath": IGNORE, - "artifact_filepath_type": "raw_forward_seqs", - }, - { - "artifact_filepath_id": 2, - "artifact_filepath": IGNORE, - "artifact_filepath_type": "raw_barcodes", - }, - ], - }, - { - "artifact_id": 2, - "artifact_status": "private", - "artifact_parent_ids": [1], - "artifact_basal_id": 1, - "artifact_processing_id": 1, - "artifact_processing_name": "Split libraries FASTQ", - "artifact_processing_arguments": { - "input_data": "1", - "max_bad_run_length": "3", - "min_per_read_length_fraction": "0.75", - "sequence_max_n": "0", - "rev_comp_barcode": "False", - 
"rev_comp_mapping_barcodes": "False", - "rev_comp": "False", - "phred_quality_threshold": "3", - "barcode_type": "golay_12", - "max_barcode_errors": "1.5", - "phred_offset": "auto", - }, - "artifact_filepaths": [ - { - "artifact_filepath_id": 3, - "artifact_filepath": IGNORE, - "artifact_filepath_type": "preprocessed_fasta", - }, - { - "artifact_filepath": IGNORE, - "artifact_filepath_id": 4, - "artifact_filepath_type": "preprocessed_fastq", - }, - { - "artifact_filepath": IGNORE, - "artifact_filepath_id": 5, - "artifact_filepath_type": "preprocessed_demux", - }, - ], - }, - { - "artifact_id": 3, - "artifact_status": "private", - "artifact_parent_ids": [1], - "artifact_basal_id": 1, - "artifact_processing_id": 1, - "artifact_processing_name": "Split libraries FASTQ", - "artifact_processing_arguments": { - "input_data": "1", - "max_bad_run_length": "3", - "min_per_read_length_fraction": "0.75", - "sequence_max_n": "0", - "rev_comp_barcode": "False", - "rev_comp_mapping_barcodes": "True", - "rev_comp": "False", - "phred_quality_threshold": "3", - "barcode_type": "golay_12", - "max_barcode_errors": "1.5", - "phred_offset": "auto", - }, - "artifact_filepaths": None, - }, - { - "artifact_id": 4, - "artifact_status": "private", - "artifact_parent_ids": [2], - "artifact_basal_id": 1, - "artifact_processing_id": 3, - "artifact_processing_name": "Pick closed-reference OTUs", - "artifact_processing_arguments": { - "input_data": "2", - "reference": "1", - "sortmerna_e_value": "1", - "sortmerna_max_pos": "10000", - "similarity": "0.97", - "sortmerna_coverage": "0.97", - "threads": "1", - }, - "artifact_filepaths": [ - { - "artifact_filepath_id": 9, - "artifact_filepath": IGNORE, - "artifact_filepath_type": "biom", - } - ], - }, - { - "artifact_id": 5, - "artifact_status": "private", - "artifact_parent_ids": [2], - "artifact_basal_id": 1, - "artifact_processing_id": 3, - "artifact_processing_name": "Pick closed-reference OTUs", - "artifact_processing_arguments": { - 
"input_data": "2", - "reference": "1", - "sortmerna_e_value": "1", - "sortmerna_max_pos": "10000", - "similarity": "0.97", - "sortmerna_coverage": "0.97", - "threads": "1", - }, - "artifact_filepaths": [ - { - "artifact_filepath_id": 9, - "artifact_filepath": IGNORE, - "artifact_filepath_type": "biom", - } - ], - }, - { - "artifact_id": 6, - "artifact_status": "private", - "artifact_parent_ids": [2], - "artifact_basal_id": 1, - "artifact_processing_id": 3, - "artifact_processing_name": "Pick closed-reference OTUs", - "artifact_processing_arguments": { - "input_data": "2", - "reference": "2", - "sortmerna_e_value": "1", - "sortmerna_max_pos": "10000", - "similarity": "0.97", - "sortmerna_coverage": "0.97", - "threads": "1", - }, - "artifact_filepaths": [ - { - "artifact_filepath_id": 12, - "artifact_filepath": IGNORE, - "artifact_filepath_type": "biom", - } - ], - }, - ], - }, - { - "prep_id": 2, - "prep_status": "private", - "prep_sample_metadata_filepath": IGNORE, - "prep_data_type": "18S", - "prep_human_filtering": None, - "prep_artifacts": [ - { - "artifact_id": 7, - "artifact_parent_ids": None, - "artifact_basal_id": 7, - "artifact_status": "private", - "artifact_processing_id": None, - "artifact_processing_name": None, - "artifact_processing_arguments": None, - "artifact_filepaths": [ - { - "artifact_filepath_id": 22, - "artifact_filepath": IGNORE, - "artifact_filepath_type": "biom", - } - ], - } - ], - }, - ], - } + IGNORE = IGNORE + exp = {'study': 1, + 'prep_templates': [{'prep_id': 1, + 'prep_filepath': IGNORE, + 'prep_datatype': '18S', + 'prep_human_filtering': 'The greatest human filtering method', + 'prep_artifacts': [{'artifact_id': 1, + 'artifact_parent_ids': [1], + 'artifact_basal_id': 1, + 'artifact_processing_id': None, + 'artifact_processing_name': None, + 'artifact_processing_arguments': None, + 'artifact_filepaths': [{'artifact_filepath_id': 1, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'raw_forward_seqs'}, + 
{'artifact_filepath_id': 2, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'raw_barcodes'}]}, + {'artifact_id': 2, + 'artifact_parent_ids': None, + 'artifact_basal_id': 1, + 'artifact_processing_id': 1, + 'artifact_processing_name': 'Split libraries FASTQ', + 'artifact_processing_arguments': {'input_data': '1', + 'max_bad_run_length': '3', + 'min_per_read_length_fraction': '0.75', + 'sequence_max_n': '0', + 'rev_comp_barcode': 'False', + 'rev_comp_mapping_barcodes': 'False', + 'rev_comp': 'False', + 'phred_quality_threshold': '3', + 'barcode_type': 'golay_12', + 'max_barcode_errors': '1.5', + 'phred_offset': 'auto'}, + 'artifact_filepaths': [{'artifact_filepath_id': 3, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'preprocessed_fasta'}, + {'artifact_filepath': IGNORE, + 'artifact_filepath_id': 4, + 'artifact_filepath_type': 'preprocessed_fastq'}, + {'artifact_filepath': IGNORE, + 'artifact_filepath_id': 5, + 'artifact_filepath_type': 'preprocessed_demux'}]}, + {'artifact_id': 3, + 'artifact_parent_ids': [1], + 'artifact_basal_id': 1, + 'artifact_processing_id': 1, + 'artifact_processing_name': 'Split libraries FASTQ', + 'artifact_processing_arguments': {'input_data': '1', + 'max_bad_run_length': '3', + 'min_per_read_length_fraction': '0.75', + 'sequence_max_n': '0', + 'rev_comp_barcode': 'False', + 'rev_comp_mapping_barcodes': 'False', + 'rev_comp': 'False', + 'phred_quality_threshold': '3', + 'barcode_type': 'golay_12', + 'max_barcode_errors': '1.5', + 'phred_offset': 'auto'}, + 'artifact_filepaths': []}, + {'artifact_id': 4, + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': {'input_data': '2', + 'reference': '1', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{'artifact_filepath_id': 9, + 
'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}, + {'artifact_id': 5, + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': {'input_data': '2', + 'reference': '1', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{'artifact_filepath_id': 9, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}, + {'artifact_id': 6, + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': {'input_data': '2', + 'reference': '2', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{'artifact_filepath_id': 12, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}]}, + {'prep_id': 2, + 'prep_filepath': IGNORE, + 'prep_datatype': '18S', + 'prep_human_filtering': None, + 'prep_artifacts': [{'artifact_id': 7, + 'artifact_parent_ids': [], + 'artifact_basal_id': 7, + 'artifact_processing_id': None, + 'artifact_processing_name': None, + 'artifact_processing_arguments': None, + 'artifact_filepaths': [{'artifact_filepath_id': 22, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}]}]} - response = self.get("/api/v1/study/1/associations", headers=self.headers) + response = self.get('/api/v1/study-association/1', headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) - - def _process_dict(d): - return [(d, k) for k in d] - - def _process_list(list_): - if list_ is None: - return [] - - return [dk for d in list_ for dk in _process_dict(d)] - - stack = _process_dict(obs) - while stack: - (d, k) = stack.pop() - if 
k.endswith("filepath"): - d[k] = IGNORE - elif k.endswith("filepaths"): - stack.extend(_process_list(d[k])) - elif k.endswith("templates"): - stack.extend(_process_list(d[k])) - elif k.endswith("artifacts"): - stack.extend(_process_list(d[k])) - self.assertEqual(obs, exp) def test_get_invalid(self): - response = self.get("/api/v1/study/0/associations", headers=self.headers) + response = self.get('/api/v1/study-association/0', headers=self.headers) self.assertEqual(response.code, 404) + self.assertEqual(json_decode(response.body), + {'message': 'Study not found'}) -if __name__ == "__main__": +if __name__ == '__main__': main() From 54695332dd709eb27a0a280c5fd13d812a387c87 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 6 Nov 2025 16:20:28 -0700 Subject: [PATCH 02/13] API: add /api/v1/study//associations to retrieve comprehensive id, path, processing information for a study --- .../test/rest/test_study_associations.py | 61 ++++++++++++++----- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py index ea2053f16..4d2adbc99 100644 --- a/qiita_pet/test/rest/test_study_associations.py +++ b/qiita_pet/test/rest/test_study_associations.py @@ -16,14 +16,17 @@ class StudyAssociationTests(RESTHandlerTestCase): def test_get_valid(self): - IGNORE = IGNORE - exp = {'study': 1, + IGNORE = 'IGNORE' + exp = {'study_id': 1, + 'study_sample_metadata_filepath': IGNORE, 'prep_templates': [{'prep_id': 1, - 'prep_filepath': IGNORE, - 'prep_datatype': '18S', + 'prep_status': 'private', + 'prep_sample_metadata_filepath': IGNORE, + 'prep_data_type': '18S', 'prep_human_filtering': 'The greatest human filtering method', 'prep_artifacts': [{'artifact_id': 1, - 'artifact_parent_ids': [1], + 'artifact_status': 'private', + 'artifact_parent_ids': None, 'artifact_basal_id': 1, 'artifact_processing_id': None, 'artifact_processing_name': None, @@ -35,7 +38,8 @@ def test_get_valid(self): 
'artifact_filepath': IGNORE, 'artifact_filepath_type': 'raw_barcodes'}]}, {'artifact_id': 2, - 'artifact_parent_ids': None, + 'artifact_status': 'private', + 'artifact_parent_ids': [1], 'artifact_basal_id': 1, 'artifact_processing_id': 1, 'artifact_processing_name': 'Split libraries FASTQ', @@ -60,6 +64,7 @@ def test_get_valid(self): 'artifact_filepath_id': 5, 'artifact_filepath_type': 'preprocessed_demux'}]}, {'artifact_id': 3, + 'artifact_status': 'private', 'artifact_parent_ids': [1], 'artifact_basal_id': 1, 'artifact_processing_id': 1, @@ -69,14 +74,15 @@ def test_get_valid(self): 'min_per_read_length_fraction': '0.75', 'sequence_max_n': '0', 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'False', + 'rev_comp_mapping_barcodes': 'True', 'rev_comp': 'False', 'phred_quality_threshold': '3', 'barcode_type': 'golay_12', 'max_barcode_errors': '1.5', 'phred_offset': 'auto'}, - 'artifact_filepaths': []}, + 'artifact_filepaths': None}, {'artifact_id': 4, + 'artifact_status': 'private', 'artifact_parent_ids': [2], 'artifact_basal_id': 1, 'artifact_processing_id': 3, @@ -92,6 +98,7 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}, {'artifact_id': 5, + 'artifact_status': 'private', 'artifact_parent_ids': [2], 'artifact_basal_id': 1, 'artifact_processing_id': 3, @@ -107,6 +114,7 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}, {'artifact_id': 6, + 'artifact_status': 'private', 'artifact_parent_ids': [2], 'artifact_basal_id': 1, 'artifact_processing_id': 3, @@ -122,12 +130,14 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}]}, {'prep_id': 2, - 'prep_filepath': IGNORE, - 'prep_datatype': '18S', + 'prep_status': 'private', + 'prep_sample_metadata_filepath': IGNORE, + 'prep_data_type': '18S', 'prep_human_filtering': None, 'prep_artifacts': [{'artifact_id': 7, - 'artifact_parent_ids': [], + 'artifact_parent_ids': None, 
'artifact_basal_id': 7, + 'artifact_status': 'private', 'artifact_processing_id': None, 'artifact_processing_name': None, 'artifact_processing_arguments': None, @@ -135,16 +145,37 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}]}]} - response = self.get('/api/v1/study-association/1', headers=self.headers) + response = self.get('/api/v1/study/1/associations', headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) + + def _process_dict(d): + return [(d, k) for k in d] + + def _process_list(l): + if l is None: + return [] + + return [dk for d in l + for dk in _process_dict(d)] + + stack = _process_dict(obs) + while stack: + (d, k) = stack.pop() + if k.endswith('filepath'): + d[k] = IGNORE + elif k.endswith('filepaths'): + stack.extend(_process_list(d[k])) + elif k.endswith('templates'): + stack.extend(_process_list(d[k])) + elif k.endswith('artifacts'): + stack.extend(_process_list(d[k])) + self.assertEqual(obs, exp) def test_get_invalid(self): - response = self.get('/api/v1/study-association/0', headers=self.headers) + response = self.get('/api/v1/study/0/associations', headers=self.headers) self.assertEqual(response.code, 404) - self.assertEqual(json_decode(response.body), - {'message': 'Study not found'}) if __name__ == '__main__': From bd1cf8652e5d86b155eeea89351542882c349abf Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 13:16:06 -0700 Subject: [PATCH 03/13] SQL: draft schema changes to support additional identifiers --- qiita_db/support_files/patches/95.sql | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 qiita_db/support_files/patches/95.sql diff --git a/qiita_db/support_files/patches/95.sql b/qiita_db/support_files/patches/95.sql new file mode 100644 index 000000000..a2e713eb2 --- /dev/null +++ b/qiita_db/support_files/patches/95.sql @@ -0,0 +1,31 @@ +-- Dec 12, 2025 +-- Adding SEQUENCEs and support tables for 
sample_idx, prep_sample_idx, +-- and artifact_sample_idx + +CREATE SEQUENCE sequence_sample_idx AS BIGINT; +CREATE TABLE map_sample_idx ( + sample_name VARCHAR NOT NULL PRIMARY KEY, + study_idx BIGINT NOT NULL, + sample_idx BIGINT DEFAULT NEXTVAL('sequence_sample_idx') NOT NULL, + UNIQUE (study_idx, sample_idx), + UNIQUE (sample_idx), + CONSTRAINT fk_study FOREIGN KEY (study_idx) REFERENCES qiita.study (study_id) +); + +CREATE SEQUENCE sequence_prep_sample_idx AS BIGINT; +CREATE TABLE map_prep_sample_idx ( + prep_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('sequence_prep_sample_idx'), + prep_idx BIGINT NOT NULL, + sample_idx BIGINT NOT NULL, + UNIQUE (prep_idx, prep_sample_idx), + CONSTRAINT fk_prep_template FOREIGN KEY (prep_idx) REFERENCES qiita.prep_template (prep_template_id) +); + +CREATE SEQUENCE sequence_artifact_sample_idx AS BIGINT; +CREATE TABLE map_artifact_sample_idx ( + artifact_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('sequence_artifact_sample_idx'), + artifact_idx BIGINT NOT NULL, + prep_sample_idx BIGINT NOT NULL, + UNIQUE (artifact_idx, artifact_sample_idx), + CONSTRAINT fk_artifact FOREIGN KEY (artifact_idx) REFERENCES qiita.artifact (artifact_id) +); From 15ab6bf88bfedea37c0f04331faff1d0718a261a Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 13:52:09 -0700 Subject: [PATCH 04/13] API: unique_ids stub on base metadata object --- .../metadata_template/base_metadata_template.py | 17 +++++++++++++++++ .../test/test_base_metadata_template.py | 9 ++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index 36db2d207..096177e4f 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -470,6 +470,8 @@ class MetadataTemplate(qdb.base.QiitaObject): # forbidden_words not defined for base class. 
Please redefine for # sub-classes. _forbidden_words = {} + # qiita-unique integer identifier mapping table + _id_map_table = None @classmethod def _check_id(cls, id_): @@ -945,6 +947,21 @@ def _common_extend_steps(self, md_template): return new_samples, new_cols + def unique_ids(self): + r"""Return a stable mapping of sample_name to integers + + Obtain a map from a sample_name to an integer. The association is + unique Qiita-wide and 1-1. + + This method is idempotent. + + Returns + ------ + dict + {sample_name: integer_index} + """ + raise IncompetentQiitaDeveloperError() + @classmethod def exists(cls, obj_id): r"""Checks if already exists a MetadataTemplate for the provided object diff --git a/qiita_db/metadata_template/test/test_base_metadata_template.py b/qiita_db/metadata_template/test/test_base_metadata_template.py index d2142231b..c543316ce 100644 --- a/qiita_db/metadata_template/test/test_base_metadata_template.py +++ b/qiita_db/metadata_template/test/test_base_metadata_template.py @@ -42,7 +42,14 @@ def test_init(self): with self.assertRaises(IncompetentQiitaDeveloperError): MT(1) - def test_exist(self): + def test_unique_ids(self): + """Unique IDs raises an error because it's not called from a subclass + """ + MT = qdb.metadata_template.base_metadata_template.MetadataTemplate + with self.assertRaises(IncompetentQiitaDeveloperError): + MT.unique_ids(self.study) + + def test_exists(self): """Exists raises an error because it's not called from a subclass""" MT = qdb.metadata_template.base_metadata_template.MetadataTemplate with self.assertRaises(IncompetentQiitaDeveloperError): From 6762157700d6daa673b972dc6b5ff257eea60c91 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 13:59:06 -0700 Subject: [PATCH 05/13] TST: check unique_ids --- qiita_db/metadata_template/test/test_prep_template.py | 9 +++++++++ qiita_db/metadata_template/test/test_sample_template.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git 
a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py index 2e61229b6..9b8868135 100644 --- a/qiita_db/metadata_template/test/test_prep_template.py +++ b/qiita_db/metadata_template/test/test_prep_template.py @@ -540,6 +540,15 @@ def test_init(self): st = qdb.metadata_template.prep_template.PrepTemplate(1) self.assertTrue(st.id, 1) + def test_unique_ids(self): + obs = self.tester.unique_ids() + exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()))} + self.assertEqual(obs, exp) + + # verify a repeat call is unchanged + obs = self.tester.unique_ids() + self.assertEqual(obs, exp) + def test_table_name(self): """Table name return the correct string""" obs = qdb.metadata_template.prep_template.PrepTemplate._table_name(1) diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index 06281a095..63d8b0ad2 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -624,6 +624,15 @@ def test_init(self): st = qdb.metadata_template.sample_template.SampleTemplate(1) self.assertTrue(st.id, 1) + def test_unique_ids(self): + obs = self.tester.unique_ids() + exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()))} + self.assertEqual(obs, exp) + + # verify a repeat call is unchanged + obs = self.tester.unique_ids() + self.assertEqual(obs, exp) + def test_table_name(self): """Table name return the correct string""" obs = qdb.metadata_template.sample_template.SampleTemplate._table_name( From 391fa0b24226756b98403d9a21878baa51595d3f Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 14:09:28 -0700 Subject: [PATCH 06/13] TST: check unique_ids --- qiita_db/test/test_artifact.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/qiita_db/test/test_artifact.py b/qiita_db/test/test_artifact.py index 4b47db89a..53eb26d56 100644 
--- a/qiita_db/test/test_artifact.py +++ b/qiita_db/test/test_artifact.py @@ -1357,6 +1357,16 @@ def test_delete_as_output_job(self): with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): qdb.artifact.Artifact(artifact.id) + def test_unique_ids(self): + art = qdb.artifact.Artifact(1) + obs = art.unique_ids() + exp = {name: idx for idx, name in enumerate(sorted(art.prep_templates[0].keys()))} + self.assertEqual(obs, exp) + + # verify repeat calls are unchanged + obs = art.unique_ids() + self.assertEqual(obs, exp) + def test_name_setter(self): a = qdb.artifact.Artifact(1) self.assertEqual(a.name, "Raw data 1") From 5303ac0a9a6e1b8707c2ff6b25f08040de09e4d6 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 14:45:24 -0700 Subject: [PATCH 07/13] API: passing SampleTemplate.unique_ids --- .../base_metadata_template.py | 2 - qiita_db/metadata_template/sample_template.py | 37 +++++++++++++++++++ .../test/test_sample_template.py | 2 +- qiita_db/support_files/patches/95.sql | 1 - 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index 096177e4f..49f3cd62c 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -470,8 +470,6 @@ class MetadataTemplate(qdb.base.QiitaObject): # forbidden_words not defined for base class. Please redefine for # sub-classes. 
_forbidden_words = {} - # qiita-unique integer identifier mapping table - _id_map_table = None @classmethod def _check_id(cls, id_): diff --git a/qiita_db/metadata_template/sample_template.py b/qiita_db/metadata_template/sample_template.py index e8bac7b25..d4e2540b8 100644 --- a/qiita_db/metadata_template/sample_template.py +++ b/qiita_db/metadata_template/sample_template.py @@ -176,6 +176,43 @@ def columns_restrictions(self): """ return qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS + def unique_ids(self): + r"""Return a stable mapping of sample_name to integers + + Obtain a map from a sample_name to an integer. The association is + unique Qiita-wide and 1-1. + + This method is idempotent. + + Returns + ------ + dict + {sample_name: integer_index} + """ + samples = [[self._id, s_id] for s_id in sorted(self.keys())] + with qdb.sql_connection.TRN: + # insert any IDs not present + sql = """INSERT INTO map_sample_idx (study_idx, sample_name) + VALUES (%s, %s) + ON CONFLICT (sample_name) + DO NOTHING""" + qdb.sql_connection.TRN.add(sql, samples, many=True) + + # obtain the association + sql = """SELECT + sample_name, + sample_idx + FROM map_sample_idx""" + qdb.sql_connection.TRN.add(sql) + + # form into a dict + mapping = {r[0]: r[1] for r in qdb.sql_connection.TRN.execute_fetchindex()} + + # commit in the event changes were made + qdb.sql_connection.TRN.commit() + + return mapping + def delete_samples(self, sample_names): """Delete `sample_names` from sample information file diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index 63d8b0ad2..5bc6a8ec5 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -626,7 +626,7 @@ def test_init(self): def test_unique_ids(self): obs = self.tester.unique_ids() - exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()))} + exp = {name: idx for idx, name 
in enumerate(sorted(self.tester.keys()), 1)} self.assertEqual(obs, exp) # verify a repeat call is unchanged diff --git a/qiita_db/support_files/patches/95.sql b/qiita_db/support_files/patches/95.sql index a2e713eb2..5cd071564 100644 --- a/qiita_db/support_files/patches/95.sql +++ b/qiita_db/support_files/patches/95.sql @@ -7,7 +7,6 @@ CREATE TABLE map_sample_idx ( sample_name VARCHAR NOT NULL PRIMARY KEY, study_idx BIGINT NOT NULL, sample_idx BIGINT DEFAULT NEXTVAL('sequence_sample_idx') NOT NULL, - UNIQUE (study_idx, sample_idx), UNIQUE (sample_idx), CONSTRAINT fk_study FOREIGN KEY (study_idx) REFERENCES qiita.study (study_id) ); From 0e969d273e24091ffc9bbc4dac7edd51ef599376 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 15:17:20 -0700 Subject: [PATCH 08/13] API: passing PrepTemplate.unique_ids --- qiita_db/metadata_template/prep_template.py | 44 +++++++++++++++++++ .../test/test_prep_template.py | 2 +- qiita_db/support_files/patches/95.sql | 4 +- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py index 3808c4efd..036f286cd 100644 --- a/qiita_db/metadata_template/prep_template.py +++ b/qiita_db/metadata_template/prep_template.py @@ -341,6 +341,50 @@ def delete(cls, id_): qdb.sql_connection.TRN.execute() + def unique_ids(self): + r"""Return a stable mapping of sample_name to integers + + Obtain a map from a sample_name to an integer. The association is + unique Qiita-wide and 1-1. + + This method is idempotent. 
+ + Returns + ------ + dict + {sample_name: integer_index} + """ + sample_idx = qdb.study.Study(self.study_id).sample_template.unique_ids() + + paired = [] + for p_id in sorted(self.keys()): + if p_id in sample_idx: + paired.append([self._id, sample_idx[p_id]]) + + with qdb.sql_connection.TRN: + # insert any IDs not present + sql = """INSERT INTO map_prep_sample_idx (prep_idx, sample_idx) + VALUES (%s, %s) + ON CONFLICT (prep_idx, sample_idx) + DO NOTHING""" + qdb.sql_connection.TRN.add(sql, paired, many=True) + + # obtain the association + sql = """SELECT + sample_name, + prep_sample_idx + FROM map_prep_sample_idx + JOIN map_sample_idx USING (sample_idx)""" + qdb.sql_connection.TRN.add(sql) + + # form into a dict + mapping = {r[0]: r[1] for r in qdb.sql_connection.TRN.execute_fetchindex()} + + # commit in the event changes were made + qdb.sql_connection.TRN.commit() + + return mapping + def data_type(self, ret_id=False): """Returns the data_type or the data_type id diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py index 9b8868135..6c6a73541 100644 --- a/qiita_db/metadata_template/test/test_prep_template.py +++ b/qiita_db/metadata_template/test/test_prep_template.py @@ -542,7 +542,7 @@ def test_init(self): def test_unique_ids(self): obs = self.tester.unique_ids() - exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()))} + exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()), 1)} self.assertEqual(obs, exp) # verify a repeat call is unchanged diff --git a/qiita_db/support_files/patches/95.sql b/qiita_db/support_files/patches/95.sql index 5cd071564..05485abe4 100644 --- a/qiita_db/support_files/patches/95.sql +++ b/qiita_db/support_files/patches/95.sql @@ -16,7 +16,7 @@ CREATE TABLE map_prep_sample_idx ( prep_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('sequence_prep_sample_idx'), prep_idx BIGINT NOT NULL, sample_idx BIGINT NOT NULL, - UNIQUE 
(prep_idx, prep_sample_idx), + CONSTRAINT uc_prep_sample UNIQUE(prep_idx, sample_idx), CONSTRAINT fk_prep_template FOREIGN KEY (prep_idx) REFERENCES qiita.prep_template (prep_template_id) ); @@ -25,6 +25,6 @@ CREATE TABLE map_artifact_sample_idx ( artifact_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('sequence_artifact_sample_idx'), artifact_idx BIGINT NOT NULL, prep_sample_idx BIGINT NOT NULL, - UNIQUE (artifact_idx, artifact_sample_idx), + CONSTRAINT uc_artifact_sample UNIQUE(artifact_idx, prep_sample_idx), CONSTRAINT fk_artifact FOREIGN KEY (artifact_idx) REFERENCES qiita.artifact (artifact_id) ); From 584ae08a25b3e00900ef18d7b3e1e88f263389a0 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 15:30:30 -0700 Subject: [PATCH 09/13] API: passing Artifact.unique_ids --- qiita_db/artifact.py | 48 ++++++++++++++++++++++++++++++++++ qiita_db/test/test_artifact.py | 2 +- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index 4ea95160a..58efe1af0 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -1853,3 +1853,51 @@ def human_reads_filter_method(self, value): SET human_reads_filter_method_id = %s WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [idx[0], self.id]) + + def unique_ids(self): + r"""Return a stable mapping of sample_name to integers + + Obtain a map from a sample_name to an integer. The association is + unique Qiita-wide and 1-1. + + This method is idempotent. 
+ + Returns + ------ + dict + {sample_name: integer_index} + """ + if len(self.prep_templates) == 0: + raise ValueError("No associated prep template") + + if len(self.prep_templates) > 1: + raise ValueError("Cannot assign against multiple prep templates") + + paired = [[self._id, ps_idx] for ps_idx in sorted(self.prep_templates[0].unique_ids().values())] + + with qdb.sql_connection.TRN: + # insert any IDs not present + sql = """INSERT INTO map_artifact_sample_idx (artifact_idx, prep_sample_idx) + VALUES (%s, %s) + ON CONFLICT (artifact_idx, prep_sample_idx) + DO NOTHING""" + qdb.sql_connection.TRN.add(sql, paired, many=True) + + # obtain the association + sql = """SELECT + sample_name, + artifact_sample_idx + FROM map_artifact_sample_idx + JOIN map_prep_sample_idx USING (prep_sample_idx) + JOIN map_sample_idx USING (sample_idx) + WHERE artifact_idx=%s + """ + qdb.sql_connection.TRN.add(sql, [self._id, ]) + + # form into a dict + mapping = {r[0]: r[1] for r in qdb.sql_connection.TRN.execute_fetchindex()} + + # commit in the event changes were made + qdb.sql_connection.TRN.commit() + + return mapping diff --git a/qiita_db/test/test_artifact.py b/qiita_db/test/test_artifact.py index 53eb26d56..a537a9178 100644 --- a/qiita_db/test/test_artifact.py +++ b/qiita_db/test/test_artifact.py @@ -1360,7 +1360,7 @@ def test_delete_as_output_job(self): def test_unique_ids(self): art = qdb.artifact.Artifact(1) obs = art.unique_ids() - exp = {name: idx for idx, name in enumerate(sorted(art.prep_templates[0].keys()))} + exp = {name: idx for idx, name in enumerate(sorted(art.prep_templates[0].keys()), 1)} self.assertEqual(obs, exp) # verify repeat calls are unchanged From d82c27cc0b718afbe53dfb91b6ab716568ddb9e0 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 15:32:13 -0700 Subject: [PATCH 10/13] Constrain returned values --- qiita_db/metadata_template/prep_template.py | 6 ++++-- qiita_db/metadata_template/sample_template.py | 6 ++++-- 2 files changed, 8 
insertions(+), 4 deletions(-) diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py index 036f286cd..10fbe4d1e 100644 --- a/qiita_db/metadata_template/prep_template.py +++ b/qiita_db/metadata_template/prep_template.py @@ -374,8 +374,10 @@ def unique_ids(self): sample_name, prep_sample_idx FROM map_prep_sample_idx - JOIN map_sample_idx USING (sample_idx)""" - qdb.sql_connection.TRN.add(sql) + JOIN map_sample_idx USING (sample_idx) + WHERE prep_idx=%s + """ + qdb.sql_connection.TRN.add(sql, [self._id, ]) # form into a dict mapping = {r[0]: r[1] for r in qdb.sql_connection.TRN.execute_fetchindex()} diff --git a/qiita_db/metadata_template/sample_template.py b/qiita_db/metadata_template/sample_template.py index d4e2540b8..524dec4f8 100644 --- a/qiita_db/metadata_template/sample_template.py +++ b/qiita_db/metadata_template/sample_template.py @@ -202,8 +202,10 @@ def unique_ids(self): sql = """SELECT sample_name, sample_idx - FROM map_sample_idx""" - qdb.sql_connection.TRN.add(sql) + FROM map_sample_idx + WHERE study_idx=%s + """ + qdb.sql_connection.TRN.add(sql, [self._id, ]) # form into a dict mapping = {r[0]: r[1] for r in qdb.sql_connection.TRN.execute_fetchindex()} From dcd4beb2eb18181bb7ab0da51ecae9f2ac7fd904 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 15 Dec 2025 10:09:55 -0700 Subject: [PATCH 11/13] Based relative to Qiita schema --- qiita_db/support_files/patches/95.sql | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/qiita_db/support_files/patches/95.sql b/qiita_db/support_files/patches/95.sql index 05485abe4..b9fc7cbcc 100644 --- a/qiita_db/support_files/patches/95.sql +++ b/qiita_db/support_files/patches/95.sql @@ -2,27 +2,27 @@ -- Adding SEQUENCEs and support tables for sample_idx, prep_sample_idx, -- and artifact_sample_idx -CREATE SEQUENCE sequence_sample_idx AS BIGINT; -CREATE TABLE map_sample_idx ( +CREATE SEQUENCE qiita.sequence_sample_idx AS BIGINT; 
+CREATE TABLE qiita.map_sample_idx ( sample_name VARCHAR NOT NULL PRIMARY KEY, study_idx BIGINT NOT NULL, - sample_idx BIGINT DEFAULT NEXTVAL('sequence_sample_idx') NOT NULL, + sample_idx BIGINT DEFAULT NEXTVAL('qiita.sequence_sample_idx') NOT NULL, UNIQUE (sample_idx), CONSTRAINT fk_study FOREIGN KEY (study_idx) REFERENCES qiita.study (study_id) ); -CREATE SEQUENCE sequence_prep_sample_idx AS BIGINT; -CREATE TABLE map_prep_sample_idx ( - prep_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('sequence_prep_sample_idx'), +CREATE SEQUENCE qiita.sequence_prep_sample_idx AS BIGINT; +CREATE TABLE qiita.map_prep_sample_idx ( + prep_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('qiita.sequence_prep_sample_idx'), prep_idx BIGINT NOT NULL, sample_idx BIGINT NOT NULL, CONSTRAINT uc_prep_sample UNIQUE(prep_idx, sample_idx), CONSTRAINT fk_prep_template FOREIGN KEY (prep_idx) REFERENCES qiita.prep_template (prep_template_id) ); -CREATE SEQUENCE sequence_artifact_sample_idx AS BIGINT; -CREATE TABLE map_artifact_sample_idx ( - artifact_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('sequence_artifact_sample_idx'), +CREATE SEQUENCE qiita.sequence_artifact_sample_idx AS BIGINT; +CREATE TABLE qiita.map_artifact_sample_idx ( + artifact_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('qiita.sequence_artifact_sample_idx'), artifact_idx BIGINT NOT NULL, prep_sample_idx BIGINT NOT NULL, CONSTRAINT uc_artifact_sample UNIQUE(artifact_idx, prep_sample_idx), From 53cb97d4080b1e6e8ea4ca79b72e80283558d9c6 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 15 Dec 2025 10:10:01 -0700 Subject: [PATCH 12/13] lint --- .../test/rest/test_study_associations.py | 344 +++++++++++------- 1 file changed, 204 insertions(+), 140 deletions(-) diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py index 4d2adbc99..3436062ac 100644 --- a/qiita_pet/test/rest/test_study_associations.py +++ 
b/qiita_pet/test/rest/test_study_associations.py @@ -10,173 +10,237 @@ from tornado.escape import json_decode -from qiita_db.study import Study from qiita_pet.test.rest.test_base import RESTHandlerTestCase class StudyAssociationTests(RESTHandlerTestCase): def test_get_valid(self): - IGNORE = 'IGNORE' - exp = {'study_id': 1, - 'study_sample_metadata_filepath': IGNORE, - 'prep_templates': [{'prep_id': 1, - 'prep_status': 'private', - 'prep_sample_metadata_filepath': IGNORE, - 'prep_data_type': '18S', - 'prep_human_filtering': 'The greatest human filtering method', - 'prep_artifacts': [{'artifact_id': 1, - 'artifact_status': 'private', - 'artifact_parent_ids': None, - 'artifact_basal_id': 1, - 'artifact_processing_id': None, - 'artifact_processing_name': None, - 'artifact_processing_arguments': None, - 'artifact_filepaths': [{'artifact_filepath_id': 1, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'raw_forward_seqs'}, - {'artifact_filepath_id': 2, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'raw_barcodes'}]}, - {'artifact_id': 2, - 'artifact_status': 'private', - 'artifact_parent_ids': [1], - 'artifact_basal_id': 1, - 'artifact_processing_id': 1, - 'artifact_processing_name': 'Split libraries FASTQ', - 'artifact_processing_arguments': {'input_data': '1', - 'max_bad_run_length': '3', - 'min_per_read_length_fraction': '0.75', - 'sequence_max_n': '0', - 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'False', - 'rev_comp': 'False', - 'phred_quality_threshold': '3', - 'barcode_type': 'golay_12', - 'max_barcode_errors': '1.5', - 'phred_offset': 'auto'}, - 'artifact_filepaths': [{'artifact_filepath_id': 3, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'preprocessed_fasta'}, - {'artifact_filepath': IGNORE, - 'artifact_filepath_id': 4, - 'artifact_filepath_type': 'preprocessed_fastq'}, - {'artifact_filepath': IGNORE, - 'artifact_filepath_id': 5, - 'artifact_filepath_type': 'preprocessed_demux'}]}, - {'artifact_id': 3, - 
'artifact_status': 'private', - 'artifact_parent_ids': [1], - 'artifact_basal_id': 1, - 'artifact_processing_id': 1, - 'artifact_processing_name': 'Split libraries FASTQ', - 'artifact_processing_arguments': {'input_data': '1', - 'max_bad_run_length': '3', - 'min_per_read_length_fraction': '0.75', - 'sequence_max_n': '0', - 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'True', - 'rev_comp': 'False', - 'phred_quality_threshold': '3', - 'barcode_type': 'golay_12', - 'max_barcode_errors': '1.5', - 'phred_offset': 'auto'}, - 'artifact_filepaths': None}, - {'artifact_id': 4, - 'artifact_status': 'private', - 'artifact_parent_ids': [2], - 'artifact_basal_id': 1, - 'artifact_processing_id': 3, - 'artifact_processing_name': 'Pick closed-reference OTUs', - 'artifact_processing_arguments': {'input_data': '2', - 'reference': '1', - 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', - 'similarity': '0.97', - 'sortmerna_coverage': '0.97', - 'threads': '1'}, - 'artifact_filepaths': [{'artifact_filepath_id': 9, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}, - {'artifact_id': 5, - 'artifact_status': 'private', - 'artifact_parent_ids': [2], - 'artifact_basal_id': 1, - 'artifact_processing_id': 3, - 'artifact_processing_name': 'Pick closed-reference OTUs', - 'artifact_processing_arguments': {'input_data': '2', - 'reference': '1', - 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', - 'similarity': '0.97', - 'sortmerna_coverage': '0.97', - 'threads': '1'}, - 'artifact_filepaths': [{'artifact_filepath_id': 9, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}, - {'artifact_id': 6, - 'artifact_status': 'private', - 'artifact_parent_ids': [2], - 'artifact_basal_id': 1, - 'artifact_processing_id': 3, - 'artifact_processing_name': 'Pick closed-reference OTUs', - 'artifact_processing_arguments': {'input_data': '2', - 'reference': '2', - 'sortmerna_e_value': '1', - 'sortmerna_max_pos': '10000', - 'similarity': '0.97', - 
'sortmerna_coverage': '0.97', - 'threads': '1'}, - 'artifact_filepaths': [{'artifact_filepath_id': 12, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}]}, - {'prep_id': 2, - 'prep_status': 'private', - 'prep_sample_metadata_filepath': IGNORE, - 'prep_data_type': '18S', - 'prep_human_filtering': None, - 'prep_artifacts': [{'artifact_id': 7, - 'artifact_parent_ids': None, - 'artifact_basal_id': 7, - 'artifact_status': 'private', - 'artifact_processing_id': None, - 'artifact_processing_name': None, - 'artifact_processing_arguments': None, - 'artifact_filepaths': [{'artifact_filepath_id': 22, - 'artifact_filepath': IGNORE, - 'artifact_filepath_type': 'biom'}]}]}]} + IGNORE = "IGNORE" + exp = { + "study_id": 1, + "study_sample_metadata_filepath": IGNORE, + "prep_templates": [ + { + "prep_id": 1, + "prep_status": "private", + "prep_sample_metadata_filepath": IGNORE, + "prep_data_type": "18S", + "prep_human_filtering": "The greatest human filtering method", + "prep_artifacts": [ + { + "artifact_id": 1, + "artifact_status": "private", + "artifact_parent_ids": None, + "artifact_basal_id": 1, + "artifact_processing_id": None, + "artifact_processing_name": None, + "artifact_processing_arguments": None, + "artifact_filepaths": [ + { + "artifact_filepath_id": 1, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "raw_forward_seqs", + }, + { + "artifact_filepath_id": 2, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "raw_barcodes", + }, + ], + }, + { + "artifact_id": 2, + "artifact_status": "private", + "artifact_parent_ids": [1], + "artifact_basal_id": 1, + "artifact_processing_id": 1, + "artifact_processing_name": "Split libraries FASTQ", + "artifact_processing_arguments": { + "input_data": "1", + "max_bad_run_length": "3", + "min_per_read_length_fraction": "0.75", + "sequence_max_n": "0", + "rev_comp_barcode": "False", + "rev_comp_mapping_barcodes": "False", + "rev_comp": "False", + "phred_quality_threshold": "3", + "barcode_type": 
"golay_12", + "max_barcode_errors": "1.5", + "phred_offset": "auto", + }, + "artifact_filepaths": [ + { + "artifact_filepath_id": 3, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "preprocessed_fasta", + }, + { + "artifact_filepath": IGNORE, + "artifact_filepath_id": 4, + "artifact_filepath_type": "preprocessed_fastq", + }, + { + "artifact_filepath": IGNORE, + "artifact_filepath_id": 5, + "artifact_filepath_type": "preprocessed_demux", + }, + ], + }, + { + "artifact_id": 3, + "artifact_status": "private", + "artifact_parent_ids": [1], + "artifact_basal_id": 1, + "artifact_processing_id": 1, + "artifact_processing_name": "Split libraries FASTQ", + "artifact_processing_arguments": { + "input_data": "1", + "max_bad_run_length": "3", + "min_per_read_length_fraction": "0.75", + "sequence_max_n": "0", + "rev_comp_barcode": "False", + "rev_comp_mapping_barcodes": "True", + "rev_comp": "False", + "phred_quality_threshold": "3", + "barcode_type": "golay_12", + "max_barcode_errors": "1.5", + "phred_offset": "auto", + }, + "artifact_filepaths": None, + }, + { + "artifact_id": 4, + "artifact_status": "private", + "artifact_parent_ids": [2], + "artifact_basal_id": 1, + "artifact_processing_id": 3, + "artifact_processing_name": "Pick closed-reference OTUs", + "artifact_processing_arguments": { + "input_data": "2", + "reference": "1", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "similarity": "0.97", + "sortmerna_coverage": "0.97", + "threads": "1", + }, + "artifact_filepaths": [ + { + "artifact_filepath_id": 9, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "biom", + } + ], + }, + { + "artifact_id": 5, + "artifact_status": "private", + "artifact_parent_ids": [2], + "artifact_basal_id": 1, + "artifact_processing_id": 3, + "artifact_processing_name": "Pick closed-reference OTUs", + "artifact_processing_arguments": { + "input_data": "2", + "reference": "1", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "similarity": "0.97", + 
"sortmerna_coverage": "0.97", + "threads": "1", + }, + "artifact_filepaths": [ + { + "artifact_filepath_id": 9, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "biom", + } + ], + }, + { + "artifact_id": 6, + "artifact_status": "private", + "artifact_parent_ids": [2], + "artifact_basal_id": 1, + "artifact_processing_id": 3, + "artifact_processing_name": "Pick closed-reference OTUs", + "artifact_processing_arguments": { + "input_data": "2", + "reference": "2", + "sortmerna_e_value": "1", + "sortmerna_max_pos": "10000", + "similarity": "0.97", + "sortmerna_coverage": "0.97", + "threads": "1", + }, + "artifact_filepaths": [ + { + "artifact_filepath_id": 12, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "biom", + } + ], + }, + ], + }, + { + "prep_id": 2, + "prep_status": "private", + "prep_sample_metadata_filepath": IGNORE, + "prep_data_type": "18S", + "prep_human_filtering": None, + "prep_artifacts": [ + { + "artifact_id": 7, + "artifact_parent_ids": None, + "artifact_basal_id": 7, + "artifact_status": "private", + "artifact_processing_id": None, + "artifact_processing_name": None, + "artifact_processing_arguments": None, + "artifact_filepaths": [ + { + "artifact_filepath_id": 22, + "artifact_filepath": IGNORE, + "artifact_filepath_type": "biom", + } + ], + } + ], + }, + ], + } - response = self.get('/api/v1/study/1/associations', headers=self.headers) + response = self.get("/api/v1/study/1/associations", headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) def _process_dict(d): return [(d, k) for k in d] - def _process_list(l): - if l is None: + def _process_list(list_): + if list_ is None: return [] - return [dk for d in l - for dk in _process_dict(d)] + return [dk for d in list_ for dk in _process_dict(d)] stack = _process_dict(obs) while stack: (d, k) = stack.pop() - if k.endswith('filepath'): + if k.endswith("filepath"): d[k] = IGNORE - elif k.endswith('filepaths'): + elif k.endswith("filepaths"): 
stack.extend(_process_list(d[k])) - elif k.endswith('templates'): + elif k.endswith("templates"): stack.extend(_process_list(d[k])) - elif k.endswith('artifacts'): + elif k.endswith("artifacts"): stack.extend(_process_list(d[k])) self.assertEqual(obs, exp) def test_get_invalid(self): - response = self.get('/api/v1/study/0/associations', headers=self.headers) + response = self.get("/api/v1/study/0/associations", headers=self.headers) self.assertEqual(response.code, 404) -if __name__ == '__main__': +if __name__ == "__main__": main() From b2e9fa9e5dd0f04837edef3fd2d972144fcb9546 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Mon, 15 Dec 2025 10:49:34 -0700 Subject: [PATCH 13/13] Qualify to schema --- qiita_db/artifact.py | 8 ++++---- qiita_db/metadata_template/prep_template.py | 6 +++--- qiita_db/metadata_template/sample_template.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index 58efe1af0..f1b88cf47 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -1877,7 +1877,7 @@ def unique_ids(self): with qdb.sql_connection.TRN: # insert any IDs not present - sql = """INSERT INTO map_artifact_sample_idx (artifact_idx, prep_sample_idx) + sql = """INSERT INTO qiita.map_artifact_sample_idx (artifact_idx, prep_sample_idx) VALUES (%s, %s) ON CONFLICT (artifact_idx, prep_sample_idx) DO NOTHING""" @@ -1887,9 +1887,9 @@ def unique_ids(self): sql = """SELECT sample_name, artifact_sample_idx - FROM map_artifact_sample_idx - JOIN map_prep_sample_idx USING (prep_sample_idx) - JOIN map_sample_idx USING (sample_idx) + FROM qiita.map_artifact_sample_idx + JOIN qiita.map_prep_sample_idx USING (prep_sample_idx) + JOIN qiita.map_sample_idx USING (sample_idx) WHERE artifact_idx=%s """ qdb.sql_connection.TRN.add(sql, [self._id, ]) diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py index 10fbe4d1e..5678a934a 100644 ---
a/qiita_db/metadata_template/prep_template.py +++ b/qiita_db/metadata_template/prep_template.py @@ -363,7 +363,7 @@ def unique_ids(self): with qdb.sql_connection.TRN: # insert any IDs not present - sql = """INSERT INTO map_prep_sample_idx (prep_idx, sample_idx) + sql = """INSERT INTO qiita.map_prep_sample_idx (prep_idx, sample_idx) VALUES (%s, %s) ON CONFLICT (prep_idx, sample_idx) DO NOTHING""" @@ -373,8 +373,8 @@ def unique_ids(self): sql = """SELECT sample_name, prep_sample_idx - FROM map_prep_sample_idx - JOIN map_sample_idx USING (sample_idx) + FROM qiita.map_prep_sample_idx + JOIN qiita.map_sample_idx USING (sample_idx) WHERE prep_idx=%s """ qdb.sql_connection.TRN.add(sql, [self._id, ]) diff --git a/qiita_db/metadata_template/sample_template.py b/qiita_db/metadata_template/sample_template.py index 524dec4f8..9bfb19a48 100644 --- a/qiita_db/metadata_template/sample_template.py +++ b/qiita_db/metadata_template/sample_template.py @@ -192,7 +192,7 @@ def unique_ids(self): samples = [[self._id, s_id] for s_id in sorted(self.keys())] with qdb.sql_connection.TRN: # insert any IDs not present - sql = """INSERT INTO map_sample_idx (study_idx, sample_name) + sql = """INSERT INTO qiita.map_sample_idx (study_idx, sample_name) VALUES (%s, %s) ON CONFLICT (sample_name) DO NOTHING""" @@ -202,7 +202,7 @@ def unique_ids(self): sql = """SELECT sample_name, sample_idx - FROM map_sample_idx + FROM qiita.map_sample_idx WHERE study_idx=%s """ qdb.sql_connection.TRN.add(sql, [self._id, ])