From 26a9f75159a9c80ffa8109df1f9a10bd76663c97 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Wed, 5 Nov 2025 14:33:21 -0700 Subject: [PATCH 01/11] TST: study associations endpoint test --- .../test/rest/test_study_associations.py | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 qiita_pet/test/rest/test_study_associations.py diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py new file mode 100644 index 000000000..ea2053f16 --- /dev/null +++ b/qiita_pet/test/rest/test_study_associations.py @@ -0,0 +1,151 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- + +from unittest import main + +from tornado.escape import json_decode + +from qiita_db.study import Study +from qiita_pet.test.rest.test_base import RESTHandlerTestCase + + +class StudyAssociationTests(RESTHandlerTestCase): + def test_get_valid(self): + IGNORE = IGNORE + exp = {'study': 1, + 'prep_templates': [{'prep_id': 1, + 'prep_filepath': IGNORE, + 'prep_datatype': '18S', + 'prep_human_filtering': 'The greatest human filtering method', + 'prep_artifacts': [{'artifact_id': 1, + 'artifact_parent_ids': [1], + 'artifact_basal_id': 1, + 'artifact_processing_id': None, + 'artifact_processing_name': None, + 'artifact_processing_arguments': None, + 'artifact_filepaths': [{'artifact_filepath_id': 1, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'raw_forward_seqs'}, + {'artifact_filepath_id': 2, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'raw_barcodes'}]}, + {'artifact_id': 2, + 'artifact_parent_ids': None, + 'artifact_basal_id': 1, + 'artifact_processing_id': 1, + 'artifact_processing_name': 'Split libraries FASTQ', + 'artifact_processing_arguments': {'input_data': '1', + 'max_bad_run_length': '3', + 'min_per_read_length_fraction': '0.75', + 'sequence_max_n': '0', + 'rev_comp_barcode': 'False', + 'rev_comp_mapping_barcodes': 'False', + 'rev_comp': 'False', + 'phred_quality_threshold': '3', + 'barcode_type': 'golay_12', + 'max_barcode_errors': '1.5', + 'phred_offset': 'auto'}, + 'artifact_filepaths': [{'artifact_filepath_id': 3, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'preprocessed_fasta'}, + {'artifact_filepath': IGNORE, + 'artifact_filepath_id': 4, + 'artifact_filepath_type': 'preprocessed_fastq'}, + {'artifact_filepath': IGNORE, + 'artifact_filepath_id': 5, + 'artifact_filepath_type': 'preprocessed_demux'}]}, + {'artifact_id': 3, + 'artifact_parent_ids': [1], + 'artifact_basal_id': 1, + 'artifact_processing_id': 1, + 'artifact_processing_name': 'Split libraries FASTQ', + 'artifact_processing_arguments': {'input_data': '1', + 'max_bad_run_length': '3', + 'min_per_read_length_fraction': '0.75', + 'sequence_max_n': '0', + 'rev_comp_barcode': 'False', + 'rev_comp_mapping_barcodes': 'False', + 'rev_comp': 'False', + 'phred_quality_threshold': '3', + 'barcode_type': 'golay_12', + 'max_barcode_errors': '1.5', + 'phred_offset': 'auto'}, + 'artifact_filepaths': []}, + {'artifact_id': 4, + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': {'input_data': '2', + 'reference': '1', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{'artifact_filepath_id': 9, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}, + {'artifact_id': 5, + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': {'input_data': '2', + 'reference': '1', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{'artifact_filepath_id': 9, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}, + {'artifact_id': 6, + 'artifact_parent_ids': [2], + 'artifact_basal_id': 1, + 'artifact_processing_id': 3, + 'artifact_processing_name': 'Pick closed-reference OTUs', + 'artifact_processing_arguments': {'input_data': '2', + 'reference': '2', + 'sortmerna_e_value': '1', + 'sortmerna_max_pos': '10000', + 'similarity': '0.97', + 'sortmerna_coverage': '0.97', + 'threads': '1'}, + 'artifact_filepaths': [{'artifact_filepath_id': 12, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}]}, + {'prep_id': 2, + 'prep_filepath': IGNORE, + 'prep_datatype': '18S', + 'prep_human_filtering': None, + 'prep_artifacts': [{'artifact_id': 7, + 'artifact_parent_ids': [], + 'artifact_basal_id': 7, + 'artifact_processing_id': None, + 'artifact_processing_name': None, + 'artifact_processing_arguments': None, + 'artifact_filepaths': [{'artifact_filepath_id': 22, + 'artifact_filepath': IGNORE, + 'artifact_filepath_type': 'biom'}]}]}]} + + response = self.get('/api/v1/study-association/1', headers=self.headers) + self.assertEqual(response.code, 200) + obs = json_decode(response.body) + self.assertEqual(obs, exp) + + def test_get_invalid(self): + response = self.get('/api/v1/study-association/0', headers=self.headers) + self.assertEqual(response.code, 404) + self.assertEqual(json_decode(response.body), + {'message': 'Study not found'}) + + +if __name__ == '__main__': + main() From 4c1e91f65b40747e26e1a4baaf66c467dbdc1632 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Wed, 5 Nov 2025 14:33:45 -0700 Subject: [PATCH 02/11] MAINT: be defensive on artifact prep assocation expectations --- qiita_db/artifact.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index bf81ddf41..ea40e3bef 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -1463,8 +1463,22 @@ def prep_templates(self): FROM qiita.preparation_artifact WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) - return [qdb.metadata_template.prep_template.PrepTemplate(pt_id) - for pt_id in qdb.sql_connection.TRN.execute_fetchflatten()] + templates = [qdb.metadata_template.prep_template.PrepTemplate(pt_id) + for pt_id in qdb.sql_connection.TRN.execute_fetchflatten()] + + if len(templates) > 1: + # We never expect an artifact to be associated with multiple + # preparations + ids = [p.id for p in templates] + msg = f"Artifact({self.id}) associated with preps: {sorted(ids)}" + raise ValueError(msg) + + if len(templates) == 0: + # An artifact must be associated with a template + msg = f"Artifact({self.id}) is not associated with a template" + raise ValueError(msg) + + return templates @property def study(self): From 6bb87290c948b2cb86f3eae4b1ef0ef88814fb80 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 6 Nov 2025 16:20:28 -0700 Subject: [PATCH 03/11] API: add /api/v1/study//associations to retrieve comprehensive id, path, processing information for a study --- qiita_pet/handlers/rest/__init__.py | 2 + qiita_pet/handlers/rest/study_association.py | 195 ++++++++++++++++++ .../test/rest/test_study_associations.py | 61 ++++-- 3 files changed, 243 insertions(+), 15 deletions(-) create mode 100644 qiita_pet/handlers/rest/study_association.py diff --git a/qiita_pet/handlers/rest/__init__.py b/qiita_pet/handlers/rest/__init__.py index 73ad9382a..913758457 100644 --- a/qiita_pet/handlers/rest/__init__.py +++ b/qiita_pet/handlers/rest/__init__.py @@ -7,6 +7,7 @@ # ----------------------------------------------------------------------------- from .study import StudyHandler, StudyCreatorHandler, StudyStatusHandler +from .study_association import StudyAssociationHandler from .study_samples import (StudySamplesHandler, StudySamplesInfoHandler, StudySamplesCategoriesHandler, StudySamplesDetailHandler, @@ -25,6 +26,7 @@ ENDPOINTS = ( (r"/api/v1/study$", StudyCreatorHandler), (r"/api/v1/study/([0-9]+)$", StudyHandler), + (r"/api/v1/study/([0-9]+)/associations$", StudyAssociationHandler), (r"/api/v1/study/([0-9]+)/samples/categories=([a-zA-Z\-0-9\.:,_]*)", StudySamplesCategoriesHandler), (r"/api/v1/study/([0-9]+)/samples", StudySamplesHandler), diff --git a/qiita_pet/handlers/rest/study_association.py b/qiita_pet/handlers/rest/study_association.py new file mode 100644 index 000000000..525a42a14 --- /dev/null +++ b/qiita_pet/handlers/rest/study_association.py @@ -0,0 +1,195 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- +import warnings + +from tornado.escape import json_decode + +from qiita_db.handlers.oauth2 import authenticate_oauth +from qiita_db.study import StudyPerson, Study +from qiita_db.user import User +from .rest_handler import RESTHandler +from qiita_db.metadata_template.constants import SAMPLE_TEMPLATE_COLUMNS + + +# terms used more than once +_STUDY = 'study' +_PREP = 'prep' +_FILEPATH = 'filepath' +_STATUS = 'status' +_ARTIFACT = 'artifact' +_SAMPLE = 'sample' +_METADATA = 'metadata' +_TEMPLATE = 'template' +_ID = 'id' +_PROCESSING = 'processing' +_TYPE = 'type' + +# payload keys +STUDY_ID = f'{_STUDY}_{_ID}' +STUDY_SAMPLE_METADATA_FILEPATH = f'{_STUDY}_{_SAMPLE}_{_METADATA}_{_FILEPATH}' +PREP_TEMPLATES = f'{_PREP}_{_TEMPLATE}s' +PREP_ID = f'{_PREP}_{_ID}' +PREP_STATUS = f'{_PREP}_{_STATUS}' +PREP_SAMPLE_METADATA_FILEPATH = f'{_PREP}_{_SAMPLE}_{_METADATA}_{_FILEPATH}' +PREP_DATA_TYPE = f'{_PREP}_data_{_TYPE}' +PREP_HUMAN_FILTERING = f'{_PREP}_human_filtering' +PREP_ARTIFACTS = f'{_PREP}_{_ARTIFACT}s' +ARTIFACT_ID = f'{_ARTIFACT}_{_ID}' +ARTIFACT_STATUS = f'{_ARTIFACT}_{_STATUS}' +ARTIFACT_PARENT_IDS = f'{_ARTIFACT}_parent_{_ID}s' +ARTIFACT_BASAL_ID = f'{_ARTIFACT}_basal_{_ID}' +ARTIFACT_PROCESSING_ID = f'{_ARTIFACT}_{_PROCESSING}_{_ID}' +ARTIFACT_PROCESSING_NAME = f'{_ARTIFACT}_{_PROCESSING}_name' +ARTIFACT_PROCESSING_ARGUMENTS = f'{_ARTIFACT}_{_PROCESSING}_arguments' +ARTIFACT_FILEPATHS = f'{_ARTIFACT}_{_FILEPATH}s' +ARTIFACT_FILEPATH = f'{_ARTIFACT}_{_FILEPATH}' +ARTIFACT_FILEPATH_TYPE = f'{_ARTIFACT}_{_FILEPATH}_{_TYPE}' +ARTIFACT_FILEPATH_ID = f'{_ARTIFACT}_{_FILEPATH}_{_ID}' + + +def _most_recent_template_path(template): + filepaths = template.get_filepaths() + + # the test dataset shows that a prep can exist without a prep template + if len(filepaths) == 0: + return None + + metadata_paths = sorted(filepaths, reverse=True) + + # [0] -> the highest file by ID + # [1] -> the filepath + return metadata_paths[0][1] + + +def _set_study(payload, study): + filepath = _most_recent_template_path(study.sample_template) + + payload[STUDY_ID] = study.id + payload[STUDY_SAMPLE_METADATA_FILEPATH] = filepath + + +def _set_prep_templates(payload, study): + template_data = [] + for pt in study.prep_templates(): + _set_prep_template(template_data, pt) + payload[PREP_TEMPLATES] = template_data + + +def _get_human_filtering(prep_template): + # .current_human_filtering does not describe what the human filter is + if prep_template.artifact is not None: + return prep_template.artifact.human_reads_filter_method + + +def _set_prep_template(template_payload, prep_template): + filepath = _most_recent_template_path(prep_template) + + current_template = {} + current_template[PREP_ID] = prep_template.id + current_template[PREP_STATUS] = prep_template.status + current_template[PREP_SAMPLE_METADATA_FILEPATH] = filepath + current_template[PREP_DATA_TYPE] = prep_template.data_type() + current_template[PREP_HUMAN_FILTERING] = _get_human_filtering(prep_template) + + _set_artifacts(current_template, prep_template) + + template_payload.append(current_template) + + +def _get_artifacts(prep_template): + pending_artifact_objects = [prep_template.artifact, ] + all_artifact_objects = set(pending_artifact_objects[:]) + + while pending_artifact_objects: + artifact = pending_artifact_objects.pop() + pending_artifact_objects.extend(artifact.children) + all_artifact_objects.update(set(artifact.children)) + + return sorted(all_artifact_objects, key=lambda artifact: artifact.id) + + +def _set_artifacts(template_payload, prep_template): + prep_artifacts = [] + + if prep_template.artifact is None: + basal_id = None + else: + basal_id = prep_template.artifact.id + + for artifact in _get_artifacts(prep_template): + _set_artifact(prep_artifacts, artifact, basal_id) + template_payload[PREP_ARTIFACTS] = prep_artifacts + + +def _set_artifact(prep_artifacts, artifact, basal_id): + artifact_payload = {} + artifact_payload[ARTIFACT_ID] = artifact.id + + # Prep uses .status, artifact uses .visibility + # favoring .status as visibility implies a UI + artifact_payload[ARTIFACT_STATUS] = artifact.visibility + + parents = [parent.id for parent in artifact.parents] + artifact_payload[ARTIFACT_PARENT_IDS] = parents if parents else None + artifact_payload[ARTIFACT_BASAL_ID] = basal_id + + _set_artifact_processing(artifact_payload, artifact) + _set_artifact_filepaths(artifact_payload, artifact) + + prep_artifacts.append(artifact_payload) + + +def _set_artifact_processing(artifact_payload, artifact): + processing_parameters = artifact.processing_parameters + if processing_parameters is None: + artifact_processing_id = None + artifact_processing_name = None + artifact_processing_arguments = None + else: + command = processing_parameters.command + artifact_processing_id = command.id + artifact_processing_name = command.name + artifact_processing_arguments = processing_parameters.values + + artifact_payload[ARTIFACT_PROCESSING_ID] = artifact_processing_id + artifact_payload[ARTIFACT_PROCESSING_NAME] = artifact_processing_name + artifact_payload[ARTIFACT_PROCESSING_ARGUMENTS] = artifact_processing_arguments + + +def _set_artifact_filepaths(artifact_payload, artifact): + artifact_filepaths = [] + for filepath_data in artifact.filepaths: + local_payload = {} + local_payload[ARTIFACT_FILEPATH] = filepath_data['fp'] + local_payload[ARTIFACT_FILEPATH_ID] = filepath_data['fp_id'] + local_payload[ARTIFACT_FILEPATH_TYPE] = filepath_data['fp_type'] + artifact_filepaths.append(local_payload) + + # the test study includes an artifact which does not have filepaths + if len(artifact_filepaths) == 0: + artifact_filepaths = None + + artifact_payload[ARTIFACT_FILEPATHS] = artifact_filepaths + + +class StudyAssociationHandler(RESTHandler): + @authenticate_oauth + def get(self, study_id): + study = self.safe_get_study(study_id) + if study is None: + return + + payload = {} + _set_study(payload, study) + _set_prep_templates(payload, study) + self.write(payload) + self.finish() + + + # get all the things + diff --git a/qiita_pet/test/rest/test_study_associations.py b/qiita_pet/test/rest/test_study_associations.py index ea2053f16..4d2adbc99 100644 --- a/qiita_pet/test/rest/test_study_associations.py +++ b/qiita_pet/test/rest/test_study_associations.py @@ -16,14 +16,17 @@ class StudyAssociationTests(RESTHandlerTestCase): def test_get_valid(self): - IGNORE = IGNORE - exp = {'study': 1, + IGNORE = 'IGNORE' + exp = {'study_id': 1, + 'study_sample_metadata_filepath': IGNORE, 'prep_templates': [{'prep_id': 1, - 'prep_filepath': IGNORE, - 'prep_datatype': '18S', + 'prep_status': 'private', + 'prep_sample_metadata_filepath': IGNORE, + 'prep_data_type': '18S', 'prep_human_filtering': 'The greatest human filtering method', 'prep_artifacts': [{'artifact_id': 1, - 'artifact_parent_ids': [1], + 'artifact_status': 'private', + 'artifact_parent_ids': None, 'artifact_basal_id': 1, 'artifact_processing_id': None, 'artifact_processing_name': None, @@ -35,7 +38,8 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'raw_barcodes'}]}, {'artifact_id': 2, - 'artifact_parent_ids': None, + 'artifact_status': 'private', + 'artifact_parent_ids': [1], 'artifact_basal_id': 1, 'artifact_processing_id': 1, 'artifact_processing_name': 'Split libraries FASTQ', @@ -60,6 +64,7 @@ def test_get_valid(self): 'artifact_filepath_id': 5, 'artifact_filepath_type': 'preprocessed_demux'}]}, {'artifact_id': 3, + 'artifact_status': 'private', 'artifact_parent_ids': [1], 'artifact_basal_id': 1, 'artifact_processing_id': 1, @@ -69,14 +74,15 @@ def test_get_valid(self): 'min_per_read_length_fraction': '0.75', 'sequence_max_n': '0', 'rev_comp_barcode': 'False', - 'rev_comp_mapping_barcodes': 'False', + 'rev_comp_mapping_barcodes': 'True', 'rev_comp': 'False', 'phred_quality_threshold': '3', 'barcode_type': 'golay_12', 'max_barcode_errors': '1.5', 'phred_offset': 'auto'}, - 'artifact_filepaths': []}, + 'artifact_filepaths': None}, {'artifact_id': 4, + 'artifact_status': 'private', 'artifact_parent_ids': [2], 'artifact_basal_id': 1, 'artifact_processing_id': 3, @@ -92,6 +98,7 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}, {'artifact_id': 5, + 'artifact_status': 'private', 'artifact_parent_ids': [2], 'artifact_basal_id': 1, 'artifact_processing_id': 3, @@ -107,6 +114,7 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}, {'artifact_id': 6, + 'artifact_status': 'private', 'artifact_parent_ids': [2], 'artifact_basal_id': 1, 'artifact_processing_id': 3, @@ -122,12 +130,14 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}]}, {'prep_id': 2, - 'prep_filepath': IGNORE, - 'prep_datatype': '18S', + 'prep_status': 'private', + 'prep_sample_metadata_filepath': IGNORE, + 'prep_data_type': '18S', 'prep_human_filtering': None, 'prep_artifacts': [{'artifact_id': 7, - 'artifact_parent_ids': [], + 'artifact_parent_ids': None, 'artifact_basal_id': 7, + 'artifact_status': 'private', 'artifact_processing_id': None, 'artifact_processing_name': None, 'artifact_processing_arguments': None, @@ -135,16 +145,37 @@ def test_get_valid(self): 'artifact_filepath': IGNORE, 'artifact_filepath_type': 'biom'}]}]}]} - response = self.get('/api/v1/study-association/1', headers=self.headers) + response = self.get('/api/v1/study/1/associations', headers=self.headers) self.assertEqual(response.code, 200) obs = json_decode(response.body) + + def _process_dict(d): + return [(d, k) for k in d] + + def _process_list(l): + if l is None: + return [] + + return [dk for d in l + for dk in _process_dict(d)] + + stack = _process_dict(obs) + while stack: + (d, k) = stack.pop() + if k.endswith('filepath'): + d[k] = IGNORE + elif k.endswith('filepaths'): + stack.extend(_process_list(d[k])) + elif k.endswith('templates'): + stack.extend(_process_list(d[k])) + elif k.endswith('artifacts'): + stack.extend(_process_list(d[k])) + self.assertEqual(obs, exp) def test_get_invalid(self): - response = self.get('/api/v1/study-association/0', headers=self.headers) + response = self.get('/api/v1/study/0/associations', headers=self.headers) self.assertEqual(response.code, 404) - self.assertEqual(json_decode(response.body), - {'message': 'Study not found'}) if __name__ == '__main__': From 53999479bc8a0af3c74a8b3ca093cb4eb28f7ea4 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 13:16:06 -0700 Subject: [PATCH 04/11] SQL: draft schema changes to support additional identifiers --- qiita_db/support_files/patches/95.sql | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 qiita_db/support_files/patches/95.sql diff --git a/qiita_db/support_files/patches/95.sql b/qiita_db/support_files/patches/95.sql new file mode 100644 index 000000000..a2e713eb2 --- /dev/null +++ b/qiita_db/support_files/patches/95.sql @@ -0,0 +1,31 @@ +-- Dec 12, 2025 +-- Adding SEQUENCEs and support tables for sample_idx, prep_sample_idx, +-- and artifact_sample_idx + +CREATE SEQUENCE sequence_sample_idx AS BIGINT; +CREATE TABLE map_sample_idx ( + sample_name VARCHAR NOT NULL PRIMARY KEY, + study_idx BIGINT NOT NULL, + sample_idx BIGINT DEFAULT NEXTVAL('sequence_sample_idx') NOT NULL, + UNIQUE (study_idx, sample_idx), + UNIQUE (sample_idx), + CONSTRAINT fk_study FOREIGN KEY (study_idx) REFERENCES qiita.study (study_id) +); + +CREATE SEQUENCE sequence_prep_sample_idx AS BIGINT; +CREATE TABLE map_prep_sample_idx ( + prep_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('sequence_prep_sample_idx'), + prep_idx BIGINT NOT NULL, + sample_idx BIGINT NOT NULL, + UNIQUE (prep_idx, prep_sample_idx), + CONSTRAINT fk_prep_template FOREIGN KEY (prep_idx) REFERENCES qiita.prep_template (prep_template_id) +); + +CREATE SEQUENCE sequence_artifact_sample_idx AS BIGINT; +CREATE TABLE map_artifact_sample_idx ( + artifact_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('sequence_artifact_sample_idx'), + artifact_idx BIGINT NOT NULL, + prep_sample_idx BIGINT NOT NULL, + UNIQUE (artifact_idx, artifact_sample_idx), + CONSTRAINT fk_artifact FOREIGN KEY (artifact_idx) REFERENCES qiita.artifact (artifact_id) +); From fb460417e7b479eadfe269e8d2fb516563837f1e Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 13:52:09 -0700 Subject: [PATCH 05/11] API: unique_ids stub on base metadata object --- .../metadata_template/base_metadata_template.py | 17 +++++++++++++++++ .../test/test_base_metadata_template.py | 9 ++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index 38f1143d9..5536f1898 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -470,6 +470,8 @@ class MetadataTemplate(qdb.base.QiitaObject): # forbidden_words not defined for base class. Please redefine for # sub-classes. _forbidden_words = {} + # qiita-unique integer identifier mapping table + _id_map_table = None @classmethod def _check_id(cls, id_): @@ -936,6 +938,21 @@ def _common_extend_steps(self, md_template): return new_samples, new_cols + def unique_ids(self): + r"""Return a stable mapping of sample_name to integers + + Obtain a map from a sample_name to an integer. The association is + unique Qiita-wide and 1-1. + + This method is idempotent. + + Returns + ------ + dict + {sample_name: integer_index} + """ + raise IncompetentQiitaDeveloperError() + @classmethod def exists(cls, obj_id): r"""Checks if already exists a MetadataTemplate for the provided object diff --git a/qiita_db/metadata_template/test/test_base_metadata_template.py b/qiita_db/metadata_template/test/test_base_metadata_template.py index 7b83350c8..516b65b30 100644 --- a/qiita_db/metadata_template/test/test_base_metadata_template.py +++ b/qiita_db/metadata_template/test/test_base_metadata_template.py @@ -41,7 +41,14 @@ def test_init(self): with self.assertRaises(IncompetentQiitaDeveloperError): MT(1) - def test_exist(self): + def test_unique_ids(self): + """Unique IDs raises an error because it's not called from a subclass + """ + MT = qdb.metadata_template.base_metadata_template.MetadataTemplate + with self.assertRaises(IncompetentQiitaDeveloperError): + MT.unique_ids(self.study) + + def test_exists(self): """Exists raises an error because it's not called from a subclass""" MT = qdb.metadata_template.base_metadata_template.MetadataTemplate with self.assertRaises(IncompetentQiitaDeveloperError): From 9f1bb44c4c3593b1cc7b1b51f75d2a877e332994 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 13:59:06 -0700 Subject: [PATCH 06/11] TST: check unique_ids --- qiita_db/metadata_template/test/test_prep_template.py | 9 +++++++++ qiita_db/metadata_template/test/test_sample_template.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py index c4978f47b..543ad9779 100644 --- a/qiita_db/metadata_template/test/test_prep_template.py +++ b/qiita_db/metadata_template/test/test_prep_template.py @@ -430,6 +430,15 @@ def test_init(self): st = qdb.metadata_template.prep_template.PrepTemplate(1) self.assertTrue(st.id, 1) + def test_unique_ids(self): + obs = self.tester.unique_ids() + exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()))} + self.assertEqual(obs, exp) + + # verify a repeat call is unchanged + obs = self.tester.unique_ids() + self.assertEqual(obs, exp) + def test_table_name(self): """Table name return the correct string""" obs = qdb.metadata_template.prep_template.PrepTemplate._table_name(1) diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index 7a91e26e4..b5abfa577 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -498,6 +498,15 @@ def test_init(self): st = qdb.metadata_template.sample_template.SampleTemplate(1) self.assertTrue(st.id, 1) + def test_unique_ids(self): + obs = self.tester.unique_ids() + exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()))} + self.assertEqual(obs, exp) + + # verify a repeat call is unchanged + obs = self.tester.unique_ids() + self.assertEqual(obs, exp) + def test_table_name(self): """Table name return the correct string""" obs = qdb.metadata_template.sample_template.SampleTemplate._table_name( From d6092cbbf462322cb98f74f85b428f96787365a5 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 14:09:28 -0700 Subject: [PATCH 07/11] TST: check unique_ids --- qiita_db/test/test_artifact.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/qiita_db/test/test_artifact.py b/qiita_db/test/test_artifact.py index 2319acfaa..7bf96e929 100644 --- a/qiita_db/test/test_artifact.py +++ b/qiita_db/test/test_artifact.py @@ -1233,6 +1233,16 @@ def test_delete_as_output_job(self): with self.assertRaises(qdb.exceptions.QiitaDBUnknownIDError): qdb.artifact.Artifact(artifact.id) + def test_unique_ids(self): + art = qdb.artifact.Artifact(1) + obs = art.unique_ids() + exp = {name: idx for idx, name in enumerate(sorted(art.prep_templates[0].keys()))} + self.assertEqual(obs, exp) + + # verify repeat calls are unchanged + obs = art.unique_ids() + self.assertEqual(obs, exp) + def test_name_setter(self): a = qdb.artifact.Artifact(1) self.assertEqual(a.name, "Raw data 1") From 678867e625b9c5b882657b10b874de0bc926d8a1 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 14:45:24 -0700 Subject: [PATCH 08/11] API: passing SampleTemplate.unique_ids --- .../base_metadata_template.py | 2 - qiita_db/metadata_template/sample_template.py | 37 +++++++++++++++++++ .../test/test_sample_template.py | 2 +- qiita_db/support_files/patches/95.sql | 1 - 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index 5536f1898..c874bb73f 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -470,8 +470,6 @@ class MetadataTemplate(qdb.base.QiitaObject): # forbidden_words not defined for base class. Please redefine for # sub-classes. _forbidden_words = {} - # qiita-unique integer identifier mapping table - _id_map_table = None @classmethod def _check_id(cls, id_): diff --git a/qiita_db/metadata_template/sample_template.py b/qiita_db/metadata_template/sample_template.py index 78422c3d4..5f1731058 100644 --- a/qiita_db/metadata_template/sample_template.py +++ b/qiita_db/metadata_template/sample_template.py @@ -172,6 +172,43 @@ def columns_restrictions(self): """ return qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS + def unique_ids(self): + r"""Return a stable mapping of sample_name to integers + + Obtain a map from a sample_name to an integer. The association is + unique Qiita-wide and 1-1. + + This method is idempotent. + + Returns + ------ + dict + {sample_name: integer_index} + """ + samples = [[self._id, s_id] for s_id in sorted(self.keys())] + with qdb.sql_connection.TRN: + # insert any IDs not present + sql = """INSERT INTO map_sample_idx (study_idx, sample_name) + VALUES (%s, %s) + ON CONFLICT (sample_name) + DO NOTHING""" + qdb.sql_connection.TRN.add(sql, samples, many=True) + + # obtain the association + sql = """SELECT + sample_name, + sample_idx + FROM map_sample_idx""" + qdb.sql_connection.TRN.add(sql) + + # form into a dict + mapping = {r[0]: r[1] for r in qdb.sql_connection.TRN.execute_fetchindex()} + + # commit in the event changes were made + qdb.sql_connection.TRN.commit() + + return mapping + def delete_samples(self, sample_names): """Delete `sample_names` from sample information file diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index b5abfa577..afe5f5b16 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -500,7 +500,7 @@ def test_init(self): def test_unique_ids(self): obs = self.tester.unique_ids() - exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()))} + exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()), 1)} self.assertEqual(obs, exp) # verify a repeat call is unchanged diff --git a/qiita_db/support_files/patches/95.sql b/qiita_db/support_files/patches/95.sql index a2e713eb2..5cd071564 100644 --- a/qiita_db/support_files/patches/95.sql +++ b/qiita_db/support_files/patches/95.sql @@ -7,7 +7,6 @@ CREATE TABLE map_sample_idx ( sample_name VARCHAR NOT NULL PRIMARY KEY, study_idx BIGINT NOT NULL, sample_idx BIGINT DEFAULT NEXTVAL('sequence_sample_idx') NOT NULL, - UNIQUE (study_idx, sample_idx), UNIQUE (sample_idx), CONSTRAINT fk_study FOREIGN KEY (study_idx) REFERENCES qiita.study (study_id) ); From c16ef720fa72b8169d3d2c61a30c924918910855 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 15:17:20 -0700 Subject: [PATCH 09/11] API: passing PrepTemplate.unique_ids --- qiita_db/metadata_template/prep_template.py | 44 +++++++++++++++++++ .../test/test_prep_template.py | 2 +- qiita_db/support_files/patches/95.sql | 4 +- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py index 059ccb55f..43104818c 100644 --- a/qiita_db/metadata_template/prep_template.py +++ b/qiita_db/metadata_template/prep_template.py @@ -324,6 +324,50 @@ def delete(cls, id_): qdb.sql_connection.TRN.execute() + def unique_ids(self): + r"""Return a stable mapping of sample_name to integers + + Obtain a map from a sample_name to an integer. The association is + unique Qiita-wide and 1-1. + + This method is idempotent. + + Returns + ------ + dict + {sample_name: integer_index} + """ + sample_idx = qdb.study.Study(self.study_id).sample_template.unique_ids() + + paired = [] + for p_id in sorted(self.keys()): + if p_id in sample_idx: + paired.append([self._id, sample_idx[p_id]]) + + with qdb.sql_connection.TRN: + # insert any IDs not present + sql = """INSERT INTO map_prep_sample_idx (prep_idx, sample_idx) + VALUES (%s, %s) + ON CONFLICT (prep_idx, sample_idx) + DO NOTHING""" + qdb.sql_connection.TRN.add(sql, paired, many=True) + + # obtain the association + sql = """SELECT + sample_name, + prep_sample_idx + FROM map_prep_sample_idx + JOIN map_sample_idx USING (sample_idx)""" + qdb.sql_connection.TRN.add(sql) + + # form into a dict + mapping = {r[0]: r[1] for r in qdb.sql_connection.TRN.execute_fetchindex()} + + # commit in the event changes were made + qdb.sql_connection.TRN.commit() + + return mapping + def data_type(self, ret_id=False): """Returns the data_type or the data_type id diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py index 543ad9779..27bb095d4 100644 --- a/qiita_db/metadata_template/test/test_prep_template.py +++ b/qiita_db/metadata_template/test/test_prep_template.py @@ -432,7 +432,7 @@ def test_init(self): def test_unique_ids(self): obs = self.tester.unique_ids() - exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()))} + exp = {name: idx for idx, name in enumerate(sorted(self.tester.keys()), 1)} self.assertEqual(obs, exp) # verify a repeat call is unchanged diff --git a/qiita_db/support_files/patches/95.sql b/qiita_db/support_files/patches/95.sql index 5cd071564..05485abe4 100644 --- a/qiita_db/support_files/patches/95.sql +++ b/qiita_db/support_files/patches/95.sql @@ -16,7 +16,7 @@ CREATE TABLE map_prep_sample_idx ( prep_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('sequence_prep_sample_idx'), prep_idx BIGINT NOT NULL, sample_idx BIGINT NOT NULL, - UNIQUE (prep_idx, prep_sample_idx), + CONSTRAINT uc_prep_sample UNIQUE(prep_idx, sample_idx), CONSTRAINT fk_prep_template FOREIGN KEY (prep_idx) REFERENCES qiita.prep_template (prep_template_id) ); @@ -25,6 +25,6 @@ CREATE TABLE map_artifact_sample_idx ( artifact_sample_idx BIGINT NOT NULL PRIMARY KEY DEFAULT NEXTVAL('sequence_artifact_sample_idx'), artifact_idx BIGINT NOT NULL, prep_sample_idx BIGINT NOT NULL, - UNIQUE (artifact_idx, artifact_sample_idx), + CONSTRAINT uc_artifact_sample UNIQUE(artifact_idx, prep_sample_idx), CONSTRAINT fk_artifact FOREIGN KEY (artifact_idx) REFERENCES qiita.artifact (artifact_id) ); From 929730b7ae20e831f3429cf436767f124dfd6a37 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 15:30:30 -0700 Subject: [PATCH 10/11] API: passing Artifact.unique_ids --- qiita_db/artifact.py | 48 ++++++++++++++++++++++++++++++++++ qiita_db/test/test_artifact.py | 2 +- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index ea40e3bef..bc0008e46 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -1758,3 +1758,51 @@ def human_reads_filter_method(self, value): SET human_reads_filter_method_id = %s WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [idx[0], self.id]) + + def unique_ids(self): + r"""Return a stable mapping of sample_name to integers + + Obtain a map from a sample_name to an integer. The association is + unique Qiita-wide and 1-1. + + This method is idempotent. + + Returns + ------ + dict + {sample_name: integer_index} + """ + if len(self.prep_templates) == 0: + raise ValueError("No associated prep template") + + if len(self.prep_templates) > 1: + raise ValueError("Cannot assign against multiple prep templates") + + paired = [[self._id, ps_idx] for ps_idx in sorted(self.prep_templates[0].unique_ids().values())] + + with qdb.sql_connection.TRN: + # insert any IDs not present + sql = """INSERT INTO map_artifact_sample_idx (artifact_idx, prep_sample_idx) + VALUES (%s, %s) + ON CONFLICT (artifact_idx, prep_sample_idx) + DO NOTHING""" + qdb.sql_connection.TRN.add(sql, paired, many=True) + + # obtain the association + sql = """SELECT + sample_name, + artifact_sample_idx + FROM map_artifact_sample_idx + JOIN map_prep_sample_idx USING (prep_sample_idx) + JOIN map_sample_idx USING (sample_idx) + WHERE artifact_idx=%s + """ + qdb.sql_connection.TRN.add(sql, [self._id, ]) + + # form into a dict + mapping = {r[0]: r[1] for r in qdb.sql_connection.TRN.execute_fetchindex()} + + # commit in the event changes were made + qdb.sql_connection.TRN.commit() + + return mapping diff --git a/qiita_db/test/test_artifact.py b/qiita_db/test/test_artifact.py index 7bf96e929..40c30d2f7 100644 --- a/qiita_db/test/test_artifact.py +++ b/qiita_db/test/test_artifact.py @@ -1236,7 +1236,7 @@ def test_delete_as_output_job(self): def test_unique_ids(self): art = qdb.artifact.Artifact(1) obs = art.unique_ids() - exp = {name: idx for idx, name in enumerate(sorted(art.prep_templates[0].keys()))} + exp = {name: idx for idx, name in enumerate(sorted(art.prep_templates[0].keys()), 1)} self.assertEqual(obs, exp) # verify repeat calls are unchanged From 66937cf575cc3823c0c1dc3593d0d0f61596085d Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 12 Dec 2025 15:32:13 -0700 Subject: [PATCH 11/11] Constrain returned values --- qiita_db/metadata_template/prep_template.py | 6 ++++-- qiita_db/metadata_template/sample_template.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py index 43104818c..4932a94c8 100644 --- a/qiita_db/metadata_template/prep_template.py +++ b/qiita_db/metadata_template/prep_template.py @@ -357,8 +357,10 @@ def unique_ids(self): sample_name, prep_sample_idx FROM map_prep_sample_idx - JOIN map_sample_idx USING (sample_idx)""" - qdb.sql_connection.TRN.add(sql) + JOIN map_sample_idx USING (sample_idx) + WHERE prep_idx=%s + """ + qdb.sql_connection.TRN.add(sql, [self._id, ]) # form into a dict mapping = {r[0]: r[1] for r in qdb.sql_connection.TRN.execute_fetchindex()} diff --git a/qiita_db/metadata_template/sample_template.py b/qiita_db/metadata_template/sample_template.py index 5f1731058..c575d78a2 100644 --- a/qiita_db/metadata_template/sample_template.py +++ b/qiita_db/metadata_template/sample_template.py @@ -198,8 +198,10 @@ def unique_ids(self): sql = """SELECT sample_name, sample_idx - FROM map_sample_idx""" - qdb.sql_connection.TRN.add(sql) + FROM map_sample_idx + WHERE study_idx=%s + """ + qdb.sql_connection.TRN.add(sql, [self._id, ]) # form into a dict mapping = {r[0]: r[1] for r in qdb.sql_connection.TRN.execute_fetchindex()}