Merge pull request #3252 from antgonza/2023.02-review

charles-cowart · web-flow · commit bda13f8a20f4 · 2023-02-21T11:00:54.000-08:00
2023.02 review
diff --git a/.github/workflows/qiita-ci.yml b/.github/workflows/qiita-ci.yml
@@ -108,9 +108,9 @@ jobs:
       - name: Install plugins
         shell: bash -l {0}
         run: |
-          wget https://data.qiime2.org/distro/core/qiime2-2019.4-py36-linux-conda.yml
-          conda env create --quiet -n qtp-biom --file qiime2-2019.4-py36-linux-conda.yml
-          rm qiime2-2019.4-py36-linux-conda.yml
+          wget https://data.qiime2.org/distro/core/qiime2-2022.11-py38-linux-conda.yml
+          conda env create --quiet -n qtp-biom --file qiime2-2022.11-py38-linux-conda.yml
+          rm qiime2-2022.11-py38-linux-conda.yml
           export QIITA_SERVER_CERT=`pwd`/qiita_core/support_files/server.crt
           export QIITA_CONFIG_FP=`pwd`/qiita_core/support_files/config_test.cfg
           export REDBIOM_HOST="http://localhost:7379"
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,23 @@
 # Qiita changelog
 
+Version 2023.02
+---------------
+
+* Added "Software and Data Licensing" to the Qiita FAQ page.
+* External resources can now add new sample metadata categories to a study via
+the `/api/v1/study/` endpoint.
+* Added preparation-id to the GUI list of artifacts used in an analysis.
+* Added automatic lower-casing to INSDC null values [#3246](https://github.com/qiita-spots/qiita/issues/3246).
+* ArtifactHandler now returns the file size and full path of the files available in
+an Artifact. This change had two consequences: (1) the plugins now can control
+their behavior based on the file sizes, and (2) all plugins had to be updated to use this new feature.
+* Added [qiita_client.artifact_and_preparation_files](https://github.com/qiita-spots/qiita_client)
+to help plugins filter per_sample_FASTQ based on size and ignore small file sizes.
+* Added qiime2.2022.11 to the system, which updated these plugins: qp-qiime2, qtp-biom, qtp-diversity, qtp-visualization.
+* Added q2-mislabeled to qiime2.2022.11.
+* Added support for Amplicon data to the internal [Sequence Processing Pipeline](https://github.com/qiita-spots/qp-knight-lab-processing).
+
+
 Version 2022.11
 ---------------
 
diff --git a/qiita_core/__init__.py b/qiita_core/__init__.py
@@ -6,4 +6,4 @@
 # The full license is in the file LICENSE, distributed with this software.
 # -----------------------------------------------------------------------------
 
-__version__ = "2022.11"
+__version__ = "2023.02"
diff --git a/qiita_db/__init__.py b/qiita_db/__init__.py
@@ -27,7 +27,7 @@
 from . import user
 from . import processing_job
 
-__version__ = "2022.11"
+__version__ = "2023.02"
 
 __all__ = ["analysis", "artifact",  "archive", "base", "commands",
            "environment_manager", "exceptions", "investigation", "logger",
diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py
@@ -608,7 +608,8 @@ def _clean_validate_template(cls, md_template, study_id,
             for c in _ddf.columns:
                 if set(INSDC_NULL_VALUES) & set(_ddf[c].values):
                     for s, v in _ddf[c].to_dict().items():
-                        md_template[c][s] = INSDC_NULL_VALUES[v]
+                        if v in INSDC_NULL_VALUES:
+                            md_template[c][s] = INSDC_NULL_VALUES[v]
 
         return md_template
 
diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py
@@ -723,7 +723,10 @@ def test_clean_validate_template_error_duplicate_samples(self):
 
     def test_clean_validate_template(self):
         PT = qdb.metadata_template.prep_template.PrepTemplate
-        obs = PT._clean_validate_template(self.metadata, 2)
+        # modify input to make sure we hit all cases
+        md = self.metadata.copy()
+        md.loc['SKB7.640196']['str_column'] = 'UnSpeciFied'
+        obs = PT._clean_validate_template(md, 2)
         metadata_dict = {
             '2.SKB8.640193': {'center_name': 'ANL',
                               'center_project_name': 'Test Project',
@@ -755,7 +758,7 @@ def test_clean_validate_template(self):
                               'center_project_name': 'Test Project',
                               'ebi_submission_accession': None,
                               'emp_status': 'EMP',
-                              'str_column': 'Value for sample 3',
+                              'str_column': 'not applicable',
                               'primer': 'GTGCCAGCMGCCGCGGTAA',
                               'barcode': 'CCTCTGAGAGCT',
                               'run_prefix': "s_G1_L002_sequences",
diff --git a/qiita_pet/__init__.py b/qiita_pet/__init__.py
@@ -6,4 +6,4 @@
 # The full license is in the file LICENSE, distributed with this software.
 # -----------------------------------------------------------------------------
 
-__version__ = "2022.11"
+__version__ = "2023.02"
diff --git a/qiita_pet/handlers/api_proxy/__init__.py b/qiita_pet/handlers/api_proxy/__init__.py
@@ -38,7 +38,7 @@
 from .user import (user_jobs_get_req)
 from .util import check_access, check_fp
 
-__version__ = "2022.11"
+__version__ = "2023.02"
 
 __all__ = ['prep_template_summary_get_req', 'data_types_get_req',
            'study_get_req', 'sample_template_filepaths_get_req',
diff --git a/qiita_pet/support_files/doc/source/faq.rst b/qiita_pet/support_files/doc/source/faq.rst
@@ -381,6 +381,35 @@ downloaded from `qiita.ucsd.edu <https://qiita.ucsd.edu/>` and redbiom, includin
 and metadata, are distributed under the BSD 3-Clause License.
 
 
+Some of the studies have `Qiita-EBI Import` as the PI, why is this?
+-------------------------------------------------------------------
+
+These are studies that were downloaded to Qiita via `qebil <https://github.com/ucsd-cmi/qebil>`_. If you want
+us to add your study, please send us an email.
+
+Now, if you are wondering about the possible "Processing notes", here are their explanations:
+
+-  MISSING: One or more of the fastq files for your study were unavailable for download from
+   EBI/ENA or the downloaded files were found to contain corrupt data and were excluded from our
+   automatic association and processing. A list of the affected samples and their corresponding
+   EBI/ENA ftp links can be found in the .MISSING. preparation information files in the Uploads
+   section of this page. If you would like to attempt to manually download and/or correct the
+   fastq files, please visit the linked EBI/ENA project page in the Study details and follow our
+   instructions for manually associating and processing the files.
+
+- TOOMANYREADS: One or more of the samples for your study were found to have more read
+  files than indicated by the single or paired-end read technology that EBI/ENA indicated was
+  used for processing the sample. This is most likely the case for studies where index reads
+  have been included in a separate file as part of the upload, however our automated system
+  is unable to readily distinguish this. A list of the affected samples and their corresponding
+  EBI/ENA ftp links can be found in the .TOOMANYREADS. preparation information files in the
+  Uploads section of this page. If you would like to attempt to have these samples processed,
+  please visit the linked EBI/ENA project page in the Study details and either a) follow our
+  instructions for manually associating and processing the files here or b) email us to
+  indicate that the study should be processed with the assumption that the first file
+  associated with a sample is an index read file.
+
+
 How to cite Qiita?
 ------------------
 
diff --git a/qiita_pet/support_files/doc/source/processingdata/processing-recommendations.rst b/qiita_pet/support_files/doc/source/processingdata/processing-recommendations.rst
@@ -81,15 +81,24 @@ wetlab processing and we provide the following host references for your convenie
 Note that the command produces up to 6 output artifacts based on the aligner and database selected:
 
 - Alignment Profile: contains the raw alignment file and the no rank classification BIOM table
-- Taxonomic Prediction - phylum: contains the phylum level taxonomic predictions BIOM table
-- Taxonomic Prediction - genus: contains the genus level taxonomic predictions BIOM table
-- Taxonomic Prediction - species: contains the genus level taxonomic predictions BIOM table
 - Per genome Predictions: contains the per genome level taxonomic predictions BIOM table
-- Per gene Predictions: Only WoLr1, contains the per gene level taxonomic predictions BIOM table
+- Per gene Predictions: Only WoLr1 & WoLr2, contains the per gene level taxonomic predictions BIOM table
+- KEGG Pathways: Only WoLr2, contains the functional profile
+- KEGG Ontology (KO): Only WoLr2, contains the functional profile
+- KEGG Enzyme (EZ): Only WoLr2, contains the functional profile
+
 
 .. note::
-   Woltka provides easy transformations for the "per gene Prediction table" to generate functional
-   profiles, `more information <https://github.com/qiyunzhu/woltka/blob/master/doc/wol.md#comparison>`_.
+   Woltka 0.1.4 only produces per-genome, per-gene and functional profiles as we are moving
+   to Operational Genomic Units (OGUs), which have higher resolution than taxonomic units
+   for community ecology, and were shown to deliver stronger biological signals in
+   downstream analyses. For more information please read: `Phylogeny-Aware Analysis of
+   Metagenome Community Ecology Based on Matched Reference Genomes while Bypassing
+   Taxonomy <https://journals.asm.org/doi/10.1128/msystems.00167-22>`_. To work on lower
+   taxonomic levels (like species or genus) you can follow `these instructions
+   <https://github.com/qiyunzhu/woltka/blob/master/doc/collapse.md#collapse-to-level>`_ and use
+   this `lineages.txt <http://ftp.microbio.me/pub/wol2/taxonomy/lineages.txt>`_ file
+   with your collapse command.
 
 Aligners
 ^^^^^^^^
@@ -245,7 +254,7 @@ Metatranscriptome processing
 Qiita currently has one active Metatranscriptome data analysis pipeline, as follows:
 
 #. Ribosomal read filtering via `SortMeRNA <https://pubmed.ncbi.nlm.nih.gov/23071270/>`_; details below. This produces a `Ribosomal reads` and a `Non-ribosomal reads` artifact/
-#. Taxonomic profiling via Woltka; for more information see details above.
+#. Sequence profiling via Woltka; for more information see details above.
 
 Sample processing guidelines for metatranscriptomic data
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/qiita_ware/__init__.py b/qiita_ware/__init__.py
@@ -6,4 +6,4 @@
 # The full license is in the file LICENSE, distributed with this software.
 # -----------------------------------------------------------------------------
 
-__version__ = "2022.11"
+__version__ = "2023.02"
diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
 from setuptools import setup
 from glob import glob
 
-__version__ = "2022.11"
+__version__ = "2023.02"
 
 
 classes = """