Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 67 additions & 35 deletions qiita_pet/handlers/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,19 +523,27 @@ def get(self):
valid_data = ['raw', 'biom'] + templates

to_download = []
# for this block to work we need 3 main inputs: prep_id/sample_id, data
# and data_type - if one is missing raise an error, if both
# prep_id/sample_id are defined or data_type doesn't exist in qiita
# we should error
if data is None or (study_id is None and prep_id is None) or \
data not in valid_data:
raise HTTPError(422, reason='You need to specify both data (the '
'data type you want to download - %s) and '
'study_id or prep_id' % '/'.join(valid_data))
elif data_type is not None and data_type not in dtypes:
if data_type is not None and data_type not in dtypes:
raise HTTPError(422, reason='Not a valid data_type. Valid types '
'are: %s' % ', '.join(dtypes))
elif data in templates and prep_id is None and study_id is None:
if data in templates and prep_id is None and study_id is None:
raise HTTPError(422, reason='If downloading a sample or '
'preparation file you need to define study_id or'
' prep_id')
elif data in templates:

# if we get here, then we have two main options: templates or raw/biom;
# however, for raw/biom we need to retrieve the data via the study_id
# or the prep_id so splitting the next block in study_id/pre_id
if data in templates:
if data_type is not None:
raise HTTPError(422, reason='If requesting an information '
'file you cannot specify the data_type')
Expand Down Expand Up @@ -572,47 +580,71 @@ def get(self):
fname, datetime.now().strftime('%m%d%y-%H%M%S'))
self._set_nginx_headers(zip_fn)
else:
study_id = int(study_id)
try:
study = Study(study_id)
except QiitaDBUnknownIDError:
raise HTTPError(422, reason='Study does not exist')
# depending on if we have a study_id or a prep_id, instantiate
# the study for basic permission validations
if study_id is not None:
study_id = int(study_id)
try:
study = Study(study_id)
except QiitaDBUnknownIDError:
raise HTTPError(422, reason='Study does not exist')
zip_fn = 'study_%d_%s_%s.zip' % (
study_id, data, datetime.now().strftime(
'%m%d%y-%H%M%S'))
else:
public_raw_download = study.public_raw_download
if study.status != 'public':
raise HTTPError(404, reason='Study is not public. If this '
'is a mistake contact: %s' %
qiita_config.help_email)
elif data == 'raw' and not public_raw_download:
prep_id = int(prep_id)
try:
prep = PrepTemplate(prep_id)
except QiitaDBUnknownIDError:
raise HTTPError(422, reason='Prep does not exist')
study = Study(prep.study_id)
zip_fn = 'prep_%d_%s_%s.zip' % (
prep_id, data, datetime.now().strftime(
'%m%d%y-%H%M%S'))

public_raw_download = study.public_raw_download
# just to be 100% that the data is public, let's start
# with checking that the study is actually public
if study.status != 'public':
raise HTTPError(404, reason='Study is not public. If this '
'is a mistake contact: %s' %
qiita_config.help_email)
# now let's check that if the data is raw, the study's
# public_raw_download flag is on
if data == 'raw':
if not public_raw_download:
raise HTTPError(422, reason='No raw data access. If this '
'is a mistake contact: %s'
% qiita_config.help_email)
else:
# raw data
if study_id is not None:
artifacts = [a for a in study.artifacts(dtype=data_type)
if not a.parents]
# bioms
if data == 'biom':
artifacts = study.artifacts(
dtype=data_type, artifact_type='BIOM')
for a in artifacts:
if a.visibility != 'public' or a.has_human:
continue
to_download.extend(self._list_artifact_files_nginx(a))

if not to_download:
raise HTTPError(422, reason='Nothing to download. If '
'this is a mistake contact: %s'
% qiita_config.help_email)
else:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is getting to be a lot of nested else. I'm sure the logic works but it makes it quite hard to read.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with you, I think this class shows how it has grown over time, which makes it difficult and complex. Thus, decided to rewrite it. Note that I didn't change the tests as those are checking multiple options, parameters and errors.

self._write_nginx_file_list(to_download)

zip_fn = 'study_%d_%s_%s.zip' % (
study_id, data, datetime.now().strftime(
'%m%d%y-%H%M%S'))
artifacts = [prep.artifact]
else: # this is biom
if study_id is not None:
artifacts = study.artifacts(
dtype=data_type, artifact_type='BIOM')
else:
artifacts = [a for a in
prep.artifact.descendants.nodes()
if a.artifact_type == 'BIOM']

# at this point artifacts has all the available artifact
# so we need to make sure they are public and have no has_human
# to be added to_download
for a in artifacts:
if a.visibility != 'public' or a.has_human:
continue
to_download.extend(self._list_artifact_files_nginx(a))

self._set_nginx_headers(zip_fn)
if not to_download:
raise HTTPError(422, reason='Nothing to download. If '
'this is a mistake contact: %s'
% qiita_config.help_email)

self._write_nginx_file_list(to_download)
self._set_nginx_headers(zip_fn)
self.finish()


Expand Down
20 changes: 20 additions & 0 deletions qiita_pet/test/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,26 @@ def test_download(self):
'mapping_files/5_mapping_file.txt')
self.assertRegex(response.body.decode('ascii'), exp)

# Now let's check download prep with no raw data access
response = self.get('/public_download/?data=raw&prep_id=1')
self.assertTrue(response.reason.startswith('No raw data access.'))

# Now success
Study(1).public_raw_download = True
response = self.get('/public_download/?data=raw&prep_id=1')
self.assertEqual(response.code, 200)
exp = ('- [0-9]* /protected/raw_data/1_s_G1_L001_sequences.fastq.gz '
'raw_data/1_s_G1_L001_sequences.fastq.gz\n- [0-9]* /protected'
'/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz raw_data/'
'1_s_G1_L001_sequences_barcodes.fastq.gz\n- [0-9]* /protected/'
'templates/1_prep_1_qiime_19700101-000000.txt mapping_files/'
'1_mapping_file.txt\n')
self.assertRegex(response.body.decode('ascii'), exp)

# for simplicity, let's just check respose.code
response = self.get('/public_download/?data=biom&prep_id=1')
self.assertEqual(response.code, 200)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May want to test or communicate intended behavior for the conditions you added in the main logic, namely:
if a.visibility != 'public' or a.has_human:

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And what if there are no artifacts available? 422 with "Nothing to download" is that somewhere?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I completely agree, note that those tests are done for when a user uses study_id vs. prep_id. Thus, in stead of creating more tests, I "simplified" the code and added a lot of comments so a study_id test also covers the prep_id test, and viceversa - as they now share the same/main error checking blocks.

def test_download_sample_information(self):
response = self.get('/public_artifact_download/')
self.assertEqual(response.code, 422)
Expand Down
Loading