From 3061d943b4bc1aba36239fe25292de67e869d381 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Wed, 17 Sep 2025 09:09:36 +0200 Subject: [PATCH 01/21] discriminate nginx and direct requests and either use nginx or tornado file delivery method respectively --- .../cloud_handlers/file_transfer_handlers.py | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 2287c5c31..19a3f78e6 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -37,20 +37,33 @@ def get(self, requested_filepath): raise HTTPError(403, reason=( "The requested file is not present in Qiita's BASE_DATA_DIR!")) - # delivery of the file via nginx requires replacing the basedatadir - # with the prefix defined in the nginx configuration for the - # base_data_dir, '/protected/' by default - protected_filepath = filepath.replace(basedatadir, '/protected') - self.set_header('Content-Type', 'application/octet-stream') self.set_header('Content-Transfer-Encoding', 'binary') - self.set_header('X-Accel-Redirect', protected_filepath) self.set_header('Content-Description', 'File Transfer') self.set_header('Expires', '0') self.set_header('Cache-Control', 'no-cache') - self.set_header('Content-Disposition', - 'attachment; filename=%s' % os.path.basename( - protected_filepath)) + + # We here need to differentiate a request that comes directly to the + # qiita instance (happens in testing) or was redirected through nginx + # (should be the default). If nginx, we can use nginx' fast file + # delivery mechanisms, otherwise, we need to send via slower tornado. + # We indirectly infer this by looking for the "X-Forwarded-For" header, + # which should only exists when redirectred through nginx. + if self.request.headers.get('X-Forwarded-For') is None: + self.set_header('Content-Disposition', + 'attachment; filename=%s' % os.path.basename(filepath)) + with open(filepath, "rb") as f: + self.write(f.read()) + else: + # delivery of the file via nginx requires replacing the basedatadir + # with the prefix defined in the nginx configuration for the + # base_data_dir, '/protected/' by default + protected_filepath = filepath.replace(basedatadir, '/protected') + self.set_header('X-Accel-Redirect', protected_filepath) + self.set_header('Content-Disposition', + 'attachment; filename=%s' % os.path.basename( + protected_filepath)) + self.finish() From cfe606af28c9561034d095edb28a0829b1487031 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Wed, 17 Sep 2025 09:11:37 +0200 Subject: [PATCH 02/21] codestyle --- qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 19a3f78e6..39b435d09 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -50,7 +50,8 @@ def get(self, requested_filepath): # We indirectly infer this by looking for the "X-Forwarded-For" header, # which should only exists when redirectred through nginx. if self.request.headers.get('X-Forwarded-For') is None: - self.set_header('Content-Disposition', + self.set_header( + 'Content-Disposition', 'attachment; filename=%s' % os.path.basename(filepath)) with open(filepath, "rb") as f: self.write(f.read()) @@ -60,7 +61,8 @@ def get(self, requested_filepath): # base_data_dir, '/protected/' by default protected_filepath = filepath.replace(basedatadir, '/protected') self.set_header('X-Accel-Redirect', protected_filepath) - self.set_header('Content-Disposition', + self.set_header( + 'Content-Disposition', 'attachment; filename=%s' % os.path.basename( protected_filepath)) From 71526bbca08d83fcee4d2d969b93c1830777cf21 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Thu, 18 Sep 2025 11:23:51 +0200 Subject: [PATCH 03/21] don't complain about overwriting files IF in test mode --- qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py | 4 +++- .../cloud_handlers/tests/test_file_transfer_handlers.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 39b435d09..6ca91bc35 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -92,7 +92,9 @@ def post(self): filepath = filepath[len(os.sep):] filepath = os.path.abspath(os.path.join(basedatadir, filepath)) - if os.path.exists(filepath): + # prevent overwriting existing files, except in test mode + if os.path.exists(filepath) and \ + (not qiita_config.test_environment): raise HTTPError(403, reason=( "The requested file is already " "present in Qiita's BASE_DATA_DIR!")) diff --git a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py index 12dfac9b9..8967df095 100644 --- a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py @@ -5,6 +5,7 @@ from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb +from qiita_core.qiita_settings import qiita_config class FetchFileFromCentralHandlerTests(OauthTestingBase): @@ -70,7 +71,11 @@ def test_post(self): # check if error is raised, if file already exists with open(fp_source, 'rb') as fh: + # we need to let qiita thinks for this test, to NOT be in test mode + oldval = qiita_config.test_environment + qiita_config.test_environment = False obs = self.post_authed(endpoint, files={'bar/': fh}) + qiita_config.test_environment = oldval self.assertIn("already present in Qiita's BASE_DATA_DIR!", obs.reason) From c7924ca5b19874817cc244ef986295e4ad38ac9f Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Thu, 18 Sep 2025 12:21:32 +0200 Subject: [PATCH 04/21] don't read config from file, but from DB --- .../cloud_handlers/file_transfer_handlers.py | 5 ++--- .../tests/test_file_transfer_handlers.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 6ca91bc35..321ee30f4 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -3,7 +3,7 @@ from tornado.web import HTTPError, RequestHandler from tornado.gen import coroutine -from qiita_core.util import execute_as_transaction +from qiita_core.util import execute_as_transaction, is_test_environment from qiita_db.handlers.oauth2 import authenticate_oauth from qiita_core.qiita_settings import qiita_config @@ -93,8 +93,7 @@ def post(self): filepath = os.path.abspath(os.path.join(basedatadir, filepath)) # prevent overwriting existing files, except in test mode - if os.path.exists(filepath) and \ - (not qiita_config.test_environment): + if os.path.exists(filepath) and (not is_test_environment()): raise HTTPError(403, reason=( "The requested file is already " "present in Qiita's BASE_DATA_DIR!")) diff --git a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py index 8967df095..9a7cec722 100644 --- a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py @@ -5,7 +5,7 @@ from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb -from qiita_core.qiita_settings import qiita_config +from qiita_db.sql_connection import TRN class FetchFileFromCentralHandlerTests(OauthTestingBase): @@ -72,10 +72,14 @@ def test_post(self): # check if error is raised, if file already exists with open(fp_source, 'rb') as fh: # we need to let qiita thinks for this test, to NOT be in test mode - oldval = qiita_config.test_environment - qiita_config.test_environment = False + with TRN: + TRN.add("UPDATE settings SET test = False") + TRN.execute() obs = self.post_authed(endpoint, files={'bar/': fh}) - qiita_config.test_environment = oldval + # reset test mode to true + with TRN: + TRN.add("UPDATE settings SET test = True") + TRN.execute() self.assertIn("already present in Qiita's BASE_DATA_DIR!", obs.reason) From ae888a8ba5bfcb709fff013481db0f99c58855b4 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Thu, 18 Sep 2025 18:18:50 +0200 Subject: [PATCH 05/21] allow to push whole directories to qiita main --- .../cloud_handlers/file_transfer_handlers.py | 43 ++++-- .../tests/test_file_transfer_handlers.py | 125 +++++++++++++++--- 2 files changed, 142 insertions(+), 26 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 321ee30f4..3e9c19283 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -2,6 +2,8 @@ from tornado.web import HTTPError, RequestHandler from tornado.gen import coroutine +import zipfile +from io import BytesIO from qiita_core.util import execute_as_transaction, is_test_environment from qiita_db.handlers.oauth2 import authenticate_oauth @@ -80,31 +82,54 @@ def post(self): # canonic version of base_data_dir basedatadir = os.path.abspath(qiita_config.base_data_dir) stored_files = [] + stored_directories = [] for filespath, filelist in self.request.files.items(): if filespath.startswith(basedatadir): filespath = filespath[len(basedatadir):] for file in filelist: + # differentiate between regular files and whole directories, + # which must be zipped AND the client must provide the + # is_directory='true' body argument. + sent_directory = self.get_body_argument( + 'is_directory', "false") == "true" + filepath = os.path.join(filespath, file['filename']) # remove leading / if filepath.startswith(os.sep): filepath = filepath[len(os.sep):] filepath = os.path.abspath(os.path.join(basedatadir, filepath)) + if sent_directory: + # if a whole directory was send, we want to store it at + # the given dirname of the filepath + filepath = os.path.dirname(filepath) + # prevent overwriting existing files, except in test mode if os.path.exists(filepath) and (not is_test_environment()): raise HTTPError(403, reason=( - "The requested file is already " - "present in Qiita's BASE_DATA_DIR!")) + "The requested %s is already " + "present in Qiita's BASE_DATA_DIR!" % + ('directory' if sent_directory else 'file'))) os.makedirs(os.path.dirname(filepath), exist_ok=True) - with open(filepath, "wb") as f: - f.write(file['body']) - stored_files.append(filepath) - - self.write("Stored %i files into BASE_DATA_DIR of Qiita:\n%s\n" % ( - len(stored_files), - '\n'.join(map(lambda x: ' - %s' % x, stored_files)))) + if sent_directory: + with zipfile.ZipFile(BytesIO(file['body'])) as zf: + zf.extractall(filepath) + stored_directories.append(filepath) + else: + with open(filepath, "wb") as f: + f.write(file['body']) + stored_files.append(filepath) + + for (_type, objs) in [('files', stored_files), + ('directories', stored_directories)]: + if len(objs) > 0: + self.write( + "Stored %i %s into BASE_DATA_DIR of Qiita:\n%s\n" % ( + len(objs), + _type, + '\n'.join(map(lambda x: ' - %s' % x, objs)))) self.finish() diff --git a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py index 9a7cec722..dc602a0f8 100644 --- a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py @@ -1,6 +1,7 @@ from unittest import main -from os.path import exists, basename -from os import remove +from os.path import exists, basename, join, isdir, splitext +from os import remove, makedirs +from shutil import rmtree, make_archive import filecmp from qiita_db.handlers.tests.oauthbase import OauthTestingBase @@ -11,22 +12,21 @@ class FetchFileFromCentralHandlerTests(OauthTestingBase): def setUp(self): super(FetchFileFromCentralHandlerTests, self).setUp() + self.endpoint = '/cloud/fetch_file_from_central/' + self.base_data_dir = qdb.util.get_db_files_base_dir() def test_get(self): - endpoint = '/cloud/fetch_file_from_central/' - base_data_dir = qdb.util.get_db_files_base_dir() - - obs = self.get_authed(endpoint + 'nonexistingfile') + obs = self.get_authed(self.endpoint + 'nonexistingfile') self.assertEqual(obs.status_code, 403) self.assertIn('outside of the BASE_DATA_DIR', obs.reason) obs = self.get_authed( - endpoint + base_data_dir[1:] + '/nonexistingfile') + self.endpoint + self.base_data_dir[1:] + '/nonexistingfile') self.assertEqual(obs.status_code, 403) self.assertIn('The requested file is not present', obs.reason) obs = self.get_authed( - endpoint + base_data_dir[1:] + + self.endpoint + self.base_data_dir[1:] + '/raw_data/FASTA_QUAL_preprocessing.fna') self.assertEqual(obs.status_code, 200) self.assertIn('FLP3FBN01ELBSX length=250 xy=1766_01', str(obs.content)) @@ -35,11 +35,19 @@ def test_get(self): class PushFileToCentralHandlerTests(OauthTestingBase): def setUp(self): super(PushFileToCentralHandlerTests, self).setUp() + self.endpoint = '/cloud/push_file_to_central/' + self.base_data_dir = qdb.util.get_db_files_base_dir() + self._clean_up_files = [] + + def tearDown(self): + for fp in self._clean_up_files: + if exists(fp): + if isdir(fp): + rmtree(fp) + else: + remove(fp) def test_post(self): - endpoint = '/cloud/push_file_to_central/' - base_data_dir = qdb.util.get_db_files_base_dir() - # create a test file "locally", i.e. in current working directory fp_source = 'foo.bar' with open(fp_source, 'w') as f: @@ -47,7 +55,7 @@ def test_post(self): self._files_to_remove.append(fp_source) # if successful, expected location of the file in BASE_DATA_DIR - fp_target = base_data_dir + '/bar/' + basename(fp_source) + fp_target = self.base_data_dir + '/bar/' + basename(fp_source) self._files_to_remove.append(fp_target) # create a second test file @@ -55,16 +63,16 @@ def test_post(self): with open(fp_source2, 'w') as f: f.write("this is another test\n") self._files_to_remove.append(fp_source2) - fp_target2 = base_data_dir + '/barr/' + basename(fp_source2) + fp_target2 = self.base_data_dir + '/barr/' + basename(fp_source2) self._files_to_remove.append(fp_target2) # test raise error if no file is given - obs = self.post_authed(endpoint) + obs = self.post_authed(self.endpoint) self.assertEqual(obs.reason, "No files to upload defined!") # test correct mechanism with open(fp_source, 'rb') as fh: - obs = self.post_authed(endpoint, files={'bar/': fh}) + obs = self.post_authed(self.endpoint, files={'bar/': fh}) self.assertIn('Stored 1 files into BASE_DATA_DIR of Qiita', str(obs.content)) self.assertTrue(filecmp.cmp(fp_source, fp_target, shallow=False)) @@ -75,7 +83,7 @@ def test_post(self): with TRN: TRN.add("UPDATE settings SET test = False") TRN.execute() - obs = self.post_authed(endpoint, files={'bar/': fh}) + obs = self.post_authed(self.endpoint, files={'bar/': fh}) # reset test mode to true with TRN: TRN.add("UPDATE settings SET test = True") @@ -89,7 +97,7 @@ def test_post(self): with open(fp_source, 'rb') as fh1: with open(fp_source2, 'rb') as fh2: obs = self.post_authed( - endpoint, files={'bar/': fh1, 'barr/': fh2}) + self.endpoint, files={'bar/': fh1, 'barr/': fh2}) self.assertIn('Stored 2 files into BASE_DATA_DIR of Qiita', str(obs.content)) self.assertTrue(filecmp.cmp(fp_source, fp_target, @@ -97,6 +105,89 @@ def test_post(self): self.assertTrue(filecmp.cmp(fp_source2, fp_target2, shallow=False)) + def _create_test_dir(self, prefix=None): + """Creates a test directory with files and subdirs.""" + # prefix + # |- testdir/ + # |---- fileA.txt + # |---- subdirA_l1/ + # |-------- fileB.fna + # |-------- subdirC_l2/ + # |------------ fileC.log + # |------------ fileD.seq + # |---- subdirB_l1/ + # |-------- fileE.sff + if (prefix is not None) and (prefix != ""): + prefix = join(prefix, 'testdir') + else: + prefix = 'testdir' + + for dir in [join(prefix, 'subdirA_l1', 'subdirC_l2'), + join(prefix, 'subdirB_l1')]: + if not exists(dir): + makedirs(dir) + for file, cont in [(join(prefix, 'fileA.txt'), 'contentA'), + (join(prefix, 'subdirA_l1', + 'fileB.fna'), 'this is B'), + (join(prefix, 'subdirA_l1', 'subdirC_l2', + 'fileC.log'), 'call me c'), + (join(prefix, 'subdirA_l1', 'subdirC_l2', + 'fileD.seq'), 'I d'), + (join(prefix, 'subdirB_l1', 'fileE.sff'), 'oh e')]: + with open(file, "w") as f: + f.write(cont + "\n") + self._clean_up_files.append(prefix) + + return prefix + + def _send_dir(self, fp_zipped='tmp_senddir.zip'): + dir = self._create_test_dir(prefix='/tmp/try1') + + make_archive(splitext(fp_zipped)[0], 'zip', dir) + self._clean_up_files.append(fp_zipped) + + with open(fp_zipped, 'rb') as fh: + obs = self.post_authed( + self.endpoint, + data={'is_directory': 'true'}, + files={dir: fh}) + + return obs + + def test_post_directory(self): + obs = self._send_dir() + self.assertTrue(obs.status_code == 200) + qmain_dir = obs.content.decode().split('\n')[1].split(' - ')[-1] + + self.assertTrue( + len(filecmp.cmpfiles( + '/tmp/try1/testdir/', qmain_dir, + ['fileA.txt', + 'subdirA_l1/fileB.fna', + 'subdirA_l1/subdirC_l2/fileC.log', + 'subdirA_l1/subdirC_l2/fileD.seq', + 'subdirB_l1/fileE.sff'])[0]) == 5) + + def test_post_directory_testexisting(self): + # check if error is raised, if directory already exists + # send first time, should be OK + obs = self._send_dir() + self.assertTrue(obs.status_code == 200) + + # we need to let qiita thinks for this test, to NOT be in test mode + with TRN: + TRN.add("UPDATE settings SET test = False") + TRN.execute() + # send again, should fal + obs = self._send_dir() + # reset test mode to true + with TRN: + TRN.add("UPDATE settings SET test = True") + TRN.execute() + + self.assertIn("already present in Qiita's BASE_DATA_DIR!", + obs.reason) + if __name__ == "__main__": main() From 81cb696363e992b2a1477dbd26c898ba383fdd85 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Thu, 6 Nov 2025 12:11:38 +0100 Subject: [PATCH 06/21] extended FetchFileFromCentralHandler to directories, but very limited to avoid lazy client from downloading whole BASE_DATA_DIR --- .../cloud_handlers/file_transfer_handlers.py | 132 ++++++++++++++++-- .../tests/test_file_transfer_handlers.py | 73 +++++++++- 2 files changed, 189 insertions(+), 16 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 3e9c19283..30f7e8ba1 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -1,4 +1,5 @@ import os +from pathlib import Path from tornado.web import HTTPError, RequestHandler from tornado.gen import coroutine @@ -7,7 +8,69 @@ from qiita_core.util import execute_as_transaction, is_test_environment from qiita_db.handlers.oauth2 import authenticate_oauth +from qiita_pet.handlers.download import BaseHandlerDownload from qiita_core.qiita_settings import qiita_config +import qiita_db as qdb + + +def is_directory(filepath): + """Tests if given filepath is listed as directory in Qiita DB. + + Note: this is independent of the actual filesystem, only checks DB entries. + + Parameters + ---------- + filepath : str + The filepath to the directory that shall be tested for beeing listed + as directory in Qiita's DB + + Returns + ------- + Bool: True if the last part of the filepath is contained as filepath in + qiita.filepath AND part after base_data_dir is a mountpoint in + qiita.data_directory AND the filepath_type is 'directory'. + False otherwise. + """ + working_filepath = filepath + # chop off trailing / to ensure we point to a directory name properly + if working_filepath.endswith(os.sep): + working_filepath = os.path.dirname(working_filepath) + + dirname = os.path.basename(working_filepath) + # file-objects foo are stored in //foo. To + # determine mountpoint from a given filepath, we need to chop of + # base_data_dir and then take the top directory level. + # Checking if user provided filepath contains a valid mountpoint adds + # to preventing users to download arbitrary file contents + try: + mount_dirname = Path(working_filepath).relative_to( + Path(qiita_config.base_data_dir)).parts[0] + except ValueError: + # base_data_dir is no proper prefix of given filepath + return False + except IndexError: + # only base_data_dir given + return False + if dirname == '' or mount_dirname == '': + # later should never be true due to above IndexError, but better save + # than sorry + return False + + with qdb.sql_connection.TRN: + # find entries that + # a) are of filepath_type "directory" + # b) whose filepath ends with directory name + # c) whose mountpoint matches the provided parent_directory + sql = """SELECT filepath_id + FROM qiita.filepath + JOIN qiita.filepath_type USING (filepath_type_id) + JOIN qiita.data_directory USING (data_directory_id) + WHERE filepath_type='directory' AND + filepath=%s AND + position(%s in mountpoint)>0;""" + qdb.sql_connection.TRN.add(sql, [dirname, mount_dirname]) + hits = qdb.sql_connection.TRN.execute_fetchflatten() + return len(hits) > 0 class FetchFileFromCentralHandler(RequestHandler): @@ -39,6 +102,18 @@ def get(self, requested_filepath): raise HTTPError(403, reason=( "The requested file is not present in Qiita's BASE_DATA_DIR!")) + filename_directory = "qiita-main-data.zip" + if os.path.isdir(filepath): + # Test if this directory is manages by Qiita's DB as directory + # Thus we can prevent that a lazy client simply downloads the whole + # basa_data_directory + if not is_directory(filepath): + raise HTTPError(403, reason=( + "You cannot access this directory!")) + else: + # flag the response for qiita_client + self.set_header('Is-Qiita-Directory', 'yes') + self.set_header('Content-Type', 'application/octet-stream') self.set_header('Content-Transfer-Encoding', 'binary') self.set_header('Content-Description', 'File Transfer') @@ -52,21 +127,50 @@ def get(self, requested_filepath): # We indirectly infer this by looking for the "X-Forwarded-For" header, # which should only exists when redirectred through nginx. if self.request.headers.get('X-Forwarded-For') is None: - self.set_header( - 'Content-Disposition', - 'attachment; filename=%s' % os.path.basename(filepath)) - with open(filepath, "rb") as f: - self.write(f.read()) + # delivery via tornado + if not is_directory(filepath): + # a single file + self.set_header( + 'Content-Disposition', + 'attachment; filename=%s' % os.path.basename(filepath)) + with open(filepath, "rb") as f: + self.write(f.read()) + else: + # a whole directory + memfile = BytesIO() + with zipfile.ZipFile(memfile, 'w', zipfile.ZIP_DEFLATED) as zf: + for root, dirs, files in os.walk(filepath): + for file in files: + full_path = os.path.join(root, file) + # make path in zip file relative + rel_path = os.path.relpath(full_path, filepath) + zf.write(full_path, rel_path) + memfile.seek(0) + self.set_header('Content-Type', 'application/zip') + self.set_header('Content-Disposition', + 'attachment; filename=%s' % filename_directory) + self.write(memfile.read()) else: - # delivery of the file via nginx requires replacing the basedatadir - # with the prefix defined in the nginx configuration for the - # base_data_dir, '/protected/' by default - protected_filepath = filepath.replace(basedatadir, '/protected') - self.set_header('X-Accel-Redirect', protected_filepath) - self.set_header( - 'Content-Disposition', - 'attachment; filename=%s' % os.path.basename( - protected_filepath)) + # delivery via nginx + if not is_directory(filepath): + # a single file: + # delivery of the file via nginx requires replacing the + # basedatadir with the prefix defined in the nginx + # configuration for the base_data_dir, '/protected/' by default + protected_filepath = filepath.replace(basedatadir, + '/protected') + self.set_header('X-Accel-Redirect', protected_filepath) + self.set_header( + 'Content-Disposition', + 'attachment; filename=%s' % os.path.basename( + protected_filepath)) + else: + # a whole directory + to_download = BaseHandlerDownload._list_dir_files_nginx( + self, filepath) + BaseHandlerDownload._write_nginx_file_list(self, to_download) + BaseHandlerDownload._set_nginx_headers( + self, filename_directory) self.finish() diff --git a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py index dc602a0f8..4cf4c3100 100644 --- a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py @@ -1,5 +1,5 @@ from unittest import main -from os.path import exists, basename, join, isdir, splitext +from os.path import exists, basename, join, isdir, splitext, dirname from os import remove, makedirs from shutil import rmtree, make_archive import filecmp @@ -7,13 +7,22 @@ from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb from qiita_db.sql_connection import TRN - +from qiita_pet.handlers.cloud_handlers.file_transfer_handlers import * class FetchFileFromCentralHandlerTests(OauthTestingBase): def setUp(self): super(FetchFileFromCentralHandlerTests, self).setUp() self.endpoint = '/cloud/fetch_file_from_central/' self.base_data_dir = qdb.util.get_db_files_base_dir() + self._clean_up_files = [] + + def tearDown(self): + for fp in self._clean_up_files: + if exists(fp): + if isdir(fp): + rmtree(fp) + else: + remove(fp) def test_get(self): obs = self.get_authed(self.endpoint + 'nonexistingfile') @@ -31,6 +40,34 @@ def test_get(self): self.assertEqual(obs.status_code, 200) self.assertIn('FLP3FBN01ELBSX length=250 xy=1766_01', str(obs.content)) + def test_get_directory(self): + # a directory that exists BUT is not managed as a directory by Qiita + obs = self.get_authed( + self.endpoint + self.base_data_dir[1:] + '/BIOM/7') + self.assertEqual(obs.status_code, 403) + self.assertIn('You cannot access this directory', obs.reason) + + # create directory and file for a negative test as mountpoint is not + # correct. 2_test_folder is registered in DB through + # populate_test_db.sql + fp_testfolder = join(self.base_data_dir, 'wrongmount', '2_test_folder') + makedirs(fp_testfolder, exist_ok=True) + PushFileToCentralHandlerTests._create_test_dir(self, fp_testfolder) + self._clean_up_files.append(fp_testfolder) + obs = self.get_authed(self.endpoint + fp_testfolder[1:]) + self.assertEqual(obs.status_code, 403) + self.assertIn('You cannot access this directory', obs.reason) + + # create directory and file for a positive test. 2_test_folder is + # registered in DB through populate_test_db.sql + fp_testfolder = join(self.base_data_dir, 'job', '2_test_folder') + makedirs(fp_testfolder, exist_ok=True) + PushFileToCentralHandlerTests._create_test_dir(self, fp_testfolder) + self._clean_up_files.append(fp_testfolder) + obs = self.get_authed(self.endpoint + fp_testfolder[1:]) + self.assertEqual(obs.status_code, 200) + self.assertIn('call me c', str(obs.content)) + class PushFileToCentralHandlerTests(OauthTestingBase): def setUp(self): @@ -189,5 +226,37 @@ def test_post_directory_testexisting(self): obs.reason) +class UtilsTests(OauthTestingBase): + def setUp(self): + self.base_data_dir = qdb.util.get_db_files_base_dir() + self._files_to_remove = [] + + def test_is_directory(self): + obs = is_directory(join('/wrong_root', 'karl', 'heinz')) + self.assertFalse(obs) + + # no path given + obs = is_directory('') + self.assertFalse(obs) + + # just pointing to BASE_DATA_DIR, i.e. no mountpoint given + obs = is_directory(self.base_data_dir) + self.assertFalse(obs) + + # existing dir, but not accessible as not managed by Qiita DB as dir + obs = is_directory(join(self.base_data_dir, 'BIOM')) + self.assertFalse(obs) + + # managed directory, but wrong mountpoint + fp_testfolder = join(self.base_data_dir, 'wrongmount', '2_test_folder') + obs = is_directory(fp_testfolder) + self.assertFalse(obs) + + # positive test + fp_testfolder = join(self.base_data_dir, 'job', '2_test_folder') + obs = is_directory(fp_testfolder) + self.assertTrue(obs) + + if __name__ == "__main__": main() From 8ce0cdb9007475ce00b7fc9b8954d5a6f410a556 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Thu, 6 Nov 2025 12:14:36 +0100 Subject: [PATCH 07/21] assert presence/absence of directory transfer flag --- .../cloud_handlers/tests/test_file_transfer_handlers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py index 4cf4c3100..295469bed 100644 --- a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py @@ -40,6 +40,8 @@ def test_get(self): self.assertEqual(obs.status_code, 200) self.assertIn('FLP3FBN01ELBSX length=250 xy=1766_01', str(obs.content)) + self.assertNotIn('Is-Qiita-Directory', obs.headers.keys()) + def test_get_directory(self): # a directory that exists BUT is not managed as a directory by Qiita obs = self.get_authed( @@ -67,6 +69,7 @@ def test_get_directory(self): obs = self.get_authed(self.endpoint + fp_testfolder[1:]) self.assertEqual(obs.status_code, 200) self.assertIn('call me c', str(obs.content)) + self.assertIn('Is-Qiita-Directory', obs.headers.keys()) class PushFileToCentralHandlerTests(OauthTestingBase): From 736d1fdd47131cfe0a8ba05bd67dc00689d5fb28 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Thu, 6 Nov 2025 12:19:27 +0100 Subject: [PATCH 08/21] codestyle --- .../cloud_handlers/tests/test_file_transfer_handlers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py index 295469bed..84b3d140b 100644 --- a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py @@ -1,5 +1,5 @@ from unittest import main -from os.path import exists, basename, join, isdir, splitext, dirname +from os.path import exists, basename, join, isdir, splitext from os import remove, makedirs from shutil import rmtree, make_archive import filecmp @@ -7,7 +7,9 @@ from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb from qiita_db.sql_connection import TRN -from qiita_pet.handlers.cloud_handlers.file_transfer_handlers import * +from qiita_pet.handlers.cloud_handlers.file_transfer_handlers import \ + is_directory + class FetchFileFromCentralHandlerTests(OauthTestingBase): def setUp(self): From c6f8357f555a7cf500f593630710490223426e02 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Fri, 7 Nov 2025 10:22:41 +0100 Subject: [PATCH 09/21] adding debug information --- qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 30f7e8ba1..833113109 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -220,6 +220,9 @@ def post(self): os.makedirs(os.path.dirname(filepath), exist_ok=True) if sent_directory: with zipfile.ZipFile(BytesIO(file['body'])) as zf: + import sys + print("üüüüüüüü qiita filepath=%s" % filepath, file=sys.stderr) + print("üüüüüüüü qiita inventory:%s\n" % zf.filelist, file=sys.stderr) zf.extractall(filepath) stored_directories.append(filepath) else: From 1ff9892012d7efbc00bb66858758b05b64a7eb05 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Fri, 7 Nov 2025 10:34:40 +0100 Subject: [PATCH 10/21] move debug to individual files --- qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 833113109..298f930e4 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -220,12 +220,11 @@ def post(self): os.makedirs(os.path.dirname(filepath), exist_ok=True) if sent_directory: with zipfile.ZipFile(BytesIO(file['body'])) as zf: - import sys - print("üüüüüüüü qiita filepath=%s" % filepath, file=sys.stderr) - print("üüüüüüüü qiita inventory:%s\n" % zf.filelist, file=sys.stderr) zf.extractall(filepath) stored_directories.append(filepath) else: + with open("/tmp/stefan.log", "a") as f: + f.write("üüüüüüüü qiita filepath=%s\n" % filepath) with open(filepath, "wb") as f: f.write(file['body']) stored_files.append(filepath) From 4bd4a18d7ee2669e490eed489245e1a93a8ab0a9 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Fri, 7 Nov 2025 11:21:23 +0100 Subject: [PATCH 11/21] more debug when composing zip --- qiita_pet/handlers/download.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index 60e029f29..8a9485684 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -150,6 +150,9 @@ def _write_nginx_file_list(self, to_download): for fp, fp_name, fp_checksum, fp_size in to_download]) self.set_header('X-Archive-Files', 'zip') + with open("/tmp/stefan.log", "a") as f: + f.write("üüüüüüüü all_files=%s\n" % all_files) + self.write("%s\n" % all_files) def _set_nginx_headers(self, fname): @@ -298,6 +301,10 @@ def get(self, study_id): self._write_nginx_file_list(to_download) + with open("/logs/download.log", "a") as f: + f.write("DownloadRawData::get(study_id=%s), to_download=%s\n" % (study_id, to_download)) + + zip_fn = 'study_raw_data_%d_%s.zip' % ( study_id, datetime.now().strftime('%m%d%y-%H%M%S')) From 14d43f1231da105f7d44b6f155b7e87238e58ca2 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Fri, 7 Nov 2025 11:27:36 +0100 Subject: [PATCH 12/21] debug --- qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 298f930e4..8a0a7ea4c 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -168,6 +168,8 @@ def get(self, requested_filepath): # a whole directory to_download = BaseHandlerDownload._list_dir_files_nginx( self, filepath) + with open("/tmp/stefan.log", "a") as f: + f.write("üüüüüüüü to_download=%s\n" % to_download) BaseHandlerDownload._write_nginx_file_list(self, to_download) BaseHandlerDownload._set_nginx_headers( self, filename_directory) From fc367bbf8a55a3895af17c4394b85e88b99f9e52 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Fri, 7 Nov 2025 11:50:28 +0100 Subject: [PATCH 13/21] modify nginx file list for ZIP --- .../handlers/cloud_handlers/file_transfer_handlers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 8a0a7ea4c..956e0af6e 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -145,6 +145,8 @@ def get(self, requested_filepath): # make path in zip file relative rel_path = os.path.relpath(full_path, filepath) zf.write(full_path, rel_path) + with open("/tmp/stefan.log", "a") as f: + f.write("üüüüüüüü da bin ich baff=%s %s\n" % (full_path, rel_path)) memfile.seek(0) self.set_header('Content-Type', 'application/zip') self.set_header('Content-Disposition', @@ -168,6 +170,12 @@ def get(self, requested_filepath): # a whole directory to_download = BaseHandlerDownload._list_dir_files_nginx( self, filepath) + # above function adds filepath to located files, which is + # different from the non-nginx version. Correct here: + to_download = [ + (fp, rel_path(fp_name, filepath), fp_checksum, fp_size) + for fp, fp_name, fp_checksum, fp_size + in to_download] with open("/tmp/stefan.log", "a") as f: f.write("üüüüüüüü to_download=%s\n" % to_download) BaseHandlerDownload._write_nginx_file_list(self, to_download) From 043c6beec29c3bd3edecdc4f5592db3b537415b3 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Fri, 7 Nov 2025 11:55:57 +0100 Subject: [PATCH 14/21] use correct function --- qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 956e0af6e..e425fe217 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -173,7 +173,7 @@ def get(self, requested_filepath): # above function adds filepath to located files, which is # different from the non-nginx version. Correct here: to_download = [ - (fp, rel_path(fp_name, filepath), fp_checksum, fp_size) + (fp, os.path.relpath(fp_name, filepath), fp_checksum, fp_size) for fp, fp_name, fp_checksum, fp_size in to_download] with open("/tmp/stefan.log", "a") as f: From 3cb9cf786691dd3429bcbf9ecd2333852da876df Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Fri, 7 Nov 2025 12:28:12 +0100 Subject: [PATCH 15/21] fix path computation --- .../handlers/cloud_handlers/file_transfer_handlers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index e425fe217..c1b9ac059 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -170,10 +170,16 @@ def get(self, requested_filepath): # a whole directory to_download = BaseHandlerDownload._list_dir_files_nginx( self, filepath) + + # fp_subdir is the part of the filepath the user requested, + # without QIITA_BASE_DIR + fp_subdir = os.path.relpath(filepath, basedatadir) + # above function adds filepath to located files, which is # different from the non-nginx version. Correct here: to_download = [ - (fp, os.path.relpath(fp_name, filepath), fp_checksum, fp_size) + (fp, os.path.relpath(fp_name, fp_subdir), fp_checksum, + fp_size) for fp, fp_name, fp_checksum, fp_size in to_download] with open("/tmp/stefan.log", "a") as f: From 2cc787dd15b3030b76addcbb431586938d298450 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Fri, 7 Nov 2025 12:36:57 +0100 Subject: [PATCH 16/21] clean up --- .../cloud_handlers/file_transfer_handlers.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index c1b9ac059..74f994c79 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -145,8 +145,6 @@ def get(self, requested_filepath): # make path in zip file relative rel_path = os.path.relpath(full_path, filepath) zf.write(full_path, rel_path) - with open("/tmp/stefan.log", "a") as f: - f.write("üüüüüüüü da bin ich baff=%s %s\n" % (full_path, rel_path)) memfile.seek(0) self.set_header('Content-Type', 'application/zip') self.set_header('Content-Disposition', @@ -176,14 +174,19 @@ def get(self, requested_filepath): fp_subdir = os.path.relpath(filepath, basedatadir) # above function adds filepath to located files, which is - # different from the non-nginx version. Correct here: + # different from the non-nginx version, e.g. + # fp = /protected/job/2_test_folder/testdir/fileA.txt + # fp_name = job/2_test_folder/testdir/fileA.txt + # where "job/2_test_folder" is what user requested and + # "testdir/fileA.txt" is a file within this directory. + # When extracting by qiita_client, the "job/2_test_folder" + # part would be added twice (one by user request, second by + # unzipping). Therefore, we need to correct these names here: to_download = [ (fp, os.path.relpath(fp_name, fp_subdir), fp_checksum, fp_size) for fp, fp_name, fp_checksum, fp_size in to_download] - with open("/tmp/stefan.log", "a") as f: - f.write("üüüüüüüü to_download=%s\n" % to_download) BaseHandlerDownload._write_nginx_file_list(self, to_download) BaseHandlerDownload._set_nginx_headers( self, filename_directory) @@ -239,8 +242,6 @@ def post(self): zf.extractall(filepath) stored_directories.append(filepath) else: - with open("/tmp/stefan.log", "a") as f: - f.write("üüüüüüüü qiita filepath=%s\n" % filepath) with open(filepath, "wb") as f: f.write(file['body']) stored_files.append(filepath) From 94417720f30fc46ac32c48657cc582b78c9c76d0 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Fri, 7 Nov 2025 12:38:31 +0100 Subject: [PATCH 17/21] clean download.py --- qiita_pet/handlers/download.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index 8a9485684..60e029f29 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -150,9 +150,6 @@ def _write_nginx_file_list(self, to_download): for fp, fp_name, fp_checksum, fp_size in to_download]) self.set_header('X-Archive-Files', 'zip') - with open("/tmp/stefan.log", "a") as f: - f.write("üüüüüüüü all_files=%s\n" % all_files) - self.write("%s\n" % all_files) def _set_nginx_headers(self, fname): @@ -301,10 +298,6 @@ def get(self, study_id): self._write_nginx_file_list(to_download) - with open("/logs/download.log", "a") as f: - f.write("DownloadRawData::get(study_id=%s), to_download=%s\n" % (study_id, to_download)) - - zip_fn = 'study_raw_data_%d_%s.zip' % ( study_id, datetime.now().strftime('%m%d%y-%H%M%S')) From 017b15030ea64bdb0ab924963d2b7bf0a03bb957 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Wed, 12 Nov 2025 12:15:16 +0100 Subject: [PATCH 18/21] add ability to delete files/dirs through API, but only in qiita test mode --- qiita_pet/handlers/cloud_handlers/__init__.py | 9 ++- .../cloud_handlers/file_transfer_handlers.py | 49 +++++++++++- .../tests/test_file_transfer_handlers.py | 78 +++++++++++++++++++ 3 files changed, 134 insertions(+), 2 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/__init__.py b/qiita_pet/handlers/cloud_handlers/__init__.py index d10344a05..7cc90db4f 100644 --- a/qiita_pet/handlers/cloud_handlers/__init__.py +++ b/qiita_pet/handlers/cloud_handlers/__init__.py @@ -1,5 +1,7 @@ from .file_transfer_handlers import (FetchFileFromCentralHandler, - PushFileToCentralHandler) + PushFileToCentralHandler, + DeleteFileFromCentralHandler) +from qiita_core.util import is_test_environment __all__ = ['FetchFileFromCentralHandler'] @@ -7,3 +9,8 @@ (r"/cloud/fetch_file_from_central/(.*)", FetchFileFromCentralHandler), (r"/cloud/push_file_to_central/", PushFileToCentralHandler) ] + +if is_test_environment(): + ENDPOINTS.append( + (r"/cloud/delete_file_from_central/(.*)", + DeleteFileFromCentralHandler)) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 74f994c79..6fd390c01 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -5,6 +5,7 @@ from tornado.gen import coroutine import zipfile from io import BytesIO +from shutil import rmtree from qiita_core.util import execute_as_transaction, is_test_environment from qiita_db.handlers.oauth2 import authenticate_oauth @@ -104,7 +105,7 @@ def get(self, requested_filepath): filename_directory = "qiita-main-data.zip" if os.path.isdir(filepath): - # Test if this directory is manages by Qiita's DB as directory + # Test if this directory is managed by Qiita's DB as directory # Thus we can prevent that a lazy client simply downloads the whole # basa_data_directory if not is_directory(filepath): @@ -256,3 +257,49 @@ def post(self): '\n'.join(map(lambda x: ' - %s' % x, objs)))) self.finish() + + +class DeleteFileFromCentralHandler(RequestHandler): + # Note: this function is NOT available in productive instances! + @authenticate_oauth + @coroutine + @execute_as_transaction + def get(self, requested_filepath): + if not is_test_environment(): + raise HTTPError(403, reason=( + "You cannot delete files through this API endpoint, when " + "Qiita is not in test-mode!")) + + # ensure we have an absolute path, i.e. starting at / + filepath = os.path.join(os.path.sep, requested_filepath) + # use a canonic version of the filepath + filepath = os.path.abspath(filepath) + + # canonic version of base_data_dir + basedatadir = os.path.abspath(qiita_config.base_data_dir) + + if not filepath.startswith(basedatadir): + # attempt to access files outside of the BASE_DATA_DIR + raise HTTPError(403, reason=( + "You cannot delete file '%s', which is outside of " + "the BASE_DATA_DIR of Qiita!" % filepath)) + + if not os.path.exists(filepath): + raise HTTPError(403, reason=( + "The requested file %s is not present " + "in Qiita's BASE_DATA_DIR!" % filepath)) + + if is_directory(filepath): + rmtree(filepath) + self.write("Deleted directory %s from BASE_DATA_DIR of QIita" % + filepath) + else: + if os.path.isdir(filepath): + raise HTTPError(403, reason=( + "You requested to delete directory %s, which is not " + "managed by Qiita as a directory!" % filepath)) + os.remove(filepath) + self.write("Deleted file %s from BASE_DATA_DIR of Qiita" % + filepath) + + self.finish() diff --git a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py index 84b3d140b..1ef705262 100644 --- a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py @@ -263,5 +263,83 @@ def test_is_directory(self): self.assertTrue(obs) +class DeleteFileFromCentralHandlerTests(OauthTestingBase): + def setUp(self): + super(DeleteFileFromCentralHandlerTests, self).setUp() + self.endpoint = '/cloud/delete_file_from_central/' + self.base_data_dir = qdb.util.get_db_files_base_dir() + self._clean_up_files = [] + + def tearDown(self): + for fp in self._clean_up_files: + if exists(fp): + if isdir(fp): + rmtree(fp) + else: + remove(fp) + + def test_post(self): + # check if error is raised when NOT providing a filepath + obs = self.get_authed(self.endpoint) + self.assertEqual(obs.status_code, 403) + self.assertIn("You cannot delete file '/', which", obs.reason) + + # check if error is raised when deleting something in productive mode + # we need to let qiita thinks for this test, to NOT be in test mode + with TRN: + TRN.add("UPDATE settings SET test = False") + TRN.execute() + obs = self.get_authed(self.endpoint) + with TRN: + TRN.add("UPDATE settings SET test = True") + TRN.execute() + self.assertEqual(obs.status_code, 403) + self.assertEqual("You cannot delete files through this API endpoint" + ", when Qiita is not in test-mode!", obs.reason) + + # check if error is raised when deleting existing file outside of base + # dir + obs = self.get_authed(self.endpoint + 'home') + self.assertEqual(obs.status_code, 403) + self.assertIn("You cannot delete file '/home', which", obs.reason) + + # check if a file can be deleted + # step 1: create file + fp_file = join(self.base_data_dir, 'deleteme') + with open(fp_file, 'w') as f: + f.write("this file shall be deleted") + self._clean_up_files.append(fp_file) + # step 2: ensure file exists + self.assertTrue(exists(fp_file)) + # step 3: delete file via API + obs = self.get_authed(self.endpoint + fp_file) + self.assertEqual(obs.status_code, 200) + self.assertIn("Deleted file %s from BASE_DATA_DIR" % fp_file, + str(obs.content)) + # step 4: ensure file does not exist anymore + self.assertFalse(exists(fp_file)) + + # check that only directory managed by qiita DB can be deleted + obs = self.get_authed(self.endpoint + self.base_data_dir + '/BIOM') + self.assertEqual(obs.status_code, 403) + self.assertIn("which is not managed by Qiita as a directory", + obs.reason) + + # check if a directory can be deleted + # step 1: create directory + fp_dir = join(self.base_data_dir, 'job/2_test_folder') + makedirs(fp_dir) + self._clean_up_files.append(fp_dir) + # step 2: ensure file exists + self.assertTrue(exists(fp_dir)) + # step 3: delete file via API + obs = self.get_authed(self.endpoint + fp_dir) + self.assertEqual(obs.status_code, 200) + self.assertIn("Deleted directory %s from BASE_DATA_DIR" % fp_dir, + str(obs.content)) + # step 4: ensure file does not exist anymore + self.assertFalse(exists(fp_dir)) + + if __name__ == "__main__": main() From 7604d84df0baeab15e82f5fa1fa205d98a2dd2d3 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Wed, 12 Nov 2025 12:17:34 +0100 Subject: [PATCH 19/21] also delete non managed dirs --- qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index 6fd390c01..cb11108c1 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -289,15 +289,11 @@ def get(self, requested_filepath): "The requested file %s is not present " "in Qiita's BASE_DATA_DIR!" % filepath)) - if is_directory(filepath): + if os.path.isdir(filepath): rmtree(filepath) self.write("Deleted directory %s from BASE_DATA_DIR of QIita" % filepath) else: - if os.path.isdir(filepath): - raise HTTPError(403, reason=( - "You requested to delete directory %s, which is not " - "managed by Qiita as a directory!" % filepath)) os.remove(filepath) self.write("Deleted file %s from BASE_DATA_DIR of Qiita" % filepath) From 50a178815224b2260c0af5af603565eebebb1e2c Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Wed, 12 Nov 2025 12:36:52 +0100 Subject: [PATCH 20/21] avoid deleting BIOM sub-dir :-/ --- .../cloud_handlers/tests/test_file_transfer_handlers.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py index 1ef705262..9bf10c053 100644 --- a/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/tests/test_file_transfer_handlers.py @@ -319,15 +319,9 @@ def test_post(self): # step 4: ensure file does not exist anymore self.assertFalse(exists(fp_file)) - # check that only directory managed by qiita DB can be deleted - obs = self.get_authed(self.endpoint + self.base_data_dir + '/BIOM') - self.assertEqual(obs.status_code, 403) - self.assertIn("which is not managed by Qiita as a directory", - obs.reason) - # check if a directory can be deleted # step 1: create directory - fp_dir = join(self.base_data_dir, 'job/2_test_folder') + fp_dir = join(self.base_data_dir, 'deletemeDir') makedirs(fp_dir) self._clean_up_files.append(fp_dir) # step 2: ensure file exists From dcedd02cba7114c00deacf06f0736a735eba1853 Mon Sep 17 00:00:00 2001 From: Stefan Janssen Date: Fri, 14 Nov 2025 12:42:32 +0100 Subject: [PATCH 21/21] also allow downloading of html summary directories --- .../handlers/cloud_handlers/file_transfer_handlers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py index cb11108c1..14d39c9b5 100644 --- a/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py +++ b/qiita_pet/handlers/cloud_handlers/file_transfer_handlers.py @@ -29,7 +29,8 @@ def is_directory(filepath): ------- Bool: True if the last part of the filepath is contained as filepath in qiita.filepath AND part after base_data_dir is a mountpoint in - qiita.data_directory AND the filepath_type is 'directory'. + qiita.data_directory AND the filepath_type is 'directory or + 'html_summary_dir'. False otherwise. """ working_filepath = filepath @@ -59,14 +60,14 @@ def is_directory(filepath): with qdb.sql_connection.TRN: # find entries that - # a) are of filepath_type "directory" + # a) are of filepath_type "directory" or "html_summary_dir" # b) whose filepath ends with directory name # c) whose mountpoint matches the provided parent_directory sql = """SELECT filepath_id FROM qiita.filepath JOIN qiita.filepath_type USING (filepath_type_id) JOIN qiita.data_directory USING (data_directory_id) - WHERE filepath_type='directory' AND + WHERE filepath_type IN ('directory', 'html_summary_dir') AND filepath=%s AND position(%s in mountpoint)>0;""" qdb.sql_connection.TRN.add(sql, [dirname, mount_dirname])