From fa03e04b8eeef719ef598403c0e4ed470488f1e6 Mon Sep 17 00:00:00 2001 From: Cristiano Urban Date: Wed, 27 Oct 2021 11:37:47 +0200 Subject: [PATCH] Trying to fix issues related to broken symlinks. It's getting very painful :(. Signed-off-by: Cristiano Urban --- transfer_service/checksum.py | 5 +++- transfer_service/data_rpc_server.py | 9 ++++--- transfer_service/import_executor.py | 10 ++++--- transfer_service/retrieve_executor.py | 2 +- transfer_service/store_executor.py | 2 +- transfer_service/store_preprocessor.py | 36 ++++++++++++++++++-------- transfer_service/system_utils.py | 32 ++++++++++++++--------- 7 files changed, 64 insertions(+), 32 deletions(-) diff --git a/transfer_service/checksum.py b/transfer_service/checksum.py index 700c51d..8cb38c1 100644 --- a/transfer_service/checksum.py +++ b/transfer_service/checksum.py @@ -84,5 +84,8 @@ class Checksum(object): else: for file in files: filePath = os.path.abspath(folder) + '/' + file - md5file.write(self.md5sum(filePath) + " ./" + file + '\n') + if not os.path.islink(filePath): + md5file.write(self.md5sum(filePath) + " ./" + file + '\n') md5file.close() + if os.path.exists(md5FilePath) and os.path.getsize(md5FilePath) == 0: + os.remove(md5FilePath) diff --git a/transfer_service/data_rpc_server.py b/transfer_service/data_rpc_server.py index dc6d655..9c45c52 100644 --- a/transfer_service/data_rpc_server.py +++ b/transfer_service/data_rpc_server.py @@ -259,9 +259,12 @@ class DataRPCServer(RedisRPCServer): #path = "/home/" + username + "/store" path = self.storageStorePath.replace("{username}", username) for el in os.listdir(path): - absPath = path + '/' + el - os.chown(absPath, 0, 0) - os.chmod(absPath, 0o444) + try: + absPath = path + '/' + el + os.chown(absPath, 0, 0) + os.chmod(absPath, 0o444) + except OSError: + self.logger.error(f"Unable to set permissions for '{absPath}', skip.") def run(self): self.logger.info(f"Starting RPC server of type {self.type}...") diff --git a/transfer_service/import_executor.py b/transfer_service/import_executor.py index 6c21492..d79fb31 100644 --- a/transfer_service/import_executor.py +++ b/transfer_service/import_executor.py @@ -145,8 +145,12 @@ class ImportExecutor(TaskExecutor): dnode.setLocationId(locationId) dnode.setJobId(self.jobId) dnode.setCreatorId(self.userId) - dnode.setContentLength(os.path.getsize(file)) - dnode.setContentMD5(self.md5calc.getMD5(file)) + if not os.path.islink(file): + dnode.setContentLength(os.path.getsize(file)) + dnode.setContentMD5(self.md5calc.getMD5(file)) + else: + dnode.setContentLength(0) + dnode.setContentMD5(None) dnode.setAsyncTrans(True) dnode.setSticky(True) try: @@ -250,7 +254,7 @@ class ImportExecutor(TaskExecutor): """ msg += info - m.setMessage("VOSpace data storage notification: Job ERROR", msg) + m.setMessage("VOSpace import notification: Job ERROR", msg) # Send e-mail notification m.send() except Exception: diff --git a/transfer_service/retrieve_executor.py b/transfer_service/retrieve_executor.py index d86bb52..b00b1cf 100644 --- a/transfer_service/retrieve_executor.py +++ b/transfer_service/retrieve_executor.py @@ -268,7 +268,7 @@ class RetrieveExecutor(TaskExecutor): osRelParentPath += "/" destDirPath = self.storageRetrievePath.replace("{username}", username) + osRelParentPath os.makedirs(destDirPath, exist_ok = True) - sp = subprocess.run(["rsync", "-av", srcPath, destDirPath], capture_output = True) + sp = subprocess.run(["rsync", "-av", "--no-links", srcPath, destDirPath], capture_output = True) if(sp.returncode or sp.stderr): self.logger.error(f"FATAL: error during the copy process, returnCode = {sp.returncode}, stderr: {sp.stderr}") return False diff --git a/transfer_service/store_executor.py b/transfer_service/store_executor.py index 9646e6d..5be2b43 100644 --- a/transfer_service/store_executor.py +++ b/transfer_service/store_executor.py @@ -116,7 +116,7 @@ class StoreExecutor(TaskExecutor): return False destPathPrefix = storageBasePath + '/' + self.username self.logger.info("Starting data copy...") - sp = subprocess.run(["rsync", "-av", srcPathPrefix + '/', destPathPrefix + '/'], capture_output = True) + sp = subprocess.run(["rsync", "-av", "--no-links", srcPathPrefix + '/', destPathPrefix + '/'], capture_output = True) if(sp.returncode or sp.stderr): self.logger.error("FATAL: an error occurred while copying the data.") return False diff --git a/transfer_service/store_preprocessor.py b/transfer_service/store_preprocessor.py index f94bbda..7b3cad1 100644 --- a/transfer_service/store_preprocessor.py +++ b/transfer_service/store_preprocessor.py @@ -71,14 +71,19 @@ class StorePreprocessor(TaskExecutor): #self.path = "/home/" + username + "/store" self.path = self.storageStorePath.replace("{username}", self.username) for folder, subfolders, files in os.walk(self.path): - os.chown(folder, 0, 0) - os.chmod(folder, 0o555) - for s in subfolders: - os.chown(os.path.join(folder, s), 0, 0) - os.chmod(os.path.join(folder, s), 0o555) - for f in files: - os.chown(os.path.join(folder, f), 0, 0) - os.chmod(os.path.join(folder, f), 0o555) + try: + os.chown(folder, 0, 0) + os.chmod(folder, 0o555) + for s in subfolders: + absPath = os.path.join(folder, s) + os.chown(absPath, 0, 0) + os.chmod(absPath, 0o555) + for f in files: + absPath = os.path.join(folder, f) + os.chown(absPath, 0, 0) + os.chmod(absPath, 0o555) + except OSError: + self.logger.error(f"Unable to set permissions for '{absPath}', skip.") def execute(self): try: @@ -127,7 +132,12 @@ class StorePreprocessor(TaskExecutor): self.md5calc.recursive(destPath) # Case 4: /home/user/store is empty (this should be handled by data_rpc_server.py) else: - self.logger.critical("FATAL: the 'store' directory is empty.") + self.logger.error("FATAL: the 'store' directory is empty.") + userInfo = self.systemUtils.userInfo(self.username) + uid = userInfo[1] + gid = userInfo[2] + os.chown(self.path, uid, gid) + os.chmod(self.path, 0o755) return False # Third scan after directory structure 'check & repair' @@ -192,8 +202,12 @@ class StorePreprocessor(TaskExecutor): dnode.setLocationId(locationId) dnode.setJobId(self.jobId) dnode.setCreatorId(self.userId) - dnode.setContentLength(os.path.getsize(file)) - dnode.setContentMD5(self.md5calc.getMD5(file)) + if not os.path.islink(file): + dnode.setContentLength(os.path.getsize(file)) + dnode.setContentMD5(self.md5calc.getMD5(file)) + else: + dnode.setContentLength(0) + dnode.setContentMD5(None) dnode.setSticky(True) try: now = datetime.datetime.now().isoformat() diff --git a/transfer_service/system_utils.py b/transfer_service/system_utils.py index 665d847..b72b826 100644 --- a/transfer_service/system_utils.py +++ b/transfer_service/system_utils.py @@ -59,40 +59,48 @@ class SystemUtils(object): i += 1 fileList.append(files) return [ dirList, fileList ] - - # Scan is performed only on the first level! + def scan(self, path): + """ + Performs a scan of dirs and files only at the first level, + removing symlinks, if any. + """ dirList = [] fileList = [] elementList = os.listdir(path) for el in elementList: elPath = path + '/' + el - if os.path.isdir(elPath): + if os.path.islink(elPath): + os.remove(elPath) + elif os.path.isdir(elPath): dirList.append(el) elif os.path.isfile(elPath): fileList.append(el) else: - sys.exit("FATAL: invalid file/dir.") + # do nothing... + pass return [ dirList, fileList ] def getSize(self, path): """ If 'path' is a file returns the file size in bytes, if 'path' is a directory it returns the total size of the dir, - in all the other cases it returns -1 + in all the other cases it returns 0 """ size = 0 - if os.path.isfile(path): + if os.path.isfile(path) and not os.path.islink(path): size = os.path.getsize(path) - elif os.path.isdir(path): + elif os.path.isdir(path) and not os.path.islink(path): for folder, subfolders, files in os.walk(path, topdown = True): cwd = os.path.basename(folder) parent = os.path.dirname(folder) - size += os.path.getsize(folder) - for file in files: - size += os.path.getsize(parent + '/' + cwd + '/' + file) - else: - size = -1 + base = parent + '/' + cwd + if not os.path.islink(folder): + size += os.path.getsize(folder) + for f in files: + file = base + '/' + f + if not os.path.islink(file): + size += os.path.getsize(file) return size def getFileSystemSize(self, path): -- GitLab