From 88388b3393b1c7dd04e24a2f313683741356aa0a Mon Sep 17 00:00:00 2001 From: Adrian Sevcenco Date: Sun, 7 Jul 2024 12:54:47 +0200 Subject: [PATCH] XRootD Cp :: document -parent; rewrite logic to add -rmprefix (removing first N components from the absolute path of the source) when copying to destination --- alienpy/tools_nowb.py | 43 +++++++++++++++++++++++++++---------------- alienpy/xrd_core.py | 17 +++++++++++------ alienpy/xrd_tools.py | 2 ++ 3 files changed, 40 insertions(+), 22 deletions(-) diff --git a/alienpy/tools_nowb.py b/alienpy/tools_nowb.py index 5760260..55e9968 100644 --- a/alienpy/tools_nowb.py +++ b/alienpy/tools_nowb.py @@ -479,31 +479,42 @@ def common_path(path_list: list) -> str: return common -def format_dst_fn(src_dir: str, src_file: str, dst: str, parent: int) -> str: +def format_dst_fn(src_dir: str, src_file: str, dst: str, parent: int = 0, truncate_basepath: int = 0) -> str: """Return the destination filename given the source dir/name, destination directory and number of parents to keep""" # let's get destination file name (relative path with parent value) if src_dir != src_file: # recursive operation total_relative_path = src_file.replace(src_dir, '', 1) src_dir_path = Path(src_dir) - src_dir_parts = src_dir_path.parts + src_dir_parts = list(src_dir_path.parts) + file_components = len(src_dir_parts) # it's directory' + if not src_dir.endswith('/'): src_dir_parts = src_dir_parts[:-1] - src_dir = '/'.join(map(lambda x: str(x or ''), src_dir_parts)) - src_dir = src_dir.replace('//', '/') - components_list = src_dir.split('/') - components_list[0] = '/' # first slash is lost in split - file_components = len(components_list) # it's directory' - parent = min(parent, file_components) # make sure maximum parent var point to first dir in path - parent_selection = components_list[(file_components - parent):] - rootdir_src_dir = '/'.join(parent_selection) - file_relative_name = f'{rootdir_src_dir}/{total_relative_path}' + + if truncate_basepath > 0: + # 
make sure to not truncate more the path components and account for initial / which is counted as a component + truncate_basepath = min(truncate_basepath, file_components - 1) + base_path_list = src_dir_parts[truncate_basepath + 1:] # add 1 to account for initial / that does not count as path component + else: + parent = min(parent, file_components) # make sure maximum parent var point to first dir in path + base_path_list = src_dir_parts[(file_components - parent):] + base_path = '/'.join(base_path_list).replace('//', '/') + base_path = f'{base_path}/{total_relative_path}' + else: src_file_path = Path(src_file) - file_components = len(src_file_path.parts) - 1 - 1 # without the file and up to slash - parent = min(parent, file_components) # make sure maximum parent var point to first dir in path - rootdir_src_file = src_file_path.parents[parent].as_posix() - file_relative_name = src_file.replace(rootdir_src_file, '', 1) + src_file_parts = list(src_file_path.parts) + file_components = len(src_file_parts) - 1 # without last element which is the file + + if truncate_basepath > 0: + # make sure to not truncate more the path components and account for initial / which is counted as a component + truncate_basepath = min(truncate_basepath, file_components - 1) + base_path_list = src_file_parts[truncate_basepath + 1:] # add 1 to account for initial / that does not count as path component + else: + parent = min(parent, file_components) # make sure maximum parent var point to first dir in path + base_path_list = src_file_parts[(file_components - parent):] + base_path = '/'.join(base_path_list).replace('//', '/') - dst_file = f'{dst}/{file_relative_name}' if dst.endswith('/') else dst + dst_file = f'{dst}/{base_path}' if dst.endswith('/') else dst return os.path.normpath(dst_file) diff --git a/alienpy/xrd_core.py b/alienpy/xrd_core.py index 86e24e2..2c0744f 100644 --- a/alienpy/xrd_core.py +++ b/alienpy/xrd_core.py @@ -140,7 +140,7 @@ def xrdfile_set_attr(uri: str = '', 
xattr_list: Optional[list] = None): def makelist_lfn(wb, arg_source: str, arg_target: str, find_args: Optional[list] = None, copy_list: Optional[list] = None, - pattern: Union[None, REGEX_PATTERN_TYPE, str] = None, parent: int = 999, + pattern: Union[None, REGEX_PATTERN_TYPE, str] = None, parent: int = 0, truncate_basepath: int = 0, overwrite: bool = False, is_regex: bool = False, strictspec: bool = False, httpurl: bool = False) -> RET: # pylint: disable=unused-argument """Process a source and destination copy arguments and make a list of individual lfns to be copied""" isSrcDir = isSrcLocal = isDownload = specs = None # make sure we set these to valid values later @@ -241,7 +241,7 @@ def makelist_lfn(wb, arg_source: str, arg_target: str, find_args: Optional[list] if isDownload: # pylint: disable=too-many-nested-blocks # src is GRID, we are DOWNLOADING from GRID location # to reduce the remote calls we treat files and directory on separate code-paths if src_stat.type == 'f': # single file - dst_filename = format_dst_fn(src, src, dst, parent) + dst_filename = format_dst_fn(src, src, dst, parent, truncate_basepath) # if overwrite the file validity checking will do md5 skip_file = retf_print(fileIsValid(dst_filename, src_stat.size, src_stat.mtime, src_stat.md5, shallow_check = not overwrite), opts = 'noerr') == 0 @@ -257,7 +257,7 @@ def makelist_lfn(wb, arg_source: str, arg_target: str, find_args: Optional[list] for lfn_obj in results_list.ansdict["results"]: # make CopyFile objs for each lfn lfn = get_lfn_key(lfn_obj) - dst_filename = format_dst_fn(src, lfn, dst, parent) + dst_filename = format_dst_fn(src, lfn, dst, parent, truncate_basepath) # if overwrite the file validity checking will do md5 skip_file = retf_print(fileIsValid(dst_filename, lfn_obj['size'], lfn_obj['ctime'], lfn_obj['md5'], shallow_check = not overwrite), opts = 'noerr') == 0 if skip_file: continue # destination exists and is valid, no point to re-download @@ -274,7 +274,7 @@ def makelist_lfn(wb, 
arg_source: str, arg_target: str, find_args: Optional[list] for local_file in results_list.ansdict["results"]: file_path = get_lfn_key(local_file) - lfn = format_dst_fn(src, file_path, dst, parent) + lfn = format_dst_fn(src, file_path, dst, parent, truncate_basepath) skip_file = retf_print(lfnIsValid(wb, lfn, file_path, shallow_check = not overwrite, removeTarget = True), opts = 'noerr') == 0 if skip_file: continue # destination exists and is valid, no point to re-upload @@ -446,6 +446,11 @@ def DO_XrootdCp(wb, xrd_copy_command: Optional[list] = None, printout: str = '', parent_arg = get_arg_value(xrd_copy_command, '-parent') if parent_arg: parent = int(parent_arg) + # remove first N path components from full source path and keep the rest as basepath for destination + truncate_basepath = int(0) + rmprefix_arg = get_arg_value(xrd_copy_command, '-rmprefix') + if rmprefix_arg: truncate_basepath = int(rmprefix_arg) + # explicit specify a destination, the rest of arguments are source files dst_arg_specified = get_arg_value(xrd_copy_command, '-dst') @@ -572,7 +577,7 @@ def DO_XrootdCp(wb, xrd_copy_command: Optional[list] = None, printout: str = '', common_root_path = common_path(xrd_copy_command) for src in xrd_copy_command: retobj = makelist_lfn(wb, arg_source = src, arg_target = f'{dst_arg_specified}/{src.replace(common_root_path, "")}', - find_args = find_args, parent = parent, + find_args = find_args, parent = parent, truncate_basepath = truncate_basepath, overwrite = overwrite, pattern = pattern, is_regex = use_regex, strictspec = strictspec, httpurl = httpurl, copy_list = copy_lfnlist) if retobj.exitcode != 0: print_err(retobj.err) # if any error let's just return what we got # noqa: R504 @@ -582,7 +587,7 @@ def DO_XrootdCp(wb, xrd_copy_command: Optional[list] = None, printout: str = '', src = xrd_copy_command[-2] dst = xrd_copy_command[-1] retobj = makelist_lfn(wb, arg_source = src, arg_target = dst, - find_args = find_args, parent = parent, + find_args = 
find_args, parent = parent, truncate_basepath = truncate_basepath, overwrite = overwrite, pattern = pattern, is_regex = use_regex, strictspec = strictspec, httpurl = httpurl, copy_list = copy_lfnlist) if retobj.exitcode != 0: return retobj # if any error let's just return what we got # noqa: R504 diff --git a/alienpy/xrd_tools.py b/alienpy/xrd_tools.py index 00d75fb..68e7df4 100644 --- a/alienpy/xrd_tools.py +++ b/alienpy/xrd_tools.py @@ -222,6 +222,8 @@ def xrdcp_help() -> str: -S : uses num additional parallel streams to do the transfer. (max = 15) -chunks : number of chunks that should be requested in parallel -chunksz : chunk size (bytes) +-parent N : keep last N path components into destination filepath +-rmprefix N : remove first N path components from full source path and keep the rest as basepath for destination -T : number of parralel copy jobs from a set (for recursive copy); defaults to 8 for downloads -timeout : the job will fail if did not finish in this nr of seconds -retry : retry N times the copy process if failed