Skip to content

Commit

Permalink
XRooD Cp :: document -parent; rewrite logic to add -rmprefix (removin…
Browse files Browse the repository at this point in the history
…g first N components from the absolute path of the source) when copy to destination
  • Loading branch information
adriansev committed Jul 7, 2024
1 parent 3017717 commit 88388b3
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 22 deletions.
43 changes: 27 additions & 16 deletions alienpy/tools_nowb.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,31 +479,42 @@ def common_path(path_list: list) -> str:
return common


def _select_base_components(parts: list, n_components: int, has_root: bool, parent: int, truncate_basepath: int) -> list:
    """Return the tail of ``parts`` kept by ``-rmprefix`` or, when that is not requested, by ``-parent``.

    ``parts`` is the list that gets sliced; ``n_components`` is the count used for
    clamping and indexing (it may differ from ``len(parts)``: the single-file case
    keeps the file name as an extra trailing element).
    ``has_root`` tells whether ``parts[0]`` is the filesystem root and not a real component.
    """
    if truncate_basepath > 0:
        # The root of an absolute path occupies parts[0] but is not a removable
        # component; a relative path has no such element, so nothing is skipped for it.
        root_offset = 1 if has_root else 0
        # never remove more components than the path actually has
        truncate_basepath = min(truncate_basepath, n_components - root_offset)
        return parts[truncate_basepath + root_offset:]

    # Clamp to [0, n_components]: the upper bound keeps the slice start at the first
    # element, the lower bound stops a negative value from slicing past the end.
    parent = max(0, min(parent, n_components))
    return parts[n_components - parent:]


def format_dst_fn(src_dir: str, src_file: str, dst: str, parent: int = 0, truncate_basepath: int = 0) -> str:
    """Return the destination filename given the source dir/name, destination directory and path-selection options.

    Args:
        src_dir: source of the copy; equal to ``src_file`` for a single-file copy,
            otherwise the directory that ``src_file`` was found under.
        src_file: full path of the file being copied.
        dst: destination; only when it ends with ``/`` is a name derived from the
            source appended, otherwise ``dst`` itself is the result.
        parent: keep the last ``parent`` directory components of the source path.
        truncate_basepath: when > 0, remove the first ``truncate_basepath`` components
            of the source path and keep the rest; takes precedence over ``parent``.

    Returns:
        The destination path, normalised with ``os.path.normpath``.
    """
    if src_dir != src_file:  # recursive operation
        total_relative_path = src_file.replace(src_dir, '', 1)
        src_dir_path = Path(src_dir)
        src_dir_parts = list(src_dir_path.parts)
        # NOTE(review): the count is taken before the last component is dropped below,
        # so for a src_dir without trailing slash it is one larger than the list that
        # is sliced. Kept as found; confirm whether this is intended.
        file_components = len(src_dir_parts)  # it's a directory

        if not src_dir.endswith('/'): src_dir_parts = src_dir_parts[:-1]

        base_path_list = _select_base_components(src_dir_parts, file_components, bool(src_dir_path.anchor),
                                                 parent, truncate_basepath)
        base_path = '/'.join(base_path_list).replace('//', '/')
        base_path = f'{base_path}/{total_relative_path}'
    else:
        src_file_path = Path(src_file)
        src_file_parts = list(src_file_path.parts)
        file_components = len(src_file_parts) - 1  # without last element which is the file

        base_path_list = _select_base_components(src_file_parts, file_components, bool(src_file_path.anchor),
                                                 parent, truncate_basepath)
        base_path = '/'.join(base_path_list).replace('//', '/')

    dst_file = f'{dst}/{base_path}' if dst.endswith('/') else dst
    return os.path.normpath(dst_file)


Expand Down
17 changes: 11 additions & 6 deletions alienpy/xrd_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def xrdfile_set_attr(uri: str = '', xattr_list: Optional[list] = None):


def makelist_lfn(wb, arg_source: str, arg_target: str, find_args: Optional[list] = None, copy_list: Optional[list] = None,
pattern: Union[None, REGEX_PATTERN_TYPE, str] = None, parent: int = 999,
pattern: Union[None, REGEX_PATTERN_TYPE, str] = None, parent: int = 0, truncate_basepath: int = 0,
overwrite: bool = False, is_regex: bool = False, strictspec: bool = False, httpurl: bool = False) -> RET: # pylint: disable=unused-argument
"""Process a source and destination copy arguments and make a list of individual lfns to be copied"""
isSrcDir = isSrcLocal = isDownload = specs = None # make sure we set these to valid values later
Expand Down Expand Up @@ -241,7 +241,7 @@ def makelist_lfn(wb, arg_source: str, arg_target: str, find_args: Optional[list]
if isDownload: # pylint: disable=too-many-nested-blocks # src is GRID, we are DOWNLOADING from GRID location
# to reduce the remote calls we treat files and directory on separate code-paths
if src_stat.type == 'f': # single file
dst_filename = format_dst_fn(src, src, dst, parent)
dst_filename = format_dst_fn(src, src, dst, parent, truncate_basepath)
# if overwrite the file validity checking will do md5

skip_file = retf_print(fileIsValid(dst_filename, src_stat.size, src_stat.mtime, src_stat.md5, shallow_check = not overwrite), opts = 'noerr') == 0
Expand All @@ -257,7 +257,7 @@ def makelist_lfn(wb, arg_source: str, arg_target: str, find_args: Optional[list]

for lfn_obj in results_list.ansdict["results"]: # make CopyFile objs for each lfn
lfn = get_lfn_key(lfn_obj)
dst_filename = format_dst_fn(src, lfn, dst, parent)
dst_filename = format_dst_fn(src, lfn, dst, parent, truncate_basepath)
# if overwrite the file validity checking will do md5
skip_file = retf_print(fileIsValid(dst_filename, lfn_obj['size'], lfn_obj['ctime'], lfn_obj['md5'], shallow_check = not overwrite), opts = 'noerr') == 0
if skip_file: continue # destination exists and is valid, no point to re-download
Expand All @@ -274,7 +274,7 @@ def makelist_lfn(wb, arg_source: str, arg_target: str, find_args: Optional[list]

for local_file in results_list.ansdict["results"]:
file_path = get_lfn_key(local_file)
lfn = format_dst_fn(src, file_path, dst, parent)
lfn = format_dst_fn(src, file_path, dst, parent, truncate_basepath)

skip_file = retf_print(lfnIsValid(wb, lfn, file_path, shallow_check = not overwrite, removeTarget = True), opts = 'noerr') == 0
if skip_file: continue # destination exists and is valid, no point to re-upload
Expand Down Expand Up @@ -446,6 +446,11 @@ def DO_XrootdCp(wb, xrd_copy_command: Optional[list] = None, printout: str = '',
parent_arg = get_arg_value(xrd_copy_command, '-parent')
if parent_arg: parent = int(parent_arg)

# remove first N path components from full source path and keep the rest as basepath for destination
truncate_basepath = int(0)
rmprefix_arg = get_arg_value(xrd_copy_command, '-rmprefix')
if rmprefix_arg: truncate_basepath = int(rmprefix_arg)

# explicit specify a destination, the rest of arguments are source files
dst_arg_specified = get_arg_value(xrd_copy_command, '-dst')

Expand Down Expand Up @@ -572,7 +577,7 @@ def DO_XrootdCp(wb, xrd_copy_command: Optional[list] = None, printout: str = '',
common_root_path = common_path(xrd_copy_command)
for src in xrd_copy_command:
retobj = makelist_lfn(wb, arg_source = src, arg_target = f'{dst_arg_specified}/{src.replace(common_root_path, "")}',
find_args = find_args, parent = parent,
find_args = find_args, parent = parent, truncate_basepath = truncate_basepath,
overwrite = overwrite, pattern = pattern,
is_regex = use_regex, strictspec = strictspec, httpurl = httpurl, copy_list = copy_lfnlist)
if retobj.exitcode != 0: print_err(retobj.err) # if any error let's just return what we got # noqa: R504
Expand All @@ -582,7 +587,7 @@ def DO_XrootdCp(wb, xrd_copy_command: Optional[list] = None, printout: str = '',
src = xrd_copy_command[-2]
dst = xrd_copy_command[-1]
retobj = makelist_lfn(wb, arg_source = src, arg_target = dst,
find_args = find_args, parent = parent,
find_args = find_args, parent = parent, truncate_basepath = truncate_basepath,
overwrite = overwrite, pattern = pattern,
is_regex = use_regex, strictspec = strictspec, httpurl = httpurl, copy_list = copy_lfnlist)
if retobj.exitcode != 0: return retobj # if any error let's just return what we got # noqa: R504
Expand Down
2 changes: 2 additions & 0 deletions alienpy/xrd_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ def xrdcp_help() -> str:
-S <aditional streams> : uses num additional parallel streams to do the transfer. (max = 15)
-chunks <nr chunks> : number of chunks that should be requested in parallel
-chunksz <bytes> : chunk size (bytes)
-parent N : keep last N path components into destination filepath
-rmprefix N : remove first N path components from full source path and keep the rest as basepath for destination
-T <nr_copy_jobs> : number of parralel copy jobs from a set (for recursive copy); defaults to 8 for downloads
-timeout <seconds> : the job will fail if did not finish in this nr of seconds
-retry <times> : retry N times the copy process if failed
Expand Down

0 comments on commit 88388b3

Please sign in to comment.