def get_arg_value_multiple(target: list, item: str) -> list:
    """Return the list of values of an argument that can be used multiple times.

    Each occurrence of `item` in `target` is expected to be followed by its value.
    The values are collected in order of appearance; afterwards the values are removed
    in place from `target` and `item` is handed to `list_remove_item` for removal.

    If `item` is the last element of `target` (no value follows it), the scan stops there:
    the values collected so far are returned and `target` is left unmodified.
    """
    val_list = []
    idx_to_be_removed = []
    arg_list_size = len(target)
    # cannot get the values and remove them from the list at the same time
    for idx, x in enumerate(target):
        if x != item: continue
        # the flag is the last element, so there is no value to take: return what was collected so far
        # (this used to be `return val`, a name that does not exist in this function -> NameError)
        if idx + 1 >= arg_list_size: return val_list
        val_list.append(target[idx + 1])
        idx_to_be_removed.append(idx + 1)

    # pop the values from the highest index down, so the lower recorded indexes stay valid
    for idx in reversed(idx_to_be_removed): target.pop(idx)
    # NOTE(review): list_remove_item is defined elsewhere; presumably it removes every instance of item in place
    list_remove_item(target, item)
    return val_list  # noqa: R504
+ 1 file components # string/pattern exclusion - exclude_string = get_arg_value(opts, '-exclude') - if exclude_string and exclude_string in relative_lfn: return False # this is filtering out the string from relative lfn - - exclude_regex = get_arg_value(opts, '-exclude_re') - if exclude_regex and compiled_regex and compiled_regex.match(relative_lfn): return False + exclude_str_list = get_arg_value_multiple(opts, '-exclude') + for exclude_string in exclude_str_list: + if exclude_string in relative_lfn: + return False # this is filtering out the string from relative lfn + + # regex based exclusion; we parse the already compiled regexes + for compiled_regex in compiled_regex_list: + match = compiled_regex.search(relative_lfn) + if match: return False min_size = get_arg_value(opts, '-minsize') if min_size: diff --git a/alienpy/xrd_core.py b/alienpy/xrd_core.py index 2c0744f..24550b4 100644 --- a/alienpy/xrd_core.py +++ b/alienpy/xrd_core.py @@ -22,7 +22,7 @@ from .global_vars import AlienSessionInfo, COLORS, REGEX_PATTERN_TYPE, specs_split from .wb_api import SendMsg, retf_print from .tools_nowb import (GetHumanReadableSize, PrintColor, common_path, create_metafile, deltat_ms_perf, - fileIsValid, fileline2list, format_dst_fn, get_arg, get_arg_value, get_hash_meta, get_lfn_key, get_lfn_name, get_size_meta, + fileIsValid, fileline2list, format_dst_fn, get_arg, get_arg_value, get_arg_value_multiple, get_hash_meta, get_lfn_key, get_lfn_name, get_size_meta, is_help, is_int, list_files_local, make_tmp_fn, md5, name2regex, now_str, path_local_stat, path_writable_any, valid_regex, unixtime2local) from .xrd_tools import commitFileList, expand_path_grid, extract_glob_pattern, lfn2fileTokens, list_files_grid, path_grid_stat, path_type, pathtype_grid, xrdcp_help, lfnIsValid @@ -250,7 +250,7 @@ def makelist_lfn(wb, arg_source: str, arg_target: str, find_args: Optional[list] if tokens and 'answer' in tokens: copy_list.append(CopyFile(src, dst_filename, isWrite, tokens['answer'], 
src)) else: # directory to be listed - results_list = list_files_grid(wb, src, pattern, is_regex, " ".join(find_args)) + results_list = list_files_grid(wb, src, pattern, is_regex, find_args) if "results" not in results_list.ansdict or len(results_list.ansdict["results"]) < 1: msg = f"No files found with: find {' '.join(find_args) if find_args else ''}{' -r ' if is_regex else ''} -a -s {src} {pattern}" return RET(42, '', msg) # ENOMSG /* No message of desired type */ @@ -484,11 +484,13 @@ def DO_XrootdCp(wb, xrd_copy_command: Optional[list] = None, printout: str = '', maxctime_arg = get_arg_value(xrd_copy_command, '-max-ctime') if maxctime_arg: find_args.extend(['-max-ctime', maxctime_arg]) - exclude_str_arg = get_arg_value(xrd_copy_command, '-exclude') - if exclude_str_arg: find_args.extend(['-exclude', exclude_str_arg]) + exclude_str_list = get_arg_value_multiple(xrd_copy_command, '-exclude') + for ex_str_pat in exclude_str_list: + find_args.extend(['-exclude', ex_str_pat]) - exclude_re_arg = get_arg_value(xrd_copy_command, '-exclude_re') - if exclude_re_arg: find_args.extend(['-exclude_re', exclude_re_arg]) + exclude_re_arg_list = get_arg_value_multiple(xrd_copy_command, '-exclude_re') + for ex_re_pat in exclude_re_arg_list: + find_args.extend(['-exclude_re', ex_re_pat]) user_arg = get_arg_value(xrd_copy_command, '-user') if user_arg: find_args.extend(['-user', user_arg]) @@ -511,8 +513,9 @@ def DO_XrootdCp(wb, xrd_copy_command: Optional[list] = None, printout: str = '', ref_site = get_arg_value(xrd_copy_command, '-site') if ref_site: find_args.extend(['-S', ref_site]) - exclude_pattern = get_arg_value(xrd_copy_command, '-e') - if exclude_pattern: find_args.extend(['-e', exclude_pattern]) + exclude_pattern_list = get_arg_value_multiple(xrd_copy_command, '-e') + for ex_pat in exclude_pattern_list: + find_args.extend(['-e', ex_pat]) use_regex = False filtering_enabled = False diff --git a/alienpy/xrd_tools.py b/alienpy/xrd_tools.py index 68e7df4..462556a 100644 --- 
a/alienpy/xrd_tools.py +++ b/alienpy/xrd_tools.py @@ -12,7 +12,7 @@ from .setup_logging import DEBUG, print_err from .global_vars import AlienSessionInfo, COLORS, REGEX_PATTERN_TYPE, lfn_prefix_re, specs_split from .wb_api import SendMsg, SendMsgMulti, retf_print -from .tools_nowb import CreateJsonCommand, PrintColor, create_metafile, filter_file_prop, get_arg, get_arg_value, get_lfn_key, make_tmp_fn, valid_regex, md5 +from .tools_nowb import CreateJsonCommand, PrintColor, create_metafile, filter_file_prop, get_arg, get_arg_value, get_arg_value_multiple, get_lfn_key, make_tmp_fn, valid_regex, md5 def lfnAccessUrl(wb, lfn: str, local_file: str = '', specs: Union[None, list, str] = None, isWrite: bool = False, strictspec: bool = False, httpurl: bool = False) -> dict: @@ -353,13 +353,14 @@ def list_files_grid(wb, search_dir: str, pattern: Union[None, REGEX_PATTERN_TYPE get_arg(find_args_list, '-w') get_arg(find_args_list, '-wh') - exclude_string = get_arg_value(find_args_list, '-exclude') - if exclude_string: - filter_args_list.extend(['-exclude', exclude_string]) + exclude_str_list = get_arg_value_multiple(find_args_list, '-exclude') + for ex_str_pat in exclude_str_list: + filter_args_list.extend(['-exclude', ex_str_pat]) - exclude_regex = get_arg_value(find_args_list, '-exclude_re') - if exclude_regex: - filter_args_list.extend(['-exclude_re', exclude_regex]) + compiled_regex_list = [] + exclude_re_arg_list = get_arg_value_multiple(find_args_list, '-exclude_re') + for ex_re_pat in exclude_re_arg_list: + compiled_regex_list.append(re.compile(ex_re_pat)) # precompile the regex for exclusion min_depth = get_arg_value(find_args_list, '-mindepth') if min_depth: @@ -450,12 +451,9 @@ def list_files_grid(wb, search_dir: str, pattern: Union[None, REGEX_PATTERN_TYPE results_list_filtered = [] # items that pass the conditions are the actual/final results - compiled_regex = None - if exclude_regex: compiled_regex = re.compile(exclude_regex) # precompile the regex for exclusion 
- for found_lfn_dict in results_list: # parse results to apply filters - if not filter_file_prop(found_lfn_dict, search_dir, filter_args_list, compiled_regex): continue - results_list_filtered.append(found_lfn_dict) # at this point all filters were passed + if filter_file_prop(found_lfn_dict, search_dir, filter_args_list, compiled_regex_list): + results_list_filtered.append(found_lfn_dict) # at this point all filters were passed if not results_list_filtered: return RET(2, "", f"No files passed the filters :: {search_dir} /pattern: {pattern} /find_args: {find_args}")