From acae2df7ca9c45f8cb45d6fdad33a641b2df0440 Mon Sep 17 00:00:00 2001 From: Claude Warren Date: Mon, 13 Jan 2025 23:43:39 +0000 Subject: [PATCH] fixed pattern match --- .../config/exclusion/ExclusionProcessor.java | 4 +- .../config/exclusion/plexus/MatchPattern.java | 2 +- .../exclusion/plexus/MatchPatterns.java | 40 +- .../rat/document/DocumentNameMatcher.java | 390 ++++++++++++++++-- 4 files changed, 370 insertions(+), 66 deletions(-) diff --git a/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/ExclusionProcessor.java b/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/ExclusionProcessor.java index e21432e03..105312930 100644 --- a/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/ExclusionProcessor.java +++ b/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/ExclusionProcessor.java @@ -255,10 +255,10 @@ public DocumentNameMatcher getNameMatcher(final DocumentName basedir) { .addTo(new ArrayList<>()); if (!incl.isEmpty()) { - inclMatchers.add(new DocumentNameMatcher("included patterns", MatchPatterns.from(incl), basedir)); + inclMatchers.add(new DocumentNameMatcher("included patterns", MatchPatterns.from("/", incl), basedir)); } if (!excl.isEmpty()) { - exclMatchers.add(new DocumentNameMatcher("excluded patterns", MatchPatterns.from(excl), basedir)); + exclMatchers.add(new DocumentNameMatcher("excluded patterns", MatchPatterns.from("/", excl), basedir)); } if (!includedPaths.isEmpty()) { diff --git a/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/plexus/MatchPattern.java b/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/plexus/MatchPattern.java index c43836ecd..c2c3ae7c9 100644 --- a/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/plexus/MatchPattern.java +++ b/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/plexus/MatchPattern.java @@ -49,7 +49,7 @@ public final class MatchPattern { private final char[][] tokenizedChar; - private MatchPattern(final String 
source, final String separator) { + MatchPattern(final String source, final String separator) { regexPattern = SelectorUtils.isRegexPrefixedPattern(source) ? source.substring( SelectorUtils.REGEX_HANDLER_PREFIX.length(), diff --git a/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/plexus/MatchPatterns.java b/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/plexus/MatchPatterns.java index 454ba304e..9a6d443e4 100644 --- a/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/plexus/MatchPatterns.java +++ b/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/plexus/MatchPatterns.java @@ -22,8 +22,9 @@ import java.io.File; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; -import java.util.function.Predicate; +import java.util.stream.Collectors; @SuppressWarnings({"checkstyle:RegexpSingleLine", "checkstyle:JavadocVariable"}) /** @@ -41,15 +42,15 @@ private MatchPatterns(final MatchPattern[] patterns) { @Override public String toString() { - return source(); + return Arrays.stream(patterns).map(MatchPattern::toString).collect(Collectors.toList()).toString(); } public String source() { - List sources = new ArrayList<>(); - for (MatchPattern pattern : patterns) { - sources.add(pattern.source()); - } - return "[" + String.join(", ", sources) + "]"; + return Arrays.stream(patterns).map(MatchPattern::source).collect(Collectors.toList()).toString(); + } + + public Iterable patterns() { + return Arrays.asList(patterns); } /** @@ -83,36 +84,23 @@ public boolean matches(final String name, final char[][] tokenizedNameChar, fina return false; } - public Predicate asPredicate(final boolean isCaseSensitive) { - return name -> matches(name, isCaseSensitive); - } - - public boolean matchesPatternStart(final String name, final boolean isCaseSensitive) { - for (MatchPattern includesPattern : patterns) { - if (includesPattern.matchPatternStart(name, isCaseSensitive)) { - return true; - } - } - return false; - } - - 
public static MatchPatterns from(final String... sources) { + public static MatchPatterns from(final String separator, final String... sources) { final int length = sources.length; MatchPattern[] result = new MatchPattern[length]; for (int i = 0; i < length; i++) { - result[i] = MatchPattern.fromString(sources[i]); + result[i] = new MatchPattern(sources[i], separator); } return new MatchPatterns(result); } - public static MatchPatterns from(final Iterable strings) { - return new MatchPatterns(getMatchPatterns(strings)); + public static MatchPatterns from(final String separator, final Iterable strings) { + return new MatchPatterns(getMatchPatterns(separator, strings)); } - private static MatchPattern[] getMatchPatterns(final Iterable items) { + private static MatchPattern[] getMatchPatterns(final String separator, final Iterable items) { List result = new ArrayList<>(); for (String string : items) { - result.add(MatchPattern.fromString(string)); + result.add(new MatchPattern(string, separator)); } return result.toArray(new MatchPattern[0]); } diff --git a/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java b/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java index 30bbbbede..0e66b19f7 100644 --- a/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java +++ b/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java @@ -23,9 +23,14 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Optional; +import java.util.Set; import java.util.function.Predicate; +import org.apache.rat.ConfigurationException; import org.apache.rat.config.exclusion.plexus.MatchPattern; import org.apache.rat.config.exclusion.plexus.MatchPatterns; @@ -40,6 +45,8 @@ public final class DocumentNameMatcher { private final Predicate predicate; /** The name of this matcher. 
*/ private final String name; + /** {@code true} if this matcher is a collection of matchers */ + private final boolean isCollection; /** * A matcher that matches all documents. @@ -59,6 +66,7 @@ public final class DocumentNameMatcher { public DocumentNameMatcher(final String name, final Predicate predicate) { this.name = name; this.predicate = predicate; + this.isCollection = predicate instanceof CollectionPredicate; } /** @@ -77,9 +85,22 @@ public DocumentNameMatcher(final String name, final DocumentNameMatcher delegate * @param basedir the base directory for the scanning. */ public DocumentNameMatcher(final String name, final MatchPatterns patterns, final DocumentName basedir) { - this(name, (Predicate) documentName -> patterns.matches(documentName.getName(), - MatchPattern.tokenizePathToString(documentName.getName(), basedir.getDirectorySeparator()), - basedir.isCaseSensitive())); + this(name, new MatchPatternsPredicate(basedir, patterns)); + } + + /** + * Tokenizes name for faster Matcher processing. + * @param name the name to tokenize + * @param dirSeparator the directory separator + * @return the tokenized name. + */ + private static char[][] tokenize(final String name, final String dirSeparator) { + String[] tokenizedName = MatchPattern.tokenizePathToString(name, dirSeparator); + char[][] tokenizedNameChar = new char[tokenizedName.length][]; + for (int i = 0; i < tokenizedName.length; i++) { + tokenizedNameChar[i] = tokenizedName[i].toCharArray(); + } + return tokenizedNameChar; } /** @@ -88,7 +109,20 @@ public DocumentNameMatcher(final String name, final MatchPatterns patterns, fina * @param matchers fully specified matchers. 
*/ public DocumentNameMatcher(final String name, final MatchPatterns matchers) { - this(name, (Predicate) documentName -> matchers.matches(documentName.getName(), documentName.isCaseSensitive())); + this(name, new CollectionPredicate() { + @Override + public Iterable getMatchers() { + final List result = new ArrayList<>(); + matchers.patterns().forEach(p -> result.add(new DocumentNameMatcher(p.source(), + (Predicate) x -> MatchPatterns.from("/", p.source()).matches(x.getName(), x.isCaseSensitive())))); + return result; + } + + @Override + public boolean test(final DocumentName documentName) { + return matchers.matches(documentName.getName(), documentName.isCaseSensitive()); + } + }); } /** @@ -97,7 +131,7 @@ public DocumentNameMatcher(final String name, final MatchPatterns matchers) { * @param fileFilter the file filter to execute. */ public DocumentNameMatcher(final String name, final FileFilter fileFilter) { - this(name, (Predicate) documentName -> fileFilter.accept(new File(documentName.getName()))); + this(name, new FileFilterPredicate(fileFilter)); } /** @@ -108,11 +142,39 @@ public DocumentNameMatcher(final FileFilter fileFilter) { this(fileFilter.toString(), fileFilter); } + public boolean isCollection() { + return isCollection; + } + + /** + * Returns the predicate that this DocumentNameMatcher is using. + * @return The predicate that this DocumentNameMatcher is using. + */ + public Predicate getPredicate() { + return predicate; + } + @Override public String toString() { return name; } + /** + * Decomposes the matcher execution against the candidate. + * @param candidate the candidate to check. + * @return a list of {@link DecomposeData} for each evaluation in the matcher. 
+ */ + public List decompose(final DocumentName candidate) { + final List result = new ArrayList<>(); + decompose(0, this, candidate, result); + return result; + } + + private void decompose(final int level, final DocumentNameMatcher matcher, final DocumentName candidate, final List result) { + final Predicate pred = matcher.getPredicate(); + result.add(new DecomposeData(level, matcher, candidate, pred.test(candidate))); + } + /** * Performs the match against the DocumentName. * @param documentName the document name to check. @@ -135,8 +197,7 @@ public static DocumentNameMatcher not(final DocumentNameMatcher nameMatcher) { return MATCHES_ALL; } - return new DocumentNameMatcher(format("not(%s)", nameMatcher), - (Predicate) documentName -> !nameMatcher.matches(documentName)); + return new DocumentNameMatcher(format("not(%s)", nameMatcher), new NotPredicate(nameMatcher)); } /** @@ -150,30 +211,43 @@ private static String join(final Collection matchers) { return String.join(", ", children); } + private static Optional standardCollectionCheck(final Collection matchers, + final DocumentNameMatcher override) { + if (matchers.isEmpty()) { + throw new ConfigurationException("Empty matcher collection"); + } + if (matchers.size() == 1) { + return Optional.of(matchers.iterator().next()); + } + if (matchers.contains(override)) { + return Optional.of(override); + } + return Optional.empty(); + } + /** * Performs a logical {@code OR} across the collection of matchers. * @param matchers the matchers to check. * @return a matcher that returns {@code true} if any of the enclosed matchers returns {@code true}. 
*/ public static DocumentNameMatcher or(final Collection matchers) { - if (matchers.isEmpty()) { - return MATCHES_NONE; - } - if (matchers.size() == 1) { - return matchers.iterator().next(); - } - if (matchers.contains(MATCHES_ALL)) { - return MATCHES_ALL; + Optional opt = standardCollectionCheck(matchers, MATCHES_ALL); + if (opt.isPresent()) { + return opt.get(); } - return new DocumentNameMatcher(format("or(%s)", join(matchers)), (Predicate) documentName -> { - for (DocumentNameMatcher matcher : matchers) { - if (matcher.matches(documentName)) { - return true; - } - } - return false; - }); + // preserve order and drop duplicates; the flattened set is re-checked before wrapping it in an Or + Set workingSet = new LinkedHashSet<>(); + for (DocumentNameMatcher matcher : matchers) { + // check for nested or and merge its matchers into this one + if (matcher.predicate instanceof Or) { + ((Or) matcher.predicate).getMatchers().forEach(workingSet::add); + } else { + workingSet.add(matcher); + } + } + return standardCollectionCheck(workingSet, MATCHES_ALL) + .orElseGet(() -> new DocumentNameMatcher(format("or(%s)", join(workingSet)), new Or(workingSet))); } /** @@ -191,24 +265,45 @@ public static DocumentNameMatcher or(final DocumentNameMatcher... matchers) { * @return a matcher that returns {@code true} if all the enclosed matchers return {@code true}. 
*/ public static DocumentNameMatcher and(final Collection matchers) { - if (matchers.isEmpty()) { - return MATCHES_NONE; - } - if (matchers.size() == 1) { - return matchers.iterator().next(); + Optional opt = standardCollectionCheck(matchers, MATCHES_NONE); + if (opt.isPresent()) { + return opt.get(); } - if (matchers.contains(MATCHES_NONE)) { - return MATCHES_NONE; + + // preserve order and drop duplicates; the flattened set is re-checked before wrapping it in an And + Set workingSet = new LinkedHashSet<>(); + for (DocumentNameMatcher matcher : matchers) { + // check for nested And and merge its matchers into this one + if (matcher.predicate instanceof And) { + ((And) matcher.predicate).getMatchers().forEach(workingSet::add); + } else { + workingSet.add(matcher); + } } + opt = standardCollectionCheck(workingSet, MATCHES_NONE); + return opt.orElseGet(() -> new DocumentNameMatcher(format("and(%s)", join(workingSet)), new And(workingSet))); + } - return new DocumentNameMatcher(format("and(%s)", join(matchers)), (Predicate) documentName -> { - for (DocumentNameMatcher matcher : matchers) { - if (!matcher.matches(documentName)) { - return false; - } + /** + * A particular matcher that will not match any excluded unless they are listed in the includes. + * @param includes the DocumentNameMatcher to match the includes. + * @param excludes the DocumentNameMatcher to match the excludes. + * @return a DocumentNameMatcher with the specified logic. 
+ */ + public static DocumentNameMatcher matcherSet(final DocumentNameMatcher includes, + final DocumentNameMatcher excludes) { + if (excludes == MATCHES_NONE) { + return MATCHES_ALL; + } else { + if (includes == MATCHES_NONE) { + return not(excludes); } - return true; - }); + } + if (includes == MATCHES_ALL) { + return MATCHES_ALL; + } + List workingSet = Arrays.asList(includes, excludes); + return new DocumentNameMatcher(format("matcherSet(%s)", join(workingSet)), new MatcherPredicate(workingSet)); } /** @@ -219,4 +314,225 @@ public static DocumentNameMatcher and(final Collection matc public static DocumentNameMatcher and(final DocumentNameMatcher... matchers) { return and(Arrays.asList(matchers)); } + + + + /** + * A DocumentName predicate that uses MatchPatterns. + */ + public static final class MatchPatternsPredicate implements Predicate { + /** The base directory for the pattern matches */ + private final DocumentName basedir; + /** The pattern matchers */ + private final MatchPatterns patterns; + + private MatchPatternsPredicate(final DocumentName basedir, final MatchPatterns patterns) { + this.basedir = basedir; + this.patterns = patterns; + } + + @Override + public boolean test(final DocumentName documentName) { + return patterns.matches(documentName.getName(), + tokenize(documentName.getName(), basedir.getDirectorySeparator()), + basedir.isCaseSensitive()); + } + + @Override + public String toString() { + return patterns.toString(); + } + } + + /** + * A DocumentName predicate that reverses another DocumentNameMatcher. + */ + public static final class NotPredicate implements Predicate { + /** The document name matcher to reverse */ + private final DocumentNameMatcher nameMatcher; + + private NotPredicate(final DocumentNameMatcher nameMatcher) { + this.nameMatcher = nameMatcher; + } + + @Override + public boolean test(final DocumentName documentName) { + return !nameMatcher.matches(documentName); + } + + @Override + public String toString() { + return 
nameMatcher.predicate.toString(); + } + } + + /** + * A DocumentName predicate that uses FileFilter. + */ + public static final class FileFilterPredicate implements Predicate { + /** The file filter */ + private final FileFilter fileFilter; + + private FileFilterPredicate(final FileFilter fileFilter) { + this.fileFilter = fileFilter; + } + + @Override + public boolean test(final DocumentName documentName) { + return fileFilter.accept(new File(documentName.getName())); + } + + @Override + public String toString() { + return fileFilter.toString(); + } + } + + interface CollectionPredicate extends Predicate { + Iterable getMatchers(); + } + /** + * A base implementation of {@link CollectionPredicate} that holds the collection of matchers. + */ + abstract static class CollectionPredicateImpl implements CollectionPredicate { + /** The collection of matchers that make up this predicate */ + private final Iterable matchers; + + /** + * Constructs a collection predicate from the collection of matchers + * @param matchers the collection of matchers to use. + */ + protected CollectionPredicateImpl(final Iterable matchers) { + this.matchers = matchers; + } + + /** + * Gets the internal matchers. + * @return an iterable over the internal matchers. + */ + public Iterable getMatchers() { + return matchers; + } + + public String toString() { + StringBuilder builder = new StringBuilder(this.getClass().getName()).append(": ").append(System.lineSeparator()); + for (DocumentNameMatcher matcher : matchers) { + builder.append(matcher.predicate.toString()).append(System.lineSeparator()); + } + return builder.toString(); + } + } + + /** + * An implementation of "and" logic across a collection of DocumentNameMatchers. 
+ */ + // package private for testing access + static class And extends CollectionPredicateImpl { + And(final Iterable matchers) { + super(matchers); + } + + @Override + public boolean test(final DocumentName documentName) { + for (DocumentNameMatcher matcher : getMatchers()) { + if (!matcher.matches(documentName)) { + return false; + } + } + return true; + } + } + + /** + * An implementation of "or" logic across a collection of DocumentNameMatchers. + */ + // package private for testing access + static class Or extends CollectionPredicateImpl { + Or(final Iterable matchers) { + super(matchers); + } + + @Override + public boolean test(final DocumentName documentName) { + for (DocumentNameMatcher matcher : getMatchers()) { + if (matcher.matches(documentName)) { + return true; + } + } + return false; + } + } + + /** + * An implementation of matcher set logic: matches if the first matcher (includes) matches or the second matcher (excludes) does not. + */ + // package private for testing access + static class MatcherPredicate extends CollectionPredicateImpl { + MatcherPredicate(final Iterable matchers) { + super(matchers); + } + + @Override + public boolean test(final DocumentName documentName) { + Iterator iter = getMatchers().iterator(); + // included + if (iter.next().matches(documentName)) { + return true; + } + // excluded + if (iter.next().matches(documentName)) { + return false; + } + return true; + } + } + + /** + * Data from a {@link DocumentNameMatcher#decompose(DocumentName)} call. + */ + public static final class DecomposeData { + /** the level this data was generated at */ + private final int level; + /** The DocumentNameMatcher that created this result */ + private final DocumentNameMatcher matcher; + /** The result of the check. 
*/ + private final boolean result; + /** The candidate */ + private final DocumentName candidate; + + private DecomposeData(final int level, final DocumentNameMatcher matcher, final DocumentName candidate, final boolean result) { + this.level = level; + this.matcher = matcher; + this.result = result; + this.candidate = candidate; + } + + @Override + public String toString() { + final String fill = createFill(level); + return format("%s%s: >>%s<< %s%n%s", + fill, matcher.toString(), result, + level == 0 ? candidate.getName() : "", + matcher.predicate instanceof CollectionPredicate ? + decompose(level + 1, (CollectionPredicate) matcher.predicate, candidate) : + String.format("%s%s >>%s<<", createFill(level + 1), matcher.predicate.toString(), matcher.predicate.test(candidate))); + } + + private String createFill(final int level) { + final char[] chars = new char[level * 2]; + Arrays.fill(chars, ' '); + return new String(chars); + } + + private String decompose(final int level, final CollectionPredicate predicate, final DocumentName candidate) { + List result = new ArrayList<>(); + + for (DocumentNameMatcher nameMatcher : predicate.getMatchers()) { + nameMatcher.decompose(level, nameMatcher, candidate, result); + } + StringBuilder sb = new StringBuilder(); + result.forEach(x -> sb.append(x).append(System.lineSeparator())); + return sb.toString(); + } + } }