Skip to content

Commit

Permalink
implemented matcher set in ExclusionProcessor
Browse files Browse the repository at this point in the history
  • Loading branch information
Claudenw committed Jan 27, 2025
1 parent e5073ff commit 89192f8
Show file tree
Hide file tree
Showing 4 changed files with 384 additions and 108 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;

import org.apache.rat.config.exclusion.plexus.MatchPatterns;
import org.apache.rat.document.DocumentName;
import org.apache.rat.document.DocumentNameMatcher;
import org.apache.rat.utils.DefaultLog;
Expand Down Expand Up @@ -85,10 +85,8 @@ private void resetLastMatcher() {
* @return this
*/
public ExclusionProcessor addIncludedPatterns(final Iterable<String> patterns) {
    // Walk the caller's Iterable exactly once: an Iterable is not required to support
    // repeated traversal, so the log message and the stored patterns are both taken
    // from this single snapshot.
    final List<String> lst = new ArrayList<>();
    patterns.forEach(lst::add);
    DefaultLog.getInstance().info(format("Including patterns: %s", String.join(", ", lst)));
    includedPatterns.addAll(lst);
    // NOTE(review): presumably discards the cached matcher so it is rebuilt on the next
    // getNameMatcher() call -- confirm against resetLastMatcher().
    resetLastMatcher();
    return this;
}
Expand Down Expand Up @@ -140,10 +138,8 @@ public ExclusionProcessor addIncludedCollection(final StandardCollection collect
* @return this
*/
public ExclusionProcessor addExcludedPatterns(final Iterable<String> patterns) {
    // Walk the caller's Iterable exactly once: an Iterable is not required to support
    // repeated traversal, so the log message and the stored patterns are both taken
    // from this single snapshot.
    final List<String> lst = new ArrayList<>();
    patterns.forEach(lst::add);
    DefaultLog.getInstance().info(format("Excluding patterns: %s", String.join(", ", lst)));
    excludedPatterns.addAll(lst);
    // NOTE(review): presumably discards the cached matcher so it is rebuilt on the next
    // getNameMatcher() call -- confirm against resetLastMatcher().
    resetLastMatcher();
    return this;
}
Expand Down Expand Up @@ -175,22 +171,6 @@ public ExclusionProcessor addExcludedCollection(final StandardCollection collect
return this;
}

/**
 * Splits qualified file patterns into two sets. Patterns accepted by
 * {@code ExclusionUtils.MATCH_FILTER} are stored unchanged; patterns accepted by
 * {@code ExclusionUtils.NOT_MATCH_FILTER} are stored with their first character removed.
 * @param matching the set that receives the matching file patterns.
 * @param notMatching the set that receives the non-matching file patterns (leading character stripped).
 * @param patterns the patterns to split.
 */
private void segregateList(final Set<String> matching, final Set<String> notMatching,
                           final Iterable<String> patterns) {
    // Nothing to do when the source yields no elements.
    if (!patterns.iterator().hasNext()) {
        return;
    }
    // First pass: collect the plain (matching) patterns.
    for (String candidate : patterns) {
        if (ExclusionUtils.MATCH_FILTER.test(candidate)) {
            matching.add(candidate);
        }
    }
    // Second pass: collect the negated patterns without their one-character prefix.
    for (String candidate : patterns) {
        if (ExclusionUtils.NOT_MATCH_FILTER.test(candidate)) {
            notMatching.add(candidate.substring(1));
        }
    }
}

/**
* Creates a Document name matcher that will return {@code false} on any
* document that is excluded.
Expand All @@ -204,84 +184,136 @@ public DocumentNameMatcher getNameMatcher(final DocumentName basedir) {
if (lastMatcher == null || !basedir.equals(lastMatcherBaseDir)) {
lastMatcherBaseDir = basedir;

final Set<String> incl = new TreeSet<>();
final Set<String> excl = new TreeSet<>();
final List<DocumentNameMatcher> inclMatchers = new ArrayList<>();

// add the file processors
for (StandardCollection sc : fileProcessors) {
ExtendedIterator<FileProcessor> iter = sc.fileProcessor();
if (iter.hasNext()) {
iter.forEachRemaining(fp -> {
segregateList(excl, incl, fp.apply(basedir));
fp.customDocumentNameMatchers().forEach(inclMatchers::add);
});
} else {
DefaultLog.getInstance().info(String.format("%s does not have a fileProcessor.", sc));
}
}

// add the standard patterns
segregateList(incl, excl, new FileProcessor(includedPatterns).apply(basedir));
segregateList(excl, incl, new FileProcessor(excludedPatterns).apply(basedir));
final List<MatcherSet> matchers = extractFileProcessors(basedir);
final MatcherSet.Builder fromCommandLine = new MatcherSet.Builder();
DocumentName.Builder nameBuilder = DocumentName.builder(basedir).setBaseName(basedir);
extractPatterns(nameBuilder, fromCommandLine);
extractCollectionPatterns(nameBuilder, fromCommandLine);
extractCollectionMatchers(fromCommandLine);
extractPaths(fromCommandLine);
matchers.add(fromCommandLine.build());

// add the collection patterns
for (StandardCollection sc : includedCollections) {
Set<String> patterns = sc.patterns();
if (patterns.isEmpty()) {
DefaultLog.getInstance().info(String.format("%s does not have a defined collection for inclusion.", sc));
} else {
segregateList(incl, excl, new FileProcessor(sc.patterns()).apply(basedir));
}
}
for (StandardCollection sc : excludedCollections) {
Set<String> patterns = sc.patterns();
if (patterns.isEmpty()) {
DefaultLog.getInstance().info(String.format("%s does not have a defined collection for exclusion.", sc));
} else {
segregateList(excl, incl, new FileProcessor(sc.patterns()).apply(basedir));
}
}
lastMatcher = MatcherSet.merge(matchers).createMatcher();
DefaultLog.getInstance().debug(format("Created matcher set for %s%n%s", basedir.getName(),
lastMatcher));
}
return lastMatcher;
}

// add the matchers
ExtendedIterator.create(includedCollections.iterator())
.map(StandardCollection::staticDocumentNameMatcher)
.filter(Objects::nonNull)
.forEachRemaining(inclMatchers::add);
/**
 * Builds one {@link MatcherSet} for each entry in {@link #fileProcessors}.
 * <p>
 * For every collection, all names produced by its file processors for {@code basedir} are gathered and
 * split with {@code MatcherSet.Builder.segregateList}. The names reported as matching are registered as
 * exclusions and the names reported as not matching are registered as inclusions, both under a
 * document name resolved from {@code basedir} and the collection name.
 * </p>
 * @param basedir The directory to base the file processors on.
 * @return a list holding one MatcherSet per {@link #fileProcessors} entry, in iteration order.
 */
private List<MatcherSet> extractFileProcessors(final DocumentName basedir) {
    final List<MatcherSet> result = new ArrayList<>();
    for (StandardCollection collection : fileProcessors) {
        // Collect every name emitted by this collection's processors.
        final Set<String> produced = new HashSet<>();
        collection.fileProcessor().forEachRemaining(fp -> fp.apply(basedir).forEach(produced::add));

        // Separate the plain names from the negated ones.
        final Set<String> excludes = new HashSet<>();
        final Set<String> includes = new HashSet<>();
        MatcherSet.Builder.segregateList(excludes, includes, produced);

        final MatcherSet.Builder setBuilder = new MatcherSet.Builder();
        setBuilder.addIncluded(basedir.resolve(collection.name()), includes);
        setBuilder.addExcluded(basedir.resolve(collection.name()), excludes);
        result.add(setBuilder.build());
    }
    return result;
}

List<DocumentNameMatcher> exclMatchers = ExtendedIterator.create(excludedCollections.iterator())
.map(StandardCollection::staticDocumentNameMatcher)
.filter(Objects::nonNull)
.addTo(new ArrayList<>());
/**
 * Prepares a pattern for exclusion processing in two steps: the {@code "/"} separators in the pattern
 * are converted to the directory separator of {@code documentName}, and the result is then qualified
 * against {@code documentName} via {@link ExclusionUtils#qualifyPattern(DocumentName, String)}.
 * @param documentName The document name to adjust the pattern against.
 * @param pattern the pattern.
 * @return the prepared pattern.
 */
private String preparePattern(final DocumentName documentName, final String pattern) {
    final String localised = ExclusionUtils.convertSeparator(pattern, "/", documentName.getDirectorySeparator());
    return ExclusionUtils.qualifyPattern(documentName, localised);
}
/**
 * Copies {@link #excludedPatterns} and {@link #includedPatterns} into the specified matcherBuilder.
 * Each pattern is run through {@code preparePattern} first; an empty pattern collection is skipped
 * entirely. Exclusions are registered before inclusions.
 * @param nameBuilder The name builder for the pattern. It is given the name "Patterns" and the built
 * name is the one the patterns are prepared against.
 * @param matcherBuilder the MatcherSet.Builder to add the patterns to.
 */
private void extractPatterns(final DocumentName.Builder nameBuilder, final MatcherSet.Builder matcherBuilder) {
    final DocumentName patternsName = nameBuilder.setName("Patterns").build();

    if (!excludedPatterns.isEmpty()) {
        final Set<String> preparedExcludes = excludedPatterns.stream()
                .map(pattern -> preparePattern(patternsName, pattern))
                .collect(Collectors.toSet());
        matcherBuilder.addExcluded(patternsName, preparedExcludes);
    }

    if (!includedPatterns.isEmpty()) {
        final Set<String> preparedIncludes = includedPatterns.stream()
                .map(pattern -> preparePattern(patternsName, pattern))
                .collect(Collectors.toSet());
        matcherBuilder.addIncluded(patternsName, preparedIncludes);
    }
}

if (!incl.isEmpty()) {
inclMatchers.add(new DocumentNameMatcher("included patterns", MatchPatterns.from(basedir.getDirectorySeparator(), incl), basedir));
/**
* Extracts {@link #includedCollections} and {@link #excludedCollections} patterns into the specified matcherBuilder.
* @param nameBuilder the name builder for the pattern names.
* @param matcherBuilder the MatcherSet.Builder to add the collections to.
*/
private void extractCollectionPatterns(final DocumentName.Builder nameBuilder, final MatcherSet.Builder matcherBuilder) {
final Set<String> incl = new TreeSet<>();
final Set<String> excl = new TreeSet<>();
for (StandardCollection sc : includedCollections) {
Set<String> patterns = sc.patterns();
if (patterns.isEmpty()) {
DefaultLog.getInstance().debug(String.format("%s does not have a defined collection for inclusion.", sc));
} else {
MatcherSet.Builder.segregateList(incl, excl, sc.patterns());
}
if (!excl.isEmpty()) {
exclMatchers.add(new DocumentNameMatcher("excluded patterns", MatchPatterns.from(basedir.getDirectorySeparator(), excl), basedir));
}
for (StandardCollection sc : excludedCollections) {
Set<String> patterns = sc.patterns();
if (patterns.isEmpty()) {
DefaultLog.getInstance().debug(String.format("%s does not have a defined collection for exclusion.", sc));
} else {
MatcherSet.Builder.segregateList(excl, incl, sc.patterns());
}
}
DocumentName name = nameBuilder.setName("Collections").build();
matcherBuilder
.addExcluded(name, excl.stream().map(s -> preparePattern(name.getBaseDocumentName(), s)).collect(Collectors.toSet()))
.addIncluded(name, incl.stream().map(s -> preparePattern(name.getBaseDocumentName(), s)).collect(Collectors.toSet()));
}

if (!includedPaths.isEmpty()) {
for (DocumentNameMatcher matcher : includedPaths) {
DefaultLog.getInstance().info(format("Including path matcher %s", matcher));
inclMatchers.add(matcher);
}
}
if (!excludedPaths.isEmpty()) {
for (DocumentNameMatcher matcher : excludedPaths) {
DefaultLog.getInstance().info(format("Excluding path matcher %s", matcher));
exclMatchers.add(matcher);
}
}
/**
* Extracts {@link #includedCollections} and {@link #excludedCollections} matchers into the specified matcherBuilder.
* @param matcherBuilder the MatcherSet.Builder to add the collections to.
*/
private void extractCollectionMatchers(final MatcherSet.Builder matcherBuilder) {
ExtendedIterator.create(includedCollections.iterator())
.map(StandardCollection::staticDocumentNameMatcher)
.filter(Objects::nonNull)
.forEachRemaining(matcherBuilder::addIncluded);

lastMatcher = DocumentNameMatcher.MATCHES_ALL;
if (!exclMatchers.isEmpty()) {
lastMatcher = DocumentNameMatcher.not(DocumentNameMatcher.or(exclMatchers));
if (!inclMatchers.isEmpty()) {
lastMatcher = DocumentNameMatcher.or(DocumentNameMatcher.or(inclMatchers), lastMatcher);
}
ExtendedIterator.create(excludedCollections.iterator())
.map(StandardCollection::staticDocumentNameMatcher)
.filter(Objects::nonNull)
.forEachRemaining(matcherBuilder::addExcluded);
}

/**
 * Extracts the {@link #includedPaths} and {@link #excludedPaths} matchers into the specified matcherBuilder.
 * Every matcher is reported at info level before it is added.
 * @param matcherBuilder the MatcherSet.Builder to add the path matchers to.
 */
private void extractPaths(final MatcherSet.Builder matcherBuilder) {
    // Iterating an empty collection is a no-op, so no emptiness guard is needed.
    for (DocumentNameMatcher matcher : includedPaths) {
        DefaultLog.getInstance().info(format("Including path matcher %s", matcher));
        matcherBuilder.addIncluded(matcher);
    }
    for (DocumentNameMatcher matcher : excludedPaths) {
        DefaultLog.getInstance().info(format("Excluding path matcher %s", matcher));
        matcherBuilder.addExcluded(matcher);
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,13 @@
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang3.StringUtils;
import org.apache.rat.ConfigurationException;
import org.apache.rat.config.exclusion.plexus.MatchPattern;
import org.apache.rat.config.exclusion.plexus.SelectorUtils;
import org.apache.rat.document.DocumentName;
import org.apache.rat.document.DocumentNameMatcher;
import org.apache.rat.utils.DefaultLog;
import org.apache.rat.utils.ExtendedIterator;
import org.apache.rat.utils.Log;

import static java.lang.String.format;

Expand All @@ -48,11 +52,14 @@ public final class ExclusionUtils {
/** The list of comment prefixes that are used to filter comment lines. */
public static final List<String> COMMENT_PREFIXES = Arrays.asList("#", "##", "//", "/**", "/*");

/** Prefix used to negate the given pattern. */
public static final String NEGATION_PREFIX = "!";

/** A predicate that accepts only lines that start with {@link #NEGATION_PREFIX}. */
public static final Predicate<String> NOT_MATCH_FILTER = s -> s.startsWith(NEGATION_PREFIX);

/** A predicate that accepts only lines that do not start with {@link #NEGATION_PREFIX}. */
public static final Predicate<String> MATCH_FILTER = NOT_MATCH_FILTER.negate();

private ExclusionUtils() {
// do not instantiate
Expand Down Expand Up @@ -112,7 +119,20 @@ public static Predicate<String> commentFilter(final String commentPrefix) {
* @return a FileFilter.
*/
public static FileFilter asFileFilter(final DocumentName parent, final DocumentNameMatcher nameMatcher) {
    return file -> {
        // The candidate name is built from the file but uses the parent's base name.
        DocumentName candidate = DocumentName.builder(file).setBaseName(parent.getBaseName()).build();
        boolean result = nameMatcher.matches(candidate);
        Log log = DefaultLog.getInstance();
        // Only pay for the diagnostic output when debug logging is enabled.
        if (log.isEnabled(Log.Level.DEBUG)) {
            log.debug(format("FILTER TEST for %s -> %s", file, result));
            if (!result) {
                // On rejection, log the matcher's decomposition for the candidate.
                List<DocumentNameMatcher.DecomposeData> data = nameMatcher.decompose(candidate);
                log.debug("Decomposition for " + candidate);
                data.forEach(log::debug);
            }
        }
        return result;
    };
}

/**
Expand Down Expand Up @@ -172,12 +192,20 @@ protected boolean isValidLine(final String line) {

/**
* Returns {@code true} if the file name represents a hidden file.
* @param f the file to check.
* @param file the file to check.
* @return {@code true} if it is the name of a hidden file.
*/
public static boolean isHidden(final File f) {
String s = f.getName();
return s.startsWith(".") && !(s.equals(".") || s.equals(".."));
public static boolean isHidden(final File file) {
return isHidden(file.getName());
}

/**
* Returns {@code true} if the filename represents a hidden file
* @param fileName the file to check.
* @return true if it is the name of a hidden file.
*/
public static boolean isHidden(final String fileName) {
return fileName.startsWith(".") && !(fileName.equals(".") || fileName.equals(".."));
}

private static void verifyFile(final File file) {
Expand All @@ -186,6 +214,31 @@ private static void verifyFile(final File file) {
}
}

/**
 * Modifies the {@link MatchPattern} formatted {@code pattern} argument by expanding the pattern and
 * by adjusting the pattern to include the basename from the {@code documentName} argument.
 * <p>
 * A leading {@link #NEGATION_PREFIX} is removed before processing and put back on the result.
 * For a regex-prefixed pattern the base name and directory separator are prepended as a quoted
 * literal and the result is wrapped in the regex handler prefix and suffix. Any other pattern is
 * resolved against the base document name and the resulting name is used.
 * </p>
 * @param documentName the name of the file being read.
 * @param pattern the pattern to format.
 * @return the completely formatted pattern
 */
public static String qualifyPattern(final DocumentName documentName, final String pattern) {
    boolean prefix = pattern.startsWith(NEGATION_PREFIX);
    String workingPattern = prefix ? pattern.substring(1) : pattern;
    String normalizedPattern = SelectorUtils.extractPattern(workingPattern, documentName.getDirectorySeparator());

    StringBuilder sb = new StringBuilder(prefix ? NEGATION_PREFIX : "");
    if (SelectorUtils.isRegexPrefixedPattern(workingPattern)) {
        String literalBase = new StringBuilder()
                .append(documentName.getBaseName())
                .append(documentName.getDirectorySeparator())
                .toString();
        // Pattern.quote yields \Q...\E for ordinary input and also copes with a literal "\E"
        // inside the base name (e.g. a backslash-separated path such as C:\Eclipse), which would
        // otherwise end the quoted section early.
        sb.append(SelectorUtils.REGEX_HANDLER_PREFIX)
                .append(java.util.regex.Pattern.quote(literalBase))
                .append(normalizedPattern)
                .append(SelectorUtils.PATTERN_HANDLER_SUFFIX);
    } else {
        sb.append(documentName.getBaseDocumentName().resolve(normalizedPattern).getName());
    }
    return sb.toString();
}

/**
* Tokenizes the string based on the directory separator.
* @param source the source to tokenize.
Expand Down
Loading

0 comments on commit 89192f8

Please sign in to comment.