From e320da3b7d51fed1f400b7ad0f670411bc9cbb60 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Wed, 8 Jan 2025 17:02:23 -0500 Subject: [PATCH] New dev-tools/scripts/parseContributorsFromChanges.py (#2424) Parses a CHANGES.txt section passed in, in order to identify all contributors. Future: integrate with the release wizard, to include providing only the versioned section of CHANGES.txt to the script. (cherry picked from commit 35b27d3642797df345233b0c1f1a5cda9855b120) --- .../scripts/parseContributorsFromChanges.py | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 dev-tools/scripts/parseContributorsFromChanges.py diff --git a/dev-tools/scripts/parseContributorsFromChanges.py b/dev-tools/scripts/parseContributorsFromChanges.py new file mode 100644 index 00000000000..9ce1a5e19ae --- /dev/null +++ b/dev-tools/scripts/parseContributorsFromChanges.py @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Tally the contributors credited in a CHANGES.txt section.

Reads the section text from standard input, extracts the parenthesized
attribution that ends each entry, and prints a per-contributor count followed
by a comma-separated thank-you list.

Usage: python3 parseContributorsFromChanges.py < changes-section.txt
"""

import sys
import re
from collections import Counter

# The attribution is the parenthesized group (without nested parentheses)
# sitting at the very end of an entry.
pattern = re.compile(r"\(([^()]*)\)$")

# Entries are separated by blank lines; a line holding only whitespace counts
# as blank too, otherwise two entries would be merged and the first one's
# attribution (no longer at the end of the paragraph) would be lost.
_PARAGRAPH_BREAK = re.compile(r"\n\s*\n")

_WHITESPACE_RUN = re.compile(r"\s+")

# Names dropped from the tally (solrbot is presumably an automated account,
# not a person to thank).
IGNORED_CONTRIBUTORS = ('solrbot',)


def parse_contributors(data):
    """Count how many entries credit each contributor.

    Args:
        data: Text of a CHANGES.txt section. Any line-ending convention
            (LF, CRLF, or lone CR) is accepted.

    Returns:
        A list of ``(contributor, count)`` tuples sorted by descending count.
        Contributors with equal counts keep the order in which they were
        first encountered.
    """
    # Normalize Windows (CRLF) and old Mac OS (CR) line endings to LF so the
    # blank-line split below works on every input.
    data = data.replace('\r\n', '\n').replace('\r', '\n')

    contributors = Counter()
    for para in _PARAGRAPH_BREAK.split(data):
        # Collapse wrapped lines into one so an attribution that was wrapped
        # onto its own line still ends the paragraph.
        para = _WHITESPACE_RUN.sub(' ', para).strip()

        match = pattern.search(para)
        if not match:
            continue

        # The attribution might have a "via" committer; we only want the
        # author(s) here, i.e. the left side.
        attribution = match.group(1).split(" via ")[0]
        for contributor in attribution.split(','):
            contributor = contributor.strip()
            # Skip empty names produced by "()" or a trailing comma.
            if contributor:
                contributors[contributor] += 1

    for ignored in IGNORED_CONTRIBUTORS:
        # pop() with a default: the name may legitimately be absent, and a
        # plain `del` would raise KeyError in that case.
        contributors.pop(ignored, None)

    # most_common() sorts by descending count and is stable for ties.
    return contributors.most_common()


def main():
    """Read a CHANGES.txt section from stdin and print the contributor report."""
    sorted_contributors = parse_contributors(sys.stdin.read())

    # Print the contributors and their counts
    for contributor, count in sorted_contributors:
        print(f'{contributor}: {count}')

    print('\n\nThanks to all contributors!: ')
    print(', '.join(contributor for contributor, _ in sorted_contributors))


if __name__ == '__main__':
    main()