From e320da3b7d51fed1f400b7ad0f670411bc9cbb60 Mon Sep 17 00:00:00 2001
From: David Smiley <dsmiley@apache.org>
Date: Wed, 8 Jan 2025 17:02:23 -0500
Subject: [PATCH] New dev-tools/scripts/parseContributorsFromChanges.py (#2424)

Parses a CHANGES.txt section passed in, in order to identify all contributors.

Future: integrate with the release wizard, to include providing only the versioned section of CHANGES.txt to the script.
(cherry picked from commit 35b27d3642797df345233b0c1f1a5cda9855b120)
---
 .../scripts/parseContributorsFromChanges.py   | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 dev-tools/scripts/parseContributorsFromChanges.py

diff --git a/dev-tools/scripts/parseContributorsFromChanges.py b/dev-tools/scripts/parseContributorsFromChanges.py
new file mode 100644
index 00000000000..9ce1a5e19ae
--- /dev/null
+++ b/dev-tools/scripts/parseContributorsFromChanges.py
@@ -0,0 +1,63 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import sys
+import re
+from collections import defaultdict
+
+# Read data (a CHANGES.txt section) from standard input
+data = sys.stdin.read()
+
+# Replace all carriage return line feed (Windows) with line feed
+data = data.replace('\r\n', '\n')
+
+# Replace all carriage return (Mac OS before X) with line feed
+data = data.replace('\r', '\n')
+
+# Split data into paragraphs at blank lines
+paras = data.split('\n\n')
+
+# Initialize a default dictionary to store contributors and their counts
+contributors = defaultdict(int)
+
+# Regular expression to find the attribution in parentheses at the end of a paragraph
+pattern = re.compile(r"\(([^()]*)\)$")
+
+for para in paras:
+  # Collapse each whitespace run to one space so the paragraph is a single line
+  para = re.sub(r'\s+', ' ', para).strip()
+
+  # Look for a trailing attribution; paragraphs without one are skipped
+  match = pattern.search(para)
+  if match:
+    attribution = match.group(1)
+    # might have a "via" committer; we only want the author here
+    attribution = attribution.split(" via ")[0] # keep left side
+    # Split the contributors by comma and strip whitespace
+    for contributor in attribution.split(','):
+      contributor = contributor.strip()
+      contributors[contributor] += 1
+
+# Drop 'solrbot'; pop() with a default avoids the KeyError del raises when absent
+contributors.pop('solrbot', None)
+
+sorted_contributors = sorted(contributors.items(), key=lambda item: item[1], reverse=True)
+
+# Print the contributors and their counts, most frequent first
+for contributor, count in sorted_contributors:
+  print(f'{contributor}: {count}')
+
+print('\n\nThanks to all contributors!: ')
+print(', '.join(contributor for contributor, _ in sorted_contributors))
\ No newline at end of file