-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscan4dups.py
executable file
·64 lines (56 loc) · 1.48 KB
/
scan4dups.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python3
# Forked from
# https://github.com/kragen/xcompose/blob/master/scan4dups.py
from __future__ import annotations
import re
import sys
from collections.abc import Iterable, Iterator
# One "<keysym>" token of a key sequence, plus any whitespace in front of it.
KEYSYM_RE = re.compile(r"\s*<(\w+)>")
# The first double-quoted string on a line.  A backslash escapes the next
# character, so an escaped quote (\") stays inside the value, and the match
# stops at the real closing quote instead of running on to a later '"' that
# appears further along the line (for example inside a trailing comment).
VALUE_RE = re.compile(r'[^"]*"((?:[^"\\]|\\.)+)"')
# Recorded as the value of a definition whose quoted result could not be parsed.
UNPARSED = '???'


def parse_sequence(line: str) -> str:
    """Return the key sequence that starts *line*, or '' if there is none.

    Every leading ``<word>`` token contributes " word" to the result, so
    ``<Multi_key> <a>`` becomes " Multi_key a" (note the leading space).
    """
    words = []
    pos = 0
    while True:
        # Matching at an offset avoids copying the rest of the line each time.
        m = KEYSYM_RE.match(line, pos)
        if m is None:
            break
        words.append(m.group(1))
        pos = m.end()
    return "".join(" " + word for word in words)


def parse_value(line: str) -> str:
    """Return the first double-quoted string in *line*, or '' if none parses.

    The surrounding quotes are dropped; backslash escapes are returned as
    written (they are not decoded).
    """
    m = VALUE_RE.match(line)
    return m.group(1) if m else ''


def prefixes(name: str) -> list[str]:
    """Return every proper, non-empty prefix of *name*, shortest first.

    *name* is a key sequence as built by ``parse_sequence``; each prefix is
    returned in that same " word word" form so it can be looked up directly.
    """
    words = name.split()
    return [" " + " ".join(words[:n]) for n in range(1, len(words))]


def report(lines: Iterable[str]) -> Iterator[str]:
    """Yield, in order, each line of the duplicate/prefix report for *lines*.

    The first pass records one value per key sequence and reports sequences
    that are defined more than once (with the same or a different value).
    The second pass reports sequences that are a prefix of another sequence.
    The messages are yielded one at a time so the caller can print them as
    the input is being read.
    """
    entries: dict[str, str] = {}
    values: set[str] = set()

    yield "Parsing input..."
    for line in lines:
        name = parse_sequence(line)
        if not name:
            # The line does not start with a <keysym> sequence: nothing to record.
            continue
        val = parse_value(line)
        if val:
            values.add(val)
        else:
            # Shouldn't happen, but just in case.  The placeholder is stored
            # as the entry's value but is not counted as a character.
            val = UNPARSED
            yield "Couldn't make sense of line: " + line
        if name not in entries:
            entries[name] = val
        elif entries[name] != val:
            yield f"Exact conflict found: ({name} ) [{entries[name]}] [{val}]"
        else:
            yield f"\tRedundant definition: ({name} ) [{val}]"
    yield "Done."

    yield "Checking prefixes..."
    for key, val in entries.items():
        for pref in prefixes(key):
            if pref in entries:
                yield ("Prefix conflict found: "
                       f"({pref} ) [{entries[pref]}] vs. ({key} ) [{val}]")
    yield "Done."
    yield f"{len(entries)} entries total"
    yield f"{len(values)} unique characters"


def main() -> None:
    """Read definitions from standard input and print the report."""
    for message in report(sys.stdin):
        print(message)


if __name__ == "__main__":
    main()