forked from CCBR/spacesavers2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspacesavers2_usurp
executable file
·142 lines (130 loc) · 4.64 KB
/
spacesavers2_usurp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
import sys
import os
import textwrap
import time
from pathlib import Path
import uuid
from src.VersionCheck import version_check
from src.VersionCheck import __version__
version_check()
from src.utils import *
from datetime import date
import argparse
def _replace_with_link(original_copy, dup, force, log):
    """Replace *dup* with a link to *original_copy*.

    The link is first created under a unique temporary name next to *dup* and
    then moved over *dup* with ``os.replace``, so *dup* is never missing and
    is left untouched if the link cannot be created.

    Args:
        original_copy: path of the file that is kept.
        dup: path of the duplicate to be replaced.
        force: when true, fall back to a symlink if a hard link cannot be
            created.
        log: callable taking one message string.

    Returns:
        True if *dup* now is a link to *original_copy*, False otherwise.
    """
    tmp_link = Path(str(dup) + "." + str(uuid.uuid4()))
    try:
        os.link(original_copy, tmp_link)
    except OSError:
        if not force:
            log(
                "OSError occurred while creating hard-link. "
                "Probably trying to create a cross-device hard-link. "
                "Try using --force to create symlink instead."
            )
            return False
        log("Creating symlink file: {}".format(dup))
        # Build the symlink under the temporary name too, so a failure here
        # does not cost us the duplicate.
        os.symlink(original_copy, tmp_link)
    else:
        log("Creating hard link file: {}".format(dup))

    try:
        # The temporary link lives in the same directory as dup, so this is a
        # same-filesystem rename that overwrites dup in one step.
        os.replace(tmp_link, dup)
    except OSError as err:
        try:
            os.remove(tmp_link)
        except OSError:
            # Best-effort cleanup; the replace failure below is what matters.
            pass
        log("Could not replace {} with link: {}".format(dup, err))
        return False
    return True


def main():
    """Command-line entry point of spacesavers2_usurp.

    Reads the grubbers TSV given with ``-g`` and, for every row whose first
    column contains the ``-x`` hash, replaces each duplicate listed in the
    sixth column ("##"-separated) with a hard link to the original copy named
    in the fifth column. With ``--force`` a symlink is created when a hard
    link is not possible. The size of each distinct duplicate inode that was
    successfully replaced is summed and reported at the end.

    Exits with status 1 if the original copy of a matching row is not
    readable.
    """
    start = time.time()
    scriptname = os.path.basename(__file__)

    def log(message):
        # Thin wrapper so every message carries the same start time/script name.
        print_with_timestamp(start=start, scriptname=scriptname, string=message)

    elog = textwrap.dedent(
        """\
        Version:
            {}
        Example:
            > spacesavers2_usurp -g grubbers.TSV -x someHash
        """.format(
            __version__
        )
    )
    parser = argparse.ArgumentParser(
        description="spacesavers2_usurp: delete all but one copy of the file matching the hash and replace all other copies with hardlinks",
        epilog=elog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Required option: a default would never be used, so none is given.
    parser.add_argument(
        "-g",
        "--grubber",
        dest="grubber",
        required=True,
        type=str,
        help="spacesavers2_grubbers output TSV file",
    )
    parser.add_argument(
        "-x",
        "--hash",
        dest="hash",
        required=True,
        type=str,
        help="hash (or unique partial hash) from column 1 of spacesavers2_grubbers TSV file",
    )
    parser.add_argument(
        "-f",
        "--force",
        dest="force",
        required=False,
        action=argparse.BooleanOptionalAction,
        help="forcefully create symlink if hardlink is not possible",
    )
    parser.add_argument("-v", "--version", action="version", version=__version__)
    # Kept module-global as in the original script.
    global args
    args = parser.parse_args()
    log("version: {}".format(__version__))

    total_saved = 0
    with open(args.grubber, "rt") as grubbers:
        for line in grubbers:
            fields = line.strip().split("\t")
            if args.hash not in fields[0]:
                continue
            original_copy = Path(fields[4].strip('"'))
            dupfiles = fields[5].split("##")
            log("Original copy: {}".format(original_copy))
            if not os.access(original_copy, os.R_OK):
                log("Original copy is not readable. Hardlinks cannot be created!")
                sys.exit(1)
            original_stat = os.stat(original_copy)
            original_key = (original_stat.st_dev, original_stat.st_ino)
            # Inode numbers are only unique per device, hence (dev, ino) keys.
            counted_inodes = set()
            for dup in dupfiles:
                dup = Path(dup.strip('"'))
                dup_stat = os.stat(dup)
                dup_key = (dup_stat.st_dev, dup_stat.st_ino)
                if dup_key == original_key:
                    # Already resolves to the original: nothing to reclaim, and
                    # renaming one hard link over another link to the same
                    # file is a no-op that would leave the temporary behind.
                    log("Already linked to original copy, skipping: {}".format(dup))
                    continue
                log("Deleting copy: {}".format(dup))
                if not _replace_with_link(original_copy, dup, args.force, log):
                    continue
                # Count each freed inode once, and only after it was replaced.
                if dup_key not in counted_inodes:
                    counted_inodes.add(dup_key)
                    total_saved += dup_stat.st_size

    total_saved_human_readable = get_human_readable_size(total_saved)
    log("Done! Saved: {}".format(total_saved_human_readable))
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()