-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscan.py
139 lines (118 loc) · 4.05 KB
/
scan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import tarfile
import zipfile
from pathlib import Path
from pprint import pprint
import filetype
import config
from config import support_archive_type, support_image_type, images_min_num
from manga import Manga
def scan_file_dirs(p: Path) -> list[Path]:
    """Walk *p* and collect candidate archives and image folders.

    Every regular file below *p* is inspected with ``filetype.guess``:

    * a file whose MIME type is listed in ``support_archive_type`` is
      recorded itself;
    * a file whose MIME type starts with ``image/`` causes its containing
      directory to be recorded.

    Files that cannot be read (``OSError``) or whose type is not recognised
    are skipped.

    Args:
        p: Directory to walk.

    Returns:
        De-duplicated paths, all expressed relative to ``config.root``, in
        no particular order.
    """
    found = set()
    for root, _dirs, files in os.walk(p):
        root_path = Path(root)
        for name in files:
            candidate = root_path / name
            if not candidate.is_file():
                continue
            try:
                kind = filetype.guess(candidate)
            except OSError as e:
                print(e)
                continue
            if not kind:
                continue
            if kind.mime in support_archive_type:
                # Record the archive relative to config.root, exactly like the
                # image folders below. A bare file name would only resolve
                # against config.root for archives sitting directly in it.
                found.add(Path(os.path.relpath(candidate, config.root)))
            elif str(kind.mime).startswith('image/'):
                found.add(Path(os.path.relpath(root_path, config.root)))
    return list(found)
def zip_get_filelist(p: Path) -> tuple[bool, list[str]]:
    """List the member names of the zip archive at *p*.

    Args:
        p: Path of the file to inspect.

    Returns:
        ``(True, names)`` when *p* could be opened as a zip archive,
        ``(False, [])`` when it is not a zip file or its directory is damaged.
    """
    if not zipfile.is_zipfile(p):
        return False, []
    try:
        with zipfile.ZipFile(p, 'r') as z:
            return True, z.namelist()
    except zipfile.BadZipFile:
        # is_zipfile() can accept a file whose central directory is damaged;
        # report it as unusable instead of letting the exception escape.
        return False, []
def tar_get_filelist(p: Path) -> (bool, list[str]):
    """List the member names of the tar archive at *p*.

    Returns ``(True, names)`` when ``tarfile.is_tarfile`` accepts *p*,
    otherwise ``(False, [])``.
    """
    if tarfile.is_tarfile(p):
        with tarfile.open(p) as archive:
            member_names = archive.getnames()
        return True, member_names
    return False, []
# Maps a detected MIME type to the function that lists an archive's member
# names; each function takes a path and returns an (ok, names) pair.
# NOTE(review): the filetype package appears to report plain tar archives as
# 'application/x-tar' - confirm that the 'application/tar' key can ever match.
FILE_TYPE_IMPL = {
'application/zip': zip_get_filelist,
'application/tar': tar_get_filelist
}
def filter_file_dirs(ps: list[Path]) -> dict[str, Manga]:
    """Keep the candidates that hold enough images and wrap them in ``Manga``.

    Each entry of *ps* is resolved against ``config.root``. Files are treated
    as archives and their member names are checked by extension; directories
    are walked and their files are checked with ``filetype.guess``. An entry
    is kept when it holds at least ``images_min_num`` images.

    Args:
        ps: Candidate paths, relative to ``config.root``.

    Returns:
        A dict keyed by ``str(p)``. Each value is
        ``Manga(p, file_list, is_dir, mime, st_ctime)`` where ``mime`` is ``''``
        for directories.
    """
    # Compressed tarballs are recognised by suffix, not by detected MIME type.
    tar_suffixes = (".tgz", ".tar.gz", ".tar.xz", ".tar.bz2", ".txz")

    def filter_file_list(file_list: list[str]) -> list[str]:
        """Return *file_list* unchanged if it names enough images, else ``[]``."""
        image_cnt = 0
        for name in file_list:
            # splitext() keeps the leading dot; drop it before the lookup.
            extension = os.path.splitext(name)[-1][1:]
            if extension in support_image_type:
                image_cnt += 1
        return file_list if image_cnt >= images_min_num else []

    def filter_file(p: Path) -> tuple[bool, list[str], str]:
        """Inspect the archive at *p*; return ``(ok, member_names, mime)``."""
        try:
            k = filetype.guess(p)
        except OSError as e:
            print(e)
            return False, [], 'unknown'
        # guess() returns None for unrecognised content; do not dereference it.
        mime = k.mime if k else 'unknown'

        if mime in FILE_TYPE_IMPL:
            ok, file_list = FILE_TYPE_IMPL[mime](p)
            failed_mime = mime
        elif str(p).endswith(tar_suffixes):
            ok, file_list = tar_get_filelist(p)
            mime, failed_mime = 'application/tar', 'unknown'
        else:
            return False, [], mime

        if not ok:
            return False, [], failed_mime
        kept = filter_file_list(file_list)
        return bool(kept), kept, mime

    def filter_dir(p: Path) -> tuple[bool, list[Path]]:
        """Collect the image files below *p*; return ``(ok, file_names)``."""
        file_list = []
        for root, _dirs, files in os.walk(p):
            for name in files:
                try:
                    k = filetype.guess(Path(root) / name)
                except OSError as e:
                    print(e)
                    continue
                # Unrecognised files (guess() -> None) simply do not count.
                if k is not None and k.extension in support_image_type:
                    file_list.append(Path(name))
        # Inclusive threshold, matching the archive check in filter_file_list.
        return len(file_list) >= images_min_num, file_list

    archives = {}
    for p in ps:
        abs_p = config.root / p
        if abs_p.is_file():
            ok, file_list, mime = filter_file(abs_p)
            is_dir = False
        elif abs_p.is_dir():
            ok, file_list = filter_dir(abs_p)
            is_dir, mime = True, ''
        else:
            # Neither a file nor a directory under config.root: nothing to keep.
            continue
        if not ok:
            continue
        archives[str(p)] = Manga(p, file_list, is_dir, mime, os.stat(abs_p).st_ctime)
    return archives
if __name__ == '__main__':
    # Manual run: scan config.root, filter the candidates, and dump the result.
    candidates = scan_file_dirs(Path(config.root))
    pprint(filter_file_dirs(candidates))