-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_images.py
70 lines (56 loc) · 1.8 KB
/
check_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import numpy as np
import onnxruntime as rt
import argparse
from PIL import Image, ImageFile
import os
import huggingface_hub
import pandas as pd
import argparse
from glob import glob
from multiprocessing import Pool, current_process
from tqdm import tqdm
import json
# Module-level Pillow setting: allow image files with truncated data to be
# loaded instead of raising during decode.
# NOTE(review): with this enabled, truncated files are presumably NOT reported
# as broken by is_valid_image below — confirm that is the intended behaviour
# for this checker.
ImageFile.LOAD_TRUNCATED_IMAGES = True
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the image checking script.

    Returns:
        A namespace with two string options:
        ``dataset_path`` (defaults to ``"."``) and ``save_path``
        (``None`` when the flag is not given).
    """
    cli = argparse.ArgumentParser()
    # Both options are plain strings; only their fallback values differ.
    for flag, fallback in (("--dataset_path", "."), ("--save_path", None)):
        cli.add_argument(flag, type=str, default=fallback)
    return cli.parse_args()
# Lower-case file extensions (without the leading dot) treated as images.
_IMAGE_EXTENSIONS = frozenset({"png", "jpg", "jpeg", "gif", "webp", "bmp"})


def is_image(image_path: str) -> bool:
    """Return True when ``image_path`` has a recognised image extension.

    Only the file name is inspected; the file is never opened. The check is
    case-insensitive, so ``photo.JPG`` is accepted just like ``photo.jpg``.

    Args:
        image_path: Path of the candidate file.

    Returns:
        True if the text after the last ``.`` of the file name is one of the
        known image extensions, False otherwise (including names that contain
        no ``.`` at all).
    """
    # Look at the final path component only, so a dot in a directory name
    # cannot be mistaken for an extension separator.
    _, dot, extension = os.path.basename(image_path).rpartition(".")
    if not dot:
        # No "." in the file name: there is no extension to test.
        return False
    return extension.lower() in _IMAGE_EXTENSIONS
def is_valid_image(image_path):
    """Return True if the file at ``image_path`` can be opened and decoded.

    The image is opened with Pillow and converted to RGBA; any exception
    raised along the way marks the file as invalid.

    Args:
        image_path: Path of the image file to check.

    Returns:
        True when opening and converting succeed, False otherwise. On failure
        a message naming the path is printed to stdout.
    """
    try:
        # The context manager releases the underlying file handle even when
        # the conversion fails, so scanning many files does not leak handles.
        with Image.open(image_path) as image:
            image.convert("RGBA")
    except Exception:
        # Deliberately broad: any failure to open or decode counts as a
        # broken image for the purposes of this check.
        print(f"Error opening {image_path}")
        return False
    return True
def init_subprocess(device, num_gpus):
    """Create the per-process global ``predictor``.

    The device id passed to ``Predictor`` is derived from the first element
    of the current process's ``_identity`` tuple, minus one, taken modulo
    ``num_gpus``.

    NOTE(review): ``Predictor`` is not defined or imported in the visible part
    of this file, and nothing visible calls this function — confirm whether
    it is still needed.

    Args:
        device: Forwarded unchanged to ``Predictor`` as ``device``.
        num_gpus: Modulus used when computing the device id.
    """
    global predictor
    predictor_factory = Predictor
    # presumably a 1-based worker index for pool processes — TODO confirm
    worker_number = current_process()._identity[0]
    gpu_index = (worker_number - 1) % num_gpus
    predictor = predictor_factory(device=device, device_id=gpu_index)
def main():
    """Scan a dataset directory for unreadable images and save their paths.

    Every path under ``--dataset_path`` (searched recursively) that
    ``is_image`` accepts is checked with ``is_valid_image`` in a process
    pool. The paths that fail the check are written, one per line, to
    ``--save_path``.

    Raises:
        SystemExit: If ``--save_path`` was not supplied.
    """
    args = parse_args()
    # Fail before the expensive scan: without a destination the final open()
    # would raise a TypeError only after every image had been processed.
    if args.save_path is None:
        raise SystemExit(
            "--save_path is required: no file to write the broken image list to"
        )

    candidates = glob(f"{args.dataset_path}/**", recursive=True)
    image_paths = [path for path in candidates if is_image(path)]

    with Pool() as pool:
        # imap yields results in input order, so results[i] belongs to
        # image_paths[i]; tqdm only wraps the iterator to show progress.
        results = list(
            tqdm(pool.imap(is_valid_image, image_paths), total=len(image_paths))
        )

    error_image_paths = [
        f"{path}\n" for path, ok in zip(image_paths, results) if not ok
    ]
    # Explicit encoding so non-ASCII paths are written the same way on every
    # platform instead of depending on the locale default.
    with open(args.save_path, "w", encoding="utf-8") as f:
        f.writelines(error_image_paths)


if __name__ == "__main__":
    main()