-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrplan_dataset.py
133 lines (120 loc) · 5.26 KB
/
rplan_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import json
from datasets import load_from_disk, DatasetDict
import datasets
import numpy as np
import random
# Integer category id -> label string. The position of each name in the tuple
# below is its id; the dataset code looks labels up with the type field of a
# room record.
room_label = dict(enumerate((
    "LivingRoom",
    "MasterRoom",
    "Kitchen",
    "Bathroom",
    "DiningRoom",
    "ChildRoom",
    "StudyRoom",
    "SecondRoom",
    "GuestRoom",
    "Balcony",
    "Entrance",
    "Storage",
    "Wall-in",
    "External",
    "ExteriorWall",
    "FrontDoor",
    "InteriorWall",
    "InteriorDoor",
)))
# Pixel -> length conversion used for the "area"/"height"/"width" fields.
# NOTE(review): presumably 18 length units span 256 pixels - confirm units.
_PIXEL2LEN = 18 / 256
_PIXEL2AREA = _PIXEL2LEN ** 2

# System prompt, reproduced verbatim (including the "contraints" spelling) so
# tokenized prompts stay identical to those of earlier runs.
_INSTRUCTION_STR = 'you are to generate a floor plan in a JSON structure. you have to satisfy the adjacency constraints given as pairs of neighboring rooms; two connecting rooms are presented as (room_type1 room_id1, room_type2 room_id2). you also need to satisfy additional contraints given by the user.'


def _polygon_json(polygon):
    """Serialize (x, y) vertex pairs as a JSON array of {"x", "z"} objects."""
    return '[' + ', '.join(f'{{"x": {x}, "z": {y}}}' for x, y in polygon) + ']'


def _build_floorplan_json(data, detailed):
    """Return the target floor-plan JSON text for one sample.

    Args:
        data: Sample with 'rooms' (one record per room) and 'polygons'
            (one vertex list per room, same order).
        detailed: When False, each room carries only room_type, floor_polygon
            and id. When True, each room also carries area, height and width,
            and the plan gets room_count, total_area and room_types.
    """
    rooms = data['rooms']
    polygons = data['polygons']

    if not detailed:
        # NOTE(review): the type id is read at index -2 here, while the
        # adjacency text reads index 4; these coincide only for 6-field room
        # records - confirm against the stored dataset schema.
        room_strs = [
            f'{{"room_type": "{room_label[room_info[-2]]}", '
            f'"floor_polygon": {_polygon_json(polygons[room_idx])}, '
            f'"id": "room|{room_idx}"}}'
            for room_idx, room_info in enumerate(rooms)
        ]
        return '{"rooms": [' + ', '.join(room_strs) + ']}'

    total_area = 0
    room_types = []
    room_strs = []
    for room_idx, room_info in enumerate(rooms):
        _y0, _x0, _y1, _x1, c1, _c2, area, height, width = room_info
        total_area += area
        room_types.append(room_label[c1])
        room_strs.append(
            f'{{"area": {area * _PIXEL2AREA:.2f}, '
            f'"room_type": "{room_label[c1]}", '
            f'"floor_polygon": {_polygon_json(polygons[room_idx])}, '
            f'"height": {height * _PIXEL2LEN:.2f}, '
            f'"width": {width * _PIXEL2LEN:.2f}, '
            f'"id": "room|{room_idx}"}}'
        )
    header = (
        f'{{"room_count": {len(rooms)}, '
        f'"total_area": {total_area * _PIXEL2AREA:.2f}, '
        f'"room_types": {json.dumps(room_types)}, '
    )
    return header + '"rooms": [' + ', '.join(room_strs) + ']}'


def _sample_constraints(floorplan):
    """Randomly thin a parsed floor plan into a partial constraint dict.

    Every room loses its 'floor_polygon' and each remaining room field is
    dropped with probability 0.5. Rooms left with no fields are removed from
    the list, and the 'rooms' key is removed when no room survives. Each
    remaining top-level key is then dropped with probability 0.5, except that
    the key with the largest random draw is always kept.

    The dict is modified in place and also returned; it may be empty.
    """
    kept_rooms = []
    for room in floorplan.get('rooms', []):
        room.pop('floor_polygon', None)
        for key in list(room):
            if random.random() < 0.5:
                del room[key]
        if room:
            kept_rooms.append(room)
    if kept_rooms:
        floorplan['rooms'] = kept_rooms
    else:
        floorplan.pop('rooms', None)

    if not floorplan:
        # Nothing left to sample from; np.argmax would raise on an empty array.
        return floorplan

    keys = list(floorplan)
    rands = np.random.random(len(keys))
    rands[np.argmax(rands)] = 1.0  # guarantees at least one key survives
    for key, draw in zip(keys, rands):
        if draw < 0.5:
            del floorplan[key]
    return floorplan


def _adjacency_text(data):
    """Return the room-count / adjacency-pair sentence for the user turn."""
    rooms = data['rooms']
    pairs = ', '.join(
        f'({room_label[rooms[u][4]]} = "room|{u}", '
        f'{room_label[rooms[v][4]]} = "room|{v}")'
        for u, v, _ in data['edges']
    )
    text = f'total number of rooms: {len(rooms)}; adjacency pairs: ' + pairs
    # With no edges this trims the trailing space after the colon.
    return text.strip(', ')


def get_custom_dataset(dataset_config, tokenizer, split, testing=False):
    """Build a tokenized floor-plan generation dataset.

    Loads the shards stored under 'datasets/rplan_converted/' for ids 5-8,
    skipping the one whose id equals the first character of
    ``dataset_config.exprm``, concatenates them per split, and maps every
    sample to prompt + target token ids.

    Args:
        dataset_config: Object with a string attribute ``exprm``. Its first
            character must be a digit; if the string contains 'new' the
            detailed target format is used.
        tokenizer: Callable tokenizer exposing ``bos_token`` and ``eos_token``
            and returning a mapping with 'input_ids'.
        split: Split name; 'validation' is mapped to 'test'.
        testing: Unused; kept for interface compatibility.

    Returns:
        The mapped dataset with 'input_ids', 'attention_mask' and 'labels'
        columns. Prompt positions in 'labels' are set to -100.
    """
    skipped_shard = int(dataset_config.exprm[:1])
    ds_dir = 'datasets/rplan_converted/'
    shards = [
        load_from_disk(f'{ds_dir}{idx}')
        for idx in (5, 6, 7, 8)
        if idx != skipped_shard
    ]
    dataset = DatasetDict()
    for key in shards[0]:
        dataset[key] = datasets.concatenate_datasets(
            [shard[key] for shard in shards]
        )
    if split == 'validation':
        split = 'test'
    dataset = dataset[split]

    # Loop-invariant: decide the target format once, not per sample.
    detailed = 'new' in str(dataset_config.exprm)

    def process_sample(data):
        json_str = _build_floorplan_json(data, detailed)
        prompt_d = _sample_constraints(json.loads(json_str))

        user_str = _adjacency_text(data)
        if prompt_d:
            user_str += f'. additional constraints: {str(prompt_d)}'

        # NOTE(review): the stock Llama 3 chat template places two newlines
        # after <|end_header_id|>; a single space is used here - confirm this
        # matches the prompt used at inference time.
        prompt_str = f"""<|start_header_id|>system<|end_header_id|> {_INSTRUCTION_STR}<|eot_id|><|start_header_id|>user<|end_header_id|> {user_str}<|eot_id|><|start_header_id|>assistant<|end_header_id|> """
        prompt = tokenizer(f"{tokenizer.bos_token}{prompt_str}", add_special_tokens=False)
        floorplan = tokenizer(f"{json_str}{tokenizer.eos_token}", add_special_tokens=False)

        prompt_ids = prompt['input_ids']
        target_ids = floorplan['input_ids']
        input_ids = prompt_ids + target_ids
        return {
            'input_ids': input_ids,
            'attention_mask': [1] * len(input_ids),
            # -100 on the prompt so only the floor-plan tokens carry labels.
            'labels': [-100] * len(prompt_ids) + target_ids,
        }

    return dataset.map(
        process_sample,
        remove_columns=list(dataset.features)
    )
if __name__ == '__main__':
    # Manual smoke run. get_custom_dataset reads `dataset_config.exprm` as an
    # attribute and slices it, so the config must be an object holding a
    # string; a plain dict with an int value fails on the first line.
    from types import SimpleNamespace

    # NOTE(review): tokenizer is still None, so the per-sample mapping will
    # fail as soon as it reads tokenizer.bos_token; pass a real tokenizer to
    # exercise the full pipeline.
    get_custom_dataset(SimpleNamespace(exprm='4'), None, 'train')