-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconflation.sql
284 lines (264 loc) · 8.9 KB
/
conflation.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
-- add fields for OSM tags and data processing
ALTER TABLE buildings
ADD COLUMN "addr:housenumber" text,
ADD COLUMN "addr:street" text,
ADD COLUMN loc_geom geometry(multipolygon,32616),
ADD COLUMN conflated boolean DEFAULT FALSE,
ADD COLUMN main boolean; -- is it the main building on the parcel?
-- create local geometry fields and validate geometries
UPDATE buildings SET loc_geom = ST_MakeValid(ST_Transform(geom,32616));
CREATE INDEX ON buildings USING GIST (loc_geom);
-- added fields for the parcels table
ALTER TABLE parcels
ADD COLUMN "addr:street" text,
ADD COLUMN loc_geom geometry(multipolygon,32616),
ADD COLUMN building_count integer,
ADD COLUMN repeating BOOLEAN DEFAULT FALSE;
-- create local geometry fields and validate geometries
UPDATE parcels SET loc_geom = ST_MakeValid(ST_Transform(geom,32616));
CREATE INDEX ON parcels USING GIST (loc_geom);
-- parse and expand parcel street addresses
UPDATE parcels SET "addr:street" = initcap(addrst)||' '||
CASE
WHEN upper(addrsf) = 'AV' THEN 'Avenue'
WHEN upper(addrsf) = 'DR' THEN 'Drive'
WHEN upper(addrsf) = 'RD' THEN 'Road'
WHEN upper(addrsf) = 'ST' THEN 'Street'
WHEN upper(addrsf) = 'LN' THEN 'Lane'
WHEN upper(addrsf) = 'CT' THEN 'Court'
WHEN upper(addrsf) = 'PL' THEN 'Place'
WHEN upper(addrsf) = 'CR' THEN 'Circle'
WHEN upper(addrsf) = 'TE' THEN 'Terrace'
WHEN upper(addrsf) = 'PK' THEN 'Park'
WHEN upper(addrsf) = 'WY' THEN 'Way'
WHEN upper(addrsf) = 'BV' THEN 'Boulevard'
WHEN upper(addrsf) = 'PW' THEN 'Parkway'
WHEN upper(addrsf) = 'TL' THEN 'Trail'
WHEN upper(addrsf) = 'HW' THEN 'Highway'
WHEN upper(addrsf) = 'WA' THEN 'Way'
WHEN upper(addrsf) = 'TR' THEN 'Terrace'
WHEN upper(addrsf) = 'SQ' THEN 'Square'
WHEN upper(addrsf) = 'AL' THEN 'Alley'
WHEN upper(addrsf) = 'BL' THEN 'Boulevard'
WHEN upper(addrsf) = 'CI' THEN 'Circle'
WHEN upper(addrsf) = 'PT' THEN 'Point'
WHEN upper(addrsf) = 'PI' THEN 'Pike'
WHEN upper(addrsf) = 'LA' THEN 'Lane'
ELSE '' -- NULL cases mostly have the suffix in the name field
END;
-- identify repeating parcels (indicates multiple addresses associated with buildings)
WITH geom_counts AS (
SELECT array_agg(gid) AS ids, COUNT(*)
FROM parcels
GROUP BY geom
), geom_counts2 AS (
SELECT * FROM geom_counts WHERE count > 1
)
UPDATE parcels SET repeating = TRUE
FROM geom_counts2
WHERE ids @> ARRAY[gid];
-- identify parcels with multiple buildings
UPDATE parcels SET building_count = NULL WHERE building_count IS NOT NULL;
WITH bcounts AS (
SELECT
p.gid, COUNT(*)
FROM buildings AS b JOIN parcels AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
GROUP BY p.gid
)
UPDATE parcels SET building_count = count
FROM bcounts WHERE bcounts.gid = parcels.gid;
-- add addresses to buildings with simple 1:1 matches to parcels
UPDATE buildings SET "addr:housenumber" = NULL, "addr:street" = NULL;
WITH a AS (
SELECT
b.gid, p.addrno, p."addr:street"
FROM buildings AS b JOIN parcels AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
WHERE p.building_count = 1 AND NOT p.repeating
)
UPDATE buildings SET
"addr:housenumber" = a.addrno,
"addr:street" = a."addr:street"
FROM a WHERE buildings.gid = a.gid;
--SELECT COUNT(*) FROM buildings WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL;
-- attempt to identify garages and sheds so they don't get addresses
UPDATE buildings SET main = NULL;
-- sort the buildings on each parcel by size, but only where it's likely a garage/shed situation
WITH sizes AS (
SELECT
p.gid AS pid,
b.gid AS bid,
row_number() OVER ( PARTITION BY p.gid ORDER BY ST_Area(b.loc_geom) DESC) AS size_order
FROM buildings AS b JOIN parcels AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
WHERE
NOT p.repeating AND -- single parcels
p.building_count IN (2,3) -- 2 or 3 buildings on parcel
ORDER BY p.gid ASC
) UPDATE buildings SET main = CASE
WHEN size_order = 1 THEN TRUE
WHEN size_order > 1 THEN FALSE
ELSE NULL
END
FROM sizes WHERE sizes.bid = buildings.gid;
-- now assign addresses to main buildings on parcels with outbuildings
WITH a AS (
SELECT
b.gid, p.addrno, p."addr:street"
FROM buildings AS b JOIN parcels AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
WHERE
p.building_count IN (2,3)
AND NOT p.repeating
AND b.main -- is main building
)
UPDATE buildings SET
"addr:housenumber" = a.addrno,
"addr:street" = a."addr:street"
FROM a WHERE buildings.gid = a.gid;
-- get a count of outbuildings so we know how many addresses are intentionally unassigned
SELECT
COUNT(*)
FROM buildings AS b JOIN parcels AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
WHERE
p.building_count IN (2,3)
AND NOT p.repeating
AND NOT b.main; -- is NOT main building
-- try to assign multiple addresses from multiple parcels to single buildings
WITH addresses AS (
SELECT
b.gid,
array_to_string( ARRAY_AGG(DISTINCT addrno), ';') AS housenumber,
array_to_string( ARRAY_AGG(DISTINCT "addr:street"), ';') AS street
FROM buildings AS b JOIN parcels AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
WHERE
p.building_count = 1 AND
p.repeating AND
b."addr:housenumber" IS NULL
GROUP BY b.gid
)
UPDATE buildings AS b SET
"addr:housenumber" = housenumber,
"addr:street" = street
FROM addresses AS a
WHERE a.gid = b.gid;
-- try to identify addresses for buildings across multiple parcels
WITH addresses AS (
SELECT
b.gid,
array_to_string( ARRAY_AGG(DISTINCT addrno), ';') AS addrno,
array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street,
COUNT(*)
FROM buildings AS b
JOIN parcels AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) < 0.9*ST_Area(b.loc_geom)
WHERE
b."addr:housenumber" IS NULL AND
NOT p.repeating AND
p.addrno IS NOT NULL AND
b.sqft > 1000
GROUP BY b.gid
)
UPDATE buildings AS b SET
"addr:housenumber" = addrno,
"addr:street" = street
FROM addresses AS a
WHERE
count = 1 AND -- only simple cases!
a.gid = b.gid;
-- identify intersecting/conflated buildings
UPDATE buildings AS b SET conflated = FALSE;
UPDATE buildings AS b SET conflated = TRUE
FROM ham_polygon AS osm
WHERE ST_Intersects(b.geom,osm.way)
AND osm.building IS NOT NULL and osm.building != 'no';
-- dump simplified polygon geometries and OSM relavant fields into another table for exporting
-- this code is based on https://trac.osgeo.org/postgis/wiki/UsersWikiSimplifyPreserveTopology
-- it does take a very long time to run on this dataset...
-- first do conflated buildings
with poly as (
SELECT
gid,
"addr:housenumber",
"addr:street",
est_h_feet,
storyabove,
storybelow,
cwwuse,
(st_dump(loc_geom)).*
FROM buildings
WHERE conflated
)
SELECT
poly.gid,
poly."addr:housenumber",
poly."addr:street",
poly.est_h_feet,
poly.storyabove,
poly.storybelow,
poly.cwwuse,
ST_Transform(baz.geom,4326) AS geom
INTO simplified_conflated_buildings
FROM (
SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom
FROM (
-- simplify geometries to a 0.2m tolerance to avoid repeated points
SELECT (ST_Dump(st_simplifyPreserveTopology(ST_Linemerge(st_union(geom)), 0.2))).geom as geom
FROM (
SELECT ST_ExteriorRing((ST_DumpRings(geom)).geom) as geom
FROM poly
) AS foo
) AS bar
) AS baz, poly
WHERE
ST_Intersects(poly.geom, baz.geom)
AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9;
ALTER TABLE simplified_conflated_buildings ADD CONSTRAINT temp1_pkey PRIMARY KEY (gid);
-- next do non-conflated buldings separately
with poly as (
SELECT
gid,
"addr:housenumber",
"addr:street",
est_h_feet,
storyabove,
storybelow,
cwwuse,
(st_dump(loc_geom)).*
FROM buildings
WHERE NOT conflated -- note: NOT
)
SELECT
poly.gid,
poly."addr:housenumber",
poly."addr:street",
poly.est_h_feet,
poly.storyabove,
poly.storybelow,
poly.cwwuse,
ST_Transform(baz.geom,4326) AS geom
INTO simplified_buildings
FROM (
SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom
FROM (
-- simplify geometries to a 0.2m tolerance to avoid repeated points
SELECT (ST_Dump(st_simplifyPreserveTopology(ST_Linemerge(st_union(geom)), 0.2))).geom as geom
FROM (
SELECT ST_ExteriorRing((ST_DumpRings(geom)).geom) as geom
FROM poly
) AS foo
) AS bar
) AS baz, poly
WHERE
ST_Intersects(poly.geom, baz.geom)
AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9;