Skip to content

Commit

Permalink
Merge pull request #17 from mauro3/m3/spacecadet
Browse files Browse the repository at this point in the history
There were issues with spaces in a file; this fixes them.
  • Loading branch information
jguerber authored Jun 6, 2024
2 parents be49473 + 1070e80 commit a13adc4
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 29 deletions.
2 changes: 1 addition & 1 deletion example/small.asc
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ NODATA_value -9999
1 2 3 4 5 4
5 4 3 2 1 2
7 8 7 8 7 1
9 9 9 9 9 1
9 9 9 9 6 1
1 2 3 4 5 1
12 changes: 12 additions & 0 deletions example/swisstopo.asc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
NCOLS 10
NROWS 9
XLLCORNER 479900.
YLLCORNER 61900.
CELLSIZE 200.
NODATA_VALUE -9999.
-9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999.
-9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999.
835.415 863.55 887.424 869.213 855.539 845.878 829.714 815.258 807.458 799.816 799.2 808.076 818.866 823.66 827.642 831.718 835.851 839.864 830.474 818.6
827.881 841.593 819.972 820.397 821.595 821.194 820.064 798.059 760.637 739.562 785.239 783.209 768.894 725.434 732.013 730.77 815.899 861.594 877.095 887.203
876.015 863.514 860.521 869.822 879.019 882.912 879.207 878.006 879.207 881.804

57 changes: 30 additions & 27 deletions src/io.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
read_ascii(filename::AbstractString) => Union{Tuple{Array, NamedTuple}, NamedTuple}
Reads an ASCII file. Parameters are parsed according to the [AAIGrid](https://gdal.org/drivers/raster/aaigrid.html) format. Data elements are assumed to be of the same type as the nodatavalue header parameter if possible. If there is no NODATA_value field in the header, data type is estimated by checking if there are any floating numbers in the first 10 data rows.
Reads an ASCII file. Parameters are parsed according to the [AAIGrid](https://gdal.org/drivers/raster/aaigrid.html) format. Data elements are assumed to be of the same type as the nodatavalue header parameter if possible. If there is no nodata_value field in the header, the data type is estimated by checking whether any of the first 100 data values (or all of them, if there are fewer) contains a decimal point.
# Keywords
Expand All @@ -21,7 +21,7 @@ function read_ascii(filename::AbstractString; lazy = false)
yll = header["yllcorner"],
dx = header["dx"],
dy = header["dy"],
nodatavalue = header["NODATA_value"]
nodatavalue = header["nodata_value"]
)

if !lazy
Expand All @@ -36,23 +36,23 @@ end
"""
_read_header
Reads the first lines that don't start with a space. Converts them to a Dict
Reads the leading lines of the file that don't start with a number (the header lines). Converts them to a Dict
with 9 entries holding all the parameters we need, both for assessing the data type and for keeping the header parameters.
"""
function _read_header(filename::AbstractString)
header = Dict{String, Any}()
open(filename, "r") do f
line = readline(f)
while line[1] != ' '
# split line
spl = split(line, ' ')
# read and split line
spl = split(strip(readline(f)), ' ')
while tryparse(Float64, spl[1])===nothing # header lines do not start with a number

# remove "" elements
clean = deleteat!(spl, findall(x -> x == "", spl))
# add to header
header[clean[1]] = clean[2]
header[lowercase(clean[1])] = clean[2]

# continue reading
line = readline(f)
# continue reading and split line
spl = split(strip(readline(f)), ' ')
end
end

Expand All @@ -78,16 +78,18 @@ Looks in `header` for a number of lines to ignore, then writes the following lin
"""
function _read_data(filename::AbstractString, header::Dict{String, Any})
# only store data lines in a variable
raw = open(readlines, filename)[(header["nlines"]+1):end]
io = open(filename)
# read the header
[readline(io) for i=1:header["nlines"]]

raw = map(l -> split(l, ' ')[2:end], raw) # remove spaces: this is now a
# vector of vector of strings
# now read the rest of the file
raw = split(read(io, String))

if header["datatype"] == Any # if datatype is undetermined yet
ncheck = min(header["nrows"], 10) # check 10 rows or less
ncheck = min(header["nrows"]*header["ncols"], 100) # check 100 numbers or less
found_float = false
for i in 1:ncheck
if !all(map(w -> match(r"[.]", w) === nothing, raw[i]))
if match(r"[.]", raw[i]) !== nothing
found_float = true
break
end
Expand All @@ -97,9 +99,10 @@ function _read_data(filename::AbstractString, header::Dict{String, Any})
else
datatype = header["datatype"]
end
out = map(l -> parse.(datatype, l), raw)

return mapreduce(permutedims, vcat, out) # convert to matrix
out = parse.(datatype, raw)

return permutedims(reshape(out, header["ncols"], header["nrows"]))
end

"""
Expand All @@ -122,7 +125,7 @@ function _check_and_parse_required(header::Dict{String, Any})
end

function _cellsize_or_dxdy(header::Dict{String, Any})

if haskey(header, "cellsize")

haskey(header, "dx") && @warn "Provided cellsize, ignoring dx"
Expand All @@ -147,17 +150,17 @@ end
"""
_check_nodata
If NODATA_value is a header line, keep it as nodatavalue and detect its type. If
NODATA_value is missing, we set it to -9999.0 and its type to Any.
If nodata_value is a header line, keep it as nodatavalue and detect its type. If
nodata_value is missing, we set it to -9999.0 and its type to Any.
"""
function _check_nodata(header::Dict{String, Any})
if haskey(header, "NODATA_value")
if haskey(header, "nodata_value")
# no floating point in nodata ? datatype is considered int
datatype = isnothing(match(r"[.]", header["NODATA_value"])) ? Int32 : Float32
header["NODATA_value"] = parse(datatype, header["NODATA_value"])
datatype = isnothing(match(r"[.]", header["nodata_value"])) ? Int32 : Float32
header["nodata_value"] = parse(datatype, header["nodata_value"])
header["datatype"] = datatype
else
header["NODATA_value"] = -9999.0
header["nodata_value"] = -9999.0
header["datatype"] = Any
end

Expand Down Expand Up @@ -200,11 +203,11 @@ function write_ascii(filename::AbstractString, dat::AbstractArray{T, 2}; ncols::
size(dat) == (nrows, ncols) || throw(ArgumentError("$nrows rows and $ncols cols incompatible with array of size $(size(dat))"))

datatype = if detecttype
typeof(nodatavalue) <: AbstractFloat ? Float32 : Int32
typeof(nodatavalue) <: AbstractFloat ? Float32 : Int32
else
Float32
end

# ensure right type for dat and nodatavalue
dat = datatype.(dat)
nodatavalue = datatype(nodatavalue)
Expand All @@ -227,4 +230,4 @@ function write_ascii(filename::AbstractString, dat::AbstractArray{T, 2}; ncols::
end
end
return filename
end
end
8 changes: 7 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ using Test
@testset "read" begin
    # A lazy read is expected to hand back a NamedTuple.
    @test read_ascii("../example/small.asc"; lazy = true) isa NamedTuple

    # `asc` is defined outside this testset; its first element is indexed as the
    # data matrix here. NOTE(review): confirm against the enclosing setup code.
    grid = asc[1]
    @test grid isa Matrix{Int32}
    @test grid[2, 3] == 3
    @test grid[4, 5] == 6  # the cell that example/small.asc sets to 6

    # A missing file must surface as an ArgumentError.
    @test_throws ArgumentError read_ascii("doesntexist.asc")
end
Expand All @@ -20,7 +21,7 @@ using Test
nodatavalue = 1,
)
dat = [1 1 1 1;2 2 2 2;3 3 3 3;4 4 4 4]

@testset "write" begin
pars2 = (
ncols = 4,
Expand Down Expand Up @@ -97,6 +98,11 @@ using Test
@test_throws "nrows not found in file header" ASCIIrasters.read_ascii("../example/missingnrow.asc")
end

@testset "test different whitespaces" begin
    # example/swisstopo.asc uses upper-case header keys, floats with a trailing
    # dot, and data lines that hold 20 values although NCOLS is 10. A size check
    # alone cannot detect values landing in the wrong cell, so element type and a
    # few cell values at row boundaries are pinned as well.
    d, h = ASCIIrasters.read_ascii("../example/swisstopo.asc")

    # Shape must agree with the header.
    @test size(d) == (h.nrows, h.ncols)
    @test size(d) == (9, 10)

    # "NODATA_VALUE -9999." contains a dot, so the data is read as Float32.
    @test eltype(d) == Float32
    @test h.nodatavalue == -9999

    # The first 40 values in the file are nodata, i.e. rows 1 to 4.
    @test all(==(-9999), d[1:4, :])

    # Row 5 is the first half of the third physical line of data.
    @test d[5, 1] ≈ 835.415
    @test d[5, 10] ≈ 799.816
    # Row 6 is the second half of that same physical line.
    @test d[6, 1] ≈ 799.2
    # The last value in the file is the bottom-right cell.
    @test d[9, 10] ≈ 881.804
end

# cleanup
rm("./test.asc")
rm("./test2.asc")
Expand Down

0 comments on commit a13adc4

Please sign in to comment.