Merge pull request #17 from mauro3/m3/spacecadet

This fixes issues with spaces in a file.
JuliaGeo · Jun 6, 2024 · a13adc4 · a13adc4
2 parents be49473 + 1070e80
commit a13adc4
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 29 deletions.
diff --git a/example/small.asc b/example/small.asc
@@ -8,5 +8,5 @@ NODATA_value  -9999
  1 2 3 4 5 4
  5 4 3 2 1 2
  7 8 7 8 7 1
- 9 9 9 9 9 1
+ 9 9 9 9 6 1
  1 2 3 4 5 1
diff --git a/example/swisstopo.asc b/example/swisstopo.asc
@@ -0,0 +1,12 @@
+NCOLS 10 
+NROWS 9 
+XLLCORNER 479900. 
+YLLCORNER 61900. 
+CELLSIZE 200. 
+NODATA_VALUE -9999. 
+-9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. 
+-9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. -9999. 
+835.415 863.55 887.424 869.213 855.539 845.878 829.714 815.258 807.458 799.816 799.2 808.076 818.866 823.66 827.642 831.718 835.851 839.864 830.474 818.6 
+827.881 841.593 819.972 820.397 821.595 821.194 820.064 798.059 760.637 739.562 785.239 783.209 768.894 725.434 732.013 730.77 815.899 861.594 877.095 887.203 
+876.015 863.514 860.521 869.822 879.019 882.912 879.207 878.006 879.207 881.804 
+
diff --git a/src/io.jl b/src/io.jl
@@ -1,7 +1,7 @@
 """
     read_ascii(filename::AbstractString) => Union{Tuple{Array, NamedTuple}, NamedTuple}
 
-Reads an ASCII file. Parameters are parsed according to the [AAIGrid](https://gdal.org/drivers/raster/aaigrid.html) format. Data elements are assumed to be of the same type as the nodatavalue header parameter if possible. If there is no NODATA_value field in the header, data type is estimated by checking if there are any floating numbers in the first 10 data rows.
+Reads an ASCII file. Parameters are parsed according to the [AAIGrid](https://gdal.org/drivers/raster/aaigrid.html) format. Data elements are assumed to be of the same type as the nodatavalue header parameter if possible. If there is no nodata_value field in the header, data type is estimated by checking if there are any floating numbers in the first 10 data rows.
 
 # Keywords
 
@@ -21,7 +21,7 @@ function read_ascii(filename::AbstractString; lazy = false)
         yll = header["yllcorner"],
         dx = header["dx"],
         dy = header["dy"],
-        nodatavalue = header["NODATA_value"]
+        nodatavalue = header["nodata_value"]
     )
 
     if !lazy
@@ -36,23 +36,23 @@ end
 """
     _read_header
 
-Reads the first lines that don't start with a space. Converts them to a Dict
+Reads the first lines that don't start with a number. Converts them to a Dict
 with 9 entries with all the parameters we need both for assessing data type and keeping header parameters.
 """
 function _read_header(filename::AbstractString)
     header = Dict{String, Any}()
     open(filename, "r") do f
-        line = readline(f)
-        while line[1] != ' '
-            # split line
-            spl = split(line, ' ')
+        # read and split line
+        spl = split(strip(readline(f)), ' ')
+        while tryparse(Float64, spl[1])===nothing # header lines do not start with a number
+
             # remove "" elements
             clean = deleteat!(spl, findall(x -> x == "", spl))
             # add to header
-            header[clean[1]] = clean[2]
+            header[lowercase(clean[1])] = clean[2]
 
-            # continue reading
-            line = readline(f)
+            # continue reading and split line
+            spl = split(strip(readline(f)), ' ')
         end
     end
 
@@ -78,16 +78,18 @@ Looks in `header` for a number of lines to ignore, then writes the following lin
 """
 function _read_data(filename::AbstractString, header::Dict{String, Any})
     # only store data lines in a variable
-    raw = open(readlines, filename)[(header["nlines"]+1):end]
+    io = open(filename)
+    # read the header
+    [readline(io) for i=1:header["nlines"]]
 
-    raw = map(l -> split(l, ' ')[2:end], raw) # remove spaces:  this is now a
-    # vector of vector of strings
+    # now read the rest of the file
+    raw = split(read(io, String))
 
     if header["datatype"] == Any # if datatype is undetermined yet
-        ncheck = min(header["nrows"], 10) # check 10 rows or less
+        ncheck = min(header["nrows"]*header["ncols"], 100) # check 100 numbers or less
         found_float = false
         for i in 1:ncheck
-            if !all(map(w -> match(r"[.]", w) === nothing, raw[i]))
+            if match(r"[.]", raw[i]) !== nothing
                 found_float = true
                 break
             end
@@ -97,9 +99,10 @@ function _read_data(filename::AbstractString, header::Dict{String, Any})
     else
         datatype = header["datatype"]
     end
-    out = map(l -> parse.(datatype, l), raw)
 
-    return mapreduce(permutedims, vcat, out) # convert to matrix
+    out = parse.(datatype, raw)
+
+    return permutedims(reshape(out, header["ncols"], header["nrows"]))
 end
 
 """
@@ -122,7 +125,7 @@ function _check_and_parse_required(header::Dict{String, Any})
 end
 
 function _cellsize_or_dxdy(header::Dict{String, Any})
-    
+
     if haskey(header, "cellsize")
 
         haskey(header, "dx") && @warn "Provided cellsize, ignoring dx"
@@ -147,17 +150,17 @@ end
 """
     _check_nodata
 
-If NODATA_value is a header line, keep it as nodatavalue and detect its type. If
-NODATA_value is missing, we set it to -9999.0 and its type to Any.
+If nodata_value is a header line, keep it as nodatavalue and detect its type. If
+nodata_value is missing, we set it to -9999.0 and its type to Any.
 """
 function _check_nodata(header::Dict{String, Any})
-    if haskey(header, "NODATA_value")
+    if haskey(header, "nodata_value")
         # no floating point in nodata ? datatype is considered int
-        datatype = isnothing(match(r"[.]", header["NODATA_value"])) ? Int32 : Float32
-        header["NODATA_value"] = parse(datatype, header["NODATA_value"])
+        datatype = isnothing(match(r"[.]", header["nodata_value"])) ? Int32 : Float32
+        header["nodata_value"] = parse(datatype, header["nodata_value"])
         header["datatype"] = datatype
     else
-        header["NODATA_value"] = -9999.0
+        header["nodata_value"] = -9999.0
         header["datatype"] = Any
     end
 
@@ -200,11 +203,11 @@ function write_ascii(filename::AbstractString, dat::AbstractArray{T, 2}; ncols::
     size(dat) == (nrows, ncols) || throw(ArgumentError("$nrows rows and $ncols cols incompatible with array of size $(size(dat))"))
 
     datatype = if detecttype
-        typeof(nodatavalue) <: AbstractFloat ? Float32 : Int32 
+        typeof(nodatavalue) <: AbstractFloat ? Float32 : Int32
     else
         Float32
     end
-    
+
     # ensure right type for dat and nodatavalue
     dat = datatype.(dat)
     nodatavalue = datatype(nodatavalue)
@@ -227,4 +230,4 @@ function write_ascii(filename::AbstractString, dat::AbstractArray{T, 2}; ncols::
         end
     end
     return filename
-end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -6,6 +6,7 @@ using Test
     @testset "read" begin
         @test read_ascii("../example/small.asc"; lazy = true) isa NamedTuple
         @test asc[1][2,3] == 3
+        @test asc[1][4,5] == 6
         @test typeof(asc[1]) == Matrix{Int32}
         @test_throws ArgumentError read_ascii("doesntexist.asc")
     end
@@ -20,7 +21,7 @@ using Test
             nodatavalue = 1,
         )
     dat = [1 1 1 1;2 2 2 2;3 3 3 3;4 4 4 4]
-    
+
     @testset "write" begin
         pars2 = (
             ncols = 4,
@@ -97,6 +98,11 @@ using Test
         @test_throws "nrows not found in file header" ASCIIrasters.read_ascii("../example/missingnrow.asc")
     end
 
+    @testset "test different whitespaces" begin
+        d,h = ASCIIrasters.read_ascii("../example/swisstopo.asc")
+        @test size(d)==(h.nrows,h.ncols)
+    end
+
     # cleanup
     rm("./test.asc")
     rm("./test2.asc")