From dcd2c70288082239fc98f0e12bd0b9f909b1cac5 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Tue, 4 Apr 2023 16:21:08 +0100 Subject: [PATCH 1/2] Add `compression` argument Enables the `shuffle` filter and sets `deflate_level` Closes #3 --- src/neasyf.f90 | 131 +++++++++++++++++++++++++++++++++++----- src/neasyf.in.f90 | 12 ++++ src/neasyf.write.in.f90 | 17 +++++- tests/test_write.pf | 61 +++++++++++++++++++ 4 files changed, 205 insertions(+), 16 deletions(-) diff --git a/src/neasyf.f90 b/src/neasyf.f90 index dc5d40c..81aaf30 100644 --- a/src/neasyf.f90 +++ b/src/neasyf.f90 @@ -67,6 +67,18 @@ module neasyf private public :: neasyf_open, neasyf_close, neasyf_type, neasyf_dim public :: neasyf_write, neasyf_read, neasyf_error, neasyf_metadata + public :: neasyf_default_compression + + !> Default compression level to use when creating variables. The default is + !> zero, no compression. Non-zero values should be between 1-9 + !> + !> This can be overridden explicitly in calls to [[neasyf_write]]. + !> + !> Setting this to a non-zero value also enables the `shuffle` filter. There + !> is some discussion of how compression works in netCDF in the [documentation + !> for the C + !> library](https://docs.unidata.ucar.edu/netcdf-c/current/group__variables.html#ga59dad3301f241a7eb86f31b339af2d26) + integer :: neasyf_default_compression = 0 integer, parameter :: nf_kind = kind(NF90_INT) @@ -990,7 +1002,7 @@ end subroutine neasyf_write_scalar !> !> which avoids the need to manually pad each dimension name with spaces. subroutine neasyf_write_rank1(parent_id, name, values, dim_ids, dim_names, & - varid, units, long_name, start, count, stride, map) + varid, units, long_name, start, count, stride, map, compression) use, intrinsic :: iso_fortran_env, only : error_unit use netcdf, only : nf90_inq_varid, nf90_def_var, nf90_put_var, nf90_put_att, & nf90_inq_dimid, NF90_NOERR, NF90_ENOTVAR, NF90_EDIMMETA @@ -1011,12 +1023,19 @@ subroutine neasyf_write_rank1(parent_id, name, values, dim_ids, dim_names, & !> Long descriptive name character(len=*), optional, intent(in) :: long_name integer, dimension(:), optional, intent(in) :: start, count, stride, map + !> If non-zero, use compression. + !> + !> Enables the `shuffle` netCDF filter and sets the `deflate_level` + !> parameter to `compression`. You can set the default compression through + !> [[neasyf_default_compression]] + integer, optional, intent(in) :: compression integer, dimension(:), allocatable :: local_dim_ids integer :: dim_index integer(nf_kind) :: nf_type integer :: status integer :: var_id + integer :: local_compression status = nf90_inq_varid(parent_id, name, var_id) ! Variable doesn't exist, so let's create it @@ -1037,9 +1056,15 @@ subroutine neasyf_write_rank1(parent_id, name, values, dim_ids, dim_names, & end do end if + local_compression = neasyf_default_compression + if (present(compression)) then + local_compression = compression + end if + nf_type = neasyf_type(values) ! TODO: check if nf_type indicates a derived type - status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id) + status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id, & + shuffle=(local_compression > 0), deflate_level=local_compression) deallocate(local_dim_ids) if (present(units)) then @@ -1087,7 +1112,7 @@ end subroutine neasyf_write_rank1 !> !> which avoids the need to manually pad each dimension name with spaces. subroutine neasyf_write_rank2(parent_id, name, values, dim_ids, dim_names, & - varid, units, long_name, start, count, stride, map) + varid, units, long_name, start, count, stride, map, compression) use, intrinsic :: iso_fortran_env, only : error_unit use netcdf, only : nf90_inq_varid, nf90_def_var, nf90_put_var, nf90_put_att, & nf90_inq_dimid, NF90_NOERR, NF90_ENOTVAR, NF90_EDIMMETA @@ -1108,12 +1133,19 @@ subroutine neasyf_write_rank2(parent_id, name, values, dim_ids, dim_names, & !> Long descriptive name character(len=*), optional, intent(in) :: long_name integer, dimension(:), optional, intent(in) :: start, count, stride, map + !> If non-zero, use compression. + !> + !> Enables the `shuffle` netCDF filter and sets the `deflate_level` + !> parameter to `compression`. You can set the default compression through + !> [[neasyf_default_compression]] + integer, optional, intent(in) :: compression integer, dimension(:), allocatable :: local_dim_ids integer :: dim_index integer(nf_kind) :: nf_type integer :: status integer :: var_id + integer :: local_compression status = nf90_inq_varid(parent_id, name, var_id) ! Variable doesn't exist, so let's create it @@ -1134,9 +1166,15 @@ subroutine neasyf_write_rank2(parent_id, name, values, dim_ids, dim_names, & end do end if + local_compression = neasyf_default_compression + if (present(compression)) then + local_compression = compression + end if + nf_type = neasyf_type(values) ! TODO: check if nf_type indicates a derived type - status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id) + status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id, & + shuffle=(local_compression > 0), deflate_level=local_compression) deallocate(local_dim_ids) if (present(units)) then @@ -1184,7 +1222,7 @@ end subroutine neasyf_write_rank2 !> !> which avoids the need to manually pad each dimension name with spaces. subroutine neasyf_write_rank3(parent_id, name, values, dim_ids, dim_names, & - varid, units, long_name, start, count, stride, map) + varid, units, long_name, start, count, stride, map, compression) use, intrinsic :: iso_fortran_env, only : error_unit use netcdf, only : nf90_inq_varid, nf90_def_var, nf90_put_var, nf90_put_att, & nf90_inq_dimid, NF90_NOERR, NF90_ENOTVAR, NF90_EDIMMETA @@ -1205,12 +1243,19 @@ subroutine neasyf_write_rank3(parent_id, name, values, dim_ids, dim_names, & !> Long descriptive name character(len=*), optional, intent(in) :: long_name integer, dimension(:), optional, intent(in) :: start, count, stride, map + !> If non-zero, use compression. + !> + !> Enables the `shuffle` netCDF filter and sets the `deflate_level` + !> parameter to `compression`. You can set the default compression through + !> [[neasyf_default_compression]] + integer, optional, intent(in) :: compression integer, dimension(:), allocatable :: local_dim_ids integer :: dim_index integer(nf_kind) :: nf_type integer :: status integer :: var_id + integer :: local_compression status = nf90_inq_varid(parent_id, name, var_id) ! Variable doesn't exist, so let's create it @@ -1231,9 +1276,15 @@ subroutine neasyf_write_rank3(parent_id, name, values, dim_ids, dim_names, & end do end if + local_compression = neasyf_default_compression + if (present(compression)) then + local_compression = compression + end if + nf_type = neasyf_type(values) ! TODO: check if nf_type indicates a derived type - status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id) + status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id, & + shuffle=(local_compression > 0), deflate_level=local_compression) deallocate(local_dim_ids) if (present(units)) then @@ -1281,7 +1332,7 @@ end subroutine neasyf_write_rank3 !> !> which avoids the need to manually pad each dimension name with spaces. subroutine neasyf_write_rank4(parent_id, name, values, dim_ids, dim_names, & - varid, units, long_name, start, count, stride, map) + varid, units, long_name, start, count, stride, map, compression) use, intrinsic :: iso_fortran_env, only : error_unit use netcdf, only : nf90_inq_varid, nf90_def_var, nf90_put_var, nf90_put_att, & nf90_inq_dimid, NF90_NOERR, NF90_ENOTVAR, NF90_EDIMMETA @@ -1302,12 +1353,19 @@ subroutine neasyf_write_rank4(parent_id, name, values, dim_ids, dim_names, & !> Long descriptive name character(len=*), optional, intent(in) :: long_name integer, dimension(:), optional, intent(in) :: start, count, stride, map + !> If non-zero, use compression. + !> + !> Enables the `shuffle` netCDF filter and sets the `deflate_level` + !> parameter to `compression`. You can set the default compression through + !> [[neasyf_default_compression]] + integer, optional, intent(in) :: compression integer, dimension(:), allocatable :: local_dim_ids integer :: dim_index integer(nf_kind) :: nf_type integer :: status integer :: var_id + integer :: local_compression status = nf90_inq_varid(parent_id, name, var_id) ! Variable doesn't exist, so let's create it @@ -1328,9 +1386,15 @@ subroutine neasyf_write_rank4(parent_id, name, values, dim_ids, dim_names, & end do end if + local_compression = neasyf_default_compression + if (present(compression)) then + local_compression = compression + end if + nf_type = neasyf_type(values) ! TODO: check if nf_type indicates a derived type - status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id) + status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id, & + shuffle=(local_compression > 0), deflate_level=local_compression) deallocate(local_dim_ids) if (present(units)) then @@ -1378,7 +1442,7 @@ end subroutine neasyf_write_rank4 !> !> which avoids the need to manually pad each dimension name with spaces. subroutine neasyf_write_rank5(parent_id, name, values, dim_ids, dim_names, & - varid, units, long_name, start, count, stride, map) + varid, units, long_name, start, count, stride, map, compression) use, intrinsic :: iso_fortran_env, only : error_unit use netcdf, only : nf90_inq_varid, nf90_def_var, nf90_put_var, nf90_put_att, & nf90_inq_dimid, NF90_NOERR, NF90_ENOTVAR, NF90_EDIMMETA @@ -1399,12 +1463,19 @@ subroutine neasyf_write_rank5(parent_id, name, values, dim_ids, dim_names, & !> Long descriptive name character(len=*), optional, intent(in) :: long_name integer, dimension(:), optional, intent(in) :: start, count, stride, map + !> If non-zero, use compression. + !> + !> Enables the `shuffle` netCDF filter and sets the `deflate_level` + !> parameter to `compression`. You can set the default compression through + !> [[neasyf_default_compression]] + integer, optional, intent(in) :: compression integer, dimension(:), allocatable :: local_dim_ids integer :: dim_index integer(nf_kind) :: nf_type integer :: status integer :: var_id + integer :: local_compression status = nf90_inq_varid(parent_id, name, var_id) ! Variable doesn't exist, so let's create it @@ -1425,9 +1496,15 @@ subroutine neasyf_write_rank5(parent_id, name, values, dim_ids, dim_names, & end do end if + local_compression = neasyf_default_compression + if (present(compression)) then + local_compression = compression + end if + nf_type = neasyf_type(values) ! TODO: check if nf_type indicates a derived type - status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id) + status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id, & + shuffle=(local_compression > 0), deflate_level=local_compression) deallocate(local_dim_ids) if (present(units)) then @@ -1475,7 +1552,7 @@ end subroutine neasyf_write_rank5 !> !> which avoids the need to manually pad each dimension name with spaces. subroutine neasyf_write_rank6(parent_id, name, values, dim_ids, dim_names, & - varid, units, long_name, start, count, stride, map) + varid, units, long_name, start, count, stride, map, compression) use, intrinsic :: iso_fortran_env, only : error_unit use netcdf, only : nf90_inq_varid, nf90_def_var, nf90_put_var, nf90_put_att, & nf90_inq_dimid, NF90_NOERR, NF90_ENOTVAR, NF90_EDIMMETA @@ -1496,12 +1573,19 @@ subroutine neasyf_write_rank6(parent_id, name, values, dim_ids, dim_names, & !> Long descriptive name character(len=*), optional, intent(in) :: long_name integer, dimension(:), optional, intent(in) :: start, count, stride, map + !> If non-zero, use compression. + !> + !> Enables the `shuffle` netCDF filter and sets the `deflate_level` + !> parameter to `compression`. You can set the default compression through + !> [[neasyf_default_compression]] + integer, optional, intent(in) :: compression integer, dimension(:), allocatable :: local_dim_ids integer :: dim_index integer(nf_kind) :: nf_type integer :: status integer :: var_id + integer :: local_compression status = nf90_inq_varid(parent_id, name, var_id) ! Variable doesn't exist, so let's create it @@ -1522,9 +1606,15 @@ subroutine neasyf_write_rank6(parent_id, name, values, dim_ids, dim_names, & end do end if + local_compression = neasyf_default_compression + if (present(compression)) then + local_compression = compression + end if + nf_type = neasyf_type(values) ! TODO: check if nf_type indicates a derived type - status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id) + status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id, & + shuffle=(local_compression > 0), deflate_level=local_compression) deallocate(local_dim_ids) if (present(units)) then @@ -1572,7 +1662,7 @@ end subroutine neasyf_write_rank6 !> !> which avoids the need to manually pad each dimension name with spaces. subroutine neasyf_write_rank7(parent_id, name, values, dim_ids, dim_names, & - varid, units, long_name, start, count, stride, map) + varid, units, long_name, start, count, stride, map, compression) use, intrinsic :: iso_fortran_env, only : error_unit use netcdf, only : nf90_inq_varid, nf90_def_var, nf90_put_var, nf90_put_att, & nf90_inq_dimid, NF90_NOERR, NF90_ENOTVAR, NF90_EDIMMETA @@ -1593,12 +1683,19 @@ subroutine neasyf_write_rank7(parent_id, name, values, dim_ids, dim_names, & !> Long descriptive name character(len=*), optional, intent(in) :: long_name integer, dimension(:), optional, intent(in) :: start, count, stride, map + !> If non-zero, use compression. + !> + !> Enables the `shuffle` netCDF filter and sets the `deflate_level` + !> parameter to `compression`. You can set the default compression through + !> [[neasyf_default_compression]] + integer, optional, intent(in) :: compression integer, dimension(:), allocatable :: local_dim_ids integer :: dim_index integer(nf_kind) :: nf_type integer :: status integer :: var_id + integer :: local_compression status = nf90_inq_varid(parent_id, name, var_id) ! Variable doesn't exist, so let's create it @@ -1619,9 +1716,15 @@ subroutine neasyf_write_rank7(parent_id, name, values, dim_ids, dim_names, & end do end if + local_compression = neasyf_default_compression + if (present(compression)) then + local_compression = compression + end if + nf_type = neasyf_type(values) ! TODO: check if nf_type indicates a derived type - status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id) + status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id, & + shuffle=(local_compression > 0), deflate_level=local_compression) deallocate(local_dim_ids) if (present(units)) then diff --git a/src/neasyf.in.f90 b/src/neasyf.in.f90 index 1a67207..f264e9c 100644 --- a/src/neasyf.in.f90 +++ b/src/neasyf.in.f90 @@ -67,6 +67,18 @@ module neasyf private public :: neasyf_open, neasyf_close, neasyf_type, neasyf_dim public :: neasyf_write, neasyf_read, neasyf_error, neasyf_metadata + public :: neasyf_default_compression + + !> Default compression level to use when creating variables. The default is + !> zero, no compression. Non-zero values should be between 1-9 + !> + !> This can be overridden explicitly in calls to [[neasyf_write]]. + !> + !> Setting this to a non-zero value also enables the `shuffle` filter. There + !> is some discussion of how compression works in netCDF in the [documentation + !> for the C + !> library](https://docs.unidata.ucar.edu/netcdf-c/current/group__variables.html#ga59dad3301f241a7eb86f31b339af2d26) + integer :: neasyf_default_compression = 0 integer, parameter :: nf_kind = kind(NF90_INT) diff --git a/src/neasyf.write.in.f90 b/src/neasyf.write.in.f90 index 2851fa2..c37b244 100644 --- a/src/neasyf.write.in.f90 +++ b/src/neasyf.write.in.f90 @@ -19,7 +19,7 @@ !> !> which avoids the need to manually pad each dimension name with spaces. subroutine neasyf_write_rank{n}(parent_id, name, values, dim_ids, dim_names, & - varid, units, long_name, start, count, stride, map) + varid, units, long_name, start, count, stride, map, compression) use, intrinsic :: iso_fortran_env, only : error_unit use netcdf, only : nf90_inq_varid, nf90_def_var, nf90_put_var, nf90_put_att, & nf90_inq_dimid, NF90_NOERR, NF90_ENOTVAR, NF90_EDIMMETA @@ -40,12 +40,19 @@ subroutine neasyf_write_rank{n}(parent_id, name, values, dim_ids, dim_names, & !> Long descriptive name character(len=*), optional, intent(in) :: long_name integer, dimension(:), optional, intent(in) :: start, count, stride, map + !> If non-zero, use compression. + !> + !> Enables the `shuffle` netCDF filter and sets the `deflate_level` + !> parameter to `compression`. You can set the default compression through + !> [[neasyf_default_compression]] + integer, optional, intent(in) :: compression integer, dimension(:), allocatable :: local_dim_ids integer :: dim_index integer(nf_kind) :: nf_type integer :: status integer :: var_id + integer :: local_compression status = nf90_inq_varid(parent_id, name, var_id) ! Variable doesn't exist, so let's create it @@ -66,9 +73,15 @@ subroutine neasyf_write_rank{n}(parent_id, name, values, dim_ids, dim_names, & end do end if + local_compression = neasyf_default_compression + if (present(compression)) then + local_compression = compression + end if + nf_type = neasyf_type(values) ! TODO: check if nf_type indicates a derived type - status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id) + status = nf90_def_var(parent_id, name, nf_type, local_dim_ids, var_id, & + shuffle=(local_compression > 0), deflate_level=local_compression) deallocate(local_dim_ids) if (present(units)) then diff --git a/tests/test_write.pf b/tests/test_write.pf index 73b1b75..f1a6695 100644 --- a/tests/test_write.pf +++ b/tests/test_write.pf @@ -28,6 +28,7 @@ contains integer :: unit open(newunit=unit, file=temp_file) close(unit, status="delete") + neasyf_default_compression = 0 end subroutine teardown @test @@ -186,4 +187,64 @@ contains @assertEqual(expected_data, read_data) end subroutine test_write_2d_dim_names + + @test + subroutine test_write_2d_compressed + integer, parameter :: nx = 5, nt = 3 + integer :: i + real, dimension(nx, nt), parameter :: read_data = reshape([(i, i=1, nx*nt)], shape(read_data)) + real, dimension(nx, nt) :: expected_data + integer, parameter :: expected_deflate_level = 1 + integer :: x_dim_id, y_dim_id, var_id, deflate_level + logical :: shuffle + + call neasyf_dim(file_id, "x", dim_size=nx, dimid=x_dim_id) + call neasyf_dim(file_id, "t", unlimited=.true.) + call neasyf_write(file_id, "data", read_data(:, 1), dim_names=["x", "t"], start=[1, 1], & + compression=expected_deflate_level) + call neasyf_write(file_id, "data", read_data(:, 2), dim_names=["x", "t"], start=[1, 2], & + compression=expected_deflate_level) + call neasyf_write(file_id, "data", read_data(:, 3), dim_names=["x", "t"], start=[1, 3], & + compression=expected_deflate_level) + call neasyf_close(file_id) + + call neasyf_error(nf90_open(temp_file, NF90_NOWRITE, file_id)) + call neasyf_error(nf90_inq_varid(file_id, "data", var_id)) + call neasyf_error(nf90_get_var(file_id, var_id, expected_data)) + call neasyf_error(nf90_inquire_variable(file_id, var_id, shuffle=shuffle, deflate_level=deflate_level)) + call neasyf_close(file_id) + + @assertEqual(expected_data, read_data) + @assertEqual(expected_deflate_level, deflate_level) + @assertEqual(shuffle, .true.) + end subroutine test_write_2d_compressed + + @test + subroutine test_write_2d_default_compressed + integer, parameter :: nx = 5, nt = 3 + integer :: i + real, dimension(nx, nt), parameter :: read_data = reshape([(i, i=1, nx*nt)], shape(read_data)) + real, dimension(nx, nt) :: expected_data + integer, parameter :: expected_deflate_level = 1 + integer :: x_dim_id, y_dim_id, var_id, deflate_level + logical :: shuffle + + call neasyf_dim(file_id, "x", dim_size=nx, dimid=x_dim_id) + call neasyf_dim(file_id, "t", unlimited=.true.) + neasyf_default_compression = expected_deflate_level + call neasyf_write(file_id, "data", read_data(:, 1), dim_names=["x", "t"], start=[1, 1]) + call neasyf_write(file_id, "data", read_data(:, 2), dim_names=["x", "t"], start=[1, 2]) + call neasyf_write(file_id, "data", read_data(:, 3), dim_names=["x", "t"], start=[1, 3]) + call neasyf_close(file_id) + + call neasyf_error(nf90_open(temp_file, NF90_NOWRITE, file_id)) + call neasyf_error(nf90_inq_varid(file_id, "data", var_id)) + call neasyf_error(nf90_get_var(file_id, var_id, expected_data)) + call neasyf_error(nf90_inquire_variable(file_id, var_id, shuffle=shuffle, deflate_level=deflate_level)) + call neasyf_close(file_id) + + @assertEqual(expected_data, read_data) + @assertEqual(expected_deflate_level, deflate_level) + @assertEqual(shuffle, .true.) + end subroutine test_write_2d_default_compressed end module test_write From 7ef02f9424f646886d49d2ced287fe375ead5121 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Tue, 4 Apr 2023 16:22:44 +0100 Subject: [PATCH 2/2] Fix some Ford links Can't link to procedure arguments --- src/neasyf.f90 | 6 +++--- src/neasyf.in.f90 | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/neasyf.f90 b/src/neasyf.f90 index 81aaf30..b4c067a 100644 --- a/src/neasyf.f90 +++ b/src/neasyf.f90 @@ -771,15 +771,15 @@ end function polymorphic_get_var_rank7 !> Create a dimension if it doesn't already exist. !> !> If the dimension doesn't exist, also create a variable of the same name and - !> fill it with [[values]], or the integers in the range `1..dim_size`. The - !> optional argument [[unlimited]] can be used to make this dimension + !> fill it with `values`, or the integers in the range `1..dim_size`. The + !> optional argument `unlimited` can be used to make this dimension !> unlimited in extent. !> !> Optional arguments "unit" and "long_name" allow you to create attributes !> of the same names. !> !> The netCDF IDs of the dimension and corresponding variable can be returned - !> through [[dimid]] and [[varid]] respectively. + !> through `dimid` and `varid` respectively. subroutine neasyf_dim(parent_id, name, values, dim_size, dimid, varid, units, long_name, unlimited) use netcdf, only : nf90_inq_dimid, nf90_inq_varid, nf90_def_var, nf90_def_dim, nf90_put_var, nf90_put_att, & diff --git a/src/neasyf.in.f90 b/src/neasyf.in.f90 index f264e9c..87bc10a 100644 --- a/src/neasyf.in.f90 +++ b/src/neasyf.in.f90 @@ -317,15 +317,15 @@ end function polymorphic_get_var_scalar !> Create a dimension if it doesn't already exist. !> !> If the dimension doesn't exist, also create a variable of the same name and - !> fill it with [[values]], or the integers in the range `1..dim_size`. The - !> optional argument [[unlimited]] can be used to make this dimension + !> fill it with `values`, or the integers in the range `1..dim_size`. The + !> optional argument `unlimited` can be used to make this dimension !> unlimited in extent. !> !> Optional arguments "unit" and "long_name" allow you to create attributes !> of the same names. !> !> The netCDF IDs of the dimension and corresponding variable can be returned - !> through [[dimid]] and [[varid]] respectively. + !> through `dimid` and `varid` respectively. subroutine neasyf_dim(parent_id, name, values, dim_size, dimid, varid, units, long_name, unlimited) use netcdf, only : nf90_inq_dimid, nf90_inq_varid, nf90_def_var, nf90_def_dim, nf90_put_var, nf90_put_att, &