Make dropping the file system caches optional in the test/perf benchmarks.

Each benchmark script (Julia, Python, R) accepts a `--drop-caches` flag and
only writes to /proc/sys/vm/drop_caches when it is given. `benchmark.sh`
forwards its command-line arguments unchanged to every script, and the README
describes the root-only cache-dropping mode in a separate section.

diff --git a/test/perf/README.md b/test/perf/README.md
index 874149c8..f0c59527 100644
--- a/test/perf/README.md
+++ b/test/perf/README.md
@@ -1,11 +1,5 @@
 # Benchmarks
 
-The operating systems typically caches access to the file system.
-To make these benchmarks more realistic, the file system caches is dropped at every iteration so that the disk IO *is* included in the reported run times.
-On Linux, the caches are dropped by writing `3` to the file `/proc/sys/vm/drop_caches` however this requires super user privileges.
-These benchmarks require a Linux operating system (as dropping file caches is OS-specific).
-
-
 ## Installation
 
 ### Julia packages
@@ -43,16 +37,31 @@ These are the steps to run the benchmark:
 julia generate_data.jl
 ```
 
-* As a *root user*, run the shell script `benchmark.sh`. It is necessary that the root user has access to the Julia, python and R netCDF packages (NCDatasets, netCDF4 and ncdf4 respectively).
+* Run the shell script `benchmark.sh`.
 
 ```bash
 ./benchmark.sh
 ```
 
-If all packages are installed in the home directory of an unpriviledges user e.g. `my_user_name`, they can be made available to the root user changing temporarily the `HOME` environement variable to `/home/my_user_name` in the root shell before running `./benchmark.sh`:
+The script will output a markdown table with the benchmark statistics.
+
+## Dropping file caches
+
+Operating systems typically cache access to the file system.
+To make these benchmarks more realistic, the file system caches can be dropped at every iteration by running the benchmark script with the option `--drop-caches`,
+so that the disk IO *is* included in the reported run times.
+On Linux, the caches are dropped by writing `3` to the file `/proc/sys/vm/drop_caches`; however, this requires superuser privileges.
+With this option, the benchmarks require a Linux operating system (as dropping file caches is OS-specific).
+
+
+* As a *root user*, run the shell script `benchmark.sh` with the option `--drop-caches`. It is necessary that the root user has access to the Julia, Python and R netCDF packages (NCDatasets, netCDF4 and ncdf4 respectively).
 
 ```bash
-HOME=/home/my_user_name ./benchmark.sh
+./benchmark.sh --drop-caches
 ```
 
-The script will output a markdown table with the benchmark statistics.
+If all packages are installed in the home directory of an unprivileged user, e.g. `my_user_name`, they can be made available to the root user by temporarily changing the `HOME` environment variable to `/home/my_user_name` in the root shell before running `./benchmark.sh`:
+
+```bash
+HOME=/home/my_user_name ./benchmark.sh --drop-caches
+```
diff --git a/test/perf/benchmark-R-ncdf4.R b/test/perf/benchmark-R-ncdf4.R
index 584cc3cb..b57a2268 100644
--- a/test/perf/benchmark-R-ncdf4.R
+++ b/test/perf/benchmark-R-ncdf4.R
@@ -6,15 +6,18 @@ library(ncdf4)
 
 library(microbenchmark)
 
+print(R.version.string)
 print(paste("ncdf4 version: ",packageVersion("ncdf4")))
 
 fname = "filename_fv.nc"
 
-process <- function(fname) {
-    # drop file caches; requires root
-    fileConn<-file("/proc/sys/vm/drop_caches",open = "wt")
-    writeLines("3", fileConn)
-    close(fileConn)
+process <- function(fname,drop_caches = FALSE) {
+    if (drop_caches) {
+        # drop the OS file system caches (Linux only; requires root)
+        fileConn<-file("/proc/sys/vm/drop_caches",open = "wt")
+        writeLines("3", fileConn)
+        close(fileConn)
+    }
 
     nc = nc_open(fname)
 
@@ -29,14 +32,16 @@ process <- function(fname) {
     return(tot/nmax)
 }
 
+drop_caches <- "--drop-caches" %in% commandArgs(trailingOnly=TRUE)
+print(paste("drop caches: ",drop_caches))
 start_time <- Sys.time()
-tot = process(fname)
+tot = process(fname,drop_caches)
 end_time <- Sys.time()
 
 print(paste("time ",end_time - start_time))
 print(paste("result ",tot))
 
-mbm <- microbenchmark("ncdf4" = process(fname),times=100)
+mbm <- microbenchmark("ncdf4" = process(fname,drop_caches),times=100)
 
 fileConn<-file("R-ncdf4.txt",open = "wt")
 
diff --git a/test/perf/benchmark-julia-NCDatasets.jl b/test/perf/benchmark-julia-NCDatasets.jl
index 347a0505..a1fe4db6 100644
--- a/test/perf/benchmark-julia-NCDatasets.jl
+++ b/test/perf/benchmark-julia-NCDatasets.jl
@@ -13,9 +13,11 @@ function compute(v)
     return tot/size(v,3)
 end
 
-function process(fname)
-    # drop file caches; requires root
-    write("/proc/sys/vm/drop_caches","3")
+function process(fname,drop_caches = false)
+    if drop_caches
+        # drop the OS file system caches (Linux only; requires root)
+        write("/proc/sys/vm/drop_caches","3")
+    end
 
     ds = NCDataset(fname,"r") do ds
         v = ds["v1"];
@@ -24,12 +26,15 @@ function process(fname)
     end
 end
 
-fname = "filename_fv.nc"
-tot = process(fname)
+drop_caches = "--drop-caches" in ARGS
+println("Julia ",VERSION)
+println("drop caches: ",drop_caches)
 
+fname = "filename_fv.nc"
+tot = process(fname,drop_caches)
 println("result ",tot)
 
-bm = run(@benchmarkable process(fname) samples=100 seconds=10000)
+bm = run(@benchmarkable process(fname,drop_caches) samples=100 seconds=10000)
 
 @show bm
 
diff --git a/test/perf/benchmark-python-netCDF4.py b/test/perf/benchmark-python-netCDF4.py
index 661103b3..cd64c8be 100644
--- a/test/perf/benchmark-python-netCDF4.py
+++ b/test/perf/benchmark-python-netCDF4.py
@@ -5,6 +5,7 @@
 import netCDF4
 import numpy as np
 import timeit
+import sys
 
 def compute(v):
     tot = 0
@@ -13,9 +14,10 @@ def compute(v):
 
     return tot/v.shape[0]
 
-def process(fname):
-    with open("/proc/sys/vm/drop_caches","w") as f:
-        f.write("3")
+def process(fname,drop_caches=False):
+    if drop_caches:
+        with open("/proc/sys/vm/drop_caches","w") as f:
+            f.write("3")
 
     with netCDF4.Dataset(fname) as ds:
         v = ds["v1"]
@@ -24,15 +26,20 @@ def process(fname):
 
 
 if __name__ == "__main__":
+    drop_caches = "--drop-caches" in sys.argv[1:]
+
+    print("Python ",sys.version)
+    print("drop caches: ",drop_caches)
+
     fname = "filename_fv.nc";
-    tot = process(fname)
+    tot = process(fname,drop_caches)
 
     print("result ",tot)
 
     setup = "from __main__ import process"
     print("python-netCDF4 version ",netCDF4.__version__)
 
-    benchtime = timeit.repeat(lambda: process(fname), setup=setup,number = 1, repeat = 100)
+    benchtime = timeit.repeat(lambda: process(fname,drop_caches), setup=setup,number = 1, repeat = 100)
     with open("python-netCDF4.txt","w") as f:
         for bt in benchtime:
             print(bt,file=f)
diff --git a/test/perf/benchmark.sh b/test/perf/benchmark.sh
index e4a76c4b..e3acc2c3 100755
--- a/test/perf/benchmark.sh
+++ b/test/perf/benchmark.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-julia benchmark-julia-NCDatasets.jl
-python3 benchmark-python-netCDF4.py
-Rscript benchmark-R-ncdf4.R
+julia benchmark-julia-NCDatasets.jl "$@"
+python3 benchmark-python-netCDF4.py "$@"
+Rscript benchmark-R-ncdf4.R "$@"
 
 julia summary.jl