Merge branch 'development' of https://github.com/pixalytics-ltd/uplan…

UBurn JHub committed Mar 15, 2021
2 parents be1f78b + 2c8f020 commit 08368fd
Showing 35 changed files with 6,627 additions and 2 deletions.
446 changes: 446 additions & 0 deletions Analyse-Sentinel1-slc.ipynb

Large diffs are not rendered by default.

403 changes: 403 additions & 0 deletions Analyse-Sentinel1.ipynb

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions CODEOWNERS
@@ -0,0 +1,8 @@
# This is a comment.
# Each line is a file pattern followed by one or more owners.

# These owners will be the default owners for everything in
# the repo. Unless a later match takes precedence,
# @global-owner1 and @global-owner2 will be requested for
# review when someone opens a pull request.
* @pixalytics
1,174 changes: 1,174 additions & 0 deletions Combined_UBurn_WorkBook.ipynb

Large diffs are not rendered by default.

289 changes: 289 additions & 0 deletions Pull-ERA5.ipynb
@@ -0,0 +1,289 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defining variables as not called externally\n",
"Loaded configuration: {'sharedfolder': 'my_shared_data_folder'}.\n"
]
}
],
"source": [
"import os\n",
"import numpy as np\n",
"from osgeo import osr\n",
"import geojson as gj\n",
"import netCDF4\n",
"from datetime import datetime, timezone\n",
"\n",
"import matplotlib.pyplot as plt\n",
"plt.set_loglevel(\"critical\")\n",
"\n",
"# https://cds.climate.copernicus.eu/api-how-to\n",
"# Import library for CDS data access and load credentials\n",
"import cdsapi\n",
"c = cdsapi.Client()\n",
"\n",
"# Upland burn modules\n",
"import utils.plot_data as plotd\n",
"import utils.functions as functions\n",
"from utils.get_configuration import get_configuration\n",
"\n",
"# Define input variables if not calling externally\n",
"try:\n",
" print(\"Input dataset from: {}\".format(datasets)) # check if defined\n",
"\n",
"# if not defined raises an error and control shifts to except block. \n",
"except:\n",
" print (\"Defining variables as not called externally\") \n",
" \n",
" # Verbose output\n",
" verbose = False\n",
" \n",
" # Get configuration information\n",
" basefolder, s1ardfolder, datasets, outfolder, tmpfolder, ofiles, hdfile, pfile, verbose, graphics = get_configuration(cstudy)\n",
" \n",
" # Burn Polygon\n",
" polygon = \"Skye_burn_extent_428\"\n",
" \n",
" # Whether to plot the graph\n",
" graphics = False\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Specify site to extract\n",
"\n",
"# WGS84 projection reference\n",
"OSR_WGS84 = osr.SpatialReference()\n",
"OSR_WGS84.ImportFromEPSG(4326)\n",
"OSR_OSGB36 = osr.SpatialReference()\n",
"OSR_OSGB36.ImportFromEPSG(27700)\n",
"\n",
"# Setup burn area polygon\n",
"polygon_file = os.path.join(datasets, polygon + \".geojson\")\n",
"\n",
"# Get coords for bounding box\n",
"with open (polygon_file, 'r') as f:\n",
" loadg = gj.loads(f.read())\n",
"x, y = zip(*gj.utils.coords(loadg))\n",
"del loadg\n",
"min_x, max_x, min_y, max_y = min(x), max(x), min(y), max(y)\n",
"if verbose:\n",
" print(\"Input for GeoJSON ULeft {:.3f}:{:.3f} LRight {:.3f}:{:.3f} \".format(min_x, max_y, max_x, min_y))\n",
"\n",
"# Transform to WGS84\n",
"to_wgs84 = osr.CoordinateTransformation(OSR_OSGB36, OSR_WGS84)\n",
"xymin = to_wgs84.TransformPoint(min_x, min_y)\n",
"xymax = to_wgs84.TransformPoint(max_x, max_y)\n",
"offset = 0.01 # degrees\n",
"sitell = [xymax[1]+offset, xymin[0]-offset, xymin[1]-offset, xymax[0]+offset]\n",
"diff1, diff2 = (xymax[1]+offset) - (xymin[1]-offset), (xymax[0]+offset) - (xymin[0]-offset)\n",
"if diff1 < 0.1 or diff2 < 0.1: # Increase as otherwise can be 1 dimensional array with no valid data\n",
" offset = 0.15 # degrees\n",
" sitell = [xymax[1]+offset, xymin[0]-offset, xymin[1]-offset, xymax[0]+offset]\n",
" diff3, diff4 = (xymax[1]+offset) - (xymin[1]-offset), (xymax[0]+offset) - (xymin[0]-offset)\n",
" if verbose:\n",
" print(\"ERA {} Box size < 0.1 [{:.3f} {:.3f}] increasing to {:.3f} {:.3f}\".format(polygon, diff1,diff2,diff3, diff4))\n",
" \n",
"if verbose:\n",
" print(\"WGS84 ULeft {:.3f}:{:.3f} LRight {:.3f}:{:.3f} \".format(xymin[0], xymax[1], xymax[0], xymin[1]))\n",
"\n",
"# Years and months to download and plot\n",
"years = ['2018','2019']\n",
"months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']\n",
"num_days = functions.numberOfDays(int(years[0]), int(months[0]))\n",
"days = list(range(1, num_days+1, 1))\n",
"if verbose:\n",
" print(\"Number of days: \", num_days, days) \n",
"\n",
"# Variables\n",
"variables = [\n",
" '10m_u_component_of_wind', '10m_v_component_of_wind', 'leaf_area_index_high_vegetation',\n",
" 'leaf_area_index_low_vegetation', 'snow_cover', 'surface_pressure',\n",
" 'surface_runoff', 'surface_solar_radiation_downwards', 'total_precipitation',\n",
" ]\n",
"ecmwf_names = [\n",
" 'u10', 'v10', 'lai_hv', 'lai_lv', 'snowc', 'sp', 'sro', 'ssrd', 'tp',\n",
" ]\n",
"numvariables = len(variables)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Defined output path\n",
"outfile = polygon + '-download.nc'\n",
"erafolder = os.path.join(datasets,\"era-downloaded\")\n",
"outnc = os.path.join(erafolder,outfile)\n",
"keep = True\n",
"if not keep and os.path.exists(outnc):\n",
" os.remove(outnc)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Already downloaded: /home/slavender/my_shared_data_folder/era-downloaded/Skye_burn_extent_428-download.nc \n"
]
}
],
"source": [
"# Retieve needed data\n",
"if not os.path.exists(outnc):\n",
" print(\"Downloading: \",outnc)\n",
" c.retrieve(\n",
" 'reanalysis-era5-land',\n",
" {\n",
" 'format': 'netcdf',\n",
" 'product_type': 'monthly_averaged_reanalysis',\n",
" 'variable': variables,\n",
" 'year': years,\n",
" 'month': months,\n",
" 'day': days,\n",
" 'time': '00:00',\n",
" 'area': sitell,\n",
" },\n",
" outnc)\n",
"else:\n",
" print(\"Already downloaded: {} \".format(outnc))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading: odict_keys(['longitude', 'latitude', 'time', 'u10', 'v10', 'lai_hv', 'lai_lv', 'snowc', 'sp', 'sro', 'ssrd', 'tp']) \n",
"Time steps 730 times ['01Jan18', '02Jan18', '03Jan18', '04Jan18', '05Jan18']\n",
"u10: (730, 3, 4) (730, 9)\n",
"0: u10 min -9.597 max 12.226\n",
"v10: (730, 3, 4) (730, 9)\n",
"1: v10 min -12.833 max 14.153\n",
"lai_hv: (730, 3, 4) (730, 9)\n",
"2: lai_hv min 2.424 max 2.568\n",
"lai_lv: (730, 3, 4) (730, 9)\n",
"3: lai_lv min 3.556 max 4.196\n",
"snowc: (730, 3, 4) (730, 9)\n",
"4: snowc min 0.000 max 34.019\n",
"sp: (730, 3, 4) (730, 9)\n",
"5: sp min 95312.055 max 103318.089\n",
"sro: (730, 3, 4) (730, 9)\n",
"6: sro min 0.000 max 0.002\n",
"ssrd: (730, 3, 4) (730, 9)\n",
"7: ssrd min 116854.474 max 29072800.754\n",
"tp: (730, 3, 4) (730, 9)\n",
"8: tp min 0.000 max 0.031\n",
"Extracted data shape: (730, 9)\n"
]
}
],
"source": [
"# Load NetCDF\n",
"file = netCDF4.Dataset(outnc)\n",
"print(\"Loading: {} \".format(file.variables.keys()))\n",
"\n",
"# Extract times\n",
"times = file.variables['time']\n",
"dt = [netCDF4.num2date(ts, units=times.units).strftime('%d%b%y') for ts in times[:]]\n",
"dt_num = [netCDF4.num2date(ts, units=times.units).strftime('%Y%m%d') for ts in times[:]]\n",
"timesteps = len(dt)\n",
"if verbose:\n",
" print(\"Time steps {} times {}\".format(timesteps,dt[0:5]))\n",
"\n",
"# Reformat the variables\n",
"mean_parameters = np.zeros((timesteps,numvariables), dtype=float)\n",
"std_parameters = np.zeros((timesteps,numvariables), dtype=float)\n",
"\n",
"numyears = 0.0\n",
"for num in range(int(numvariables)):\n",
" parameter = file.variables[ecmwf_names[num]] # physical variable\n",
" # Set null values to zero\n",
" zdim, ydim, xdim = parameter.shape\n",
" if verbose:\n",
" print(\"{}: {} {}\".format(ecmwf_names[num],parameter.shape,mean_parameters.shape))\n",
" for month in range(timesteps):\n",
" #print(parameter[month,:,:])\n",
" parameter[month,:,:][parameter[month,:,:] < (-32766.000)] = np.nan\n",
" mean_parameters[month,num] = np.nanmean(parameter[month,:,:])\n",
" std_parameters[month,num] = np.nanstd(parameter[month,:,:])\n",
" if verbose:\n",
" print(\"{}: {} min {:.3f} max {:.3f}\".format(num, ecmwf_names[num],np.nanmin(mean_parameters[:,num]),np.nanmax(mean_parameters[:,num])))\n",
" \n",
"print(\"Extracted data shape: \", mean_parameters.shape) \n",
"\n",
"# Clear arrays\n",
"file = None\n",
"del parameter"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if graphics:\n",
" # Plot time-series\n",
" outfile = os.path.join(outfolder,\"era-plot.png\")\n",
" prodnum = 8 # precipitation\n",
" # print(\"Plotting {} for {}\".format(ecmwf_names[prodnum],dt))\n",
" plotd.plot_line(dt, mean_parameters[:,prodnum], std_parameters[:,prodnum], variables[prodnum], outfile)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"jupytext": {
"formats": "ipynb,md"
},
"kernelspec": {
"display_name": "Python [conda env:rsgislib_dev]",
"language": "python",
"name": "conda-env-rsgislib_dev-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
63 changes: 61 additions & 2 deletions README.md
@@ -1,2 +1,61 @@
# upland-burn-detection
Detection of upland burn using Sentinel-1 data
# Upland Burn Project Jupyter Notebooks
This repository is part of the final set of deliverables produced by Pixalytics and EnviroSAR under their contract in response to the Department for Environment, Food & Rural Affairs (DEFRA) Invitation To Tender (ITT) 76044 “Upland Burn Detection with Radar”.

For this project, three case study areas were of interest: the Isle of Skye, the Cairngorms and the Peak District National Park. A subset of the data generated can be downloaded from the following Dropbox link for testing the Jupyter Notebooks: https://www.dropbox.com/s/phje3itiat6yt33/my_shared_data_folder.tar.gz?dl=0
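
A minimal Python sketch for fetching and unpacking the archive (assumptions: switching the link's `dl=0` to `dl=1`, which asks Dropbox for a direct download, and extracting into the current directory):

```python
import tarfile
import urllib.request

# dl=1 makes Dropbox serve the file itself rather than a preview page
url = "https://www.dropbox.com/s/phje3itiat6yt33/my_shared_data_folder.tar.gz?dl=1"
archive = "my_shared_data_folder.tar.gz"
urllib.request.urlretrieve(url, archive)

# Unpack, creating my_shared_data_folder/ alongside the notebooks
with tarfile.open(archive, "r:gz") as tar:
    tar.extractall(path=".")
```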

The folder structure that was set up in the original JupyterLab was:
* notebooks (contents of the GitHub repository)
* my_shared_data_folder (datasets on a mounted drive, with the location defined in the yml file in the utils folder; see the configuration sketch after this list)
  + datasets (small files that included the CORINE land cover data and GeoJSONs)
    - baselines (calculated coherence baselines)
    - era-downloaded (downloaded and stored ERA5 data; files are generated each time the Combined_UBurn_WorkBook Jupyter notebook is run)
    - shape_files (shapefiles corresponding to the GeoJSONs in the main datasets folder)
  + cairngorms
    - backscatter_tiffs
    - coherence_tiffs
    - coherence_tiffs2
  + pdistrict
    - backscatter_tiffs
    - coherence_tiffs
    - coherence_tiffs2
  + skye
    - backscatter_tiffs
    - coherence_tiffs (no data as same as v2)
    - coherence_tiffs2
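
A minimal sketch of reading that yml configuration (the file name `utils/config.yml` and the `sharedfolder` key are assumptions based on the configuration printout in the notebooks):

```python
import os
import yaml

# Hypothetical location of the project's configuration file
with open(os.path.join("utils", "config.yml"), "r") as f:
    config = yaml.safe_load(f)

# e.g. {'sharedfolder': 'my_shared_data_folder'}
shared = os.path.join(os.path.expanduser("~"), config["sharedfolder"])
print("Shared data folder:", shared)
```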

<bold>Note:</bold> The coherence processing required several iterations to generate the best dataset for automated extraction. In comparison to version 1 that produced coherence based on matching slice numbers, version 2 ensures the area of interest is fully covered when moving from north to south (or vice versa) by analysing multiple combinations that account for any shift in slice position between consecutive orbits. However, a small limitation remains because Sentinel-1 files will only cover part of the area of interest in the east / west direction as the
orbit path itself may "clip" the area of interest. Therefore a future step may be to combine multiple orbits, or filter these orbits out entirely. Currently, the latter can be achieved during the Jupyter Notebook analysis, or initial processing when the user can select the relative orbit to process.

## Downloading Backscattering Datasets

In the pre-process folder there is a notebook (Download-Sentine1ARD-JNCC) to download Sentinel-1 backscattering data for an area and timeframe of your choosing. The data is downloaded from the JNCC ARD archive held on CEDA, and then subsetted and merged for the area of interest, as sketched below.
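
The subsetting and merging can be approximated with GDAL; this sketch is illustrative only (the tile names and the OSGB36 bounding box are placeholders, not values from the notebook):

```python
from osgeo import gdal

# Hypothetical adjacent JNCC ARD backscatter tiles
tiles = ["S1_tile_a.tif", "S1_tile_b.tif"]

# Mosaic the tiles and crop to the area of interest in one call;
# outputBounds is (minX, minY, maxX, maxY) in the target SRS (OSGB36 / EPSG:27700)
gdal.Warp(
    "merged_subset.tif",
    tiles,
    dstSRS="EPSG:27700",
    outputBounds=(290000, 800000, 320000, 830000),
)
```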

## Processing Coherence Datasets

In the pre-process folder there is a notebook (Generate-Sentine1SLC) to process Sentinel-1 coherence data for an area and timeframe of your choosing.
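
The commit also adds SNAP-style graph parameter files (see SLC_Data_processing/XML_files/Mergeswaths.properties below), which suggests the processing is driven by SNAP graphs. Assuming SNAP's gpt command-line tool is installed and a matching graph XML exists, one way to run such a step from Python:

```python
import subprocess

# Assumed paths: a SNAP graph and its matching parameters file
graph = "SLC_Data_processing/XML_files/Mergeswaths.xml"
params = "SLC_Data_processing/XML_files/Mergeswaths.properties"

# gpt substitutes parameters such as input1..input3 from the -p file into the graph
subprocess.run(["gpt", graph, "-p", params], check=True)
```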

<bold>Note:</bold> Once you have your final images, it is worth cleaning up the initial download folder and intermediate folder as the memory usage quickly adds up!

## Running the Analysis of the Backscattering and Coherence Data

Coherence or Backscatter datasets can be investigated using the Combined_UBurn_WorkBook Jupyter notebook.
* When initially deploying the notebook, you will be asked to choose the dataset (either Coherence or Backscatter) and then the case study area.
* Once these have been chosen, the third dropdown menu will automatically be populated with a list of available polygons. These are listed by a unique reference number, followed by the date on which the burn occurred.
* The last dropdown menu is the dataset version you want to investigate (a sketch of such dependent dropdowns follows this list).
* Once you have made the selections above, you can visualise the locations of each burn by generating an interactive map of all burns.
* Then you can read in the ERA5 and CORINE data and visualise the Sentinel-1 data through time-series plots, plus you have the option of downloading the data as GeoTIFFs for use in a GIS package.
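
The dependent dropdowns described above can be wired up with ipywidgets; the sketch below is an assumption about how such menus interact, not the notebook's actual code:

```python
import ipywidgets as widgets
from IPython.display import display

# Placeholder options; the real lists come from the configuration and data folders
dataset = widgets.Dropdown(options=["Coherence", "Backscatter"], description="Dataset:")
area = widgets.Dropdown(options=["skye", "cairngorms", "pdistrict"], description="Area:")
polygons = widgets.Dropdown(options=[], description="Polygon:")

def update_polygons(change):
    # Re-populate the polygon list whenever the case study area changes
    polygons.options = ["428 (date of burn)", "431 (date of burn)"]  # placeholder values

area.observe(update_polygons, names="value")
display(dataset, area, polygons)
```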

## Running the Initial Automated Detection

<bold>Note:</bold> For running analysis - due to the large size of the area, running the analysis on too many files at once can cause the notebook to crash. Therefore, the number of files has been restricted to 20 but what your system can handle depends on the memory available.

* Similarly to the main notebook, the first couple of cells in the Initial_Automated_Detection notebook will set the initial configuration and allow you to choose the case study / dataset of interest. After these have been selected, the following cells will load and show an interactive dataframe which, again, allows you to filter down to the data you want to investigate.

* After this, the images are loaded into an array and the analysis run (the notebook is currently set up to perform denormalisation followed by a Gaussian filter with a sigma of 5). The analysis steps can be adjusted and/or different steps can be added at a different point. More information on how to do this is given in the notebook.

* The resulting image is thresholded to provide a potential prediction, which is compared to known burn areas, with accuracy and precision scores calculated (see the sketch after this list).

* The prediction, the ground-truth array and/or the pre-thresholded total change can be saved either as a simple PNG, or projected to the correct coordinate system (OSGB36) and saved as a GeoTIFF.
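
A hedged sketch of that detection pipeline (Gaussian filter, threshold, then accuracy/precision against a ground-truth mask); the function name, threshold value and use of scikit-learn metrics are assumptions rather than the notebook's own code:

```python
import numpy as np
from scipy.ndimage import gaussian_filter
from sklearn.metrics import accuracy_score, precision_score

def detect_burn(change_image, ground_truth, sigma=5, threshold=0.5):
    # Smooth the (already denormalised) total-change image
    smoothed = gaussian_filter(change_image, sigma=sigma)
    # Threshold the smoothed image into a binary burn prediction
    prediction = smoothed > threshold
    # Score the prediction against the known burn mask
    acc = accuracy_score(ground_truth.ravel(), prediction.ravel())
    prec = precision_score(ground_truth.ravel(), prediction.ravel())
    return prediction, acc, prec

# Example on random placeholder data
rng = np.random.default_rng(0)
pred, acc, prec = detect_burn(rng.random((100, 100)), rng.random((100, 100)) > 0.5)
print("Accuracy {:.2f}, precision {:.2f}".format(acc, prec))
```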



6 changes: 6 additions & 0 deletions SLC_Data_processing/XML_files/Mergeswaths.properties
@@ -0,0 +1,6 @@
input1=/home/asmith/notebooks/SLC_Data_processing/Intermediate_files/IW1.dim
input2=/home/asmith/notebooks/SLC_Data_processing/Intermediate_files/IW2.dim
input3=/home/asmith/notebooks/SLC_Data_processing/Intermediate_files/IW3.dim
polarisation=VV
output=/home/asmith/my_shared_data_folder/s1slc/Processed_images/20180713T175003_20180719T174916_VV
polygon=POLYGON((-2.1235 53.6006,-1.4783 53.6106,-1.4769 52.9668,-2.1136 52.9611,-2.1235 53.6006))