Breakthrough-Energy · danielolsen · Dec 8, 2021 · Dec 6, 2021 · Dec 6, 2021 · Dec 6, 2021
diff --git a/prereise/cli/data_sources/wind_data.py b/prereise/cli/data_sources/wind_data.py
@@ -14,7 +14,8 @@
 )
 from prereise.cli.data_sources.data_source import DataSource
 from prereise.cli.helpers import validate_date, validate_file_path
-from prereise.gather.winddata.rap import impute, rap
+from prereise.gather.winddata import impute
+from prereise.gather.winddata.rap import rap
 
 
 class WindDataRapidRefresh(DataSource):

diff --git a/prereise/gather/winddata/hrrr/calculations.py b/prereise/gather/winddata/hrrr/calculations.py
@@ -7,6 +7,7 @@
 from tqdm import tqdm
 
 from prereise.gather.winddata.hrrr.helpers import formatted_filename
+from prereise.gather.winddata.impute import linear
 from prereise.gather.winddata.rap.power_curves import (
     get_power,
     get_state_power_curves,
@@ -80,52 +81,54 @@ def calculate_pout(wind_farms, start_dt, end_dt, directory):
         dt2    POUT        POUT
     """
 
-    wind_farm_ids = wind_farms.index
-    turbine_types = [
-        "Offshore"
-        if wind_farms.loc[i].type == "wind_offshore"
-        else id2abv[wind_farms.loc[i].zone_id]
-        for i in wind_farm_ids
-    ]
-    wind_farm_ct = len(wind_farms)
+    turbine_types = wind_farms.apply(
+        lambda x: "Offshore" if x["type"] == "wind_offshore" else id2abv[x["zone_id"]],
+        axis=1,
+    )
 
     turbine_power_curves = get_turbine_power_curves()
     state_power_curves = get_state_power_curves()
     wind_data_lat_long = get_wind_data_lat_long(start_dt, directory)
     wind_farm_to_closest_wind_grid_indices = find_closest_wind_grids(
         wind_farms, wind_data_lat_long
     )
-    dts = [
-        dt for dt in pd.date_range(start=start_dt, end=end_dt, freq="H").to_pydatetime()
-    ]
-    data = np.empty((len(dts), len(wind_farm_to_closest_wind_grid_indices)))
-    for i, dt in tqdm(enumerate(dts)):
+    dts = pd.date_range(start=start_dt, end=end_dt, freq="H").to_pydatetime()
+    # Fetch wind speed data for each wind farm (or store NaN as applicable)
+    wind_speed_data = pd.DataFrame(index=dts, columns=wind_farms.index, dtype=float)
+    for dt in tqdm(dts):
         gribs = pygrib.open(formatted_filename(dt))
-        u_component = gribs.select(name=U_COMPONENT_SELECTOR)[0].values.flatten()
-        v_component = gribs.select(name=V_COMPONENT_SELECTOR)[0].values.flatten()
-        wind_farm_specific_u_component = u_component[
-            wind_farm_to_closest_wind_grid_indices
-        ]
-        wind_farm_specific_v_component = v_component[
-            wind_farm_to_closest_wind_grid_indices
-        ]
-        wind_farm_specific_magnitude = np.sqrt(
-            pow(wind_farm_specific_u_component, 2)
-            + pow(wind_farm_specific_v_component, 2)
-        )
-        power = np.array(
-            [
-                get_power(
-                    turbine_power_curves,
-                    state_power_curves,
-                    wind_farm_specific_magnitude[j],
-                    turbine_types[j],
-                )
-                for j in range(wind_farm_ct)
+        try:
+            u_component = gribs.select(name=U_COMPONENT_SELECTOR)[0].values.flatten()
+            v_component = gribs.select(name=V_COMPONENT_SELECTOR)[0].values.flatten()
+            wind_farm_specific_u_component = u_component[
+                wind_farm_to_closest_wind_grid_indices
             ]
-        )
-        data[i] = power
-
-    df = pd.DataFrame(data=data, index=dts, columns=wind_farms.index)
+            wind_farm_specific_v_component = v_component[
+                wind_farm_to_closest_wind_grid_indices
+            ]
+            wind_speed_data.loc[dt] = np.sqrt(
+                pow(wind_farm_specific_u_component, 2)
+                + pow(wind_farm_specific_v_component, 2)
+            )
+        except ValueError:
+            # If the GRIB file is empty, no wind speed values can be selected
+            wind_speed_data.loc[dt] = np.nan
+
+    # For each column, linearly interpolate any NaN values
+    linear(wind_speed_data)
+    # Then calculate wind power based on wind speed
+    wind_power_data = [
+        [
+            get_power(
+                turbine_power_curves,
+                state_power_curves,
+                wind_speed_data.loc[dt, w],
+                turbine_types.loc[w],
+            )
+            for w in wind_farms.index
+        ]
+        for dt in tqdm(dts)
+    ]
+    df = pd.DataFrame(data=wind_power_data, index=dts, columns=wind_farms.index)
 
     return df
diff --git a/prereise/gather/winddata/rap/impute.py → prereise/gather/winddata/impute.py b/prereise/gather/winddata/rap/impute.py → prereise/gather/winddata/impute.py
@@ -137,3 +137,17 @@ def gaussian(data, wind_farm, inplace=True, curve="state"):
 
     if not inplace:
         return data_impute
+
+
+def linear(data, inplace=True):
+    """Given a 2D array, linearly interpolate any missing values column-wise.
+
+    :param numpy.array/pandas.DataFrame data: data to interpolate.
+    :param bool inplace: whether to modify the data inplace or return a modified copy.
+    :return: (*None/pandas.DataFrame*) -- if ``inplace`` is False, data frame with
+        missing entries imputed.
+    """
+    data_impute = data if inplace else data.copy()
+    data_impute[:] = pd.DataFrame(data_impute).interpolate()
+    if not inplace:
+        return data_impute