Skip to content

Commit

Permalink
Merge pull request #255 from mhearne-usgs/appendfix
Browse files Browse the repository at this point in the history
Replaced instances of the deprecated dataframe.append with other methods (pd.concat, and accumulating rows in a list before constructing the DataFrame).
  • Loading branch information
mhearne-usgs authored Jan 26, 2022
2 parents 7e65edb + 66384e4 commit 98017da
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 50 deletions.
8 changes: 6 additions & 2 deletions install.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash

cwd=$(pwd)
unamestr=`uname`
env_file=environment.yml
if [ "$unamestr" == 'Linux' ]; then
Expand Down Expand Up @@ -71,6 +72,9 @@ if [ $? -ne 0 ]; then
echo ". $_CONDA_ROOT/etc/profile.d/conda.sh" >> $prof
fi

# make sure we're in the project directory still
cd $cwd

# Start in conda base environment
echo "Activate base virtual environment"
conda activate base
Expand All @@ -80,12 +84,12 @@ conda remove -y -n $VENV --all

# Package list:
package_list=(
"python>=3.6"
"python>=3.7"
"impactutils"
"fiona>=1.8.20"
"ipython"
"jupyter"
"numpy"
"numpy=1.21"
"obspy"
"pandas"
"pip"
Expand Down
2 changes: 1 addition & 1 deletion libcomcat/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ def getProducts(self, product_name, source="preferred", version="preferred"):
dft = df[df["source"] == psource]
dft = dft.sort_values("time")
dft["version"] = np.arange(1, len(dft) + 1)
newframe = newframe.append(dft)
newframe = pd.concat([newframe, dft])
df = newframe

if source == "preferred":
Expand Down
64 changes: 34 additions & 30 deletions libcomcat/dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,6 @@ def get_phase_dataframe(detail, catalog="preferred"):
"""
if catalog is None:
catalog = "preferred"
df = pd.DataFrame(
columns=[
"Channel",
"Distance",
"Azimuth",
"Phase",
"Arrival Time",
"Status",
"Residual",
"Weight",
"Agency",
]
)

phasedata = detail.getProducts("phase-data", source=catalog)[0]
quakeurl = phasedata.getContentURL("quakeml.xml")
Expand All @@ -150,14 +137,28 @@ def get_phase_dataframe(detail, catalog="preferred"):
msg = fmt % (quakeurl, str(e))
raise ParsingError(msg)
catevent = catalog.events[0]
phaserows = []
for pick in catevent.picks:
station = pick.waveform_id.station_code
fmt = "Getting pick %s for station%s..."
logging.debug(fmt % (pick.time, station))
phaserow = _get_phaserow(pick, catevent)
if phaserow is None:
continue
df = df.append(phaserow, ignore_index=True)
phaserows.append(phaserow)
df = pd.DataFrame(phaserows)
columns = [
"Channel",
"Distance",
"Azimuth",
"Phase",
"Arrival Time",
"Status",
"Residual",
"Weight",
"Agency",
]
df = df[columns]
return df


Expand Down Expand Up @@ -319,7 +320,7 @@ def get_magnitude_data_frame(detail, catalog, magtype):
AttributeError if input DetailEvent does not have a phase-data product
for the input catalog.
"""
columns = columns = [
columns = [
"Channel",
"Type",
"Amplitude",
Expand All @@ -331,7 +332,6 @@ def get_magnitude_data_frame(detail, catalog, magtype):
"Azimuth",
"MeasurementTime",
]
df = pd.DataFrame(columns=columns)
phasedata = detail.getProducts("phase-data", source=catalog)[0]
quakeurl = phasedata.getContentURL("quakeml.xml")
try:
Expand All @@ -350,6 +350,7 @@ def get_magnitude_data_frame(detail, catalog, magtype):
msg = fmt % (quakeurl, str(e))
raise ParsingError(msg)
catevent = catalog.events[0] # match this to input catalog
rows = []
for magnitude in catevent.magnitudes:
if magnitude.magnitude_type.lower() != magtype.lower():
continue
Expand Down Expand Up @@ -403,8 +404,9 @@ def get_magnitude_data_frame(detail, catalog, magtype):
row["Weight"] = contribution.weight
row["Distance"] = distance
row["Azimuth"] = azimuth
rows.append(row)

df = df.append(row, ignore_index=True)
df = pd.DataFrame(rows)
df = df[columns]
return df

Expand Down Expand Up @@ -548,7 +550,7 @@ def get_pager_data_frame(
if not detail.hasProduct("losspager"):
return None

df = None
total_rows = []
for pager in detail.getProducts("losspager", version="all"):
total_row = {}
default = {}
Expand Down Expand Up @@ -645,12 +647,11 @@ def get_pager_data_frame(
"predicted_dollars",
"dollars_sigma",
]
if df is None:
df = pd.DataFrame(columns=columns)
df = df.append(total_row, ignore_index=True)
total_rows.append(total_row)
for ccode, country_row in country_rows.items():
df = df.append(country_row, ignore_index=True)
total_rows.append(country_row)

df = pd.DataFrame(total_rows)
df = df[columns]
# countries with zero fatalities don't report, so fill in with zeros
if get_losses:
Expand Down Expand Up @@ -1002,24 +1003,25 @@ def get_history_data_frame(detail, products=None):
else:
products = PRODUCTS

dataframe = pd.DataFrame(columns=PRODUCT_COLUMNS)
allrows = []
for product in products:
logging.debug("Searching for %s products..." % product)
if not event.hasProduct(product):
continue
prows = _get_product_rows(event, product)
dataframe = dataframe.append(prows, ignore_index=True)
allrows += prows

dataframe = dataframe.sort_values("Update Time")
dataframe["Elapsed (min)"] = np.round(dataframe["Elapsed (min)"], 1)
dataframe = pd.DataFrame(allrows)
dataframe["Comment"] = ""
dataframe = dataframe[PRODUCT_COLUMNS]
dataframe = dataframe.sort_values("Update Time")
dataframe["Elapsed (min)"] = np.round(dataframe["Elapsed (min)"], 1)
return (dataframe, event)


def _get_product_rows(event, product_name):
products = event.getProducts(product_name, source="all", version="all")
prows = pd.DataFrame(columns=PRODUCT_COLUMNS)
prows = []
for product in products:
# if product.contents == ['']:
# continue
Expand Down Expand Up @@ -1047,7 +1049,7 @@ def _get_product_rows(event, product_name):
continue
if prow is None:
continue
prows = prows.append(prow, ignore_index=True)
prows.append(prow)

return prows

Expand Down Expand Up @@ -1774,6 +1776,7 @@ def split_history_frame(dataframe, product=None):
parts = dataframe.iloc[0]["Description"].split("|")
columns = [p.split("#")[0] for p in parts]
df2 = pd.DataFrame(columns=columns)
hrows = []
for idx, row in dataframe.iterrows():
parts = row["Description"].split("|")
columns = [p.split("#")[0].strip() for p in parts]
Expand All @@ -1790,8 +1793,9 @@ def split_history_frame(dataframe, product=None):
newvalues.append(newval)
ddict = dict(zip(columns, newvalues))
row = pd.Series(ddict)
df2 = df2.append(row, ignore_index=True)
hrows.append(row)

df2 = pd.DataFrame(hrows)
dataframe = dataframe.reset_index(drop=True)
df2 = df2.reset_index(drop=True)
dataframe = pd.concat([dataframe, df2], axis=1)
Expand Down Expand Up @@ -2114,7 +2118,7 @@ def associate(
dlabels = ["dtime", "ddist", "dmag", "asq", "bsq", "csq", "psum"]
talternates.drop(labels=dlabels, axis="columns", inplace=True)
talternates["chosen_id"] = ef_row["id"]
alternates = alternates.append(talternates)
alternates = pd.concat([alternates, talternates])

found_events.append(row)
associated = pd.DataFrame(found_events)
Expand Down
33 changes: 16 additions & 17 deletions tests/libcomcat/dataframes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,23 +152,22 @@ def test_history_data_frame():
# SMOKE TEST
cassettes, datadir = get_datadir()
tape_file = os.path.join(cassettes, "dataframes_history.yaml")
products = [
"shakemap",
"dyfi",
"losspager",
"oaf",
"finite-fault",
"focal-mechanism",
"ground-failure",
"moment-tensor",
"phase-data",
"origin",
]

with vcr.use_cassette(tape_file, record_mode="new_episodes"):
nc72852151 = get_event_by_id("nc72852151", includesuperseded=True)
(history, event) = get_history_data_frame(
nc72852151,
[
"shakemap",
"dyfi",
"losspager",
"oaf",
"finite-fault",
"focal-mechanism",
"ground-failure",
"moment-tensor",
"phase-data",
"origin",
],
)
(history, event) = get_history_data_frame(nc72852151, products)
us10008e3k = get_event_by_id("us10008e3k", includesuperseded=True)
(history, event) = get_history_data_frame(
us10008e3k,
Expand Down Expand Up @@ -415,6 +414,8 @@ def test_associate():


if __name__ == "__main__":
print("Testing history frame...")
test_history_data_frame()
print("Testing catalog association...")
test_associate()
print("Testing nan mags extraction...")
Expand All @@ -431,5 +432,3 @@ def test_associate():
test_get_detail_data_frame()
print("Testing magnitude frame...")
test_magnitude_dataframe()
print("Testing history frame...")
test_history_data_frame()

0 comments on commit 98017da

Please sign in to comment.