Skip to content

Commit

Permalink
Merge pull request #2434 from moj-analytical-services/better_autocomplete_for_dataframes
Browse files Browse the repository at this point in the history

Better autocomplete for dataframes
  • Loading branch information
RobinL authored Sep 30, 2024
2 parents 5a9068b + 97df27b commit 316f5cc
Showing 1 changed file with 27 additions and 10 deletions.
37 changes: 27 additions & 10 deletions splink/internals/splink_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional

from duckdb import DuckDBPyRelation

from splink.internals.input_column import InputColumn

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -136,17 +138,32 @@ def as_pandas_dataframe(self, limit=None):

return pd.DataFrame(self.as_record_dict(limit=limit))

def _repr_pretty_(self, p, cycle):
msg = (
f"Table name in database: `{self.physical_name}`\n"
"\nTo retrieve records, you can call the following methods on this object:"
"\n`.as_record_dict(limit=5)` or "
"`.as_pandas_dataframe(limit=5)`.\n"
"\nYou may omit the `limit` argument to return all records."
"\n\nThis table represents the following splink entity: "
f"{self.templated_name}"
def as_duckdbpyrelation(self, limit: Optional[int] = None) -> DuckDBPyRelation:
    """Convert this table to a ``duckdb.DuckDBPyRelation``.

    Only implemented for the DuckDB backend; every other backend raises.

    Args:
        limit (int, optional): If provided, return this number of rows (equivalent
            to a limit statement in SQL). Defaults to None, meaning return all rows.

    Returns:
        duckdb.DuckDBPyRelation: A DuckDBPyRelation object

    Raises:
        NotImplementedError: If the active backend is not DuckDB.
    """
    backend_error = (
        "This method is only available when using the DuckDB backend"
    )
    raise NotImplementedError(backend_error)

# Spark not guaranteed to be available so return type is not imported
def as_spark_dataframe(self) -> "SparkDataFrame":  # type: ignore # noqa: F821
    """Convert this table to a Spark DataFrame.

    Only implemented for the Spark backend; every other backend raises.

    Returns:
        spark.DataFrame: A Spark DataFrame

    Raises:
        NotImplementedError: If the active backend is not Spark.
    """
    backend_error = "This method is only available when using the Spark backend"
    raise NotImplementedError(backend_error)
p.text(msg)

def to_parquet(self, filepath, overwrite=False):
"""Save the dataframe in parquet format.
Expand Down

0 comments on commit 316f5cc

Please sign in to comment.