diff --git a/docs/conf.py b/docs/conf.py
index 1e4cd97..a280e9a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -27,7 +27,11 @@ def _get_props() -> tuple[str, str, str]:
html_theme = 'furo'
-intersphinx_mapping = { 'python': ('https://docs.python.org/3', None) }
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/3', None),
+ 'more-itertools': ('https://more-itertools.readthedocs.io/en/stable/', None),
+ 'pandas': ('https://pandas.pydata.org/docs/', None),
+}
def process_docstring(app, what, name, obj, options, lines :list[str]): # pylint: disable=too-many-positional-arguments,unused-argument # noqa: E501
if what=='module':
diff --git a/docs/index.rst b/docs/index.rst
index b1b7f67..2c0c5a1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,7 +9,7 @@ TL;DR
-----
- Code examples with Python's :mod:`csv`: :func:`toa5.read_header`
-- Code examples with `Pandas <https://pandas.pydata.org/>`_: :func:`toa5.read_pandas`
+- Code examples with :mod:`pandas`: :func:`toa5.read_pandas`
- `Command-Line TOA5-to-CSV Tool`_
Documentation
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 33953d3..fb4440f 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,3 +1,5 @@
+more-itertools >= 10.2.0
+pandas >= 2.2.3
sphinx == 8.1.3
furo == 2024.8.6
types-docutils == 0.21.0.20241005
diff --git a/tests/test_toa5.py b/tests/test_toa5.py
index c6c0ebd..b29f7de 100644
--- a/tests/test_toa5.py
+++ b/tests/test_toa5.py
@@ -124,8 +124,10 @@ def test_col_trans(self):
# test some claims from the documentation
self.assertEqual(toa5.default_col_hdr_transform(toa5.ColumnHeader("Test","","Min")),
toa5.default_col_hdr_transform(toa5.ColumnHeader("Test/Min","","Smp")) )
+ self.assertEqual(toa5.default_col_hdr_transform(toa5.ColumnHeader("Test","","Min")), "Test/Min" )
self.assertEqual(toa5.sql_col_hdr_transform(toa5.ColumnHeader("Test_1","Volts","")),
toa5.sql_col_hdr_transform(toa5.ColumnHeader("Test(1)","","Smp")))
+ self.assertEqual(toa5.sql_col_hdr_transform(toa5.ColumnHeader("Test_1","Volts","")), "test_1" )
def test_pandas(self):
el = toa5.EnvironmentLine(station_name='sn', logger_model='lm', logger_serial='ls', logger_os='os',
diff --git a/toa5/__init__.py b/toa5/__init__.py
index 33e451b..d0f5a63 100644
--- a/toa5/__init__.py
+++ b/toa5/__init__.py
@@ -6,6 +6,8 @@
3. The columns' units: :attr:`ColumnHeader.unit`
4. The columns' "data process": :attr:`ColumnHeader.prc`
+The following two functions can be used to read files with this header:
+
.. autofunction:: read_header
.. autofunction:: read_pandas
@@ -139,7 +141,12 @@ def sql_col_hdr_transform(col :ColumnHeader) -> str:
.. warning::
This transformation can potentially result in two columns on the same table
having the same name, for example, this would be the case with
- ``ColumnHeader("Test_1","Volts","")`` and ``ColumnHeader("Test(1)","","Smp")``.
+ ``ColumnHeader("Test_1","Volts","")`` and ``ColumnHeader("Test(1)","","Smp")``,
+ which would both result in ``"test_1"``.
+
+ Therefore, it is **strongly recommended** that you check for duplicate
+ column names after using this transformer. For example, see
+ :func:`more_itertools.classify_unique`.
:param col: The :class:`ColumnHeader` to process.
"""
@@ -158,7 +165,8 @@ def default_col_hdr_transform(col :ColumnHeader, *, short_units :Optional[dict[s
Although unlikely in practice (because column names usually only consist of letters, numbers,
and underscores, plus indices in parentheses), in theory, this transformation can result in
two columns on the same table having the same header. For example, this would be the case
- with ``ColumnHeader("Test","","Min")`` and ``ColumnHeader("Test/Min","","Smp")``.
+ with ``ColumnHeader("Test","","Min")`` and ``ColumnHeader("Test/Min","","Smp")``, which would
+ both result in ``"Test/Min"``.
:param col: The :class:`ColumnHeader` to process.
:param short_units: A lookup table in which the keys are the original unit names as
@@ -255,8 +263,8 @@ def write_header(env_line :EnvironmentLine, columns :Sequence[ColumnHeader]) ->
def read_pandas(filepath_or_buffer, *, encoding :str = 'UTF-8', encoding_errors :str = 'strict',
col_trans :ColumnHeaderTransformer = default_col_hdr_transform, **kwargs):
- """A helper function to read TOA5 files into a Pandas DataFrame with
- `pandas.read_csv <https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html>`_.
+ """A helper function to read TOA5 files into a :class:`pandas.DataFrame`.
+ Uses :func:`pandas.read_csv` internally.
>>> import toa5, pandas
>>> df = toa5.read_pandas('Example.dat', low_memory=False)
@@ -272,15 +280,20 @@ def read_pandas(filepath_or_buffer, *, encoding :str = 'UTF-8', encoding_errors
table_name='Example')
:param filepath_or_buffer: A filename or file object from which to read the TOA5 data.
- *Unlike* ``pandas.read_csv``, URLs are not accepted, only such filenames that Python's :func:`open` accepts.
+
+ .. note::
+ Unlike :func:`pandas.read_csv`, URLs are not accepted; only filenames that Python's :func:`open` accepts are supported.
+
:param col_trans: The :class:`ColumnHeaderTransformer` to use to convert the :class:`ColumnHeader` objects
into column names. Defaults to :func:`default_col_hdr_transform`
- :param kwargs: Any additional keyword arguments are passed through to ``pandas.read_csv``.
- It is **not recommended** to set ``header`` and ``names``, since they are controlled by this function.
+ :param kwargs: Any additional keyword arguments are passed through to :func:`pandas.read_csv`.
+ It is **not recommended** to set ``header`` and ``names``, since they are provided by this function.
Other options that this function provides by default, such as ``na_values`` or ``index_col``, may be overridden.
- :return: A Pandas DataFrame.
- The :class:`EnvironmentLine` is stored in the DataFrame's ``attrs`` under the key ``toa5_env_line``.
- Note that, at the time of writing, Pandas documents ``attrs`` as being experimental.
+ :return: A :class:`pandas.DataFrame`.
+ The :class:`EnvironmentLine` is stored in :attr:`pandas.DataFrame.attrs` under the key ``"toa5_env_line"``.
+
+ .. note::
+ At the time of writing, :attr:`pandas.DataFrame.attrs` is documented as being experimental.
"""
pd = importlib.import_module('pandas')
cm :Any