docs

Mikata-Project · Feb 14, 2022 · aabfde4 · aabfde4
1 parent 965e3f7
commit aabfde4
Show file tree

Hide file tree

Showing 3 changed files with 140 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -2,14 +2,26 @@
 Python helpers for doing IO with Pandas DataFrames
 
 # Available methods
+## read_df
+
+* bzip2/gzip/zstandard compression
+* passing parameters to Pandas' readers
+* reading from anything `smart_open` supports (local files, AWS S3, etc.)
+* most of the formats Pandas supports
+
 ## write_df
 
 This method supports:
 * streaming writes
 * chunked writes
-* gzip/zstandard compression
+* bzip2/gzip/zstandard compression
 * passing parameters to Pandas' writers
-* writing to AWS S3 and local files
+* writing to anything `smart_open` supports (local files, AWS S3, etc.)
+* most of the formats Pandas supports
+
+# Documentation
+
+[API doc](https://github.com/Mikata-Project/df_io/tree/master/docs/df_io.md)
 
 ### Examples
 

diff --git a/df_io/__init__.py b/df_io/__init__.py
@@ -24,7 +24,20 @@ def _writer_wrapper(writer, fhs, writer_args, writer_options):
 
 
 def read_df(path, fmt="csv", reader_args=[], reader_options={}, open_kw={}):
-    """Read DataFrame."""
+    """Read DataFrame.
+
+    Args:
+        path (str): The path to read from. Can be anything `smart_open` supports, such as `s3://bucket/file`.
+            Compression type is inferred from the file extension.
+
+    Kwargs:
+        fmt (str): The format to read. Should work with most of Pandas `read_*` methods.
+        reader_args (list): Argument list for the Pandas `read_$fmt` method.
+        reader_options (dict): Keyword arguments for the Pandas `read_$fmt` method.
+        open_kw (dict): Keyword arguments for `smart_open`.
+    Returns:
+        The read Pandas DataFrame.
+    """
     reader_defaults = {"csv": {"encoding": "UTF_8"},
                        "json": {"orient": "records", "lines": True}}
     if not reader_options:
@@ -62,7 +75,7 @@ def write_df(df, path, copy_paths=[], fmt="csv", compress_level=6,
              chunksize=None, writer_args=[], writer_options={},
              zstd_options={"threads": -1}, open_kw={}):
     """
-    Pandas DataFrame write helper
+    Write Pandas DataFrame.
 
     Can write to local files and to S3 paths in any format supported by the
     installed pandas version. Writer-specific arguments can be given in
@@ -73,6 +86,22 @@ def write_df(df, path, copy_paths=[], fmt="csv", compress_level=6,
     Additional output files can be specified in `copy_paths` parameter, as
     a list of either local, or `s3://...` paths. The same output will be written
     there as to `path` in parallel to reduce overhead.
+
+    Args:
+        df (pandas.DataFrame): The DataFrame to write.
+        path (str): The path to write to. Can be anything `smart_open` supports, such as `s3://bucket/file`.
+
+    Kwargs:
+        copy_paths (list[str]): Place a copy to these paths as well. Writes in parallel.
+        fmt (str): The format to write. Should work with most of Pandas `to_*` methods.
+        compress_level (int): Compression level, passed through to the compressor. gzip/bzip2: 1-9, zstd: 1-22.
+        chunksize (int): Break the DataFrame into `chunksize`-sized chunks and write those.
+        writer_args (list): Argument list for the Pandas `to_$fmt` method.
+        writer_options (dict): Keyword arguments for the Pandas `to_$fmt` method.
+        zstd_options (dict): Keyword arguments for the `zstd` compressor.
+        open_kw (dict): Keyword arguments for `smart_open`.
+    Returns:
+        None
     """
     if compress_level is not None:
         zstd_options["level"] = compress_level

diff --git a/docs/df_io.md b/docs/df_io.md
@@ -0,0 +1,95 @@
+<!-- markdownlint-disable -->
+
+<a href="../df_io/__init__.py#L0"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+
+# <kbd>module</kbd> `df_io`
+Helpers for reading/writing Pandas DataFrames. 
+
+
+---
+
+<a href="../df_io/__init__.py#L26"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+
+## <kbd>function</kbd> `read_df`
+
+```python
+read_df(path, fmt='csv', reader_args=[], reader_options={}, open_kw={})
+```
+
+Read DataFrame. 
+
+
+
+**Args:**
+
+ - <b>`path`</b> (str):  The path to read from. Can be anything `smart_open` supports, such as `s3://bucket/file`.  Compression type is inferred from the file extension.
+
+
+
+**Kwargs:**
+
+ - <b>`fmt`</b> (str):  The format to read. Should work with most of Pandas `read_*` methods. 
+ - <b>`reader_args`</b> (list):  Argument list for the Pandas `read_$fmt` method. 
+ - <b>`reader_options`</b> (dict):  Keyword arguments for the Pandas `read_$fmt` method. 
+ - <b>`open_kw`</b> (dict):  Keyword arguments for `smart_open`. 
+
+**Returns:**
+ The read Pandas DataFrame. 
+
+
+---
+
+<a href="../df_io/__init__.py#L74"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
+
+## <kbd>function</kbd> `write_df`
+
+```python
+write_df(
+    df,
+    path,
+    copy_paths=[],
+    fmt='csv',
+    compress_level=6,
+    chunksize=None,
+    writer_args=[],
+    writer_options={},
+    zstd_options={'threads': -1},
+    open_kw={}
+)
+```
+
+Write Pandas DataFrame. 
+
+Can write to local files and to S3 paths in any format supported by the installed pandas version. Writer-specific arguments can be given in writer_args and writer_options. If the path parameter starts with s3://, it will try to do an S3 write; otherwise it opens a local file with that path.
+
+Additional output files can be specified in `copy_paths` parameter, as a list of either local, or `s3://...` paths. The same output will be written there as to `path` in parallel to reduce overhead. 
+
+
+
+**Args:**
+
+ - <b>`df`</b> (pandas.DataFrame):  The DataFrame to write. 
+ - <b>`path`</b> (str):  The path to write to. Can be anything `smart_open` supports, such as `s3://bucket/file`.
+
+
+
+**Kwargs:**
+
+ - <b>`copy_paths`</b> (list[str]):  Place a copy to these paths as well. Writes in parallel. 
+ - <b>`fmt`</b> (str):  The format to write. Should work with most of Pandas `to_*` methods.
+ - <b>`compress_level`</b> (int):  Compression level, passed through to the compressor. gzip/bzip2: 1-9, zstd: 1-22.
+ - <b>`chunksize`</b> (int):  Break the DataFrame into `chunksize`-sized chunks and write those.
+ - <b>`writer_args`</b> (list):  Argument list for the Pandas `to_$fmt` method.
+ - <b>`writer_options`</b> (dict):  Keyword arguments for the Pandas `to_$fmt` method.
+ - <b>`zstd_options`</b> (dict):  Keyword arguments for the `zstd` compressor. 
+ - <b>`open_kw`</b> (dict):  Keyword arguments for `smart_open`. 
+
+**Returns:**
+ None 
+
+
+
+
+---
+
+_This file was automatically generated via [lazydocs](https://github.com/ml-tooling/lazydocs)._