diff --git a/README.md b/README.md
index 0449c48..6156f47 100644
--- a/README.md
+++ b/README.md
@@ -2,14 +2,26 @@
Python helpers for doing IO with Pandas DataFrames
# Available methods
+## read_df
+
+* bzip2/gzip/zstandard compression
+* passing parameters to Pandas' readers
+* reading from anything that `smart_open` supports (local files, AWS S3, etc.)
+* most of the formats that Pandas supports
+
## write_df
This method supports:
* streaming writes
* chunked writes
-* gzip/zstandard compression
+* bzip2/gzip/zstandard compression
* passing parameters to Pandas' writers
-* writing to AWS S3 and local files
+* writing to anything that `smart_open` supports (local files, AWS S3, etc.)
+* most of the formats that Pandas supports
+
+# Documentation
+
+[API doc](https://github.com/Mikata-Project/df_io/tree/master/docs/df_io.md)
### Examples
diff --git a/df_io/__init__.py b/df_io/__init__.py
index 579b2f3..6c4e1e7 100644
--- a/df_io/__init__.py
+++ b/df_io/__init__.py
@@ -24,7 +24,20 @@ def _writer_wrapper(writer, fhs, writer_args, writer_options):
def read_df(path, fmt="csv", reader_args=[], reader_options={}, open_kw={}):
- """Read DataFrame."""
+ """Read DataFrame.
+
+ Args:
+        path (str): The path to read from. Can be anything that `smart_open` supports, like `s3://bucket/file`.
+            Compression type is inferred.
+
+ Kwargs:
+        fmt (str): The format to read. Should work with most of Pandas' `read_*` methods.
+ reader_args (list): Argument list for the Pandas `read_$fmt` method.
+ reader_options (dict): Keyword arguments for the Pandas `read_$fmt` method.
+ open_kw (dict): Keyword arguments for `smart_open`.
+ Returns:
+ The read Pandas DataFrame.
+ """
reader_defaults = {"csv": {"encoding": "UTF_8"},
"json": {"orient": "records", "lines": True}}
if not reader_options:
@@ -62,7 +75,7 @@ def write_df(df, path, copy_paths=[], fmt="csv", compress_level=6,
chunksize=None, writer_args=[], writer_options={},
zstd_options={"threads": -1}, open_kw={}):
"""
- Pandas DataFrame write helper
+ Write Pandas DataFrame.
Can write to local files and to S3 paths in any format, supported by the
installed pandas version. Writer-specific arguments can be given in
@@ -73,6 +86,22 @@ def write_df(df, path, copy_paths=[], fmt="csv", compress_level=6,
Additional output files can be specified in `copy_paths` parameter, as
a list of either local, or `s3://...` paths. The same output will be written
there as to `path` in parallel to reduce overhead.
+
+ Args:
+ df (pandas.DataFrame): The DataFrame to write.
+        path (str): The path to write to. Can be anything that `smart_open` supports, like `s3://bucket/file`.
+
+ Kwargs:
+ copy_paths (list[str]): Place a copy to these paths as well. Writes in parallel.
+        fmt (str): The format to write. Should work with most of Pandas' `write_*` methods.
+ compress_level (int): Compress level, passed through to the compressor. gzip/bzip2: 1-9, zstd: 1-22.
+ chunksize (int): Break DataFrame into `chunksize` sized chunks and write those.
+ writer_args (list): Argument list for the Pandas `write_$fmt` method.
+ writer_options (dict): Keyword arguments for the Pandas `write_$fmt` method.
+ zstd_options (dict): Keyword arguments for the `zstd` compressor.
+ open_kw (dict): Keyword arguments for `smart_open`.
+ Returns:
+ None
"""
if compress_level is not None:
zstd_options["level"] = compress_level
diff --git a/docs/df_io.md b/docs/df_io.md
new file mode 100644
index 0000000..5ef6245
--- /dev/null
+++ b/docs/df_io.md
@@ -0,0 +1,95 @@
+
+
+
+
+# module `df_io`
+Helpers for reading/writing Pandas DataFrames.
+
+
+---
+
+
+
+## function `read_df`
+
+```python
+read_df(path, fmt='csv', reader_args=[], reader_options={}, open_kw={})
+```
+
+Read DataFrame.
+
+
+
+**Args:**
+
+ - `path` (str): The path to read from. Can be anything that `smart_open` supports, like `s3://bucket/file`. Compression type is inferred.
+
+
+
+**Kwargs:**
+
+ - `fmt` (str): The format to read. Should work with most of Pandas' `read_*` methods.
+ - `reader_args` (list): Argument list for the Pandas `read_$fmt` method.
+ - `reader_options` (dict): Keyword arguments for the Pandas `read_$fmt` method.
+ - `open_kw` (dict): Keyword arguments for `smart_open`.
+
+**Returns:**
+ The read Pandas DataFrame.
+
+
+---
+
+
+
+## function `write_df`
+
+```python
+write_df(
+ df,
+ path,
+ copy_paths=[],
+ fmt='csv',
+ compress_level=6,
+ chunksize=None,
+ writer_args=[],
+ writer_options={},
+ zstd_options={'threads': -1},
+ open_kw={}
+)
+```
+
+Write Pandas DataFrame.
+
+Can write to local files and to S3 paths in any format, supported by the installed pandas version. Writer-specific arguments can be given in writer_args and writer_options. If the path parameter starts with s3://, it will try to do an S3 write, otherwise opens a local file with that path.
+
+Additional output files can be specified in `copy_paths` parameter, as a list of either local, or `s3://...` paths. The same output will be written there as to `path` in parallel to reduce overhead.
+
+
+
+**Args:**
+
+ - `df` (pandas.DataFrame): The DataFrame to write.
+ - `path` (str): The path to write to. Can be anything that `smart_open` supports, like `s3://bucket/file`.
+
+
+
+**Kwargs:**
+
+ - `copy_paths` (list[str]): Place a copy to these paths as well. Writes in parallel.
+ - `fmt` (str): The format to write. Should work with most of Pandas' `write_*` methods.
+ - `compress_level` (int): Compress level, passed through to the compressor. gzip/bzip2: 1-9, zstd: 1-22.
+ - `chunksize` (int): Break DataFrame into `chunksize` sized chunks and write those.
+ - `writer_args` (list): Argument list for the Pandas `write_$fmt` method.
+ - `writer_options` (dict): Keyword arguments for the Pandas `write_$fmt` method.
+ - `zstd_options` (dict): Keyword arguments for the `zstd` compressor.
+ - `open_kw` (dict): Keyword arguments for `smart_open`.
+
+**Returns:**
+ None
+
+
+
+
+---
+
+_This file was automatically generated via [lazydocs](https://github.com/ml-tooling/lazydocs)._