Skip to content

Commit

Permalink
chore: add cli docs (#47)
Browse files Browse the repository at this point in the history
* added instructions for CLI to docs so users know they exist/how to use them

* added more instructions
  • Loading branch information
zbilodea authored Jan 29, 2024
1 parent ce3ed88 commit 294b4e3
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 100 deletions.
4 changes: 0 additions & 4 deletions docs/source/odapt.parquet_to_root.rst

This file was deleted.

4 changes: 0 additions & 4 deletions docs/source/odapt.rst

This file was deleted.

87 changes: 41 additions & 46 deletions src/odapt/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,26 @@ def main() -> None:
@main.command()
@click.argument("destination", type=click.Path())
@click.argument("file")
@click.option("--drop_branches", default=None, type=list, required=False)
@click.option("--branch_types", default=None, type=dict, required=False)
@click.option("--name", required=False, default="")
@click.option("--branch-types", default=None, type=dict, required=False)
@click.option("--title", required=False, default="")
@click.option(
"--initial_basket_capacity",
"--initial-basket-capacity",
default=10,
help="Number of TBaskets that can be written to the TTree without rewriting the TTree metadata to make room.",
)
@click.option(
"--resize_factor",
"--resize-factor",
default=10.0,
help="When the TTree metadata needs to be rewritten, this specifies how many more TBasket slots to allocate as a multiplicative factor.",
)
@click.option(
"--force",
default=True,
help="If True, overwrites destination file if it already exists.",
)
@click.option(
"--compression",
default="lz4",
help='Sets compression level for root file to write to. Can be one of "ZLIB", "LZMA", "LZ4", or "ZSTD". By default the compression algorithm is "LZ4".',
)
@click.option(
"--compression_level",
"--compression-level",
default=1,
help="Use a compression level particular to the chosen compressor. By default the compression level is 1.",
)
Expand Down Expand Up @@ -88,16 +83,16 @@ def parquet_to_root(
@main.command()
@click.argument("destination", type=click.Path())
@click.argument("file")
@click.option("--drop_branches", default=None, type=list, required=False)
@click.option("--branch_types", default=None, type=dict, required=False)
@click.option("--drop-branches", default=None, type=list, required=False)
@click.option("--branch-types", default=None, type=dict, required=False)
@click.option("--title", required=False, default="")
@click.option(
"--initial_basket_capacity",
"--initial-basket-capacity",
default=10,
help="Number of TBaskets that can be written to the TTree without rewriting the TTree metadata to make room.",
)
@click.option(
"--resize_factor",
"--resize-factor",
default=10.0,
help="When the TTree metadata needs to be rewritten, this specifies how many more TBasket slots to allocate as a multiplicative factor.",
)
Expand Down Expand Up @@ -160,12 +155,12 @@ def copy_root(
help='Sets compression level for root file to write to. Can be one of "ZLIB", "LZMA", "LZ4", or "ZSTD". By default the compression algorithm is "LZ4".',
)
@click.option(
"--compression_level",
"--compression-level",
default=1,
help="Use a compression level particular to the chosen compressor. By default the compression level is 1.",
)
@click.option(
"--skip_bad_files",
"--skip-bad-files",
default=False,
help="Skip corrupt or non-existent files without exiting",
)
Expand All @@ -175,7 +170,7 @@ def copy_root(
help="Adds the histograms that have the same name and appends all others to the new file",
)
@click.option(
"--same_names",
"--same-names",
default=False,
help="Only adds histograms together if they have the same name",
)
Expand Down Expand Up @@ -213,25 +208,25 @@ def add(
@click.argument("destination")
@click.argument("files")
@click.option(
"--branch_types",
"--branch-types",
default=None,
type=dict,
required=False,
help="Manually enter branch names and types to improve performance slightly.",
)
@click.option("--title", required=False, default="", help="Set title of new TTree.")
@click.option(
"--initial_basket_capacity",
"--initial-basket-capacity",
default=10,
help="Number of TBaskets that can be written to the TTree without rewriting the TTree metadata to make room.",
)
@click.option(
"--resize_factor",
"--resize-factor",
default=10.0,
help="When the TTree metadata needs to be rewritten, this specifies how many more TBasket slots to allocate as a multiplicative factor.",
)
@click.option(
"--step_size",
"--step-size",
default=100,
help="If an integer, the maximum number of entries to include in each iteration step; if a string, the maximum memory size to include. The string must be a number followed by a memory unit, such as “100 MB”.",
)
Expand All @@ -245,12 +240,12 @@ def add(
help='Sets compression level for root file to write to. Can be one of "ZLIB", "LZMA", "LZ4", or "ZSTD". By default the compression algorithm is "LZ4".',
)
@click.option(
"--compression_level",
"--compression-level",
default=1,
help="Use a compression level particular to the chosen compressor. By default the compression level is 1.",
)
@click.option(
"--skip_bad_files",
"--skip-bad-files",
default=False,
help="Skip corrupt or non-existent files without exiting",
)
Expand Down Expand Up @@ -297,8 +292,8 @@ def add_and_merge(


@main.command()
@click.argument("in_file", required=True)
@click.argument("out_file", required=True)
@click.argument("in-file", required=True)
@click.argument("out-file", required=True)
@click.option(
"-t",
"--tree",
Expand All @@ -315,35 +310,35 @@ def add_and_merge(
)
@click.option(
"-s",
"--step_size",
"--step-size",
default="100 MB",
help="Specify batch size for reading ROOT file. If an integer, the maximum number of entries to include in each iteration step; if a string, the maximum memory size to include.",
)
@click.option(
"--list_to32",
"--list-to32",
default=False,
type=bool,
help="If True, convert Awkward lists into 32-bit Arrow lists if they're small enough.",
)
@click.option(
"--string_to32",
"--string-to32",
default=True,
type=bool,
help="If True, convert Awkward lists into 32-bit Arrow string if they're small enough.",
)
@click.option(
"--bytestring_to32",
"--bytestring-to32",
default=True,
type=bool,
help="If True, convert Awkward lists into 32-bit Arrow binary if they're small enough.",
)
@click.option(
"--emptyarray_to",
"--emptyarray-to",
default=None,
help="If None, #ak.types.UnknownType maps to Arrow's null type; otherwise, it is converted a given numeric dtype.",
)
@click.option(
"--categorical_as_dictionary",
"--categorical-as-dictionary",
default=False,
help='If True, ak.contents.IndexedArray and ak.contents.IndexedOptionArray labeled with __array__ = "categorical" are mapped to Arrow `DictionaryArray`; otherwise, the projection is evaluated before conversion.',
)
Expand All @@ -354,7 +349,7 @@ def add_and_merge(
help="If True, this function returns extended Arrow arrays (at all levels of nesting), which preserve metadata so that Awkward \u2192 Arrow \u2192 Awkward preserves the array's ak.types.Type (though not the ak.forms.Form). If False, this function returns generic Arrow arrays that might be needed for third-party tools that don't recognize Arrow's extensions.",
)
@click.option(
"--count_nulls",
"--count-nulls",
default=True,
type=bool,
help="Count the number of missing values at each level and include these in the resulting Arrow array, which makes some downstream applications faster. If False, skip the up-front cost of counting them.",
Expand All @@ -366,79 +361,79 @@ def add_and_merge(
help='Compression algorithm name Parquet supports {"NONE", "SNAPPY", "GZIP", "BROTLI", "LZ4", "ZSTD"}',
)
@click.option(
"--compression_level",
"--compression-level",
default=None,
type=int,
help="Set compression level for chosen compression algorithm.",
)
@click.option(
"-rg",
"--row_group_size",
"--row-group-size",
default=64 * 1024 * 1024,
type=int,
help="Choose number of entries in each row-group (except the last).",
)
@click.option(
"--data_page_size", default=None, help="Choose number of bytes in each data page."
"--data-page-size", default=None, help="Choose number of bytes in each data page."
)
@click.option(
"--parquet_flavor",
"--parquet-flavor",
default=None,
help='Choose flavor. If None, the output Parquet file will follow Arrow conventions; if "spark", it will follow Spark conventions.',
)
@click.option(
"--parquet_version", default="2.4", type=str, help="Parquet file format version."
"--parquet-version", default="2.4", type=str, help="Parquet file format version."
)
@click.option(
"--parquet_page_version",
"--parquet-page-version",
default="1.0",
type=str,
help="Parquet page format version.",
)
@click.option(
"--parquet_metadata_statistics",
"--parquet-metadata-statistics",
default=True,
type=bool,
help="Include summary statistics for each data page in the Parquet metadata.",
)
@click.option(
"--parquet_dictionary_encoding",
"--parquet-dictionary-encoding",
default=False,
type=bool,
help="Allow Parquet to pre-compress with dictionary encoding.",
)
@click.option(
"--parquet_byte_stream_split",
"--parquet-byte-stream-split",
default=False,
type=bool,
help="Pre-compress floating point fields ('float32' or 'float64') with byte stream splitting.",
)
@click.option(
"--parquet_coerce_timestamps",
"--parquet-coerce-timestamps",
default=None,
type=str,
help="Choose resolution of timestamps.",
)
@click.option(
"--parquet_old_int96_timestamps",
"--parquet-old-int96-timestamps",
default=None,
type=bool,
help="Choose to use INT96 format for any timestamps.",
)
@click.option(
"--parquet_compliant_nested",
"--parquet-compliant-nested",
default=False,
type=bool,
help="Choose to use the Spark/BigQuery/Parquet convention for nested list.",
)
@click.option(
"--parquet_extra_options",
"--parquet-extra-options",
default=None,
type=dict,
help="Options to pass to pyarrow.parquet.ParquetWriter",
)
@click.option(
"--storage_options",
"--storage-options",
default=None,
type=dict,
help="Any additional options to pass to fsspec.core.url_to_fs to open a remote file for writing.",
Expand Down
21 changes: 14 additions & 7 deletions src/odapt/copy_root.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,34 +33,36 @@ def copy_root(
:param files: Local ROOT file to copy. May contain glob patterns.
:type files: str
:param drop_branches: To remove branches from a tree, pass a list of names of branches to remove.
Defaults to None.
Defaults to None. Command line option: ``--drop-branches``.
:type drop_branches: list of str, optional
:param fieldname_separator: If data includes jagged arrays, pass the character that separates
TBranch names for columns, used for grouping columns (to avoid duplicate counters in ROOT file). Defaults to "_".
:type fieldname_separator: str, optional
:param branch_types: Name and type specification for the TBranches. Defaults to None.
:type branch_types: dict or pairs of str → NumPy dtype/Awkward type, optional
:param title: to change the title of the ttree, pass a new name. Defaults to None.
:param title: to change the title of the ttree, pass a new name. Defaults to None. Command line option: ``--title``.
:type title: str, optional
:param field_name: Function to generate TBranch names for columns of an Awkward record array or a
Pandas DataFrame. Defaults to ``lambda outer, inner: inner if outer == "" else outer + "_" +
inner``.
:type field_name: callable of str → str, optional
:param initial_basket_capacity: Number of TBaskets that can be written to the TTree without
rewriting the TTree metadata to make room. Defaults to 10.
rewriting the TTree metadata to make room. Defaults to 10. Command line option: ``--initial-basket-capacity``.
:type initial_basket_capacity: int, optional
:param resize_factor: When the TTree metadata needs to be rewritten, this specifies how many more
TBasket slots to allocate as a multiplicative factor. Defaults to 10.0.
TBasket slots to allocate as a multiplicative factor. Defaults to 10.0. Command line option: ``--resize-factor``.
:type resize_factor: float, optional.
:param counter_name: Function to generate counter-TBranch names for Awkward Arrays of variable-length
lists. Defaults to ``lambda counted: "n" + counted``.
:type counter_name: callable of str \u2192 str, optional
:param step_size: If an integer, the maximum number of entries to include in each iteration step; if
a string, the maximum memory size to include. The string must be a number followed by a memory unit, such as “100 MB”. Defaults to \100.
a string, the maximum memory size to include. The string must be a number followed by a memory unit, such as “100 MB”.
Defaults to 100. Command line option: ``--step-size``.
:type step_size: int or str, optional
:param compression: Sets compression level for root file to write to. Can be one of "ZLIB", "LZMA", "LZ4", or "ZSTD". Defaults to "LZ4".
:param compression: Sets the compression algorithm for the root file to write to. Can be one of "ZLIB", "LZMA", "LZ4", or "ZSTD".
Defaults to "LZ4". Command line option: ``--compression``.
:type compression: str
:param compression_level: Use a compression level particular to the chosen compressor. Defaults to 1.
:param compression_level: Use a compression level particular to the chosen compressor. Defaults to 1. Command line option: ``--compression-level``.
:type compression_level: int
Expand All @@ -74,6 +76,11 @@ def copy_root(
>>> odapt.copy_root("copied_file.root", "original_file.root", drop_branches=["branch1", "branch2"])
Command Line Instructions:
--------------------------
This function can be run from the command line. Use command
>>> odapt copy-root [options] [OUT_FILE] [IN_FILE]
"""
if compression in ("LZMA", "lzma"):
Expand Down
17 changes: 13 additions & 4 deletions src/odapt/histogram_adding.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,30 +391,39 @@ def hadd(
May contain glob patterns.
:type files: str or list of str
:param force: If True, overwrites destination file if it exists. Force and append
cannot both be True. Defaults to True.
cannot both be True. Defaults to True. Command line options: ``-f`` or ``--force``.
:type force: bool, optional
:param append: If True, appends histograms to an existing file. Force and append
cannot both be True. Defaults to False.
cannot both be True. Defaults to False. Command line option: ``--append``.
:type append: bool, optional
:param compression: Sets the compression algorithm for the root file to write to. Can be one of
"ZLIB", "LZMA", "LZ4", or "ZSTD". By default the compression algorithm is "LZ4".
Command line option: ``--compression``.
:type compression: str, optional
:param compression_level: Use a compression level particular to the chosen compressor.
By default the compression level is 1.
By default the compression level is 1. Command line option: ``--compression-level``.
:type compression_level: int
:param skip_bad_files: If True, skips corrupt or non-existent files without exiting.
Command line option: ``--skip-bad-files``.
:type skip_bad_files: bool, optional
:param union: If True, adds the histograms that have the same name and appends all others
to the new file. Defaults to True.
to the new file. Defaults to True. Command line option: ``--union``.
:type union: bool, optional
:param same_names: If True, only adds together histograms which have the same name (key). If False,
histograms are added together based on TTree structure (bins must be equal). Defaults to True.
Command line option: ``--same-names``.
:type same_names: bool, optional
Example:
--------
>>> odapt.hadd("destination.root", ["file1_to_hadd.root", "file2_to_hadd.root"])
Command Line Instructions:
--------------------------
This function can be run from the command line. Use command
>>> odapt add [options] [OUT_FILE] [IN_FILES]
"""
if compression in ("ZLIB", "zlib"):
compression_code = uproot.const.kZLIB
Expand Down
Loading

0 comments on commit 294b4e3

Please sign in to comment.