diff --git a/README.rst b/README.rst index 4b48b3b..56e9ddd 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ pytd **pytd** provides user-friendly interfaces to Treasure Data’s `REST APIs `__, `Presto query -engine `__, +engine `__, and `Plazma primary storage `__. @@ -29,9 +29,9 @@ Usage Colaboratory `__ Set your `API -key `__ +key `__ and -`endpoint `__ +`endpoint `__ to the environment variables, ``TD_API_KEY`` and ``TD_API_SERVER``, respectively, and create a client instance: @@ -123,7 +123,8 @@ Since td-spark gives special access to the main storage system via follow the instructions below: 1. Contact support@treasuredata.com to activate the permission to your - Treasure Data account. + Treasure Data account. Note that the underlying component, Plazma Public + API, limits its free tier at 100GB Read and 100TB Write. 2. Install pytd with ``[spark]`` option if you use the third option: ``pip install pytd[spark]`` @@ -146,7 +147,7 @@ Treasure Data offers three different Python clients on GitHub, and the following - Basic REST API wrapper. - Similar functionalities to td-client-{`ruby `__, `java `__, `node `__, `go `__}. - - The capability is limited by `what Treasure Data REST API can do `__. + - The capability is limited by `what Treasure Data REST API can do `__. 2. **pytd** diff --git a/doc/contributing.rst b/doc/contributing.rst index 1fb0261..2a0a601 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -87,9 +87,4 @@ Commit and push the latest code, and tag the version: git tag 1.0.0 git push --tags -Build a package and upload to PyPI: - -.. code:: sh - - python setup.py sdist bdist_wheel - twine upload --skip-existing dist/* +`GitHub Actions Workflow `__ then automatically releases the tagged version on PyPI. A tag and version number must be identical and follow the `semantic versioning convention `__. 
diff --git a/pytd/client.py b/pytd/client.py index 0d56cf6..382a5fc 100644 --- a/pytd/client.py +++ b/pytd/client.py @@ -25,7 +25,7 @@ class Client(object): endpoint : str, optional Treasure Data API server. If not given, ``https://api.treasuredata.com`` is used by default. List of available endpoints is: - https://tddocs.atlassian.net/wiki/spaces/PD/pages/1085143/Sites+and+Endpoints + https://docs.treasuredata.com/display/public/PD/Sites+and+Endpoints database : str, default: 'sample_datasets' Name of connected database. @@ -201,8 +201,8 @@ def query(self, query, engine=None, **kwargs): - ``wait_interval`` (int): sleep interval until job finish - ``wait_callback`` (function): called every interval against job itself - ``engine_version`` (str): run query with Hive 2 if this parameter - is set to ``"experimental"`` and ``engine`` denotes Hive. - https://tddocs.atlassian.net/wiki/spaces/PD/pages/1083123/Using+Hive+2+to+Create+Queries + is set to ``"stable"`` and ``engine`` denotes Hive. + https://docs.treasuredata.com/display/public/PD/Writing+Hive+Queries Meanwhile, when a following argument is set to ``True``, query is deterministically issued via ``tdclient``. diff --git a/pytd/pandas_td/__init__.py b/pytd/pandas_td/__init__.py index 34ba69d..4c224f5 100644 --- a/pytd/pandas_td/__init__.py +++ b/pytd/pandas_td/__init__.py @@ -23,7 +23,7 @@ def connect(apikey=None, endpoint=None, **kwargs): endpoint : str, optional Treasure Data API server. If not given, ``https://api.treasuredata.com`` is used by default. 
List of available endpoints is: - https://tddocs.atlassian.net/wiki/spaces/PD/pages/1085143/Sites+and+Endpoints + https://docs.treasuredata.com/display/public/PD/Sites+and+Endpoints kwargs : dict, optional Optional arguments @@ -173,8 +173,8 @@ def read_td_query( - ``wait_interval`` (int): sleep interval until job finish - ``wait_callback`` (function): called every interval against job itself - ``engine_version`` (str): run query with Hive 2 if this parameter is - set to ``"experimental"`` in ``HiveQueryEngine``. - https://tddocs.atlassian.net/wiki/spaces/PD/pages/1083123/Using+Hive+2+to+Create+Queries + set to ``"stable"`` in ``HiveQueryEngine``. + https://docs.treasuredata.com/display/public/PD/Writing+Hive+Queries Returns ------- diff --git a/pytd/query_engine.py b/pytd/query_engine.py index 9ca6f74..fdd471c 100644 --- a/pytd/query_engine.py +++ b/pytd/query_engine.py @@ -74,8 +74,8 @@ def execute(self, query, **kwargs): - ``wait_interval`` (int): sleep interval until job finish - ``wait_callback`` (function): called every interval against job itself - ``engine_version`` (str): run query with Hive 2 if this parameter - is set to ``"experimental"`` in ``HiveQueryEngine``. - https://tddocs.atlassian.net/wiki/spaces/PD/pages/1083123/Using+Hive+2+to+Create+Queries + is set to ``"stable"`` in ``HiveQueryEngine``. + https://docs.treasuredata.com/display/public/PD/Writing+Hive+Queries Meanwhile, when a following argument is set to ``True``, query is deterministically issued via ``tdclient``. @@ -178,8 +178,8 @@ def _get_tdclient_cursor(self, con, **kwargs): - ``wait_interval`` (int): sleep interval until job finish - ``wait_callback`` (function): called every interval against job itself - ``engine_version`` (str): run query with Hive 2 if this parameter - is set to ``"experimental"`` in ``HiveQueryEngine``. - https://tddocs.atlassian.net/wiki/spaces/PD/pages/1083123/Using+Hive+2+to+Create+Queries + is set to ``"stable"`` in ``HiveQueryEngine``. 
+ https://docs.treasuredata.com/display/public/PD/Writing+Hive+Queries Returns ------- @@ -397,8 +397,8 @@ def cursor(self, force_tdclient=True, **kwargs): - ``wait_interval`` (int): sleep interval until job finish - ``wait_callback`` (function): called every interval against job itself - ``engine_version`` (str): run query with Hive 2 if this parameter - is set to ``"experimental"``. - https://tddocs.atlassian.net/wiki/spaces/PD/pages/1083123/Using+Hive+2+to+Create+Queries + is set to ``"stable"``. + https://docs.treasuredata.com/display/public/PD/Writing+Hive+Queries Returns ------- diff --git a/pytd/spark.py b/pytd/spark.py index a29be5c..a84bc96 100644 --- a/pytd/spark.py +++ b/pytd/spark.py @@ -66,7 +66,7 @@ def fetch_td_spark_context( endpoint : str, optional Treasure Data API server. If not given, ``https://api.treasuredata.com`` is used by default. List of available endpoints is: - https://tddocs.atlassian.net/wiki/spaces/PD/pages/1085143/Sites+and+Endpoints + https://docs.treasuredata.com/display/public/PD/Sites+and+Endpoints td_spark_path : str, optional Path to td-spark-assembly-{td-spark-version}_spark{spark-version}.jar. diff --git a/pytd/table.py b/pytd/table.py index 744f64a..e3df9aa 100644 --- a/pytd/table.py +++ b/pytd/table.py @@ -72,7 +72,7 @@ def create(self, column_names=[], column_types=[]): column_types : list of str, optional Column types corresponding to the names. Note that Treasure Data supports limited amount of types as documented in: - https://tddocs.atlassian.net/wiki/spaces/PD/pages/1083743/Schema+Management + https://docs.treasuredata.com/display/public/PD/Schema+Management """ if len(column_names) > 0: schema = ", ".join( diff --git a/pytd/writer.py b/pytd/writer.py index bbad006..0b996f5 100644 --- a/pytd/writer.py +++ b/pytd/writer.py @@ -232,7 +232,7 @@ def _insert_into(self, table, list_of_tuple, column_names, column_types, if_exis column_types : list of str Column types corresponding to the names. 
Note that Treasure Data supports limited amount of types as documented in: - https://tddocs.atlassian.net/wiki/spaces/PD/pages/1083743/Schema+Management + https://docs.treasuredata.com/display/public/PD/Schema+Management if_exists : {'error', 'overwrite', 'append', 'ignore'} What happens when a target table already exists.