diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 607cba37..b4edd015 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -17,7 +17,7 @@ build: # Build documentation in the docs/ directory with Sphinx sphinx: - configuration: doc/conf.py + configuration: docs/conf.py # If using Sphinx, optionally build your docs in additional formats such as PDF #formats: @@ -26,4 +26,4 @@ sphinx: # Optionally declare the Python requirements required to build your docs python: install: - - requirements: doc/requirements.txt + - requirements: docs/requirements.txt diff --git a/doc/index.rst b/doc/index.rst deleted file mode 100644 index 65ce18ab..00000000 --- a/doc/index.rst +++ /dev/null @@ -1,31 +0,0 @@ -Welcome to pdf2docx's documentation! -==================================== - -`pdf2docx `_ is a Python library -to extract data from PDF with ``PyMuPDF``, parse layout with rule, and -generate docx file with ``python-docx``. - -.. image:: https://s1.ax1x.com/2020/08/04/aDryx1.png - -.. toctree:: - :maxdepth: 2 - :caption: USER GUIDE - - installation - quickstart - techdoc - - -.. toctree:: - :maxdepth: 2 - :caption: API DOCUMENTATION - - api/modules - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/doc/Makefile b/docs/Makefile similarity index 100% rename from doc/Makefile rename to docs/Makefile diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..819c9c80 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,34 @@ +# pdf2docx documentation + +Welcome to the **pdf2docx** documentation. This documentation relies on [Sphinx](https://www.sphinx-doc.org/en/master/) to publish HTML docs from source files written in [reStructuredText](https://en.wikipedia.org/wiki/ReStructuredText) (RST). + + +## Sphinx version + +This README assumes you have [Sphinx v5.3.0 installed](https://www.sphinx-doc.org/en/master/usage/installation.html) on your system. 
+ + +## Updating the documentation + +Within `docs` update the associated restructured text (`.rst`) files. These files represent the corresponding document pages. + + +## Building HTML documentation + +- Ensure you have the `furo` theme installed: + +`pip install furo` + +Furo theme, Copyright (c) 2020 Pradyun Gedam , thank you to: + +https://github.com/pradyunsg/furo/blob/main/LICENSE + +- From the "docs" location run: + +`sphinx-build -b html . build/html` + +This then creates the HTML documentation within `build/html`. + +> Use: `sphinx-build -a -b html . build/html` to build all, including the assets in `_static` (important if you have updated CSS). + +For full details see: [Using Sphinx](https://www.sphinx-doc.org/en/master/usage/index.html) diff --git a/docs/_static/PyMuPDF.ico b/docs/_static/PyMuPDF.ico new file mode 100644 index 00000000..38e08e01 Binary files /dev/null and b/docs/_static/PyMuPDF.ico differ diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 00000000..5c3a8b9e --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,207 @@ +/* main document page: ensures pages fit to the available width and height */ +.wy-nav-content { + min-width: 100%; + min-height: 100vh; +} + +/* Accessibility: Artifex color for main document links */ +.wy-nav-content a { + color: #007aff; +} + +/* Artifex blue color for background elements */ +.wy-side-nav-search, .wy-nav-top { + background-color: #007aff; +} + +/* Accessibility: ensures that the version number is readable against the background color */ +.wy-side-nav-search>div.version { + color:hsla(0,0%,100%,1); +} + +.htmltag { + padding: 2px 5px; + background-color: #fbff68; + border-radius: 4px; + border: 1px solid #222; + color:#000; +} + +.discordLink { + display:flex; + justify-content:flex-end; + margin:0; + padding:0; + font-size: 13px; +} + +.discordLink img { + width: 30px; + height: 30px; + margin-left: 8px; +} + +.feedbackLink { + display:flex; + 
justify-content:flex-end; + margin:0 0 10px; + padding:0; + font-size: 13px; +} + +.intro-title { + font-size: 22px; + margin: 0 0 20px 0; +} + +h1 { + padding: 10px !important; + background-color: #007aff !important; + color: #fff !important; + border-radius: 5px !important; + margin-top: 20px !important; + margin-left: -10px !important; +} + +cite { + font-weight: bold; + font-style: normal; +} + +.red-color { + color: #cc0000; +} + +.orange-color { + color: #ff6600; +} + +.green-color { + color: #00cc00; +} + +button.cta { + -webkit-appearance: none; + -moz-appearance: none; + border:0; + text-transform:uppercase; + border-radius:5px; + font-size:16px; + font-weight:500; + min-height:40px; + line-height:40px; + padding: 0 15px; + color:#fff; + cursor:pointer; +} + +button.cta.orange { + width:auto; + background-image: linear-gradient(to right, #ea5842, #ec6343, #ed6d45, #ef7747, #f0804a) !important; +} + +button.cta.orange:hover { + background:#ea5842 !important; +} + +button.cta a { + color:#fff !important; +} + +.footer-version { + font-weight: bold; + font-size: 12px; + color: #999; +} + + +/*** Furo theme overrides ***/ +/* This is to do with hiding the Furo link text and the "Made with" text */ +.bottom-of-page .left-details { + font-size:0; +} + +.bottom-of-page .left-details a { + display:none; +} + +/* Now ensure that the other copyright text is visible here */ +.bottom-of-page .left-details > * { + font-size:12px; +} + +.sidebar-brand-text { + font-size: 13px; + padding: 0; + margin: 0; +} + +.sidebar-logo { + width: 60px; + height: 60px; +} + +.sidebar-container { + margin: 0; + padding: 0; +} + +.sidebar-container .sidebar-search-container { + display: none; +} + +.sidebar-search-container.top { + /*position:sticky; + top:10px;*/ + border-radius: 20px; + border: solid #333 1px; + background-color: #fff; +} + +.sidebar-search-container.top .sidebar-search { + border-top: 0 !important; + border-bottom: 0 !important; +} + +/* really important ! 
*/ +.sidebar-search { + color: #000 !important; +} + +.toc-drawer .toc-title { + font-weight: bold; + text-decoration: underline; +} + +:target>h1:first-of-type, span:target~h1:first-of-type { + background-color: #007aff !important; + color: #fff !important; + padding-top: 40px; /* accommodates header search blocking target */ + margin-top: -40px; +} + + +:target>h2:first-of-type, :target>h3:first-of-type, +:target>h4:first-of-type, :target>h5:first-of-type, :target>h6:first-of-type, +span:target~h2:first-of-type, span:target~h3:first-of-type, +span:target~h4:first-of-type, span:target~h5:first-of-type, span:target~h6:first-of-type { + background-color: transparent !important; + padding-top: 40px; /* accommodates header search blocking target */ + margin-top: -40px; + text-decoration: underline; +} + + +/* Dark mode colors */ +@media (prefers-color-scheme: dark) { + + + +} + +/* small screens */ +@media all and (max-width : 550px) { + .discordLink img { + display: none; + } +} diff --git a/doc/api/modules.rst b/docs/api/modules.rst similarity index 100% rename from doc/api/modules.rst rename to docs/api/modules.rst diff --git a/doc/conf.py b/docs/conf.py similarity index 82% rename from doc/conf.py rename to docs/conf.py index 1f021cb8..3af09de3 100644 --- a/doc/conf.py +++ b/docs/conf.py @@ -12,27 +12,32 @@ # import os import sys +import datetime sys.path.insert(0, os.path.abspath("../pdf2docx/")) # -- Project information ----------------------------------------------------- project = 'pdf2docx' -copyright = '2023, Artifex' +thisday = datetime.date.today() +copyright = str(thisday.year) + ", Artifex" author = 'Artifex Software, Inc.' -# The full version, including alpha/beta/rc tags -# read version number from version.txt, otherwise alpha version -# Github CI can create version.txt dynamically. 
+# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The full version, including alpha/beta/rc tags. def get_version(fname): if os.path.exists(fname): with open(fname, 'r') as f: - version = f.readline().strip() + release = f.readline().strip() else: - version = 'alpha' + release = 'alpha' + + return release - return version -release = get_version('../version.txt') +version = get_version('../version.txt') # -- General configuration --------------------------------------------------- @@ -40,7 +45,8 @@ def get_version(fname): # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinxcontrib.apidoc' + 'sphinxcontrib.apidoc', + 'sphinx_copybutton' ] apidoc_module_dir = '../pdf2docx' @@ -63,14 +69,22 @@ def get_version(fname): # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -# html_theme = 'alabaster' -html_theme = 'sphinx_rtd_theme' +html_theme = 'furo' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = ['_static'] +html_static_path = ['_static'] + +# A list of CSS files. The entry must be a filename string or a tuple containing +# the filename string and the attributes dictionary. The filename must be +# relative to the html_static_path, or a full URI +html_css_files = ["custom.css"] + + +html_favicon = "_static/PyMuPDF.ico" + # -- Options for LaTeX output --------------------------------------------- latex_elements = { diff --git a/docs/footer.rst b/docs/footer.rst new file mode 100644 index 00000000..76664309 --- /dev/null +++ b/docs/footer.rst @@ -0,0 +1,31 @@ +.. raw:: html + + + + + +---- + +.. raw:: html + +

This software is provided AS-IS with no warranty, either express or implied. This software is distributed under license and may not be copied, modified or distributed except as expressly authorized under the terms of that license. Refer to licensing information at artifex.com or contact Artifex Software Inc., 39 Mesa Street, Suite 108A, San Francisco CA 94129, United States for further information.

+ + +.. rst-class:: footer-version + + This documentation covers all versions up to |version|. + + +.. note - this ensures that the Sphinx build system will pull in the image (as it is referenced in an RST file) to _images, + we don't want to display it via rst markup due to limitations (hence width:0), however we do want it available for our raw HTML + which we use in header.rst. + +.. image:: images/discord-mark-blue.svg + :alt: Discord logo + :width: 0 + :height: 0 + :target: https://discord.gg/TSpYGBW4eq diff --git a/docs/header.rst b/docs/header.rst new file mode 100644 index 00000000..f1b9fafe --- /dev/null +++ b/docs/header.rst @@ -0,0 +1,41 @@ +.. meta:: + :author: Artifex + :description: pdf2docx is a Python library to extract data from PDF with PyMuPDF, parse layout with rules, and generate docx files with python-docx + :keywords: PDF Text Extraction, PDF Image Extraction, PDF Conversion, PDF Tables + + +.. |history_begin| raw:: html + +
+ Show/hide history + +.. |history_end| raw:: html + +
+ + + +.. raw:: html + +
+ +
+ +
+ + + +
+ + diff --git a/docs/images/discord-mark-blue.svg b/docs/images/discord-mark-blue.svg new file mode 100644 index 00000000..ca654007 --- /dev/null +++ b/docs/images/discord-mark-blue.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/images/intro.png b/docs/images/intro.png new file mode 100644 index 00000000..c4830397 Binary files /dev/null and b/docs/images/intro.png differ diff --git a/docs/images/pdf-converter.png b/docs/images/pdf-converter.png new file mode 100644 index 00000000..1e9f9efe Binary files /dev/null and b/docs/images/pdf-converter.png differ diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..3f6ea6a7 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,45 @@ +.. include:: header.rst + +Welcome to pdf2docx +==================================== + +:title:`pdf2docx` is a **Python** library +to extract data from **PDF** with PyMuPDF_, parse layout with rules, and +generate **docx** files with ``python-docx``. + + +:title:`pdf2docx` is hosted on `GitHub <https://github.com/dothinking/pdf2docx>`_ and registered on `PyPI <https://pypi.org/project/pdf2docx/>`_. + +---- + + +.. image:: images/intro.png + +.. toctree:: + :maxdepth: 2 + :caption: USER GUIDE + + installation + quickstart + techdoc + + +.. toctree:: + :maxdepth: 2 + :caption: API DOCUMENTATION + + api/modules + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` + + + +.. include:: footer.rst + + +.. _PyMuPDF: https://pymupdf.io diff --git a/doc/installation.rst b/docs/installation.rst similarity index 74% rename from doc/installation.rst rename to docs/installation.rst index f53f8c96..14eba019 100644 --- a/doc/installation.rst +++ b/docs/installation.rst @@ -1,10 +1,12 @@ +.. include:: header.rst + Installation ==================== -``pdf2docx`` can be installed from either Pypi or the source code. +``pdf2docx`` can be installed from either PyPI or the source code. 
-Install from Pypi +Install from PyPI ------------------- Type the command below for a new installation:: @@ -24,7 +26,7 @@ Install ``pdf2docx`` directly from the ``master`` branch:: $ pip install git+git://github.com/dothinking/pdf2docx.git@master --upgrade .. note:: - In this way, ``pdf2docx`` might have a higher version than Pypi, which is not released yet. + In this way, ``pdf2docx`` might have a higher version than PyPI, which is not released yet. Install from source code locally @@ -44,4 +46,7 @@ Uninstall :: - $ pip uninstall pdf2docx \ No newline at end of file + $ pip uninstall pdf2docx + + +.. include:: footer.rst diff --git a/doc/quickstart.cli.rst b/docs/quickstart.cli.rst similarity index 96% rename from doc/quickstart.cli.rst rename to docs/quickstart.cli.rst index 61001d59..da001dbc 100644 --- a/doc/quickstart.cli.rst +++ b/docs/quickstart.cli.rst @@ -1,3 +1,5 @@ +.. include:: header.rst + Command Line Interface =========================== @@ -78,4 +80,6 @@ Turn on multi-processing with default count of CPU:: Specify the count of CPUs:: - $ pdf2docx convert test.pdf test.docx --multi_processing=True --cpu_count=4 \ No newline at end of file + $ pdf2docx convert test.pdf test.docx --multi_processing=True --cpu_count=4 + +.. include:: footer.rst diff --git a/doc/quickstart.convert.rst b/docs/quickstart.convert.rst similarity index 97% rename from doc/quickstart.convert.rst rename to docs/quickstart.convert.rst index f371e48b..1a5d2bdb 100644 --- a/doc/quickstart.convert.rst +++ b/docs/quickstart.convert.rst @@ -1,3 +1,5 @@ +.. include:: header.rst + Convert PDF ======================= @@ -87,3 +89,7 @@ Provide ``password`` to open and convert password protected pdf:: cv = Converter(pdf_file, password) cv.convert(docx_file) cv.close() + + +.. 
include:: footer.rst + diff --git a/doc/quickstart.gui.rst b/docs/quickstart.gui.rst similarity index 65% rename from doc/quickstart.gui.rst rename to docs/quickstart.gui.rst index 1c913405..2f69ed3c 100644 --- a/doc/quickstart.gui.rst +++ b/docs/quickstart.gui.rst @@ -1,3 +1,5 @@ +.. include:: header.rst + Graphic User Interface =========================== @@ -8,4 +10,6 @@ To launch the GUI:: $ pdf2docx gui -.. image:: https://z3.ax1x.com/2021/05/30/2ZYiUs.png \ No newline at end of file +.. image:: images/pdf-converter.png + +.. include:: footer.rst \ No newline at end of file diff --git a/doc/quickstart.rst b/docs/quickstart.rst similarity index 76% rename from doc/quickstart.rst rename to docs/quickstart.rst index 11bca625..ece28329 100644 --- a/doc/quickstart.rst +++ b/docs/quickstart.rst @@ -1,3 +1,5 @@ +.. include:: header.rst + Quickstart ============= @@ -10,4 +12,6 @@ Quickstart quickstart.convert quickstart.table quickstart.cli - quickstart.gui \ No newline at end of file + quickstart.gui + +.. include:: footer.rst diff --git a/doc/quickstart.table.rst b/docs/quickstart.table.rst similarity index 92% rename from doc/quickstart.table.rst rename to docs/quickstart.table.rst index a09a2651..9e93ccd2 100644 --- a/doc/quickstart.table.rst +++ b/docs/quickstart.table.rst @@ -1,3 +1,5 @@ +.. include:: header.rst + Extract table ====================== @@ -23,4 +25,7 @@ The output may look like:: ['Description C ', '1.00 ', '0.98 ', '0.94 ', '1.03 ', '0.32 '], ['Description D ', 'kg ', '0.84 ', '0.53 ', '0.52 ', '0.33 '], ['Description E ', '1.00 ', '0.15 ', None, None, None], - ['Description F ', '1.00 ', '0.86 ', '0.37 ', '0.78 ', '0.01 ']] \ No newline at end of file + ['Description F ', '1.00 ', '0.86 ', '0.37 ', '0.78 ', '0.01 ']] + + +.. 
include:: footer.rst diff --git a/doc/requirements.txt b/docs/requirements.txt similarity index 79% rename from doc/requirements.txt rename to docs/requirements.txt index 4d15edc3..b1da5653 100644 --- a/doc/requirements.txt +++ b/docs/requirements.txt @@ -4,5 +4,6 @@ rst2pdf # define sphinx versioning sphinx==5.3.0 autodoc -sphinx_rtd_theme +furo +sphinx_copybutton sphinxcontrib.apidoc diff --git a/doc/techdoc.rst b/docs/techdoc.rst similarity index 95% rename from doc/techdoc.rst rename to docs/techdoc.rst index c8987d82..b1ae6689 100644 --- a/doc/techdoc.rst +++ b/docs/techdoc.rst @@ -1,3 +1,5 @@ +.. include:: header.rst + Technical Documentation =========================== @@ -16,7 +18,10 @@ PDF文件遵循一定的格式规范,`PyMuPDF