diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index 182c867..2fdcd18 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -10,7 +10,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.6, 3.7, 3.8] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v2 diff --git a/.gitignore b/.gitignore index ddf1880..4e368a3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ sruthi.egg-info *.pyc *.swp .coverage +pyenv \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 85b7a70..3bdc4c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,17 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p ## [Unreleased] +## [2.0.0] - 2023-07-06 +### Added +- Pass in a custom requests session with the `session` parameter + +### Changed +- Use `black` code style + +### Removed +- BC-break: `requests_kwargs` was removed since we can now pass in a custom requests session +- BC-break: no more support for Python 3.6, minimum required version is now Python 3.7 + ## [1.0.0] - 2021-12-06 ### Added - Add support for SRU 1.1 by passing `sru_version='1.1'` to the client or the operation calls. @@ -76,7 +87,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p - `Fixed` for any bug fixes. - `Security` to invite users to upgrade in case of vulnerabilities. -[Unreleased]: https://github.com/metaodi/sruthi/compare/v1.0.0...HEAD +[Unreleased]: https://github.com/metaodi/sruthi/compare/v2.0.0...HEAD +[2.0.0]: https://github.com/metaodi/sruthi/compare/v1.0.0...v2.0.0 [1.0.0]: https://github.com/metaodi/sruthi/compare/v0.1.2...v1.0.0 [0.1.2]: https://github.com/metaodi/sruthi/compare/v0.1.1...v0.1.2 [0.1.1]: https://github.com/metaodi/sruthi/compare/v0.1.0...v0.1.1 diff --git a/Makefile b/Makefile index 5fba338..ef85a57 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,11 @@ deps: ## Install dependencies python -m pip install -r test-requirements.txt lint: ## Linting of source code - python -m flake8 --statistics --show-source . + python -m black --check sruthi examples tests + python -m flake8 --statistics --show-source sruthi examples tests + +format: ## Format source code (black codestyle) + python -m black sruthi examples tests test: ## Run tests python -m pytest --cov=sruthi tests/ diff --git a/README.md b/README.md index 9f7bf0b..834cf62 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ [![PyPI Version](https://img.shields.io/pypi/v/sruthi)](https://pypi.org/project/sruthi/) [![Tests + Linting Python](https://github.com/metaodi/sruthi/actions/workflows/lint_python.yml/badge.svg)](https://github.com/metaodi/sruthi/actions/workflows/lint_python.yml) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) # sruthi @@ -67,7 +68,8 @@ Verordnung der Stadt Zürich betreffend die Erfüllung von Amtspflichten durch d https://suche.staatsarchiv.djiktzh.ch/detail.aspx?Id=3796980 ``` -The return value of `searchretrieve` is iterable, so you can easily loop over it. Or you can use indices to access elements, e.g. `records[1]` to get the second elemenet, or `records[-1]` to get the last one. +The return value of `searchretrieve` is iterable, so you can easily loop over it. +Or you can use indices to access records, e.g. `records[1]` to get the second record, or `records[-1]` to get the last one. Even [slicing](https://python-reference.readthedocs.io/en/latest/docs/brackets/slicing.html) is supported, so you can do things like only iterate over the first 5 elements using @@ -122,6 +124,23 @@ By default sruthi uses SRU 1.2 to make requests, but you can specify the SRU ver 8985 ``` +### Custom parameters and settings + +If an SRU endpoint needs additional (custom) parameters, you can create your own session object and pass it to the client. +This is useful for adding authentication (username, password), custom headers or parameters, SSL verification settings etc. + +```python +>>> import sruthi +>>> import requests +>>> # customize session +>>> session = requests.Session() +>>> session.params = {"x-collection": "GGC"} +>>> # pass the customized session to sruthi +>>> records = sruthi.searchretrieve("https://jsru.kb.nl/sru", query="gruninger", session=session) +>>> records.count +4 +``` + ## Schemas sruthi does not make any assumptions about the record data schema. @@ -136,7 +155,7 @@ sruthi has been tested with the following schemas: To contribute to sruthi simply clone this repository and follow the instructions in [CONTRIBUTING.md](/CONTRIBUTING.md). -This project ha a Makefile with the most common commands. +This project has a `Makefile` with the most common commands. Type `make help` to get an overview. ## Release diff --git a/examples/authentication.py b/examples/authentication.py new file mode 100644 index 0000000..9bda20f --- /dev/null +++ b/examples/authentication.py @@ -0,0 +1,16 @@ +import os +import requests +from sruthi import Client + +# create authenticated session +user = os.getenv("CATALOG_USER") +pw = os.getenv("CATALOG_PASS") +session = requests.Session() +session.auth = (user, pw) + +# pass authenticated session to client +sru_client = Client("https://suche.staatsarchiv.djiktzh.ch/SRU/", session=session) + +# get records for query +records = sru_client.searchretrieve(query="Zürich") +print(records) diff --git a/examples/configure_client_with_explain.py b/examples/configure_client_with_explain.py index 5da8eba..b290ad9 100644 --- a/examples/configure_client_with_explain.py +++ b/examples/configure_client_with_explain.py @@ -1,7 +1,7 @@ from sruthi import Client # create a new client and call explain() -sru_client = Client('https://suche.staatsarchiv.djiktzh.ch/SRU/') +sru_client = Client("https://suche.staatsarchiv.djiktzh.ch/SRU/") info = sru_client.explain() for name, details in info.schema.items(): @@ -9,16 +9,16 @@ # configure the maximum records based on the config try: - sru_client.maximum_records = info.config['maximumRecords'] + sru_client.maximum_records = info.config["maximumRecords"] print(f"Set maximum_records to {sru_client.maximum_records}.") except KeyError: print("Config `maximum_records` not available, keep original value") # get records for query -records = sru_client.searchretrieve(query='Zürich') +records = sru_client.searchretrieve(query="Zürich") # display 5 records -print('') -print('First 5 results for `Zürich`') +print("") +print("First 5 results for `Zürich`") for r in records[:5]: - print("* ", r['title']) + print("* ", r["title"]) diff --git a/examples/custom_parameter.py b/examples/custom_parameter.py new file mode 100644 index 0000000..d189b65 --- /dev/null +++ b/examples/custom_parameter.py @@ -0,0 +1,25 @@ +import requests +import sruthi +from pprint import pprint + + +def print_url(r, *args, **kwargs): + print(r.url) + + +# create session with custom paramter session +session = requests.Session() + +# here some example of how a session can be used to customize parameters, settings etc. +session.params = {"x-collection": "GGC"} # add custom request parameter +session.verify = False # disable SSL verfications +session.hooks["response"].append(print_url) # add custom hook + +# pass custom session to client +sru_client = sruthi.Client("https://jsru.kb.nl/sru", session=session) + +# get records for query +records = sru_client.searchretrieve(query="gruninger") +pprint(records) +print("---") +pprint(records[0]) diff --git a/examples/explain.py b/examples/explain.py index 6e0868e..73587b5 100644 --- a/examples/explain.py +++ b/examples/explain.py @@ -4,19 +4,19 @@ import yaml sru_endpoints = [ - 'https://suche.staatsarchiv.djiktzh.ch/SRU/', - 'https://amsquery.stadt-zuerich.ch/SRU/', - 'http://lx2.loc.gov:210/LCDB?', - 'https://na01.alma.exlibrisgroup.com/view/sru/TR_INTEGRATION_INST', + "https://suche.staatsarchiv.djiktzh.ch/SRU/", + "https://amsquery.stadt-zuerich.ch/SRU/", + "http://lx2.loc.gov:210/LCDB?", + "https://na01.alma.exlibrisgroup.com/view/sru/TR_INTEGRATION_INST", ] def print_header(s): - cprint(s, 'green', attrs=['bold']) + cprint(s, "green", attrs=["bold"]) def print_title(s): - cprint(s, attrs=['bold']) + cprint(s, attrs=["bold"]) def dump(d): @@ -24,32 +24,32 @@ def dump(d): for endpoint in sru_endpoints: - print_header(20 * '=') - print_header('=') - print_header(f'= {endpoint}') - print_header('=') - print_header(20 * '=') + print_header(20 * "=") + print_header("=") + print_header(f"= {endpoint}") + print_header("=") + print_header(20 * "=") info = sruthi.explain(endpoint) - print_title('Server:') + print_title("Server:") dump(info.server) - print('') + print("") - print_title('Database:') + print_title("Database:") dump(info.database) - print('') + print("") - print_title('Index:') + print_title("Index:") dump(info.index) - print('') + print("") - print_title('Schema:') + print_title("Schema:") dump(info.schema) - print('') + print("") - print_title('Config:') + print_title("Config:") dump(info.config) - print('') + print("") - print('') - print('') + print("") + print("") diff --git a/examples/generate_csv.py b/examples/generate_csv.py index 786a1af..1083425 100644 --- a/examples/generate_csv.py +++ b/examples/generate_csv.py @@ -4,33 +4,33 @@ import traceback records = sruthi.searchretrieve( - 'https://amsquery.stadt-zuerich.ch/SRU/', - query="isad.reference = V.B.b.43.:1 AND isad.descriptionlevel = Dossier" + "https://amsquery.stadt-zuerich.ch/SRU/", + query="isad.reference = V.B.b.43.:1 AND isad.descriptionlevel = Dossier", ) try: header = [ - 'reference', - 'title', - 'year', - 'url', + "reference", + "title", + "year", + "url", ] writer = csv.DictWriter( sys.stdout, header, - delimiter=',', + delimiter=",", quotechar='"', - lineterminator='\n', - quoting=csv.QUOTE_MINIMAL + lineterminator="\n", + quoting=csv.QUOTE_MINIMAL, ) writer.writeheader() for record in records: row = { - 'reference': record['reference'], - 'title': record['title'], - 'year': record['date'], - 'url': record['extra']['link'], + "reference": record["reference"], + "title": record["title"], + "year": record["date"], + "url": record["extra"]["link"], } writer.writerow(row) except Exception as e: diff --git a/examples/isad.py b/examples/isad.py index 729805a..92dd809 100644 --- a/examples/isad.py +++ b/examples/isad.py @@ -2,19 +2,15 @@ from pprint import pprint # check supported schemas of server -server_url = 'https://suche.staatsarchiv.djiktzh.ch/SRU/' -schema = 'isad' +server_url = "https://suche.staatsarchiv.djiktzh.ch/SRU/" +schema = "isad" server = sruthi.explain(server_url) -print(20 * '=') -print('=') +print(20 * "=") +print("=") print(f"= Record with schema: {schema}") -print('=') -print(20 * '=') -records = sruthi.searchretrieve( - server_url, - query='Zurich', - record_schema=schema -) +print("=") +print(20 * "=") +records = sruthi.searchretrieve(server_url, query="Zurich", record_schema=schema) pprint(records[0]) diff --git a/examples/library_of_congress.py b/examples/library_of_congress.py index 036ce8e..4a6c78d 100644 --- a/examples/library_of_congress.py +++ b/examples/library_of_congress.py @@ -1,7 +1,7 @@ import sruthi import sys -LOC_BASE = 'http://lx2.loc.gov:210/LCDB?' +LOC_BASE = "http://lx2.loc.gov:210/LCDB?" def loc_search(isbn, sru_base): @@ -9,12 +9,12 @@ def loc_search(isbn, sru_base): try: records = sruthi.searchretrieve(sru_base, query=isbn) record = records[0] - fields = record.get('datafield', []) + fields = record.get("datafield", []) for field in fields: - if field['tag'] != '050': + if field["tag"] != "050": continue - if len(field.get('subfield', [])) > 0: - loc_lcc = (field['subfield'][0]['text']) + if len(field.get("subfield", [])) > 0: + loc_lcc = field["subfield"][0]["text"] break except Exception as e: print("Error: %s" % e, file=sys.stderr) @@ -22,6 +22,6 @@ def loc_search(isbn, sru_base): return loc_lcc -isbn = '0062509470' +isbn = "0062509470" result = loc_search(isbn, LOC_BASE) print(f"Tag 050 of ISBN '{isbn}': {result}") diff --git a/examples/schemas.py b/examples/schemas.py index 80a02fe..597f161 100644 --- a/examples/schemas.py +++ b/examples/schemas.py @@ -2,22 +2,20 @@ from pprint import pprint # check supported schemas of server -server = sruthi.explain('http://lx2.loc.gov:210/LCDB?') +server = sruthi.explain("http://lx2.loc.gov:210/LCDB?") print(f"Supported schemas: {', '.join(server.schema.keys())}") for schema in server.schema.keys(): - print(20 * '=') - print('=') + print(20 * "=") + print("=") print(f"= Record with schema: {schema}") - print('=') - print(20 * '=') + print("=") + print(20 * "=") records = sruthi.searchretrieve( - 'http://lx2.loc.gov:210/LCDB?', - query="human", - record_schema=schema + "http://lx2.loc.gov:210/LCDB?", query="human", record_schema=schema ) pprint(records[0]) - print('') - print('') + print("") + print("") diff --git a/examples/searchretrieve.py b/examples/searchretrieve.py index 6d316a7..2b84d14 100644 --- a/examples/searchretrieve.py +++ b/examples/searchretrieve.py @@ -1,14 +1,16 @@ import sruthi -records = sruthi.searchretrieve('https://suche.staatsarchiv.djiktzh.ch/SRU/', query='Zurich') +records = sruthi.searchretrieve( + "https://suche.staatsarchiv.djiktzh.ch/SRU/", query="Zurich" +) print("SRU version:", records.sru_version) print("Count:", records.count) -print('') +print("") for record in records: # print fields from schema - print(record['reference']) - print(record['title']) - print(record['date']) - print(record['extra']['link']) # extra record data is available at the 'extra' key - print('') + print(record["reference"]) + print(record["title"]) + print(record["date"]) + print(record["extra"]["link"]) # extra record data is available at the 'extra' key + print("") diff --git a/examples/slicing.py b/examples/slicing.py index 3a63a15..dde76f4 100644 --- a/examples/slicing.py +++ b/examples/slicing.py @@ -1,12 +1,14 @@ import sruthi -records = sruthi.searchretrieve('https://suche.staatsarchiv.djiktzh.ch/SRU/', query='Zurich') +records = sruthi.searchretrieve( + "https://suche.staatsarchiv.djiktzh.ch/SRU/", query="Zurich" +) print("records.count:", records.count) print("len(records.records):", len(records.records)) -print("records[0]:", records[0]) # print the first record -print("records[-1]:", records[-1]) # print the last record -print("records[-200]:", records[-200]) # print the 200th record from the end -print("records[410]:", records[410]) # print record at index 410 -print("records[:5]:", records[:5]) # print the first 5 records +print("records[0]:", records[0]) # print the first record +print("records[-1]:", records[-1]) # print the last record +print("records[-200]:", records[-200]) # print the 200th record from the end +print("records[410]:", records[410]) # print record at index 410 +print("records[:5]:", records[:5]) # print the first 5 records print("records[6:20:2]:", records[6:20:2]) # print every second record from 6-20 diff --git a/examples/sru1.1.py b/examples/sru1.1.py index 0c9a5d8..1aa31e3 100644 --- a/examples/sru1.1.py +++ b/examples/sru1.1.py @@ -2,13 +2,13 @@ from pprint import pprint # check supported schemas of server -server_url = 'https://services.dnb.de/sru/dnb' +server_url = "https://services.dnb.de/sru/dnb" # create sruthi client -client = sruthi.Client(server_url, record_schema='oai_dc', sru_version='1.1') +client = sruthi.Client(server_url, record_schema="oai_dc", sru_version="1.1") explain = client.explain() -print(f'SRU version: {explain.sru_version}') +print(f"SRU version: {explain.sru_version}") pprint(explain.server) pprint(explain.config) pprint(explain.index, depth=1) @@ -16,13 +16,11 @@ pprint(explain.database) -print(20 * '=') -print('=') +print(20 * "=") +print("=") print(f"= Record with schema: {client.record_schema}") -print('=') -print(20 * '=') -records = client.searchretrieve( - query='Zurich' -) -print(f'Total records: {records.count}') +print("=") +print(20 * "=") +records = client.searchretrieve(query="Zurich") +print(f"Total records: {records.count}") pprint(records[0]) diff --git a/setup.cfg b/setup.cfg index 8748475..321af87 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,4 +1,6 @@ [flake8] max-complexity = 10 -# the new Torvalds default for line length -max-line-length = 100 + +# adaptions for black +max-line-length = 88 +extend-ignore = E203 diff --git a/setup.py b/setup.py index 7982654..451c2e7 100644 --- a/setup.py +++ b/setup.py @@ -35,9 +35,11 @@ 'Intended Audience :: Developers', 'Topic :: Software Development :: Libraries', 'Development Status :: 4 - Beta', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', ], - python_requires='>=3.6' + python_requires='>=3.7' ) diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..9b40d8d --- /dev/null +++ b/setup.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +[ ! -d pyenv ] && python -m venv pyenv +source pyenv/bin/activate + +pip install --upgrade pip +pip install -r requirements.txt +pip install -e . \ No newline at end of file diff --git a/sruthi/__init__.py b/sruthi/__init__.py index 6ebbae3..459fa27 100644 --- a/sruthi/__init__.py +++ b/sruthi/__init__.py @@ -1,19 +1,24 @@ -__version__ = '1.0.0' -__all__ = ['client', 'errors', 'response', 'xmlparse'] +__version__ = "2.0.0" +__all__ = ["client", "errors", "response", "xmlparse"] -from .errors import SruthiError, ServerIncompatibleError, SruError, NoMoreRecordsError # noqa -from .errors import SruthiWarning, WrongNamespaceWarning # noqa -from .client import Client # noqa +from .errors import ( # noqa + SruthiError, + ServerIncompatibleError, + SruError, + NoMoreRecordsError, +) +from .errors import SruthiWarning, WrongNamespaceWarning # noqa +from .client import Client # noqa def searchretrieve(url, query, **kwargs): - search_params = ['query', 'start_record', 'requests_kwargs'] + search_params = ["query", "start_record"] search_kwargs = {k: v for k, v in kwargs.items() if k in search_params} - search_kwargs['query'] = query + search_kwargs["query"] = query # assume all others kwargs are for the client client_kwargs = {k: v for k, v in kwargs.items() if k not in search_params} - client_kwargs['url'] = url + client_kwargs["url"] = url c = Client(**client_kwargs) return c.searchretrieve(**search_kwargs) diff --git a/sruthi/client.py b/sruthi/client.py index 7d15cf1..9041fc0 100644 --- a/sruthi/client.py +++ b/sruthi/client.py @@ -7,45 +7,52 @@ class Client(object): - def __init__(self, url=None, maximum_records=10, record_schema=None, sru_version='1.2'): + def __init__( + self, + url=None, + maximum_records=10, + record_schema=None, + sru_version="1.2", + session=None, + ): self.url = url self.maximum_records = maximum_records self.sru_version = sru_version self.record_schema = record_schema + self.session = session or requests.Session() - def searchretrieve(self, query, start_record=1, requests_kwargs=None): + def searchretrieve(self, query, start_record=1): params = { - 'operation': 'searchRetrieve', - 'version': self.sru_version, - 'query': query, - 'startRecord': start_record, - 'maximumRecords': self.maximum_records, + "operation": "searchRetrieve", + "version": self.sru_version, + "query": query, + "startRecord": start_record, + "maximumRecords": self.maximum_records, } if self.record_schema: - params['recordSchema'] = self.record_schema + params["recordSchema"] = self.record_schema - data_loader = DataLoader(self.url, params, requests_kwargs) + data_loader = DataLoader(self.url, self.session, params) return response.SearchRetrieveResponse(data_loader) - def explain(self, requests_kwargs=None): + def explain(self): params = { - 'operation': 'explain', - 'version': self.sru_version, + "operation": "explain", + "version": self.sru_version, } - data_loader = DataLoader(self.url, params, requests_kwargs) + data_loader = DataLoader(self.url, self.session, params) explain_response = response.ExplainResponse(data_loader) return explain_response.asdict() class DataLoader(object): - def __init__(self, url, params, requests_kwargs=None): - self.session = requests.Session() + def __init__(self, url, session, params): + self.session = session self.url = url self.params = params self.response = None self.xmlparser = xmlparse.XMLParser() - self.requests_kwargs = requests_kwargs or {} def load(self, **kwargs): self.params.update(kwargs) @@ -55,11 +62,7 @@ def load(self, **kwargs): def _get_content(self, url, params): try: - res = self.session.get( - url, - params=params, - **self.requests_kwargs - ) + res = self.session.get(url, params=params) res.raise_for_status() except requests.exceptions.HTTPError as e: raise errors.SruthiError("HTTP error: %s" % e) @@ -69,12 +72,9 @@ def _get_content(self, url, params): return self.xmlparser.parse(res.content) def _check_errors(self, xml): - sru = '{http://www.loc.gov/zing/srw/}' - diag = '{http://www.loc.gov/zing/srw/diagnostic/}' - diagnostics = self.xmlparser.find( - xml, - f'{sru}diagnostics/{diag}diagnostic' - ) + sru = "{http://www.loc.gov/zing/srw/}" + diag = "{http://www.loc.gov/zing/srw/diagnostic/}" + diagnostics = self.xmlparser.find(xml, f"{sru}diagnostics/{diag}diagnostic") if diagnostics: error_msg = ", ".join([d.text for d in diagnostics]) raise errors.SruError(error_msg) diff --git a/sruthi/response.py b/sruthi/response.py index 2ce58d7..17e6455 100644 --- a/sruthi/response.py +++ b/sruthi/response.py @@ -23,20 +23,20 @@ def maybe_int(self, s): return s def _check_response_tag(self, xml, tag): - sru = '{http://www.loc.gov/zing/srw/}' + sru = "{http://www.loc.gov/zing/srw/}" response = f"{sru}{tag}" if not xml.tag == response: # fix namespace for servers that provide the wrong namespace URI main_ns = self.xmlparser.namespace(xml) - if 'www.loc.gov/zing/srw' in main_ns: + if "www.loc.gov/zing/srw" in main_ns: warnings.warn( f""" The server has the wrong namespace for SRU, it should be {sru} but it's currently set to {{{main_ns}}}. """, - errors.WrongNamespaceWarning + errors.WrongNamespaceWarning, ) - self.xmlparser.namespaces['sru'] = main_ns + self.xmlparser.namespaces["sru"] = main_ns else: raise errors.ServerIncompatibleError( f"Server response did not contain a {response} tag" @@ -47,26 +47,28 @@ class SearchRetrieveResponse(Response): def __repr__(self): try: return ( - 'SearchRetrieveResponse(' - 'sru_version=%r,' - 'count=%r,' - 'next_start_record=%r)' - ) % ( - self.sru_version, - self.count, - self.next_start_record, - ) + "SearchRetrieveResponse(" + "sru_version=%r," + "count=%r," + "next_start_record=%r)" + ) % ( + self.sru_version, + self.count, + self.next_start_record, + ) except AttributeError: - return 'SearchRetrieveResponse(empty)' + return "SearchRetrieveResponse(empty)" def _parse_content(self, xml): - self._check_response_tag(xml, 'searchRetrieveResponse') + self._check_response_tag(xml, "searchRetrieveResponse") - self.sru_version = self.xmlparser.find(xml, './sru:version').text - self.count = self.maybe_int(self.xmlparser.find(xml, './sru:numberOfRecords').text) + self.sru_version = self.xmlparser.find(xml, "./sru:version").text + self.count = self.maybe_int( + self.xmlparser.find(xml, "./sru:numberOfRecords").text + ) self._extract_records(xml) - next_start_record = self.xmlparser.find(xml, './sru:nextRecordPosition').text + next_start_record = self.xmlparser.find(xml, "./sru:nextRecordPosition").text if next_start_record: self.next_start_record = self.maybe_int(next_start_record) else: @@ -121,15 +123,15 @@ def _load_new_data(self): def _extract_records(self, xml): new_records = [] - xml_recs = self.xmlparser.findall(xml, './sru:records/sru:record') + xml_recs = self.xmlparser.findall(xml, "./sru:records/sru:record") for xml_rec in xml_recs: record = defaultdict() - record['schema'] = self.xmlparser.find(xml_rec, './sru:recordSchema').text - record_data = self.xmlparser.find(xml_rec, './sru:recordData') - extra_data = self.xmlparser.find(xml_rec, './sru:extraRecordData') + record["schema"] = self.xmlparser.find(xml_rec, "./sru:recordSchema").text + record_data = self.xmlparser.find(xml_rec, "./sru:recordData") + extra_data = self.xmlparser.find(xml_rec, "./sru:extraRecordData") - record.update(self._tag_data(record_data, 'sru:recordData') or {}) - record['extra'] = self._tag_data(extra_data, 'sru:extraRecordData') + record.update(self._tag_data(record_data, "sru:recordData") or {}) + record["extra"] = self._tag_data(extra_data, "sru:extraRecordData") record = dict(record) new_records.append(record) @@ -148,8 +150,8 @@ def _tag_data(self, elem, parent): if len(record_data) == 1 and len(keys) > 0 and len(record_data[keys[0]]) > 0: record_data = record_data[keys[0]] - record_data.pop('schemaLocation', None) - record_data.pop('xmlns', None) + record_data.pop("schemaLocation", None) + record_data.pop("xmlns", None) def leaf_reducer(k1, k2): # only use key of leaf element @@ -165,48 +167,50 @@ def leaf_reducer(k1, k2): return record_data def _remove_namespace(self, elem): - ns_pattern = re.compile('{.+}') - tag_name = ns_pattern.sub('', elem.tag) + ns_pattern = re.compile("{.+}") + tag_name = ns_pattern.sub("", elem.tag) return tag_name class ExplainResponse(Response): def __repr__(self): return ( - 'ExplainResponse(' - 'sru_version=%r,' - 'server=%r,' - 'database=%r' - 'index=%r' - 'schema=%r' - 'config=%r)' - ) % ( - self.sru_version, - self.server, - self.database, - self.index, - self.schema, - self.config, - ) + "ExplainResponse(" + "sru_version=%r," + "server=%r," + "database=%r" + "index=%r" + "schema=%r" + "config=%r)" + ) % ( + self.sru_version, + self.server, + self.database, + self.index, + self.schema, + self.config, + ) def asdict(self): - return AttributeDict({ - 'sru_version': self.sru_version, - 'server': self.server, - 'database': self.database, - 'index': self.index, - 'schema': self.schema, - 'config': self.config, - }) + return AttributeDict( + { + "sru_version": self.sru_version, + "server": self.server, + "database": self.database, + "index": self.index, + "schema": self.schema, + "config": self.config, + } + ) def _parse_content(self, xml): - self._check_response_tag(xml, 'explainResponse') + self._check_response_tag(xml, "explainResponse") - record_schema = self.xmlparser.find(xml, './/sru:recordSchema').text + record_schema = self.xmlparser.find(xml, ".//sru:recordSchema").text if record_schema: - self.xmlparser.namespaces['zr'] = record_schema + self.xmlparser.namespaces["zr"] = record_schema - self.sru_version = self.xmlparser.find(xml, './sru:version').text + self.sru_version = self.xmlparser.find(xml, "./sru:version").text self.server = self._parse_server(xml) self.database = self._parse_database(xml) @@ -216,29 +220,25 @@ def _parse_content(self, xml): def _parse_server(self, xml): server_info = { - 'host': self.xmlparser.find( - xml, - [ - './/zr:serverInfo/zr:host', - './/zr2:serverInfo/zr:host' - ] - ).text, - 'port': self.xmlparser.find( - xml, - [ - './/zr:serverInfo/zr:port', - './/zr2:serverInfo/zr:port', - ] - ).text, - 'database': self.xmlparser.find( - xml, - [ - './/zr:serverInfo/zr:database', - './/zr2:serverInfo/zr:database', - ] - ).text, + "host": self.xmlparser.find( + xml, [".//zr:serverInfo/zr:host", ".//zr2:serverInfo/zr:host"] + ).text, + "port": self.xmlparser.find( + xml, + [ + ".//zr:serverInfo/zr:port", + ".//zr2:serverInfo/zr:port", + ], + ).text, + "database": self.xmlparser.find( + xml, + [ + ".//zr:serverInfo/zr:database", + ".//zr2:serverInfo/zr:database", + ], + ).text, } - server_info['port'] = self.maybe_int(server_info['port']) + server_info["port"] = self.maybe_int(server_info["port"]) return server_info def _parse_schema(self, xml): @@ -251,20 +251,20 @@ def ident(a): return a attributes = { - 'identifier': ident, - 'name': ident, - 'location': ident, - 'sort': bool_or_none, - 'retrieve': bool_or_none, + "identifier": ident, + "name": ident, + "location": ident, + "sort": bool_or_none, + "retrieve": bool_or_none, } schemas = {} xml_schemas = self.xmlparser.findall( xml, [ - './/zr:schemaInfo/zr:schema', - './/zr2:schemaInfo/zr2:schema', - ] + ".//zr:schemaInfo/zr:schema", + ".//zr2:schemaInfo/zr2:schema", + ], ) for schema in xml_schemas: schema_info = {} @@ -272,8 +272,8 @@ def ident(a): xml_attr = schema.attrib.get(attr) if xml_attr: schema_info[attr] = fn(xml_attr) - schema_info['title'] = self.xmlparser.find(schema, './zr:title').text - schemas[schema.attrib.get('name')] = schema_info + schema_info["title"] = self.xmlparser.find(schema, "./zr:title").text + schemas[schema.attrib.get("name")] = schema_info return schemas def _parse_config(self, xml): @@ -281,37 +281,39 @@ def _parse_config(self, xml): settings = self.xmlparser.findall( xml, [ - './/zr:configInfo/zr:setting', - './/zr2:configInfo/zr:setting', - ] + ".//zr:configInfo/zr:setting", + ".//zr2:configInfo/zr:setting", + ], ) for setting in settings: - t = setting.attrib['type'] + t = setting.attrib["type"] config[t] = self.maybe_int(setting.text) # defaults xml_defaults = self.xmlparser.findall( xml, [ - './/zr:configInfo/zr:default', - './/zr2:configInfo/zr:default', - ] + ".//zr:configInfo/zr:default", + ".//zr2:configInfo/zr:default", + ], ) defaults = {} for default in xml_defaults: - t = default.attrib['type'] + t = default.attrib["type"] defaults[t] = self.maybe_int(default.text) - config['defaults'] = defaults + config["defaults"] = defaults return config def _parse_database(self, xml): - db = self.xmlparser.find(xml, './/zr:databaseInfo') + db = self.xmlparser.find(xml, ".//zr:databaseInfo") if not db: return {} db_info = { - 'title': self.xmlparser.find(db, ['./zr:title', './title']).text, - 'description': self.xmlparser.find(db, ['./zr:description', './description']).text, - 'contact': self.xmlparser.find(db, ['./zr:contact', './contact']).text, + "title": self.xmlparser.find(db, ["./zr:title", "./title"]).text, + "description": self.xmlparser.find( + db, ["./zr:description", "./description"] + ).text, + "contact": self.xmlparser.find(db, ["./zr:contact", "./contact"]).text, } db_info = {k: v.strip() if v else v for (k, v) in db_info.items()} return db_info @@ -321,33 +323,25 @@ def _parse_index(self, xml): index_sets = self.xmlparser.findall( xml, [ - './/zr:indexInfo/zr:set', - './/zr2:indexInfo/zr2:set', - ] + ".//zr:indexInfo/zr:set", + ".//zr2:indexInfo/zr2:set", + ], ) for index_set in index_sets: - index[index_set.attrib['name']] = defaultdict() + index[index_set.attrib["name"]] = defaultdict() index_fields = self.xmlparser.findall( - xml, - [ - './/zr:indexInfo/zr:index', - './/zr2:indexInfo/zr2:index' - ] + xml, [".//zr:indexInfo/zr:index", ".//zr2:indexInfo/zr2:index"] ) for index_field in index_fields: - title = self.xmlparser.find(index_field, ['./zr:title', './title']).text + title = self.xmlparser.find(index_field, ["./zr:title", "./title"]).text if title: title = title.strip() names = self.xmlparser.findall( - index_field, - [ - './/zr:map/zr:name', - './/zr2:map/zr2:name' - ] + index_field, [".//zr:map/zr:name", ".//zr2:map/zr2:name"] ) for name in names: - index[name.attrib['set']][name.text.strip()] = title + index[name.attrib["set"]][name.text.strip()] = title return {k: dict(v) for k, v in dict(index).items()} diff --git a/sruthi/xmlparse.py b/sruthi/xmlparse.py index 7d12854..4ed9656 100644 --- a/sruthi/xmlparse.py +++ b/sruthi/xmlparse.py @@ -21,27 +21,27 @@ def iter(self): class XMLParser(object): def __init__(self): self.namespaces = { - 'sru': 'http://www.loc.gov/zing/srw/', - 'isad': 'http://www.expertisecentrumdavid.be/xmlschemas/isad.xsd', - 'rel': 'info:srw/extension/2/relevancy-1.0', - 'ap': 'http://www.archivportal.ch/srw/extension/', - 'zr': 'http://explain.z3950.org/dtd/2.1/', - 'zr2': 'http://explain.z3950.org/dtd/2.0/', + "sru": "http://www.loc.gov/zing/srw/", + "isad": "http://www.expertisecentrumdavid.be/xmlschemas/isad.xsd", + "rel": "info:srw/extension/2/relevancy-1.0", + "ap": "http://www.archivportal.ch/srw/extension/", + "zr": "http://explain.z3950.org/dtd/2.1/", + "zr2": "http://explain.z3950.org/dtd/2.0/", } self.dict_namespaces = { - 'http://www.loc.gov/zing/srw/': 'sru', - 'http://explain.z3950.org/dtd/2.1/': 'zr', - 'info:srw/extension/2/relevancy-1.0': None, - 'http://www.archivportal.ch/srw/extension/': None, - 'http://www.loc.gov/MARC21/slim': None, - 'info:lc/xmlns/marcxchange-v1': None, - 'http://www.loc.gov/mods/v3': None, - 'http://www.loc.gov/standards/mods/v3/mods-3-6.xsd': None, - 'http://www.loc.gov/standards/mods/v3/mods-3-6.xsd': None, - 'http://purl.org/dc/elements/1.1/': None, - 'http://www.expertisecentrumdavid.be/xmlschemas/isad.xsd': None, - 'http://www.w3.org/2001/XMLSchema-instance': None, - 'http://www.w3.org/XML/1998/namespace': None, + "http://www.loc.gov/zing/srw/": "sru", + "http://explain.z3950.org/dtd/2.1/": "zr", + "info:srw/extension/2/relevancy-1.0": None, + "http://www.archivportal.ch/srw/extension/": None, + "http://www.loc.gov/MARC21/slim": None, + "info:lc/xmlns/marcxchange-v1": None, + "http://www.loc.gov/mods/v3": None, + "http://www.loc.gov/standards/mods/v3/mods-3-6.xsd": None, + "http://www.loc.gov/standards/mods/v3/mods-3-6.xsd": None, + "http://purl.org/dc/elements/1.1/": None, + "http://www.expertisecentrumdavid.be/xmlschemas/isad.xsd": None, + "http://www.w3.org/2001/XMLSchema-instance": None, + "http://www.w3.org/XML/1998/namespace": None, } def parse(self, content): @@ -81,15 +81,15 @@ def todict(self, xml, **kwargs): xml = self.tostring(xml) dict_args = { - 'dict_constructor': dict, - 'process_namespaces': True, - 'namespaces': self.dict_namespaces, - 'attr_prefix': '', - 'cdata_key': 'text', + "dict_constructor": dict, + "process_namespaces": True, + "namespaces": self.dict_namespaces, + "attr_prefix": "", + "cdata_key": "text", } dict_args.update(kwargs) return dict(xmltodict.parse(xml, **dict_args)) def namespace(self, element): - m = re.match(r'\{(.*)\}', element.tag) - return m.group(1) if m else '' + m = re.match(r"\{(.*)\}", element.tag) + return m.group(1) if m else "" diff --git a/test-requirements.txt b/test-requirements.txt index 421bd5b..f4e7b08 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,7 +1,8 @@ # This file lists the dependencies of this extension. -# Install with a command like: pip install -r pip-requirements.txt +# Install with a command like: pip install -r test-requirements.txt flake8 mock pytest pytest-cov coverage +black diff --git a/tests/client_test.py b/tests/client_test.py index c3c548e..6ff2bc3 100644 --- a/tests/client_test.py +++ b/tests/client_test.py @@ -1,3 +1,4 @@ +import mock from sruthi_test import SruthiTestCase from sruthi.client import Client from sruthi.errors import WrongNamespaceWarning @@ -5,160 +6,147 @@ class TestSruthiClient(SruthiTestCase): def test_searchretrieve(self): - client = Client('http://test.com/sru') - r = client.searchretrieve('Test-Query') + client = Client("http://test.com/sru") + r = client.searchretrieve("Test-Query") self.assertEqual(r.count, 12) self.assertEqual(len(r.records), 12) self.assertEqual( r[0], { - 'reference': 'VII.335.:2.34.8.', - 'extra': { - 'score': '0.38', - 'link': 'https://amsquery.stadt-zuerich.ch/detail.aspx?Id=410130', # noqa - 'hasDigitizedItems': '0', - 'endDateISO': '1998-12-31', - 'beginDateISO': '1998-01-01', - 'beginApprox': '0', - 'endApprox': '0' + "reference": "VII.335.:2.34.8.", + "extra": { + "score": "0.38", + "link": "https://amsquery.stadt-zuerich.ch/detail.aspx?Id=410130", # noqa + "hasDigitizedItems": "0", + "endDateISO": "1998-12-31", + "beginDateISO": "1998-01-01", + "beginApprox": "0", + "endApprox": "0", }, - 'descriptionlevel': 'Dossier', - 'title': u'Podium "Frauen und Politik" beim Jubil\xe4umsanlass "Frauenrechte-Menschenrechte" des Bundes Schweizerischer Frauenorganisationen BSF zu 150 Jahre Bundesstaat, 50 Jahre UNO-Menschenrechtserkl\xe4rung und 27 Jahre politische Gleichberechtigung im Nationalratssaal in Bern vom 4. April 1998', # noqa - 'extent': None, - 'date': '1998', - 'creator': None, - 'schema': 'isad', - } + "descriptionlevel": "Dossier", + "title": 'Podium "Frauen und Politik" beim Jubil\xe4umsanlass "Frauenrechte-Menschenrechte" des Bundes Schweizerischer Frauenorganisationen BSF zu 150 Jahre Bundesstaat, 50 Jahre UNO-Menschenrechtserkl\xe4rung und 27 Jahre politische Gleichberechtigung im Nationalratssaal in Bern vom 4. April 1998', # noqa + "extent": None, + "date": "1998", + "creator": None, + "schema": "isad", + }, ) def test_searchretrieve_warning(self): with self.assertWarns(WrongNamespaceWarning): - client = Client('http://server-with-wrong-sru.namespace/sru/search') - r = client.searchretrieve('dc.title = Test') + client = Client("http://server-with-wrong-sru.namespace/sru/search") + r = client.searchretrieve("dc.title = Test") self.assertEqual(r.count, 10) def test_searchretrieve_slice(self): - client = Client('http://test.com/sru/search') - r = client.searchretrieve('dc.title = Zürich') + client = Client("http://test.com/sru/search") + r = client.searchretrieve("dc.title = Zürich") self.assertEqual(r.count, 10) self.assertEqual(len(r.records), 10) # access by index - self.assertEqual(r[0]['id'], '107853744') - self.assertEqual(r[3]['id'], '10723971X') - self.assertEqual(r[-1]['id'], '113008686') + self.assertEqual(r[0]["id"], "107853744") + self.assertEqual(r[3]["id"], "10723971X") + self.assertEqual(r[-1]["id"], "113008686") with self.assertRaises(IndexError): print(r[-200]) # slicing res = list(r[:5]) self.assertEqual(len(res), 5) - self.assertEqual(res[0]['id'], '107853744') - self.assertEqual(res[1]['id'], '105427527') - self.assertEqual(res[2]['id'], '106876457') - self.assertEqual(res[3]['id'], '10723971X') - self.assertEqual(res[4]['id'], '108757544') + self.assertEqual(res[0]["id"], "107853744") + self.assertEqual(res[1]["id"], "105427527") + self.assertEqual(res[2]["id"], "106876457") + self.assertEqual(res[3]["id"], "10723971X") + self.assertEqual(res[4]["id"], "108757544") res = list(r[8:]) self.assertEqual(len(res), 2) - self.assertEqual(res[0]['id'], '07865257X') - self.assertEqual(res[1]['id'], '113008686') + self.assertEqual(res[0]["id"], "07865257X") + self.assertEqual(res[1]["id"], "113008686") res = list(r[3:10:3]) self.assertEqual(len(res), 3) - self.assertEqual(res[0]['id'], '10723971X') - self.assertEqual(res[1]['id'], '075640988') - self.assertEqual(res[2]['id'], '113008686') + self.assertEqual(res[0]["id"], "10723971X") + self.assertEqual(res[1]["id"], "075640988") + self.assertEqual(res[2]["id"], "113008686") def test_searchretrieve_sru11(self): - client = Client('http://my-param.com/sru', sru_version='1.1') + client = Client("http://my-param.com/sru", sru_version="1.1") - r = client.searchretrieve('test-query') + r = client.searchretrieve("test-query") self.assertEqual(r.count, 790) self.assertEqual(len(r.records), 12) self.session_mock.return_value.get.assert_called_once_with( - 'http://my-param.com/sru', + "http://my-param.com/sru", params={ - 'operation': 'searchRetrieve', - 'version': '1.1', - 'query': 'test-query', - 'startRecord': 1, - 'maximumRecords': 10, - } + "operation": "searchRetrieve", + "version": "1.1", + "query": "test-query", + "startRecord": 1, + "maximumRecords": 10, + }, ) def test_explain(self): - client = Client('https://test.com/sru') + client = Client("https://test.com/sru") info = client.explain() # server server = info.server - self.assertEqual(server['host'], 'https://test.com/sru') - self.assertEqual(server['port'], 80) - self.assertEqual(server['database'], 'sru') + self.assertEqual(server["host"], "https://test.com/sru") + self.assertEqual(server["port"], 80) + self.assertEqual(server["database"], "sru") # database db = info.database - self.assertEqual(db['title'], 'Testarchiv Online Search') - self.assertEqual(db['description'], 'Durchsuchen der Bestände des Testarchivs.') - self.assertEqual(db['contact'], 'test@test.com') + self.assertEqual(db["title"], "Testarchiv Online Search") + self.assertEqual(db["description"], "Durchsuchen der Bestände des Testarchivs.") + self.assertEqual(db["contact"], "test@test.com") # index index = info.index self.assertEqual(len(index), 1) - self.assertEqual(list(index.keys()), ['isad']) - self.assertIn('title', index['isad']) - self.assertIn('reference', index['isad']) - self.assertIn('date', index['isad']) - self.assertIn('descriptionlevel', index['isad']) - self.assertEqual(index['isad']['reference'], 'Reference Code') + self.assertEqual(list(index.keys()), ["isad"]) + self.assertIn("title", index["isad"]) + self.assertIn("reference", index["isad"]) + self.assertIn("date", index["isad"]) + self.assertIn("descriptionlevel", index["isad"]) + self.assertEqual(index["isad"]["reference"], "Reference Code") # schema schema = info.schema self.assertEqual(len(schema), 1) - self.assertEqual(list(schema.keys()), ['isad']) - self.assertEqual(schema['isad']['name'], 'isad') - self.assertEqual(schema['isad']['title'], 'ISAD(G)') + self.assertEqual(list(schema.keys()), ["isad"]) + self.assertEqual(schema["isad"]["name"], "isad") + self.assertEqual(schema["isad"]["title"], "ISAD(G)") # config config = info.config print(config) - self.assertEqual(config['maximumRecords'], 99) - self.assertEqual(config['my-test-config'], 'test123') - self.assertEqual(config['defaults']['numberOfRecords'], 99) - - def test_explain_with_requests_kwargs(self): - client = Client('https://test.com/sru') - client.explain(requests_kwargs={'verify': False}) - - self.session_mock.return_value.get.assert_called_once_with( - 'https://test.com/sru', - params={ - 'operation': 'explain', - 'version': '1.2', - }, - verify=False - ) + self.assertEqual(config["maximumRecords"], 99) + self.assertEqual(config["my-test-config"], "test123") + self.assertEqual(config["defaults"]["numberOfRecords"], 99) def test_explain_with_zr2_namespace(self): - client = Client('https://example.com/sru') + client = Client("https://example.com/sru") info = client.explain() # server server = info.server - self.assertEqual(server['host'], 'example.com/sru') - self.assertEqual(server['port'], 443) + self.assertEqual(server["host"], "example.com/sru") + self.assertEqual(server["port"], 443) # index index = info.index self.assertEqual(len(index), 2) - self.assertEqual(list(index.keys()), ['alma', 'rec']) - self.assertIn('title', index['alma']) - self.assertIn('notes', index['alma']) - self.assertIn('date', index['alma']) - self.assertIn('description', index['alma']) - self.assertEqual(index['alma']['url'], 'URL (Electronic Portfolio)') + self.assertEqual(list(index.keys()), ["alma", "rec"]) + self.assertIn("title", index["alma"]) + self.assertIn("notes", index["alma"]) + self.assertIn("date", index["alma"]) + self.assertIn("description", index["alma"]) + self.assertEqual(index["alma"]["url"], "URL (Electronic Portfolio)") # schema schema = info.schema @@ -166,90 +154,93 @@ def test_explain_with_zr2_namespace(self): self.assertEqual( list(schema.keys()), [ - 'marcxml', - 'dc', - 'mods', - 'dcx', - 'unimarcxml', - 'kormarcxml', - 'cnmarcxml', - 'isohold', - ] + "marcxml", + "dc", + "mods", + "dcx", + "unimarcxml", + "kormarcxml", + "cnmarcxml", + "isohold", + ], ) - self.assertEqual(schema['marcxml']['name'], 'marcxml') - self.assertEqual(schema['marcxml']['sort'], True) + self.assertEqual(schema["marcxml"]["name"], "marcxml") + self.assertEqual(schema["marcxml"]["sort"], True) self.assertEqual( - schema['marcxml']['identifier'], - 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd' + schema["marcxml"]["identifier"], + "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd", ) # config config = info.config - self.assertEqual(config['maximumRecords'], 50) - self.assertEqual(config['defaults']['numberOfRecords'], 10) + self.assertEqual(config["maximumRecords"], 50) + self.assertEqual(config["defaults"]["numberOfRecords"], 10) def test_passing_maximum_records(self): - client = Client('http://my-param.com/sru', maximum_records=111) + client = Client("http://my-param.com/sru", maximum_records=111) self.assertEqual(client.maximum_records, 111) - client.searchretrieve('test-query') + client.searchretrieve("test-query") self.session_mock.return_value.get.assert_called_once_with( - 'http://my-param.com/sru', + "http://my-param.com/sru", params={ - 'operation': 'searchRetrieve', - 'version': '1.2', - 'query': 'test-query', - 'startRecord': 1, - 'maximumRecords': 111, - } + "operation": "searchRetrieve", + "version": "1.2", + "query": "test-query", + "startRecord": 1, + "maximumRecords": 111, + }, ) def test_passing_record_schema(self): - client = Client('http://my-param.com/sru', record_schema='dc') - self.assertEqual(client.record_schema, 'dc') + client = Client("http://my-param.com/sru", record_schema="dc") + self.assertEqual(client.record_schema, "dc") - client.searchretrieve('test-query') + client.searchretrieve("test-query") self.session_mock.return_value.get.assert_called_once_with( - 'http://my-param.com/sru', + "http://my-param.com/sru", params={ - 'operation': 'searchRetrieve', - 'version': '1.2', - 'query': 'test-query', - 'startRecord': 1, - 'recordSchema': 'dc', - 'maximumRecords': 10, - } + "operation": "searchRetrieve", + "version": "1.2", + "query": "test-query", + "startRecord": 1, + "recordSchema": "dc", + "maximumRecords": 10, + }, ) - def test_passing_requests_kwargs(self): - client = Client('https://my-param.com/sru', record_schema='dc') - self.assertEqual(client.record_schema, 'dc') + def test_passing_start_record(self): + client = Client("http://my-param.com/sru") - client.searchretrieve('test-query', requests_kwargs={'verify': False}) + client.searchretrieve("test-query", start_record=10) self.session_mock.return_value.get.assert_called_once_with( - 'https://my-param.com/sru', + "http://my-param.com/sru", params={ - 'operation': 'searchRetrieve', - 'version': '1.2', - 'query': 'test-query', - 'startRecord': 1, - 'recordSchema': 'dc', - 'maximumRecords': 10, + "operation": "searchRetrieve", + "version": "1.2", + "query": "test-query", + "startRecord": 10, + "maximumRecords": 10, }, - verify=False ) - def test_passing_start_record(self): - client = Client('http://my-param.com/sru') - client.searchretrieve('test-query', start_record=10) - self.session_mock.return_value.get.assert_called_once_with( - 'http://my-param.com/sru', +class TestSruthiClientNoSession: + def test_passing_session(self, valid_xml): + session_mock = mock.MagicMock( + get=mock.MagicMock(return_value=mock.MagicMock(content=valid_xml)) + ) # noqa + + client = Client("http://my-param.com/sru", session=session_mock) + + client.searchretrieve("test-query") + session_mock.get.assert_called_once_with( + "http://my-param.com/sru", params={ - 'operation': 'searchRetrieve', - 'version': '1.2', - 'query': 'test-query', - 'startRecord': 10, - 'maximumRecords': 10, - } + "operation": "searchRetrieve", + "version": "1.2", + "query": "test-query", + "startRecord": 1, + "maximumRecords": 10, + }, ) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..398217d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,17 @@ +import pytest +import os + +__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) + + +def fixture_content(filename): + path = os.path.join(__location__, "fixtures", filename) + if not os.path.exists(path): + return "" + with open(path) as f: + return f.read() + + +@pytest.fixture +def valid_xml(): + return fixture_content("test_searchretrieve.xml") diff --git a/tests/fixtures/test_searchretrieve_with_requests_kwargs.xml b/tests/fixtures/test_searchretrieve_with_requests_kwargs.xml deleted file mode 100644 index e7c2509..0000000 --- a/tests/fixtures/test_searchretrieve_with_requests_kwargs.xml +++ /dev/null @@ -1,343 +0,0 @@ - - - 1.2 - 12 - - - isad - xml - - - - VII.335.:2.34.8. - Podium "Frauen und Politik" beim Jubiläumsanlass "Frauenrechte-Menschenrechte" des Bundes Schweizerischer Frauenorganisationen BSF zu 150 Jahre Bundesstaat, 50 Jahre UNO-Menschenrechtserklärung und 27 Jahre politische Gleichberechtigung im Nationalratssaal in Bern vom 4. April 1998 - 1998 - Dossier - - - - - - - - 1 - - 0.38 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=410130 - 1998-01-01 - 0 - 1998-12-31 - 0 - 0 - - - - isad - xml - - - - VII.424.:3.2.4.8. - Menschenrechte, Matinee der Schauspielunion, 18.04. (vor) 1964 - s. d. (sine dato) - Dossier - - - - - - - - 2 - - 0.38 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=495785 - 0001-01-01 - 0 - 9999-12-31 - 0 - 0 - - - - isad - xml - - - - V.E.c.63. - Stadtpolizei, Kriminalkommissariat KK III. Staatsschutzakten Registratur 1 und 2 - 1920 - 1990 - Bestand - - - - - - - - 3 - - 0.26 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=9514 - 1920-01-01 - 0 - 1990-12-31 - 0 - 1 - - - - isad - xml - - - - V.E.c.73. - Stadtpolizei, Bewilligungen. Akten - 1906 - 2014 - Bestand - - - - - - - - 4 - - 0.26 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=207460 - 1906-01-01 - 0 - 2014-12-31 - 0 - 1 - - - - isad - xml - - - - V.B.c.64. - Präsidialabteilung. Akten - 1955 - 1995 - Bestand - - - - - - - - 5 - - 0.17 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=8336 - 1955-01-01 - 0 - 1995-12-31 - 0 - 1 - - - - isad - xml - - - - V.B.c.900.:3.2. - Stadthaus. Drucksachen - 1951 - 2019 - Bestand - - - - - - - - 6 - - 0.17 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=8362 - 1951-01-01 - 0 - 2019-12-31 - 0 - 1 - - - - isad - xml - - - - VII.12. - Aktientheater, Stadttheater, Opernhaus Zürich AG. Theaterarchiv - 1830 - 1995 - Bestand - - - - - - - - 7 - - 0.09 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=11494 - 1830-01-01 - 0 - 1995-12-31 - 0 - 1 - - - - isad - xml - - - - VII.335. - Emilie Lieberherr (1924-2011), Stadträtin. Nachlass - approx. 1900 - 2004 - Bestand - - - - - - - - 8 - - 0.09 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=11736 - 1900-01-01 - 1 - 2004-12-31 - 0 - 1 - - - - isad - xml - - - - VII.424. - Dr. Peter Löffler (1926-2015), Dramaturg, Regisseur, künstlerischer Direktor am Schauspielhaus Zürich (1969/70). Nachlass - 1867 - 2012 - Bestand - - - - - - - - 9 - - 0.09 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=11835 - 1867-01-01 - 0 - 2012-12-31 - 0 - 1 - - - - isad - xml - - - - V.L.42. - Ausstellungen. Dokumentation - from 1846 - Bestand - - - - - - - - 10 - - 0.09 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=11389 - 1846-01-01 - 0 - 9999-12-31 - 0 - 1 - - - - isad - xml - - - - V.L.105. - Kongresse und Tagungen. Dokumentation - from 1899 - Bestand - - - - - - - - 11 - - 0.09 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=11243 - 1899-01-01 - 0 - 9999-12-31 - 0 - 1 - - - - isad - xml - - - - VII.103. - Alexander M. Kaiser (1887-1971) alias A. M. Cay, Karikaturist. Nachlass - 1945 - 1967 - Bestand - - - - - - - - 12 - - 0.09 - https://amsquery.stadt-zuerich.ch/detail.aspx?Id=11476 - 1945-01-01 - 0 - 1967-12-31 - 0 - 1 - - - - diff --git a/tests/fixtures/test_passing_requests_kwargs.xml b/tests/fixtures/test_searchretrieve_with_session.xml similarity index 100% rename from tests/fixtures/test_passing_requests_kwargs.xml rename to tests/fixtures/test_searchretrieve_with_session.xml diff --git a/tests/response_test.py b/tests/response_test.py index 1970c2f..78cdc39 100644 --- a/tests/response_test.py +++ b/tests/response_test.py @@ -2,47 +2,42 @@ from sruthi.response import SearchRetrieveResponse, ExplainResponse import os -__location__ = os.path.realpath( - os.path.join( - os.getcwd(), - os.path.dirname(__file__) - ) -) +__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) class TestSearchRetrieveResponse(ResponseTestCase): def test_response_single(self): - data_loader = self._data_loader_mock(['response_single.xml']) + data_loader = self._data_loader_mock(["response_single.xml"]) res = SearchRetrieveResponse(data_loader) self.assertEqual(res.count, 1) self.assertEqual(res.__length_hint__(), 1) - self.assertEqual(res.sru_version, '1.2') + self.assertEqual(res.sru_version, "1.2") self.assertIsNone(res.next_start_record) def test_response_single_sru11(self): - data_loader = self._data_loader_mock(['response_single_sru11.xml']) + data_loader = self._data_loader_mock(["response_single_sru11.xml"]) res = SearchRetrieveResponse(data_loader) self.assertEqual(res.count, 8985) self.assertEqual(res.__length_hint__(), 8985) - self.assertEqual(res.sru_version, '1.1') + self.assertEqual(res.sru_version, "1.1") self.assertEqual(res.next_start_record, 2) def test_response_multi(self): - data_loader = self._data_loader_mock(['response_multiple_1.xml']) + data_loader = self._data_loader_mock(["response_multiple_1.xml"]) res = SearchRetrieveResponse(data_loader) self.assertEqual(res.count, 220) self.assertEqual(res.__length_hint__(), 220) - self.assertEqual(res.sru_version, '1.2') + self.assertEqual(res.sru_version, "1.2") self.assertEqual(res.next_start_record, 100) def test_response_iterator(self): filenames = [ - 'response_multiple_1.xml', - 'response_multiple_2.xml', - 'response_multiple_3.xml', + "response_multiple_1.xml", + "response_multiple_2.xml", + "response_multiple_3.xml", ] data_loader = self._data_loader_mock(filenames) res = SearchRetrieveResponse(data_loader) @@ -50,8 +45,8 @@ def test_response_iterator(self): next_res = next(iter(res)) self.assertIsNotNone(next_res) self.assertIsInstance(next_res, dict) - self.assertEqual(next_res['schema'], 'isad') - self.assertEqual(next_res['reference'], 'Z 248.24') + self.assertEqual(next_res["schema"], "isad") + self.assertEqual(next_res["reference"], "Z 248.24") records = [r for r in res] self.assertEqual(len(records), 220) @@ -59,9 +54,9 @@ def test_response_iterator(self): def test_response_index(self): filenames = [ - 'response_multiple_1.xml', - 'response_multiple_2.xml', - 'response_multiple_3.xml', + "response_multiple_1.xml", + "response_multiple_2.xml", + "response_multiple_3.xml", ] data_loader = self._data_loader_mock(filenames) res = SearchRetrieveResponse(data_loader) @@ -78,7 +73,7 @@ def test_response_index(self): class TestExplainResponse(ResponseTestCase): def test_response_simple(self): - data_loader = self._data_loader_mock(['test_explain.xml']) + data_loader = self._data_loader_mock(["test_explain.xml"]) res = ExplainResponse(data_loader) self.assertEqual(data_loader.load.call_count, 1) diff --git a/tests/sru_test.py b/tests/sru_test.py index c0d97ff..337df0d 100644 --- a/tests/sru_test.py +++ b/tests/sru_test.py @@ -1,97 +1,104 @@ +import mock from sruthi_test import SruthiTestCase import sruthi class TestSru(SruthiTestCase): def test_searchretrieve(self): - r = sruthi.searchretrieve('http://test.com/sru/', 'Test-Query') + r = sruthi.searchretrieve("http://test.com/sru/", "Test-Query") self.assertIsInstance(r, sruthi.response.SearchRetrieveResponse) self.session_mock.return_value.get.assert_called_once_with( - 'http://test.com/sru/', + "http://test.com/sru/", params={ - 'operation': 'searchRetrieve', - 'version': '1.2', - 'query': 'Test-Query', - 'startRecord': 1, - 'maximumRecords': 10, - } + "operation": "searchRetrieve", + "version": "1.2", + "query": "Test-Query", + "startRecord": 1, + "maximumRecords": 10, + }, ) def test_searchretrieve_with_maximum_records(self): - r = sruthi.searchretrieve('http://test.com/sru/', 'Test-Query', maximum_records=100) + r = sruthi.searchretrieve( + "http://test.com/sru/", "Test-Query", maximum_records=100 + ) self.assertIsInstance(r, sruthi.response.SearchRetrieveResponse) self.session_mock.return_value.get.assert_called_once_with( - 'http://test.com/sru/', + "http://test.com/sru/", params={ - 'operation': 'searchRetrieve', - 'version': '1.2', - 'query': 'Test-Query', - 'startRecord': 1, - 'maximumRecords': 100, - } + "operation": "searchRetrieve", + "version": "1.2", + "query": "Test-Query", + "startRecord": 1, + "maximumRecords": 100, + }, ) def test_searchretrieve_with_record_schema(self): - r = sruthi.searchretrieve('http://test.com/sru/', 'Test-Query', record_schema='isad') + r = sruthi.searchretrieve( + "http://test.com/sru/", "Test-Query", record_schema="isad" + ) self.assertIsInstance(r, sruthi.response.SearchRetrieveResponse) self.session_mock.return_value.get.assert_called_once_with( - 'http://test.com/sru/', + "http://test.com/sru/", params={ - 'operation': 'searchRetrieve', - 'version': '1.2', - 'query': 'Test-Query', - 'startRecord': 1, - 'maximumRecords': 10, - 'recordSchema': 'isad', - } + "operation": "searchRetrieve", + "version": "1.2", + "query": "Test-Query", + "startRecord": 1, + "maximumRecords": 10, + "recordSchema": "isad", + }, ) def test_searchretrieve_with_start_record(self): - r = sruthi.searchretrieve('http://test.com/sru/', 'Test-Query', start_record=10) + r = sruthi.searchretrieve("http://test.com/sru/", "Test-Query", start_record=10) self.assertIsInstance(r, sruthi.response.SearchRetrieveResponse) self.session_mock.return_value.get.assert_called_once_with( - 'http://test.com/sru/', + "http://test.com/sru/", params={ - 'operation': 'searchRetrieve', - 'version': '1.2', - 'query': 'Test-Query', - 'startRecord': 10, - 'maximumRecords': 10, - } + "operation": "searchRetrieve", + "version": "1.2", + "query": "Test-Query", + "startRecord": 10, + "maximumRecords": 10, + }, ) - def test_searchretrieve_with_requests_kwargs(self): + def test_searchretrieve_with_session(self): + content, path = self._test_content() + session_mock = mock.MagicMock( + get=mock.MagicMock(return_value=mock.MagicMock(content=content)) + ) # noqa + # session_mock.verify = False r = sruthi.searchretrieve( - 'http://test.com/sru/', - 'Test-Query', - requests_kwargs={'verify': False} + "http://test.com/sru/", "Test-Query", session=session_mock ) self.assertIsInstance(r, sruthi.response.SearchRetrieveResponse) - self.session_mock.return_value.get.assert_called_once_with( - 'http://test.com/sru/', + session_mock.get.assert_called_once_with( + "http://test.com/sru/", params={ - 'operation': 'searchRetrieve', - 'version': '1.2', - 'query': 'Test-Query', - 'startRecord': 1, - 'maximumRecords': 10, + "operation": "searchRetrieve", + "version": "1.2", + "query": "Test-Query", + "startRecord": 1, + "maximumRecords": 10, }, - verify=False ) def test_explain(self): - info = sruthi.explain('http://test.com/sru/') - self.assertEqual(info.sru_version, '1.2'), - self.assertEqual(info['sru_version'], '1.2') + info = sruthi.explain("http://test.com/sru/") + self.assertEqual(info.sru_version, "1.2"), + self.assertEqual(info["sru_version"], "1.2") self.assertIsInstance(info, sruthi.response.AttributeDict) self.session_mock.return_value.get.assert_called_once_with( - 'http://test.com/sru/', + "http://test.com/sru/", params={ - 'operation': 'explain', - 'version': '1.2', - } + "operation": "explain", + "version": "1.2", + }, ) def test_client(self): - client = sruthi.Client('http://test.com/sru') + client = sruthi.Client("http://test.com/sru") self.assertIsInstance(client, sruthi.client.Client) diff --git a/tests/sruthi_test.py b/tests/sruthi_test.py index 08d04a3..36222ac 100644 --- a/tests/sruthi_test.py +++ b/tests/sruthi_test.py @@ -3,17 +3,12 @@ import os from sruthi import xmlparse -__location__ = os.path.realpath( - os.path.join( - os.getcwd(), - os.path.dirname(__file__) - ) -) +__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) class SruthiTestCase(unittest.TestCase): def setUp(self): - self.patcher = mock.patch('sruthi.client.requests.Session') + self.patcher = mock.patch("sruthi.client.requests.Session") self.session_mock = self.patcher.start() self._session_mock(self.session_mock) @@ -21,18 +16,25 @@ def tearDown(self): self.patcher.stop() def _session_mock(self, session_mock, filename=None): + content, path = self._test_content(filename=filename) + + if not path: + return + + session_mock.return_value.get.return_value = mock.MagicMock(content=content) + + def _test_content(self, filename=None): if not filename: filename = self._testMethodName + ".xml" - path = os.path.join( - __location__, - 'fixtures', - filename - ) + + path = os.path.join(__location__, "fixtures", filename) if not os.path.exists(path): - return + return ("", None) with open(path) as file: - session_mock.return_value.get.return_value = mock.MagicMock(content=file.read()) # noqa + content = file.read() + + return (content, path) class ResponseTestCase(SruthiTestCase): @@ -45,11 +47,7 @@ def _data_loader_mock(self, filenames): return m def _load_xml(self, filename): - path = os.path.join( - __location__, - 'fixtures', - filename - ) + path = os.path.join(__location__, "fixtures", filename) xmlparser = xmlparse.XMLParser() with open(path) as file: content = file.read() diff --git a/validate.sh b/validate.sh index 38d7a61..8333cb0 100755 --- a/validate.sh +++ b/validate.sh @@ -9,7 +9,7 @@ function cleanup { trap "cleanup" EXIT # Check PEP-8 code style and McCabe complexity -flake8 . --count --show-source --statistics +make lint # run tests with test coverage -pytest --cov=sruthi tests/ +make test