From 024f209e26e70f20f49b441d1a3cbc0ac5673f84 Mon Sep 17 00:00:00 2001 From: Tome Cirun Date: Mon, 13 Sep 2021 11:48:57 +0200 Subject: [PATCH 01/11] replace usage of unicode with unicode_safe --- ckanext/scheming/custom_group_with_status.json | 4 ++-- ckanext/scheming/custom_org_with_address.json | 4 ++-- ckanext/scheming/group_with_bookface.json | 4 ++-- ckanext/scheming/org_with_dept_id.json | 4 ++-- ckanext/scheming/presets.json | 12 ++++++------ ckanext/scheming/validation.py | 3 ++- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/ckanext/scheming/custom_group_with_status.json b/ckanext/scheming/custom_group_with_status.json index 3bf5ae0b..186ca223 100644 --- a/ckanext/scheming/custom_group_with_status.json +++ b/ckanext/scheming/custom_group_with_status.json @@ -6,7 +6,7 @@ { "field_name": "title", "label": "Name", - "validators": "ignore_missing unicode", + "validators": "ignore_missing unicode_safe", "form_snippet": "large_text.html", "form_attrs": {"data-module": "slug-preview-target"}, "form_placeholder": "My theme" @@ -14,7 +14,7 @@ { "field_name": "name", "label": "URL", - "validators": "not_empty unicode name_validator group_name_validator", + "validators": "not_empty unicode_safe name_validator group_name_validator", "form_snippet": "slug.html", "form_placeholder": "my-theme" }, diff --git a/ckanext/scheming/custom_org_with_address.json b/ckanext/scheming/custom_org_with_address.json index 0d52fcb9..6f6926b9 100644 --- a/ckanext/scheming/custom_org_with_address.json +++ b/ckanext/scheming/custom_org_with_address.json @@ -6,7 +6,7 @@ { "field_name": "title", "label": "Name", - "validators": "ignore_missing unicode", + "validators": "ignore_missing unicode_safe", "form_snippet": "large_text.html", "form_attrs": {"data-module": "slug-preview-target"}, "form_placeholder": "My theme" @@ -14,7 +14,7 @@ { "field_name": "name", "label": "URL", - "validators": "not_empty unicode name_validator group_name_validator", + "validators": "not_empty unicode_safe name_validator group_name_validator", "form_snippet": "slug.html", "form_placeholder": "my-theme" }, diff --git a/ckanext/scheming/group_with_bookface.json b/ckanext/scheming/group_with_bookface.json index 8e8ea284..05c6b05f 100644 --- a/ckanext/scheming/group_with_bookface.json +++ b/ckanext/scheming/group_with_bookface.json @@ -6,7 +6,7 @@ { "field_name": "title", "label": "Name", - "validators": "ignore_missing unicode", + "validators": "ignore_missing unicode_safe", "form_snippet": "large_text.html", "form_attrs": {"data-module": "slug-preview-target"}, "form_placeholder": "My Organization" @@ -14,7 +14,7 @@ { "field_name": "name", "label": "URL", - "validators": "not_empty unicode name_validator group_name_validator", + "validators": "not_empty unicode_safe name_validator group_name_validator", "form_snippet": "slug.html", "form_placeholder": "my-organization" }, diff --git a/ckanext/scheming/org_with_dept_id.json b/ckanext/scheming/org_with_dept_id.json index 0092e81e..c629cd8c 100644 --- a/ckanext/scheming/org_with_dept_id.json +++ b/ckanext/scheming/org_with_dept_id.json @@ -6,7 +6,7 @@ { "field_name": "title", "label": "Name", - "validators": "ignore_missing unicode", + "validators": "ignore_missing unicode_safe", "form_snippet": "large_text.html", "form_attrs": {"data-module": "slug-preview-target"}, "form_placeholder": "My Organization" @@ -14,7 +14,7 @@ { "field_name": "name", "label": "URL", - "validators": "not_empty unicode name_validator group_name_validator", + "validators": "not_empty unicode_safe name_validator group_name_validator", "form_snippet": "slug.html", "form_placeholder": "my-organization" }, diff --git a/ckanext/scheming/presets.json b/ckanext/scheming/presets.json index e568dce9..5d34d685 100644 --- a/ckanext/scheming/presets.json +++ b/ckanext/scheming/presets.json @@ -6,7 +6,7 @@ { "preset_name": "title", "values": { - "validators": "if_empty_same_as(name) unicode", + "validators": "if_empty_same_as(name) unicode_safe", "form_snippet": "large_text.html", "form_attrs": { "data-module": "slug-preview-target" @@ -16,7 +16,7 @@ { "preset_name": "dataset_slug", "values": { - "validators": "not_empty unicode name_validator package_name_validator", + "validators": "not_empty unicode_safe name_validator package_name_validator", "form_snippet": "slug.html" } }, @@ -35,14 +35,14 @@ { "preset_name": "dataset_organization", "values": { - "validators": "owner_org_validator unicode", + "validators": "owner_org_validator unicode_safe", "form_snippet": "organization.html" } }, { "preset_name": "resource_url_upload", "values": { - "validators": "ignore_missing unicode remove_whitespace", + "validators": "ignore_missing unicode_safe remove_whitespace", "form_snippet": "upload.html", "form_placeholder": "http://example.com/my-data.csv", "upload_field": "upload", @@ -53,7 +53,7 @@ { "preset_name": "organization_url_upload", "values": { - "validators": "ignore_missing unicode remove_whitespace", + "validators": "ignore_missing unicode_safe remove_whitespace", "form_snippet": "organization_upload.html", "form_placeholder": "http://example.com/my-data.csv" } @@ -61,7 +61,7 @@ { "preset_name": "resource_format_autocomplete", "values": { - "validators": "if_empty_guess_format ignore_missing clean_format unicode", + "validators": "if_empty_guess_format ignore_missing clean_format unicode_safe", "form_placeholder": "eg. CSV, XML or JSON", "form_attrs": { "data-module": "autocomplete", diff --git a/ckanext/scheming/validation.py b/ckanext/scheming/validation.py index b07bdae5..7824e64f 100644 --- a/ckanext/scheming/validation.py +++ b/ckanext/scheming/validation.py @@ -8,6 +8,7 @@ import ckan.lib.helpers as h from ckan.lib.navl.dictization_functions import convert +from ckan.lib.navl.validators import unicode_safe from ckantoolkit import ( get_validator, UnknownValidator, @@ -431,7 +432,7 @@ def get_validator_or_converter(name): Get a validator or converter by name """ if name == 'unicode': - return six.text_type + return unicode_safe try: v = get_validator(name) return v From 6371ad839007b6e2357c2a56415757291ab28d06 Mon Sep 17 00:00:00 2001 From: Tome Cirun Date: Fri, 17 Sep 2021 11:35:08 +0200 Subject: [PATCH 02/11] replace usage of unicode with unicode_safe --- README.md | 4 ++-- ckanext/scheming/ckan_dataset.yaml | 2 +- ckanext/scheming/tests/test_helpers.py | 3 ++- ckanext/scheming/validation.py | 6 ++---- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2f5ec5eb..b361c8d7 100644 --- a/README.md +++ b/README.md @@ -413,13 +413,13 @@ passing the comma-separated values within as string parameters and the result is used as the validator/converter. ```yaml -validators: if_empty_same_as(name) unicode +validators: if_empty_same_as(name) unicode_safe ``` is the same as a plugin using the validators: ```python -[get_validator('if_empty_same_as')("name"), unicode] +[get_validator('if_empty_same_as')("name"), unicode_safe] ``` This string does not contain arbitrary python code to be executed, diff --git a/ckanext/scheming/ckan_dataset.yaml b/ckanext/scheming/ckan_dataset.yaml index 5c7acab9..9559ec76 100644 --- a/ckanext/scheming/ckan_dataset.yaml +++ b/ckanext/scheming/ckan_dataset.yaml @@ -43,7 +43,7 @@ dataset_fields: - field_name: version label: Version - validators: ignore_missing unicode package_version_validator + validators: ignore_missing unicode_safe package_version_validator form_placeholder: '1.0' - field_name: author diff --git a/ckanext/scheming/tests/test_helpers.py b/ckanext/scheming/tests/test_helpers.py index a8f1a31d..bb2e0569 100644 --- a/ckanext/scheming/tests/test_helpers.py +++ b/ckanext/scheming/tests/test_helpers.py @@ -54,7 +54,8 @@ def test_explicit_required_false(self): assert not scheming_field_required({"required": False}) def test_not_empty_in_validators(self): - assert scheming_field_required({"validators": "not_empty unicode"}) + assert scheming_field_required(\ + {"validators": "not_empty unicode_safe"}) def test_not_empty_not_in_validators(self): assert not scheming_field_required({"validators": "maybe_not_empty"}) diff --git a/ckanext/scheming/validation.py b/ckanext/scheming/validation.py index 7824e64f..672a8f2f 100644 --- a/ckanext/scheming/validation.py +++ b/ckanext/scheming/validation.py @@ -409,8 +409,8 @@ def validators_from_string(s, field, schema): """ convert a schema validators string to a list of validators - e.g. "if_empty_same_as(name) unicode" becomes: - [if_empty_same_as("name"), unicode] + e.g. "if_empty_same_as(name) unicode_safe" becomes: + [if_empty_same_as("name"), unicode_safe] """ out = [] parts = s.split() @@ -431,8 +431,6 @@ def get_validator_or_converter(name): """ Get a validator or converter by name """ - if name == 'unicode': - return unicode_safe try: v = get_validator(name) return v From 2e2a9a1e63f428f27a0ac134d7500b35eb96439d Mon Sep 17 00:00:00 2001 From: Tome Cirun Date: Tue, 5 Oct 2021 11:34:31 +0200 Subject: [PATCH 03/11] add copy of unicode_safe --- ckanext/scheming/validation.py | 40 +++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/ckanext/scheming/validation.py b/ckanext/scheming/validation.py index 672a8f2f..7e13d9b0 100644 --- a/ckanext/scheming/validation.py +++ b/ckanext/scheming/validation.py @@ -8,7 +8,6 @@ import ckan.lib.helpers as h from ckan.lib.navl.dictization_functions import convert -from ckan.lib.navl.validators import unicode_safe from ckantoolkit import ( get_validator, UnknownValidator, @@ -557,3 +556,42 @@ def repeating_text_output(value): return json.loads(value) except ValueError: return [value] + + +@scheming_validator +@register_validator +def unicode_safe(value): + ''' + Make sure value passed is treated as unicode, but don't raise + an error if it's not, just make a reasonable attempt to + convert other types passed. + + This validator is a safer alternative to the old ckan idiom + of using the unicode() function as a validator. It tries + not to pollute values with Python repr garbage e.g. when passed + a list of strings (uses json format instead). It also + converts binary strings assuming either UTF-8 or CP1252 + encodings (not ASCII, with occasional decoding errors) + ''' + if isinstance(value, str): + return value + if hasattr(value, 'filename'): + # cgi.FieldStorage instance for uploaded files, show the name + value = value.filename + if value is missing or value is None: + return u'' + if isinstance(value, bytes): + # bytes only arrive when core ckan or plugins call + # actions from Python code + try: + return six.ensure_text(value) + except UnicodeDecodeError: + return value.decode(u'cp1252') + try: + return json.dumps(value, sort_keys=True, ensure_ascii=False) + except Exception: + # at this point we have given up. Just don't error out + try: + return str(value) + except Exception: + return u'\N{REPLACEMENT CHARACTER}' \ No newline at end of file From 3603dc9d8a2ec2253f3230d87fc7815001913005 Mon Sep 17 00:00:00 2001 From: Tome Cirun Date: Tue, 5 Oct 2021 14:29:52 +0200 Subject: [PATCH 04/11] remove scheming_validator --- ckanext/scheming/validation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ckanext/scheming/validation.py b/ckanext/scheming/validation.py index 7e13d9b0..eb8d1914 100644 --- a/ckanext/scheming/validation.py +++ b/ckanext/scheming/validation.py @@ -558,7 +558,6 @@ def repeating_text_output(value): return [value] -@scheming_validator @register_validator def unicode_safe(value): ''' From 48e4b50ca74ee7e455c8ea8c92e5491bce33591b Mon Sep 17 00:00:00 2001 From: Tome Cirun Date: Tue, 5 Oct 2021 14:49:57 +0200 Subject: [PATCH 05/11] update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b361c8d7..13899379 100644 --- a/README.md +++ b/README.md @@ -419,7 +419,7 @@ validators: if_empty_same_as(name) unicode_safe is the same as a plugin using the validators: ```python -[get_validator('if_empty_same_as')("name"), unicode_safe] +[get_validator('if_empty_same_as')("name"), get_validator('unicode_safe')] ``` This string does not contain arbitrary python code to be executed, From 0c7fa61e80d3d1c0629f600789838e73576dd2af Mon Sep 17 00:00:00 2001 From: Tome Cirun Date: Mon, 29 Nov 2021 15:47:34 +0100 Subject: [PATCH 06/11] add comment --- ckanext/scheming/validation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ckanext/scheming/validation.py b/ckanext/scheming/validation.py index eb8d1914..b149e4a5 100644 --- a/ckanext/scheming/validation.py +++ b/ckanext/scheming/validation.py @@ -572,6 +572,9 @@ def unicode_safe(value): converts binary strings assuming either UTF-8 or CP1252 encodings (not ASCII, with occasional decoding errors) ''' + # This code was copied from core CKAN and was added to allow + # support for >= 2.9 CKAN versions. + if isinstance(value, str): return value if hasattr(value, 'filename'): From 73a91d5db1ddfe94fe90b53b53f2390eab623434 Mon Sep 17 00:00:00 2001 From: Christian Foerster Date: Wed, 5 Apr 2023 11:46:43 +0000 Subject: [PATCH 07/11] updated workflow to include ckan 2.10 using latest file on master --- .github/workflows/test.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b310ef0c..ae3cbdbd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,7 +17,7 @@ jobs: needs: lint strategy: matrix: - ckan-version: [2.9, 2.9-py2, 2.8, 2.7] + ckan-version: ["2.10", 2.9, 2.9-py2, 2.8, 2.7] fail-fast: false name: CKAN ${{ matrix.ckan-version }} @@ -26,7 +26,7 @@ jobs: image: openknowledge/ckan-dev:${{ matrix.ckan-version }} services: solr: - image: ckan/ckan-solr-dev:${{ matrix.ckan-version }} + image: ckan/ckan-solr:${{ matrix.ckan-version }} postgres: image: ckan/ckan-postgres-dev:${{ matrix.ckan-version }} env: @@ -47,7 +47,6 @@ jobs: - uses: actions/checkout@v2 - name: Install requirements run: | - pip install -r test-requirements.txt pip install -e . # Replace default path to CKAN core config file with the one on the container sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini @@ -55,10 +54,12 @@ jobs: - name: Setup extension (CKAN >= 2.9) if: ${{ matrix.ckan-version != '2.7' && matrix.ckan-version != '2.8' }} run: | + pip install -r test-requirements.txt ckan -c test.ini db init - name: Setup extension (CKAN < 2.9) if: ${{ matrix.ckan-version == '2.7' || matrix.ckan-version == '2.8' }} run: | + pip install -r test-requirements-py2.txt paster --plugin=ckan db init -c test.ini - name: Run all tests run: pytest --ckan-ini=test.ini --cov=ckanext.scheming ckanext/scheming/tests From 4a824f47c5812f7b4a2b4b08afb076513268c5e6 Mon Sep 17 00:00:00 2001 From: Christian Foerster Date: Thu, 20 Apr 2023 08:38:29 +0000 Subject: [PATCH 08/11] tidying up artefacts and wrong formatting --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e887b263..b1c2ee74 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,7 @@ Table of contents: - [Example Schemas](#example-schemas) - [Storing non-string data](#storing-non-string-data) - [Common Schema Keys](#common-schema-keys) - - [`scheming_version`](#scheming_version) .github/workflows/test.yml -README.md -ckanext/scheming/tests/test_helpers.py + - [`scheming_version`](#scheming_version) - [`about_url`](#about_url) - [Dataset Schema Keys](#dataset-schema-keys) - [`dataset_type`](#dataset_type) @@ -562,7 +560,7 @@ passing the comma-separated values within as string parameters and the result is used as the validator/converter. ```yaml -validators: if_empty_same_as(name) unicode_safe + validators: if_empty_same_as(name) unicode_safe ``` is the same as a plugin using the validators: From 568c8516c29724a17ef514d3b6b7812b713c6274 Mon Sep 17 00:00:00 2001 From: Christian Foerster Date: Thu, 20 Apr 2023 08:40:40 +0000 Subject: [PATCH 09/11] added unicode check to get_validator_or_converter --- ckanext/scheming/validation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ckanext/scheming/validation.py b/ckanext/scheming/validation.py index ca5210d0..17df11ef 100644 --- a/ckanext/scheming/validation.py +++ b/ckanext/scheming/validation.py @@ -354,6 +354,8 @@ def get_validator_or_converter(name): """ Get a validator or converter by name """ + if name == 'unicode': + return six.text_type try: v = get_validator(name) return v From d80826e3b7b514c4e90e74661185020c2558290b Mon Sep 17 00:00:00 2001 From: Christian Foerster Date: Thu, 20 Apr 2023 09:26:28 +0000 Subject: [PATCH 10/11] removed import of unicode_safe from ckantoolkit, copy pasted latest version from ckan core instead --- ckanext/scheming/validation.py | 82 ++++++++++++++++------------------ 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/ckanext/scheming/validation.py b/ckanext/scheming/validation.py index 17df11ef..85b28eaf 100644 --- a/ckanext/scheming/validation.py +++ b/ckanext/scheming/validation.py @@ -15,7 +15,6 @@ Invalid, StopOnError, _, - unicode_safe, ) import ckanext.scheming.helpers as sh @@ -47,7 +46,45 @@ def scheming_validator(fn): return fn -register_validator(unicode_safe) +@register_validator +def unicode_safe(value: Any) -> str: + ''' + Make sure value passed is treated as unicode, but don't raise + an error if it's not, just make a reasonable attempt to + convert other types passed. + + This validator is a safer alternative to the old ckan idiom + of using the unicode() function as a validator. It tries + not to pollute values with Python repr garbage e.g. when passed + a list of strings (uses json format instead). It also + converts binary strings assuming either UTF-8 or CP1252 + encodings (not ASCII, with occasional decoding errors) + ''' + + # This code was copied from ckan core + + if isinstance(value, str): + return value + if hasattr(value, 'filename'): + # cgi.FieldStorage instance for uploaded files, show the name + value = value.filename + if value is missing or value is None: + return u'' + if isinstance(value, bytes): + # bytes only arrive when core ckan or plugins call + # actions from Python code + try: + return bytes.decode(value) + except UnicodeDecodeError: + return value.decode(u'cp1252') + try: + return json.dumps(value, sort_keys=True, ensure_ascii=False) + except Exception: + # at this point we have given up. Just don't error out + try: + return str(value) + except Exception: + return u'\N{REPLACEMENT CHARACTER}' @register_validator @@ -482,44 +519,3 @@ def repeating_text_output(value): return json.loads(value) except ValueError: return [value] - - -@register_validator -def unicode_safe(value): - ''' - Make sure value passed is treated as unicode, but don't raise - an error if it's not, just make a reasonable attempt to - convert other types passed. - - This validator is a safer alternative to the old ckan idiom - of using the unicode() function as a validator. It tries - not to pollute values with Python repr garbage e.g. when passed - a list of strings (uses json format instead). It also - converts binary strings assuming either UTF-8 or CP1252 - encodings (not ASCII, with occasional decoding errors) - ''' - # This code was copied from core CKAN and was added to allow - # support for >= 2.9 CKAN versions. - - if isinstance(value, str): - return value - if hasattr(value, 'filename'): - # cgi.FieldStorage instance for uploaded files, show the name - value = value.filename - if value is missing or value is None: - return u'' - if isinstance(value, bytes): - # bytes only arrive when core ckan or plugins call - # actions from Python code - try: - return six.ensure_text(value) - except UnicodeDecodeError: - return value.decode(u'cp1252') - try: - return json.dumps(value, sort_keys=True, ensure_ascii=False) - except Exception: - # at this point we have given up. Just don't error out - try: - return str(value) - except Exception: - return u'\N{REPLACEMENT CHARACTER}' \ No newline at end of file From 947ae629e36c2cee73dc195d7c0f8c3471b5c6c3 Mon Sep 17 00:00:00 2001 From: Christian Foerster Date: Thu, 20 Apr 2023 11:02:28 +0000 Subject: [PATCH 11/11] removing type hints for unicode_safe --- ckanext/scheming/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/scheming/validation.py b/ckanext/scheming/validation.py index 85b28eaf..1c934b66 100644 --- a/ckanext/scheming/validation.py +++ b/ckanext/scheming/validation.py @@ -47,7 +47,7 @@ def scheming_validator(fn): @register_validator -def unicode_safe(value: Any) -> str: +def unicode_safe(value): ''' Make sure value passed is treated as unicode, but don't raise an error if it's not, just make a reasonable attempt to