diff --git a/.vscode/settings.json b/.vscode/settings.json index 2ec7d9a4..3bb5277c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -21,5 +21,6 @@ }, "[json]": { "editor.formatOnSave": true - } + }, + "mochaExplorer.files": "test/**/*.test.{ts,js}" } diff --git a/package-lock.json b/package-lock.json index 56dd6484..7e8dfdc6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "impresso-middle-layer", - "version": "2.5.0", + "version": "3.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "impresso-middle-layer", - "version": "2.5.0", + "version": "3.0.1", "dependencies": { "@feathersjs/authentication": "5.0.25", "@feathersjs/authentication-local": "5.0.25", @@ -58,6 +58,7 @@ "http-proxy-middleware": "^2.0.1", "impresso-jscommons": "https://github.com/impresso/impresso-jscommons/tarball/v1.4.3", "json2csv": "^4.3.3", + "jsonpath-plus": "^10.0.1", "jsonschema": "^1.4.1", "lodash": "^4.17.21", "lodash.first": "^3.0.0", @@ -90,7 +91,7 @@ "wikidata-sdk": "^5.15.10", "winston": "3.13.0", "xml2js": "^0.6.2", - "yaml": "^2.1.1" + "yaml": "^2.6.0" }, "devDependencies": { "@openapi-contrib/json-schema-to-openapi-schema": "3.0.1", @@ -943,11 +944,22 @@ "resolved": "https://registry.npmjs.org/@jsdevtools/ono/-/ono-7.1.3.tgz", "integrity": "sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg==" }, + "node_modules/@jsep-plugin/assignment": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.2.1.tgz", + "integrity": "sha512-gaHqbubTi29aZpVbBlECRpmdia+L5/lh2BwtIJTmtxdbecEyyX/ejAOg7eQDGNvGOUmPY7Z2Yxdy9ioyH/VJeA==", + "license": "MIT", + "engines": { + "node": ">= 10.16.0" + }, + "peerDependencies": { + "jsep": "^0.4.0||^1.0.0" + } + }, "node_modules/@jsep-plugin/regex": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.3.tgz", "integrity": 
"sha512-XfZgry4DwEZvSFtS/6Y+R48D7qJYJK6R9/yJFyUFHCIUMEEHuJ4X95TDgJp5QkmzfLYvapMPzskV5HpIDrREug==", - "dev": true, "engines": { "node": ">= 10.16.0" }, @@ -1573,6 +1585,16 @@ "node": "^12.20 || >=14.13" } }, + "node_modules/@stoplight/spectral-core/node_modules/jsonpath-plus": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-7.1.0.tgz", + "integrity": "sha512-gTaNRsPWO/K2KY6MrqaUFClF9kmuM6MFH5Dhg1VYDODgFbByw1yb7xu3hrViE/sz+dGOeMWgCzwUwQtAnCTE9g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/@stoplight/spectral-formats": { "version": "1.6.0", "resolved": "https://registry.npmjs.org/@stoplight/spectral-formats/-/spectral-formats-1.6.0.tgz", @@ -7211,10 +7233,10 @@ "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" }, "node_modules/jsep": { - "version": "1.3.8", - "resolved": "https://registry.npmjs.org/jsep/-/jsep-1.3.8.tgz", - "integrity": "sha512-qofGylTGgYj9gZFsHuyWAN4jr35eJ66qJCK4eKDnldohuUoQFbU3iZn2zjvEbd9wOAhP9Wx5DsAAduTyE1PSWQ==", - "dev": true, + "version": "1.3.9", + "resolved": "https://registry.npmjs.org/jsep/-/jsep-1.3.9.tgz", + "integrity": "sha512-i1rBX5N7VPl0eYb6+mHNp52sEuaS2Wi8CDYx1X5sn9naevL78+265XJqy1qENEk7mRKwS06NHpUqiBwR7qeodw==", + "license": "MIT", "engines": { "node": ">= 10.16.0" } @@ -7445,12 +7467,21 @@ ] }, "node_modules/jsonpath-plus": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-7.1.0.tgz", - "integrity": "sha512-gTaNRsPWO/K2KY6MrqaUFClF9kmuM6MFH5Dhg1VYDODgFbByw1yb7xu3hrViE/sz+dGOeMWgCzwUwQtAnCTE9g==", - "dev": true, + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.0.1.tgz", + "integrity": "sha512-30DeH2QD4nL1IpDLPIFz09G5XyLvh+oNMUI2Zxf4tbrlsVHs0e3VPnwpOnSTFb4yM0dfQK2WGKLsSaAS8V62rw==", + "license": "MIT", + "dependencies": { + "@jsep-plugin/assignment": "^1.2.1", + 
"@jsep-plugin/regex": "^1.0.3", + "jsep": "^1.3.9" + }, + "bin": { + "jsonpath": "bin/jsonpath-cli.js", + "jsonpath-plus": "bin/jsonpath-cli.js" + }, "engines": { - "node": ">=12.0.0" + "node": ">=18.0.0" } }, "node_modules/jsonpointer": { @@ -11876,9 +11907,10 @@ } }, "node_modules/yaml": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.2.tgz", - "integrity": "sha512-B3VqDZ+JAg1nZpaEmWtTXUlBneoGx6CPM9b0TENK6aoSu5t73dItudwdgmi6tHlIZZId4dZ9skcAQ2UbcyAeVA==", + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.6.0.tgz", + "integrity": "sha512-a6ae//JvKDEra2kdi1qzCyrJW/WZCgFi8ydDV+eXExl95t+5R+ijnqHJbz9tmMh8FUjx3iv2fCQ4dclAQlO2UQ==", + "license": "ISC", "bin": { "yaml": "bin.mjs" }, diff --git a/package.json b/package.json index 2a485051..1c766968 100644 --- a/package.json +++ b/package.json @@ -25,8 +25,8 @@ "watch": "tsc -p ./tsconfig.json -w & tscp -w", "build": "tsc -p ./tsconfig.json", "copy-files": "tscp", - "test": "mocha 'test/**/*.test.js'", - "test-watch": "mocha 'test/**/*.test.js' --watch", + "test": "mocha --require ts-node/register 'test/**/*.test.{js,ts}'", + "test-watch": "mocha --require ts-node/register --watch 'test/**/*.test.{js,ts}'", "integration-test": "NODE_ENV=test mocha --config ./.mocharc-integration.json 'test/integration/**/*.test.js'", "lintfix": "eslint src/. --config .eslintrc.js --fix", "lint": "eslint src/. 
--config .eslintrc.js",
@@ -92,6 +92,7 @@
     "http-proxy-middleware": "^2.0.1",
     "impresso-jscommons": "https://github.com/impresso/impresso-jscommons/tarball/v1.4.3",
     "json2csv": "^4.3.3",
+    "jsonpath-plus": "^10.0.1",
     "jsonschema": "^1.4.1",
     "lodash": "^4.17.21",
     "lodash.first": "^3.0.0",
@@ -123,7 +124,7 @@
     "wikidata-sdk": "^5.15.10",
     "winston": "3.13.0",
     "xml2js": "^0.6.2",
-    "yaml": "^2.1.1",
+    "yaml": "^2.6.0",
     "undici": "6.19.8"
   },
   "devDependencies": {
diff --git a/src/authentication.ts b/src/authentication.ts
index e0f3c67a..3a034648 100644
--- a/src/authentication.ts
+++ b/src/authentication.ts
@@ -59,6 +59,7 @@ export interface SlimUser {
   uid: string
   id: number
   isStaff: boolean
+  groups: string[]
 }
 
 /**
@@ -91,6 +92,7 @@ class NoDBJWTStrategy extends JWTStrategy {
       uid: payload.userId,
       id: parseInt(payload.sub),
       isStaff: payload.isStaff ?? false,
+      groups: payload.userGroups ?? [],
     }
     return {
       ...result,
diff --git a/src/hooks/redaction.ts b/src/hooks/redaction.ts
new file mode 100644
index 00000000..290d3fd4
--- /dev/null
+++ b/src/hooks/redaction.ts
@@ -0,0 +1,103 @@
+import { HookContext, HookFunction } from '@feathersjs/feathers'
+import { FindResponse } from '../models/common'
+import { ImpressoApplication } from '../types'
+import { Redactable, RedactionPolicy, redactObject } from '../util/redaction'
+import { SlimUser } from '../authentication'
+
+/**
+ * Predicate deciding whether redaction should be applied for a given hook context.
+ */
+export type RedactCondition = (context: HookContext) => boolean
+
+/**
+ * Redact the response object using the provided redaction policy.
+ * If the condition is provided, the redaction will only be applied if the condition is met.
+ *
+ * @param policy redaction policy applied to `context.result`.
+ * @param condition optional predicate; when it returns `false` the result is left untouched.
+ * @returns an after hook; it throws when registered as any other hook type.
+ */
+export const redactResponse = (
+  policy: RedactionPolicy,
+  condition?: RedactCondition
+): HookFunction => {
+  return context => {
+    if (context.type !== 'after') throw new Error('The redactResponse hook should be used as an after hook only')
+
+    if (condition != null && !condition(context)) return context
+
+    if (context.result != null) {
+      context.result = redactObject(context.result, policy)
+    }
+    return context
+  }
+}
+
+/**
+ * Redact every item of a list response using the provided redaction policy.
+ * By default the response is assumed to be a FindResponse object (has a `data`
+ * field with an array of objects); pass `dataItemsField` when the items live
+ * under a different top-level field.
+ * If the condition is provided, the redaction will only be applied if the condition is met.
+ *
+ * @param policy redaction policy applied to each item.
+ * @param condition optional predicate; when it returns `false` the result is left untouched.
+ * @param dataItemsField name of the result field holding the items (defaults to `data`).
+ * @returns an after hook; it throws when registered as any other hook type.
+ */
+export const redactResponseDataItem = (
+  policy: RedactionPolicy,
+  condition?: RedactCondition,
+  dataItemsField?: string
+): HookFunction => {
+  return context => {
+    if (context.type !== 'after') throw new Error('The redactResponseDataItem hook should be used as an after hook only')
+
+    if (condition != null && !condition(context)) return context
+
+    if (context.result != null) {
+      if (dataItemsField != null) {
+        const result = context.result as Record<string, any>
+        result[dataItemsField] = result[dataItemsField].map((item: Redactable) => redactObject(item, policy))
+      } else {
+        const result = context.result as any as FindResponse
+        result.data = result.data.map(item => redactObject(item, policy))
+      }
+    }
+    return context
+  }
+}
+
+/**
+ * Below are conditions that can be used in the redactResponse hook.
+ */
+export const inPublicApi: RedactCondition = context => {
+  return context.app.get('isPublicApi') == true
+}
+
+/**
+ * Condition is:
+ * - user is not authenticated
+ * - OR user is authenticated and is not in the specified group
+ */
+export const notInGroup =
+  (groupName: string): RedactCondition =>
+  context => {
+    const user = context.params?.user as any as SlimUser
+    // `user` is only cast to SlimUser, so `groups` may be absent at runtime;
+    // a missing list means "in no group" (redact) rather than a TypeError.
+    return user == null || !(user.groups ?? []).includes(groupName)
+  }
+
+const NoRedactionGroup = 'NoRedaction'
+
+/**
+ * Default condition we should currently use:
+ * - running as Public API
+ * - AND user is not in the NoRedaction group
+ */
+export const defaultCondition: RedactCondition = context => {
+  return inPublicApi(context) && notInGroup(NoRedactionGroup)(context)
+}
+
+export type { RedactionPolicy }
diff --git a/src/schema/common/redactionPolicy.json b/src/schema/common/redactionPolicy.json
new file mode 100644
index 00000000..17b09bb2
--- /dev/null
+++ b/src/schema/common/redactionPolicy.json
@@ -0,0 +1,32 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "RedactionPolicy",
+  "type": "object",
+  "properties": {
+    "name": {
+      "type": "string"
+    },
+    "items": {
+      "type": "array",
+      "items": {
+        "$ref": "#/definitions/RedactionPolicyItem"
+      }
+    }
+  },
+  "required": ["name", "items"],
+  "definitions": {
+    "RedactionPolicyItem": {
+      "type": "object",
+      "properties": {
+        "jsonPath": {
+          "type": "string"
+        },
+        "valueConverterName": {
+          "type": "string",
+          "enum": ["redact", "contextNotAllowedImage", "remove", "emptyArray"]
+        }
+      },
+      "required": ["jsonPath", "valueConverterName"]
+    }
+  }
+}
diff --git a/src/schema/schemas/Topic.json b/src/schema/schemas/Topic.json
index aa004e61..139fb897 100644
--- a/src/schema/schemas/Topic.json
+++ b/src/schema/schemas/Topic.json
@@ -47,6 +47,10 @@
             "w": {
               "type": "number",
               "description": "TODO"
+            },
+            "avg": {
+              "type": "number",
+              "description": "TODO"
             }
           },
           "required": ["uid", "w"]
diff --git
a/src/services/articles/articles.hooks.js b/src/services/articles/articles.hooks.js index 3bf61e34..08c59993 100644 --- a/src/services/articles/articles.hooks.js +++ b/src/services/articles/articles.hooks.js @@ -1,5 +1,7 @@ import { rateLimit } from '../../hooks/rateLimiter' import { authenticateAround as authenticate } from '../../hooks/authenticate' +import { redactResponse, redactResponseDataItem, defaultCondition } from '../../hooks/redaction' +import { loadYamlFile } from '../../util/yaml' const { utils, @@ -17,6 +19,8 @@ const { resolveTopics, resolveUserAddons } = require('../../hooks/resolvers/arti const { obfuscate } = require('../../hooks/access-rights') const { SolrMappings } = require('../../data/constants') +const articleRedactionPolicy = loadYamlFile(`${__dirname}/resources/articleRedactionPolicy.yml`) + module.exports = { around: { all: [authenticate({ allowUnauthenticated: true }), rateLimit()], @@ -90,6 +94,7 @@ module.exports = { resolveTopics(), saveResultsInCache(), obfuscate(), + redactResponseDataItem(articleRedactionPolicy, defaultCondition), ], get: [ // save here cache, flush cache here @@ -100,6 +105,7 @@ module.exports = { saveResultsInCache(), resolveUserAddons(), obfuscate(), + redactResponse(articleRedactionPolicy, defaultCondition), ], create: [], update: [], diff --git a/src/services/articles/resources/articleRedactionPolicy.yml b/src/services/articles/resources/articleRedactionPolicy.yml new file mode 100644 index 00000000..b95f3038 --- /dev/null +++ b/src/services/articles/resources/articleRedactionPolicy.yml @@ -0,0 +1,17 @@ +# yaml-language-server: $schema=../../../schema/common/redactionPolicy.json +name: artice-redaction-policy +items: + - jsonPath: $.title + valueConverterName: redact + - jsonPath: $.excerpt + valueConverterName: redact + - jsonPath: $.content + valueConverterName: redact + - jsonPath: $.regions + valueConverterName: emptyArray + - jsonPath: $.matches + valueConverterName: emptyArray + - jsonPath: 
$.pages[*].iiif + valueConverterName: contextNotAllowedImage + - jsonPath: $.pages[*].iiifThumbnail + valueConverterName: contextNotAllowedImage \ No newline at end of file diff --git a/src/services/search/search.hooks.js b/src/services/search/search.hooks.js index b7ded73b..27f32cfc 100644 --- a/src/services/search/search.hooks.js +++ b/src/services/search/search.hooks.js @@ -1,5 +1,7 @@ import { authenticateAround as authenticate } from '../../hooks/authenticate' import { rateLimit } from '../../hooks/rateLimiter' +import { redactResponseDataItem, defaultCondition } from '../../hooks/redaction' +import { loadYamlFile } from '../../util/yaml' const { protect } = require('@feathersjs/authentication-local').hooks const { @@ -16,6 +18,8 @@ const { paramsValidator, eachFilterValidator, eachFacetFilterValidator } = requi const { SolrMappings } = require('../../data/constants') const { SolrNamespaces } = require('../../solr') +const articleRedactionPolicy = loadYamlFile(`${__dirname}/../articles/resources/articleRedactionPolicy.yml`) + module.exports = { around: { find: [authenticate({ allowUnauthenticated: true }), rateLimit()], @@ -93,7 +97,12 @@ module.exports = { after: { all: [], - find: [displayQueryParams(['queryComponents', 'filters']), resolveQueryComponents(), protect('content')], + find: [ + displayQueryParams(['queryComponents', 'filters']), + resolveQueryComponents(), + protect('content'), + redactResponseDataItem(articleRedactionPolicy, defaultCondition), + ], get: [], create: [], update: [], diff --git a/src/services/text-reuse-clusters/resources/trClusterRedactionPolicy.yml b/src/services/text-reuse-clusters/resources/trClusterRedactionPolicy.yml new file mode 100644 index 00000000..d86509db --- /dev/null +++ b/src/services/text-reuse-clusters/resources/trClusterRedactionPolicy.yml @@ -0,0 +1,5 @@ +# yaml-language-server: $schema=../../../schema/common/redactionPolicy.json +name: tr-cluster-redaction-policy +items: + - jsonPath: $.textSample + 
valueConverterName: redact diff --git a/src/services/text-reuse-clusters/text-reuse-clusters.hooks.js b/src/services/text-reuse-clusters/text-reuse-clusters.hooks.js index e8f1d9c3..f9424aef 100644 --- a/src/services/text-reuse-clusters/text-reuse-clusters.hooks.js +++ b/src/services/text-reuse-clusters/text-reuse-clusters.hooks.js @@ -3,9 +3,13 @@ import { rateLimit } from '../../hooks/rateLimiter' import { decodeJsonQueryParameters } from '../../hooks/parameters' import { validate } from '../../hooks/params' import { parseFilters } from '../../util/queryParameters' +import { redactResponse, redactResponseDataItem, defaultCondition } from '../../hooks/redaction' +import { loadYamlFile } from '../../util/yaml' // const { validateWithSchema } = require('../../hooks/schema') +const trPassageRedactionPolicy = loadYamlFile(`${__dirname}/resources/trClusterRedactionPolicy.yml`) + module.exports = { around: { all: [authenticate({ allowUnauthenticated: true }), rateLimit()], @@ -30,6 +34,8 @@ module.exports = { after: { all: [], + get: [redactResponse(trPassageRedactionPolicy, defaultCondition)], + find: [redactResponseDataItem(trPassageRedactionPolicy, defaultCondition, 'clusters')], // find: [validateWithSchema('services/text-reuse-clusters/schema/find/response.json', 'result')], // get: [validateWithSchema('services/text-reuse-clusters/schema/get/response.json', 'result')], create: [], diff --git a/src/services/text-reuse-passages/resources/trPassageRedactionPolicy.yml b/src/services/text-reuse-passages/resources/trPassageRedactionPolicy.yml new file mode 100644 index 00000000..162f56c2 --- /dev/null +++ b/src/services/text-reuse-passages/resources/trPassageRedactionPolicy.yml @@ -0,0 +1,7 @@ +# yaml-language-server: $schema=../../../schema/common/redactionPolicy.json +name: tr-passage-redaction-policy +items: + - jsonPath: $.title + valueConverterName: redact + - jsonPath: $.content + valueConverterName: redact diff --git 
a/src/services/text-reuse-passages/text-reuse-passages.hooks.js b/src/services/text-reuse-passages/text-reuse-passages.hooks.js
index 2c44347b..4bd4aa83 100644
--- a/src/services/text-reuse-passages/text-reuse-passages.hooks.js
+++ b/src/services/text-reuse-passages/text-reuse-passages.hooks.js
@@ -3,6 +3,10 @@ import { decodeJsonQueryParameters, decodePathParameters } from '../../hooks/par
 import { validate } from '../../hooks/params'
 import { rateLimit } from '../../hooks/rateLimiter'
 import { parseFilters } from '../../util/queryParameters'
+import { redactResponse, redactResponseDataItem, defaultCondition } from '../../hooks/redaction'
+import { loadYamlFile } from '../../util/yaml'
+
+const trPassageRedactionPolicy = loadYamlFile(`${__dirname}/resources/trPassageRedactionPolicy.yml`)
 
 // import { validateParameters } from '../../util/openapi'
 // import { docs } from './text-reuse-passages.schema'
@@ -27,4 +31,8 @@ module.exports = {
     // validateParameters(docs.operations.find.parameters),
     // ],
   },
+  after: {
+    get: [redactResponse(trPassageRedactionPolicy, defaultCondition)],
+    find: [redactResponseDataItem(trPassageRedactionPolicy, defaultCondition)],
+  },
 }
diff --git a/src/util/redaction.ts b/src/util/redaction.ts
new file mode 100644
index 00000000..c4e3b33c
--- /dev/null
+++ b/src/util/redaction.ts
@@ -0,0 +1,63 @@
+import { JSONPath } from 'jsonpath-plus'
+
+/** Plain JSON-like object that can be redacted. */
+export type Redactable = Record<string, any>
+/** Maps the original value found at a JSON path to its redacted replacement. */
+export type ValueConverter = (value: any) => any
+
+export type DefaultConvertersNames = 'redact' | 'contextNotAllowedImage' | 'remove' | 'emptyArray'
+
+export interface RedactionPolicyItem {
+  jsonPath: string
+  valueConverterName: DefaultConvertersNames
+}
+
+export interface RedactionPolicy {
+  name: string
+  items: RedactionPolicyItem[]
+}
+
+const DefaultConverters: Record<DefaultConvertersNames, ValueConverter> = {
+  redact: () => '[REDACTED]',
+  contextNotAllowedImage: () => 'https://impresso-project.ch/assets/images/not-allowed.png',
+  remove: () => undefined,
+  emptyArray: () => [],
+}
+
+/**
+ * Return a redacted deep copy of `object`; the input itself is not modified.
+ * Every value matched by a policy item's JSON path is replaced with the output
+ * of the converter named by that item.
+ *
+ * @param object a non-null, non-array object.
+ * @param policy list of JSON paths and the converter to apply to each match.
+ * @returns a copy made through a JSON round trip, with the matches replaced.
+ * @throws Error if `object` is null, an array or not an object, or if an item
+ *   whose path matched names a converter that does not exist.
+ */
+export const redactObject = <T extends Redactable>(object: T, policy: RedactionPolicy): T => {
+  if (typeof object !== 'object' || object === null || Array.isArray(object)) {
+    throw new Error('The provided object is not Redactable')
+  }
+
+  const objectCopy = JSON.parse(JSON.stringify(object))
+
+  policy.items.forEach(item => {
+    JSONPath({
+      path: item.jsonPath,
+      json: objectCopy,
+      resultType: 'value',
+      callback: (value, type, payload) => {
+        const valueConverter = DefaultConverters[item.valueConverterName]
+        // Policies are loaded from YAML at runtime, so the name is not guaranteed
+        // to be one of DefaultConvertersNames despite the static type.
+        if (valueConverter == null) {
+          throw new Error(`Unknown value converter "${item.valueConverterName}" in policy "${policy.name}"`)
+        }
+        payload.parent[payload.parentProperty] = valueConverter(value)
+      },
+    })
+  })
+
+  return objectCopy
+}
diff --git a/src/util/yaml.ts b/src/util/yaml.ts
new file mode 100644
index 00000000..692ecd30
--- /dev/null
+++ b/src/util/yaml.ts
@@ -0,0 +1,11 @@
+import { parse } from 'yaml'
+import { readFileSync } from 'fs'
+
+/**
+ * Synchronously read a UTF-8 YAML file and parse it.
+ * The parsed value is cast to `T` without any validation.
+ */
+export const loadYamlFile = <T>(filePath: string): T => {
+  const content = readFileSync(filePath, 'utf8')
+  return parse(content) as T
+}
diff --git a/test/util/redaction.test.ts b/test/util/redaction.test.ts
new file mode 100644
index 00000000..61160a98
--- /dev/null
+++ b/test/util/redaction.test.ts
@@ -0,0 +1,64 @@
+import { RedactionPolicy, redactObject } from '../../src/util/redaction'
+import assert from 'assert'
+
+interface TestDocument {
+  title: string
+  images: { url: string }[]
+  secret?: string
+}
+
+const incorrectInputs = [null, [1, 2, 3], 1, 'string', false, undefined]
+
+describe('redactObject', () => {
+  it('redacts object successfully', () => {
+    const policy = {
+      name: 'test',
+      items: [
+        {
+          jsonPath: '$.title',
+          valueConverterName: 'redact',
+        },
+        {
+          jsonPath: '$.images[*].url',
+          valueConverterName: 'contextNotAllowedImage',
+        },
+        {
+          jsonPath: '$.secret',
+          valueConverterName: 'remove',
+        },
+      ],
+    } satisfies RedactionPolicy
+
+    const input = {
+      title: 'This is a title',
+      images: [{ url: 'https://example.com/image1.jpg' }],
+      secret: 'This is a secret',
+    } satisfies TestDocument
+
+    const expectedOutput = {
+      title: '[REDACTED]',
+      images: [{ url: 'https://impresso-project.ch/assets/images/not-allowed.png' }],
+      secret: undefined,
+    } satisfies TestDocument
+
+    assert.deepStrictEqual(redactObject(input, policy), expectedOutput)
+  })
+
+  incorrectInputs.forEach(input => {
+    it(`fails to redact unknown type of input: ${input}`, () => {
+      const policy = {
+        name: 'test',
+        items: [
+          {
+            jsonPath: '$.title',
+            valueConverterName: 'redact',
+          },
+        ],
+      } satisfies RedactionPolicy
+
+      // Validate the message itself: a string passed as the third argument is only
+      // the assertion failure message and is never compared with the thrown error.
+      assert.throws(() => redactObject(input as any, policy), { name: 'Error', message: 'The provided object is not Redactable' })
+    })
+  })
+})