From 2daad3fc8854fc5aea73ed5a7588ba623dadfdb4 Mon Sep 17 00:00:00 2001
From: Roman Kalyakin
Date: Fri, 17 Jan 2025 11:55:44 +0100
Subject: [PATCH] initial service

---
Review notes (ignored by `git am`, not part of the commit message):

- images.class.ts: `find` now reports the applied limit/offset and the Solr
  hit count in `pagination` instead of constant zeros.
- images.class.ts: the knn query asks for `limit + offset` neighbours so that
  pages after the first are not empty.
- images.class.ts: the id used in `getImageDocument` is quoted and escaped
  before it is placed in the Solr query string.
- Assumptions to confirm: the reviewed copy of this patch had lost its line
  breaks, indentation and everything written in angle brackets. Blank lines,
  indentation of context lines and the generic arguments
  (`Pick<ClientService<...>, ...>`, `Params<FindQuery>`,
  `select<ImageDocument>`, `selectOne<ImageDocument>`,
  `HookMap<ImpressoApplication, AppServices>`) are reconstructed, and
  `response.numFound` is assumed to be exposed by SimpleSolrClient.
- The `index` blob id of images.class.ts predates the changes above.

 src/models/generated/common.d.ts         |  8 +++
 src/models/generated/schemas.d.ts        | 28 ++------
 src/models/generated/schemasPublic.d.ts  | 23 +++++++
 src/models/generated/solr.d.ts           | 41 +++++++++++
 src/schema/common/solrConfiguration.json |  4 ++
 src/schema/schemas/Image.json            | 14 ++--
 src/schema/schemasPublic/Image.json      | 27 ++++++++
 src/schema/solr/Image.json               | 60 +++++++++++++++++
 src/scripts/generate-types.js            |  2 +-
 src/services/images/images-v1.class.js   |  2 +-
 src/services/images/images.class.ts      | 99 ++++++++++++++++++++++++++++
 src/services/images/images.hooks.ts      | 10 +++
 src/services/images/images.schema.ts     | 58 ++++++++++++++++
 src/services/images/images.service.js    | 18 -----
 src/services/images/images.service.ts    | 36 ++++++++++
 src/services/index.ts                    |  2 +-
 16 files changed, 379 insertions(+), 53 deletions(-)
 create mode 100644 src/models/generated/solr.d.ts
 create mode 100644 src/schema/schemasPublic/Image.json
 create mode 100644 src/schema/solr/Image.json
 create mode 100644 src/services/images/images.class.ts
 create mode 100644 src/services/images/images.hooks.ts
 create mode 100644 src/services/images/images.schema.ts
 delete mode 100644 src/services/images/images.service.js
 create mode 100644 src/services/images/images.service.ts

diff --git a/src/models/generated/common.d.ts b/src/models/generated/common.d.ts
index 0fbecc0b..789712a8 100644
--- a/src/models/generated/common.d.ts
+++ b/src/models/generated/common.d.ts
@@ -370,6 +370,10 @@ export interface SolrServerNamespaceConfiguration {
    * Solr index name
    */
   index: string;
+  /**
+   * Version of the data schema used in the index. Optional.
+   */
+  schemaVersion?: string;
 }
 export interface ProxyConfig {
   /**
@@ -514,4 +518,8 @@ export interface SolrServerNamespaceConfiguration {
    * Solr index name
    */
   index: string;
+  /**
+   * Version of the data schema used in the index. Optional.
+   */
+  schemaVersion?: string;
 }
diff --git a/src/models/generated/schemas.d.ts b/src/models/generated/schemas.d.ts
index 6928196c..c2715a52 100644
--- a/src/models/generated/schemas.d.ts
+++ b/src/models/generated/schemas.d.ts
@@ -879,36 +879,18 @@ export interface Image {
    * The unique identifier of the image
    */
   uid: string;
+  /**
+   * The unique identifier of the issue that the image belongs to.
+   */
+  issueUid: string;
   /**
    * The unique identifier of the content item that the image belongs to.
    */
-  contentItemUid: string;
+  contentItemUid?: string;
   /**
    * The URL of the image preview
    */
   previewUrl: string;
-  /**
-   * The URL of the IIIF info.json file
-   */
-  iiifInfoUrl: string;
-  permissions?: ContentPermissions;
-}
-/**
- * Content item permissions
- */
-export interface ContentPermissions {
-  /**
-   * Bitmap representing the 'explore' permissions of the content item
-   */
-  exploreBitmap?: number;
-  /**
-   * Bitmap representing the 'get transcript' permissions of the content item
-   */
-  getTranscriptBitmap?: number;
-  /**
-   * Bitmap representing the 'get images' permissions of the content item
-   */
-  getImagesBitmap?: number;
 }
 
 
diff --git a/src/models/generated/schemasPublic.d.ts b/src/models/generated/schemasPublic.d.ts
index 16874a44..242c7188 100644
--- a/src/models/generated/schemasPublic.d.ts
+++ b/src/models/generated/schemasPublic.d.ts
@@ -374,6 +374,29 @@ export interface Freeform {
 }
 
 
+/**
+ * An image from a content item
+ */
+export interface Image {
+  /**
+   * The unique identifier of the image
+   */
+  uid: string;
+  /**
+   * The unique identifier of the issue that the image belongs to.
+   */
+  issueUid: string;
+  /**
+   * The unique identifier of the content item that the image belongs to.
+   */
+  contentItemUid?: string;
+  /**
+   * The URL of the image preview
+   */
+  previewUrl: string;
+}
+
+
 /**
  * A media source is what a content item belongs to. This can be a newspaper, a TV or a radio station, etc.
  */
diff --git a/src/models/generated/solr.d.ts b/src/models/generated/solr.d.ts
new file mode 100644
index 00000000..56e335cf
--- /dev/null
+++ b/src/models/generated/solr.d.ts
@@ -0,0 +1,41 @@
+
+/* eslint-disable */
+/**
+ * This file was automatically generated by json-schema-to-typescript.
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
+ * and run json-schema-to-typescript to regenerate this file.
+ */
+
+
+/**
+ * Image Solr document in Impresso v2
+ */
+export interface Image {
+  id?: string;
+  meta_journal_s?: string;
+  meta_year_i?: number;
+  meta_day_i?: number;
+  meta_ed_s?: string;
+  meta_issue_id_s?: string;
+  meta_date_dt?: string;
+  linked_ci_s?: string;
+  reading_order_i?: number;
+  caption_txt?: string[];
+  item_type_s?: string;
+  page_nb_is?: number[];
+  coords_is?: number[];
+  front_b?: boolean;
+  iiif_url_s?: string;
+  cc_b?: boolean;
+  rights_data_domain_s?: string;
+  rights_copyright_s?: string;
+  rights_perm_use_explore_plain?: string[];
+  rights_perm_use_get_tr_plain?: string[];
+  rights_perm_use_get_img_plain?: string[];
+  rights_bm_explore_l?: number;
+  rights_bm_get_tr_l?: number;
+  rights_bm_get_img_l?: number;
+  dinov2_emb_v1024?: number[];
+  openclip_emb_v768?: number[];
+  _version_?: number;
+}
diff --git a/src/schema/common/solrConfiguration.json b/src/schema/common/solrConfiguration.json
index 3eecbb77..999359fb 100644
--- a/src/schema/common/solrConfiguration.json
+++ b/src/schema/common/solrConfiguration.json
@@ -90,6 +90,10 @@
         "index": {
           "type": "string",
           "description": "Solr index name"
+        },
+        "schemaVersion": {
+          "type": "string",
+          "description": "Version of the data schema used in the index. Optional."
         }
       },
       "required": ["namespaceId", "serverId", "index"]
diff --git a/src/schema/schemas/Image.json b/src/schema/schemas/Image.json
index b1a4c678..3a582a7f 100644
--- a/src/schema/schemas/Image.json
+++ b/src/schema/schemas/Image.json
@@ -9,6 +9,10 @@
       "type": "string",
       "description": "The unique identifier of the image"
     },
+    "issueUid": {
+      "type": "string",
+      "description": "The unique identifier of the issue that the image belongs to."
+    },
     "contentItemUid": {
       "type": "string",
       "description": "The unique identifier of the content item that the image belongs to."
@@ -17,15 +21,7 @@
       "type": "string",
       "format": "uri",
       "description": "The URL of the image preview"
-    },
-    "iiifInfoUrl": {
-      "type": "string",
-      "format": "uri",
-      "description": "The URL of the IIIF info.json file"
-    },
-    "permissions": {
-      "$ref": "./ContentPermissions.json"
     }
   },
-  "required": ["uid", "contentItemUid", "previewUrl", "iiifInfoUrl"]
+  "required": ["uid", "issueUid", "previewUrl"]
 }
diff --git a/src/schema/schemasPublic/Image.json b/src/schema/schemasPublic/Image.json
new file mode 100644
index 00000000..3a582a7f
--- /dev/null
+++ b/src/schema/schemasPublic/Image.json
@@ -0,0 +1,27 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Image",
+  "description": "An image from a content item",
+  "type": "object",
+  "additionalProperties": false,
+  "properties": {
+    "uid": {
+      "type": "string",
+      "description": "The unique identifier of the image"
+    },
+    "issueUid": {
+      "type": "string",
+      "description": "The unique identifier of the issue that the image belongs to."
+    },
+    "contentItemUid": {
+      "type": "string",
+      "description": "The unique identifier of the content item that the image belongs to."
+    },
+    "previewUrl": {
+      "type": "string",
+      "format": "uri",
+      "description": "The URL of the image preview"
+    }
+  },
+  "required": ["uid", "issueUid", "previewUrl"]
+}
diff --git a/src/schema/solr/Image.json b/src/schema/solr/Image.json
new file mode 100644
index 00000000..4b99543d
--- /dev/null
+++ b/src/schema/solr/Image.json
@@ -0,0 +1,60 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "title": "Image",
+  "description": "Image Solr document in Impresso v2",
+  "properties": {
+    "id": { "type": "string" },
+    "meta_journal_s": { "type": "string" },
+    "meta_year_i": { "type": "integer" },
+    "meta_day_i": { "type": "integer" },
+    "meta_ed_s": { "type": "string" },
+    "meta_issue_id_s": { "type": "string" },
+    "meta_date_dt": { "type": "string", "format": "date-time" },
+    "linked_ci_s": { "type": "string" },
+    "reading_order_i": { "type": "integer" },
+    "caption_txt": {
+      "type": "array",
+      "items": { "type": "string" }
+    },
+    "item_type_s": { "type": "string" },
+    "page_nb_is": {
+      "type": "array",
+      "items": { "type": "integer" }
+    },
+    "coords_is": {
+      "type": "array",
+      "items": { "type": "integer" }
+    },
+    "front_b": { "type": "boolean" },
+    "iiif_url_s": { "type": "string", "format": "uri" },
+    "cc_b": { "type": "boolean" },
+    "rights_data_domain_s": { "type": "string" },
+    "rights_copyright_s": { "type": "string" },
+    "rights_perm_use_explore_plain": {
+      "type": "array",
+      "items": { "type": "string" }
+    },
+    "rights_perm_use_get_tr_plain": {
+      "type": "array",
+      "items": { "type": "string" }
+    },
+    "rights_perm_use_get_img_plain": {
+      "type": "array",
+      "items": { "type": "string" }
+    },
+    "rights_bm_explore_l": { "type": "integer" },
+    "rights_bm_get_tr_l": { "type": "integer" },
+    "rights_bm_get_img_l": { "type": "integer" },
+    "dinov2_emb_v1024": {
+      "type": "array",
+      "items": { "type": "number" }
+    },
+    "openclip_emb_v768": {
+      "type": "array",
+      "items": { "type": "number" }
+    },
+    "_version_": { "type": "number" }
+  },
+  "additionalProperties": false
+}
diff --git a/src/scripts/generate-types.js b/src/scripts/generate-types.js
index 680dc142..095cfdab 100644
--- a/src/scripts/generate-types.js
+++ b/src/scripts/generate-types.js
@@ -12,7 +12,7 @@ const banner = `
 const basePath = './src/schema'
 const outputPath = './src/models/generated'
 
-const schemaBits = ['schemas', 'schemasPublic', 'shared', 'parameters', 'requestBodies', 'responses', 'common']
+const schemaBits = ['schemas', 'schemasPublic', 'shared', 'parameters', 'requestBodies', 'responses', 'common', 'solr']
 const directories = fs
   .readdirSync(basePath)
   .filter(item => {
diff --git a/src/services/images/images-v1.class.js b/src/services/images/images-v1.class.js
index 16678aea..9ab478c5 100644
--- a/src/services/images/images-v1.class.js
+++ b/src/services/images/images-v1.class.js
@@ -12,7 +12,7 @@ const {
 } = require('../../solr')
 
 export default class Service {
-  constructor({ app = null, name = '' }) {
+  constructor({ app, name = '' }) {
     this.app = app
     this.name = name
     this.sequelizeClient = this.app.get('sequelizeClient')
diff --git a/src/services/images/images.class.ts b/src/services/images/images.class.ts
new file mode 100644
index 00000000..4a31b591
--- /dev/null
+++ b/src/services/images/images.class.ts
@@ -0,0 +1,99 @@
+import { NotFound } from '@feathersjs/errors'
+import { ClientService, Id, Params } from '@feathersjs/feathers'
+import { SimpleSolrClient } from '../../internalServices/simpleSolr'
+import { PublicFindResponse } from '../../models/common'
+import { Image } from '../../models/generated/schemas'
+import { Image as ImageDocument } from '../../models/generated/solr'
+import { SolrNamespaces } from '../../solr'
+
+const DefaultLimit = 10
+const ImageSimilarityVectorField: keyof ImageDocument = 'dinov2_emb_v1024'
+
+/** Backslash-escapes `"` and `\` so a value can sit inside a double-quoted Solr phrase. */
+const escapeSolrPhrase = (value: string): string => value.replace(/["\\]/g, '\\$&')
+
+/** Query parameters understood by `Images.find`. */
+export interface FindQuery {
+  similar_to_image_id?: string
+  limit?: number
+  offset?: number
+}
+
+/** Read-only images service (`find` and `get`) that reads from the Solr images namespace. */
+export class Images implements Pick<ClientService<Image, unknown, unknown, PublicFindResponse<Image>>, 'find' | 'get'> {
+  constructor(private readonly solrClient: SimpleSolrClient) {}
+
+  /**
+   * Lists images. When `similar_to_image_id` is set, the query is a k-nearest-neighbour
+   * search on the reference image's embedding; otherwise every image matches (`*:*`).
+   */
+  async find(params?: Params<FindQuery>): Promise<PublicFindResponse<Image>> {
+    const limit = params?.query?.limit ?? DefaultLimit
+    const offset = params?.query?.offset ?? 0
+
+    const queryParts: string[] = []
+
+    const referenceId = params?.query?.similar_to_image_id
+    if (referenceId) {
+      const referenceImage = await this.getImageDocument(referenceId, [ImageSimilarityVectorField])
+      const vector = referenceImage?.[ImageSimilarityVectorField] as number[] | undefined
+
+      // Unknown reference image, or one without an embedding: nothing to compare against.
+      if (vector == null) return { data: [], pagination: { limit, offset, total: 0 } }
+
+      // topK has to cover the skipped rows too, otherwise every page after the first is empty.
+      const topK = Number(limit) + Number(offset)
+      queryParts.push(`{!knn f=${ImageSimilarityVectorField} topK=${topK}}${JSON.stringify(vector)}`)
+    }
+
+    const query = queryParts.length > 0 ? queryParts.join(' AND ') : '*:*'
+
+    const results = await this.solrClient.select<ImageDocument>(SolrNamespaces.Images, {
+      body: {
+        query,
+        limit,
+        offset,
+      },
+    })
+
+    return {
+      data: results?.response?.docs?.map(toImage) ?? [],
+      // Report the paging values that were actually applied and the Solr hit count.
+      pagination: {
+        limit,
+        offset,
+        total: results?.response?.numFound ?? 0,
+      },
+    }
+  }
+
+  /**
+   * Returns the image with the given id.
+   * @throws NotFound when no Solr document has that id.
+   */
+  async get(id: Id, params?: Params): Promise<Image> {
+    const imageDoc = await this.getImageDocument(String(id))
+    if (imageDoc == null) throw new NotFound(`Image with id ${id} not found`)
+    return toImage(imageDoc)
+  }
+
+  /**
+   * Fetches a single raw Solr document by id, optionally restricted to `fields`.
+   */
+  async getImageDocument(id: string, fields?: (keyof ImageDocument)[]): Promise<ImageDocument | undefined> {
+    return this.solrClient.selectOne<ImageDocument>(SolrNamespaces.Images, {
+      // The id comes from the request, so quote and escape it instead of splicing it in raw.
+      body: { query: `id:"${escapeSolrPhrase(id)}"`, limit: 1, fields: fields?.join(',') },
+    })
+  }
+}
+
+/** Maps a Solr image document to the API `Image` shape. */
+const toImage = (doc: ImageDocument): Image => {
+  return {
+    uid: doc.id!,
+    ...(doc.linked_ci_s != null ? { contentItemUid: doc.linked_ci_s } : {}),
+    issueUid: doc.meta_issue_id_s!,
+    previewUrl: doc.iiif_url_s!,
+  }
+}
diff --git a/src/services/images/images.hooks.ts b/src/services/images/images.hooks.ts
new file mode 100644
index 00000000..38fb0073
--- /dev/null
+++ b/src/services/images/images.hooks.ts
@@ -0,0 +1,10 @@
+import { HookMap } from '@feathersjs/feathers'
+import { authenticateAround as authenticate } from '../../hooks/authenticate'
+import { rateLimit } from '../../hooks/rateLimiter'
+import { AppServices, ImpressoApplication } from '../../types'
+
+export default {
+  around: {
+    all: [authenticate({ allowUnauthenticated: true }), rateLimit()],
+  },
+} satisfies HookMap<ImpressoApplication, AppServices>
diff --git a/src/services/images/images.schema.ts b/src/services/images/images.schema.ts
new file mode 100644
index 00000000..808640cf
--- /dev/null
+++ b/src/services/images/images.schema.ts
@@ -0,0 +1,58 @@
+import { ServiceSwaggerOptions } from 'feathers-swagger'
+import { getStandardParameters, getStandardResponses, MethodParameter, QueryParameter } from '../../util/openapi'
+
+const parameterTerm: QueryParameter = {
+  in: 'query',
+  name: 'term',
+  required: false,
+  schema: {
+    type: 'string',
+    maxLength: 100,
+  },
+  description: 'Search images with a specific term in their caption',
+}
+
+const similarToImageId: QueryParameter = {
+  in: 'query',
+  name: 'similar_to_image_id',
+  required: false,
+  schema: {
+    type: 'string',
+    maxLength: 128,
+  },
+  description: 'Find images similar to the image with the given ID',
+}
+
+const findParameters: MethodParameter[] = [
+  // parameterTerm,
+  similarToImageId,
+  ...getStandardParameters({ method: 'find', maxPageSize: 100 }),
+]
+const getParameters: MethodParameter[] = [...getStandardParameters({ method: 'get' })]
+
+export const getDocs = (isPublicApi: boolean): ServiceSwaggerOptions => ({
+  description: 'Images',
+  securities: ['find', 'get'],
+  operations: {
+    find: {
+      operationId: 'findImages',
+      description: 'Find images',
+      parameters: findParameters,
+      responses: getStandardResponses({
+        method: 'find',
+        schema: 'Image',
+        isPublic: isPublicApi,
+      }),
+    },
+    get: {
+      operationId: 'getImage',
+      description: 'Get image by ID',
+      parameters: getParameters,
+      responses: getStandardResponses({
+        method: 'get',
+        schema: 'Image',
+        isPublic: isPublicApi,
+      }),
+    },
+  },
+})
diff --git a/src/services/images/images.service.js b/src/services/images/images.service.js
deleted file mode 100644
index 680dbac6..00000000
--- a/src/services/images/images.service.js
+++ /dev/null
@@ -1,18 +0,0 @@
-import hooksV1 from './images-v1.hooksoks'
-import ServiceV1 from './images-v1.class'
-
-module.exports = function (app) {
-  // Initialize our service with any options it requires
-  app.use(
-    '/images',
-    new ServiceV1({
-      app,
-      name: 'images',
-    })
-  )
-
-  // Get our initialized service so that we can register hooks
-  const service = app.service('images')
-
-  service.hooks(hooksV1)
-}
diff --git a/src/services/images/images.service.ts b/src/services/images/images.service.ts
new file mode 100644
index 00000000..7e0447f7
--- /dev/null
+++ b/src/services/images/images.service.ts
@@ -0,0 +1,36 @@
+import { ServiceOptions } from '@feathersjs/feathers'
+import { createSwaggerServiceOptions } from 'feathers-swagger'
+import { ImpressoApplication } from '../../types'
+import ServiceV1 from './images-v1.class'
+import hooksV1 from './images-v1.hooks'
+import { Images as ServiceV2 } from './images.class'
+import hooksV2 from './images.hooks'
+import { getDocs } from './images.schema'
+
+const SchemaVersionV2 = 'v2'
+
+const init = (app: ImpressoApplication) => {
+  const isPublicApi = app.get('isPublicApi') ?? false
+  const schemaVersion = app
+    .get('solrConfiguration')
+    .namespaces?.find(({ namespaceId }) => namespaceId === 'images')?.schemaVersion
+
+  const isSchemaVersionV2 = schemaVersion === SchemaVersionV2
+  const isPublicApiEnabled = isPublicApi && isSchemaVersionV2
+
+  const service = isSchemaVersionV2
+    ? new ServiceV2(app.service('simpleSolrClient'))
+    : new ServiceV1({
+        app,
+        name: 'images',
+      })
+  const hooks = isSchemaVersionV2 ? hooksV2 : hooksV1
+
+  app.use('/images', service, {
+    events: [],
+    docs: createSwaggerServiceOptions({ schemas: {}, docs: getDocs(isPublicApiEnabled) }),
+  } as ServiceOptions)
+  app.service('images').hooks(hooks)
+}
+
+export default init
diff --git a/src/services/index.ts b/src/services/index.ts
index 21c18b4e..28188266 100644
--- a/src/services/index.ts
+++ b/src/services/index.ts
@@ -19,6 +19,7 @@ const publicApiServices = [
   'entities',
   'impresso-ner',
   'media-sources',
+  'images',
 ]
 
 const adminServices = ['admin']
@@ -35,7 +36,6 @@ const internalApiServices = [
   'articles-timelines',
   'jobs',
   'logs',
-  'images',
   'articles-suggestions',
   'uploaded-images',
   'mentions',