From f77dc3d04351bc2f9ae6b6395dabd39c2464b442 Mon Sep 17 00:00:00 2001 From: Pierre Gayvallet Date: Mon, 13 Jan 2025 10:05:16 +0100 Subject: [PATCH] [product doc] adapt for new format of semantic_text field (#206051) ## Summary fix https://github.com/elastic/kibana/issues/205908 Adapt the product documentation's usages of `semantic_text` for the breaking changes that will be introduced in 8.18 and 9.0. This PR introduces a new format version (`2.0.0`) for the product documentation, which carries the required changes for the incoming `semantic_text` breaking change. - include the `_inference_fields` meta field when bundling the doc artifacts - set the `index.mapping.semantic_text.use_legacy_format` index setting to `false` to force the new format - change the way we're internally overriding the `inference_id` when ingesting the data - adapt the `search` logic to retrieve the data at the right place Doing that with a new format version also makes the transition invisible, as our system will simply adapt depending on the version of the artifact's manifest. ### How to test **1. test that the behavior is not broken for current artifacts** Run the branch, install the product doc from the prod repository, make sure that the 8.17 artifacts are installed, then check if the feature still works using the o11y assistant. **2. test that the behavior works with the new artifacts** **Keeping your ES instance up**, configure your local Kibana to use the dev repository (where the 8.18 artifacts with the new format are present): ```yaml xpack.productDocBase.artifactRepositoryUrl: "https://storage.googleapis.com/kibana-ai-assistant-kb-artifacts-dev" ``` Then restart Kibana, confirm that the artifacts get updated to 8.18 automatically, and then test that the feature still works as expected using the o11y assistant. 
--- .../src/artifact/manifest.ts | 4 +- .../src/tasks/create_artifact.ts | 15 ++++-- .../src/tasks/create_chunk_files.ts | 2 + .../src/tasks/create_index.ts | 5 +- .../src/tasks/process_documents.ts | 13 ++++- .../ai-infra/product-doc-common/index.ts | 2 +- .../product-doc-common/src/documents.ts | 7 +-- .../product-doc-common/src/manifest.ts | 2 + .../package_installer.test.mocks.ts | 2 + .../package_installer.test.ts | 16 +++++++ .../package_installer/package_installer.ts | 9 +++- .../steps/create_index.test.ts | 34 +++++++++++++- .../package_installer/steps/create_index.ts | 6 +++ .../steps/populate_index.test.ts | 47 +++++++++++++++++++ .../package_installer/steps/populate_index.ts | 30 ++++++++++-- .../services/package_installer/utils/index.ts | 1 + .../utils/manifest_versions.test.ts | 19 ++++++++ .../utils/manifest_versions.ts | 15 ++++++ .../services/search/utils/map_result.test.ts | 25 ++++++++++ .../services/search/utils/map_result.ts | 3 +- 20 files changed, 240 insertions(+), 17 deletions(-) create mode 100644 x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/manifest_versions.test.ts create mode 100644 x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/manifest_versions.ts diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/artifact/manifest.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/artifact/manifest.ts index a8aa927c5ef1f..8d101b737c839 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/artifact/manifest.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/artifact/manifest.ts @@ -10,12 +10,14 @@ import type { ArtifactManifest, ProductName } from '@kbn/product-doc-common'; export const getArtifactManifest = ({ productName, stackVersion, + formatVersion, }: { productName: ProductName; stackVersion: string; + formatVersion: string; }): ArtifactManifest => { return { - formatVersion: 
'1.0.0', + formatVersion, productName, productVersion: stackVersion, }; diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_artifact.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_artifact.ts index 056887a41a4d2..bd6d005936574 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_artifact.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_artifact.ts @@ -8,9 +8,14 @@ import Path from 'path'; import AdmZip from 'adm-zip'; import type { ToolingLog } from '@kbn/tooling-log'; -import { getArtifactName, type ProductName } from '@kbn/product-doc-common'; +import { + LATEST_MANIFEST_FORMAT_VERSION, + getArtifactName, + type ProductName, +} from '@kbn/product-doc-common'; import { getArtifactMappings } from '../artifact/mappings'; import { getArtifactManifest } from '../artifact/manifest'; +import { DEFAULT_ELSER } from './create_index'; export const createArtifact = async ({ productName, @@ -31,11 +36,15 @@ export const createArtifact = async ({ const zip = new AdmZip(); - const mappings = getArtifactMappings('.default-elser'); + const mappings = getArtifactMappings(DEFAULT_ELSER); const mappingFileContent = JSON.stringify(mappings, undefined, 2); zip.addFile('mappings.json', Buffer.from(mappingFileContent, 'utf-8')); - const manifest = getArtifactManifest({ productName, stackVersion }); + const manifest = getArtifactManifest({ + productName, + stackVersion, + formatVersion: LATEST_MANIFEST_FORMAT_VERSION, + }); const manifestFileContent = JSON.stringify(manifest, undefined, 2); zip.addFile('manifest.json', Buffer.from(manifestFileContent, 'utf-8')); diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_chunk_files.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_chunk_files.ts index 73cf8f0109228..e3032c3ef9717 100644 --- 
a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_chunk_files.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_chunk_files.ts @@ -30,6 +30,8 @@ export const createChunkFiles = async ({ const searchRes = await client.search({ index, size: 10000, + // includes inference field meta info in source + fields: ['_inference_fields'], query: { bool: { must: [{ term: { product_name: productName } }], diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_index.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_index.ts index 23915b3a1ab09..b867edc31b85a 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_index.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_index.ts @@ -8,7 +8,7 @@ import type { Client } from '@elastic/elasticsearch'; import type { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/types'; -const DEFAULT_ELSER = '.elser-2-elasticsearch'; +export const DEFAULT_ELSER = '.elser-2-elasticsearch'; const mappings: MappingTypeMapping = { dynamic: 'strict', @@ -46,5 +46,8 @@ export const createTargetIndex = async ({ await client.indices.create({ index: indexName, mappings, + settings: { + 'index.mapping.semantic_text.use_legacy_format': false, + }, }); }; diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/process_documents.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/process_documents.ts index 80f975baacae4..950e0e6ccba72 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/process_documents.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/process_documents.ts @@ -56,12 +56,23 @@ const processDocument = (document: ExtractedDocument) => { }) // remove edit links .replaceAll(/\[\s*edit\s*\]\(\s*[^)]+\s*\)/g, '') - // remove empty links + // // remove empty links .replaceAll('[]()', '') + // 
remove image links + .replaceAll(/\[\]\(\s*[^)]+\s*\)/g, '') // limit to 2 consecutive carriage return .replaceAll(/\n\n+/g, '\n\n'); document.content_title = document.content_title.split('|')[0].trim(); + // specific to security: remove rule query section as it's usually large without much value for the LLM + if (document.product_name === 'security') { + const ruleQueryTitle = '### Rule query'; + const ruleQueryPos = document.content_body.indexOf(ruleQueryTitle); + if (ruleQueryPos > -1) { + document.content_body = document.content_body.substring(0, ruleQueryPos); + } + } + return document; }; diff --git a/x-pack/platform/packages/shared/ai-infra/product-doc-common/index.ts b/x-pack/platform/packages/shared/ai-infra/product-doc-common/index.ts index 1a96737138991..354d68e0b57e8 100644 --- a/x-pack/platform/packages/shared/ai-infra/product-doc-common/index.ts +++ b/x-pack/platform/packages/shared/ai-infra/product-doc-common/index.ts @@ -6,7 +6,7 @@ */ export { getArtifactName, parseArtifactName } from './src/artifact'; -export { type ArtifactManifest } from './src/manifest'; +export { LATEST_MANIFEST_FORMAT_VERSION, type ArtifactManifest } from './src/manifest'; export { DocumentationProduct, type ProductName } from './src/product'; export { isArtifactContentFilePath } from './src/artifact_content'; export { diff --git a/x-pack/platform/packages/shared/ai-infra/product-doc-common/src/documents.ts b/x-pack/platform/packages/shared/ai-infra/product-doc-common/src/documents.ts index ef81b3d6411cc..ab2f78c0ab37c 100644 --- a/x-pack/platform/packages/shared/ai-infra/product-doc-common/src/documents.ts +++ b/x-pack/platform/packages/shared/ai-infra/product-doc-common/src/documents.ts @@ -18,14 +18,15 @@ interface SemanticTextArrayField { export interface ProductDocumentationAttributes { content_title: string; - content_body: SemanticTextField; + // backward compatibility for the legacy semantic_text mode + content_body: string | SemanticTextField; product_name: 
ProductName; root_type: string; slug: string; url: string; version: string; ai_subtitle: string; - ai_summary: SemanticTextField; - ai_questions_answered: SemanticTextArrayField; + ai_summary: string | SemanticTextField; + ai_questions_answered: string[] | SemanticTextArrayField; ai_tags: string[]; } diff --git a/x-pack/platform/packages/shared/ai-infra/product-doc-common/src/manifest.ts b/x-pack/platform/packages/shared/ai-infra/product-doc-common/src/manifest.ts index 6c246cf58fd5f..d640e3b3b0c90 100644 --- a/x-pack/platform/packages/shared/ai-infra/product-doc-common/src/manifest.ts +++ b/x-pack/platform/packages/shared/ai-infra/product-doc-common/src/manifest.ts @@ -7,6 +7,8 @@ import type { ProductName } from './product'; +export const LATEST_MANIFEST_FORMAT_VERSION = '2.0.0'; + export interface ArtifactManifest { formatVersion: string; productName: ProductName; diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.test.mocks.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.test.mocks.ts index 7be639af7b332..f040a8d4d9266 100644 --- a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.test.mocks.ts +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.test.mocks.ts @@ -24,6 +24,7 @@ jest.doMock('./steps', () => { export const downloadToDiskMock = jest.fn(); export const openZipArchiveMock = jest.fn(); export const loadMappingFileMock = jest.fn(); +export const loadManifestFileMock = jest.fn(); export const ensureDefaultElserDeployedMock = jest.fn(); jest.doMock('./utils', () => { @@ -33,6 +34,7 @@ jest.doMock('./utils', () => { downloadToDisk: downloadToDiskMock, openZipArchive: openZipArchiveMock, loadMappingFile: loadMappingFileMock, + loadManifestFile: loadManifestFileMock, ensureDefaultElserDeployed: 
ensureDefaultElserDeployedMock, }; }); diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.test.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.test.ts index e436f2bb79c2e..b85510b5d07de 100644 --- a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.test.ts +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.test.ts @@ -10,6 +10,7 @@ import { createIndexMock, populateIndexMock, loadMappingFileMock, + loadManifestFileMock, openZipArchiveMock, validateArtifactArchiveMock, fetchArtifactVersionsMock, @@ -36,6 +37,8 @@ const callOrder = (fn: { mock: { invocationCallOrder: number[] } }): number => { return fn.mock.invocationCallOrder[0]; }; +const TEST_FORMAT_VERSION = '2.0.0'; + describe('PackageInstaller', () => { let logger: MockedLogger; let esClient: ReturnType; @@ -55,6 +58,12 @@ describe('PackageInstaller', () => { artifactRepositoryUrl, kibanaVersion, }); + + loadManifestFileMock.mockResolvedValue({ + formatVersion: TEST_FORMAT_VERSION, + productName: 'kibana', + productVersion: '8.17', + }); }); afterEach(() => { @@ -62,6 +71,7 @@ describe('PackageInstaller', () => { createIndexMock.mockReset(); populateIndexMock.mockReset(); loadMappingFileMock.mockReset(); + loadManifestFileMock.mockReset(); openZipArchiveMock.mockReset(); validateArtifactArchiveMock.mockReset(); fetchArtifactVersionsMock.mockReset(); @@ -99,10 +109,14 @@ describe('PackageInstaller', () => { expect(loadMappingFileMock).toHaveBeenCalledTimes(1); expect(loadMappingFileMock).toHaveBeenCalledWith(zipArchive); + expect(loadManifestFileMock).toHaveBeenCalledTimes(1); + expect(loadManifestFileMock).toHaveBeenCalledWith(zipArchive); + expect(createIndexMock).toHaveBeenCalledTimes(1); expect(createIndexMock).toHaveBeenCalledWith({ indexName, 
mappings, + manifestVersion: TEST_FORMAT_VERSION, esClient, log: logger, }); @@ -111,6 +125,7 @@ describe('PackageInstaller', () => { expect(populateIndexMock).toHaveBeenCalledWith({ indexName, archive: zipArchive, + manifestVersion: TEST_FORMAT_VERSION, esClient, log: logger, }); @@ -130,6 +145,7 @@ describe('PackageInstaller', () => { expect(callOrder(downloadToDiskMock)).toBeLessThan(callOrder(openZipArchiveMock)); expect(callOrder(openZipArchiveMock)).toBeLessThan(callOrder(loadMappingFileMock)); expect(callOrder(loadMappingFileMock)).toBeLessThan(callOrder(createIndexMock)); + expect(callOrder(loadManifestFileMock)).toBeLessThan(callOrder(createIndexMock)); expect(callOrder(createIndexMock)).toBeLessThan(callOrder(populateIndexMock)); expect(callOrder(populateIndexMock)).toBeLessThan( callOrder(productDocClient.setInstallationSuccessful) diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.ts index 5e63fd5c5283c..95a31fb0ddedf 100644 --- a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.ts +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.ts @@ -18,6 +18,7 @@ import { downloadToDisk, openZipArchive, loadMappingFile, + loadManifestFile, ensureDefaultElserDeployed, type ZipArchive, } from './utils'; @@ -158,19 +159,25 @@ export class PackageInstaller { validateArtifactArchive(zipArchive); - const mappings = await loadMappingFile(zipArchive); + const [manifest, mappings] = await Promise.all([ + loadManifestFile(zipArchive), + loadMappingFile(zipArchive), + ]); + const manifestVersion = manifest.formatVersion; const indexName = getProductDocIndexName(productName); await createIndex({ indexName, mappings, + manifestVersion, esClient: this.esClient, log: 
this.log, }); await populateIndex({ indexName, + manifestVersion, archive: zipArchive, esClient: this.esClient, log: this.log, diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/create_index.test.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/create_index.test.ts index fca8b5283c300..691aeffa40a5b 100644 --- a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/create_index.test.ts +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/create_index.test.ts @@ -9,9 +9,12 @@ import type { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/types'; import { loggerMock, type MockedLogger } from '@kbn/logging-mocks'; import type { ElasticsearchClient } from '@kbn/core/server'; import { elasticsearchServiceMock } from '@kbn/core/server/mocks'; +import { LATEST_MANIFEST_FORMAT_VERSION } from '@kbn/product-doc-common'; import { createIndex } from './create_index'; import { internalElserInferenceId } from '../../../../common/consts'; +const LEGACY_SEMANTIC_TEXT_VERSION = '1.0.0'; + describe('createIndex', () => { let log: MockedLogger; let esClient: ElasticsearchClient; @@ -21,7 +24,33 @@ describe('createIndex', () => { esClient = elasticsearchServiceMock.createElasticsearchClient(); }); - it('calls esClient.indices.create with the right parameters', async () => { + it('calls esClient.indices.create with the right parameters for the current manifest version', async () => { + const mappings: MappingTypeMapping = { + properties: {}, + }; + const indexName = '.some-index'; + + await createIndex({ + indexName, + mappings, + manifestVersion: LATEST_MANIFEST_FORMAT_VERSION, + log, + esClient, + }); + + expect(esClient.indices.create).toHaveBeenCalledTimes(1); + expect(esClient.indices.create).toHaveBeenCalledWith({ + index: indexName, + mappings, + settings: { + 
number_of_shards: 1, + auto_expand_replicas: '0-1', + 'index.mapping.semantic_text.use_legacy_format': false, + }, + }); + }); + + it('calls esClient.indices.create with the right parameters for the manifest version 1.0.0', async () => { const mappings: MappingTypeMapping = { properties: {}, }; @@ -30,6 +59,7 @@ describe('createIndex', () => { await createIndex({ indexName, mappings, + manifestVersion: LEGACY_SEMANTIC_TEXT_VERSION, log, esClient, }); @@ -41,6 +71,7 @@ describe('createIndex', () => { settings: { number_of_shards: 1, auto_expand_replicas: '0-1', + 'index.mapping.semantic_text.use_legacy_format': true, }, }); }); @@ -61,6 +92,7 @@ describe('createIndex', () => { await createIndex({ indexName: '.some-index', mappings, + manifestVersion: LEGACY_SEMANTIC_TEXT_VERSION, log, esClient, }); diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/create_index.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/create_index.ts index decd62e556ba5..1b3cbe10f18ce 100644 --- a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/create_index.ts +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/create_index.ts @@ -9,20 +9,25 @@ import type { Logger } from '@kbn/logging'; import type { ElasticsearchClient } from '@kbn/core/server'; import type { MappingTypeMapping, MappingProperty } from '@elastic/elasticsearch/lib/api/types'; import { internalElserInferenceId } from '../../../../common/consts'; +import { isLegacySemanticTextVersion } from '../utils'; export const createIndex = async ({ esClient, indexName, + manifestVersion, mappings, log, }: { esClient: ElasticsearchClient; indexName: string; + manifestVersion: string; mappings: MappingTypeMapping; log: Logger; }) => { log.debug(`Creating index ${indexName}`); + const legacySemanticText = 
isLegacySemanticTextVersion(manifestVersion); + overrideInferenceId(mappings, internalElserInferenceId); await esClient.indices.create({ @@ -31,6 +36,7 @@ export const createIndex = async ({ settings: { number_of_shards: 1, auto_expand_replicas: '0-1', + 'index.mapping.semantic_text.use_legacy_format': legacySemanticText, }, }); }; diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/populate_index.test.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/populate_index.test.ts index 2f301f9928e9a..ef74f13691974 100644 --- a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/populate_index.test.ts +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/populate_index.test.ts @@ -8,10 +8,13 @@ import { times } from 'lodash'; import { loggerMock, type MockedLogger } from '@kbn/logging-mocks'; import { elasticsearchServiceMock } from '@kbn/core/server/mocks'; +import { LATEST_MANIFEST_FORMAT_VERSION } from '@kbn/product-doc-common'; import { internalElserInferenceId } from '../../../../common/consts'; import type { ZipArchive } from '../utils/zip_archive'; import { populateIndex } from './populate_index'; +const LEGACY_SEMANTIC_TEXT_VERSION = '1.0.0'; + const createMockArchive = (entries: Record): ZipArchive => { return { hasEntry: (entryPath) => Object.keys(entries).includes(entryPath), @@ -44,6 +47,7 @@ describe('populateIndex', () => { await populateIndex({ indexName: '.foo', + manifestVersion: LATEST_MANIFEST_FORMAT_VERSION, archive, log, esClient, @@ -59,6 +63,7 @@ describe('populateIndex', () => { await populateIndex({ indexName: '.foo', + manifestVersion: LATEST_MANIFEST_FORMAT_VERSION, archive, log, esClient, @@ -77,6 +82,47 @@ describe('populateIndex', () => { }); it('rewrites the inference_id of semantic fields', async () => { + const archive = 
createMockArchive({ + 'content/content-0.ndjson': JSON.stringify({ + semantic: 'foo', + _inference_fields: { + semantic: { + inference: { + inference_id: '.some-inference', + }, + }, + }, + }), + }); + + await populateIndex({ + indexName: '.foo', + manifestVersion: LATEST_MANIFEST_FORMAT_VERSION, + archive, + log, + esClient, + }); + + expect(esClient.bulk).toHaveBeenCalledTimes(1); + expect(esClient.bulk).toHaveBeenCalledWith({ + refresh: false, + operations: [ + { index: { _index: '.foo' } }, + { + semantic: 'foo', + _inference_fields: { + semantic: { + inference: { + inference_id: internalElserInferenceId, + }, + }, + }, + }, + ], + }); + }); + + it('rewrites the inference_id of semantic fields for legacy semantic_field', async () => { const archive = createMockArchive({ 'content/content-0.ndjson': JSON.stringify({ semantic: { text: 'foo', inference: { inference_id: '.some-inference' } }, @@ -85,6 +131,7 @@ describe('populateIndex', () => { await populateIndex({ indexName: '.foo', + manifestVersion: LEGACY_SEMANTIC_TEXT_VERSION, archive, log, esClient, diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/populate_index.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/populate_index.ts index 017757ca90b99..46c71eefd4e83 100644 --- a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/populate_index.ts +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/populate_index.ts @@ -11,27 +11,32 @@ import type { ElasticsearchClient } from '@kbn/core/server'; import { isArtifactContentFilePath } from '@kbn/product-doc-common'; import { internalElserInferenceId } from '../../../../common/consts'; import type { ZipArchive } from '../utils/zip_archive'; +import { isLegacySemanticTextVersion } from '../utils'; export const populateIndex = async ({ esClient, indexName, + 
manifestVersion, archive, log, }: { esClient: ElasticsearchClient; indexName: string; + manifestVersion: string; archive: ZipArchive; log: Logger; }) => { log.debug(`Starting populating index ${indexName}`); + const legacySemanticText = isLegacySemanticTextVersion(manifestVersion); + const contentEntries = archive.getEntryPaths().filter(isArtifactContentFilePath); for (let i = 0; i < contentEntries.length; i++) { const entryPath = contentEntries[i]; log.debug(`Indexing content for entry ${entryPath}`); const contentBuffer = await archive.getEntryContent(entryPath); - await indexContentFile({ indexName, esClient, contentBuffer }); + await indexContentFile({ indexName, esClient, contentBuffer, legacySemanticText }); } log.debug(`Done populating index ${indexName}`); @@ -41,10 +46,12 @@ const indexContentFile = async ({ indexName, contentBuffer, esClient, + legacySemanticText, }: { indexName: string; contentBuffer: Buffer; esClient: ElasticsearchClient; + legacySemanticText: boolean; }) => { const fileContent = contentBuffer.toString('utf-8'); const lines = fileContent.split('\n'); @@ -55,7 +62,13 @@ const indexContentFile = async ({ .map((line) => { return JSON.parse(line); }) - .map((doc) => rewriteInferenceId(doc, internalElserInferenceId)); + .map((doc) => + rewriteInferenceId({ + document: doc, + inferenceId: internalElserInferenceId, + legacySemanticText, + }) + ); const operations = documents.reduce((ops, document) => { ops!.push(...[{ index: { _index: indexName } }, document]); @@ -73,9 +86,18 @@ const indexContentFile = async ({ } }; -const rewriteInferenceId = (document: Record, inferenceId: string) => { +const rewriteInferenceId = ({ + document, + inferenceId, + legacySemanticText, +}: { + document: Record; + inferenceId: string; + legacySemanticText: boolean; +}) => { + const semanticFieldsRoot = legacySemanticText ? document : document._inference_fields; // we don't need to handle nested fields, we don't have any and won't. 
- Object.values(document).forEach((field) => { + Object.values(semanticFieldsRoot ?? {}).forEach((field: any) => { if (field.inference) { field.inference.inference_id = inferenceId; } diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/index.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/index.ts index 1a1a2247b9dc0..78aa127e7ef18 100644 --- a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/index.ts +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/index.ts @@ -9,3 +9,4 @@ export { downloadToDisk } from './download'; export { openZipArchive, type ZipArchive } from './zip_archive'; export { loadManifestFile, loadMappingFile } from './archive_accessors'; export { ensureDefaultElserDeployed } from './ensure_default_elser_deployed'; +export { isLegacySemanticTextVersion } from './manifest_versions'; diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/manifest_versions.test.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/manifest_versions.test.ts new file mode 100644 index 0000000000000..2c14dc919cdfb --- /dev/null +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/manifest_versions.test.ts @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { isLegacySemanticTextVersion } from './manifest_versions'; + +describe('isLegacySemanticTextVersion', () => { + it('returns true for version 1.0.0', () => { + expect(isLegacySemanticTextVersion('1.0.0')).toBe(true); + }); + + it('returns false for version 2.0.0 and higher', () => { + expect(isLegacySemanticTextVersion('2.0.0')).toBe(false); + expect(isLegacySemanticTextVersion('4.92.3')).toBe(false); + }); +}); diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/manifest_versions.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/manifest_versions.ts new file mode 100644 index 0000000000000..66d5307fb6ef2 --- /dev/null +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/utils/manifest_versions.ts @@ -0,0 +1,15 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import Semver from 'semver'; + +/** + * checks if the provided manifest version was a version where legacy semantic_text behavior was being used + */ +export const isLegacySemanticTextVersion = (manifestVersion: string): boolean => { + return Semver.lte(manifestVersion, '1.0.0'); +}; diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/search/utils/map_result.test.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/search/utils/map_result.test.ts index 56e8ce4875cc5..c4ad52e972e8d 100644 --- a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/search/utils/map_result.test.ts +++ b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/search/utils/map_result.test.ts @@ -20,6 +20,31 @@ const createHit = ( describe('mapResult', () => { it('returns the expected shape', () => { + const input = createHit({ + content_title: 'content_title', + content_body: 'content_body', + product_name: 'kibana', + root_type: 'documentation', + slug: 'foo.html', + url: 'http://lost.com/foo.html', + version: '8.16', + ai_subtitle: 'ai_subtitle', + ai_summary: 'ai_summary', + ai_questions_answered: ['question A'], + ai_tags: ['foo', 'bar', 'test'], + }); + + const output = mapResult(input); + + expect(output).toEqual({ + content: 'content_body', + productName: 'kibana', + title: 'content_title', + url: 'http://lost.com/foo.html', + }); + }); + + it('returns the expected shape for legacy semantic_text fields', () => { const input = createHit({ content_title: 'content_title', content_body: { text: 'content_body' }, diff --git a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/search/utils/map_result.ts b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/search/utils/map_result.ts index f4f66b2111827..4cc5ae12ec19b 100644 --- a/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/search/utils/map_result.ts +++ 
b/x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/search/utils/map_result.ts @@ -10,9 +10,10 @@ import type { ProductDocumentationAttributes } from '@kbn/product-doc-common'; import type { DocSearchResult } from '../types'; export const mapResult = (docHit: SearchHit): DocSearchResult => { + const content = docHit._source!.content_body; return { title: docHit._source!.content_title, - content: docHit._source!.content_body.text, + content: typeof content === 'string' ? content : content.text, url: docHit._source!.url, productName: docHit._source!.product_name, };