Skip to content

Commit

Permalink
[product doc] adapt for new format of semantic_text field (elastic#20…
Browse files Browse the repository at this point in the history
…6051)

## Summary

fix elastic#205908

Adapt the product documentation's usages of `semantic_text` for the
breaking changes that will be introduced in 8.18 and 9.0.

This PR introduces a new format version (`2.0.0`) for the product
documentation, introducing the required changes for the incoming
`semantic_text` breaking change.
- include the `_inference_fields` meta field when bundling the doc
artifacts
- set the `index.mapping.semantic_text.use_legacy_format` index setting
to `false` to force the new format
- change the way we're internally overriding the `inference_id` when
ingesting the data
- adapt the `search` logic to retrieve the data at the right place  


Doing that with a new format version also makes the transition
invisible, as our system will simply adapt depending on the version of
the artifact's manifest.


### How to test

**1. test that the behavior is not broken for current artifacts**

Run the branch, install the product doc from the prod repository, make
sure that the 8.17 artifacts are installed, then check if the feature
still works using the o11y assistant.

**2. test that the behavior works with the new artifacts**

**Keeping your ES instance up**, configure your local Kibana to use the
dev repository (where the 8.18 artifacts with the new format are
present)

```yaml
xpack.productDocBase.artifactRepositoryUrl: "https://storage.googleapis.com/kibana-ai-assistant-kb-artifacts-dev"
```

Then restart Kibana, confirms the artifacts gets updated to 8.18
automatically, and then test that the feature still works as expected
using the o11y assistant.
  • Loading branch information
pgayvallet authored Jan 13, 2025
1 parent 683a768 commit f77dc3d
Show file tree
Hide file tree
Showing 20 changed files with 240 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ import type { ArtifactManifest, ProductName } from '@kbn/product-doc-common';
export const getArtifactManifest = ({
productName,
stackVersion,
formatVersion,
}: {
productName: ProductName;
stackVersion: string;
formatVersion: string;
}): ArtifactManifest => {
return {
formatVersion: '1.0.0',
formatVersion,
productName,
productVersion: stackVersion,
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,14 @@
import Path from 'path';
import AdmZip from 'adm-zip';
import type { ToolingLog } from '@kbn/tooling-log';
import { getArtifactName, type ProductName } from '@kbn/product-doc-common';
import {
LATEST_MANIFEST_FORMAT_VERSION,
getArtifactName,
type ProductName,
} from '@kbn/product-doc-common';
import { getArtifactMappings } from '../artifact/mappings';
import { getArtifactManifest } from '../artifact/manifest';
import { DEFAULT_ELSER } from './create_index';

export const createArtifact = async ({
productName,
Expand All @@ -31,11 +36,15 @@ export const createArtifact = async ({

const zip = new AdmZip();

const mappings = getArtifactMappings('.default-elser');
const mappings = getArtifactMappings(DEFAULT_ELSER);
const mappingFileContent = JSON.stringify(mappings, undefined, 2);
zip.addFile('mappings.json', Buffer.from(mappingFileContent, 'utf-8'));

const manifest = getArtifactManifest({ productName, stackVersion });
const manifest = getArtifactManifest({
productName,
stackVersion,
formatVersion: LATEST_MANIFEST_FORMAT_VERSION,
});
const manifestFileContent = JSON.stringify(manifest, undefined, 2);
zip.addFile('manifest.json', Buffer.from(manifestFileContent, 'utf-8'));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ export const createChunkFiles = async ({
const searchRes = await client.search({
index,
size: 10000,
// includes inference field meta info in source
fields: ['_inference_fields'],
query: {
bool: {
must: [{ term: { product_name: productName } }],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import type { Client } from '@elastic/elasticsearch';
import type { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/types';

const DEFAULT_ELSER = '.elser-2-elasticsearch';
export const DEFAULT_ELSER = '.elser-2-elasticsearch';

const mappings: MappingTypeMapping = {
dynamic: 'strict',
Expand Down Expand Up @@ -46,5 +46,8 @@ export const createTargetIndex = async ({
await client.indices.create({
index: indexName,
mappings,
settings: {
'index.mapping.semantic_text.use_legacy_format': false,
},
});
};
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,23 @@ const processDocument = (document: ExtractedDocument) => {
})
// remove edit links
.replaceAll(/\[\s*edit\s*\]\(\s*[^)]+\s*\)/g, '')
// remove empty links
// // remove empty links
.replaceAll('[]()', '')
// remove image links
.replaceAll(/\[\]\(\s*[^)]+\s*\)/g, '')
// limit to 2 consecutive carriage return
.replaceAll(/\n\n+/g, '\n\n');

document.content_title = document.content_title.split('|')[0].trim();

// specific to security: remove rule query section as it's usually large without much value for the LLM
if (document.product_name === 'security') {
const ruleQueryTitle = '### Rule query';
const ruleQueryPos = document.content_body.indexOf(ruleQueryTitle);
if (ruleQueryPos > -1) {
document.content_body = document.content_body.substring(0, ruleQueryPos);
}
}

return document;
};
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
*/

export { getArtifactName, parseArtifactName } from './src/artifact';
export { type ArtifactManifest } from './src/manifest';
export { LATEST_MANIFEST_FORMAT_VERSION, type ArtifactManifest } from './src/manifest';
export { DocumentationProduct, type ProductName } from './src/product';
export { isArtifactContentFilePath } from './src/artifact_content';
export {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@ interface SemanticTextArrayField {

export interface ProductDocumentationAttributes {
content_title: string;
content_body: SemanticTextField;
// backward compatibility for the legacy semantic_text mode
content_body: string | SemanticTextField;
product_name: ProductName;
root_type: string;
slug: string;
url: string;
version: string;
ai_subtitle: string;
ai_summary: SemanticTextField;
ai_questions_answered: SemanticTextArrayField;
ai_summary: string | SemanticTextField;
ai_questions_answered: string[] | SemanticTextArrayField;
ai_tags: string[];
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import type { ProductName } from './product';

export const LATEST_MANIFEST_FORMAT_VERSION = '2.0.0';

export interface ArtifactManifest {
formatVersion: string;
productName: ProductName;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ jest.doMock('./steps', () => {
export const downloadToDiskMock = jest.fn();
export const openZipArchiveMock = jest.fn();
export const loadMappingFileMock = jest.fn();
export const loadManifestFileMock = jest.fn();
export const ensureDefaultElserDeployedMock = jest.fn();

jest.doMock('./utils', () => {
Expand All @@ -33,6 +34,7 @@ jest.doMock('./utils', () => {
downloadToDisk: downloadToDiskMock,
openZipArchive: openZipArchiveMock,
loadMappingFile: loadMappingFileMock,
loadManifestFile: loadManifestFileMock,
ensureDefaultElserDeployed: ensureDefaultElserDeployedMock,
};
});
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
createIndexMock,
populateIndexMock,
loadMappingFileMock,
loadManifestFileMock,
openZipArchiveMock,
validateArtifactArchiveMock,
fetchArtifactVersionsMock,
Expand All @@ -36,6 +37,8 @@ const callOrder = (fn: { mock: { invocationCallOrder: number[] } }): number => {
return fn.mock.invocationCallOrder[0];
};

const TEST_FORMAT_VERSION = '2.0.0';

describe('PackageInstaller', () => {
let logger: MockedLogger;
let esClient: ReturnType<typeof elasticsearchServiceMock.createElasticsearchClient>;
Expand All @@ -55,13 +58,20 @@ describe('PackageInstaller', () => {
artifactRepositoryUrl,
kibanaVersion,
});

loadManifestFileMock.mockResolvedValue({
formatVersion: TEST_FORMAT_VERSION,
productName: 'kibana',
productVersion: '8.17',
});
});

afterEach(() => {
downloadToDiskMock.mockReset();
createIndexMock.mockReset();
populateIndexMock.mockReset();
loadMappingFileMock.mockReset();
loadManifestFileMock.mockReset();
openZipArchiveMock.mockReset();
validateArtifactArchiveMock.mockReset();
fetchArtifactVersionsMock.mockReset();
Expand Down Expand Up @@ -99,10 +109,14 @@ describe('PackageInstaller', () => {
expect(loadMappingFileMock).toHaveBeenCalledTimes(1);
expect(loadMappingFileMock).toHaveBeenCalledWith(zipArchive);

expect(loadManifestFileMock).toHaveBeenCalledTimes(1);
expect(loadManifestFileMock).toHaveBeenCalledWith(zipArchive);

expect(createIndexMock).toHaveBeenCalledTimes(1);
expect(createIndexMock).toHaveBeenCalledWith({
indexName,
mappings,
manifestVersion: TEST_FORMAT_VERSION,
esClient,
log: logger,
});
Expand All @@ -111,6 +125,7 @@ describe('PackageInstaller', () => {
expect(populateIndexMock).toHaveBeenCalledWith({
indexName,
archive: zipArchive,
manifestVersion: TEST_FORMAT_VERSION,
esClient,
log: logger,
});
Expand All @@ -130,6 +145,7 @@ describe('PackageInstaller', () => {
expect(callOrder(downloadToDiskMock)).toBeLessThan(callOrder(openZipArchiveMock));
expect(callOrder(openZipArchiveMock)).toBeLessThan(callOrder(loadMappingFileMock));
expect(callOrder(loadMappingFileMock)).toBeLessThan(callOrder(createIndexMock));
expect(callOrder(loadManifestFileMock)).toBeLessThan(callOrder(createIndexMock));
expect(callOrder(createIndexMock)).toBeLessThan(callOrder(populateIndexMock));
expect(callOrder(populateIndexMock)).toBeLessThan(
callOrder(productDocClient.setInstallationSuccessful)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
downloadToDisk,
openZipArchive,
loadMappingFile,
loadManifestFile,
ensureDefaultElserDeployed,
type ZipArchive,
} from './utils';
Expand Down Expand Up @@ -158,19 +159,25 @@ export class PackageInstaller {

validateArtifactArchive(zipArchive);

const mappings = await loadMappingFile(zipArchive);
const [manifest, mappings] = await Promise.all([
loadManifestFile(zipArchive),
loadMappingFile(zipArchive),
]);

const manifestVersion = manifest.formatVersion;
const indexName = getProductDocIndexName(productName);

await createIndex({
indexName,
mappings,
manifestVersion,
esClient: this.esClient,
log: this.log,
});

await populateIndex({
indexName,
manifestVersion,
archive: zipArchive,
esClient: this.esClient,
log: this.log,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@ import type { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/types';
import { loggerMock, type MockedLogger } from '@kbn/logging-mocks';
import type { ElasticsearchClient } from '@kbn/core/server';
import { elasticsearchServiceMock } from '@kbn/core/server/mocks';
import { LATEST_MANIFEST_FORMAT_VERSION } from '@kbn/product-doc-common';
import { createIndex } from './create_index';
import { internalElserInferenceId } from '../../../../common/consts';

const LEGACY_SEMANTIC_TEXT_VERSION = '1.0.0';

describe('createIndex', () => {
let log: MockedLogger;
let esClient: ElasticsearchClient;
Expand All @@ -21,7 +24,33 @@ describe('createIndex', () => {
esClient = elasticsearchServiceMock.createElasticsearchClient();
});

it('calls esClient.indices.create with the right parameters', async () => {
it('calls esClient.indices.create with the right parameters for the current manifest version', async () => {
const mappings: MappingTypeMapping = {
properties: {},
};
const indexName = '.some-index';

await createIndex({
indexName,
mappings,
manifestVersion: LATEST_MANIFEST_FORMAT_VERSION,
log,
esClient,
});

expect(esClient.indices.create).toHaveBeenCalledTimes(1);
expect(esClient.indices.create).toHaveBeenCalledWith({
index: indexName,
mappings,
settings: {
number_of_shards: 1,
auto_expand_replicas: '0-1',
'index.mapping.semantic_text.use_legacy_format': false,
},
});
});

it('calls esClient.indices.create with the right parameters for the manifest version 1.0.0', async () => {
const mappings: MappingTypeMapping = {
properties: {},
};
Expand All @@ -30,6 +59,7 @@ describe('createIndex', () => {
await createIndex({
indexName,
mappings,
manifestVersion: LEGACY_SEMANTIC_TEXT_VERSION,
log,
esClient,
});
Expand All @@ -41,6 +71,7 @@ describe('createIndex', () => {
settings: {
number_of_shards: 1,
auto_expand_replicas: '0-1',
'index.mapping.semantic_text.use_legacy_format': true,
},
});
});
Expand All @@ -61,6 +92,7 @@ describe('createIndex', () => {
await createIndex({
indexName: '.some-index',
mappings,
manifestVersion: LEGACY_SEMANTIC_TEXT_VERSION,
log,
esClient,
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,25 @@ import type { Logger } from '@kbn/logging';
import type { ElasticsearchClient } from '@kbn/core/server';
import type { MappingTypeMapping, MappingProperty } from '@elastic/elasticsearch/lib/api/types';
import { internalElserInferenceId } from '../../../../common/consts';
import { isLegacySemanticTextVersion } from '../utils';

export const createIndex = async ({
esClient,
indexName,
manifestVersion,
mappings,
log,
}: {
esClient: ElasticsearchClient;
indexName: string;
manifestVersion: string;
mappings: MappingTypeMapping;
log: Logger;
}) => {
log.debug(`Creating index ${indexName}`);

const legacySemanticText = isLegacySemanticTextVersion(manifestVersion);

overrideInferenceId(mappings, internalElserInferenceId);

await esClient.indices.create({
Expand All @@ -31,6 +36,7 @@ export const createIndex = async ({
settings: {
number_of_shards: 1,
auto_expand_replicas: '0-1',
'index.mapping.semantic_text.use_legacy_format': legacySemanticText,
},
});
};
Expand Down
Loading

0 comments on commit f77dc3d

Please sign in to comment.