Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spaces Reader - provide version + track totals optimization #1331

Merged
merged 17 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion asset/asset.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "elasticsearch",
"version": "4.0.7",
"version": "4.1.0",
"minimum_teraslice_version": "2.0.0"
}
4 changes: 2 additions & 2 deletions asset/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "asset",
"displayName": "Asset",
"version": "4.0.7",
"version": "4.1.0",
"private": true,
"description": "",
"license": "MIT",
Expand All @@ -20,7 +20,7 @@
"dependencies": {
"@terascope/data-mate": "~1.7.2",
"@terascope/elasticsearch-api": "~4.7.1",
"@terascope/elasticsearch-asset-apis": "~1.0.6",
"@terascope/elasticsearch-asset-apis": "~1.1.0",
"@terascope/job-components": "~1.9.1",
"@terascope/teraslice-state-storage": "~1.7.1",
"@terascope/utils": "~1.7.1",
Expand Down
11 changes: 11 additions & 0 deletions asset/src/spaces_reader_api/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ const apiSchema = {
default: undefined,
format: String
},
includeTotals: {
doc: 'By default, data fetching is optimized by disabling total count calculation to achieve '
+ 'faster query execution. If you require total counts in your queries set this value to true '
+ `Some endpoints support setting to a fixed integer to limit the count up to that number then `
+ `stop... set to 'number' to count up to the query or slice size then stop. `,
default: false,
format(val: unknown) {
if (val === 'number' || typeof val !== 'number') return;
throw new Error(`Invalid parameter includeTotals, must be a boolean or string 'number', got ${getTypeOf(val)}`);
}
}
};

const spacesSchema = Object.assign({}, clone, apiSchema) as AnyObject;
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "elasticsearch-assets",
"displayName": "Elasticsearch Assets",
"version": "4.0.7",
"version": "4.1.0",
"private": true,
"description": "bundle of processors for teraslice",
"homepage": "https://github.com/terascope/elasticsearch-assets#readme",
Expand Down Expand Up @@ -46,7 +46,7 @@
"devDependencies": {
"@terascope/data-types": "~1.7.1",
"@terascope/elasticsearch-api": "~4.7.1",
"@terascope/elasticsearch-asset-apis": "~1.0.6",
"@terascope/elasticsearch-asset-apis": "~1.1.0",
"@terascope/eslint-config": "~1.1.4",
"@terascope/job-components": "~1.9.1",
"@terascope/scripts": "~1.9.0",
Expand Down
2 changes: 1 addition & 1 deletion packages/elasticsearch-asset-apis/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@terascope/elasticsearch-asset-apis",
"displayName": "Elasticsearch Asset Apis",
"version": "1.0.6",
"version": "1.1.0",
"description": "Elasticsearch reader and sender apis",
"homepage": "https://github.com/terascope/elasticsearch-assets",
"repository": "[email protected]:terascope/elasticsearch-assets.git",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -666,19 +666,17 @@ export class ElasticsearchReaderAPI {
private async getIndexDate(date: string | null | undefined, order: string): Promise<FetchDate> {
// we have a date, parse and return it
if (date) return parseDate(date);
// we are in auto, so we determine each part
const sortObj: Record<string, { order: 'asc' | 'desc' }> = {};
const sortOrder = order === 'start' ? 'asc' : 'desc';

sortObj[this.config.date_field_name] = { order: sortOrder };

// we are in auto, so we determine each part
const query: AnyObject = {
index: this.config.index,
size: 1,
body: {
sort: [
sortObj
]
sort: [{
[this.config.date_field_name]: {
order: order === 'start' ? 'asc' : 'desc'
}
}]
}
};

Expand Down Expand Up @@ -767,8 +765,8 @@ export class ElasticsearchReaderAPI {
return this.config.size;
}

get version(): number {
return this.client.getESVersion();
get version(): number | undefined {
return this.client.getESVersion?.();
}

async verifyIndex(): Promise<void> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import tls from 'tls';
import {
Logger, TSError, get, isNil,
AnyObject, withoutNil, DataEntity,
isKey,
isBoolean, isKey,
} from '@terascope/utils';
import { ClientParams, ClientResponse } from '@terascope/types';
import { DataTypeConfig } from '@terascope/data-types';
Expand Down Expand Up @@ -135,6 +135,8 @@ export class SpacesReaderClient implements ReaderClient {
protected translateSearchQuery(queryConfig: ClientParams.SearchParams): AnyObject {
const { config } = this;

const size = queryConfig?.size ?? config.size;

const fields = get(queryConfig, '_source', null) as string[] | null;

const dateFieldName = this.config.date_field_name;
Expand All @@ -146,8 +148,8 @@ export class SpacesReaderClient implements ReaderClient {
const fieldsQuery = fields ? { fields: fields.join(',') } : {};
const mustQuery = get(queryConfig, 'body.query.bool.must', null);

function parseQueryConfig(mustArray: null | any[]): AnyObject {
const queryOptions = {
function parseQueryConfig(mustArray: null | any[], trackTotalHits?: any): AnyObject {
const queryOptions: Record<string, (op: any) => string> = {
query_string: _parseEsQ,
range: _parseDate,
wildcard: _parseWildCard,
Expand Down Expand Up @@ -188,17 +190,11 @@ export class SpacesReaderClient implements ReaderClient {
});
}

let { size } = queryConfig;

if (size == null) {
({ size } = config);
}

return Object.assign({}, geoQuery, sortQuery, fieldsQuery, {
token: config.token,
q: luceneQuery,
size,
trackTotalHits: true
track_total_hits: trackTotalHits
});
}

Expand Down Expand Up @@ -262,7 +258,20 @@ export class SpacesReaderClient implements ReaderClient {
return `(${terms.join(' OR ')})`;
}

return parseQueryConfig(mustQuery);
let trackTotalHits: boolean | number = false;

if (isBoolean(config.includeTotals)) {
trackTotalHits = config.includeTotals;
}
if (config.includeTotals === 'number') {
trackTotalHits = size + 1;
}
if (size === 0) {
// in case client uses a search API instead of count API
trackTotalHits = true;
}

return parseQueryConfig(mustQuery, trackTotalHits);
}

async getDataType(): Promise<DataTypeConfig> {
Expand Down Expand Up @@ -352,10 +361,6 @@ export class SpacesReaderClient implements ReaderClient {
*/
async verify(): Promise<void> {}

getESVersion(): number {
return 6;
}

async getSettings(_index: string): Promise<ClientResponse.IndicesGetSettingsResponse> {
const { index, endpoint, token } = this.config;
const uri = `${endpoint}/${index}/_info`;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ export interface ReaderClient {
* Gets the elasticsearch major server version,
* this will be used to format the search parameters
*/
getESVersion(): number;
getESVersion?(): number;

/**
* Verify that the cluster is up,
Expand Down Expand Up @@ -365,6 +365,7 @@ export interface SpacesAPIConfig extends ESReaderOptions {
retry?: number;
variables?: xLuceneVariables;
caCertificate?: string;
includeTotals?: boolean | 'number';
}

export interface DetermineSliceResults {
Expand Down
43 changes: 29 additions & 14 deletions packages/elasticsearch-asset-apis/test/spaces/spaces-client-spec.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import { DataFrame } from '@terascope/data-mate';
import { DataTypeConfig, FieldType, ClientParams } from '@terascope/types';
import {
DataTypeConfig, FieldType,
ClientParams, SearchParams
} from '@terascope/types';
import { debugLogger } from '@terascope/utils';
import 'jest-extended';
import nock from 'nock';
import nock, { RequestBodyMatcher } from 'nock';
import {
buildQuery,
FetchResponseType,
Expand Down Expand Up @@ -47,7 +50,7 @@ describe('Spaces Reader Client', () => {
connection: 'default',
interval: '1m',
delay: '30s',
...overrides,
...overrides
}, logger);
}

Expand All @@ -61,7 +64,8 @@ describe('Spaces Reader Client', () => {

describe('when given a simple request', () => {
const client = newClient({
query: 'foo:bar'
query: 'foo:bar',
includeTotals: true
});
let query: ClientParams.SearchParams;

Expand All @@ -72,15 +76,20 @@ describe('Spaces Reader Client', () => {
});

it('should be able to make a search request without use data frames', async () => {
scope.post(`/${index}?token=${token}`, {
const params: SearchParams = {
q: '(foo:bar)',
size: 100,
trackTotalHits: true
}).reply(200, {
results: [{ foo: 'foo', bar: 'bar', byte: 10 }],
returning: 1,
total: 1000
});
track_total_hits: true
};
scope.post(
`/${index}?token=${token}`,
params as RequestBodyMatcher
).reply(
200, {
results: [{ foo: 'foo', bar: 'bar', byte: 10 }],
returning: 1,
total: 1000
});

const result = await client.search(query, FetchResponseType.data_entities);
expect(result).toEqual([
Expand All @@ -101,11 +110,17 @@ describe('Spaces Reader Client', () => {
}
);

scope.post(`/${index}?token=${token}&format=dfjson`, {
const params: SearchParams = {
q: '(foo:bar)',
size: 100,
trackTotalHits: true
}).reply(200, frame.serialize());
track_total_hits: true
};
scope.post(
`/${index}?token=${token}&format=dfjson`,
params as RequestBodyMatcher
).reply(
200, frame.serialize()
);

const result = await client.search(
query,
Expand Down
5 changes: 3 additions & 2 deletions test/spaces_reader/fetcher-spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import 'jest-extended';
import nock from 'nock';
import { SearchParams } from '@terascope/types';
import {
DataEntity, newTestJobConfig, TestClientConfig,
debugLogger
Expand Down Expand Up @@ -198,7 +199,7 @@ describe('spaces_reader fetcher', () => {
// query size are overridden for unbounded fetches
query.size = maxSize;

(query as any).trackTotalHits = true;
(query as SearchParams).track_total_hits = false;

let results: DataEntity[];

Expand Down Expand Up @@ -272,7 +273,7 @@ describe('spaces_reader fetcher', () => {
scope.post(`/${testIndex}?token=${token}`, {
q: '(test:query)',
size: 100000,
trackTotalHits: true
track_total_hits: false
})
.delay(500)
.reply(200, {
Expand Down
2 changes: 1 addition & 1 deletion test/spaces_reader/slicer-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ describe('spaces_reader slicer', () => {
q: `created:[${start.toISOString()} TO ${end.toISOString()}} AND (slicer:query)`,
size: 0,
variables,
trackTotalHits: true
track_total_hits: true
};

scope.get(`/${testIndex}/_info?token=${token}`)
Expand Down
6 changes: 3 additions & 3 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1171,7 +1171,7 @@ __metadata:
languageName: node
linkType: hard

"@terascope/elasticsearch-asset-apis@npm:~1.0.6, @terascope/elasticsearch-asset-apis@workspace:packages/elasticsearch-asset-apis":
"@terascope/elasticsearch-asset-apis@npm:~1.1.0, @terascope/elasticsearch-asset-apis@workspace:packages/elasticsearch-asset-apis":
version: 0.0.0-use.local
resolution: "@terascope/elasticsearch-asset-apis@workspace:packages/elasticsearch-asset-apis"
dependencies:
Expand Down Expand Up @@ -2452,7 +2452,7 @@ __metadata:
dependencies:
"@terascope/data-mate": "npm:~1.7.2"
"@terascope/elasticsearch-api": "npm:~4.7.1"
"@terascope/elasticsearch-asset-apis": "npm:~1.0.6"
"@terascope/elasticsearch-asset-apis": "npm:~1.1.0"
"@terascope/job-components": "npm:~1.9.1"
"@terascope/teraslice-state-storage": "npm:~1.7.1"
"@terascope/utils": "npm:~1.7.1"
Expand Down Expand Up @@ -3483,7 +3483,7 @@ __metadata:
dependencies:
"@terascope/data-types": "npm:~1.7.1"
"@terascope/elasticsearch-api": "npm:~4.7.1"
"@terascope/elasticsearch-asset-apis": "npm:~1.0.6"
"@terascope/elasticsearch-asset-apis": "npm:~1.1.0"
"@terascope/eslint-config": "npm:~1.1.4"
"@terascope/job-components": "npm:~1.9.1"
"@terascope/scripts": "npm:~1.9.0"
Expand Down
Loading