Skip to content

Commit

Permalink
feat: config options for fuzzy search (#898)
Browse files Browse the repository at this point in the history
Co-authored-by: Bob den Os <[email protected]>
  • Loading branch information
johannes-vogel and BobdenOs authored Nov 22, 2024
1 parent 45bcdcf commit f6593e6
Show file tree
Hide file tree
Showing 8 changed files with 203 additions and 66 deletions.
2 changes: 1 addition & 1 deletion db-service/lib/cql-functions.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ const StandardFunctions = {
val = sub[2] || sub[3] || ''
}
arg.val = arg.__proto__.val = val
const refs = ref.list || [ref]
const refs = ref.list
const { toString } = ref
return '(' + refs.map(ref2 => this.contains(this.tolower(toString(ref2)), this.tolower(arg))).join(' or ') + ')'
},
Expand Down
2 changes: 1 addition & 1 deletion db-service/lib/cqn4sql.js
Original file line number Diff line number Diff line change
Expand Up @@ -2203,7 +2203,7 @@ function cqn4sql(originalQuery, model) {
const searchFunc = {
func: 'search',
args: [
searchIn.length > 1 ? { list: searchIn } : { ...searchIn[0] },
{ list: searchIn },
xpr.length === 1 && 'val' in xpr[0] ? xpr[0] : { xpr },
],
}
Expand Down
105 changes: 58 additions & 47 deletions db-service/test/cqn4sql/search.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ describe('Replace attribute search by search predicate', () => {

let res = cqn4sql(query, model)
// single val is stored as val directly, not as expr with val
const expected = CQL`SELECT from bookshop.WithStructuredKey as wsk {
wsk.second
} where search(wsk.second, 'x')`
const expected = CQL`SELECT from bookshop.WithStructuredKey as wsk { wsk.second }`
expected.SELECT.where = [ {func: 'search', args: [{ list: [{ ref: ['wsk', 'second']}] }, {val: 'x'}]}]
expect(JSON.parse(JSON.stringify(res))).to.deep.equal(expected)
})

Expand All @@ -28,9 +27,8 @@ describe('Replace attribute search by search predicate', () => {
query.SELECT.search = [{ val: 'x' }, 'or', { val: 'y' }]

let res = cqn4sql(query, model)
const expected = CQL`SELECT from bookshop.WithStructuredKey as wsk {
wsk.second
} where search(wsk.second, ('x' OR 'y'))`
const expected = CQL`SELECT from bookshop.WithStructuredKey as wsk { wsk.second }`
expected.SELECT.where = [ {func: 'search', args: [{ list: [{ ref: ['wsk', 'second']}] }, {xpr: [{val: 'x'}, 'or', {val: 'y'}]}]}]
expect(JSON.parse(JSON.stringify(res))).to.deep.equal(expected)
})

Expand Down Expand Up @@ -109,16 +107,16 @@ describe('Replace attribute search by search predicate', () => {
query.SELECT.search = [{ val: 'x' }, 'or', { val: 'y' }]

let res = cqn4sql(query, model)
expect(JSON.parse(JSON.stringify(res))).to.deep.equal(
CQL`
SELECT from bookshop.Books as Books
left join bookshop.Authors as author on author.ID = Books.author_ID
left join bookshop.Books as books2 on books2.author_ID = author.ID
{
Books.ID,
books2.title as authorsBook
} where search(books2.title, ('x' OR 'y')) group by Books.title `,
)
const expected = CQL`
SELECT from bookshop.Books as Books
left join bookshop.Authors as author on author.ID = Books.author_ID
left join bookshop.Books as books2 on books2.author_ID = author.ID
{
Books.ID,
books2.title as authorsBook
} where search(books2.title, ('x' OR 'y')) group by Books.title `
expected.SELECT.where = [ {func: 'search', args: [{ list: [{ ref: ['books2', 'title']}] }, {xpr: [{val: 'x'}, 'or', {val: 'y'}]}]}]
expect(JSON.parse(JSON.stringify(res))).to.deep.equal(expected)
})
it('Search on navigation', () => {
let query = CQL`SELECT from bookshop.Authors:books { ID }`
Expand Down Expand Up @@ -147,11 +145,12 @@ describe('Replace attribute search by search predicate', () => {
.columns({ args: [{ ref: ['title'] }], as: 'firstInAlphabet', func: 'MIN' })
.groupBy('title')
.search('Cat')

expect(JSON.parse(JSON.stringify(cqn4sql(query, model)))).to.deep.equal(CQL`
SELECT from bookshop.Books as Books {
MIN(Books.title) as firstInAlphabet
} group by Books.title having search(MIN(Books.title), 'Cat')`)
const expected = CQL`
SELECT from bookshop.Books as Books {
MIN(Books.title) as firstInAlphabet
} group by Books.title having search(MIN(Books.title), 'Cat')`
expected.SELECT.having = [ {func: 'search', args: [{ list: [{func: 'MIN', args: [{ ref: ['Books', 'title']}]}] }, {val: 'Cat'}]}]
expect(JSON.parse(JSON.stringify(cqn4sql(query, model)))).to.deep.equal(expected)
})

it('Ignore non string aggregates from being searched', () => {
Expand All @@ -163,12 +162,13 @@ describe('Replace attribute search by search predicate', () => {
`

query.SELECT.search = [{ val: 'x' }]

expect(JSON.parse(JSON.stringify(cqn4sql(query, model)))).to.deep.equal(CQL`
SELECT from bookshop.Books as Books {
Books.title,
AVG(Books.stock) as searchRelevant,
} where search(Books.title, 'x') group by Books.title`)
const expected = CQL`
SELECT from bookshop.Books as Books {
Books.title,
AVG(Books.stock) as searchRelevant,
} where search(Books.title, 'x') group by Books.title`
expected.SELECT.where = [ {func: 'search', args: [{ list: [{ ref: ['Books', 'title']}] }, {val: 'x'}]}]
expect(JSON.parse(JSON.stringify(cqn4sql(query, model)))).to.deep.equal(expected)
})
it('aggregations which are not of type string are not searched', () => {
const query = CQL`
Expand Down Expand Up @@ -197,12 +197,16 @@ describe('Replace attribute search by search predicate', () => {
`

query.SELECT.search = [{ val: 'x' }]

expect(JSON.parse(JSON.stringify(cqn4sql(query, model)))).to.deep.equal(CQL`
SELECT from bookshop.Books as Books {
Books.ID,
substring(Books.stock) as searchRelevantViaCast: cds.String,
} group by Books.title having search(substring(Books.stock), 'x')`)
const expected = CQL`
SELECT from bookshop.Books as Books {
Books.ID,
substring(Books.stock) as searchRelevantViaCast: cds.String,
} group by Books.title having search(substring(Books.stock), 'x')`
expected.SELECT.having = [ {func: 'search', args: [{ list: [{
args: [ { ref: [ 'Books', 'stock' ] } ],
func: 'substring'
}] }, {val: 'x'}]}]
expect(JSON.parse(JSON.stringify(cqn4sql(query, model)))).to.deep.equal(expected)
})
it('xpr is search relevant via cast', () => {
// this aggregation is not relevant for search per default
Expand All @@ -216,13 +220,21 @@ describe('Replace attribute search by search predicate', () => {
`

query.SELECT.search = [{ val: 'x' }]

expect(JSON.parse(JSON.stringify(cqn4sql(query, model)))).to.deep.equal(CQL`
SELECT from bookshop.Books as Books {
Books.ID,
('very' + 'useful' + 'string') as searchRelevantViaCast: cds.String,
('1' + '2' + '3') as notSearchRelevant: cds.Integer,
} group by Books.title having search(('very' + 'useful' + 'string'), 'x')`)
const expected = CQL`
SELECT from bookshop.Books as Books {
Books.ID,
('very' + 'useful' + 'string') as searchRelevantViaCast: cds.String,
('1' + '2' + '3') as notSearchRelevant: cds.Integer,
} group by Books.title`
expected.SELECT.having = [ {func: 'search', args: [{ list: [{
xpr: [
{ val: 'very' },
'+',
{ val: 'useful' },
'+',
{ val: 'string' }
] }] }, {val: 'x'}]}]
expect(JSON.parse(JSON.stringify(cqn4sql(query, model)))).to.deep.equal(expected)
})
})

Expand All @@ -242,7 +254,8 @@ describe('search w/ path expressions', () => {
{
BooksSearchAuthorName.ID,
BooksSearchAuthorName.title
} where search(author.lastName, 'x')`
}`
expected.SELECT.where = [ {func: 'search', args: [{ list: [{ref: ['author', 'lastName']}]}, {val: 'x'}]}]
expect(JSON.parse(JSON.stringify(res))).to.deep.equal(expected)
})

Expand Down Expand Up @@ -286,7 +299,8 @@ describe('search w/ path expressions', () => {
{
BookShelf.ID,
BookShelf.genre
} where search((BookShelf.genre), 'Harry Plotter')`
}`
expected.SELECT.where = [ {func: 'search', args: [{ list: [{ref: ['BookShelf', 'genre']}]}, {val: 'Harry Plotter'}]}]
expect(JSON.parse(JSON.stringify(res))).to.deep.equal(expected)
})
})
Expand Down Expand Up @@ -316,11 +330,8 @@ describe('calculated elements', () => {
query.SELECT.search = [{ val: 'x' }]

let res = cqn4sql(query, model)
const expected = CQL`
SELECT from search.CalculatedAddressesWithoutAnno as Address
{
Address.ID
} where search((Address.city), 'x')`
const expected = CQL`SELECT from search.CalculatedAddressesWithoutAnno as Address { Address.ID }`
expected.SELECT.where = [ {func: 'search', args: [{ list: [{ref: ['Address', 'city']}]}, {val: 'x'}]}]
expect(JSON.parse(JSON.stringify(res))).to.deep.equal(expected)
})
})
Expand Down
66 changes: 64 additions & 2 deletions hana/lib/cql-functions.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,72 @@ const StandardFunctions = {
contains: (...args) => args.length > 2 ? `CONTAINS(${args})` : `(CASE WHEN coalesce(locate(${args}),0)>0 THEN TRUE ELSE FALSE END)`,
concat: (...args) => `(${args.map(a => (a.xpr ? `(${a})` : a)).join(' || ')})`,
search: function (ref, arg) {
if (cds.env.hana.fuzzy === false) {
// REVISIT: remove once the protocol adapter only creates vals
arg = arg.xpr ? arg.xpr : arg
if (Array.isArray(arg)) arg = [{ val: arg.filter(a => a.val).map(a => a.val).join(' ') }]
else arg = [arg]
const searchTerms = arg[0].val
.match(/("")|("(?:[^"]|\\")*(?:[^\\]|\\\\)")|(\S*)/g)
.filter(el => el.length).map(el => `%${el.replace(/^\"|\"$/g, '').toLowerCase()}%`)

const columns = ref.list
const xpr = []
for (const s of searchTerms) {
const nestedXpr = []
for (const c of columns) {
if (nestedXpr.length) nestedXpr.push('or')
nestedXpr.push({ func: 'lower', args: [c]}, 'like', {val: s})
}
if (xpr.length) xpr.push('and')
xpr.push({xpr: nestedXpr})
}

const { toString } = ref
return `(CASE WHEN (${toString({ xpr })}) THEN TRUE ELSE FALSE END)`
}

// fuzziness config
const fuzzyIndex = cds.env.hana?.fuzzy || 0.7

const csnElements = ref.list
// if column specific value is provided, the configuration has to be defined on column level
if (csnElements.some(e => e.element?.['@Search.ranking'] || e.element?.['@Search.fuzzinessThreshold'])) {
csnElements.forEach(e => {
let fuzzy = `FUZZY`

// weighted search
const rank = e.element?.['@Search.ranking']?.['=']
switch(rank) {
case 'HIGH':
fuzzy += ' WEIGHT 0.8'
break
case 'LOW':
fuzzy += ' WEIGHT 0.3'
break
case 'MEDIUM':
case undefined:
fuzzy += ' WEIGHT 0.5'
break
default: throw new Error(`Invalid configuration ${rank} for @Search.ranking. HIGH, MEDIUM, LOW are supported values.`)
}

// fuzziness
fuzzy+= ` MINIMAL TOKEN SCORE ${e.element?.['@Search.fuzzinessThreshold'] || fuzzyIndex} SIMILARITY CALCULATION MODE 'search'`

// rewrite ref to xpr to mix in search config
// ensure in place modification to reuse .toString method that ensures quoting
e.xpr = [{ ref: e.ref }, fuzzy]
delete e.ref
})
} else {
ref = `${ref} FUZZY MINIMAL TOKEN SCORE ${fuzzyIndex} SIMILARITY CALCULATION MODE 'search'`
}

// REVISIT: remove once the protocol adapter only creates vals
if (Array.isArray(arg.xpr)) arg = { val: arg.xpr.filter(a => a.val).map(a => a.val).join(' ') }
// REVISIT: make this more configurable
return (`(CASE WHEN SCORE(${arg} IN ${ref} FUZZY MINIMAL TOKEN SCORE 0.7 SIMILARITY CALCULATION MODE 'search') > 0 THEN TRUE ELSE FALSE END)`)

return (`(CASE WHEN SCORE(${arg} IN ${ref}) > 0 THEN TRUE ELSE FALSE END)`)
},

// Date and Time Functions
Expand Down
6 changes: 5 additions & 1 deletion hana/test/fuzzy.cds
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
using {sap.capire.bookshop.Books as Books} from '../../test/bookshop/db/schema.cds';
using {sap.capire.bookshop.BooksAnnotated as BooksAnnotated} from '../../test/bookshop/db/schema.cds';

annotate BooksAnnotated with @cds.search: {title, descr, currency.code};
annotate BooksAnnotated:title with @(Search.ranking: HIGH, Search.fuzzinessThreshold: 0.9);
annotate BooksAnnotated:descr with @(Search.ranking: LOW, Search.fuzzinessThreshold: 0.9);
81 changes: 70 additions & 11 deletions hana/test/fuzzy.test.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,78 @@
const cds = require('../../test/cds')

describe('Fuzzy search', () => {
describe('search', () => {
const { expect } = cds.test(__dirname, 'fuzzy.cds')

test('select', async () => {
const { Books } = cds.entities('sap.capire.bookshop')
const res = await SELECT.from(Books).where({
func: 'contains',
args: [
{ list: [{ ref: ['title'] }, { ref: ['descr'] }] },
{ val: 'poem' },
{ func: 'FUZZY', args: [{ val: 0.8 }, { val: 'similarCalculationMode=searchCompare' }] }
]
beforeEach (() => {
delete cds.env.hana.fuzzy
})

describe('fuzzy', () => {
// No `cds.env.hana.fuzzy` override is active here (the outer beforeEach deletes it),
// so the generated SQL is expected to carry the default token score of 0.7.
test('default', async () => {
const { Books } = cds.entities('sap.capire.bookshop')
const cqn = SELECT.from(Books).search('"autobio"').columns('1')
// inspect the generated SQL first, then execute the very same query
const {sql} = cqn.toSQL()
expect(sql).to.include('FUZZY MINIMAL TOKEN SCORE 0.7')
const res = await cqn
expect(res.length).to.be(2) // Eleonora and Jane Eyre
})

// Skipped: HCE returns a different result than HXE
// NOTE(review): HCE/HXE presumably refer to two different HANA editions - confirm
// Checks that in fuzzy mode several quoted terms are handed over as one unmodified value.
test.skip('multiple search terms', async () => {
const { Books } = cds.entities('sap.capire.bookshop')
const cqn = SELECT.from(Books).search('"autobio" "jane"').columns('1')
const {sql, values} = cqn.toSQL()
expect(sql).to.include('FUZZY MINIMAL TOKEN SCORE 0.7')
expect(values[0]).to.eq('"autobio" "jane"') // taken as is
const res = await cqn
expect(res.length).to.be(2) // Eleonora and Jane Eyre
})

// A numeric `cds.env.hana.fuzzy` is expected to show up as the token score in the SQL.
test('global config', async () => {
// global override; the outer beforeEach deletes it again before the next test
cds.env.hana.fuzzy = 1
const { Books } = cds.entities('sap.capire.bookshop')
const cqn = SELECT.from(Books).search('"autobio"').columns('1')
const {sql} = cqn.toSQL()
expect(sql).to.include('FUZZY MINIMAL TOKEN SCORE 1')
const res = await cqn
expect(res.length).to.be(2) // Eleonora and Jane Eyre
})

expect(res).to.have.property('length').to.be.eq(1)
// Column level configuration: fuzzy.cds annotates BooksAnnotated.title with
// ranking HIGH / threshold 0.9 and BooksAnnotated.descr with ranking LOW / threshold 0.9;
// currency.code is searched without any @Search annotation.
test('annotations', async () => {
const { BooksAnnotated } = cds.entities('sap.capire.bookshop')
const cqn = SELECT.from(BooksAnnotated).search('"first-person"').columns('1')
const {sql} = cqn.toSQL()
// HIGH -> weight 0.8, annotated threshold 0.9
expect(sql).to.include('title FUZZY WEIGHT 0.8 MINIMAL TOKEN SCORE 0.9')
// not annotated -> weight 0.5 and token score 0.7
expect(sql).to.include('code FUZZY WEIGHT 0.5 MINIMAL TOKEN SCORE 0.7')
// LOW -> weight 0.3, annotated threshold 0.9
expect(sql).to.include('descr FUZZY WEIGHT 0.3 MINIMAL TOKEN SCORE 0.9')

const res = await cqn
expect(res.length).to.be(1) // jane eyre
})
})

// Tests for the non-fuzzy mode: with `cds.env.hana.fuzzy = false` the search is
// expected to be rendered with `like` comparisons instead of a fuzzy search.
describe('like', () => {
// switch fuzzy search off for every test of this group
beforeEach (() => cds.env.hana.fuzzy = false)
test('fallback - 1 search term', async () => {
const { Books } = cds.entities('sap.capire.bookshop')
const cqn = SELECT.from(Books).search('"autobio"').columns('1')
const {sql} = cqn.toSQL()
// 5 columns to be searched createdBy, modifiedBy, title, descr, currency_code
// one search term -> one `like` per column
expect(sql.match(/(like)/g).length).to.be(5)
const res = await cqn
expect(res.length).to.be(2) // Eleonora and Jane Eyre
})

test('fallback - 2 search terms', async () => {
const { Books } = cds.entities('sap.capire.bookshop')
const cqn = SELECT.from(Books).search('"autobio"', '"Jane"').columns('1')
const {sql, values} = cqn.toSQL()
// 5 columns to be searched createdBy, modifiedBy, title, descr, currency_code
// two search terms -> one `like` per column and term
expect(sql.match(/(like)/g).length).to.be(10)
// terms are expected unquoted, lower-cased and wrapped in % wildcards
expect(values).to.include('%autobio%')
expect(values).to.include('%jane%')
const res = await cqn
expect(res.length).to.be(1) // Jane Eyre
})
})
})
3 changes: 2 additions & 1 deletion test/bookshop/db/schema.cds
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,5 @@ entity C : managed {
B : Integer;
toB : Composition of many B
on toB.ID = $self.B;
}
};
entity BooksAnnotated as projection on Books;
4 changes: 2 additions & 2 deletions test/compliance/SELECT.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ describe('SELECT', () => {
// search tests don't check results as the search behavior is undefined
test('search one column', async () => {
const { string } = cds.entities('basic.literals')
const cqn = CQL`SELECT * FROM ${string} WHERE search((string),${'yes'})`
const cqn = SELECT.from(string).where([{func: 'search', args: [{list: [{ref: ['string']}]}, {val: 'yes'}]}])
await cds.run(cqn)
})

Expand Down Expand Up @@ -994,7 +994,7 @@ describe('SELECT', () => {
unified.scalar = [
// TODO: investigate search issue for nvarchar columns
...unified.ref.filter(ref => cds.builtin.types[ref.element?.type] === cds.builtin.types.LargeString).map(ref => {
return unified.string.map(val => ({ func: 'search', args: [ref, val] }))
return unified.string.map(val => ({ func: 'search', args: [{list:[ref]}, val] }))
}).flat(),
// ...unified.string.map(val => ({ func: 'search', args: [{ list: unified.ref.filter(stringRefs) }, val] })),
...unified.ref.filter(stringRefs).filter(noBooleanRefs).map(X => {
Expand Down

0 comments on commit f6593e6

Please sign in to comment.