Skip to content

Commit

Permalink
Add document frequency to query items.
Browse files Browse the repository at this point in the history
  • Loading branch information
toregge committed Jul 2, 2024
1 parent 9d554b9 commit f532c7e
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 3 deletions.
31 changes: 29 additions & 2 deletions container-search/abi-spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -480,10 +480,30 @@
"public void setExplicitSignificance(boolean)",
"public boolean hasExplicitSignificance()",
"public double getSignificance()",
"public void setDocumentFrequency(com.yahoo.prelude.query.DocumentFrequency)",
"public java.util.Optional getDocumentFrequency()",
"public boolean hasUniqueID()"
],
"fields" : [ ]
},
"com.yahoo.prelude.query.DocumentFrequency" : {
"superClass" : "java.lang.Record",
"interfaces" : [ ],
"attributes" : [
"public",
"final",
"record"
],
"methods" : [
"public void <init>(long, long)",
"public final java.lang.String toString()",
"public final int hashCode()",
"public final boolean equals(java.lang.Object)",
"public long frequency()",
"public long corpusSize()"
],
"fields" : [ ]
},
"com.yahoo.prelude.query.DotProductItem" : {
"superClass" : "com.yahoo.prelude.query.WeightedSetItem",
"interfaces" : [ ],
Expand Down Expand Up @@ -874,7 +894,8 @@
"protected com.yahoo.prelude.query.Item connectedBacklink",
"protected double connectivity",
"protected double significance",
"protected boolean explicitSignificance"
"protected boolean explicitSignificance",
"protected com.yahoo.prelude.query.DocumentFrequency documentFrequency"
]
},
"com.yahoo.prelude.query.ItemHelper" : {
Expand Down Expand Up @@ -1578,6 +1599,8 @@
"public void setExplicitSignificance(boolean)",
"public boolean hasExplicitSignificance()",
"public double getSignificance()",
"public void setDocumentFrequency(com.yahoo.prelude.query.DocumentFrequency)",
"public java.util.Optional getDocumentFrequency()",
"public boolean hasUniqueID()"
],
"fields" : [ ]
Expand Down Expand Up @@ -1679,7 +1702,9 @@
"public abstract void setSignificance(double)",
"public abstract boolean hasExplicitSignificance()",
"public abstract void setExplicitSignificance(boolean)",
"public abstract double getSignificance()"
"public abstract double getSignificance()",
"public abstract void setDocumentFrequency(com.yahoo.prelude.query.DocumentFrequency)",
"public abstract java.util.Optional getDocumentFrequency()"
],
"fields" : [ ]
},
Expand All @@ -1703,6 +1728,8 @@
"public void setExplicitSignificance(boolean)",
"public boolean hasExplicitSignificance()",
"public double getSignificance()",
"public void setDocumentFrequency(com.yahoo.prelude.query.DocumentFrequency)",
"public java.util.Optional getDocumentFrequency()",
"public boolean hasUniqueID()"
],
"fields" : [ ]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import java.util.Optional;

/**
* Common implementation for Item classes implementing the TaggableItem interface.
* Note that this file exists in 3 copies that should be kept in sync:
Expand Down Expand Up @@ -68,6 +70,12 @@ public double getSignificance() {
return significance;
}

/** Stores the given document frequency on this item, replacing any previously stored value. */
@Override
public void setDocumentFrequency(DocumentFrequency documentFrequency) {
    this.documentFrequency = documentFrequency;
}

/** Returns the stored document frequency, or an empty Optional when the field is null. */
@Override
public Optional<DocumentFrequency> getDocumentFrequency() {
    return Optional.ofNullable(documentFrequency);
}

//Change access privilege from protected to public.
public boolean hasUniqueID() {
return super.hasUniqueID();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

/**
 * The expected number of documents matching the item given a corpus of
 * multiple documents. This is the raw data used to calculate variants
 * of idf, used as significance.
 *
 * @param frequency  the expected number of documents matching the item
 * @param corpusSize the size of the corpus the frequency relates to
 *                   (presumably a document count - confirm against producers of this value)
 */
public record DocumentFrequency(long frequency, long corpusSize) {
}
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ public enum ItemCreator {
protected double significance = 0;
protected boolean explicitSignificance = false;

protected DocumentFrequency documentFrequency = null;

/** Whether this item is eligible for change by query rewriters (false) or should be kept as-is (true) */
private boolean isProtected;

Expand Down Expand Up @@ -495,6 +497,8 @@ public void disclose(Discloser discloser) {
discloser.addProperty("usePositionData", usePositionData);
if (explicitSignificance)
discloser.addProperty("significance", significance);
if (documentFrequency != null)
discloser.addProperty("documentFrequency", documentFrequency);
if (weight != 100)
discloser.addProperty("weight", weight);
if (label != null)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import java.util.Optional;

/**
* Common implementation for Item classes implementing the TaggableItem interface.
* Note that this file exists in 3 copies that should be kept in sync:
Expand Down Expand Up @@ -68,6 +70,12 @@ public double getSignificance() {
return significance;
}

/** Stores the given document frequency on this item, replacing any previously stored value. */
@Override
public void setDocumentFrequency(DocumentFrequency documentFrequency) {
    this.documentFrequency = documentFrequency;
}

/** Returns the stored document frequency, or an empty Optional when the field is null. */
@Override
public Optional<DocumentFrequency> getDocumentFrequency() {
    return Optional.ofNullable(documentFrequency);
}

//Change access privilege from protected to public.
public boolean hasUniqueID() {
return super.hasUniqueID();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import java.util.Optional;

/**
* An interface used for anything which may be addressed using an external,
* unique ID in the query tree in the backend.
Expand Down Expand Up @@ -44,4 +46,6 @@ public interface TaggableItem {
void setExplicitSignificance(boolean significance);
double getSignificance();

/**
 * Sets the document frequency of this item.
 *
 * @param documentFrequency the document frequency to associate with this item
 */
void setDocumentFrequency(DocumentFrequency documentFrequency);
/**
 * Returns the document frequency of this item, if any.
 *
 * @return the document frequency, or an empty Optional if none is available
 */
Optional<DocumentFrequency> getDocumentFrequency();
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import java.util.Optional;

/**
* Common implementation for Item classes implementing the TaggableItem interface.
* Note that this file exists in 3 copies that should be kept in sync:
Expand Down Expand Up @@ -81,6 +83,12 @@ public double getSignificance() {
return significance;
}

/** Stores the given document frequency on this item, replacing any previously stored value. */
@Override
public void setDocumentFrequency(DocumentFrequency documentFrequency) {
    this.documentFrequency = documentFrequency;
}

/** Returns the stored document frequency, or an empty Optional when the field is null. */
@Override
public Optional<DocumentFrequency> getDocumentFrequency() {
    return Optional.ofNullable(documentFrequency);
}

//Change access privilege from protected to public.
@Override
public boolean hasUniqueID() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ void requireSimilarAPIs() {
.getDeclaredMethods();
final Method[] simple = SimpleTaggableItem.class.getDeclaredMethods();
final Method[] segment = TaggableSegmentItem.class.getDeclaredMethods();
final int numberOfMethods = 10;
final int numberOfMethods = 12;
assertEquals(numberOfMethods, composite.length);
assertEquals(numberOfMethods, simple.length);
assertEquals(numberOfMethods, segment.length);
Expand Down Expand Up @@ -152,4 +152,13 @@ final void testSetSignificance() {
assertTrue(p.hasExplicitSignificance());
}

@Test
final void testSetDocumentFrequency() {
    final PhraseSegmentItem item = new PhraseSegmentItem("farmyards", false, false);

    // No document frequency is present until one has been set.
    assertFalse(item.getDocumentFrequency().isPresent());

    // Set one instance and compare against a separately constructed one,
    // so the check relies on value equality rather than identity.
    final DocumentFrequency expected = new DocumentFrequency(13, 100);
    item.setDocumentFrequency(new DocumentFrequency(13, 100));

    assertTrue(item.getDocumentFrequency().isPresent());
    assertEquals(expected, item.getDocumentFrequency().get());
}

}

0 comments on commit f532c7e

Please sign in to comment.