-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQryIop.java
246 lines (211 loc) · 7.77 KB
/
QryIop.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
/**
* Copyright (c) 2016, Carnegie Mellon University. All Rights Reserved.
*/
import java.io.*;
import java.util.*;
/**
* All query operators that return inverted lists are subclasses of
* the QryIop class. This class has two main purposes. First, it
* allows query operators to easily recognize any nested query
* operator that returns an inverted list (e.g., #AND (a #NEAR/1 (b c))).
* Second, it is a place to store data structures and methods that are
* common to all query operators that return inverted lists.
* <p>
* After a QryIop operator is initialized, it caches a full inverted
* list, and information from the inverted list is accessible. Document
* and location information are accessed via Qry.docIterator and
* QryIop.locIterator. Corpus-level information, for example,
* document frequency (df) and collection term frequency (ctf), are
* available via specific methods (e.g., getDf and getCtf).
* </p><p>
* QryIop operators support iteration over the locations in the
* document that Qry.docIteratorHasMatch matches. The semantics
* and use of the QryIop.locIterator are similar to the Qry.docIterator.
* The QryIop.locIterator is initialized automatically each time
* Qry.docIteratorHasMatch finds a match; no additional initialization
* is required.
*/
public abstract class QryIop extends Qry {

  /*
   *  IMPLEMENTATION NOTES:
   *
   *  Iteration in QryIop and QrySop is very different.  In QryIop,
   *  docIterator and locIterator iterate over the cached inverted
   *  list, NOT recursively over the query arguments.
   *
   *  Both iterators are plain integer indexes: docIteratorIndex is a
   *  position in invertedList.postings, and locIteratorIndex is a
   *  position in the positions list of the current posting.  Every
   *  document-level bound in this class is invertedList.df, so that
   *  "finished" and "has no match" always agree.
   */

  /**
   *  An invalid index for docIterator and locIterator.  Both
   *  iterators hold this value until initialize is called.
   */
  private static final int INVALID_ITERATOR_INDEX = -1;

  /**
   *  The document field that the query operator applies to; this is
   *  inferred from query operator arguments.
   */
  protected String field = null;

  /**
   *  The inverted list that is produced when the query operator is
   *  initialized; use the docIterator to access this list.
   */
  protected InvList invertedList = null;

  /**
   *  The index of the document that the docIterator points to now.
   */
  private int docIteratorIndex = QryIop.INVALID_ITERATOR_INDEX;

  /**
   *  The index of the location that the locIterator points to now.
   */
  private int locIteratorIndex = QryIop.INVALID_ITERATOR_INDEX;

  /**
   *  Advance the query operator's internal iterator beyond the
   *  specified document.  The location iterator is reset to the
   *  first location of whatever document the iterator stops on.
   *  @param docid The document's internal document id
   */
  public void docIteratorAdvancePast (int docid) {

    // Skip every posting whose docid is <= the specified docid.
    while ((this.docIteratorIndex < this.invertedList.df) &&
           (this.invertedList.getDocid (this.docIteratorIndex) <= docid)) {
      this.docIteratorIndex ++;
    }

    this.locIteratorIndex = 0;
  }

  /**
   *  Advance the query operator's internal iterator to the specified
   *  document if it exists, or beyond if it doesn't.  The location
   *  iterator is reset to the first location of whatever document
   *  the iterator stops on.
   *  @param docid The document's internal document id
   */
  public void docIteratorAdvanceTo (int docid) {

    // Skip every posting whose docid is strictly less than docid.
    while ((this.docIteratorIndex < this.invertedList.df) &&
           (this.invertedList.getDocid (this.docIteratorIndex) < docid)) {
      this.docIteratorIndex ++;
    }

    this.locIteratorIndex = 0;
  }

  /**
   *  Advance the query operator's internal iterator beyond any
   *  possible document.  After this call docIteratorHasMatch
   *  returns false.
   */
  public void docIteratorFinish () {

    // Use the same bound (df) that docIteratorHasMatch and the
    // docIteratorAdvance* methods test against, so the iterator is
    // exhausted by construction rather than by relying on df and
    // postings.size() being equal.
    this.docIteratorIndex = this.invertedList.df;
  }

  /**
   *  Return the id of the document that the query operator's internal
   *  iterator points to now.  Use docIteratorHasMatch to determine
   *  whether the iterator currently points to a document before
   *  calling this method; this method performs no check of its own and
   *  passes the current index directly to InvList.getDocid, so the
   *  result for an exhausted iterator is whatever that method does
   *  with an out-of-range index.
   *  @return The internal id of the current document.
   */
  public int docIteratorGetMatch () {
    return this.invertedList.getDocid (this.docIteratorIndex);
  }

  /**
   *  Return the postings for the document that the docIterator points to
   *  now, or throw an error if the docIterator doesn't point at a document.
   *  @return A document posting.
   */
  public InvList.DocPosting docIteratorGetMatchPosting () {
    return this.invertedList.postings.get (this.docIteratorIndex);
  }

  /**
   *  Indicates whether the query has a matching document, i.e.,
   *  whether the docIterator index is still below the document
   *  frequency of the cached inverted list.
   *  @param r A retrieval model (that is ignored - it can be null)
   *  @return True if the query matches a document, otherwise false.
   */
  public boolean docIteratorHasMatch (RetrievalModel r) {
    return (this.docIteratorIndex < this.invertedList.df);
  }

  /**
   *  Get the collection term frequency (ctf) associated with this
   *  query operator.  It is an error to call this method before the
   *  object's initialize method is called.
   *  @return The collection term frequency (ctf).
   */
  public int getCtf () {
    return this.invertedList.ctf;
  }

  /**
   *  Get the document frequency (df) associated with this query
   *  operator.  It is an error to call this method before the
   *  object's initialize method is called.
   *  @return The document frequency (df).
   */
  public int getDf () {
    return this.invertedList.df;
  }

  /**
   *  Get the field associated with this query operator.
   *  @return The field associated with this query operator.
   */
  public String getField () {
    return this.field;
  }

  /**
   *  Evaluate the query operator; the result is an internal inverted
   *  list that may be accessed via the internal iterators.
   *  @throws IOException Error accessing the Lucene index.
   */
  protected abstract void evaluate () throws IOException;

  /**
   *  Initialize the query operator (and its arguments), including any
   *  internal iterators; this method must be called before iteration
   *  can begin.  Every query argument is cast to QryIop, so an
   *  argument of any other type causes a ClassCastException.
   *  @param r A retrieval model (that is ignored)
   *  @throws IOException Error raised while initializing an argument
   *  or evaluating this operator.
   */
  public void initialize (RetrievalModel r) throws IOException {

    //  Initialize the query arguments (if any).
    for (Qry q_i: this.args) {
      ((QryIop) q_i).initialize (r);
    }

    //  Evaluate the operator.
    this.evaluate ();

    //  Initialize the internal iterators.
    this.docIteratorIndex = 0;
    this.locIteratorIndex = 0;
  }

  /**
   *  Advance the query operator's internal iterator to the
   *  next location.
   */
  public void locIteratorAdvance () {
    this.locIteratorIndex ++;
  }

  /**
   *  Advance the query operator's internal iterator beyond the
   *  specified location within the current document.  The
   *  docIterator must point at a document when this is called.
   *  @param loc The location to advance beyond.
   */
  public void locIteratorAdvancePast (int loc) {

    // Fetch the current posting once rather than once per field.
    InvList.DocPosting posting =
      this.invertedList.postings.get (this.docIteratorIndex);
    int tf = posting.tf;
    List<Integer> positions = posting.positions;

    // Skip every location that is <= the specified location.
    while ((this.locIteratorIndex < tf) &&
           (positions.get (this.locIteratorIndex) <= loc)) {
      this.locIteratorIndex ++;
    }
  }

  /**
   *  Advance the query operator's internal iterator beyond
   *  any possible location in the current document.  The
   *  docIterator must point at a document when this is called.
   */
  public void locIteratorFinish () {
    this.locIteratorIndex =
      this.invertedList.postings.get (this.docIteratorIndex).tf;
  }

  /**
   *  Return the document location that the query operator's internal
   *  iterator points to now.  Use locIteratorHasMatch to determine
   *  whether the iterator currently points to a location before
   *  calling this method; no check is made here, so an exhausted
   *  iterator indexes past the end of the positions list.
   *  @return The current location within the current document.
   */
  public int locIteratorGetMatch () {
    List<Integer> locations = this.docIteratorGetMatchPosting ().positions;
    return locations.get (this.locIteratorIndex);
  }

  /**
   *  Returns true if the query operator's internal iterator currently
   *  points to a location, i.e., the locIterator index is below the
   *  term frequency of the current document.
   *  @return True if the iterator currently points to a location.
   */
  public boolean locIteratorHasMatch () {
    return (this.locIteratorIndex <
            this.invertedList.getTf (this.docIteratorIndex));
  }
}