Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ Other
Applications using SecurityManager now need to grant SerializablePermission("serialFilter")
to the analysis-smartcn module. (Uwe Schindler, Isaac David)

* GITHUB#15476: Enforce fallback support for float vector retrieval in quantized KNN vector formats. (Pulkit Gupta)

Build
---------------------
* Upgrade forbiddenapis to version 3.10. (Uwe Schindler)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,4 +186,9 @@ public void testQuantizedVectorsWriteAndRead() throws IOException {
}
}
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -177,4 +177,9 @@ public void testSimpleOffHeapSize() throws IOException {
}
}
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,9 @@ public void testMergingWithDifferentByteKnnFields() {
public void testMismatchedFields() throws Exception {
// Intentionally a no-op: exercising this scenario requires byte support.
// NOTE(review): presumably this replaces an inherited test for a format without byte vectors;
// confirm against the parent class.
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,9 @@ public void testMergingWithDifferentByteKnnFields() {
public void testMismatchedFields() throws Exception {
// Intentionally a no-op: exercising this scenario requires byte support.
// NOTE(review): presumably this replaces an inherited test for a format without byte vectors;
// confirm against the parent class.
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,9 @@ public void testMergingWithDifferentByteKnnFields() {
public void testMismatchedFields() throws Exception {
// Intentionally a no-op: exercising this scenario requires byte support.
// NOTE(review): presumably this replaces an inherited test for a format without byte vectors;
// confirm against the parent class.
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
"Lucene94RWHnswVectorsFormat(name=Lucene94RWHnswVectorsFormat, maxConn=10, beamWidth=20)";
assertEquals(expectedString, customCodec.getKnnVectorsFormatForField("bogus_field").toString());
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
"Lucene95RWHnswVectorsFormat(name=Lucene95RWHnswVectorsFormat, maxConn=10, beamWidth=20)";
assertEquals(expectedString, customCodec.getKnnVectorsFormatForField("bogus_field").toString());
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -369,4 +369,9 @@ public void testVectorSimilarityFuncs() {
var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues);
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,9 @@ public void testSimpleOffHeapSize() throws IOException {
}
}
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package org.apache.lucene.backward_codecs.lucene99;

import static java.lang.String.format;
import static org.apache.lucene.backward_codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.oneOf;
Expand All @@ -26,16 +25,13 @@
import java.util.List;
import java.util.Locale;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
Expand All @@ -44,11 +40,7 @@
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.apache.lucene.tests.store.BaseDirectoryWrapper;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
Expand Down Expand Up @@ -86,21 +78,6 @@ private Codec getCodec(float confidenceInterval) {
confidenceInterval, bits, bits == 4 ? random().nextBoolean() : false));
}

/**
 * Generates a list of random float vectors.
 *
 * @param numVectors how many vectors to generate
 * @param dim dimension handed to {@code randomVector} for every vector
 * @param normalize when {@code true}, each generated vector is copied and the copy is passed
 *     through {@code VectorUtil.l2normalize} before being stored
 * @return the vectors in the order they were generated
 */
protected List<float[]> getRandomFloatVector(int numVectors, int dim, boolean normalize) {
  List<float[]> result = new ArrayList<>(numVectors);
  int remaining = numVectors;
  while (remaining-- > 0) {
    float[] generated = randomVector(dim);
    if (!normalize) {
      result.add(generated);
      continue;
    }
    // Normalize a private copy so the array returned by randomVector is left untouched.
    float[] normalized = generated.clone();
    VectorUtil.l2normalize(normalized);
    result.add(normalized);
  }
  return result;
}

public void testSearch() throws Exception {
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Expand Down Expand Up @@ -219,145 +196,6 @@ public void testQuantizedVectorsWriteAndRead() throws Exception {
}
}

/**
 * Checks that float vectors remain readable through the scalar-quantized reader once the raw
 * vector data has been emptied on disk.
 *
 * <p>The test indexes random vectors into a single committed segment (merging, auto-flush and
 * compound files are disabled), rewrites the vector files with {@code simulateEmptyRawVectors},
 * and then compares every value returned by {@code getFloatVectorValues} with the originally
 * indexed value using an absolute tolerance of {@code 0.2f}.
 *
 * @throws Exception if indexing, rewriting the index files, or reading them back fails
 */
public void testReadQuantizedVectorWithEmptyRawVectors() throws Exception {
  String vectorFieldName = "vec1";
  int numVectors = 1 + random().nextInt(50);
  int dim = random().nextInt(64) + 1;
  // Round odd dimensions up so the dimension is always even.
  // NOTE(review): presumably required by the packed 4-bit encoding; confirm.
  if (dim % 2 == 1) {
    dim++;
  }
  VectorSimilarityFunction similarityFunction = randomSimilarity();
  // Vectors are normalized up front only when COSINE similarity was picked.
  List<float[]> vectors =
      getRandomFloatVector(
          numVectors, dim, similarityFunction == VectorSimilarityFunction.COSINE);

  try (BaseDirectoryWrapper dir = newDirectory();
      IndexWriter w =
          new IndexWriter(
              dir,
              new IndexWriterConfig()
                  .setMaxBufferedDocs(numVectors + 1)
                  .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                  .setMergePolicy(NoMergePolicy.INSTANCE)
                  .setUseCompoundFile(false)
                  .setCodec(getCodec(1f)))) {
    // The index files are rewritten by hand below, so the check-index-on-close step is disabled.
    dir.setCheckIndexOnClose(false);

    for (int i = 0; i < numVectors; i++) {
      Document doc = new Document();
      doc.add(new KnnFloatVectorField(vectorFieldName, vectors.get(i), similarityFunction));
      w.addDocument(doc);
    }
    w.commit();

    simulateEmptyRawVectors(dir);

    try (IndexReader reader = DirectoryReader.open(w)) {
      LeafReader r = getOnlyLeafReader(reader);
      if (!(r instanceof CodecReader codecReader)) {
        throw new AssertionError("reader is not CodecReader");
      }

      KnnVectorsReader knnVectorsReader =
          codecReader.getVectorReader().unwrapReaderForField(vectorFieldName);
      if (!(knnVectorsReader instanceof Lucene99ScalarQuantizedVectorsReader quantizedReader)) {
        // Carry the actual reader in the failure itself instead of printing it to stdout.
        throw new AssertionError(
            "reader is not Lucene99ScalarQuantizedVectorsReader: " + knnVectorsReader);
      }

      FloatVectorValues floatVectorValues = quantizedReader.getFloatVectorValues(vectorFieldName);
      if (!(floatVectorValues instanceof OffHeapQuantizedFloatVectorValues)) {
        throw new AssertionError("floatVectorValues is not OffHeapQuantizedFloatVectorValues");
      }

      KnnVectorValues.DocIndexIterator iter = floatVectorValues.iterator();
      for (int docId = iter.nextDoc(); docId != NO_MORE_DOCS; docId = iter.nextDoc()) {
        float[] dequantizedVector = floatVectorValues.vectorValue(iter.index());
        // Documents were added in list order into one segment, so docId indexes into vectors.
        float[] originalVector = vectors.get(docId);
        for (int i = 0; i < dim; i++) {
          // Expected value is the indexed vector; actual is what the reader returned.
          assertEquals(
              "docId=" + docId + " i=" + i, originalVector[i], dequantizedVector[i], 0.2f);
        }
      }
    }
  }
}

/**
 * Rewrites the vector files found in {@code dir} so the raw vector data appears empty.
 *
 * <p>Every file ending in {@code .vec} is handed to {@code replaceWithEmptyVectorFile}; every
 * file ending in {@code .vemf} is handed to {@code updateVectorMetadataFile}. All other files are
 * left alone.
 */
private void simulateEmptyRawVectors(Directory dir) throws Exception {
  final String rawVectorSuffix = "." + "vec";
  final String vectorMetaSuffix = "." + "vemf";

  // The listing is taken once, before any file is rewritten.
  for (String name : dir.listAll()) {
    if (name.endsWith(rawVectorSuffix)) {
      replaceWithEmptyVectorFile(dir, name);
      continue;
    }
    if (name.endsWith(vectorMetaSuffix)) {
      updateVectorMetadataFile(dir, name);
    }
  }
}

/**
 * Recreates {@code fileName} in {@code dir} so that it holds only its original index header
 * immediately followed by a codec footer, with no content in between.
 */
private void replaceWithEmptyVectorFile(Directory dir, String fileName) throws Exception {
  // Capture the existing header before the file is removed.
  byte[] header;
  try (IndexInput input = dir.openInput(fileName, IOContext.DEFAULT)) {
    header = CodecUtil.readIndexHeader(input);
  }

  dir.deleteFile(fileName);

  try (IndexOutput output = dir.createOutput(fileName, IOContext.DEFAULT)) {
    output.writeBytes(header, 0, header.length);
    CodecUtil.writeFooter(output);
  }
}

/**
 * Rewrites the vector metadata file {@code fileName} in {@code dir} so that it records a vector
 * data length of zero.
 *
 * <p>The index header, field number, encoding, similarity function, vector start position and
 * dimension are read from the existing file and written back unchanged; the stored vector length
 * is discarded and replaced by {@code 0}.
 */
private void updateVectorMetadataFile(Directory dir, String fileName) throws Exception {
  byte[] header;
  int fieldNum;
  int encoding;
  int similarity;
  long dataOffset;
  int dims;

  // Pull the existing values out of the file in the order they were written.
  try (IndexInput input = dir.openInput(fileName, IOContext.DEFAULT)) {
    header = CodecUtil.readIndexHeader(input);
    fieldNum = input.readInt();
    encoding = input.readInt();
    similarity = input.readInt();
    dataOffset = input.readVLong();
    input.readVLong(); // original vector length, deliberately dropped
    dims = input.readVInt();
  }

  // Recreate the file from scratch under the same name.
  dir.deleteFile(fileName);
  try (IndexOutput output = dir.createOutput(fileName, IOContext.DEFAULT)) {
    output.writeBytes(header, 0, header.length);

    output.writeInt(fieldNum);
    output.writeInt(encoding);
    output.writeInt(similarity);
    output.writeVLong(dataOffset);
    output.writeVLong(0); // vector length forced to zero
    output.writeVInt(dims);
    output.writeInt(0); // NOTE(review): presumably the vector count; confirm against the reader

    // Ord-to-doc configuration written with zero counts and null outputs.
    OrdToDocDISIReaderConfiguration.writeStoredMeta(
        DIRECT_MONOTONIC_BLOCK_SHIFT, output, null, 0, 0, null);

    // -1 terminates the list of fields; the footer closes the file.
    output.writeInt(-1);
    CodecUtil.writeFooter(output);
  }
}

public void testToString() {
FilterCodec customCodec =
new FilterCodec("foo", Codec.getDefault()) {
Expand Down Expand Up @@ -407,4 +245,19 @@ public void testRandomWithUpdatesAndGraph() {
public void testSearchWithVisitedLimit() {
// Intentionally a no-op: search not supported.
// NOTE(review): presumably this replaces an inherited test; confirm against the parent class.
}

/**
 * Always returns {@code true}, flagging the vectors format under test as scalar quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return true;
}

/**
 * Returns the {@code bits} value held by this test instance.
 *
 * <p>NOTE(review): presumably the number of quantization bits the parent test case should assume
 * for the codec under test; confirm against the parent class.
 */
@Override
protected int getQuantizationBits() {
return bits;
}

@Override
protected Codec getCodecForQuantizedTest() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also rename this Quantized -> FloatVectorFallback or so?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed in new revision.

return getCodec(1f);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,9 @@ public void testRandomBytes() throws Exception {
public void testSortedIndexBytes() throws Exception {
// Intentionally a no-op: unimplemented.
// NOTE(review): presumably this replaces an inherited byte-vector test; confirm against the
// parent class.
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -206,4 +206,9 @@ public void testSimpleOffHeapSize() throws IOException {
}
}
}

/**
 * Always returns {@code false}, i.e. this test class does not flag its vectors format as scalar
 * quantized.
 *
 * <p>NOTE(review): presumably the parent test case reads this flag to decide whether to run
 * quantization-specific checks; confirm against the parent class.
 */
@Override
protected boolean isScalarQuantizedVectorsFormat() {
return false;
}
}
Loading
Loading