diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4b74688b5c39..60b6cb4455fe 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -214,6 +214,8 @@ API Changes * GITHUB#15502: Add `count()` method to `FilterWeight` (Prudhvi Godithi) +* GITHUB#15621: Add validation to prevent zero vectors in KNN fields (Vigya Sharma) + New Features --------------------- * GITHUB#15328: VectorSimilarityFunction.getValues() now implements doubleVal allowing its diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/bitvectors/TestHnswBitVectorsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/bitvectors/TestHnswBitVectorsFormat.java index 49dd2dc68e00..cd0b32c1c82e 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/bitvectors/TestHnswBitVectorsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/bitvectors/TestHnswBitVectorsFormat.java @@ -55,7 +55,9 @@ public void testFloatVectorFails() throws IOException { try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {1f, 0f, 0f, 1f}, VectorSimilarityFunction.DOT_PRODUCT)); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc)); e.getMessage().contains("HnswBitVectorsFormat only supports BYTE encoding"); @@ -68,8 +70,7 @@ public void testIndexAndSearchBitVectors() throws IOException { new byte[] {(byte) 0b10101110, (byte) 0b01010111}, new byte[] {(byte) 0b11111000, (byte) 0b00001111}, new byte[] {(byte) 0b11001100, (byte) 0b00110011}, - new byte[] {(byte) 0b11111111, (byte) 0b00000000}, - new byte[] {(byte) 0b00000000, (byte) 0b00000000} + new byte[] {(byte) 0b11111111, (byte) 0b00000000} }; try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { diff --git 
a/lucene/core/src/java/org/apache/lucene/document/KnnByteVectorField.java b/lucene/core/src/java/org/apache/lucene/document/KnnByteVectorField.java index ddeef44c72ae..d68cdf732b0d 100644 --- a/lucene/core/src/java/org/apache/lucene/document/KnnByteVectorField.java +++ b/lucene/core/src/java/org/apache/lucene/document/KnnByteVectorField.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.KnnByteVectorQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.VectorUtil; /** * A field that contains a single byte numeric vector (or none) for each document. Vectors are dense @@ -97,6 +98,9 @@ public static FieldType createFieldType( public KnnByteVectorField( String name, byte[] vector, VectorSimilarityFunction similarityFunction) { super(name, createType(vector, similarityFunction)); + if (VectorUtil.isZeroVector(vector) == true) { + throw new IllegalArgumentException("zero vector not allowed for vector field value"); + } fieldsData = vector; // null-check done above } @@ -138,6 +142,9 @@ public KnnByteVectorField(String name, byte[] vector, FieldType fieldType) { throw new IllegalArgumentException( "The number of vector dimensions does not match the field type"); } + if (VectorUtil.isZeroVector(vector) == true) { + throw new IllegalArgumentException("zero vector not allowed for vector field value"); + } fieldsData = vector; } diff --git a/lucene/core/src/java/org/apache/lucene/document/KnnFloatVectorField.java b/lucene/core/src/java/org/apache/lucene/document/KnnFloatVectorField.java index 63a55ddc669f..456b05d7f407 100644 --- a/lucene/core/src/java/org/apache/lucene/document/KnnFloatVectorField.java +++ b/lucene/core/src/java/org/apache/lucene/document/KnnFloatVectorField.java @@ -98,6 +98,9 @@ public static Query newVectorQuery(String field, float[] queryVector, int k) { public KnnFloatVectorField( String name, float[] vector, VectorSimilarityFunction similarityFunction) { super(name, 
createType(vector, similarityFunction)); + if (VectorUtil.isZeroVector(vector) == true) { + throw new IllegalArgumentException("zero vector not allowed for vector field value"); + } fieldsData = VectorUtil.checkFinite(vector); // null check done above } @@ -139,6 +142,9 @@ public KnnFloatVectorField(String name, float[] vector, FieldType fieldType) { throw new IllegalArgumentException( "The number of vector dimensions does not match the field type"); } + if (VectorUtil.isZeroVector(vector) == true) { + throw new IllegalArgumentException("zero vector not allowed for vector field value"); + } fieldsData = VectorUtil.checkFinite(vector); } diff --git a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java index 061e253bde90..d3bfe9529e0f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java @@ -437,6 +437,26 @@ public static float[] checkFinite(float[] v) { return v; } + /** Returns true if all dimensions of the provided vector are zero, false otherwise. */ + public static boolean isZeroVector(float[] v) { + for (float value : v) { + if (value != 0) { + return false; + } + } + return true; + } + + /** Returns true if all dimensions of the provided vector are zero, false otherwise. */ + public static boolean isZeroVector(byte[] v) { + for (byte value : v) { + if (value != 0) { + return false; + } + } + return true; + } + /** * Given an array {@code buffer} that is sorted between indexes {@code 0} inclusive and {@code to} * exclusive, find the first array index whose value is greater than or equal to {@code target}. 
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java index 60548dd4e356..4387f6a1c02c 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java @@ -55,6 +55,7 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.tests.index.RandomCodec; import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.TestVectorUtil; import org.hamcrest.MatcherAssert; /** Basic tests of PerFieldDocValuesFormat */ @@ -232,14 +233,14 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { }); try (IndexWriter writer = new IndexWriter(directory, iwc)) { Document doc1 = new Document(); - doc1.add(new KnnFloatVectorField("field1", new float[33])); + doc1.add(new KnnFloatVectorField("field1", TestVectorUtil.randomVector(33))); Exception exc = expectThrows(IllegalArgumentException.class, () -> writer.addDocument(doc1)); assertTrue(exc.getMessage().contains("vector's dimensions must be <= [32]")); Document doc2 = new Document(); - doc2.add(new KnnFloatVectorField("field1", new float[32])); - doc2.add(new KnnFloatVectorField("field2", new float[33])); + doc2.add(new KnnFloatVectorField("field1", TestVectorUtil.randomVector(32))); + doc2.add(new KnnFloatVectorField("field2", TestVectorUtil.randomVector(33))); writer.addDocument(doc2); } diff --git a/lucene/core/src/test/org/apache/lucene/document/TestField.java b/lucene/core/src/test/org/apache/lucene/document/TestField.java index 5c1b8f17294f..1a6ea7aca9e8 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestField.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestField.java @@ -698,17 +698,45 @@ public void testKnnVectorField() throws Exception { try (Directory dir = 
newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - byte[] b = new byte[5]; + byte[] empty = new byte[5]; + IllegalArgumentException zeroError = + expectThrows( + IllegalArgumentException.class, () -> new KnnByteVectorField("binaryZeroErr", empty)); + assertTrue(zeroError.getMessage().contains("zero vector not allowed")); + + byte[] b = new byte[] {1, 1, 1, 1, 1}; KnnByteVectorField field = new KnnByteVectorField("binary", b, VectorSimilarityFunction.EUCLIDEAN); assertNull(field.binaryValue()); assertArrayEquals(b, field.vectorValue()); + expectThrows( IllegalArgumentException.class, () -> new KnnFloatVectorField("bogus", new float[] {1}, (FieldType) field.fieldType())); + zeroError = + expectThrows( + IllegalArgumentException.class, + () -> + new KnnByteVectorField( + "float", new byte[] {0, 0, 0, 0, 0}, (FieldType) field.fieldType())); + assertTrue(zeroError.getMessage().contains("zero vector not allowed")); + zeroError = + expectThrows( + IllegalArgumentException.class, + () -> new KnnFloatVectorField("zerovec", new float[] {0, 0, 0, 0})); + assertTrue(zeroError.getMessage().contains("zero vector not allowed")); + float[] vector = new float[] {1, 2}; Field field2 = new KnnFloatVectorField("float", vector); assertNull(field2.binaryValue()); + zeroError = + expectThrows( + IllegalArgumentException.class, + () -> + new KnnFloatVectorField( + "float", new float[] {0, 0}, (FieldType) field2.fieldType())); + assertTrue(zeroError.getMessage().contains("zero vector not allowed")); + doc.add(field); doc.add(field2); w.addDocument(doc); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectMismatchedChecksum.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectMismatchedChecksum.java index 4bee0c635184..8ea382685691 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectMismatchedChecksum.java +++ 
b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectMismatchedChecksum.java @@ -41,6 +41,7 @@ import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.TestVectorUtil; /** Test that the default codec detects mismatched checksums at open or checkIntegrity time. */ @SuppressFileSystems("ExtrasFS") @@ -69,7 +70,7 @@ public void test() throws Exception { doc.add(pointNumber); Field dvNumber = new NumericDocValuesField("long", 0L); doc.add(dvNumber); - KnnFloatVectorField vector = new KnnFloatVectorField("vector", new float[16]); + KnnFloatVectorField vector = new KnnFloatVectorField("vector", TestVectorUtil.randomVector(16)); doc.add(vector); for (int i = 0; i < 100; i++) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectTruncation.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectTruncation.java index f8c9c42f6387..0674fb725b6e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectTruncation.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectTruncation.java @@ -42,6 +42,7 @@ import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.TestVectorUtil; /** Test that a plain default detects index file truncation early (on opening a reader). 
*/ @SuppressFileSystems("ExtrasFS") @@ -82,7 +83,7 @@ private void doTest(boolean cfs) throws Exception { doc.add(pointNumber); Field dvNumber = new NumericDocValuesField("long", 0L); doc.add(dvNumber); - KnnFloatVectorField vector = new KnnFloatVectorField("vector", new float[16]); + KnnFloatVectorField vector = new KnnFloatVectorField("vector", TestVectorUtil.randomVector(16)); doc.add(vector); for (int i = 0; i < 100; i++) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java b/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java index 0d52481b908e..73db7486b63f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java @@ -49,6 +49,7 @@ import org.apache.lucene.util.SameThreadExecutorService; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.SuppressForbidden; +import org.apache.lucene.util.TestVectorUtil; import org.apache.lucene.util.Version; public class TestConcurrentMergeScheduler extends LuceneTestCase { @@ -109,7 +110,7 @@ public Executor getIntraMergeExecutor(MergePolicy.OneMerge merge) { IndexWriter writer = new IndexWriter(directory, iwc); Document doc = new Document(); Field idField = newStringField("id", "", Field.Store.YES); - KnnFloatVectorField knnField = new KnnFloatVectorField("knn", new float[] {0.0f, 0.0f}); + KnnFloatVectorField knnField = new KnnFloatVectorField("knn", TestVectorUtil.randomVector(2)); doc.add(idField); // Add knn float vectors to test parallel merge doc.add(knnField); @@ -244,7 +245,7 @@ public void testNoWaitClose() throws IOException { Directory directory = newDirectory(); Document doc = new Document(); Field idField = newStringField("id", "", Field.Store.YES); - KnnFloatVectorField knnField = new KnnFloatVectorField("knn", new float[] {0.0f, 0.0f}); + KnnFloatVectorField knnField = new KnnFloatVectorField("knn", 
TestVectorUtil.randomVector(2)); doc.add(idField); doc.add(knnField); IndexWriterConfig iwc = diff --git a/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java b/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java index d8b196e04df2..c360c2c21f61 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java @@ -270,7 +270,8 @@ private void indexData(IndexWriter iw) throws IOException { for (int i = 0; i < values.length; i++) { // System.out.printf("%d: (%d, %d)\n", i, index % n, index / n); int x = index % n, y = index / n; - values[i] = new float[] {x, y}; + // avoid zero vectors + values[i] = new float[] {x + 1e-5f, y + 1e-5f}; index = (index + stepSize) % (n * n); add(iw, i, values[i]); if (i == 13) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java index c03afd92be69..5635de8ba65b 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java @@ -176,7 +176,7 @@ public void testSortOnAddIndicesRandom() throws IOException { doc.add( new SortedSetDocValuesField("sorted_set_dv", new BytesRef(Integer.toString(docId)))); if (dense || docId % 2 == 0) { - doc.add(new KnnFloatVectorField("vector", new float[] {(float) docId})); + doc.add(new KnnFloatVectorField("vector", new float[] {(float) docId + 1e-6f})); } doc.add(new NumericDocValuesField("foo", random().nextInt(20))); diff --git a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java index f97eb6ef368d..0d830c091379 100644 --- a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java +++ b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java @@ -177,10 
+177,10 @@ public void testEmptyIndex() throws IOException { */ public void testFindAll() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); - AbstractKnnVectorQuery kvq = getKnnVectorQuery("field", new float[] {0, 0}, 10); + AbstractKnnVectorQuery kvq = getKnnVectorQuery("field", new float[] {1, 1}, 10); assertMatches(searcher, kvq, 3); ScoreDoc[] scoreDocs = searcher.search(kvq, 3).scoreDocs; assertIdMatches(reader, "id2", scoreDocs[0]); @@ -191,10 +191,10 @@ public void testFindAll() throws IOException { public void testFindFewer() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); - AbstractKnnVectorQuery kvq = getKnnVectorQuery("field", new float[] {0, 0}, 2); + AbstractKnnVectorQuery kvq = getKnnVectorQuery("field", new float[] {1, 1}, 2); assertMatches(searcher, kvq, 2); ScoreDoc[] scoreDocs = searcher.search(kvq, 3).scoreDocs; assertEquals(scoreDocs.length, 2); @@ -204,11 +204,11 @@ public void testFindFewer() throws IOException { public void testSearchBoost() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); - Query vectorQuery = getKnnVectorQuery("field", new float[] {0, 0}, 10); + Query vectorQuery = getKnnVectorQuery("field", new float[] {1, 1}, 
10); ScoreDoc[] scoreDocs = searcher.search(vectorQuery, 3).scoreDocs; Query boostQuery = new BoostQuery(vectorQuery, 3.0f); @@ -228,11 +228,11 @@ public void testSearchBoost() throws IOException { /** Tests that a AbstractKnnVectorQuery applies the filter query */ public void testSimpleFilter() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); Query filter = new TermQuery(new Term("id", "id2")); - Query kvq = getKnnVectorQuery("field", new float[] {0, 0}, 10, filter); + Query kvq = getKnnVectorQuery("field", new float[] {1, 1}, 10, filter); TopDocs topDocs = searcher.search(kvq, 3); assertEquals(1, topDocs.totalHits.value()); assertIdMatches(reader, "id2", topDocs.scoreDocs[0]); @@ -241,12 +241,12 @@ public void testSimpleFilter() throws IOException { public void testFilterWithNoVectorMatches() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); Query filter = new TermQuery(new Term("other", "value")); - Query kvq = getKnnVectorQuery("field", new float[] {0, 0}, 10, filter); + Query kvq = getKnnVectorQuery("field", new float[] {1, 1}, 10, filter); TopDocs topDocs = searcher.search(kvq, 3); assertEquals(0, topDocs.totalHits.value()); } @@ -254,13 +254,13 @@ public void testFilterWithNoVectorMatches() throws IOException { public void testMatchAllFilter() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, 
new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); // make sure we don't drop to exact search, even though the filter matches fewer than k docs Query kvq = - getThrowingKnnVectorQuery("field", new float[] {0, 0}, 10, MatchAllDocsQuery.INSTANCE); + getThrowingKnnVectorQuery("field", new float[] {1, 1}, 10, MatchAllDocsQuery.INSTANCE); TopDocs topDocs = searcher.search(kvq, 3); assertEquals(3, topDocs.totalHits.value()); } @@ -269,7 +269,7 @@ public void testMatchAllFilter() throws IOException { /** testDimensionMismatch */ public void testDimensionMismatch() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); AbstractKnnVectorQuery kvq = getKnnVectorQuery("field", new float[] {0}, 1); @@ -282,7 +282,7 @@ public void testDimensionMismatch() throws IOException { /** testNonVectorField */ public void testNonVectorField() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); assertMatches(searcher, getKnnVectorQuery("xyzzy", new float[] {0}, 10), 0); @@ -297,7 +297,7 @@ public void testIllegalArguments() throws IOException { public void testDifferentReader() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { 
AbstractKnnVectorQuery query = getKnnVectorQuery("field", new float[] {2, 3}, 3); Query dasq = query.rewrite(newSearcher(reader)); @@ -311,7 +311,7 @@ public void testDifferentReader() throws IOException { public void testScoreEuclidean() throws IOException { float[][] vectors = new float[5][]; for (int j = 0; j < 5; j++) { - vectors[j] = new float[] {j, j}; + vectors[j] = new float[] {j + 1, j + 1}; } try (Directory d = getStableIndexStore("field", vectors); IndexReader reader = DirectoryReader.open(d)) { @@ -332,17 +332,17 @@ public void testScoreEuclidean() throws IOException { DocIdSetIterator it = scorer.iterator(); assertEquals(3, it.cost()); int firstDoc = it.nextDoc(); - if (firstDoc == 1) { + if (firstDoc == 0) { assertEquals(1 / 6f, scorer.score(), 0); - assertEquals(3, it.advance(3)); + assertEquals(2, it.advance(2)); assertEquals(1 / 2f, scorer.score(), 0); - assertEquals(NO_MORE_DOCS, it.advance(4)); + assertEquals(NO_MORE_DOCS, it.advance(3)); } else { - assertEquals(2, firstDoc); + assertEquals(1, firstDoc); assertEquals(1 / 2f, scorer.score(), 0); - assertEquals(4, it.advance(4)); + assertEquals(3, it.advance(3)); assertEquals(1 / 6f, scorer.score(), 0); - assertEquals(NO_MORE_DOCS, it.advance(5)); + assertEquals(NO_MORE_DOCS, it.advance(4)); } expectThrows(ArrayIndexOutOfBoundsException.class, scorer::score); } @@ -397,7 +397,7 @@ public void testScoreCosine() throws IOException { } public void testScoreMIP() throws IOException { - float[][] vectors = {{0, 1}, {1, 2}, {0, 0}}; + float[][] vectors = {{0, 1}, {1, 2}, {1, 1}}; try (Directory d = getStableIndexStore("field", VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT, vectors); IndexReader reader = DirectoryReader.open(d)) { @@ -405,11 +405,11 @@ public void testScoreMIP() throws IOException { AbstractKnnVectorQuery kvq = getKnnVectorQuery("field", new float[] {0, -1}, 10); assertMatches(searcher, kvq, 3); ScoreDoc[] scoreDocs = searcher.search(kvq, 3).scoreDocs; - assertIdMatches(reader, "id2", 
scoreDocs[0]); - assertIdMatches(reader, "id0", scoreDocs[1]); + assertIdMatches(reader, "id0", scoreDocs[0]); + assertIdMatches(reader, "id2", scoreDocs[1]); assertIdMatches(reader, "id1", scoreDocs[2]); - assertEquals(1.0, scoreDocs[0].score, 1e-7); + assertEquals(1 / 2f, scoreDocs[0].score, 1e-7); assertEquals(1 / 2f, scoreDocs[1].score, 1e-7); assertEquals(1 / 3f, scoreDocs[2].score, 1e-7); } @@ -418,7 +418,7 @@ public void testScoreMIP() throws IOException { public void testExplain() throws IOException { try (Directory d = newDirectoryForTest()) { try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig())) { - for (int j = 0; j < 5; j++) { + for (int j = 1; j <= 5; j++) { Document doc = new Document(); doc.add(getKnnVectorField("field", new float[] {j, j})); w.addDocument(doc); @@ -446,7 +446,7 @@ public void testExplain() throws IOException { public void testExplainMultipleSegments() throws IOException { try (Directory d = newDirectoryForTest()) { try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig())) { - for (int j = 0; j < 5; j++) { + for (int j = 1; j <= 5; j++) { Document doc = new Document(); doc.add(getKnnVectorField("field", new float[] {j, j})); w.addDocument(doc); @@ -480,7 +480,7 @@ public void testSkewedIndex() throws IOException { */ try (Directory d = newDirectoryForTest()) { try (IndexWriter w = new IndexWriter(d, configStandardCodec())) { - int r = 0; + int r = 1; for (int i = 0; i < 5; i++) { for (int j = 0; j < 5; j++) { Document doc = new Document(); @@ -494,10 +494,10 @@ public void testSkewedIndex() throws IOException { } try (IndexReader reader = DirectoryReader.open(d)) { IndexSearcher searcher = newSearcher(reader); - TopDocs results = searcher.search(getKnnVectorQuery("field", new float[] {0, 0}, 8), 10); + TopDocs results = searcher.search(getKnnVectorQuery("field", new float[] {1, 1}, 8), 10); assertEquals(8, results.scoreDocs.length); - assertIdMatches(reader, "id0", results.scoreDocs[0]); - assertIdMatches(reader, 
"id7", results.scoreDocs[7]); + assertIdMatches(reader, "id1", results.scoreDocs[0]); + assertIdMatches(reader, "id8", results.scoreDocs[7]); // test some results in the middle of the sequence - also tests docid tiebreaking results = searcher.search(getKnnVectorQuery("field", new float[] {10, 10}, 8), 10); @@ -916,7 +916,7 @@ public void testBitSetQuery() throws IOException { /** Test functionality of {@link TimeLimitingKnnCollectorManager}. */ public void testTimeLimitingKnnCollectorManager() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); @@ -962,7 +962,7 @@ public void testTimeLimitingKnnCollectorManager() throws IOException { /** Test that the query times out correctly. */ public void testTimeout() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestKnnByteVectorQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestKnnByteVectorQuery.java index a74579b4b6ec..79c54214b46d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestKnnByteVectorQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestKnnByteVectorQuery.java @@ -81,7 +81,7 @@ static byte[] floatToBytes(float[] query) { public void testToString() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader 
reader = DirectoryReader.open(indexStore)) { AbstractKnnVectorQuery query = getKnnVectorQuery("field", new float[] {0, 1}, 10); assertEquals("KnnByteVectorQuery:field[0,...][10]", query.toString("ignored")); @@ -104,7 +104,7 @@ public void testGetTarget() { public void testVectorEncodingMismatch() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { Query filter = null; if (random().nextBoolean()) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestKnnFloatVectorQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestKnnFloatVectorQuery.java index 45fbc32bd24a..2da999d22886 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestKnnFloatVectorQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestKnnFloatVectorQuery.java @@ -81,7 +81,7 @@ Field getKnnVectorField(String name, float[] vector) { public void testToString() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { AbstractKnnVectorQuery query = getKnnVectorQuery("field", new float[] {0.0f, 1.0f}, 10); assertEquals("KnnFloatVectorQuery:field[0.0,...][10]", query.toString("ignored")); @@ -97,7 +97,7 @@ public void testToString() throws IOException { public void testVectorEncodingMismatch() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { Query filter = null; if (random().nextBoolean()) { diff --git 
a/lucene/core/src/test/org/apache/lucene/search/TestPatienceByteVectorQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestPatienceByteVectorQuery.java index 810932e12126..b35234502133 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPatienceByteVectorQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPatienceByteVectorQuery.java @@ -90,7 +90,7 @@ Field getKnnVectorField(String name, float[] vector) { public void testToString() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { AbstractKnnVectorQuery query = getKnnVectorQuery("field", new float[] {0.0f, 1.0f}, 10); assertEquals( diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPatienceFloatVectorQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestPatienceFloatVectorQuery.java index d0e71662f9d2..e709f66a3a40 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPatienceFloatVectorQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPatienceFloatVectorQuery.java @@ -80,7 +80,7 @@ Field getKnnVectorField(String name, float[] vector) { public void testToString() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { AbstractKnnVectorQuery query = getKnnVectorQuery("field", new float[] {0.0f, 1.0f}, 10); assertEquals( diff --git a/lucene/core/src/test/org/apache/lucene/search/TestVectorScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestVectorScorer.java index 5432879a1341..1cccef64da54 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestVectorScorer.java +++ 
b/lucene/core/src/test/org/apache/lucene/search/TestVectorScorer.java @@ -39,7 +39,7 @@ public void testFindAll() throws IOException { VectorEncoding encoding = RandomPicks.randomFrom(random(), VectorEncoding.values()); try (Directory indexStore = getIndexStore( - "field", encoding, new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + "field", encoding, new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { assert reader.leaves().size() == 1; LeafReaderContext context = reader.leaves().get(0); diff --git a/lucene/join/src/test/org/apache/lucene/search/join/ParentBlockJoinKnnVectorQueryTestCase.java b/lucene/join/src/test/org/apache/lucene/search/join/ParentBlockJoinKnnVectorQueryTestCase.java index f69c64d80709..0468ca648876 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/ParentBlockJoinKnnVectorQueryTestCase.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/ParentBlockJoinKnnVectorQueryTestCase.java @@ -176,7 +176,7 @@ d, newIndexWriterConfig().setMergePolicy(newMergePolicy(random(), false)))) { public void testFilterWithNoVectorMatches() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { IndexSearcher searcher = newSearcher(reader); Query filter = new TermQuery(new Term("other", "value")); @@ -251,7 +251,7 @@ public void testSkewedIndex() throws IOException { try (Directory d = newDirectory()) { try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec()))) { - int r = 0; + int r = 1; for (int i = 0; i < 5; i++) { for (int j = 0; j < 5; j++) { List toAdd = new ArrayList<>(); @@ -271,11 +271,11 @@ public void testSkewedIndex() throws IOException { TopDocs results = searcher.search( 
getParentJoinKnnQuery( - "field", new float[] {0, 0}, null, 8, parentFilter(searcher.getIndexReader())), + "field", new float[] {1, 1}, null, 8, parentFilter(searcher.getIndexReader())), 10); assertEquals(8, results.scoreDocs.length); - assertIdMatches(reader, "0", results.scoreDocs[0].doc); - assertIdMatches(reader, "7", results.scoreDocs[7].doc); + assertIdMatches(reader, "1", results.scoreDocs[0].doc); + assertIdMatches(reader, "8", results.scoreDocs[7].doc); // test some results in the middle of the sequence - also tests docid tiebreaking results = @@ -297,7 +297,7 @@ public void testSkewedIndex() throws IOException { /** Test that the query times out correctly. */ public void testTimeout() throws IOException { try (Directory indexStore = - getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0}); + getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {1, 1}); IndexReader reader = DirectoryReader.open(indexStore)) { BitSetProducer parentFilter = parentFilter(reader); IndexSearcher searcher = newSearcher(reader); diff --git a/lucene/misc/src/test/org/apache/lucene/misc/index/TestBPReorderingMergePolicy.java b/lucene/misc/src/test/org/apache/lucene/misc/index/TestBPReorderingMergePolicy.java index 302ce0d751cc..c68dabd5b1ff 100644 --- a/lucene/misc/src/test/org/apache/lucene/misc/index/TestBPReorderingMergePolicy.java +++ b/lucene/misc/src/test/org/apache/lucene/misc/index/TestBPReorderingMergePolicy.java @@ -257,7 +257,7 @@ public void testReorderDoesntHaveEnoughRAM() throws IOException { doc.add(idField); StringField bodyField = new StringField("body", "", Store.YES); doc.add(bodyField); - KnnFloatVectorField vectorField = new KnnFloatVectorField("vector", new float[] {0}); + KnnFloatVectorField vectorField = new KnnFloatVectorField("vector", new float[] {1}); doc.add(vectorField); for (int i = 0; i < 10; ++i) { diff --git 
a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java index 8d338bc9d494..02a8ea3adaf4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java @@ -161,7 +161,7 @@ private int getVectorsMaxDimensions(String fieldName) { } public void testFieldConstructor() { - float[] v = new float[1]; + float[] v = new float[] {1f}; KnnFloatVectorField field = new KnnFloatVectorField("f", v); assertEquals(1, field.fieldType().vectorDimension()); assertEquals(VectorSimilarityFunction.EUCLIDEAN, field.fieldType().vectorSimilarityFunction()); @@ -169,20 +169,21 @@ public void testFieldConstructor() { } public void testFieldConstructorExceptions() { - expectThrows(IllegalArgumentException.class, () -> new KnnFloatVectorField(null, new float[1])); + expectThrows( + IllegalArgumentException.class, () -> new KnnFloatVectorField(null, new float[] {1f})); expectThrows(IllegalArgumentException.class, () -> new KnnFloatVectorField("f", null)); expectThrows( IllegalArgumentException.class, - () -> new KnnFloatVectorField("f", new float[1], (VectorSimilarityFunction) null)); + () -> new KnnFloatVectorField("f", new float[] {1f}, (VectorSimilarityFunction) null)); expectThrows(IllegalArgumentException.class, () -> new KnnFloatVectorField("f", new float[0])); } public void testFieldSetValue() { - KnnFloatVectorField field = new KnnFloatVectorField("f", new float[1]); - float[] v1 = new float[1]; + KnnFloatVectorField field = new KnnFloatVectorField("f", new float[] {1f}); + float[] v1 = new float[] {1f}; field.setVectorValue(v1); assertSame(v1, field.vectorValue()); - expectThrows(IllegalArgumentException.class, () -> field.setVectorValue(new float[2])); + expectThrows(IllegalArgumentException.class, () -> 
field.setVectorValue(new float[] {0f, 1f})); expectThrows(IllegalArgumentException.class, () -> field.setVectorValue(null)); } @@ -192,11 +193,15 @@ public void testIllegalDimChangeTwoDocs() throws Exception { try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); Document doc2 = new Document(); - doc2.add(new KnnFloatVectorField("f", new float[6], VectorSimilarityFunction.DOT_PRODUCT)); + doc2.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f, 4f, 5f}, VectorSimilarityFunction.DOT_PRODUCT)); IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2)); String errMsg = @@ -209,12 +214,16 @@ public void testIllegalDimChangeTwoDocs() throws Exception { try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); w.commit(); Document doc2 = new Document(); - doc2.add(new KnnFloatVectorField("f", new float[6], VectorSimilarityFunction.DOT_PRODUCT)); + doc2.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f, 4f, 5f}, VectorSimilarityFunction.DOT_PRODUCT)); IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2)); String errMsg = @@ -229,11 +238,15 @@ public void testIllegalSimilarityFunctionChange() throws Exception { try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - 
doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); Document doc2 = new Document(); - doc2.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN)); + doc2.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.EUCLIDEAN)); IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2)); String errMsg = @@ -246,12 +259,16 @@ public void testIllegalSimilarityFunctionChange() throws Exception { try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); w.commit(); Document doc2 = new Document(); - doc2.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN)); + doc2.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.EUCLIDEAN)); IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc2)); String errMsg = @@ -265,13 +282,17 @@ public void testIllegalDimChangeTwoWriters() throws Exception { try (Directory dir = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); } try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) { Document doc2 = new Document(); - doc2.add(new 
KnnFloatVectorField("f", new float[2], VectorSimilarityFunction.DOT_PRODUCT)); + doc2.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f}, VectorSimilarityFunction.DOT_PRODUCT)); IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2)); assertEquals( @@ -307,7 +328,7 @@ public KnnVectorsFormat knnVectorsFormat() { writer.addDocument(doc); } writer.commit(); - for (int i = 0; i < 10; i++) { + for (int i = 1; i <= 10; i++) { var doc = new Document(); doc.add(new KnnFloatVectorField("otherVector", new float[] {i, i, i, i})); writer.addDocument(doc); @@ -338,14 +359,14 @@ public KnnVectorsFormat knnVectorsFormat() { iwc.setMergeScheduler(mergeScheduler); iwc.setMergePolicy(new ForceMergePolicy(iwc.getMergePolicy())); try (var writer = new IndexWriter(dir, iwc)) { - for (int i = 0; i < 10; i++) { + for (int i = 1; i <= 10; i++) { var doc = new Document(); doc.add( new KnnByteVectorField("field", new byte[] {(byte) i, (byte) i, (byte) i, (byte) i})); writer.addDocument(doc); } writer.commit(); - for (int i = 0; i < 10; i++) { + for (int i = 1; i <= 10; i++) { var doc = new Document(); doc.add( new KnnByteVectorField( @@ -448,13 +469,17 @@ public void testIllegalSimilarityFunctionChangeTwoWriters() throws Exception { try (Directory dir = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); } try (IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig())) { Document doc2 = new Document(); - doc2.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN)); + doc2.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.EUCLIDEAN)); IllegalArgumentException 
expected = expectThrows(IllegalArgumentException.class, () -> w2.addDocument(doc2)); assertEquals( @@ -468,7 +493,9 @@ public void testIllegalSimilarityFunctionChangeTwoWriters() throws Exception { public void testAddIndexesDirectory0() throws Exception { String fieldName = "field"; Document doc = new Document(); - doc.add(new KnnFloatVectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + fieldName, new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); try (Directory dir = newDirectory(); Directory dir2 = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { @@ -499,7 +526,8 @@ public void testAddIndexesDirectory1() throws Exception { w.addDocument(doc); } doc.add( - new KnnFloatVectorField(fieldName, new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + new KnnFloatVectorField( + fieldName, new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) { w2.addDocument(doc); w2.addIndexes(dir); @@ -518,7 +546,7 @@ public void testAddIndexesDirectory1() throws Exception { public void testAddIndexesDirectory01() throws Exception { String fieldName = "field"; - float[] vector = new float[2]; + float[] vector = new float[] {0f, 1f}; Document doc = new Document(); doc.add(new KnnFloatVectorField(fieldName, vector, VectorSimilarityFunction.DOT_PRODUCT)); try (Directory dir = newDirectory(); @@ -553,12 +581,16 @@ public void testIllegalDimChangeViaAddIndexesDirectory() throws Exception { Directory dir2 = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); } try (IndexWriter w2 = new 
IndexWriter(dir2, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[6], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f, 4f, 5f}, VectorSimilarityFunction.DOT_PRODUCT)); w2.addDocument(doc); IllegalArgumentException expected = expectThrows( @@ -576,12 +608,16 @@ public void testIllegalSimilarityFunctionChangeViaAddIndexesDirectory() throws E Directory dir2 = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); } try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.EUCLIDEAN)); w2.addDocument(doc); IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> w2.addIndexes(dir)); @@ -598,12 +634,16 @@ public void testIllegalDimChangeViaAddIndexesCodecReader() throws Exception { Directory dir2 = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); } try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[6], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f, 
4f, 5f}, VectorSimilarityFunction.DOT_PRODUCT)); w2.addDocument(doc); try (DirectoryReader r = DirectoryReader.open(dir)) { IllegalArgumentException expected = @@ -624,12 +664,16 @@ public void testIllegalSimilarityFunctionChangeViaAddIndexesCodecReader() throws Directory dir2 = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); } try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.EUCLIDEAN)); w2.addDocument(doc); try (DirectoryReader r = DirectoryReader.open(dir)) { IllegalArgumentException expected = @@ -650,12 +694,16 @@ public void testIllegalDimChangeViaAddIndexesSlowCodecReader() throws Exception Directory dir2 = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); } try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[6], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f, 4f, 5f}, VectorSimilarityFunction.DOT_PRODUCT)); w2.addDocument(doc); try (DirectoryReader r = DirectoryReader.open(dir)) { IllegalArgumentException expected = @@ -674,12 +722,16 @@ public void 
testIllegalSimilarityFunctionChangeViaAddIndexesSlowCodecReader() th Directory dir2 = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); } try (IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.EUCLIDEAN)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.EUCLIDEAN)); w2.addDocument(doc); try (DirectoryReader r = DirectoryReader.open(dir)) { IllegalArgumentException expected = @@ -697,8 +749,12 @@ public void testIllegalMultipleValues() throws Exception { try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc)); assertEquals( @@ -714,7 +770,7 @@ public void testIllegalDimensionTooLarge() throws Exception { doc.add( new KnnFloatVectorField( "f", - new float[getVectorsMaxDimensions("f") + 1], + randomFloatVector(getVectorsMaxDimensions("f") + 1), VectorSimilarityFunction.DOT_PRODUCT)); Exception exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc)); assertTrue( @@ -722,14 +778,15 @@ public void 
testIllegalDimensionTooLarge() throws Exception { .contains("vector's dimensions must be <= [" + getVectorsMaxDimensions("f") + "]")); Document doc2 = new Document(); - doc2.add(new KnnFloatVectorField("f", new float[2], VectorSimilarityFunction.DOT_PRODUCT)); + doc2.add( + new KnnFloatVectorField("f", new float[] {0f, 1f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc2); Document doc3 = new Document(); doc3.add( new KnnFloatVectorField( "f", - new float[getVectorsMaxDimensions("f") + 1], + randomFloatVector(getVectorsMaxDimensions("f") + 1), VectorSimilarityFunction.DOT_PRODUCT)); exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc3)); assertTrue( @@ -744,7 +801,7 @@ public void testIllegalDimensionTooLarge() throws Exception { doc4.add( new KnnFloatVectorField( "f", - new float[getVectorsMaxDimensions("f") + 1], + randomFloatVector(getVectorsMaxDimensions("f") + 1), VectorSimilarityFunction.DOT_PRODUCT)); exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc4)); assertTrue( @@ -767,7 +824,9 @@ public void testIllegalEmptyVector() throws Exception { assertEquals("cannot index an empty vector", e.getMessage()); Document doc2 = new Document(); - doc2.add(new KnnFloatVectorField("f", new float[2], VectorSimilarityFunction.EUCLIDEAN)); + doc2.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.EUCLIDEAN)); w.addDocument(doc2); } } @@ -777,14 +836,18 @@ public void testDifferentCodecs1() throws Exception { try (Directory dir = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); } IndexWriterConfig iwc = newIndexWriterConfig(); iwc.setCodec(Codec.forName("SimpleText")); try 
(IndexWriter w = new IndexWriter(dir, iwc)) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); w.forceMerge(1); } @@ -798,12 +861,16 @@ public void testDifferentCodecs2() throws Exception { try (Directory dir = newDirectory()) { try (IndexWriter w = new IndexWriter(dir, iwc)) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); } try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { Document doc = new Document(); - doc.add(new KnnFloatVectorField("f", new float[4], VectorSimilarityFunction.DOT_PRODUCT)); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.DOT_PRODUCT)); w.addDocument(doc); w.forceMerge(1); } @@ -812,11 +879,11 @@ public void testDifferentCodecs2() throws Exception { public void testInvalidKnnVectorFieldUsage() { KnnFloatVectorField field = - new KnnFloatVectorField("field", new float[2], VectorSimilarityFunction.EUCLIDEAN); + new KnnFloatVectorField("field", new float[] {0f, 1f}, VectorSimilarityFunction.EUCLIDEAN); expectThrows(IllegalArgumentException.class, () -> field.setIntValue(14)); - expectThrows(IllegalArgumentException.class, () -> field.setVectorValue(new float[1])); + expectThrows(IllegalArgumentException.class, () -> field.setVectorValue(new float[] {1f})); assertNull(field.numericValue()); } @@ -1154,7 +1221,7 @@ public void testIndexedValueNotAliased() throws Exception { // We copy indexed values (as for BinaryDocValues) so the input float[] can be reused across // calls to IndexWriter.addDocument. 
String fieldName = "field"; - float[] v = {0, 0}; + float[] v = {0, 1}; try (Directory dir = newDirectory(); IndexWriter iw = new IndexWriter( @@ -1789,7 +1856,7 @@ public void testAdvance() throws Exception { if (random().nextInt(4) == 3) { doc.add( new KnnFloatVectorField( - fieldName, new float[4], VectorSimilarityFunction.EUCLIDEAN)); + fieldName, new float[] {0f, 1f, 2f, 3f}, VectorSimilarityFunction.EUCLIDEAN)); } w.addDocument(doc); } @@ -2349,4 +2416,12 @@ protected TopDocs approximateSearch( } }; } + + float[] randomFloatVector(int dims) { + float[] fa = new float[dims]; + for (int i = 0; i < dims; ++i) { + fa[i] = random().nextFloat(); + } + return fa; + } }