Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2299b7b
changes to test test_mixed_predicates
daddaman-amz Nov 18, 2025
9aa50c2
Add prefilter evalaution for text
daddaman-amz Nov 24, 2025
b5fec4d
Add unit test
daddaman-amz Nov 25, 2025
12bde54
Merge remote-tracking branch 'upstream/fulltext' into daddaman-prefil…
daddaman-amz Nov 26, 2025
a72b680
separate function and fixtest
daddaman-amz Nov 26, 2025
ef636ef
Add prefilter config in integ test
daddaman-amz Nov 26, 2025
6be1db2
Merge remote-tracking branch 'upstream/fulltext' into daddaman-prefil…
daddaman-amz Dec 1, 2025
488f2cc
changes to sizeestimator and rebase changes
daddaman-amz Dec 1, 2025
41adb94
Add one check iterators validity
daddaman-amz Dec 2, 2025
4887e63
format changes
daddaman-amz Dec 2, 2025
529e2d8
comment cleanup and test parmeterised
daddaman-amz Dec 2, 2025
5d6d8ea
DCO Remediation Commit for Manish Addanki <[email protected]>
daddaman-amz Dec 2, 2025
9889dfb
Merge remote-tracking branch 'upstream/fulltext' into daddaman-prefil…
daddaman-amz Dec 3, 2025
3aa09db
Address RC1
daddaman-amz Dec 5, 2025
6f53422
Make prefilter config non-text specific
daddaman-amz Dec 5, 2025
c044bc6
Merge remote-tracking branch 'upstream/fulltext' into daddaman-prefil…
daddaman-amz Dec 8, 2025
ada906c
Formatting after rebase
daddaman-amz Dec 8, 2025
661e3b1
Add prefilter check in composed and/or
daddaman-amz Dec 9, 2025
31cd787
correct the filter tests to only evalaute parsing for punctuation
daddaman-amz Dec 9, 2025
5dd05aa
Add new config to disable proximity in prefilter (easy to check perf)
daddaman-amz Dec 10, 2025
f358b1a
Merge remote-tracking branch 'upstream/fulltext' into daddaman-prefil…
daddaman-amz Dec 10, 2025
18e9ae1
fix format
daddaman-amz Dec 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 39 additions & 21 deletions integration/test_fulltext.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,18 @@ def validate_fulltext_search(client: Valkey):

class TestFullText(ValkeySearchTestCaseDebugMode):

def test_text_search(self):
@pytest.mark.parametrize("prefilter_eval_enabled", [pytest.param(False, id="Prefilter_eval=False"), pytest.param(True, id="Prefilter_eval=True")])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could just store "yes" / "no" as param values and then we can execute config set commands without any IF statement. It will keep the tests less verbose

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack will do this in followup as discussed

def test_text_search(self, prefilter_eval_enabled):
"""
Test FT.SEARCH command with a text index.
Tests with both prefilter disabled and enabled.
"""
client: Valkey = self.server.get_new_client()
# Override config
if not prefilter_eval_enabled:
client.execute_command(
"CONFIG", "SET", "search.enable-prefilter-eval", "no"
)
# Create the text index on Hash documents
assert client.execute_command(text_index_on_hash) == b"OK"
# Data population:
Expand Down Expand Up @@ -761,64 +768,64 @@ def test_suffix_search(self):
assert result[0] == 1 # Only doc:1 is matched for "running"
assert result[1] == b'doc:1'

def test_mixed_predicates(self):
@pytest.mark.parametrize("prefilter_eval_enabled", [pytest.param(False, id="Prefilter_eval=False"), pytest.param(True, id="Prefilter_eval=True")])
def test_mixed_predicates(self, prefilter_eval_enabled):
"""
Test queries with mixed text, numeric, and tag predicates.
Tests there is no regression with predicate evaluator changes for text.
"""
client: Valkey = self.server.get_new_client()

if not prefilter_eval_enabled:
client.execute_command(
"CONFIG", "SET", "search.enable-prefilter-eval", "no"
)
# Index with text, numeric, and tag fields
client.execute_command("FT.CREATE", "idx", "ON", "HASH", "SCHEMA", "content", "TEXT", "NOSTEM","title", "TEXT", "NOSTEM", "Addr", "TEXT", "NOSTEM",
"salary", "NUMERIC",
"skills", "TAG", "SEPARATOR", "|")

# Test documents
client.execute_command("HSET", "doc:1", "content", "software engineer developer", "title", "Title:1", "Addr", "12 Apt ABC", "salary", "100000", "skills", "python|java")
client.execute_command("HSET", "doc:2", "content", "software development manager", "title", "Title:2", "Addr", "12 Apt EFG", "salary", "120000", "skills", "python|ml|leadership")
client.execute_command("HSET", "doc:3", "content", "product manager", "title", "Title:2", "Addr", "12 Apt EFG", "salary", "90000", "skills", "strategy|leadership")

# Wait for index backfill to complete
IndexingTestHelper.wait_for_backfill_complete_on_node(client, "idx")

# Test 1: Text + Numeric (AND)
result = client.execute_command("FT.SEARCH", "idx", '@content:"manager" @salary:[90000 110000]')
assert result[0] == 1 # Should find doc:1

assert (result[0], result[1]) == (1, b"doc:3")
result = client.execute_command("FT.SEARCH", "idx", '@content:"manager" @salary:[90000 130000]')
assert result[0] == 2 # Should find doc:2, doc:3
assert (result[0], set(result[1::2])) == (2, {b"doc:2", b"doc:3"})

# Test 1.1: Text prefix + Numeric (AND)
result = client.execute_command("FT.SEARCH", "idx", '@content:develop* @salary:[90000 110000]')
assert result[0] == 1 # Should find doc:1
assert (result[0], result[1]) == (1, b"doc:1")

# Test 2: Text + Tag (OR)
result = client.execute_command("FT.SEARCH", "idx", '@content:"product" | @skills:{java}')
assert result[0] == 2 # Should find doc:1 (java) and doc:2 (scientist)
assert (result[0], set(result[1::2])) == (2, {b"doc:1", b"doc:3"})

# Test 3: All three types (complex OR)
result = client.execute_command("FT.SEARCH", "idx", '@content:"manager" | @salary:[115000 125000] | @skills:{python}')
assert result[0] == 3 # Should find all docs
assert (result[0], set(result[1::2])) == (3, {b"doc:1", b"doc:2", b"doc:3"})

# Test 4: All three types (complex AND)
result = client.execute_command("FT.SEARCH", "idx", '@content:"engineer" @salary:[90000 110000] @skills:{python}')
assert result[0] == 1 # Should find doc:1 only
assert (result[0], result[1]) == (1, b"doc:1")

# Test 5: Exact phrase with numeric filter (nested case)
result = client.execute_command("FT.SEARCH", "idx", '@content:"software engineer" @salary:[90000 110000]')
assert result[0] == 1 # Should find doc:1 (exact phrase + salary match)
assert (result[0], result[1]) == (1, b"doc:1")

# Test 6: Exact phrase with tag filter
result = client.execute_command("FT.SEARCH", "idx", '@content:"software engineer" @skills:{python}')
assert result[0] == 1 # Should find doc:1 (exact phrase + tag match)

assert (result[0], result[1]) == (1, b"doc:1")
# Test 7: Proximity with numeric - tests iterator propagation
result = client.execute_command("FT.SEARCH", "idx", '(@content:software @salary:[90000 110000]) @content:engineer', "SLOP", "1", "INORDER")
assert result[0] == 1 # Should find doc:1 (proximity + numeric filter)
assert (result[0], result[1]) == (1, b"doc:1")

# # Test 8: Negation with mixed types
# Test 8: Negation with mixed types
# result = client.execute_command("FT.SEARCH", "idx", '-@content:"manager" @skills:{python}')
# assert result[0] == 1 # Should find doc:1 only
# assert (result[0], result[1]) == (1, b"doc:1")

def test_nooffsets_option(self):
"""
Expand Down Expand Up @@ -867,8 +874,14 @@ def test_nooffsets_option(self):
client.execute_command("FT.SEARCH", "idx", 'term1 term2 term3 term4 term5 term6 term7 term8 term9 term10', "SLOP", "2", "INORDER")
assert "Index does not support offsets" in str(err.value)

def test_proximity_predicate(self):
@pytest.mark.parametrize("prefilter_eval_enabled", [pytest.param(False, id="Prefilter_eval=False"), pytest.param(True, id="Prefilter_eval=True")])
def test_proximity_predicate(self, prefilter_eval_enabled):
client: Valkey = self.server.get_new_client()
# Override config
if not prefilter_eval_enabled:
client.execute_command(
"CONFIG", "SET", "search.enable-prefilter-eval", "no"
)
# Create index with text fields
client.execute_command("FT.CREATE", "idx", "ON", "HASH", "SCHEMA",
"content", "TEXT", "NOSTEM", "title", "TEXT", "NOSTEM", "WITHSUFFIXTRIE")
Expand Down Expand Up @@ -1538,12 +1551,17 @@ def test_ft_info_text_index_fields(self):

class TestFullTextCluster(ValkeySearchClusterTestCaseDebugMode):

def test_fulltext_search_cluster(self):
@pytest.mark.parametrize("prefilter_eval_enabled", [pytest.param(False, id="Prefilter_eval=False"), pytest.param(True, id="Prefilter_eval=True")])
def test_fulltext_search_cluster(self, prefilter_eval_enabled):
"""
Test a fulltext search queries on Hash docs in Valkey Search CME.
"""
cluster_client: ValkeyCluster = self.new_cluster_client()
client: Valkey = self.new_client_for_primary(0)
if not prefilter_eval_enabled:
# Set config on all primary nodes
for primary_client in self.get_all_primary_clients():
primary_client.execute_command("CONFIG", "SET", "search.enable-prefilter-eval", "no")
# Create the text index on Hash documents
assert client.execute_command(text_index_on_hash) == b"OK"
# Data population:
Expand Down
35 changes: 33 additions & 2 deletions src/indexes/text.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ std::unique_ptr<EntriesFetcherIteratorBase> Text::EntriesFetcher::Begin() {
namespace valkey_search::query {

void* TextPredicate::Search(bool negate) const {
// TODO: Add logic to calculate the size based on number of keys estimated.
size_t estimated_size = EstimateSize();
auto fetcher = std::make_unique<indexes::Text::EntriesFetcher>(
0, GetTextIndexSchema()->GetTextIndex(), nullptr, GetFieldMask());
estimated_size, GetTextIndexSchema()->GetTextIndex(), nullptr,
GetFieldMask());
fetcher->predicate_ = this;
return fetcher.release();
}
Expand Down Expand Up @@ -213,4 +214,34 @@ std::unique_ptr<indexes::text::TextIterator> FuzzyPredicate::BuildTextIterator(
CHECK(false) << "Unsupported TextPredicate type";
}

// Size apis for estimation
size_t TermPredicate::EstimateSize() const {
// TODO: Implementation
return 0;
}

size_t PrefixPredicate::EstimateSize() const {
// TODO: Implementation
return 0;
}

size_t SuffixPredicate::EstimateSize() const {
// TODO: Implementation
return 0;
}

size_t ProximityPredicate::EstimateSize() const {
// TODO: Implementation
return 0;
}

size_t InfixPredicate::EstimateSize() const {
// TODO: Implementation
return 0;
}

size_t FuzzyPredicate::EstimateSize() const {
// TODO: Implementation
return 0;
}
} // namespace valkey_search::query
5 changes: 5 additions & 0 deletions src/indexes/text/orproximity.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,9 @@ FieldMaskPredicate OrProximityIterator::CurrentFieldMask() const {
return current_field_mask_;
}

bool OrProximityIterator::IsIteratorValid() const {
return current_key_ && current_position_.has_value() &&
current_field_mask_ != 0ULL;
}

} // namespace valkey_search::indexes::text
1 change: 1 addition & 0 deletions src/indexes/text/orproximity.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class OrProximityIterator : public TextIterator {
const PositionRange& CurrentPosition() const override;
bool NextPosition() override;
FieldMaskPredicate CurrentFieldMask() const override;
bool IsIteratorValid() const override;

private:
std::vector<std::unique_ptr<TextIterator>> iters_;
Expand Down
6 changes: 6 additions & 0 deletions src/indexes/text/proximity.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ class ProximityIterator : public TextIterator {
const PositionRange& CurrentPosition() const override;
bool NextPosition() override;
FieldMaskPredicate CurrentFieldMask() const override;
// Returns true if iterator is at a valid state with current key, position,
// and field.
bool IsIteratorValid() const override {
return current_key_ && current_position_.has_value() &&
current_field_mask_ != 0ULL;
}

private:
// List of all the Text Predicates contained in the Proximity AND.
Expand Down
9 changes: 9 additions & 0 deletions src/indexes/text/term.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ class TermIterator : public TextIterator {
const PositionRange& CurrentPosition() const override;
bool NextPosition() override;
FieldMaskPredicate CurrentFieldMask() const override;
// Returns true if iterator is at a valid state with current key, position,
// and field.
bool IsIteratorValid() const override {
if (require_positions_) {
return current_key_ && current_position_.has_value() &&
current_field_mask_ != 0ULL;
}
return current_key_ ? true : false;
}
/* Implementation of APIs unique to TermIterator */
// It is possible to implement a `CurrentKeyIterVecIdx` API that returns the
// index of the vector of the posting iterator (provided on init) that matches
Expand Down
3 changes: 3 additions & 0 deletions src/indexes/text/text_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ class TextIterator {
// Returns the field mask for the current position.
// ASSERT: !DonePositions()
virtual FieldMaskPredicate CurrentFieldMask() const = 0;
// Returns true if iterator is at a valid state (has current key, position,
// and field)
virtual bool IsIteratorValid() const = 0;
};

} // namespace valkey_search::indexes::text
Expand Down
20 changes: 16 additions & 4 deletions src/indexes/vector_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "src/query/predicate.h"
#include "src/rdb_serialization.h"
#include "src/utils/string_interning.h"
#include "src/valkey_search_options.h"
#include "src/vector_externalizer.h"
#include "third_party/hnswlib/hnswlib.h"
#include "third_party/hnswlib/space_ip.h"
Expand Down Expand Up @@ -103,10 +104,21 @@ query::EvaluationResult PrefilterEvaluator::EvaluateNumeric(

query::EvaluationResult PrefilterEvaluator::EvaluateText(
const query::TextPredicate &predicate, bool require_positions) {
// CHECK(key_);
// auto text = predicate.GetIndex()->GetRawValue(*key_);
// return predicate.Evaluate(*text);
return query::EvaluationResult(true);
CHECK(key_);
// Evaluate using per-key text index
// This acquires the lock and looks up the key in per_key_text_indexes
auto text_index_schema = predicate.GetTextIndexSchema();
return text_index_schema->WithPerKeyTextIndexes(
[&](auto &per_key_indexes) -> query::EvaluationResult {
auto it = per_key_indexes.find(*key_);
if (it == per_key_indexes.end()) {
VMSDK_LOG(WARNING, nullptr)
<< "Target key not found in index for pre-filter evaluation";
return query::EvaluationResult(false);
}
// Evaluate predicate against this key's text index
return predicate.Evaluate(it->second, *key_, require_positions);
});
}

template <typename T>
Expand Down
8 changes: 3 additions & 5 deletions src/indexes/vector_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,13 +262,11 @@ class PrefilterEvaluator : public query::Evaluator {
public:
bool Evaluate(const query::Predicate& predicate,
const InternedStringPtr& key);

// TODO: Implement this in prefilter implementation. For now just return
// nullptr.
const InternedStringPtr& GetTargetKey() const override {
static const InternedStringPtr null_key;
return null_key;
CHECK(key_);
return *key_;
}
bool IsPrefilterEvaluator() const override { return true; }

private:
query::EvaluationResult EvaluateTags(
Expand Down
29 changes: 15 additions & 14 deletions src/query/predicate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "src/indexes/text/proximity.h"
#include "src/indexes/text/text_index.h"
#include "src/indexes/text/text_iterator.h"
#include "src/valkey_search_options.h"
#include "vmsdk/src/log.h"
#include "vmsdk/src/managed_pointers.h"

Expand All @@ -35,17 +36,13 @@ EvaluationResult NegatePredicate::Evaluate(Evaluator& evaluator) const {
EvaluationResult BuildTextEvaluationResult(
std::unique_ptr<indexes::text::TextIterator> iterator,
bool require_positions) {
if (!iterator->IsIteratorValid()) {
return EvaluationResult(false);
}
if (require_positions) {
if (iterator->DoneKeys() || iterator->DonePositions()) {
return EvaluationResult(false);
}
return EvaluationResult(true, std::move(iterator));
} else {
if (iterator->DoneKeys()) {
return EvaluationResult(false);
}
return EvaluationResult(true);
}
return EvaluationResult(true);
}

TermPredicate::TermPredicate(
Expand Down Expand Up @@ -342,8 +339,14 @@ EvaluationResult EvaluatePredicate(const Predicate* predicate,
// ProximityIterator to validate term positions meet distance and order
// requirements.
EvaluationResult ComposedPredicate::Evaluate(Evaluator& evaluator) const {
// Determine if children need to return positions for proximity checks
bool require_positions = slop_.has_value() || inorder_;
// Determine if children need to return positions for proximity checks.
// Proximity check in Prefilter also depends on the configuration.
bool has_proximity_constraint = slop_.has_value() || inorder_;
bool require_positions =
evaluator.IsPrefilterEvaluator()
? has_proximity_constraint &&
options::GetEnableProximityPrefilterEval().GetValue()
: has_proximity_constraint;
// Handle AND logic
if (GetType() == PredicateType::kComposedAnd) {
// Short-circuit on first false
Expand Down Expand Up @@ -377,8 +380,7 @@ EvaluationResult ComposedPredicate::Evaluate(Evaluator& evaluator) const {
std::make_unique<indexes::text::ProximityIterator>(
std::move(iterators), slop_, inorder_, query_field_mask, nullptr);
// Check if any valid proximity matches exist
if (proximity_iterator->DoneKeys() ||
proximity_iterator->DonePositions()) {
if (!proximity_iterator->IsIteratorValid()) {
return EvaluationResult(false);
}
// Validate against original target key from evaluator
Expand Down Expand Up @@ -421,8 +423,7 @@ EvaluationResult ComposedPredicate::Evaluate(Evaluator& evaluator) const {
std::make_unique<indexes::text::OrProximityIterator>(
std::move(filter_iterators), nullptr);
// Check if any valid matches exist
if (or_proximity_iterator->DoneKeys() ||
or_proximity_iterator->DonePositions()) {
if (!or_proximity_iterator->IsIteratorValid()) {
return EvaluationResult(false);
}
// Validate against original target key from evaluator
Expand Down
Loading
Loading