Skip to content

Commit ff415fb

Browse files
authored
RAAE-1309: add support for wildcard TAG filters (#454)
Allow using the modulo operator (`%`) to enable wildcard (e.g. prefix search) support in `Tag` filters, for example: `wildcard_match = Tag("category") % "tech*"`. Closes #453 / RAAE-1309.
1 parent c50fd44 commit ff415fb

File tree

3 files changed

+130
-2
lines changed

3 files changed

+130
-2
lines changed

redisvl/query/filter.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,13 @@ class Tag(FilterField):
120120
FilterOperator.EQ: "==",
121121
FilterOperator.NE: "!=",
122122
FilterOperator.IN: "==",
123+
FilterOperator.LIKE: "%",
123124
}
124125
OPERATOR_MAP: Dict[FilterOperator, str] = {
125126
FilterOperator.EQ: "@%s:{%s}",
126127
FilterOperator.NE: "(-@%s:{%s})",
127128
FilterOperator.IN: "@%s:{%s}",
129+
FilterOperator.LIKE: "@%s:{%s}",
128130
}
129131
SUPPORTED_VAL_TYPES = (list, set, tuple, str, type(None))
130132

@@ -177,9 +179,41 @@ def __ne__(self, other) -> "FilterExpression":
177179
self._set_tag_value(other, FilterOperator.NE)
178180
return FilterExpression(str(self))
179181

182+
def __mod__(self, other: Union[List[str], str]) -> "FilterExpression":
    """Build a wildcard (pattern-matching) Tag filter expression.

    The value is recorded with the ``LIKE`` operator. When the filter is
    rendered, ``*`` characters in the pattern are left unescaped so they act
    as wildcards, while the other special characters are still escaped.
    A ``*`` may appear at the end (``"tech*"``, prefix match), at the start
    (``"*tech"``, suffix match), or on both sides (``"*tech*"``, contains
    match).

    Args:
        other (Union[List[str], str]): The tag pattern(s) to filter on.
            Use ``*`` wherever a wildcard is wanted.

    Returns:
        FilterExpression: The expression built from this filter's string
        form.

    .. code-block:: python

        from redisvl.query.filter import Tag

        f = Tag("category") % "tech*"  # Prefix match
        f = Tag("category") % "*tech"  # Suffix match
        f = Tag("category") % "*tech*"  # Contains match
        f = Tag("category") % "elec*|*soft"  # Multiple wildcard patterns
        f = Tag("category") % ["tech*", "*science"]  # List of patterns

    """
    self._set_tag_value(other, FilterOperator.LIKE)
    rendered_query = str(self)
    return FilterExpression(rendered_query)
209+
180210
@property
def _formatted_tag_value(self) -> str:
    """Return the stored tag values escaped and joined with ``|``.

    Each value is passed through ``self.escaper.escape``. When the current
    operator is ``FilterOperator.LIKE`` the escaper is asked to preserve
    ``*`` so it keeps acting as a wildcard; for every other operator ``*``
    is escaped like any other special character.
    """
    # Only LIKE (wildcard) filters keep ``*`` unescaped.
    keep_asterisks = self._operator == FilterOperator.LIKE
    escaped_tags = []
    for raw_tag in self._value:
        escaped_tags.append(self.escaper.escape(raw_tag, keep_asterisks))
    return "|".join(escaped_tags)
183217

184218
def __str__(self) -> str:
185219
"""Return the Redis Query string for the Tag filter"""

redisvl/utils/token_escaper.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,30 @@ class TokenEscaper:
1212
# Source: https://redis.io/docs/stack/search/reference/escaping/#the-rules-of-text-field-tokenization
1313
DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]"
1414

15+
# Same as above but excludes * to allow wildcard patterns
16+
ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ ]"
17+
1518
def __init__(self, escape_chars_re: Optional[Pattern] = None):
    """Set up the compiled patterns used for escaping.

    Args:
        escape_chars_re: Optional pre-compiled pattern matching the
            characters to escape. When omitted (or falsy), a pattern is
            compiled from ``DEFAULT_ESCAPED_CHARS``.
    """
    default_needed = not escape_chars_re
    self.escaped_chars_re = (
        re.compile(self.DEFAULT_ESCAPED_CHARS) if default_needed else escape_chars_re
    )
    # NOTE(review): the wildcard-preserving pattern is always compiled from
    # the class constant, so a custom ``escape_chars_re`` is not reflected
    # when ``escape(..., preserve_wildcards=True)`` is used.
    self.escaped_chars_no_wildcard_re = re.compile(self.ESCAPED_CHARS_NO_WILDCARD)
24+
25+
def escape(self, value: str, preserve_wildcards: bool = False) -> str:
26+
"""Escape special characters in a string for use in Redis queries.
27+
28+
Args:
29+
value: The string value to escape.
30+
preserve_wildcards: If True, preserves * characters for wildcard
31+
matching. Defaults to False.
32+
33+
Returns:
34+
The escaped string.
2035
21-
def escape(self, value: str) -> str:
36+
Raises:
37+
TypeError: If value is not a string.
38+
"""
2239
if not isinstance(value, str):
2340
raise TypeError(
2441
f"Value must be a string object for token escaping, got type {type(value)}"
@@ -28,4 +45,6 @@ def escape_symbol(match):
2845
value = match.group(0)
2946
return f"\\{value}"
3047

48+
if preserve_wildcards:
49+
return self.escaped_chars_no_wildcard_re.sub(escape_symbol, value)
3150
return self.escaped_chars_re.sub(escape_symbol, value)

tests/unit/test_filter.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,81 @@ def test_tag_filter_varied(operation, tags, expected):
5555
assert str(tf) == expected
5656

5757

58+
@pytest.mark.parametrize(
    "pattern,expected",
    [
        # Basic prefix wildcard
        pytest.param("tech*", "@tag_field:{tech*}", id="prefix_wildcard"),
        # Suffix wildcard (advertised in the ``%`` operator's docstring)
        pytest.param("*tech", "@tag_field:{*tech}", id="suffix_wildcard"),
        # Multiple patterns via list
        pytest.param(
            ["tech*", "soft*"],
            "@tag_field:{tech*|soft*}",
            id="multiple_patterns",
        ),
        # Multiple patterns in one string; ``|`` is not an escaped character
        pytest.param(
            "elec*|*soft",
            "@tag_field:{elec*|*soft}",
            id="pipe_delimited_patterns",
        ),
        # Wildcard with special chars that still get escaped
        pytest.param(
            "tech*-pro",
            "@tag_field:{tech*\\-pro}",
            id="wildcard_with_special_char",
        ),
        # Prefix with space (space escaped, wildcard preserved)
        pytest.param("hello w*", "@tag_field:{hello\\ w*}", id="prefix_with_space"),
        # Multiple wildcards in same pattern
        pytest.param("*test*", "@tag_field:{*test*}", id="multiple_wildcards"),
        # Empty pattern returns wildcard match-all
        pytest.param("", "*", id="empty_string"),
        pytest.param([], "*", id="empty_list"),
        pytest.param(None, "*", id="none"),
        # Pattern with special characters
        pytest.param(
            "cat$*",
            "@tag_field:{cat\\$*}",
            id="special_char_with_wildcard",
        ),
    ],
)
def test_tag_wildcard_filter(pattern, expected):
    """Test Tag % operator for wildcard (prefix/suffix/contains) matching.

    Each case renders ``Tag("tag_field") % pattern`` and compares the query
    string with ``expected``. Ids are attached with ``pytest.param`` so a
    case and its id cannot drift apart.
    """
    tf = Tag("tag_field") % pattern
    assert str(tf) == expected
94+
95+
96+
def test_tag_wildcard_preserves_asterisk():
    """The ``%`` operator keeps ``*`` intact while ``==`` escapes it."""
    pattern = "tech*"

    # Equality treats the asterisk as a literal character and escapes it.
    escaped_filter = Tag("tag_field") == pattern
    assert str(escaped_filter) == "@tag_field:{tech\\*}"

    # The wildcard operator must leave the asterisk untouched.
    wildcard_filter = Tag("tag_field") % pattern
    assert str(wildcard_filter) == "@tag_field:{tech*}"
105+
106+
107+
def test_tag_wildcard_combined_with_exact_match():
    """Wildcard and exact-match Tag filters keep their own escaping when combined."""
    brand_exact = Tag("brand") == "nike"
    category_like = Tag("category") % "tech*"

    # Each filter renders correctly on its own.
    assert str(brand_exact) == "@brand:{nike}"
    assert str(category_like) == "@category:{tech*}"

    # AND: the wildcard survives; the exact match gains no asterisk.
    assert str(brand_exact & category_like) == "(@brand:{nike} @category:{tech*})"

    # OR
    assert str(brand_exact | category_like) == "(@brand:{nike} | @category:{tech*})"

    # Three-way mix: exact, wildcard, and an exact value that contains ``*``.
    status_exact = Tag("status") == "active*"  # * should be escaped
    rendered = str(brand_exact & category_like & status_exact)
    expected_fragments = (
        "@brand:{nike}",
        "@category:{tech*}",  # wildcard preserved
        "@status:{active\\*}",  # asterisk escaped
    )
    for fragment in expected_fragments:
        assert fragment in rendered
131+
132+
58133
def test_nullable():
59134
tag = Tag("tag_field") == None
60135
assert str(tag) == "*"

0 commit comments

Comments
 (0)