Skip to content

Commit caa9621

Browse files
authored
fix: prevent timestamp collision in ChatMessage ID generation (issue #449) (#450)
Issue [#449](#449): In `redisvl/extensions/message_history/schema.py`, the `ChatMessage` class generates IDs using only the session tag and timestamp:

`values[ID_FIELD_NAME] = f"{values[SESSION_FIELD_NAME]}:{values[TIMESTAMP_FIELD_NAME]}"`

When multiple messages are added rapidly (e.g., via `add_messages(messages)` or `store(prompt, response)`), they can receive the same timestamp, resulting in identical IDs. Since Redis uses these IDs as keys, newer messages overwrite older ones with the same ID, causing message loss.

Fix:
- Add a UUID suffix (the first 8 hex characters of a UUID4) to the `ChatMessage` `entry_id` to ensure uniqueness
- Update the ID format from `session:timestamp` to `session:timestamp:uuid`
- Add a test for rapid message creation with the same timestamp
1 parent d992f8f commit caa9621

File tree

3 files changed

+3204
-3150
lines changed

3 files changed

+3204
-3150
lines changed

redisvl/extensions/message_history/schema.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from typing import Dict, List, Optional
2+
from uuid import uuid4
23

34
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
45

@@ -14,7 +15,7 @@
1415
)
1516
from redisvl.redis.utils import array_to_buffer
1617
from redisvl.schema import IndexSchema
17-
from redisvl.utils.utils import current_timestamp, deserialize
18+
from redisvl.utils.utils import current_timestamp
1819

1920

2021
class ChatMessage(BaseModel):
@@ -44,8 +45,11 @@ def generate_id(cls, values):
4445
if TIMESTAMP_FIELD_NAME not in values:
4546
values[TIMESTAMP_FIELD_NAME] = current_timestamp()
4647
if ID_FIELD_NAME not in values:
48+
# Add UUID suffix to prevent timestamp collisions when creating
49+
# multiple messages rapidly (e.g., in add_messages or store)
50+
unique_suffix = uuid4().hex[:8]
4751
values[ID_FIELD_NAME] = (
48-
f"{values[SESSION_FIELD_NAME]}:{values[TIMESTAMP_FIELD_NAME]}"
52+
f"{values[SESSION_FIELD_NAME]}:{values[TIMESTAMP_FIELD_NAME]}:{unique_suffix}"
4953
)
5054
return values
5155

tests/unit/test_message_history_schema.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from redisvl.extensions.message_history.schema import ChatMessage
55
from redisvl.redis.utils import array_to_buffer
6-
from redisvl.utils.utils import create_ulid, current_timestamp, deserialize, serialize
6+
from redisvl.utils.utils import create_ulid, current_timestamp, serialize
77

88

99
def test_chat_message_creation():
@@ -41,7 +41,13 @@ def test_chat_message_default_id_generation():
4141
timestamp=timestamp,
4242
)
4343

44-
assert chat_message.entry_id == f"{session_tag}:{timestamp}"
44+
# ID should start with session:timestamp and have a UUID suffix for uniqueness
45+
assert chat_message.entry_id.startswith(f"{session_tag}:{timestamp}:")
46+
# Verify the UUID suffix is 8 hex characters
47+
parts = chat_message.entry_id.split(":")
48+
assert len(parts) == 3
49+
assert len(parts[2]) == 8
50+
assert all(c in "0123456789abcdef" for c in parts[2])
4551

4652

4753
def test_chat_message_with_tool_call_id():
@@ -164,3 +170,29 @@ def test_chat_message_invalid_role():
164170
session_tag=session_tag,
165171
timestamp=timestamp,
166172
)
173+
174+
175+
def test_chat_message_unique_ids_for_rapid_creation():
176+
"""Test that rapidly created messages get unique IDs even with same timestamp."""
177+
session_tag = create_ulid()
178+
timestamp = current_timestamp()
179+
180+
# Create multiple messages with the same session and timestamp
181+
messages = []
182+
for i in range(10):
183+
msg = ChatMessage(
184+
role="user",
185+
content=f"Message {i}",
186+
session_tag=session_tag,
187+
timestamp=timestamp,
188+
)
189+
messages.append(msg)
190+
191+
# All IDs should be unique
192+
ids = [msg.entry_id for msg in messages]
193+
assert len(ids) == len(set(ids)), "All message IDs should be unique"
194+
195+
# All IDs should start with the same session:timestamp prefix
196+
expected_prefix = f"{session_tag}:{timestamp}:"
197+
for msg_id in ids:
198+
assert msg_id.startswith(expected_prefix)

0 commit comments

Comments
 (0)