Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20251228023802955549.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Fix DynamicCommunitySelection type mismatch where children IDs as integers were not matching string report keys"
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,14 @@ async def select(self, query: str) -> tuple[list[CommunityReport], dict[str, Any
# TODO check why some sub_communities are NOT in report_df
if community in self.communities:
for child in self.communities[community].children:
if child in self.reports:
communities_to_rate.append(child)
# Ensure child is a string to match self.reports keys
child_id = str(child)
if child_id in self.reports:
communities_to_rate.append(child_id)
else:
logger.debug(
"dynamic community selection: cannot find community %s in reports",
child,
child_id,
)
# remove parent node if the current node is deemed relevant
if not self.keep_parent and community in self.communities:
Expand Down
205 changes: 205 additions & 0 deletions tests/unit/query/context_builder/test_dynamic_community_selection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""Tests for dynamic community selection with type handling."""

from unittest.mock import MagicMock

from graphrag.data_model.community import Community
from graphrag.data_model.community_report import CommunityReport
from graphrag.query.context_builder.dynamic_community_selection import (
DynamicCommunitySelection,
)


def create_mock_tokenizer() -> MagicMock:
"""Create a mock tokenizer."""
tokenizer = MagicMock()
tokenizer.encode.return_value = [1, 2, 3]
return tokenizer


def create_mock_model() -> MagicMock:
"""Create a mock chat model."""
return MagicMock()


def test_dynamic_community_selection_handles_int_children():
"""Test that DynamicCommunitySelection correctly handles children IDs as integers.

This tests the fix for issue #2004 where children IDs could be integers
while self.reports keys are strings, causing child communities to be skipped.
"""
# Create communities with integer children (simulating the bug scenario)
# Note: Even though the type annotation says list[str], actual data may have ints
communities = [
Community(
id="comm-0",
short_id="0",
title="Root Community",
level="0",
parent="",
children=[1, 2], # type: ignore[list-item] # Integer children - testing bug fix
),
Community(
id="comm-1",
short_id="1",
title="Child Community 1",
level="1",
parent="0",
children=[],
),
Community(
id="comm-2",
short_id="2",
title="Child Community 2",
level="1",
parent="0",
children=[],
),
]

# Create community reports with string community_id
reports = [
CommunityReport(
id="report-0",
short_id="0",
title="Report 0",
community_id="0",
summary="Root community summary",
full_content="Root community full content",
rank=1.0,
),
CommunityReport(
id="report-1",
short_id="1",
title="Report 1",
community_id="1",
summary="Child 1 summary",
full_content="Child 1 full content",
rank=1.0,
),
CommunityReport(
id="report-2",
short_id="2",
title="Report 2",
community_id="2",
summary="Child 2 summary",
full_content="Child 2 full content",
rank=1.0,
),
]

model = create_mock_model()
tokenizer = create_mock_tokenizer()

selector = DynamicCommunitySelection(
community_reports=reports,
communities=communities,
model=model,
tokenizer=tokenizer,
threshold=1,
keep_parent=False,
max_level=2,
)

# Verify that reports are keyed by string
assert "0" in selector.reports
assert "1" in selector.reports
assert "2" in selector.reports

# Verify that communities are keyed by string short_id
assert "0" in selector.communities
assert "1" in selector.communities
assert "2" in selector.communities

# Verify that the children are properly accessible
# Before the fix, int children would fail the `in self.reports` check
root_community = selector.communities["0"]
for child in root_community.children:
child_id = str(child)
# This should now work with the fix
assert child_id in selector.reports, (
f"Child {child} (as '{child_id}') should be found in reports"
)


def test_dynamic_community_selection_handles_str_children():
"""Test that DynamicCommunitySelection works correctly with string children IDs."""
communities = [
Community(
id="comm-0",
short_id="0",
title="Root Community",
level="0",
parent="",
children=["1", "2"], # String children - expected type
),
Community(
id="comm-1",
short_id="1",
title="Child Community 1",
level="1",
parent="0",
children=[],
),
Community(
id="comm-2",
short_id="2",
title="Child Community 2",
level="1",
parent="0",
children=[],
),
]

reports = [
CommunityReport(
id="report-0",
short_id="0",
title="Report 0",
community_id="0",
summary="Root community summary",
full_content="Root community full content",
rank=1.0,
),
CommunityReport(
id="report-1",
short_id="1",
title="Report 1",
community_id="1",
summary="Child 1 summary",
full_content="Child 1 full content",
rank=1.0,
),
CommunityReport(
id="report-2",
short_id="2",
title="Report 2",
community_id="2",
summary="Child 2 summary",
full_content="Child 2 full content",
rank=1.0,
),
]

model = create_mock_model()
tokenizer = create_mock_tokenizer()

selector = DynamicCommunitySelection(
community_reports=reports,
communities=communities,
model=model,
tokenizer=tokenizer,
threshold=1,
keep_parent=False,
max_level=2,
)

# Verify that children can be found in reports
root_community = selector.communities["0"]
for child in root_community.children:
child_id = str(child)
assert child_id in selector.reports, (
f"Child {child} (as '{child_id}') should be found in reports"
)