From b7e004c5c3155e8137ac5a0848d5a9eb6ef3a7aa Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Wed, 10 Dec 2025 15:40:13 +0100 Subject: [PATCH 1/4] better handling of circular references --- src/py_avro_schema/_schemas.py | 169 ++++++++++++++++++++++++++++----- tests/test_plain_class.py | 29 +++++- 2 files changed, 172 insertions(+), 26 deletions(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index eee82bb..98dc798 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -26,6 +26,7 @@ import re import sys import types +import typing import uuid from enum import StrEnum from typing import ( @@ -173,6 +174,7 @@ def schema( namespace: Optional[str] = None, names: Optional[NamesType] = None, options: Option = Option(0), + processing: set[type] | None = None, ) -> JSONType: """ Generate and return an Avro schema for a given Python type @@ -187,12 +189,17 @@ def schema( """ if names is None: names = [] - schema_obj = _schema_obj(py_type, namespace=namespace, options=options) + schema_obj = _schema_obj(py_type, namespace=namespace, options=options, processing=processing) schema_data = schema_obj.data(names=names) return schema_data -def _schema_obj(py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)) -> "Schema": +def _schema_obj( + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, +) -> "Schema": """ Dispatch to relevant schema classes @@ -200,10 +207,11 @@ def _schema_obj(py_type: Type, namespace: Optional[str] = None, options: Option :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" + processing = processing or set() # Find concrete Schema subclasses defined in the current module for schema_class in sorted(_SCHEMA_CLASSES, key=lambda c: getattr(c, "__py_avro_priority", 0)): # Find the first schema class that handles py_type - schema_obj = schema_class(py_type, namespace=namespace, options=options) # type: ignore + schema_obj = schema_class(py_type, namespace=namespace, options=options, processing=processing) # type: ignore if schema_obj: return schema_obj raise TypeNotSupportedError(f"Cannot generate Avro schema for Python type {py_type}") @@ -223,7 +231,13 @@ def validate_name(value: str) -> str: class Schema(abc.ABC): """Schema base""" - def __new__(cls, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __new__( + cls, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ Create an instance of this schema class if it handles py_type @@ -236,17 +250,25 @@ def __new__(cls, py_type: Type, namespace: Optional[str] = None, options: Option else: return None - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ A schema base :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. + :param processing: Internal parameter to track types currently being processed (for circular dependencies). """ self.py_type = py_type self.options = options self._namespace = namespace # Namespace override + self.processing = processing or set() @property def namespace_override(self) -> Optional[str]: @@ -355,7 +377,13 @@ def data(self, names: NamesType) -> JSONObj: class LiteralSchema(Schema): """An Avro schema of any type for a Python Literal type, e.g. 
``Literal[""]``""" - def __init__(self, py_type: Type[Any], namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type[Any], + namespace: Optional[str] = None, + options: Option = Option(0), + **kwargs, + ): """ An Avro schema of any type for a Python Literal type, e.g. ``Literal[""]`` @@ -389,7 +417,13 @@ def data(self, names: NamesType) -> JSONType: class FinalSchema(Schema): """An Avro schema for Python ``typing.Final``""" - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + **kwargs, + ): """An Avro schema for Python ``typing.Final``""" super().__init__(py_type, namespace, options) py_type = _type_from_annotated(py_type) @@ -678,6 +712,7 @@ def __init__( py_type: Type[collections.abc.MutableSequence], namespace: Optional[str] = None, options: Option = Option(0), + **kwargs, ): """ An Avro array schema for a given Python sequence @@ -722,6 +757,7 @@ def __init__( py_type: type[collections.abc.MutableSet], namespace: str | None = None, options: Option = Option(0), + **kwargs, ): """ An Avro array schema for a given Python sequence @@ -751,6 +787,7 @@ def __init__( py_type: Type[collections.abc.MutableMapping], namespace: Optional[str] = None, options: Option = Option(0), + processing: set[type] | None = None, ): """ An Avro map schema for a given Python mapping @@ -758,8 +795,9 @@ def __init__( :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. + :param processing: Internal parameter to track types currently being processed (for circular dependencies). 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) args = get_args(py_type) if args[0] != str and not issubclass(args[0], StrEnum): @@ -791,7 +829,13 @@ def handles_type(cls, py_type: Type) -> bool: return origin == Union or origin == union_type return origin == Union - def __init__(self, py_type: Type[Union[Any]], namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type[Union[Any]], + namespace: Optional[str] = None, + options: Option = Option(0), + **kwargs, + ): """ An Avro union schema for a given Python union type @@ -853,7 +897,13 @@ def make_default(self, py_default: Any) -> JSONType: class NamedSchema(Schema): """A named Avro schema base class""" - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ A named Avro schema base class @@ -861,7 +911,7 @@ def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Opti :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) self.name = py_type.__name__ @@ -909,7 +959,13 @@ def handles_type(cls, py_type: Type) -> bool: """Whether this schema class can represent a given Python class""" return _is_class(py_type, enum.Enum) - def __init__(self, py_type: Type[enum.Enum], namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type[enum.Enum], + namespace: Optional[str] = None, + options: Option = Option(0), + **kwargs, + ): """ An Avro enum schema for a Python enum with string values @@ -973,15 +1029,25 @@ def data_before_deduplication(self, names: NamesType) -> JSONObj: class RecordSchema(NamedSchema): """An Avro record schema base class""" - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ An Avro record schema base class :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. + :param processing: Internal parameter to track types currently being processed (for circular dependencies). 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) + self.processing = processing or set() + # Add this type to the processing set to detect circular dependencies + self.processing.add(py_type) self.record_fields: collections.abc.Sequence[RecordField] = [] def data_before_deduplication(self, names: NamesType) -> JSONObj: @@ -1000,6 +1066,7 @@ def data_before_deduplication(self, names: NamesType) -> JSONObj: doc = _doc_for_class(self.py_type) if doc: record_schema["doc"] = doc + self.processing.discard(self.py_type) return record_schema @@ -1015,6 +1082,7 @@ def __init__( default: Any = dataclasses.MISSING, docs: str = "", options: Option = Option(0), + processing: set[type] | None = None, ): """ An Avro record field @@ -1027,6 +1095,8 @@ def __init__( :param docs: Field documentation or description :param options: Schema generation options """ + if processing is None: + processing = set() if aliases is None: aliases = [] self.py_type = py_type @@ -1036,7 +1106,14 @@ def __init__( self.default = default self.docs = docs self.options = options - self.schema = _schema_obj(self.py_type, namespace=self._namespace, options=options) + + _type = self.py_type + # Check for circular dependency + if self.py_type in processing and hasattr(self.py_type, "__name__"): + # This is a circular reference - use a ForwardRef to break the cycle + _type = ForwardRef(py_type.__name__) # type: ignore + + self.schema = _schema_obj(_type, namespace=self._namespace, options=options, processing=processing) if self.default != dataclasses.MISSING: if isinstance(self.schema, UnionSchema): @@ -1075,7 +1152,13 @@ def handles_type(cls, py_type: Type) -> bool: py_type = _type_from_annotated(py_type) return dataclasses.is_dataclass(py_type) - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: 
Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ An Avro record schema for a given Python dataclass @@ -1083,7 +1166,7 @@ def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Opti :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. """ - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) self.py_fields = dataclasses.fields(py_type) self.record_fields = [self._record_field(field) for field in self.py_fields] @@ -1101,6 +1184,7 @@ def _record_field(self, py_field: dataclasses.Field) -> RecordField: default=default, aliases=aliases, options=self.options, + processing=self.processing, ) return field_obj @@ -1116,7 +1200,13 @@ def handles_type(cls, py_type: Type) -> bool: py_type = _type_from_annotated(py_type) return hasattr(py_type, "__pydantic_private__") - def __init__(self, py_type: Type[pydantic.BaseModel], namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type[pydantic.BaseModel], + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ An Avro record schema for a given Pydantic model class @@ -1124,7 +1214,7 @@ def __init__(self, py_type: Type[pydantic.BaseModel], namespace: Optional[str] = :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) if Option.USE_CLASS_ALIAS in self.options: self.name = py_type.model_config.get("title") or self.name self.py_fields = py_type.model_fields @@ -1144,6 +1234,7 @@ def _record_field(self, name: str, py_field: pydantic.fields.FieldInfo) -> Recor aliases=aliases, docs=py_field.description or "", options=self.options, + processing=self.processing, ) return field_obj @@ -1190,10 +1281,16 @@ def handles_type(cls, py_type: Type) -> bool: # If we are subclassing a string, used the "named string" approach and (inspect.isclass(py_type) and not issubclass(py_type, str)) # and any other class with typed annotations - and bool(get_type_hints(py_type)) + and has_annotations(py_type) ) - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ An Avro record schema for a plain Python class with type hints @@ -1201,7 +1298,7 @@ def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Opti :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) # Try to get resolved type hints, but fall back to raw annotations if there are unresolved forward refs @@ -1236,6 +1333,7 @@ def _record_field(self, py_field: tuple[str, Type]) -> RecordField: default=default, aliases=aliases, options=self.options, + processing=self.processing, ) return field_obj @@ -1249,7 +1347,13 @@ def handles_type(cls, py_type: Type) -> bool: """Whether this schema can represent a TypedDict""" return is_typeddict(py_type) - def __init__(self, py_type: Type, namespace: str | None = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: str | None = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ An Avro record schema for a given Python TypedDict @@ -1257,7 +1361,7 @@ def __init__(self, py_type: Type, namespace: str | None = None, options: Option :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) self.is_total = py_type.__dict__.get("__total__", True) self.py_fields: dict[str, Type] = get_type_hints(py_type, include_extras=True) @@ -1280,6 +1384,7 @@ def _record_field(self, py_field: tuple[str, Type]) -> RecordField: namespace=self.namespace_override, aliases=aliases, options=self.options, + processing=self.processing, ) return field_obj @@ -1322,7 +1427,11 @@ def is_logically_json(py_type: Type) -> bool: return _is_list_any(py_type) or _is_list_dict_str_any(py_type) or _is_dict_str_any(py_type) -def _is_class(py_type: Any, of_types: Union[Type, Tuple[Type, ...]], include_subclasses: bool = True) -> bool: +def _is_class( + py_type: Any, + of_types: Union[Type, Tuple[Type, ...]], + include_subclasses: bool = True, +) -> bool: """Return whether the given type is a (sub) class of a type or types""" py_type = _type_from_annotated(py_type) if include_subclasses: @@ -1345,3 +1454,13 @@ def _type_from_annotated(py_type: Type) -> Type: return args[0] else: return py_type + + +def has_annotations(py_type: Type) -> bool: + """Checks if a type has annotations""" + py_type = _type_from_annotated(py_type) + try: + return bool(typing.get_type_hints(py_type)) + except Exception: + pass + return hasattr(py_type, "__annotations__") diff --git a/tests/test_plain_class.py b/tests/test_plain_class.py index a90da2c..c8245ae 100644 --- a/tests/test_plain_class.py +++ b/tests/test_plain_class.py @@ -10,7 +10,7 @@ # specific language governing permissions and limitations under the License. 
import re -from typing import Annotated, Final +from typing import Annotated, Final, ForwardRef import pytest @@ -179,3 +179,30 @@ def __init__(self): } assert_schema(PyType, expected) + + +class PyType: + backend: ForwardRef("Backend") + value: str + + +class Backend: + py_type: PyType + + +def test_circular_dependencies(): + expected = { + "fields": [ + { + "name": "py_type", + "type": { + "fields": [{"name": "backend", "type": "Backend"}, {"name": "value", "type": "string"}], + "name": "PyType", + "type": "record", + }, + } + ], + "name": "Backend", + "type": "record", + } + assert_schema(Backend, expected) From ee1acd14386f66e48accf82c066691ea36db7d9e Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 11 Dec 2025 09:57:35 +0100 Subject: [PATCH 2/4] cleanup --- src/py_avro_schema/_schemas.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index 98dc798..5e8df9d 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -26,7 +26,6 @@ import re import sys import types -import typing import uuid from enum import StrEnum from typing import ( @@ -1460,7 +1459,7 @@ def has_annotations(py_type: Type) -> bool: """Checks if a type has annotations""" py_type = _type_from_annotated(py_type) try: - return bool(typing.get_type_hints(py_type)) + return bool(get_type_hints(py_type)) except Exception: pass return hasattr(py_type, "__annotations__") From b4e4e7520df8ab678b91fe7d34f4ab426aff80cd Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 11 Dec 2025 13:38:02 +0100 Subject: [PATCH 3/4] naming --- src/py_avro_schema/_schemas.py | 8 ++++---- tests/test_plain_class.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index 5e8df9d..4b11b39 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -262,7 +262,7 @@ def __init__( :param py_type: The 
Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. - :param processing: Internal parameter to track types currently being processed (for circular dependencies). + :param processing: Internal parameter to track types currently being processed (for circular references). """ self.py_type = py_type self.options = options @@ -794,7 +794,7 @@ def __init__( :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. - :param processing: Internal parameter to track types currently being processed (for circular dependencies). + :param processing: Internal parameter to track types currently being processed (for circular references). """ super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) @@ -1041,11 +1041,11 @@ def __init__( :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. - :param processing: Internal parameter to track types currently being processed (for circular dependencies). + :param processing: Internal parameter to track types currently being processed (for circular references). 
""" super().__init__(py_type, namespace=namespace, options=options, processing=processing) self.processing = processing or set() - # Add this type to the processing set to detect circular dependencies + # Add this type to the processing set to detect circular references self.processing.add(py_type) self.record_fields: collections.abc.Sequence[RecordField] = [] diff --git a/tests/test_plain_class.py b/tests/test_plain_class.py index c8245ae..613b4dd 100644 --- a/tests/test_plain_class.py +++ b/tests/test_plain_class.py @@ -190,7 +190,7 @@ class Backend: py_type: PyType -def test_circular_dependencies(): +def test_circular_references(): expected = { "fields": [ { From 56a9afba7d47e864e26c39c65e9b6ce3250c9c54 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 11 Dec 2025 15:35:04 +0100 Subject: [PATCH 4/4] get_origin might fall into infinite recursion --- src/py_avro_schema/_schemas.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index 4b11b39..2efbc8e 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -1423,7 +1423,10 @@ def _is_list_any(py_type: Type) -> bool: def is_logically_json(py_type: Type) -> bool: """Returns whether a given type is logically a JSON and can be serialized as such""" - return _is_list_any(py_type) or _is_list_dict_str_any(py_type) or _is_dict_str_any(py_type) + try: + return _is_list_any(py_type) or _is_list_dict_str_any(py_type) or _is_dict_str_any(py_type) + except RecursionError: + return False def _is_class(