diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index 832651a477dd..3967645c2019 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -63,7 +63,7 @@ def maybe_nullable(beam_type, nullable): json_type = json_schema.get('type', None) if json_type != 'object': - raise ValueError('Expected object type, got {json_type}.') + raise ValueError(f'Expected object type, got {json_type}.') if 'properties' not in json_schema: # Technically this is a valid (vacuous) schema, but as it's not generally # meaningful, throw an informative error instead. @@ -314,24 +314,34 @@ def _validate_compatible(weak_schema, strong_schema): return if weak_schema['type'] != strong_schema['type']: raise ValueError( - 'Incompatible types: %r vs %r' % - (weak_schema['type'] != strong_schema['type'])) + f"Incompatible types: {weak_schema['type']} vs {strong_schema['type']}") if weak_schema['type'] == 'array': _validate_compatible(weak_schema['items'], strong_schema['items']) - elif weak_schema == 'object': + elif weak_schema['type'] == 'object': + # If the weak schema allows for arbitrary keys (is a map), + # the strong schema must also allow for arbitrary keys. + if weak_schema.get('additionalProperties'): + if not strong_schema.get('additionalProperties', True): + raise ValueError('Incompatible types: map vs object') + _validate_compatible( + weak_schema['additionalProperties'], + strong_schema['additionalProperties']) for required in strong_schema.get('required', []): if required not in weak_schema['properties']: - raise ValueError('Missing or unkown property %r' % required) - for name, spec in weak_schema.get('properties', {}): + raise ValueError(f"Missing or unknown property '{required}'") + for name, spec in weak_schema.get('properties', {}).items(): + if name in strong_schema['properties']: try: _validate_compatible(spec, strong_schema['properties'][name]) except Exception as exn: - raise ValueError('Incompatible schema for %r' % name) from exn - elif not strong_schema.get('additionalProperties'): + raise ValueError(f"Incompatible schema for '{name}'") from exn + elif not strong_schema.get('additionalProperties', True): + # The property is not explicitly in the strong schema, and the strong + # schema does not allow for extra properties. raise ValueError( - 'Prohibited property: {property}; ' - 'perhaps additionalProperties: False is missing?') + f"Prohibited property: '{name}'; " + "perhaps additionalProperties: False is missing?") def row_validator(beam_schema: schema_pb2.Schema, diff --git a/sdks/python/apache_beam/yaml/json_utils_test.py b/sdks/python/apache_beam/yaml/json_utils_test.py index 3d4d5ea3dd41..e930577ec1e6 100644 --- a/sdks/python/apache_beam/yaml/json_utils_test.py +++ b/sdks/python/apache_beam/yaml/json_utils_test.py @@ -152,6 +152,130 @@ def test_json_to_row_with_missing_required_field(self): with self.assertRaises(KeyError): converter(json_data) + def test_validate_compatible(self): + from apache_beam.yaml.json_utils import _validate_compatible + + # Compatible cases + _validate_compatible({'type': 'string'}, {'type': 'string'}) + _validate_compatible( + { + 'type': 'object', 'properties': { + 'f': { + 'type': 'string' + } + } + }, { + 'type': 'object', 'properties': { + 'f': { + 'type': 'string' + } + } + }) + + # Incompatible types + with self.assertRaisesRegex(ValueError, 'Incompatible types'): + _validate_compatible({'type': 'string'}, {'type': 'integer'}) + + # Missing property + with self.assertRaisesRegex(ValueError, 'Missing or unknown property'): + _validate_compatible({ + 'type': 'object', 'properties': {} + }, + { + 'type': 'object', + 'properties': { + 'f': { + 'type': 'string' + } + }, + 'required': ['f'] + }) + + # Incompatible property type + with self.assertRaisesRegex(ValueError, 'Incompatible schema for \'f\''): + _validate_compatible( + { + 'type': 'object', 'properties': { + 'f': { + 'type': 'integer' + } + } + }, { + 'type': 'object', 'properties': { + 'f': { + 'type': 'string' + } + } + }) + + def test_validate_compatible_map(self): + from apache_beam.yaml.json_utils import _validate_compatible + + # Compatible maps + _validate_compatible( + { + 'type': 'object', 'additionalProperties': { + 'type': 'string' + } + }, { + 'type': 'object', 'additionalProperties': { + 'type': 'string' + } + }) + + # Incompatible map values + with self.assertRaisesRegex(ValueError, 'Incompatible types'): + _validate_compatible( + { + 'type': 'object', 'additionalProperties': { + 'type': 'string' + } + }, { + 'type': 'object', 'additionalProperties': { + 'type': 'integer' + } + }) + + # Map vs Object + with self.assertRaisesRegex(ValueError, + 'Incompatible types: map vs object'): + _validate_compatible( + { + 'type': 'object', 'additionalProperties': { + 'type': 'string' + } + }, { + 'type': 'object', 'properties': {}, 'additionalProperties': False + }) + + def test_validate_compatible_extra_properties(self): + from apache_beam.yaml.json_utils import _validate_compatible + + # Extra properties in weak_schema should be allowed if strong_schema + # doesn't explicitly forbid them (default additionalProperties=True). + _validate_compatible({ + 'type': 'object', 'properties': { + 'extra': { + 'type': 'string' + } + } + }, { + 'type': 'object', 'properties': {} + }) + + # But if strong_schema says additionalProperties: False, it should raise. + with self.assertRaisesRegex(ValueError, 'Prohibited property'): + _validate_compatible( + { + 'type': 'object', 'properties': { + 'extra': { + 'type': 'string' + } + } + }, { + 'type': 'object', 'properties': {}, 'additionalProperties': False + }) + if __name__ == '__main__': unittest.main()