From a041b5d549eb430314bdc5a76363cccea6201675 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 23 Oct 2025 13:16:08 -0400 Subject: [PATCH 1/2] fix rss/atom validation errors --- .../render/test_cardsearch_atom_renderer.py | 16 ++++++++++++++-- .../render/test_cardsearch_rss_renderer.py | 9 ++++++--- trove/render/cardsearch_atom.py | 17 ++++++++++++----- trove/render/cardsearch_rss.py | 16 ++++++++++++---- 4 files changed, 44 insertions(+), 14 deletions(-) diff --git a/tests/trove/render/test_cardsearch_atom_renderer.py b/tests/trove/render/test_cardsearch_atom_renderer.py index c07e35c3e..bd8d7d9c4 100644 --- a/tests/trove/render/test_cardsearch_atom_renderer.py +++ b/tests/trove/render/test_cardsearch_atom_renderer.py @@ -1,3 +1,6 @@ +from unittest import mock +import datetime + from trove.render.cardsearch_atom import CardsearchAtomRenderer from trove.render.rendering import EntireRendering from . import _base @@ -15,8 +18,9 @@ class TestCardsearchAtomRenderer(_base.TrovesearchRendererTests): b'' b'shtrove search results' b'feed of metadata records matching given filters' - b'http://blarg.example/vocab/aSearch' + b'' b'http://blarg.example/vocab/aSearch' + b'2345-06-07T08:09:10Z' b'' ), ), @@ -27,8 +31,9 @@ class TestCardsearchAtomRenderer(_base.TrovesearchRendererTests): b'' b'shtrove search results' b'feed of metadata records matching given filters' - b'http://blarg.example/vocab/aSearchFew' + b'' b'http://blarg.example/vocab/aSearchFew' + b'2345-06-07T08:09:10Z' b'' b'' b'http://blarg.example/vocab/aCard' @@ -42,7 +47,14 @@ class TestCardsearchAtomRenderer(_base.TrovesearchRendererTests): b'http://blarg.example/vocab/aCarddd' b'an itemmm, yes' b'2001-02-03T00:00:00Z' + b'a person indeedhttp://blarg.example/vocab/aPerson' b'' ), ), } + + def setUp(self): + self.enterContext(mock.patch( + 'django.utils.timezone.now', + return_value=datetime.datetime(2345, 6, 7, 8, 9, 10, tzinfo=datetime.UTC), + )) diff --git a/tests/trove/render/test_cardsearch_rss_renderer.py b/tests/trove/render/test_cardsearch_rss_renderer.py index 237a6b6da..a376b6cda 100644 --- a/tests/trove/render/test_cardsearch_rss_renderer.py +++ b/tests/trove/render/test_cardsearch_rss_renderer.py @@ -12,10 +12,11 @@ class TestCardsearchRssRenderer(_base.TrovesearchRendererTests): mediatype='application/rss+xml', entire_content=( b"\n" - b'' + b'' b'' b'shtrove search results' b'http://blarg.example/vocab/aSearch' + b'' b'feed of metadata records matching given filters' b'share-support@cos.io' b'' @@ -25,9 +26,11 @@ class TestCardsearchRssRenderer(_base.TrovesearchRendererTests): mediatype='application/rss+xml', entire_content=( b"\n" - b'' + b'' + b'' b'shtrove search results' b'http://blarg.example/vocab/aSearchFew' + b'' b'feed of metadata records matching given filters' b'share-support@cos.io' b'' @@ -43,7 +46,7 @@ class TestCardsearchRssRenderer(_base.TrovesearchRendererTests): b'http://blarg.example/vocab/anItemmm' b'an itemmm, yes' b'Sat, 03 Feb 2001 00:00:00 -0000' - b'http://blarg.example/vocab/aPerson (a person indeed)' + b'http://blarg.example/vocab/aPerson (a person indeed)' b'' ), ), diff --git a/trove/render/cardsearch_atom.py b/trove/render/cardsearch_atom.py index f845e3e71..9d8188b1d 100644 --- a/trove/render/cardsearch_atom.py +++ b/trove/render/cardsearch_atom.py @@ -2,6 +2,7 @@ import itertools import typing +from django.utils import timezone from django.utils.translation import gettext as _ from primitive_metadata import primitive_rdf as rdf @@ -38,8 +39,9 @@ def _dates(*path: str) -> Iterator[str]: _xb = XmlBuilder('feed', {'xmlns': 'http://www.w3.org/2005/Atom'}) _xb.leaf('title', text=_('shtrove search results')) _xb.leaf('subtitle', text=_('feed of metadata records matching given filters')) - _xb.leaf('link', text=self.response_focus.single_iri()) + _xb.leaf('link', {'href': self.response_focus.single_iri()}) _xb.leaf('id', text=self.response_focus.single_iri()) + _xb.leaf('updated', text=datetime_isoformat_z(timezone.now())) for _card_iri, _osfmap_json in itertools.chain.from_iterable(card_pages): with _xb.nest('entry'): _iri = _osfmap_json.get('@id', _card_iri) @@ -47,13 +49,20 @@ def _dates(*path: str) -> Iterator[str]: _xb.leaf('id', text=self._atom_id(_card_iri)) for _title in _strs('title'): _xb.leaf('title', text=_title) + for _filename in _strs('fileName'): + _xb.leaf('title', text=_filename) for _desc in _strs('description'): _xb.leaf('summary', text=_desc) for _keyword in _strs('keyword'): - _xb.leaf('category', text=_keyword) + _xb.leaf('category', {'term': _keyword}) for _created in _dates('dateCreated'): _xb.leaf('published', text=_created) - for _creator_obj in json_vals(_osfmap_json, 'creator'): + for _modified in _dates('dateModified'): + _xb.leaf('updated', text=_modified) + _creator_objs = list(json_vals(_osfmap_json, ['creator'])) + if not _creator_objs: + _creator_objs = list(json_vals(_osfmap_json, ['isContainedBy', 'creator'])) + for _creator_obj in _creator_objs: assert isinstance(_creator_obj, dict) with _xb.nest('author'): for _name in json_strs(_creator_obj, ['name']): @@ -61,8 +70,6 @@ def _dates(*path: str) -> Iterator[str]: _creator_iri = _creator_obj.get('@id') if _creator_iri: _xb.leaf('uri', text=_creator_iri) - for _sameas_iri in json_strs(_creator_obj, ['sameAs']): - _xb.leaf('uri', text=_sameas_iri) return EntireRendering( mediatype=self.MEDIATYPE, entire_content=bytes(_xb), diff --git a/trove/render/cardsearch_rss.py b/trove/render/cardsearch_rss.py index 0218e47b9..2d93ea54a 100644 --- a/trove/render/cardsearch_rss.py +++ b/trove/render/cardsearch_rss.py @@ -35,11 +35,19 @@ def _dates(*path: str) -> Iterator[str]: for _dt in json_datetimes(_osfmap_json, path): yield rfc2822_datetime(_dt) - _xb = XmlBuilder('rss', {'version': '2.0'}) + _xb = XmlBuilder('rss', { + 'version': '2.0', + 'xmlns:dc': 'http://purl.org/dc/elements/1.1/', + 'xmlns:atom': 'http://www.w3.org/2005/Atom', + }) with _xb.nest('channel'): # see https://www.rssboard.org/rss-specification#requiredChannelElements _xb.leaf('title', text=_('shtrove search results')) _xb.leaf('link', text=self.response_focus.single_iri()) + _xb.leaf('atom:link', { + 'rel': 'self', + 'href': self.response_focus.single_iri(), + }) _xb.leaf('description', text=_('feed of metadata records matching given filters')) _xb.leaf('webMaster', text=settings.SHARE_SUPPORT_EMAIL) for _card_iri, _osfmap_json in itertools.chain.from_iterable(card_pages): @@ -48,8 +56,8 @@ def _dates(*path: str) -> Iterator[str]: _iri = _osfmap_json.get('@id', _card_iri) _xb.leaf('link', text=_iri) _xb.leaf('guid', {'isPermaLink': 'true'}, text=_iri) - for _title in _strs('title'): - _xb.leaf('title', text=_title) + _titles = itertools.chain(_strs('title'), _strs('fileName')) + _xb.leaf('title', text=next(_titles, '')) for _desc in _strs('description'): _xb.leaf('description', text=_desc) for _keyword in _strs('keyword'): @@ -60,7 +68,7 @@ def _dates(*path: str) -> Iterator[str]: assert isinstance(_creator_obj, dict) _creator_name = next(json_strs(_creator_obj, ['name'])) _creator_id = _creator_obj.get('@id', _creator_name) - _xb.leaf('author', text=f'{_creator_id} ({_creator_name})') + _xb.leaf('dc:creator', text=f'{_creator_id} ({_creator_name})') return EntireRendering( mediatype=self.MEDIATYPE, entire_content=bytes(_xb), From 8d486496142cde36ea823e3a5cadc417046e3cb0 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 24 Oct 2025 12:05:11 -0400 Subject: [PATCH 2/2] add feed links to json renderer --- trove/render/trovesearch_json.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/trove/render/trovesearch_json.py b/trove/render/trovesearch_json.py index 06bd436ab..e5b4b4087 100644 --- a/trove/render/trovesearch_json.py +++ b/trove/render/trovesearch_json.py @@ -1,4 +1,5 @@ from __future__ import annotations +import itertools import json import re import typing @@ -6,6 +7,7 @@ from primitive_metadata import primitive_rdf as rdf from trove.vocab.jsonapi import ( + JSONAPI_LINK, JSONAPI_LINK_OBJECT, JSONAPI_MEMBERNAME, ) @@ -91,8 +93,9 @@ def _render_meta(self) -> dict[str, int | str]: def _render_links(self) -> JsonObject: _links = {} - for _pagelink in self._page_links: - _twopledict = rdf.twopledict_from_twopleset(_pagelink) + _response_links = self.response_gathering.ask(JSONAPI_LINK, focus=self.response_focus) + for _link_obj in itertools.chain(self._page_links, _response_links): + _twopledict = rdf.twopledict_from_twopleset(_link_obj) if JSONAPI_LINK_OBJECT in _twopledict.get(RDF.type, ()): (_membername,) = _twopledict[JSONAPI_MEMBERNAME] (_link_url,) = _twopledict[RDF.value]