diff --git a/TODO.md b/TODO.md index 4b9d41b16..6a3834d4b 100644 --- a/TODO.md +++ b/TODO.md @@ -4,13 +4,11 @@ ways to better this mess ## better shtrove api experience - better web-browsing experience - - when `Accept` header accepts html, use html regardless of query-params - - when query param `acceptMediatype` requests another mediatype, display on page in copy/pastable way - - exception: when given `withFileName`, download without html wrapping - - exception: `/trove/browse` should still give hypertext with clickable links - include more explanatory docs (and better fill out those explanations) - - more helpful (less erratic) visual design + - even more helpful (less erratic) visual design - in each html rendering of an api response, include a `
` for adding/editing/viewing query params + - in browsable html, replace json literals with rdf rendered like the rest of the page + - (perf) add bare-minimal IndexcardDeriver (iris, types, namelikes); use for search-result display - better tsv/csv experience - set default columns for `index-value-search` (and/or broadly improve `fields` handling) - better turtle experience diff --git a/api/middleware.py b/api/middleware.py index a27e1c2a4..72a7f82d7 100644 --- a/api/middleware.py +++ b/api/middleware.py @@ -27,7 +27,7 @@ def process_view(self, request, view_func, view_args, view_kwargs): if settings.HIDE_DEPRECATED_VIEWS and deprecation_level == DeprecationLevel.HIDDEN: return HttpResponse( - f'This path ({request.path}) has been removed. If you have built something that relies on it, please email us at share-support@osf.io', + f'This path ({request.path}) has been removed. If you have built something that relies on it, please email us at {settings.SHARE_SUPPORT_EMAIL}', status=410, ) diff --git a/api/views/feeds.py b/api/views/feeds.py index 85925591f..40378d1f8 100644 --- a/api/views/feeds.py +++ b/api/views/feeds.py @@ -1,3 +1,4 @@ +import datetime from xml.sax.saxutils import unescape import json import logging @@ -10,7 +11,6 @@ from share.search import index_strategy from share.search.exceptions import IndexStrategyError from share.util.xml import strip_illegal_xml_chars -from share.util.fromisoformat import fromisoformat logger = logging.getLogger(__name__) @@ -108,10 +108,10 @@ def item_author_name(self, item): return prepare_string('{}{}'.format(author_name, ' et al.' 
if len(authors) > 1 else '')) def item_pubdate(self, item): - return fromisoformat(item.get('date_published') or item.get('date_created')) + return datetime.datetime.fromisoformat(item.get('date_published') or item.get('date_created')) def item_updateddate(self, item): - return fromisoformat(item.get(self._order)) + return datetime.datetime.fromisoformat(item.get(self._order)) def item_categories(self, item): categories = item.get('subjects', []) diff --git a/project/settings.py b/project/settings.py index 96fa1d00d..95fed6109 100644 --- a/project/settings.py +++ b/project/settings.py @@ -445,6 +445,7 @@ def route_urgent_task(name, args, kwargs, options, task=None, **kw): PUBLIC_SENTRY_DSN = os.environ.get('PUBLIC_SENTRY_DSN') SHARE_WEB_URL = os.environ.get('SHARE_WEB_URL', 'http://localhost:8003').rstrip('/') + '/' +SHARE_SUPPORT_EMAIL = os.environ.get('SHARE_SUPPORT_EMAIL', 'share-support@cos.io') SHARE_USER_AGENT = os.environ.get('SHARE_USER_AGENT', 'SHAREbot/{} (+{})'.format(VERSION, SHARE_WEB_URL)) SHARE_ADMIN_USERNAME = os.environ.get('SHARE_ADMIN_USERNAME', 'admin') SHARE_ADMIN_PASSWORD = os.environ.get('SHARE_ADMIN_PASSWORD') diff --git a/share/models/index_backfill.py b/share/models/index_backfill.py index 93f18ab6a..7734cf292 100644 --- a/share/models/index_backfill.py +++ b/share/models/index_backfill.py @@ -185,5 +185,4 @@ def task__schedule_index_backfill(self, index_backfill_pk): except Exception as error: _index_backfill.pls_mark_error(error) raise error - else: - _index_backfill.pls_note_scheduling_has_finished() + _index_backfill.pls_note_scheduling_has_finished() diff --git a/share/oaipmh/indexcard_repository.py b/share/oaipmh/indexcard_repository.py index d9d855f75..72a3ee407 100644 --- a/share/oaipmh/indexcard_repository.py +++ b/share/oaipmh/indexcard_repository.py @@ -1,15 +1,16 @@ +import datetime import uuid from django.core.exceptions import ValidationError as DjangoValidationError +from django.conf import settings from django.db.models 
import OuterRef, Subquery, F from share.oaipmh import errors as oai_errors from share.oaipmh.verbs import OAIVerb from share.oaipmh.response_renderer import OAIRenderer -from share.oaipmh.util import format_datetime -from share.util.fromisoformat import fromisoformat from share import models as share_db from trove import models as trove_db +from trove.util.datetime import datetime_isoformat_z as format_datetime from trove.vocab.namespaces import OAI_DC @@ -18,7 +19,7 @@ class OaiPmhRepository: REPOSITORY_IDENTIFIER = 'share.osf.io' IDENTIFER_DELIMITER = ':' GRANULARITY = 'YYYY-MM-DD' - ADMIN_EMAILS = ['share-support@osf.io'] + ADMIN_EMAILS = [settings.SHARE_SUPPORT_EMAIL] # TODO better way of structuring this than a bunch of dictionaries? # this dictionary's keys are `metadataPrefix` values @@ -206,7 +207,7 @@ def _get_indexcard_page_queryset(self, kwargs, catch=True, last_id=None): ) if 'from' in kwargs: try: - _from = fromisoformat(kwargs['from']) + _from = datetime.datetime.fromisoformat(kwargs['from']) except ValueError: if not catch: raise @@ -217,7 +218,7 @@ def _get_indexcard_page_queryset(self, kwargs, catch=True, last_id=None): ) if 'until' in kwargs: try: - _until = fromisoformat(kwargs['until']) + _until = datetime.datetime.fromisoformat(kwargs['until']) except ValueError: if not catch: raise @@ -291,12 +292,12 @@ def _get_resumption_token(self, kwargs, last_id): _until = None if 'from' in kwargs: try: - _from = fromisoformat(kwargs['from']) + _from = datetime.datetime.fromisoformat(kwargs['from']) except ValueError: self.errors.append(oai_errors.BadArgument('Invalid value for', 'from')) if 'until' in kwargs: try: - _until = fromisoformat(kwargs['until']) + _until = datetime.datetime.fromisoformat(kwargs['until']) except ValueError: self.errors.append(oai_errors.BadArgument('Invalid value for', 'until')) _set_spec = kwargs.get('set', '') diff --git a/share/oaipmh/response_renderer.py b/share/oaipmh/response_renderer.py index c45aea770..c8e233e0a 100644 
--- a/share/oaipmh/response_renderer.py +++ b/share/oaipmh/response_renderer.py @@ -4,7 +4,8 @@ from django.urls import reverse -from share.oaipmh.util import format_datetime, SubEl, ns, nsmap +from share.oaipmh.util import SubEl, ns, nsmap +from trove.util.datetime import datetime_isoformat_z as format_datetime class OAIRenderer: diff --git a/share/oaipmh/util.py b/share/oaipmh/util.py index 413ac0173..a7457d4ef 100644 --- a/share/oaipmh/util.py +++ b/share/oaipmh/util.py @@ -1,23 +1,11 @@ -import datetime from typing import Any from lxml import etree from primitive_metadata import primitive_rdf -from share.util.fromisoformat import fromisoformat from trove.vocab.namespaces import OAI, OAI_DC -def format_datetime(dt: datetime.datetime | primitive_rdf.Literal | str) -> str: - """OAI-PMH has specific time format requirements -- comply. - """ - if isinstance(dt, primitive_rdf.Literal): - dt = dt.unicode_value - if isinstance(dt, str): - dt = fromisoformat(dt) - return dt.strftime('%Y-%m-%dT%H:%M:%SZ') - - XML_NAMESPACES = { 'dc': 'http://purl.org/dc/elements/1.1/', 'oai': str(OAI), diff --git a/share/util/fromisoformat.py b/share/util/fromisoformat.py deleted file mode 100644 index 92ac3d4a8..000000000 --- a/share/util/fromisoformat.py +++ /dev/null @@ -1,10 +0,0 @@ -import datetime -import re - - -def fromisoformat(date_str: str) -> datetime.datetime: - # wrapper around `datetime.datetime.fromisoformat` that supports "Z" UTC suffix - # (may be removed in python 3.11+, when `fromisoformat` handles more iso-6801 formats) - return datetime.datetime.fromisoformat( - re.sub('Z$', '+00:00', date_str), # replace "Z" shorthand with explicit timezone offset - ) diff --git a/share/util/xml.py b/share/util/xml.py index d0979954c..6ff13f829 100644 --- a/share/util/xml.py +++ b/share/util/xml.py @@ -15,5 +15,5 @@ ) -def strip_illegal_xml_chars(string): +def strip_illegal_xml_chars(string: str) -> str: return RE_XML_ILLEGAL.sub('', string) diff --git a/templates/admin/login.html 
b/templates/admin/login.html new file mode 100644 index 000000000..dbe59e29c --- /dev/null +++ b/templates/admin/login.html @@ -0,0 +1,3 @@ +{% extends "admin/login.html" %} + +{% block content %}{{ block.super }}login with osf{% endblock %} diff --git a/templates/allauth/login_errored_cancelled.html b/templates/allauth/login_errored_cancelled.html index c850a15ec..f7a26ffe1 100644 --- a/templates/allauth/login_errored_cancelled.html +++ b/templates/allauth/login_errored_cancelled.html @@ -3,9 +3,6 @@ {% load static %} Login Failed - - -
diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index 3d5f51e58..c7146a762 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -117,10 +117,10 @@ def test_cardsearch_pagination(self): })) self._index_indexcards(_cards) # gather all pages results: - _querystring: str = f'page[size]={_page_size}' + _querystring: str | None = f'page[size]={_page_size}' _result_iris: set[str] = set() _page_count = 0 - while True: + while _querystring is not None: _cardsearch_handle = self.index_strategy.pls_handle_cardsearch( CardsearchParams.from_querystring(_querystring), ) @@ -133,9 +133,11 @@ def test_cardsearch_pagination(self): _result_iris.update(_page_iris) _page_count += 1 _next_cursor = _cardsearch_handle.cursor.next_cursor() - if _next_cursor is None: - break - _querystring = urlencode({'page[cursor]': _next_cursor.as_queryparam_value()}) + _querystring = ( + urlencode({'page[cursor]': _next_cursor.as_queryparam_value()}) + if _next_cursor is not None + else None # done + ) self.assertEqual(_page_count, math.ceil(_total_count / _page_size)) self.assertEqual(_result_iris, _expected_iris) diff --git a/tests/share/search/index_strategy/_with_real_services.py b/tests/share/search/index_strategy/_with_real_services.py index a4219b312..ec4076668 100644 --- a/tests/share/search/index_strategy/_with_real_services.py +++ b/tests/share/search/index_strategy/_with_real_services.py @@ -48,12 +48,6 @@ def tearDown(self): connections['default']._test_serialized_contents ) - def enterContext(self, context_manager): - # TestCase.enterContext added in python3.11 -- implementing here until then - result = context_manager.__enter__() - self.addCleanup(lambda: context_manager.__exit__(None, None, None)) - return result - @contextlib.contextmanager def _daemon_up(self): _daemon_control = 
IndexerDaemonControl(celery_app) diff --git a/tests/share/test_oaipmh_trove.py b/tests/share/test_oaipmh_trove.py index 0bdd7df1b..64b0e0b93 100644 --- a/tests/share/test_oaipmh_trove.py +++ b/tests/share/test_oaipmh_trove.py @@ -8,8 +8,8 @@ import pytest from share import models as share_db -from share.oaipmh.util import format_datetime from trove import models as trove_db +from trove.util.datetime import datetime_isoformat_z as format_datetime from trove.vocab.namespaces import OAI_DC from tests import factories @@ -232,11 +232,9 @@ def _assert_full_list(self, verb, params, request_method, expected_count, page_s pages = 0 count = 0 token = None - while True: - if token: - parsed = oai_request({'verb': verb, 'resumptionToken': token}, request_method) - else: - parsed = oai_request({'verb': verb, 'metadataPrefix': 'oai_dc', **params}, request_method) + next_params: dict[str, str] | None = {'verb': verb, 'metadataPrefix': 'oai_dc', **params} + while next_params is not None: + parsed = oai_request(next_params, request_method) page = parsed.xpath('//oai:header/oai:identifier', namespaces=NAMESPACES) pages += 1 count += len(page) @@ -245,9 +243,10 @@ def _assert_full_list(self, verb, params, request_method, expected_count, page_s token = token[0].text if token: assert len(page) == page_size + next_params = {'verb': verb, 'resumptionToken': token} else: assert len(page) <= page_size - break + next_params = None # done assert count == expected_count assert pages == math.ceil(expected_count / page_size) diff --git a/tests/trove/_input_output_tests.py b/tests/trove/_input_output_tests.py index 90590fda9..72ec269f6 100644 --- a/tests/trove/_input_output_tests.py +++ b/tests/trove/_input_output_tests.py @@ -28,12 +28,12 @@ def assert_outputs_equal(self, expected_output: typing.Any, actual_output: typin self.assertEqual(expected_output, actual_output) # (optional override, for when logic is more complicated) - def run_input_output_test(self, given_input, expected_output): + 
def run_input_output_test(self, given_input: typing.Any, expected_output: typing.Any) -> None: _actual_output = self.compute_output(given_input) self.assert_outputs_equal(expected_output, _actual_output) # (optional override, for when logic is more complicated) - def missing_case(self, name: str, given_input): + def missing_case(self, name: str, given_input: typing.Any) -> typing.Never: _cls = self.__class__ _actual_output = self.compute_output(given_input) raise NotImplementedError('\n'.join(( @@ -43,16 +43,10 @@ def missing_case(self, name: str, given_input): pprint.pformat(_actual_output), ))) - def enterContext(self, context_manager): - # TestCase.enterContext added in python3.11 -- implementing here until then - result = context_manager.__enter__() - self.addCleanup(lambda: context_manager.__exit__(None, None, None)) - return result - ### # private details - def __init_subclass__(cls, **kwargs): + def __init_subclass__(cls, **kwargs: typing.Any) -> None: super().__init_subclass__(**kwargs) # HACK: assign `test_*` method only on concrete subclasses, # so the test runner doesn't try instantiating a base class diff --git a/tests/trove/digestive_tract/test_expel.py b/tests/trove/digestive_tract/test_expel.py index 7f2345eb2..333280a80 100644 --- a/tests/trove/digestive_tract/test_expel.py +++ b/tests/trove/digestive_tract/test_expel.py @@ -40,12 +40,6 @@ def setUp(self): def _replacement_notify_indexcard_update(self, indexcards, **kwargs): self.notified_indexcard_ids.update(_card.id for _card in indexcards) - def enterContext(self, context_manager): - # TestCase.enterContext added in python3.11 -- implementing here until then - result = context_manager.__enter__() - self.addCleanup(lambda: context_manager.__exit__(None, None, None)) - return result - def test_setup(self): self.indexcard_1.refresh_from_db() self.indexcard_2.refresh_from_db() diff --git a/tests/trove/render/_base.py b/tests/trove/render/_base.py index 94b8f94a8..7e5b59ab9 100644 --- 
a/tests/trove/render/_base.py +++ b/tests/trove/render/_base.py @@ -1,4 +1,5 @@ import json +import typing from primitive_metadata import ( gather, @@ -7,7 +8,7 @@ from trove.trovesearch.trovesearch_gathering import trovesearch_by_indexstrategy from trove.render._base import BaseRenderer -from trove.render._rendering import ProtoRendering +from trove.render.rendering import ProtoRendering from trove.vocab.namespaces import RDF from tests.trove._input_output_tests import BasicInputOutputTestCase from ._inputs import UNRENDERED_RDF, UNRENDERED_SEARCH_RDF, RdfCase @@ -56,7 +57,7 @@ def compute_output(self, given_input: RdfCase): ) return _renderer.render_document() - def assert_outputs_equal(self, expected_output, actual_output) -> None: + def assert_outputs_equal(self, expected_output: typing.Any, actual_output: typing.Any) -> None: if expected_output is None: print(repr(actual_output)) raise NotImplementedError @@ -66,9 +67,9 @@ def assert_outputs_equal(self, expected_output, actual_output) -> None: self._get_rendered_output(actual_output), ) - def _get_rendered_output(self, rendering: ProtoRendering): + def _get_rendered_output(self, rendering: ProtoRendering) -> str: # for now, they always iter strings (update if/when bytes are in play) - return ''.join(rendering.iter_content()) # type: ignore[arg-type] + return ''.join(map(str, rendering.iter_content())) class TrovesearchRendererTests(TroveRendererTests): diff --git a/tests/trove/render/_inputs.py b/tests/trove/render/_inputs.py index 29d6cb9ad..3ca9c9151 100644 --- a/tests/trove/render/_inputs.py +++ b/tests/trove/render/_inputs.py @@ -29,7 +29,7 @@ class RdfCase: DCTERMS.issued: {rdf.literal(datetime.date(2024, 1, 1))}, DCTERMS.modified: {rdf.literal(datetime.date(2024, 1, 1))}, TROVE.resourceMetadata: {rdf.literal( - json.dumps({'@id': BLARG.anItem, 'title': 'an item, yes'}), + json.dumps({'@id': BLARG.anItem, 'title': [{'@value': 'an item, yes'}]}), datatype_iris=RDF.JSON, )}, }, @@ -83,7 +83,7 @@ class 
RdfCase: DCTERMS.issued: {rdf.literal(datetime.date(2024, 1, 1))}, DCTERMS.modified: {rdf.literal(datetime.date(2024, 1, 1))}, TROVE.resourceMetadata: {rdf.literal( - json.dumps({'@id': BLARG.anItem, 'title': 'an item, yes'}), + json.dumps({'@id': BLARG.anItem, 'title': [{'@value': 'an item, yes'}]}), datatype_iris=RDF.JSON, )}, }, @@ -94,7 +94,7 @@ class RdfCase: DCTERMS.issued: {rdf.literal(datetime.date(2024, 2, 2))}, DCTERMS.modified: {rdf.literal(datetime.date(2024, 2, 2))}, TROVE.resourceMetadata: {rdf.literal( - json.dumps({'@id': BLARG.anItemm, 'title': 'an itemm, yes'}), + json.dumps({'@id': BLARG.anItemm, 'title': [{'@value': 'an itemm, yes'}]}), datatype_iris=RDF.JSON, )}, }, @@ -105,7 +105,31 @@ class RdfCase: DCTERMS.issued: {rdf.literal(datetime.date(2024, 3, 3))}, DCTERMS.modified: {rdf.literal(datetime.date(2024, 3, 3))}, TROVE.resourceMetadata: {rdf.literal( - json.dumps({'@id': BLARG.anItemmm, 'title': 'an itemmm, yes'}), + json.dumps({ + '@id': BLARG.anItemmm, + "sameAs": [ + {"@id": "https://doi.example/13.0/anItemmm"} + ], + 'title': [{'@value': 'an itemmm, yes'}], + "creator": [ + { + "@id": BLARG.aPerson, + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Person"} + ], + "identifier": [ + {"@value": BLARG.aPerson} + ], + "name": [ + {"@value": "a person indeed"} + ] + } + ], + "dateCreated": [ + {"@value": "2001-02-03"} + ], + }), datatype_iris=RDF.JSON, )}, }, diff --git a/tests/trove/render/test_cardsearch_atom_renderer.py b/tests/trove/render/test_cardsearch_atom_renderer.py new file mode 100644 index 000000000..c07e35c3e --- /dev/null +++ b/tests/trove/render/test_cardsearch_atom_renderer.py @@ -0,0 +1,48 @@ +from trove.render.cardsearch_atom import CardsearchAtomRenderer +from trove.render.rendering import EntireRendering +from . 
import _base + + +# note: cardsearch only -- this renderer doesn't do arbitrary rdf + +class TestCardsearchAtomRenderer(_base.TrovesearchRendererTests): + renderer_class = CardsearchAtomRenderer + expected_outputs = { + 'no_results': EntireRendering( + mediatype='application/atom+xml', + entire_content=( + b"\n" + b'' + b'shtrove search results' + b'feed of metadata records matching given filters' + b'http://blarg.example/vocab/aSearch' + b'http://blarg.example/vocab/aSearch' + b'' + ), + ), + 'few_results': EntireRendering( + mediatype='application/atom+xml', + entire_content=( + b"\n" + b'' + b'shtrove search results' + b'feed of metadata records matching given filters' + b'http://blarg.example/vocab/aSearchFew' + b'http://blarg.example/vocab/aSearchFew' + b'' + b'' + b'http://blarg.example/vocab/aCard' + b'an item, yes' + b'' + b'' + b'http://blarg.example/vocab/aCardd' + b'an itemm, yes' + b'' + b'' + b'http://blarg.example/vocab/aCarddd' + b'an itemmm, yes' + b'2001-02-03T00:00:00Z' + b'' + ), + ), + } diff --git a/tests/trove/render/test_cardsearch_rss_renderer.py b/tests/trove/render/test_cardsearch_rss_renderer.py new file mode 100644 index 000000000..237a6b6da --- /dev/null +++ b/tests/trove/render/test_cardsearch_rss_renderer.py @@ -0,0 +1,50 @@ +from trove.render.cardsearch_rss import CardsearchRssRenderer +from trove.render.rendering import EntireRendering +from . 
import _base + + +# note: cardsearch only -- this renderer doesn't do arbitrary rdf + +class TestCardsearchRssRenderer(_base.TrovesearchRendererTests): + renderer_class = CardsearchRssRenderer + expected_outputs = { + 'no_results': EntireRendering( + mediatype='application/rss+xml', + entire_content=( + b"\n" + b'' + b'' + b'shtrove search results' + b'http://blarg.example/vocab/aSearch' + b'feed of metadata records matching given filters' + b'share-support@cos.io' + b'' + ), + ), + 'few_results': EntireRendering( + mediatype='application/rss+xml', + entire_content=( + b"\n" + b'' + b'shtrove search results' + b'http://blarg.example/vocab/aSearchFew' + b'feed of metadata records matching given filters' + b'share-support@cos.io' + b'' + b'http://blarg.example/vocab/anItem' + b'http://blarg.example/vocab/anItem' + b'an item, yes' + b'' + b'http://blarg.example/vocab/anItemm' + b'http://blarg.example/vocab/anItemm' + b'an itemm, yes' + b'' + b'http://blarg.example/vocab/anItemmm' + b'http://blarg.example/vocab/anItemmm' + b'an itemmm, yes' + b'Sat, 03 Feb 2001 00:00:00 -0000' + b'http://blarg.example/vocab/aPerson (a person indeed)' + b'' + ), + ), + } diff --git a/tests/trove/render/test_html_browse_renderer.py b/tests/trove/render/test_html_browse_renderer.py new file mode 100644 index 000000000..ee740248c --- /dev/null +++ b/tests/trove/render/test_html_browse_renderer.py @@ -0,0 +1,31 @@ +import html +import typing + +from trove.render.html_browse import RdfHtmlBrowseRenderer +from . 
import _base + + +# note: smoke tests only (TODO: better) + +class TestTrovesearchHtmlRenderer(_base.TrovesearchRendererTests): + renderer_class = RdfHtmlBrowseRenderer + expected_outputs = { + 'no_results': { + 'mediatype': 'text/html', + 'result_iris': [], + }, + 'few_results': { + 'mediatype': 'text/html', + 'result_iris': [ + 'http://blarg.example/vocab/anItem', + 'http://blarg.example/vocab/anItemm', + 'http://blarg.example/vocab/anItemmm', + ], + }, + } + + def assert_outputs_equal(self, expected_output: typing.Any, actual_output: typing.Any) -> None: + self.assertEqual(actual_output.mediatype, expected_output['mediatype']) + # smoke tests -- instead of asserting full rendered html page, just check the results are in there + for _result_iri in expected_output['result_iris']: + self.assertIn(html.escape(_result_iri), actual_output.entire_content) diff --git a/tests/trove/render/test_jsonapi_renderer.py b/tests/trove/render/test_jsonapi_renderer.py index 9357c5ff6..de3019739 100644 --- a/tests/trove/render/test_jsonapi_renderer.py +++ b/tests/trove/render/test_jsonapi_renderer.py @@ -2,7 +2,7 @@ from unittest import mock from trove.render.jsonapi import RdfJsonapiRenderer -from trove.render._rendering import SimpleRendering +from trove.render.rendering import EntireRendering from trove.vocab.namespaces import BLARG from . 
import _base @@ -31,9 +31,9 @@ def _get_rendered_output(self, rendering): class TestJsonapiRenderer(_BaseJsonapiRendererTest): expected_outputs = { - 'simple_card': SimpleRendering( + 'simple_card': EntireRendering( mediatype='application/vnd.api+json', - rendered_content=json.dumps({ + entire_content=json.dumps({ "data": { "id": "blarg:aCard", "type": "index-card", @@ -43,7 +43,7 @@ class TestJsonapiRenderer(_BaseJsonapiRendererTest): ], "resourceMetadata": { "@id": BLARG.anItem, - "title": "an item, yes" + "title": [{"@value": "an item, yes"}] } }, "links": { @@ -63,9 +63,9 @@ class TestJsonapiRenderer(_BaseJsonapiRendererTest): } }), ), - 'various_types': SimpleRendering( + 'various_types': EntireRendering( mediatype='application/vnd.api+json', - rendered_content=json.dumps({ + entire_content=json.dumps({ "data": { "id": "blarg:aSubject", "type": "blarg:aType", @@ -86,9 +86,9 @@ class TestJsonapiRenderer(_BaseJsonapiRendererTest): class TestJsonapiSearchRenderer(_BaseJsonapiRendererTest, _base.TrovesearchJsonRendererTests): expected_outputs = { - 'no_results': SimpleRendering( + 'no_results': EntireRendering( mediatype='application/vnd.api+json', - rendered_content=json.dumps({ + entire_content=json.dumps({ "data": { "id": "blarg:aSearch", "type": "index-card-search", @@ -101,9 +101,9 @@ class TestJsonapiSearchRenderer(_BaseJsonapiRendererTest, _base.TrovesearchJsonR } }), ), - 'few_results': SimpleRendering( + 'few_results': EntireRendering( mediatype='application/vnd.api+json', - rendered_content=json.dumps({ + entire_content=json.dumps({ "data": { "id": "blarg:aSearchFew", "type": "index-card-search", @@ -189,7 +189,7 @@ class TestJsonapiSearchRenderer(_BaseJsonapiRendererTest, _base.TrovesearchJsonR ], "resourceMetadata": { "@id": BLARG.anItem, - "title": "an item, yes" + "title": [{"@value": "an item, yes"}] } }, "links": { @@ -215,8 +215,29 @@ class TestJsonapiSearchRenderer(_BaseJsonapiRendererTest, _base.TrovesearchJsonR BLARG.anItemmm ], 
"resourceMetadata": { - "@id": BLARG.anItemmm, - "title": "an itemmm, yes" + '@id': BLARG.anItemmm, + "sameAs": [ + {"@id": "https://doi.example/13.0/anItemmm"} + ], + 'title': [{'@value': 'an itemmm, yes'}], + "creator": [ + { + "@id": BLARG.aPerson, + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Person"} + ], + "identifier": [ + {"@value": BLARG.aPerson} + ], + "name": [ + {"@value": "a person indeed"} + ] + } + ], + "dateCreated": [ + {"@value": "2001-02-03"} + ], } }, "links": { @@ -243,7 +264,7 @@ class TestJsonapiSearchRenderer(_BaseJsonapiRendererTest, _base.TrovesearchJsonR ], "resourceMetadata": { "@id": BLARG.anItemm, - "title": "an itemm, yes" + "title": [{"@value": "an itemm, yes"}] } }, "links": { diff --git a/tests/trove/render/test_jsonld_renderer.py b/tests/trove/render/test_jsonld_renderer.py index eef657f1d..c983cad19 100644 --- a/tests/trove/render/test_jsonld_renderer.py +++ b/tests/trove/render/test_jsonld_renderer.py @@ -1,8 +1,8 @@ import json from trove.render.jsonld import RdfJsonldRenderer -from trove.render._rendering import SimpleRendering -from ._inputs import BLARG +from trove.render.rendering import EntireRendering +from trove.vocab.namespaces import BLARG from . 
import _base @@ -10,9 +10,9 @@ class TestJsonldRenderer(_base.TroveJsonRendererTests): renderer_class = RdfJsonldRenderer expected_outputs = { - 'simple_card': SimpleRendering( + 'simple_card': EntireRendering( mediatype='application/ld+json', - rendered_content=json.dumps({ + entire_content=json.dumps({ "@id": "blarg:aCard", "dcterms:issued": [ { @@ -38,13 +38,13 @@ class TestJsonldRenderer(_base.TroveJsonRendererTests): ], "trove:resourceMetadata": { "@id": BLARG.anItem, - "title": "an item, yes" + "title": [{"@value": "an item, yes"}] } }), ), - 'various_types': SimpleRendering( + 'various_types': EntireRendering( mediatype='application/ld+json', - rendered_content=json.dumps({ + entire_content=json.dumps({ "@id": "blarg:aSubject", "blarg:hasDateLiteral": [ { @@ -88,9 +88,9 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): renderer_class = RdfJsonldRenderer expected_outputs = { - 'no_results': SimpleRendering( + 'no_results': EntireRendering( mediatype='application/ld+json', - rendered_content=json.dumps({ + entire_content=json.dumps({ "@id": "blarg:aSearch", "rdf:type": [ {"@id": "trove:Cardsearch"} @@ -101,9 +101,9 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): } }), ), - 'few_results': SimpleRendering( + 'few_results': EntireRendering( mediatype='application/ld+json', - rendered_content=json.dumps({ + entire_content=json.dumps({ "@id": "blarg:aSearchFew", "rdf:type": [ {"@id": "trove:Cardsearch"} @@ -145,7 +145,7 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): ], "trove:resourceMetadata": { "@id": BLARG.anItem, - "title": "an item, yes" + "title": [{"@value": "an item, yes"}] } } }, @@ -181,7 +181,7 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): ], "trove:resourceMetadata": { "@id": BLARG.anItemm, - "title": "an itemm, yes" + "title": [{"@value": "an itemm, yes"}] } } }, @@ -214,8 +214,29 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): {"@value": 
BLARG.anItemmm} ], "trove:resourceMetadata": { - "@id": BLARG.anItemmm, - "title": "an itemmm, yes" + '@id': BLARG.anItemmm, + "sameAs": [ + {"@id": "https://doi.example/13.0/anItemmm"} + ], + 'title': [{'@value': 'an itemmm, yes'}], + "creator": [ + { + "@id": BLARG.aPerson, + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Person"} + ], + "identifier": [ + {"@value": BLARG.aPerson} + ], + "name": [ + {"@value": "a person indeed"} + ] + } + ], + "dateCreated": [ + {"@value": "2001-02-03"} + ], } } } diff --git a/tests/trove/render/test_simple_csv_renderer.py b/tests/trove/render/test_simple_csv_renderer.py deleted file mode 100644 index ca06aa273..000000000 --- a/tests/trove/render/test_simple_csv_renderer.py +++ /dev/null @@ -1,24 +0,0 @@ -from trove.render.simple_csv import TrovesearchSimpleCsvRenderer -from trove.render._rendering import SimpleRendering -from . import _base - - -# note: trovesearch only -- this renderer doesn't do arbitrary rdf - -class TestSimpleCsvRenderer(_base.TrovesearchRendererTests): - renderer_class = TrovesearchSimpleCsvRenderer - expected_outputs = { - 'no_results': SimpleRendering( - mediatype='text/csv', - rendered_content='@id,sameAs,resourceType,resourceNature,title,name,dateCreated,dateModified,rights\r\n', - ), - 'few_results': SimpleRendering( - mediatype='text/csv', - rendered_content=''.join(( - '@id,sameAs,resourceType,resourceNature,title,name,dateCreated,dateModified,rights\r\n', - 'http://blarg.example/vocab/anItem,,,,"an item, yes",,,,\r\n', - 'http://blarg.example/vocab/anItemm,,,,"an itemm, yes",,,,\r\n', - 'http://blarg.example/vocab/anItemmm,,,,"an itemmm, yes",,,,\r\n', - )), - ), - } diff --git a/tests/trove/render/test_simple_json_renderer.py b/tests/trove/render/test_simple_json_renderer.py deleted file mode 100644 index 7f59c8a59..000000000 --- a/tests/trove/render/test_simple_json_renderer.py +++ /dev/null @@ -1,62 +0,0 @@ -import json - -from trove.render.simple_json import TrovesearchSimpleJsonRenderer -from 
trove.render._rendering import SimpleRendering -from trove.vocab.namespaces import BLARG -from . import _base - - -# note: trovesearch only -- this renderer doesn't do arbitrary rdf - -class TestSimpleJsonRenderer(_base.TrovesearchJsonRendererTests): - renderer_class = TrovesearchSimpleJsonRenderer - expected_outputs = { - 'no_results': SimpleRendering( - mediatype='application/json', - rendered_content=json.dumps({ - "data": [], - "links": {}, - "meta": { - "total": 0 - } - }), - ), - 'few_results': SimpleRendering( - mediatype='application/json', - rendered_content=json.dumps({ - "data": [ - { - "@id": BLARG.anItem, - "title": "an item, yes", - "foaf:isPrimaryTopicOf": [ - { - "@id": BLARG.aCard - } - ] - }, - { - "@id": BLARG.anItemm, - "title": "an itemm, yes", - "foaf:isPrimaryTopicOf": [ - { - "@id": BLARG.aCardd - } - ] - }, - { - "@id": BLARG.anItemmm, - "title": "an itemmm, yes", - "foaf:isPrimaryTopicOf": [ - { - "@id": BLARG.aCarddd - } - ] - } - ], - "links": {}, - "meta": { - "total": 3 - } - }), - ), - } diff --git a/tests/trove/render/test_simple_tsv_renderer.py b/tests/trove/render/test_simple_tsv_renderer.py deleted file mode 100644 index 752493362..000000000 --- a/tests/trove/render/test_simple_tsv_renderer.py +++ /dev/null @@ -1,24 +0,0 @@ -from trove.render.simple_tsv import TrovesearchSimpleTsvRenderer -from trove.render._rendering import SimpleRendering -from . 
import _base - - -# note: trovesearch only -- this renderer doesn't do arbitrary rdf - -class TestSimpleTsvRenderer(_base.TrovesearchRendererTests): - renderer_class = TrovesearchSimpleTsvRenderer - expected_outputs = { - 'no_results': SimpleRendering( - mediatype='text/tab-separated-values', - rendered_content='@id\tsameAs\tresourceType\tresourceNature\ttitle\tname\tdateCreated\tdateModified\trights\r\n', - ), - 'few_results': SimpleRendering( - mediatype='text/tab-separated-values', - rendered_content=''.join(( - '@id\tsameAs\tresourceType\tresourceNature\ttitle\tname\tdateCreated\tdateModified\trights\r\n', - 'http://blarg.example/vocab/anItem\t\t\t\tan item, yes\t\t\t\t\r\n', - 'http://blarg.example/vocab/anItemm\t\t\t\tan itemm, yes\t\t\t\t\r\n', - 'http://blarg.example/vocab/anItemmm\t\t\t\tan itemmm, yes\t\t\t\t\r\n', - )), - ), - } diff --git a/tests/trove/render/test_trovesearch_csv_renderer.py b/tests/trove/render/test_trovesearch_csv_renderer.py new file mode 100644 index 000000000..aa31651d1 --- /dev/null +++ b/tests/trove/render/test_trovesearch_csv_renderer.py @@ -0,0 +1,24 @@ +from trove.render.trovesearch_csv import TrovesearchCsvRenderer +from trove.render.rendering import EntireRendering +from . 
import _base + + +# note: trovesearch only -- this renderer doesn't do arbitrary rdf + +class TestTrovesearchCsvRenderer(_base.TrovesearchRendererTests): + renderer_class = TrovesearchCsvRenderer + expected_outputs = { + 'no_results': EntireRendering( + mediatype='text/csv', + entire_content='@id,sameAs,resourceType,resourceNature,title,name,dateCreated,dateModified,rights\r\n', + ), + 'few_results': EntireRendering( + mediatype='text/csv', + entire_content=''.join(( + '@id,sameAs,resourceType,resourceNature,title,name,dateCreated,dateModified,rights\r\n', + 'http://blarg.example/vocab/anItem,,,,"an item, yes",,,,\r\n', + 'http://blarg.example/vocab/anItemm,,,,"an itemm, yes",,,,\r\n', + 'http://blarg.example/vocab/anItemmm,https://doi.example/13.0/anItemmm,,,"an itemmm, yes",,2001-02-03,,\r\n', + )), + ), + } diff --git a/tests/trove/render/test_trovesearch_json_renderer.py b/tests/trove/render/test_trovesearch_json_renderer.py new file mode 100644 index 000000000..a0a9c4ad0 --- /dev/null +++ b/tests/trove/render/test_trovesearch_json_renderer.py @@ -0,0 +1,71 @@ +import json + +from trove.render.trovesearch_json import TrovesearchJsonRenderer +from trove.render.rendering import EntireRendering +from trove.vocab.namespaces import BLARG +from . 
import _base + + +# note: trovesearch only -- this renderer doesn't do arbitrary rdf + +class TestTrovesearchJsonRenderer(_base.TrovesearchJsonRendererTests): + renderer_class = TrovesearchJsonRenderer + expected_outputs = { + 'no_results': EntireRendering( + mediatype='application/json', + entire_content=json.dumps({ + "data": [], + "links": {}, + "meta": { + "total": 0 + } + }), + ), + 'few_results': EntireRendering( + mediatype='application/json', + entire_content=json.dumps({ + "data": [ + { + "@id": BLARG.anItem, + "title": [{"@value": "an item, yes"}], + "foaf:isPrimaryTopicOf": [{"@id": BLARG.aCard}] + }, + { + "@id": BLARG.anItemm, + "title": [{"@value": "an itemm, yes"}], + "foaf:isPrimaryTopicOf": [{"@id": BLARG.aCardd}] + }, + { + '@id': BLARG.anItemmm, + "sameAs": [ + {"@id": "https://doi.example/13.0/anItemmm"} + ], + 'title': [{'@value': 'an itemmm, yes'}], + "creator": [ + { + "@id": BLARG.aPerson, + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Person"} + ], + "identifier": [ + {"@value": BLARG.aPerson} + ], + "name": [ + {"@value": "a person indeed"} + ] + } + ], + "dateCreated": [ + {"@value": "2001-02-03"} + ], + "foaf:isPrimaryTopicOf": [{"@id": BLARG.aCarddd}] + } + ], + "links": {}, + "meta": { + "total": 3 + } + }), + ), + } diff --git a/tests/trove/render/test_trovesearch_tsv_renderer.py b/tests/trove/render/test_trovesearch_tsv_renderer.py new file mode 100644 index 000000000..9d9782a82 --- /dev/null +++ b/tests/trove/render/test_trovesearch_tsv_renderer.py @@ -0,0 +1,24 @@ +from trove.render.trovesearch_tsv import TrovesearchTsvRenderer +from trove.render.rendering import EntireRendering +from . 
import _base + + +# note: trovesearch only -- this renderer doesn't do arbitrary rdf + +class TestTrovesearchTsvRenderer(_base.TrovesearchRendererTests): + renderer_class = TrovesearchTsvRenderer + expected_outputs = { + 'no_results': EntireRendering( + mediatype='text/tab-separated-values', + entire_content='@id\tsameAs\tresourceType\tresourceNature\ttitle\tname\tdateCreated\tdateModified\trights\r\n', + ), + 'few_results': EntireRendering( + mediatype='text/tab-separated-values', + entire_content=''.join(( + '@id\tsameAs\tresourceType\tresourceNature\ttitle\tname\tdateCreated\tdateModified\trights\r\n', + 'http://blarg.example/vocab/anItem\t\t\t\tan item, yes\t\t\t\t\r\n', + 'http://blarg.example/vocab/anItemm\t\t\t\tan itemm, yes\t\t\t\t\r\n', + 'http://blarg.example/vocab/anItemmm\thttps://doi.example/13.0/anItemmm\t\t\tan itemmm, yes\t\t2001-02-03\t\t\r\n', + )), + ), + } diff --git a/tests/trove/render/test_turtle_renderer.py b/tests/trove/render/test_turtle_renderer.py index 32f949278..3bf5ee3d8 100644 --- a/tests/trove/render/test_turtle_renderer.py +++ b/tests/trove/render/test_turtle_renderer.py @@ -1,7 +1,7 @@ from primitive_metadata import primitive_rdf as rdf from trove.render.turtle import RdfTurtleRenderer -from trove.render._rendering import SimpleRendering +from trove.render.rendering import EntireRendering from . import _base @@ -14,9 +14,9 @@ def _get_rendered_output(self, rendering): class TestTurtleRenderer(_BaseTurtleRendererTest): expected_outputs = { - 'simple_card': SimpleRendering( + 'simple_card': EntireRendering( mediatype='text/turtle', - rendered_content=''' + entire_content=''' @prefix blarg: . @prefix dcat: . @prefix dcterms: . 
@@ -30,12 +30,12 @@ class TestTurtleRenderer(_BaseTurtleRendererTest): dcterms:modified "2024-01-01"^^xsd:date ; foaf:primaryTopic blarg:anItem ; trove:focusIdentifier "http://blarg.example/vocab/anItem"^^rdf:string ; - trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItem\\", \\"title\\": \\"an item, yes\\"}"^^rdf:JSON . + trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItem\\", \\"title\\": [{\\"@value\\": \\"an item, yes\\"}]}"^^rdf:JSON . ''', ), - 'various_types': SimpleRendering( + 'various_types': EntireRendering( mediatype='text/turtle', - rendered_content=''' + entire_content=''' @prefix blarg: . @prefix rdf: . @prefix xsd: . @@ -54,9 +54,9 @@ class TestTurtleRenderer(_BaseTurtleRendererTest): class TestTurtleTrovesearchRenderer(_BaseTurtleRendererTest, _base.TrovesearchRendererTests): expected_outputs = { - 'no_results': SimpleRendering( + 'no_results': EntireRendering( mediatype='text/turtle', - rendered_content=''' + entire_content=''' @prefix blarg: . @prefix trove: . @prefix xsd: . @@ -65,9 +65,9 @@ class TestTurtleTrovesearchRenderer(_BaseTurtleRendererTest, _base.TrovesearchRe trove:totalResultCount 0 . ''', ), - 'few_results': SimpleRendering( + 'few_results': EntireRendering( mediatype='text/turtle', - rendered_content=''' + entire_content=''' @prefix blarg: . @prefix dcat: . @prefix dcterms: . @@ -99,21 +99,21 @@ class TestTurtleTrovesearchRenderer(_BaseTurtleRendererTest, _base.TrovesearchRe dcterms:modified "2024-01-01"^^xsd:date ; foaf:primaryTopic blarg:anItem ; trove:focusIdentifier "http://blarg.example/vocab/anItem"^^rdf:string ; - trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItem\\", \\"title\\": \\"an item, yes\\"}"^^rdf:JSON . + trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItem\\", \\"title\\": [{\\"@value\\": \\"an item, yes\\"}]}"^^rdf:JSON . 
blarg:aCardd a dcat:CatalogRecord, trove:Indexcard ; dcterms:issued "2024-02-02"^^xsd:date ; dcterms:modified "2024-02-02"^^xsd:date ; foaf:primaryTopic blarg:anItemm ; trove:focusIdentifier "http://blarg.example/vocab/anItemm"^^rdf:string ; - trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItemm\\", \\"title\\": \\"an itemm, yes\\"}"^^rdf:JSON . + trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItemm\\", \\"title\\": [{\\"@value\\": \\"an itemm, yes\\"}]}"^^rdf:JSON . blarg:aCarddd a dcat:CatalogRecord, trove:Indexcard ; dcterms:issued "2024-03-03"^^xsd:date ; dcterms:modified "2024-03-03"^^xsd:date ; foaf:primaryTopic blarg:anItemmm ; trove:focusIdentifier "http://blarg.example/vocab/anItemmm"^^rdf:string ; - trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItemmm\\", \\"title\\": \\"an itemmm, yes\\"}"^^rdf:JSON . + trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItemmm\\", \\"sameAs\\": [{\\"@id\\": \\"https://doi.example/13.0/anItemmm\\"}], \\"title\\": [{\\"@value\\": \\"an itemmm, yes\\"}], \\"creator\\": [{\\"@id\\": \\"http://blarg.example/vocab/aPerson\\", \\"resourceType\\": [{\\"@id\\": \\"Agent\\"}, {\\"@id\\": \\"Person\\"}], \\"identifier\\": [{\\"@value\\": \\"http://blarg.example/vocab/aPerson\\"}], \\"name\\": [{\\"@value\\": \\"a person indeed\\"}]}], \\"dateCreated\\": [{\\"@value\\": \\"2001-02-03\\"}]}"^^rdf:JSON . 
''', ), } diff --git a/tests/trove/test_doctest.py b/tests/trove/test_doctest.py index 18c77a18b..06baf8993 100644 --- a/tests/trove/test_doctest.py +++ b/tests/trove/test_doctest.py @@ -3,7 +3,9 @@ import trove.util.chainmap import trove.util.frozen import trove.util.iris +import trove.util.iter import trove.util.propertypath +import trove.vocab.mediatypes _DOCTEST_OPTIONFLAGS = ( doctest.ELLIPSIS @@ -14,7 +16,9 @@ trove.util.chainmap, trove.util.frozen, trove.util.iris, + trove.util.iter, trove.util.propertypath, + trove.vocab.mediatypes, ) diff --git a/trove/derive/oaidc_xml.py b/trove/derive/oaidc_xml.py index 610fb49fc..e8d3e0967 100644 --- a/trove/derive/oaidc_xml.py +++ b/trove/derive/oaidc_xml.py @@ -2,8 +2,9 @@ from lxml import etree from primitive_metadata import primitive_rdf as rdf -from share.oaipmh.util import format_datetime, ns, nsmap, SubEl +from share.oaipmh.util import ns, nsmap, SubEl +from trove.util.datetime import datetime_isoformat_z as format_datetime from trove.vocab.namespaces import ( DCTYPE, DCTERMS, diff --git a/trove/derive/osfmap_json.py b/trove/derive/osfmap_json.py index 69de39b26..21d3e2fad 100644 --- a/trove/derive/osfmap_json.py +++ b/trove/derive/osfmap_json.py @@ -151,8 +151,7 @@ def _list_or_single_value(self, predicate_iri: str, json_list: list[JsonValue]) (_only_obj,) = json_list except ValueError: return None - else: - return _only_obj + return _only_obj return ( sorted(json_list, key=json.dumps) if len(json_list) > 1 diff --git a/trove/links.py b/trove/links.py new file mode 100644 index 000000000..ae8feadeb --- /dev/null +++ b/trove/links.py @@ -0,0 +1,58 @@ +import dataclasses +import urllib.parse + +from django.conf import settings +from django.http import QueryDict +from django.urls import reverse + +from trove.vocab.namespaces import namespaces_shorthand + + +def is_local_url(iri: str) -> bool: + return iri.startswith(settings.SHARE_WEB_URL) + + +def trove_browse_link(iri: str) -> str: + return reverse( + 
'trove:browse-iri', + query={ + 'blendCards': True, + 'iri': namespaces_shorthand().compact_iri(iri), + }, + ) + + +@dataclasses.dataclass +class FeedLinks: + rss: str + atom: str + + +def cardsearch_feed_links(cardsearch_iri: str) -> FeedLinks | None: + _split_iri = urllib.parse.urlsplit(cardsearch_iri) + if _split_iri.path != reverse('trove:index-card-search'): + return None + _feed_query = _get_feed_query(_split_iri.query) + _rss_link = urllib.parse.urljoin( + settings.SHARE_WEB_URL, + reverse('trove:cardsearch-rss', query=_feed_query) + ) + _atom_link = urllib.parse.urljoin( + settings.SHARE_WEB_URL, + reverse('trove:cardsearch-atom', query=_feed_query) + ) + return FeedLinks(rss=_rss_link, atom=_atom_link) + + +def _get_feed_query(query_string: str) -> QueryDict: + _qparams = QueryDict(query_string, mutable=True) + for _param_name in list(filter(_irrelevant_feed_param, _qparams.keys())): + del _qparams[_param_name] + return _qparams + + +def _irrelevant_feed_param(query_param_name: str) -> bool: + return ( + query_param_name in ('sort', 'include', 'acceptMediatype', 'blendCards', 'page[cursor]') + or query_param_name.startswith('fields') + ) diff --git a/trove/openapi.py b/trove/openapi.py index 0ed880583..89c0bee67 100644 --- a/trove/openapi.py +++ b/trove/openapi.py @@ -46,7 +46,7 @@ def get_trove_openapi() -> dict[str, Any]: 'contact': { # 'name': # 'url': web-browsable version of this - 'email': 'share-support@osf.io', + 'email': settings.SHARE_SUPPORT_EMAIL, }, # 'license': 'version': get_shtrove_version(), diff --git a/trove/render/__init__.py b/trove/render/__init__.py index c5bf699a1..cd3189ef2 100644 --- a/trove/render/__init__.py +++ b/trove/render/__init__.py @@ -1,16 +1,17 @@ -from typing import Type - from django import http from trove import exceptions as trove_exceptions +from trove.vocab.mediatypes import strip_mediatype_parameters from ._base import BaseRenderer from .jsonapi import RdfJsonapiRenderer from .html_browse import 
RdfHtmlBrowseRenderer from .turtle import RdfTurtleRenderer from .jsonld import RdfJsonldRenderer -from .simple_csv import TrovesearchSimpleCsvRenderer -from .simple_json import TrovesearchSimpleJsonRenderer -from .simple_tsv import TrovesearchSimpleTsvRenderer +from .cardsearch_rss import CardsearchRssRenderer +from .cardsearch_atom import CardsearchAtomRenderer +from .trovesearch_csv import TrovesearchCsvRenderer +from .trovesearch_json import TrovesearchJsonRenderer +from .trovesearch_tsv import TrovesearchTsvRenderer __all__ = ('get_renderer_type', 'BaseRenderer') @@ -20,20 +21,20 @@ RdfJsonapiRenderer, RdfTurtleRenderer, RdfJsonldRenderer, - TrovesearchSimpleCsvRenderer, - TrovesearchSimpleJsonRenderer, - TrovesearchSimpleTsvRenderer, + TrovesearchCsvRenderer, + TrovesearchJsonRenderer, + TrovesearchTsvRenderer, +) +CARDSEARCH_ONLY_RENDERERS = ( # TODO: use/consider + CardsearchRssRenderer, + CardsearchAtomRenderer, ) - -RendersType = Type[ - BaseRenderer | RdfHtmlBrowseRenderer | RdfJsonapiRenderer | RdfTurtleRenderer | RdfJsonldRenderer | TrovesearchSimpleCsvRenderer | TrovesearchSimpleJsonRenderer | TrovesearchSimpleTsvRenderer -] RENDERER_BY_MEDIATYPE = { _renderer_type.MEDIATYPE: _renderer_type for _renderer_type in RENDERERS } -DEFAULT_RENDERER_TYPE = RdfJsonapiRenderer # the most stable one +DEFAULT_RENDERER_TYPE = RdfJsonapiRenderer # the most stable one? 
def get_renderer_type(request: http.HttpRequest) -> type[BaseRenderer]: @@ -42,7 +43,9 @@ def get_renderer_type(request: http.HttpRequest) -> type[BaseRenderer]: _requested_mediatype = request.GET.get('acceptMediatype') if _requested_mediatype: try: - _chosen_renderer_type = RENDERER_BY_MEDIATYPE[_requested_mediatype] + _chosen_renderer_type = RENDERER_BY_MEDIATYPE[ + strip_mediatype_parameters(_requested_mediatype) + ] except KeyError: raise trove_exceptions.CannotRenderMediatype(_requested_mediatype) else: diff --git a/trove/render/_base.py b/trove/render/_base.py index 49a3a52ec..5facde0d4 100644 --- a/trove/render/_base.py +++ b/trove/render/_base.py @@ -13,7 +13,10 @@ from trove.vocab import mediatypes from trove.vocab.trove import TROVE_API_THESAURUS from trove.vocab.namespaces import namespaces_shorthand -from ._rendering import ProtoRendering, SimpleRendering +from .rendering import ( + EntireRendering, + ProtoRendering, +) @dataclasses.dataclass @@ -52,26 +55,16 @@ def response_tripledict(self) -> rdf.RdfTripleDictionary: # TODO: self.response_gathering.ask_all_about or a default ask... 
return self.response_gathering.leaf_a_record() - def simple_render_document(self) -> str: - raise NotImplementedError - + @abc.abstractmethod def render_document(self) -> ProtoRendering: - try: - _content = self.simple_render_document() - except NotImplementedError: - raise NotImplementedError(f'class "{type(self)}" must implement either `render_document` or `simple_render_document`') - else: - return SimpleRendering( # type: ignore[return-value] # until ProtoRendering(typing.Protocol) with py3.12 - mediatype=self.MEDIATYPE, - rendered_content=_content, - ) + raise NotImplementedError @classmethod def render_error_document(cls, error: trove_exceptions.TroveError) -> ProtoRendering: # may override, but default to jsonapi - return SimpleRendering( # type: ignore[return-value] # until ProtoRendering(typing.Protocol) with py3.12 + return EntireRendering( mediatype=mediatypes.JSONAPI, - rendered_content=json.dumps( + entire_content=json.dumps( {'errors': [{ # https://jsonapi.org/format/#error-objects 'status': error.http_status, 'code': error.error_location, diff --git a/trove/render/_html.py b/trove/render/_html.py deleted file mode 100644 index 6daa1e037..000000000 --- a/trove/render/_html.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import annotations -from collections.abc import Generator -import contextlib -import dataclasses -from xml.etree.ElementTree import ( - Element, - SubElement, -) -from typing import Any - -from primitive_metadata import primitive_rdf as rdf - - -__all__ = ('HtmlBuilder',) - - -@dataclasses.dataclass -class HtmlBuilder: - given_root: Element - _: dataclasses.KW_ONLY - _nested_elements: list[Element] = dataclasses.field(default_factory=list) - _heading_depth: int = 0 - - def __post_init__(self) -> None: - self._nested_elements.append(self.given_root) - - @property - def root_element(self) -> Element: - return self._nested_elements[0] - - @property - def _current_element(self) -> Element: - return self._nested_elements[-1] - - ### - # 
html-building helper methods - - @contextlib.contextmanager - def nest_h_tag(self, **kwargs: Any) -> Generator[Element]: - _outer_heading_depth = self._heading_depth - if not _outer_heading_depth: - self._heading_depth = 1 - elif _outer_heading_depth < 6: # h6 deepest - self._heading_depth += 1 - _h_tag = f'h{self._heading_depth}' - with self.nest(_h_tag, **kwargs) as _nested: - try: - yield _nested - finally: - self._heading_depth = _outer_heading_depth - - @contextlib.contextmanager - def nest(self, tag_name: str, attrs: dict | None = None) -> Generator[Element]: - _attrs = {**attrs} if attrs else {} - _nested_element = SubElement(self._current_element, tag_name, _attrs) - self._nested_elements.append(_nested_element) - try: - yield self._current_element - finally: - _popped_element = self._nested_elements.pop() - assert _popped_element is _nested_element - - def leaf(self, tag_name: str, *, text: str | None = None, attrs: dict | None = None) -> None: - _leaf_element = SubElement(self._current_element, tag_name, attrs or {}) - if isinstance(text, rdf.Literal): - # TODO: lang - _leaf_element.text = text.unicode_value - elif text is not None: - _leaf_element.text = text diff --git a/trove/render/_rendering.py b/trove/render/_rendering.py deleted file mode 100644 index 0de9b015a..000000000 --- a/trove/render/_rendering.py +++ /dev/null @@ -1,47 +0,0 @@ -import abc -import dataclasses -from typing import Iterator, Generator - -from trove import exceptions as trove_exceptions - - -class ProtoRendering(abc.ABC): - '''base class for all renderings - - (TODO: typing.Protocol (when py3.12+)) - ''' - - @property - @abc.abstractmethod - def mediatype(self) -> str: - '''`mediatype`: required readable attribute - ''' - raise NotImplementedError - - @abc.abstractmethod - def iter_content(self) -> Iterator[str | bytes | memoryview]: - '''`iter_content`: (only) required method - ''' - yield from () - - -@dataclasses.dataclass -class SimpleRendering: # implements ProtoRendering - 
mediatype: str - rendered_content: str = '' - - def iter_content(self) -> Generator[str]: - yield self.rendered_content - - -@dataclasses.dataclass -class StreamableRendering: # implements ProtoRendering - mediatype: str - content_stream: Iterator[str | bytes | memoryview] - _started_already: bool = False - - def iter_content(self) -> Iterator[str | bytes | memoryview]: - if self._started_already: - raise trove_exceptions.CannotRenderStreamTwice - self._started_already = True - yield from self.content_stream diff --git a/trove/render/_simple_trovesearch.py b/trove/render/_trovesearch_card_only.py similarity index 63% rename from trove/render/_simple_trovesearch.py rename to trove/render/_trovesearch_card_only.py index 36bc36c4b..f1bc3378e 100644 --- a/trove/render/_simple_trovesearch.py +++ b/trove/render/_trovesearch_card_only.py @@ -1,6 +1,8 @@ from __future__ import annotations -from collections.abc import Generator, Iterator +import abc +from collections.abc import Generator, Iterator, Sequence import json +import logging from typing import Any, TYPE_CHECKING from primitive_metadata import primitive_rdf as rdf @@ -9,42 +11,30 @@ from trove.vocab.jsonapi import JSONAPI_LINK_OBJECT from trove.vocab.namespaces import TROVE, RDF from ._base import BaseRenderer -from ._rendering import ProtoRendering, SimpleRendering if TYPE_CHECKING: from trove.util.json import JsonObject + from trove.render.rendering import ProtoRendering +_logger = logging.getLogger(__name__) -class SimpleTrovesearchRenderer(BaseRenderer): - '''for "simple" search api responses (including only result metadata) - (very entangled with trove/trovesearch/trovesearch_gathering.py) +class TrovesearchCardOnlyRenderer(BaseRenderer, abc.ABC): + '''for search api responses that include only metadata about results + + very entangled with trove/trovesearch/trovesearch_gathering.py and trove/derive/osfmap_json.py ''' PASSIVE_RENDER = False # knows the properties it cares about - _page_links: set[str] + 
INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] # assumes osfmap_json + _page_links: set[str] # for use *after* iterating cards/card_pages __already_iterated_cards = False - def simple_unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> str: - raise NotImplementedError - - def simple_multicard_rendering(self, cards: Iterator[tuple[str, JsonObject]]) -> str: - raise NotImplementedError + @abc.abstractmethod + def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering: + raise NotImplementedError(f'{self.__class__.__name__} must implement `multicard_rendering`') def unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> ProtoRendering: - return SimpleRendering( # type: ignore[return-value] - mediatype=self.MEDIATYPE, - rendered_content=self.simple_unicard_rendering(card_iri, osfmap_json), - ) - - def multicard_rendering(self, card_pages: Iterator[dict[str, JsonObject]]) -> ProtoRendering: - _cards = ( - (_card_iri, _card_contents) - for _page in card_pages - for _card_iri, _card_contents in _page.items() - ) - return SimpleRendering( # type: ignore[return-value] - mediatype=self.MEDIATYPE, - rendered_content=self.simple_multicard_rendering(_cards), - ) + _page = [(card_iri, osfmap_json)] + return self.multicard_rendering(card_pages=iter([_page])) def render_document(self) -> ProtoRendering: _focustypes = self.response_focus.type_iris @@ -57,7 +47,7 @@ def render_document(self) -> ProtoRendering: ) raise trove_exceptions.UnsupportedRdfType(_focustypes) - def _iter_card_pages(self) -> Generator[dict[str, JsonObject]]: + def _iter_card_pages(self) -> Generator[list[tuple[str, JsonObject]]]: assert not self.__already_iterated_cards self.__already_iterated_cards = True self._page_links = set() @@ -67,22 +57,22 @@ def _iter_card_pages(self) -> Generator[dict[str, JsonObject]]: if (RDF.type, JSONAPI_LINK_OBJECT) in _page: self._page_links.add(_page) elif rdf.is_container(_page): - _cardpage = [] - 
for _search_result in rdf.container_objects(_page): + _cardpage: list[tuple[str, JsonObject]] = [] + for _search_result_blanknode in rdf.container_objects(_page): try: _card = next( _obj - for _pred, _obj in _search_result + for _pred, _obj in _search_result_blanknode if _pred == TROVE.indexCard ) except StopIteration: pass # skip malformed else: - _cardpage.append(_card) - yield { - self._get_card_iri(_card): self._get_card_content(_card, _page_graph) - for _card in _cardpage - } + _cardpage.append(( + self._get_card_iri(_card), + self._get_card_content(_card, _page_graph), + )) + yield _cardpage def _get_card_iri(self, card: str | rdf.RdfBlanknode) -> str: return card if isinstance(card, str) else '' diff --git a/trove/render/cardsearch_atom.py b/trove/render/cardsearch_atom.py new file mode 100644 index 000000000..f845e3e71 --- /dev/null +++ b/trove/render/cardsearch_atom.py @@ -0,0 +1,76 @@ +from __future__ import annotations +import itertools +import typing + +from django.utils.translation import gettext as _ +from primitive_metadata import primitive_rdf as rdf + +from trove.render.rendering import EntireRendering +from trove.util.datetime import datetime_isoformat_z +from trove.util.json import ( + json_strs, + json_vals, + json_datetimes, +) +from trove.util.xml import XmlBuilder +from trove.vocab import mediatypes +from trove.vocab.trove import trove_indexcard_namespace +from ._trovesearch_card_only import TrovesearchCardOnlyRenderer + +if typing.TYPE_CHECKING: + from collections.abc import Iterator, Sequence + from trove.util.json import JsonObject + from trove.render.rendering import ProtoRendering + + +class CardsearchAtomRenderer(TrovesearchCardOnlyRenderer): + '''render card-search results into Atom following https://www.rfc-editor.org/rfc/rfc4287 + ''' + MEDIATYPE = mediatypes.ATOM + + def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering: + def _strs(*path: str) -> Iterator[str]: + yield from 
json_strs(_osfmap_json, path, coerce_str=True) + + def _dates(*path: str) -> Iterator[str]: + yield from map(datetime_isoformat_z, json_datetimes(_osfmap_json, path)) + + _xb = XmlBuilder('feed', {'xmlns': 'http://www.w3.org/2005/Atom'}) + _xb.leaf('title', text=_('shtrove search results')) + _xb.leaf('subtitle', text=_('feed of metadata records matching given filters')) + _xb.leaf('link', text=self.response_focus.single_iri()) + _xb.leaf('id', text=self.response_focus.single_iri()) + for _card_iri, _osfmap_json in itertools.chain.from_iterable(card_pages): + with _xb.nest('entry'): + _iri = _osfmap_json.get('@id', _card_iri) + _xb.leaf('link', {'href': _iri}) + _xb.leaf('id', text=self._atom_id(_card_iri)) + for _title in _strs('title'): + _xb.leaf('title', text=_title) + for _desc in _strs('description'): + _xb.leaf('summary', text=_desc) + for _keyword in _strs('keyword'): + _xb.leaf('category', text=_keyword) + for _created in _dates('dateCreated'): + _xb.leaf('published', text=_created) + for _creator_obj in json_vals(_osfmap_json, 'creator'): + assert isinstance(_creator_obj, dict) + with _xb.nest('author'): + for _name in json_strs(_creator_obj, ['name']): + _xb.leaf('name', text=_name) + _creator_iri = _creator_obj.get('@id') + if _creator_iri: + _xb.leaf('uri', text=_creator_iri) + for _sameas_iri in json_strs(_creator_obj, ['sameAs']): + _xb.leaf('uri', text=_sameas_iri) + return EntireRendering( + mediatype=self.MEDIATYPE, + entire_content=bytes(_xb), + ) + + def _atom_id(self, card_iri: str) -> str: + try: + _uuid = rdf.iri_minus_namespace(card_iri, namespace=trove_indexcard_namespace()) + except ValueError: + return card_iri + return f'urn:uuid:{_uuid}' diff --git a/trove/render/cardsearch_rss.py b/trove/render/cardsearch_rss.py new file mode 100644 index 000000000..0218e47b9 --- /dev/null +++ b/trove/render/cardsearch_rss.py @@ -0,0 +1,67 @@ +from __future__ import annotations +from email.utils import format_datetime as rfc2822_datetime +import 
itertools +import typing + +from django.conf import settings +from django.utils.translation import gettext as _ + +from trove.render.rendering import EntireRendering +from trove.util.json import ( + json_datetimes, + json_vals, + json_strs, +) +from trove.util.xml import XmlBuilder +from trove.vocab import mediatypes +from ._trovesearch_card_only import TrovesearchCardOnlyRenderer + +if typing.TYPE_CHECKING: + from collections.abc import Iterator, Sequence + from trove.util.json import JsonObject + from trove.render.rendering import ProtoRendering + + +class CardsearchRssRenderer(TrovesearchCardOnlyRenderer): + '''render card-search results into RSS following https://www.rssboard.org/rss-specification + ''' + MEDIATYPE = mediatypes.RSS + + def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering: + def _strs(*path: str) -> Iterator[str]: + yield from json_strs(_osfmap_json, path, coerce_str=True) + + def _dates(*path: str) -> Iterator[str]: + for _dt in json_datetimes(_osfmap_json, path): + yield rfc2822_datetime(_dt) + + _xb = XmlBuilder('rss', {'version': '2.0'}) + with _xb.nest('channel'): + # see https://www.rssboard.org/rss-specification#requiredChannelElements + _xb.leaf('title', text=_('shtrove search results')) + _xb.leaf('link', text=self.response_focus.single_iri()) + _xb.leaf('description', text=_('feed of metadata records matching given filters')) + _xb.leaf('webMaster', text=settings.SHARE_SUPPORT_EMAIL) + for _card_iri, _osfmap_json in itertools.chain.from_iterable(card_pages): + with _xb.nest('item'): + # see https://www.rssboard.org/rss-specification#hrelementsOfLtitemgt + _iri = _osfmap_json.get('@id', _card_iri) + _xb.leaf('link', text=_iri) + _xb.leaf('guid', {'isPermaLink': 'true'}, text=_iri) + for _title in _strs('title'): + _xb.leaf('title', text=_title) + for _desc in _strs('description'): + _xb.leaf('description', text=_desc) + for _keyword in _strs('keyword'): + _xb.leaf('category', 
text=_keyword) + for _created_date in _dates('dateCreated'): + _xb.leaf('pubDate', text=_created_date) + for _creator_obj in json_vals(_osfmap_json, ['creator']): + assert isinstance(_creator_obj, dict) + _creator_name = next(json_strs(_creator_obj, ['name'])) + _creator_id = _creator_obj.get('@id', _creator_name) + _xb.leaf('author', text=f'{_creator_id} ({_creator_name})') + return EntireRendering( + mediatype=self.MEDIATYPE, + entire_content=bytes(_xb), + ) diff --git a/trove/render/html_browse.py b/trove/render/html_browse.py index 1f5bffd6f..bb5d3c650 100644 --- a/trove/render/html_browse.py +++ b/trove/render/html_browse.py @@ -1,7 +1,4 @@ -from collections.abc import ( - Iterator, - Generator, -) +from collections.abc import Generator import contextlib import dataclasses import datetime @@ -12,11 +9,9 @@ from urllib.parse import quote, urlsplit, urlunsplit from xml.etree.ElementTree import ( Element, - tostring as etree_tostring, fromstring as etree_fromstring, ) -from django.conf import settings from django.contrib.staticfiles.storage import staticfiles_storage from django.http import QueryDict from django.urls import reverse @@ -24,14 +19,22 @@ import markdown2 from primitive_metadata import primitive_rdf as rdf +from trove.links import ( + trove_browse_link, + is_local_url, +) +from trove.util.html import HtmlBuilder from trove.util.iris import get_sufficiently_unique_iri from trove.util.randomness import shuffled from trove.vocab import mediatypes -from trove.vocab.namespaces import RDF, RDFS, SKOS, DCTERMS, FOAF, DC +from trove.vocab import jsonapi +from trove.vocab.namespaces import RDF, RDFS, SKOS, DCTERMS, FOAF, DC, OSFMAP, TROVE from trove.vocab.static_vocab import combined_thesaurus__suffuniq -from trove.vocab.trove import trove_browse_link from ._base import BaseRenderer -from ._html import HtmlBuilder +from .rendering import ( + EntireRendering, + ProtoRendering, +) STABLE_MEDIATYPES = (mediatypes.JSONAPI,) UNSTABLE_MEDIATYPES = ( @@ -42,6 +45,11 
@@ mediatypes.TSV, mediatypes.CSV, ) +SEARCHONLY_MEDIATYPES = frozenset(( + mediatypes.JSON, + mediatypes.TSV, + mediatypes.CSV, +)) _LINK_TEXT_PREDICATES = ( SKOS.prefLabel, @@ -50,52 +58,64 @@ DCTERMS.title, DC.title, FOAF.name, + OSFMAP.fileName, ) _IMPLICIT_DATATYPES = frozenset(( RDF.string, RDF.langString, )) +_PREDICATES_RENDERED_SPECIAL = frozenset(( + RDF.type, +)) +_PRIMITIVE_LITERAL_TYPES = (float, int, datetime.date) _QUERYPARAM_SPLIT_RE = re.compile(r'(?=[?&])') _PHI = (math.sqrt(5) + 1) / 2 -_HTML_DOCTYPE = '' - @dataclasses.dataclass class RdfHtmlBrowseRenderer(BaseRenderer): - MEDIATYPE: ClassVar[str] = 'text/html; charset=utf-8' - __current_data: rdf.RdfTripleDictionary = dataclasses.field(init=False) + MEDIATYPE: ClassVar[str] = mediatypes.HTML + __current_data: rdf.RdfGraph = dataclasses.field(init=False) __visiting_iris: set[str] = dataclasses.field(init=False) __hb: HtmlBuilder = dataclasses.field(init=False) __last_hue_turn: float = dataclasses.field(default_factory=random.random) def __post_init__(self) -> None: # TODO: lang (according to request -- also translate) - self.__current_data = self.response_tripledict + self.__current_data = self.response_data self.__visiting_iris = set() @property def is_data_blended(self) -> bool | None: return self.response_gathering.gatherer_kwargs.get('blend_cards') + @property + def is_search(self) -> bool: + return not self.response_focus.type_iris.isdisjoint(( + TROVE.Cardsearch, + TROVE.Valuesearch, + )) + # override BaseRenderer - def simple_render_document(self) -> str: - self.__hb = HtmlBuilder(Element('html')) + def render_document(self) -> ProtoRendering: + return EntireRendering(self.MEDIATYPE, self.render_html_str()) + + def render_html_str(self) -> str: + self.__hb = HtmlBuilder() self.render_html_head() - _body_attrs = { - 'class': 'BrowseWrapper', - 'style': self._hue_turn_css(), - } - with self.__hb.nest('body', attrs=_body_attrs): + with ( + self._hue_turn_css() as _hue_turn_style, + 
self.__hb.nest('body', attrs={ + 'class': 'BrowseWrapper', + 'style': _hue_turn_style, + }), + ): self.render_nav() self.render_main() self.render_footer() - return '\n'.join(( - _HTML_DOCTYPE, - etree_tostring(self.__hb.root_element, encoding='unicode', method='html'), - )) + return self.__hb.as_html_doc() def render_html_head(self) -> None: with self.__hb.nest('head'): @@ -123,7 +143,10 @@ def render_footer(self) -> None: def __alternate_mediatypes_card(self) -> None: with self.__nest_card('details'): self.__hb.leaf('summary', text=_('alternate mediatypes')) - for _mediatype in shuffled((*STABLE_MEDIATYPES, *UNSTABLE_MEDIATYPES)): + _linked_mediatypes = {*STABLE_MEDIATYPES, *UNSTABLE_MEDIATYPES} + if not self.is_search: + _linked_mediatypes -= SEARCHONLY_MEDIATYPES + for _mediatype in shuffled(_linked_mediatypes): with self.__hb.nest('span', attrs={'class': 'Browse__literal'}): self.__mediatype_link(_mediatype) @@ -153,67 +176,69 @@ def __mediatype_link(self, mediatype: str) -> None: with self.__hb.nest('a', attrs={'href': reverse('trove:docs')}) as _link: _link.text = _('(stable for documented use)') - def __render_subj(self, subj_iri: str, *, start_collapsed: bool | None = None) -> None: - _twopledict = self.__current_data.get(subj_iri, {}) - with self.__visiting(subj_iri): + def __render_subj(self, subj_iri: str, *, include_details: bool = True) -> None: + with self.__visiting(subj_iri) as _h_tag: with self.__nest_card('article'): with self.__hb.nest('header'): - _compact = self.iri_shorthand.compact_iri(subj_iri) - _is_compactable = (_compact != subj_iri) - _should_link = (subj_iri not in self.response_focus.iris) - with self.__hb.nest_h_tag(attrs={'id': quote(subj_iri)}) as _h: - if _should_link: - with self.__nest_link(subj_iri) as _link: - if _is_compactable: - _link.text = _compact - else: - self.__split_iri_pre(subj_iri) + with self.__hb.nest(_h_tag, attrs={'id': quote(subj_iri)}): + if self.__is_focus(subj_iri): + self.__split_iri_pre(subj_iri) else: - 
if _is_compactable: - _h.text = _compact - else: + with self.__nest_link(subj_iri): self.__split_iri_pre(subj_iri) self.__iri_subheaders(subj_iri) - if _twopledict: - with self.__hb.nest('details') as _details: - _detail_depth = sum((_el.tag == 'details') for _el in self.__hb._nested_elements) - _should_open = ( - _detail_depth < 3 - if start_collapsed is None - else not start_collapsed - ) - if _should_open: - _details.set('open', '') + if self.__is_focus(subj_iri): + self.__hb.leaf('pre', text=subj_iri) + if include_details and (_twopledict := self.__current_data.tripledict.get(subj_iri, {})): + _details_attrs = ( + {'open': ''} + if (self.__is_focus(subj_iri) or is_local_url(subj_iri)) + else {} + ) + with self.__hb.nest('details', _details_attrs): self.__hb.leaf('summary', text=_('more details...')) self.__twoples(_twopledict) def __twoples(self, twopledict: rdf.RdfTwopleDictionary) -> None: with self.__hb.nest('dl', {'class': 'Browse__twopleset'}): - for _pred, _obj_set in shuffled(twopledict.items()): + for _pred, _obj_set in self.__order_twopledict(twopledict): with self.__hb.nest('dt', attrs={'class': 'Browse__predicate'}): self.__compact_link(_pred) for _text in self.__iri_thesaurus_labels(_pred): self.__literal(_text) with self.__hb.nest('dd'): - for _obj in shuffled(_obj_set): + for _obj in _obj_set: self.__obj(_obj) + def __order_twopledict(self, twopledict: rdf.RdfTwopleDictionary) -> Generator[tuple[str, list[rdf.RdfObject]]]: + _items_with_sorted_objs = ( + (_pred, sorted(_obj_set, key=_obj_ordering_key)) + for _pred, _obj_set in twopledict.items() + if _pred not in _PREDICATES_RENDERED_SPECIAL + ) + yield from sorted( + _items_with_sorted_objs, + key=lambda _item: _obj_ordering_key(_item[1][0]), + ) + def __obj(self, obj: rdf.RdfObject) -> None: if isinstance(obj, str): # iri # TODO: detect whether indexcard? 
- if (obj in self.__current_data) and (obj not in self.__visiting_iris): + if (obj in self.__current_data.tripledict) and (obj not in self.__visiting_iris): self.__render_subj(obj) else: with self.__hb.nest('article', attrs={'class': 'Browse__object'}): self.__iri_link_and_labels(obj) elif isinstance(obj, frozenset): # blanknode - if (RDF.type, RDF.Seq) in obj: + if _is_jsonapi_link_obj(obj): + self.__jsonapi_link_obj(obj) + elif _is_sequence_obj(obj): self.__sequence(obj) else: self.__blanknode(obj) elif isinstance(obj, rdf.Literal): self.__literal(obj, is_rdf_object=True) - elif isinstance(obj, (float, int, datetime.date)): + elif isinstance(obj, _PRIMITIVE_LITERAL_TYPES): self.__literal(rdf.literal(obj), is_rdf_object=True) elif isinstance(obj, rdf.QuotedGraph): self.__quoted_graph(obj) @@ -240,7 +265,7 @@ def __literal( if _is_markdown: # TODO: tests for safe_mode _html = markdown2.markdown(_lit.unicode_value, safe_mode='escape') - self.__hb._current_element.append(etree_fromstring(f'{_html}')) + self.__hb.current_element.append(etree_fromstring(f'{_html}')) else: self.__hb.leaf('q', text=_lit) @@ -255,8 +280,16 @@ def __sequence(self, sequence_twoples: frozenset[rdf.RdfTwople]) -> None: self.__obj(_seq_obj) def __quoted_graph(self, quoted_graph: rdf.QuotedGraph) -> None: - with self.__quoted_data(quoted_graph.tripledict): - self.__render_subj(quoted_graph.focus_iri) # , start_collapsed=True) + _should_include_details = ( + self.__is_focus(quoted_graph.focus_iri) + or (( # primary topic of response focus + self.response_focus.single_iri(), + FOAF.primaryTopic, + quoted_graph.focus_iri, + ) in self.response_data) + ) + with self.__quoted_data(quoted_graph): + self.__render_subj(quoted_graph.focus_iri, include_details=_should_include_details) def __blanknode(self, blanknode: rdf.RdfTwopleDictionary | frozenset) -> None: _twopledict = ( @@ -264,28 +297,46 @@ def __blanknode(self, blanknode: rdf.RdfTwopleDictionary | frozenset) -> None: if isinstance(blanknode, 
dict) else rdf.twopledict_from_twopleset(blanknode) ) - with self.__hb.nest('details', attrs={ - 'open': '', - 'class': 'Browse__blanknode Browse__object', - 'style': self._hue_turn_css(), - }): - self.__hb.leaf('summary', text='(blank node)') + with ( + self._hue_turn_css() as _hue_turn_style, + self.__hb.nest('details', attrs={ + 'open': '', + 'class': 'Browse__blanknode Browse__object', + 'style': _hue_turn_style, + }), + ): + with self.__hb.nest('summary'): + for _type_iri in _twopledict.get(RDF.type, ()): + self.__compact_link(_type_iri) self.__twoples(_twopledict) + def __jsonapi_link_obj(self, twopleset: frozenset[rdf.RdfTwople]) -> None: + _iri = next( + (str(_obj) for (_pred, _obj) in twopleset if _pred == RDF.value), + '', + ) + _text = next( + (_obj.unicode_value for (_pred, _obj) in twopleset if _pred == jsonapi.JSONAPI_MEMBERNAME), + '', + ) + with self.__nest_link(_iri, attrs={'class': 'Browse__blanknode Browse__object'}) as _a: + _a.text = _('link: %(linktext)s') % {'linktext': _text} + def __split_iri_pre(self, iri: str) -> None: - self.__hb.leaf('pre', text='\n'.join(self.__iri_lines(iri))) + self.__hb.leaf('pre', text='\n'.join(self.__iri_display_lines(iri))) @contextlib.contextmanager - def __visiting(self, iri: str) -> Iterator[None]: + def __visiting(self, iri: str) -> Generator[str]: assert iri not in self.__visiting_iris self.__visiting_iris.add(iri) try: - yield + with self.__hb.deeper_heading() as _h_tag: + yield _h_tag finally: self.__visiting_iris.remove(iri) @contextlib.contextmanager - def __quoted_data(self, quoted_data: dict) -> Generator[None]: + def __quoted_data(self, quoted_data: rdf.RdfGraph) -> Generator[None]: _outer_data = self.__current_data _outer_visiting_iris = self.__visiting_iris self.__current_data = quoted_data @@ -301,27 +352,32 @@ def __iri_link_and_labels(self, iri: str) -> None: for _text in self.__iri_thesaurus_labels(iri): self.__literal(_text) - def __nest_link(self, iri: str) -> 
contextlib.AbstractContextManager[Element]: + def __nest_link(self, iri: str, attrs: dict[str, str] | None = None) -> contextlib.AbstractContextManager[Element]: _href = ( iri - if _is_local_url(iri) + if is_local_url(iri) else trove_browse_link(iri) ) - return self.__hb.nest('a', attrs={'href': _href}) + return self.__hb.nest('a', attrs={**(attrs or {}), 'href': _href}) def __compact_link(self, iri: str) -> Element: with self.__nest_link(iri) as _a: - _a.text = self.iri_shorthand.compact_iri(iri) + _a.text = ''.join(self.__iri_display_lines(iri)) return _a - def __nest_card(self, tag: str) -> contextlib.AbstractContextManager[Element]: - return self.__hb.nest( - tag, - attrs={ - 'class': 'Browse__card', - 'style': self._hue_turn_css(), - }, - ) + @contextlib.contextmanager + def __nest_card(self, tag: str) -> Generator[Element]: + with ( + self._hue_turn_css() as _hue_turn_style, + self.__hb.nest( + tag, + attrs={ + 'class': 'Browse__card', + 'style': _hue_turn_style, + }, + ) as _element, + ): + yield _element def __iri_thesaurus_labels(self, iri: str) -> list[str]: # TODO: consider requested language @@ -331,20 +387,25 @@ def __iri_thesaurus_labels(self, iri: str) -> list[str]: if _thesaurus_entry: for _pred in _LINK_TEXT_PREDICATES: _labels.update(_thesaurus_entry.get(_pred, ())) - _twoples = self.__current_data.get(iri) + _twoples = self.__current_data.tripledict.get(iri) if _twoples: for _pred in _LINK_TEXT_PREDICATES: _labels.update(_twoples.get(_pred, ())) return shuffled(_labels) - def _hue_turn_css(self) -> str: - _hue_turn = (self.__last_hue_turn + _PHI) % 1.0 + @contextlib.contextmanager + def _hue_turn_css(self) -> Generator[str]: + _prior_turn = self.__last_hue_turn + _hue_turn = (_prior_turn + _PHI) % 1.0 self.__last_hue_turn = _hue_turn - return f'--hue-turn: {_hue_turn}turn;' + try: + yield f'--hue-turn: {_hue_turn}turn;' + finally: + self.__last_hue_turn = _prior_turn def _queryparam_href(self, param_name: str, param_value: str | None) -> str: 
_base_url = self.response_focus.single_iri() - if not _is_local_url(_base_url): + if not is_local_url(_base_url): _base_url = trove_browse_link(_base_url) (_scheme, _netloc, _path, _query, _fragment) = urlsplit(_base_url) _qparams = QueryDict(_query, mutable=True) @@ -364,26 +425,34 @@ def _queryparam_href(self, param_name: str, param_value: str | None) -> str: )) def __iri_subheaders(self, iri: str) -> None: - _type_iris = self.__current_data.get(iri, {}).get(RDF.type, ()) - if _type_iris: - for _type_iri in _type_iris: - self.__compact_link(_type_iri) + for _type_iri in self.__current_data.q(iri, RDF.type): + self.__compact_link(_type_iri) _labels = self.__iri_thesaurus_labels(iri) if _labels: for _label in _labels: self.__literal(_label) - def __iri_lines(self, iri: str) -> Iterator[str]: - (_scheme, _netloc, _path, _query, _fragment) = urlsplit(iri) - yield ( - f'://{_netloc}{_path}' - if _netloc - else f'{_scheme}:{_path}' - ) - if _query: - yield from filter(bool, _QUERYPARAM_SPLIT_RE.split(f'?{_query}')) - if _fragment: - yield f'#{_fragment}' + def __iri_display_lines(self, iri: str) -> Generator[str]: + _compact = self.iri_shorthand.compact_iri(iri) + if _compact != iri: + yield _compact + else: + (_scheme, _netloc, _path, _query, _fragment) = urlsplit(iri) + # first line with path + if is_local_url(iri): + yield f'/{_path.lstrip('/')}' + elif _netloc: + yield f'://{_netloc}{_path}' + else: + yield f'{_scheme}:{_path}' + # query and fragment separate + if _query: + yield from filter(bool, _QUERYPARAM_SPLIT_RE.split(f'?{_query}')) + if _fragment: + yield f'#{_fragment}' + + def __is_focus(self, iri: str) -> bool: + return (iri in self.response_focus.iris) def _append_class(el: Element, element_class: str) -> None: @@ -393,5 +462,23 @@ def _append_class(el: Element, element_class: str) -> None: ) -def _is_local_url(iri: str) -> bool: - return iri.startswith(settings.SHARE_WEB_URL) +def _is_sequence_obj(obj: rdf.RdfObject) -> bool: + return ( + 
isinstance(obj, frozenset) + and (RDF.type, RDF.Seq) in obj + ) + + +def _is_jsonapi_link_obj(obj: rdf.RdfObject) -> bool: + return ( + isinstance(obj, frozenset) + and (RDF.type, jsonapi.JSONAPI_LINK_OBJECT) in obj + ) + + +def _obj_ordering_key(obj: rdf.RdfObject) -> tuple[bool, ...]: + return ( + not isinstance(obj, (rdf.Literal, *_PRIMITIVE_LITERAL_TYPES)), # literal values first + not isinstance(obj, str), # iris next + _is_jsonapi_link_obj(obj), # jsonapi link objects last + ) diff --git a/trove/render/jsonapi.py b/trove/render/jsonapi.py index e60fc2338..11a78708c 100644 --- a/trove/render/jsonapi.py +++ b/trove/render/jsonapi.py @@ -7,13 +7,17 @@ import itertools import json import time -from typing import Iterable, Union, List, Any, Dict, Tuple, Iterator +from typing import Iterable, Union, Any, Iterator -from typing import Optional from primitive_metadata import primitive_rdf from trove import exceptions as trove_exceptions +from trove.util.json import ( + JsonObject, + JsonValue, +) from trove.vocab.jsonapi import ( + JSONAPI_LINK, JSONAPI_MEMBERNAME, JSONAPI_RELATIONSHIP, JSONAPI_ATTRIBUTE, @@ -29,6 +33,10 @@ ) from trove.vocab.trove import trove_indexcard_namespace from ._base import BaseRenderer +from .rendering import ( + EntireRendering, + ProtoRendering, +) # a jsonapi resource may pull rdf data using an iri or blank node @@ -38,15 +46,11 @@ def _resource_ids_defaultdict() -> defaultdict[Any, str]: _prefix = str(time.time_ns()) - _ints = itertools.count() - - def _iter_ids() -> Iterator[str]: - while True: - _id = next(_ints) - yield f'{_prefix}-{_id}' - - _ids = _iter_ids() - return defaultdict(lambda: next(_ids)) + _infinite_ids = ( + f'{_prefix}-{_id}' + for _id in itertools.count() + ) + return defaultdict(_infinite_ids.__next__) @dataclasses.dataclass @@ -84,15 +88,16 @@ class RdfJsonapiRenderer(BaseRenderer): def get_deriver_iri(cls, card_blending: bool) -> str | None: return (None if card_blending else super().get_deriver_iri(card_blending)) 
- def simple_render_document(self) -> str: - return json.dumps( + def render_document(self) -> ProtoRendering: + _json_str = json.dumps( self.render_dict(self.response_focus.single_iri()), indent=2, # TODO: pretty-print query param? ) + return EntireRendering(self.MEDIATYPE, _json_str) - def render_dict(self, primary_iris: Union[str, Iterable[str]]) -> dict[str, Any]: - _primary_data: dict | list | None = None - _included_data = [] + def render_dict(self, primary_iris: Union[str, Iterable[str]]) -> JsonObject: + _primary_data: JsonValue = None + _included_data: list[JsonValue] = [] with self._contained__to_include() as _to_include: if isinstance(primary_iris, str): _already_included = {primary_iris} @@ -108,26 +113,37 @@ def render_dict(self, primary_iris: Union[str, Iterable[str]]) -> dict[str, Any] if _next not in _already_included: _already_included.add(_next) _included_data.append(self.render_resource_object(_next)) - _document = {'data': _primary_data} + _document: JsonObject = {'data': _primary_data} if _included_data: _document['included'] = _included_data return _document - def render_resource_object(self, iri_or_blanknode: _IriOrBlanknode) -> dict[str, Any]: - _resource_object = {**self.render_identifier_object(iri_or_blanknode)} + def render_resource_object(self, iri_or_blanknode: _IriOrBlanknode) -> JsonObject: + _resource_object: JsonObject = {**self.render_identifier_object(iri_or_blanknode)} _twopledict = ( (self.response_data.tripledict.get(iri_or_blanknode) or {}) if isinstance(iri_or_blanknode, str) else primitive_rdf.twopledict_from_twopleset(iri_or_blanknode) ) + _links: JsonObject = {} for _pred, _obj_set in _twopledict.items(): - if _pred != RDF.type: - self._render_field(_pred, _obj_set, into=_resource_object) + if _pred == JSONAPI_LINK: + _links.update( + self._render_link_object(_link_obj) + for _link_obj in _obj_set + ) + elif _pred != RDF.type: + _doc_key, _field_key, _field_value = self._render_field(_pred, _obj_set) + _doc_obj = 
_resource_object.setdefault(_doc_key, {}) + assert isinstance(_doc_obj, dict) + _doc_obj[_field_key] = _field_value if isinstance(iri_or_blanknode, str): - _resource_object.setdefault('links', {})['self'] = iri_or_blanknode + _links['self'] = iri_or_blanknode + if _links: + _resource_object['links'] = _links return _resource_object - def render_identifier_object(self, iri_or_blanknode: _IriOrBlanknode) -> Any | dict[str, Any]: + def render_identifier_object(self, iri_or_blanknode: _IriOrBlanknode) -> JsonObject: try: return self._identifier_object_cache[iri_or_blanknode] except KeyError: @@ -156,7 +172,7 @@ def render_identifier_object(self, iri_or_blanknode: _IriOrBlanknode) -> Any | d self._identifier_object_cache[iri_or_blanknode] = _id_obj return _id_obj - def _single_typename(self, type_iris: list[str]) -> Optional[str]: + def _single_typename(self, type_iris: list[str]) -> str: if not type_iris: return '' if len(type_iris) == 1: @@ -168,7 +184,7 @@ def _single_typename(self, type_iris: list[str]) -> Optional[str]: return self._membername_for_iri(_type_iris[0]) return self._membername_for_iri(sorted(type_iris)[0]) - def _membername_for_iri(self, iri: str) -> Optional[str] | Any: + def _membername_for_iri(self, iri: str) -> str: try: _membername = next(self.thesaurus.q(iri, JSONAPI_MEMBERNAME)) except StopIteration: @@ -193,12 +209,12 @@ def _resource_id_for_iri(self, iri: str) -> Any: # as fallback, encode the iri into a valid jsonapi member name return base64.urlsafe_b64encode(iri.encode()).decode() - def _render_field(self, predicate_iri: str, object_set: Iterable[Any], *, into: dict[str, Any]) -> None: + def _render_field(self, predicate_iri: str, object_set: Iterable[Any]) -> tuple[str, str, JsonValue]: _is_relationship = (predicate_iri, RDF.type, JSONAPI_RELATIONSHIP) in self.thesaurus _is_attribute = (predicate_iri, RDF.type, JSONAPI_ATTRIBUTE) in self.thesaurus _field_key = self._membername_for_iri(predicate_iri) _doc_key = 'meta' # unless configured 
for jsonapi, default to unstructured 'meta' - if ':' not in _field_key: # type: ignore + if ':' not in _field_key: if _is_relationship: _doc_key = 'relationships' elif _is_attribute: @@ -207,10 +223,9 @@ def _render_field(self, predicate_iri: str, object_set: Iterable[Any], *, into: _fieldvalue = self._render_relationship_object(predicate_iri, object_set) else: _fieldvalue = self._one_or_many(predicate_iri, self._attribute_datalist(object_set)) # type: ignore - # update the given `into` resource object - into.setdefault(_doc_key, {})[_field_key] = _fieldvalue + return _doc_key, _field_key, _fieldvalue - def _one_or_many(self, predicate_iri: str, datalist: list[Any]) -> Union[list[Any], Any, None]: + def _one_or_many(self, predicate_iri: str, datalist: list[Any]) -> JsonValue: _only_one = (predicate_iri, RDF.type, OWL.FunctionalProperty) in self.thesaurus if _only_one: if len(datalist) > 1: @@ -218,19 +233,19 @@ def _one_or_many(self, predicate_iri: str, datalist: list[Any]) -> Union[list[An return datalist[0] if datalist else None return datalist - def _attribute_datalist(self, object_set: Iterable[Any]) -> List[Any]: + def _attribute_datalist(self, object_set: Iterable[Any]) -> list[Any]: return [ self._render_attribute_datum(_obj) for _obj in object_set ] def _render_relationship_object( - self, - predicate_iri: str, - object_set: Iterable[Union[frozenset[Any], str]] - ) -> Dict[str, Any]: + self, + predicate_iri: str, + object_set: Iterable[Union[frozenset[Any], str]] + ) -> JsonObject: _data = [] - _links = {} + _links: JsonObject = {} for _obj in object_set: if isinstance(_obj, frozenset): if (RDF.type, RDF.Seq) in _obj: @@ -247,14 +262,14 @@ def _render_relationship_object( assert isinstance(_obj, str) _data.append(self.render_identifier_object(_obj)) self._pls_include(_obj) - _relationship_obj = { + _relationship_obj: JsonObject = { 'data': self._one_or_many(predicate_iri, _data), } if _links: _relationship_obj['links'] = _links return _relationship_obj - 
def _render_link_object(self, link_obj: frozenset[Tuple[Any, Any]]) -> Tuple[str, Dict[str, Any]]: + def _render_link_object(self, link_obj: frozenset[tuple[Any, Any]]) -> tuple[str, JsonObject]: _membername = next( _obj.unicode_value for _pred, _obj in link_obj @@ -296,14 +311,14 @@ def _pls_include(self, item: Any) -> None: if self.__to_include is not None: self.__to_include.add(item) - def _render_attribute_datum(self, rdfobject: primitive_rdf.RdfObject) -> dict[Any, Any] | list[Any] | str | float | int: + def _render_attribute_datum(self, rdfobject: primitive_rdf.RdfObject) -> JsonValue: if isinstance(rdfobject, frozenset): if (RDF.type, RDF.Seq) in rdfobject: return [ self._render_attribute_datum(_seq_obj) for _seq_obj in primitive_rdf.sequence_objects_in_order(rdfobject) ] - _json_blanknode = {} + _json_blanknode: JsonObject = {} for _pred, _obj_set in primitive_rdf.twopledict_from_twopleset(rdfobject).items(): _key = self._membername_for_iri(_pred) _json_blanknode[_key] = self._one_or_many(_pred, self._attribute_datalist(_obj_set)) diff --git a/trove/render/jsonld.py b/trove/render/jsonld.py index a7ca263c6..5c7299f1f 100644 --- a/trove/render/jsonld.py +++ b/trove/render/jsonld.py @@ -10,6 +10,10 @@ from trove.vocab.namespaces import RDF, OWL, TROVE from trove.vocab import mediatypes from ._base import BaseRenderer +from .rendering import ( + EntireRendering, + ProtoRendering, +) if TYPE_CHECKING: from trove.util.json import ( JsonObject, @@ -29,12 +33,13 @@ class RdfJsonldRenderer(BaseRenderer): __visiting_iris: set[str] | None = None - def simple_render_document(self) -> str: - return json.dumps( + def render_document(self) -> ProtoRendering: + _json_str = json.dumps( self.render_jsonld(self.response_data, self.response_focus.single_iri()), indent=2, sort_keys=True, ) + return EntireRendering(self.MEDIATYPE, _json_str) def render_jsonld( self, @@ -152,8 +157,7 @@ def _list_or_single_value(self, predicate_iri: str, objectlist: list[JsonValue]) (_only_obj,) 
= objectlist except ValueError: return None - else: - return _only_obj + return _only_obj if predicate_iri in _PREDICATES_OF_FLEXIBLE_CARDINALITY and len(objectlist) == 1: return objectlist[0] return sorted(objectlist, key=_naive_sort_key) diff --git a/trove/render/rendering/__init__.py b/trove/render/rendering/__init__.py new file mode 100644 index 000000000..9e8cb29b8 --- /dev/null +++ b/trove/render/rendering/__init__.py @@ -0,0 +1,4 @@ +from .proto import ProtoRendering +from .entire import EntireRendering + +__all__ = ('ProtoRendering', 'EntireRendering') diff --git a/trove/render/rendering/entire.py b/trove/render/rendering/entire.py new file mode 100644 index 000000000..45c7abc0f --- /dev/null +++ b/trove/render/rendering/entire.py @@ -0,0 +1,17 @@ +from collections.abc import Generator +import dataclasses + +from .proto import ProtoRendering + +__all__ = ('EntireRendering',) + + +@dataclasses.dataclass +class EntireRendering(ProtoRendering): + '''EntireRendering: for response content rendered in its entirety before being sent + ''' + mediatype: str + entire_content: str | bytes = '' + + def iter_content(self) -> Generator[str] | Generator[bytes]: + yield self.entire_content diff --git a/trove/render/rendering/html_wrapped.py b/trove/render/rendering/html_wrapped.py new file mode 100644 index 000000000..4aadaff58 --- /dev/null +++ b/trove/render/rendering/html_wrapped.py @@ -0,0 +1,22 @@ +import dataclasses +import html +from typing import Iterator + +from trove.vocab import mediatypes +from trove.util.html import HTML_DOCTYPE +from .proto import ProtoRendering + + +@dataclasses.dataclass +class HtmlWrappedRendering(ProtoRendering): + inner_rendering: ProtoRendering + mediatype: str = mediatypes.HTML + + def iter_content(self) -> Iterator[str]: + yield HTML_DOCTYPE + yield '
'
+        for _content in self.inner_rendering.iter_content():
+            if not isinstance(_content, str):
+                _content = _content.decode()
+            yield html.escape(_content)
+        yield '
' diff --git a/trove/render/rendering/proto.py b/trove/render/rendering/proto.py new file mode 100644 index 000000000..955940acb --- /dev/null +++ b/trove/render/rendering/proto.py @@ -0,0 +1,16 @@ +from typing import ( + Iterator, + Protocol, +) + +__all__ = ('ProtoRendering',) + + +class ProtoRendering(Protocol): + '''protocol for all renderings + ''' + mediatype: str # required attribute + + def iter_content(self) -> Iterator[str] | Iterator[bytes]: + '''`iter_content`: (only) required method + ''' diff --git a/trove/render/rendering/streamable.py b/trove/render/rendering/streamable.py new file mode 100644 index 000000000..c61ff6bcc --- /dev/null +++ b/trove/render/rendering/streamable.py @@ -0,0 +1,20 @@ +from collections.abc import Iterator +import dataclasses + +from trove import exceptions as trove_exceptions +from .proto import ProtoRendering + + +@dataclasses.dataclass +class StreamableRendering(ProtoRendering): + '''StreamableRendering: for response content that may be rendered incrementally while being streamed + ''' + mediatype: str + content_stream: Iterator[str] | Iterator[bytes] = iter(()) + _started_already: bool = False + + def iter_content(self) -> Iterator[str] | Iterator[bytes]: + if self._started_already: + raise trove_exceptions.CannotRenderStreamTwice + self._started_already = True + yield from self.content_stream diff --git a/trove/render/simple_tsv.py b/trove/render/simple_tsv.py deleted file mode 100644 index 30b01a8a6..000000000 --- a/trove/render/simple_tsv.py +++ /dev/null @@ -1,10 +0,0 @@ -import csv - -from trove.vocab import mediatypes - -from .simple_csv import TrovesearchSimpleCsvRenderer - - -class TrovesearchSimpleTsvRenderer(TrovesearchSimpleCsvRenderer): - MEDIATYPE = mediatypes.TSV - CSV_DIALECT = csv.excel_tab diff --git a/trove/render/simple_csv.py b/trove/render/trovesearch_csv.py similarity index 57% rename from trove/render/simple_csv.py rename to trove/render/trovesearch_csv.py index 52c9d700b..a6174f4f4 100644 --- 
a/trove/render/simple_csv.py +++ b/trove/render/trovesearch_csv.py @@ -2,31 +2,36 @@ from collections.abc import ( Generator, Iterator, - Iterable, Sequence, ) import csv +import dataclasses import functools import itertools -import dataclasses +import logging from typing import TYPE_CHECKING, ClassVar from trove.trovesearch.search_params import ( CardsearchParams, ValuesearchParams, ) +from trove.util.iter import iter_unique +from trove.util.json import json_prims from trove.util.propertypath import Propertypath, GLOB_PATHSTEP from trove.vocab import mediatypes from trove.vocab import osfmap -from trove.vocab.namespaces import TROVE -from ._simple_trovesearch import SimpleTrovesearchRenderer -from ._rendering import StreamableRendering, ProtoRendering +from ._trovesearch_card_only import TrovesearchCardOnlyRenderer +from .rendering import ProtoRendering +from .rendering.streamable import StreamableRendering if TYPE_CHECKING: from trove.util.trove_params import BasicTroveParams - from trove.util.json import JsonValue, JsonObject + from trove.util.json import ( + JsonObject, + JsonPath, + ) +_logger = logging.getLogger(__name__) -type Jsonpath = Sequence[str] # path of json keys type CsvValue = str | int | float | None _MULTIVALUE_DELIMITER = ' ; ' # possible improvement: smarter in-value delimiting? 
@@ -34,20 +39,16 @@ _ID_JSONPATH = ('@id',) -class TrovesearchSimpleCsvRenderer(SimpleTrovesearchRenderer): +class TrovesearchCsvRenderer(TrovesearchCardOnlyRenderer): MEDIATYPE = mediatypes.CSV - INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] CSV_DIALECT: ClassVar[type[csv.Dialect]] = csv.excel - def unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> ProtoRendering: - return self.multicard_rendering(card_pages=iter([{card_iri: osfmap_json}])) - - def multicard_rendering(self, card_pages: Iterator[dict[str, JsonObject]]) -> ProtoRendering: + def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering: _doc = TabularDoc( card_pages, trove_params=getattr(self.response_focus, 'search_params', None), ) - return StreamableRendering( # type: ignore[return-value] + return StreamableRendering( mediatype=self.MEDIATYPE, content_stream=csv_stream(self.CSV_DIALECT, _doc.header(), _doc.rows()), ) @@ -66,22 +67,18 @@ def csv_stream( @dataclasses.dataclass class TabularDoc: - card_pages: Iterator[dict[str, JsonObject]] + card_pages: Iterator[Sequence[tuple[str, JsonObject]]] trove_params: BasicTroveParams | None = None _started: bool = False @functools.cached_property - def column_jsonpaths(self) -> tuple[Jsonpath, ...]: + def column_jsonpaths(self) -> tuple[JsonPath, ...]: _column_jsonpaths = ( _osfmap_jsonpath(_path) for _path in self._column_paths() ) return (_ID_JSONPATH, *_column_jsonpaths) - @functools.cached_property - def first_page(self) -> dict[str, JsonObject]: - return next(self.card_pages, {}) - def _column_paths(self) -> Iterator[Propertypath]: _pathlists: list[Sequence[Propertypath]] = [] if self.trove_params is not None: # hacks @@ -102,29 +99,16 @@ def _column_paths(self) -> Iterator[Propertypath]: _pathlists.append(_pathlist) if not _pathlists: _pathlists.append(osfmap.DEFAULT_TABULAR_SEARCH_COLUMN_PATHS) - return self.iter_unique(itertools.chain.from_iterable(_pathlists)) - - @staticmethod - def 
iter_unique[T](iterable: Iterable[T]) -> Generator[T]: - _seen = set() - for _item in iterable: - if _item not in _seen: - _seen.add(_item) - yield _item - - def _iter_card_pages(self) -> Generator[dict[str, JsonObject]]: - assert not self._started - self._started = True - if self.first_page: - yield self.first_page - yield from self.card_pages + return iter_unique(itertools.chain.from_iterable(_pathlists)) def header(self) -> list[CsvValue]: return ['.'.join(_path) for _path in self.column_jsonpaths] def rows(self) -> Generator[list[CsvValue]]: - for _page in self._iter_card_pages(): - for _card_iri, _osfmap_json in _page.items(): + assert not self._started + self._started = True + for _page in self.card_pages: + for _card_iri, _osfmap_json in _page: yield self._row_values(_osfmap_json) def _row_values(self, osfmap_json: JsonObject) -> list[CsvValue]: @@ -133,10 +117,11 @@ def _row_values(self, osfmap_json: JsonObject) -> list[CsvValue]: for _field_path in self.column_jsonpaths ] - def _row_field_value(self, osfmap_json: JsonObject, field_path: Jsonpath) -> CsvValue: + def _row_field_value(self, osfmap_json: JsonObject, field_path: JsonPath) -> CsvValue: _rendered_values = [ - _render_tabularly(_obj) - for _obj in _iter_values(osfmap_json, field_path) + _obj + for _obj in json_prims(osfmap_json, field_path, _VALUE_KEY_PREFERENCE) + if _obj is not None ] if len(_rendered_values) == 1: return _rendered_values[0] # preserve type for single numbers @@ -144,7 +129,7 @@ def _row_field_value(self, osfmap_json: JsonObject, field_path: Jsonpath) -> Csv return _MULTIVALUE_DELIMITER.join(map(str, _rendered_values)) -def _osfmap_jsonpath(iri_path: Propertypath) -> Jsonpath: +def _osfmap_jsonpath(iri_path: Propertypath) -> JsonPath: _shorthand = osfmap.osfmap_json_shorthand() return tuple( _shorthand.compact_iri(_pathstep) @@ -152,50 +137,6 @@ def _osfmap_jsonpath(iri_path: Propertypath) -> Jsonpath: ) -def _has_value(osfmap_json: JsonObject, path: Jsonpath) -> bool: - try: - 
next(_iter_values(osfmap_json, path)) - except StopIteration: - return False - else: - return True - - -def _iter_values(osfmap_json: JsonObject, path: Jsonpath) -> Generator[JsonValue]: - assert path - (_step, *_rest) = path - _val = osfmap_json.get(_step) - if _rest: - if isinstance(_val, dict): - yield from _iter_values(_val, _rest) - elif isinstance(_val, list): - for _val_obj in _val: - if isinstance(_val_obj, dict): - yield from _iter_values(_val_obj, _rest) - else: - if isinstance(_val, list): - yield from _val - elif _val is not None: - yield _val - - -def _render_tabularly(json_val: JsonValue) -> CsvValue: - if isinstance(json_val, (str, int, float)): - return json_val - if isinstance(json_val, dict): - for _key in _VALUE_KEY_PREFERENCE: - _val = json_val.get(_key) - if isinstance(_val, list): - return ( - _render_tabularly(_val[0]) - if _val - else None - ) - if _val is not None: - return _render_tabularly(_val) - return None - - class _Echo: '''a write-only file-like object, to convince `csv.csvwriter.writerow` to return strings diff --git a/trove/render/simple_json.py b/trove/render/trovesearch_json.py similarity index 68% rename from trove/render/simple_json.py rename to trove/render/trovesearch_json.py index 753d6ee6e..06bd436ab 100644 --- a/trove/render/simple_json.py +++ b/trove/render/trovesearch_json.py @@ -11,37 +11,48 @@ ) from trove.vocab import mediatypes from trove.vocab.namespaces import TROVE, RDF -from ._rendering import StreamableRendering, ProtoRendering -from ._simple_trovesearch import SimpleTrovesearchRenderer +from .rendering import ( + ProtoRendering, + EntireRendering, +) +from .rendering.streamable import StreamableRendering +from ._trovesearch_card_only import TrovesearchCardOnlyRenderer if typing.TYPE_CHECKING: + from collections.abc import ( + Generator, + Iterator, + Sequence, + ) from trove.util.json import JsonObject -class TrovesearchSimpleJsonRenderer(SimpleTrovesearchRenderer): +class 
TrovesearchJsonRenderer(TrovesearchCardOnlyRenderer): '''for "simple json" search api -- very entangled with trove/trovesearch/trovesearch_gathering.py ''' MEDIATYPE = mediatypes.JSON - INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] - def simple_unicard_rendering(self, card_iri: str, osfmap_json: dict[str, typing.Any]) -> str: - return json.dumps({ - 'data': self._render_card_content(card_iri, osfmap_json), - 'links': self._render_links(), - 'meta': self._render_meta(), - }, indent=2) + def unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> ProtoRendering: + return EntireRendering( + mediatype=self.MEDIATYPE, + entire_content=json.dumps({ + 'data': self._render_card_content(card_iri, osfmap_json), + 'links': self._render_links(), + 'meta': self._render_meta(), + }, indent=2), + ) - def multicard_rendering(self, card_pages: typing.Iterator[dict[str, dict[str, typing.Any]]]) -> ProtoRendering: - return StreamableRendering( # type: ignore[return-value] + def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering: + return StreamableRendering( mediatype=self.MEDIATYPE, content_stream=self._stream_json(card_pages), ) - def _stream_json(self, card_pages: typing.Iterator[dict[str, typing.Any]]) -> typing.Generator[str]: + def _stream_json(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> Generator[str]: _prefix = '{"data": [' yield _prefix _datum_prefix = None for _page in card_pages: - for _card_iri, _osfmap_json in _page.items(): + for _card_iri, _osfmap_json in _page: if _datum_prefix is not None: yield _datum_prefix yield json.dumps(self._render_card_content(_card_iri, _osfmap_json), indent=2) @@ -78,7 +89,7 @@ def _render_meta(self) -> dict[str, int | str]: pass return _meta - def _render_links(self) -> dict[str, typing.Any]: + def _render_links(self) -> JsonObject: _links = {} for _pagelink in self._page_links: _twopledict = rdf.twopledict_from_twopleset(_pagelink) @@ -88,8 +99,8 @@ def 
_render_links(self) -> dict[str, typing.Any]: _links[_membername.unicode_value] = _link_url return _links - def _add_twople(self, json_dict: dict[str, typing.Any], predicate_iri: str, object_iri: str) -> None: - _obj_ref = {'@id': object_iri} + def _add_twople(self, json_dict: JsonObject, predicate_iri: str, object_iri: str) -> None: + _obj_ref: JsonObject = {'@id': object_iri} _obj_list = json_dict.setdefault(predicate_iri, []) if isinstance(_obj_list, list): _obj_list.append(_obj_ref) diff --git a/trove/render/trovesearch_tsv.py b/trove/render/trovesearch_tsv.py new file mode 100644 index 000000000..b58882591 --- /dev/null +++ b/trove/render/trovesearch_tsv.py @@ -0,0 +1,10 @@ +import csv + +from trove.vocab import mediatypes + +from .trovesearch_csv import TrovesearchCsvRenderer + + +class TrovesearchTsvRenderer(TrovesearchCsvRenderer): + MEDIATYPE = mediatypes.TSV + CSV_DIALECT = csv.excel_tab diff --git a/trove/render/turtle.py b/trove/render/turtle.py index 869e12472..afad46e96 100644 --- a/trove/render/turtle.py +++ b/trove/render/turtle.py @@ -1,9 +1,11 @@ -from typing import Any - from primitive_metadata import primitive_rdf as rdf from trove.vocab.namespaces import TROVE from ._base import BaseRenderer +from .rendering import ( + EntireRendering, + ProtoRendering, +) class RdfTurtleRenderer(BaseRenderer): @@ -11,7 +13,10 @@ class RdfTurtleRenderer(BaseRenderer): # include indexcard metadata as JSON literals (because QuotedGraph is non-standard) INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] - def simple_render_document(self) -> Any: + def render_document(self) -> ProtoRendering: + return EntireRendering(self.MEDIATYPE, self._render_turtle()) + + def _render_turtle(self) -> str: return rdf.turtle_from_tripledict( self.response_data.tripledict, focus=self.response_focus.single_iri(), diff --git a/trove/static/css/browse.css b/trove/static/css/browse.css index 643bcfcf2..75adadddc 100644 --- a/trove/static/css/browse.css +++ 
b/trove/static/css/browse.css @@ -20,7 +20,7 @@ flex-wrap: wrap; gap: var(--gutter-1); margin: 0; - padding: 1rem; + padding: var(--gutter-2); min-height: 100vh; background-color: lch(var(--bg-luminance) var(--bg-chroma) var(--hue-turn)); } @@ -36,7 +36,7 @@ .Browse__card { display: flex; flex-direction: column; - padding: var(--gutter-2) var(--gutter-3); + padding: var(--gutter-3) var(--gutter-4); background-color: lch(var(--bg-luminance) var(--bg-chroma) var(--hue-turn)); border-color: lch(59% var(--bg-chroma) var(--hue-turn)); border-style: solid; @@ -44,10 +44,10 @@ border-block-start-width: var(--gutter-4); border-inline-end-width: 0; border-block-end-width: 0; - /* - border-start-end-radius: 1rem; - border-end-start-radius: 1rem; - */ +} + +.BrowseWrapper details > summary { + padding-left: var(--gutter-4); } .BrowseWrapper details > summary::before { @@ -65,16 +65,22 @@ .Browse__card > header { display: flex; flex-direction: row; - gap: var(--gutter-2); + flex-wrap: wrap; + gap: var(--gutter-3); align-items: baseline; - border-bottom: solid 1px rgba(0,0,0,0.382); - margin-bottom: var(--gutter-3); + padding-left: var(--gutter-3); } .Browse__card > header > :first-child { margin: 0; } +.Browse__card > header:not(:last-child) { + border-bottom: solid 1px rgba(0,0,0,0.382); + padding-bottom: var(--gutter-3); + margin-bottom: var(--gutter-3); +} + .Browse__card > footer { padding: var(--gutter-2); } @@ -86,7 +92,7 @@ dl.Browse__twopleset { [twople-obj] 1fr ; grid-auto-flow: row; - row-gap: var(--gutter-2); + row-gap: var(--gutter-3); margin: 0; padding: 0; } @@ -126,8 +132,7 @@ dl.Browse__twopleset > dd { .Browse__literal { display: flex; flex-direction: row; - gap: var(--gutter-3); - padding: var(--gutter-4); + gap: var(--gutter-5); } .Browse__literal > q { @@ -140,10 +145,18 @@ dl.Browse__twopleset > dd { .Browse__predicate { background-color: lch(from var(--bg-color-initial) 89% c var(--hue-turn)); - padding: var(--gutter-4); + padding: 0 var(--gutter-4); +} + 
+.Browse__predicate .Browse__literal { + padding: 0 var(--gutter-3); } .Browse__object { background-color: lch(from var(--bg-color-initial) 93% c var(--hue-turn)); - padding: var(--gutter-4); + padding: 0 var(--gutter-4); +} + +.Browse__object.Browse__blanknode { + background-color: lch(var(--bg-luminance) var(--bg-chroma) var(--hue-turn)); } diff --git a/trove/trovebrowse_gathering.py b/trove/trovebrowse_gathering.py index f8efb9a60..8145ed9ef 100644 --- a/trove/trovebrowse_gathering.py +++ b/trove/trovebrowse_gathering.py @@ -39,14 +39,21 @@ def gather_cards_focused_on(focus: gather.Focus, *, blend_cards: bool) -> GathererGenerator: _identifier_qs = trove_db.ResourceIdentifier.objects.queryset_for_iris(focus.iris) _indexcard_qs = trove_db.Indexcard.objects.filter(focus_identifier_set__in=_identifier_qs) + _lrd_qs = ( + trove_db.LatestResourceDescription.objects + .filter(indexcard__in=_indexcard_qs) + .select_related('indexcard') + ) if blend_cards: - for _latest_resource_description in trove_db.LatestResourceDescription.objects.filter(indexcard__in=_indexcard_qs): - yield from rdf.iter_tripleset(_latest_resource_description.as_rdf_tripledict()) + for _resource_description in _lrd_qs: + yield from rdf.iter_tripleset(_resource_description.as_rdfdoc_with_supplements().tripledict) + yield (ns.FOAF.isPrimaryTopicOf, _resource_description.indexcard.get_iri()) else: - for _indexcard in _indexcard_qs: - _card_iri = _indexcard.get_iri() + for _resource_description in _lrd_qs: + _card_iri = _resource_description.indexcard.get_iri() yield (ns.FOAF.isPrimaryTopicOf, _card_iri) yield (_card_iri, ns.RDF.type, ns.TROVE.Indexcard) + yield (_card_iri, ns.TROVE.resourceMetadata, _resource_description.as_quoted_graph()) @trovebrowse.gatherer(ns.TROVE.thesaurusEntry) diff --git a/trove/trovesearch/page_cursor.py b/trove/trovesearch/page_cursor.py index 5bbdf5ac0..4f52dd40a 100644 --- a/trove/trovesearch/page_cursor.py +++ b/trove/trovesearch/page_cursor.py @@ -17,7 +17,6 @@ 
DEFAULT_PAGE_SIZE = 13 MAX_PAGE_SIZE = 101 -UNBOUNDED_PAGE_SIZE = math.inf # json-serialized as "Infinity" @dataclasses.dataclass diff --git a/trove/trovesearch/search_handle.py b/trove/trovesearch/search_handle.py index b3ce4a8f7..ec3fb74ce 100644 --- a/trove/trovesearch/search_handle.py +++ b/trove/trovesearch/search_handle.py @@ -39,7 +39,8 @@ class CardsearchHandle(BasicSearchHandle): search_result_page: typing.Iterable[CardsearchResult] = () related_propertypath_results: list[PropertypathUsage] = dataclasses.field(default_factory=list) - def __post_init__(self): # type: ignore + def __post_init__(self) -> None: + # update cursor and/or search_result_page to agree with each other _cursor = self.cursor _page = self.search_result_page if ( # TODO: move this logic into the... cursor? @@ -60,7 +61,6 @@ def __post_init__(self): # type: ignore elif not _cursor.has_many_more(): # visiting first page for the first time _cursor.first_page_ids = [_result.card_id for _result in _page] - return _page def get_next_streaming_handle(self) -> typing.Self | None: if self.cursor.is_complete_page: diff --git a/trove/trovesearch/search_params.py b/trove/trovesearch/search_params.py index dfe047a49..5149ba941 100644 --- a/trove/trovesearch/search_params.py +++ b/trove/trovesearch/search_params.py @@ -35,6 +35,7 @@ get_single_value, ) from trove.vocab import osfmap +from trove.vocab.jsonapi import JSONAPI_LINK from trove.vocab.trove import trove_json_shorthand from trove.vocab.namespaces import RDF, TROVE, OWL, FOAF, DCTERMS if typing.TYPE_CHECKING: @@ -82,6 +83,7 @@ (TROVE.totalResultCount,), (TROVE.cardSearchText,), (TROVE.cardSearchFilter,), + (JSONAPI_LINK,), ], TROVE.Valuesearch: [ (TROVE.propertyPath,), diff --git a/trove/trovesearch/trovesearch_gathering.py b/trove/trovesearch/trovesearch_gathering.py index 14138cbf0..8b3b16a6e 100644 --- a/trove/trovesearch/trovesearch_gathering.py +++ b/trove/trovesearch/trovesearch_gathering.py @@ -9,9 +9,11 @@ from trove import models as 
@trovesearch_by_indexstrategy.gatherer(
    JSONAPI_LINK,
    focustype_iris={TROVE.Cardsearch},
)
def gather_feed_links(focus: CardsearchFocus, **kwargs: Any) -> GathererGenerator:
    '''gather jsonapi links named "rss" and "atom" for a cardsearch

    yields nothing when `cardsearch_feed_links` has no links for the focus iri
    '''
    _links = cardsearch_feed_links(focus.single_iri())
    if _links is None:
        return  # no feed links for this search
    _rss_link = _jsonapi_link('rss', _links.rss)
    yield (JSONAPI_LINK, _rss_link)
    _atom_link = _jsonapi_link('atom', _links.atom)
    yield (JSONAPI_LINK, _atom_link)
CardsearchView, ValuesearchView, ) -from .views.docs import ( - OpenapiHtmlView, - OpenapiJsonView, -) app_name = 'trove' @@ -19,6 +23,8 @@ path('index-card/', view=IndexcardView.as_view(), name='index-card'), path('index-card-search', view=CardsearchView.as_view(), name='index-card-search'), path('index-value-search', view=ValuesearchView.as_view(), name='index-value-search'), + path('index-card-search/rss.xml', view=CardsearchRssView.as_view(), name='cardsearch-rss'), + path('index-card-search/atom.xml', view=CardsearchAtomView.as_view(), name='cardsearch-atom'), path('browse', view=BrowseIriView.as_view(), name='browse-iri'), path('ingest', view=RdfIngestView.as_view(), name='ingest-rdf'), path('docs/openapi.json', view=OpenapiJsonView.as_view(), name='docs.openapi-json'), diff --git a/trove/util/datetime.py b/trove/util/datetime.py new file mode 100644 index 000000000..ce437e79c --- /dev/null +++ b/trove/util/datetime.py @@ -0,0 +1,18 @@ +import datetime + +from primitive_metadata import primitive_rdf as rdf + + +def datetime_isoformat_z(dt: datetime.datetime | rdf.Literal | str) -> str: + """format (or reformat) a datetime in UTC with 'Z' timezone indicator + + for complying with standards that require the 'Z', like OAI-PMH + https://www.openarchives.org/OAI/openarchivesprotocol.html#Dates + """ + if isinstance(dt, rdf.Literal): + dt = dt.unicode_value + if isinstance(dt, str): + dt = datetime.datetime.fromisoformat(dt) + if isinstance(dt, datetime.datetime) and dt.tzinfo is None: + dt = dt.astimezone(datetime.UTC) + return dt.strftime('%Y-%m-%dT%H:%M:%SZ') diff --git a/trove/util/django.py b/trove/util/django.py index 77cf184bd..9b79165ee 100644 --- a/trove/util/django.py +++ b/trove/util/django.py @@ -16,18 +16,16 @@ def pk_chunked(queryset: QuerySet, chunksize: int) -> Generator[list]: ''' _ordered_qs = queryset.order_by('pk') _prior_end_pk = None - while True: # for each chunk: - _qs = ( - _ordered_qs - if _prior_end_pk is None - else 
# first line of a complete html document (see `HtmlBuilder.as_html_doc`)
HTML_DOCTYPE = '<!DOCTYPE html>'


@dataclasses.dataclass
class HtmlBuilder(XmlBuilder):
    '''HtmlBuilder: an XmlBuilder that serializes as html and keeps track of heading depth
    '''
    root_tag_name: str = 'html'
    _: dataclasses.KW_ONLY
    # 0 until a heading has been opened; otherwise the N of the innermost open `hN`
    _heading_depth: int = 0

    ###
    # html-building helper methods

    @contextlib.contextmanager
    def deeper_heading(self) -> Generator[str]:
        '''context manager that yields the heading tag name one level deeper than the enclosing one

        yields 'h1' at the outermost level and stops deepening at 'h6';
        restores the prior depth on exit (even if the body raises)
        '''
        _outer_heading_depth = self._heading_depth
        if not _outer_heading_depth:
            self._heading_depth = 1
        elif _outer_heading_depth < 6:  # h6 deepest
            self._heading_depth += 1
        try:
            yield f'h{self._heading_depth}'
        finally:
            self._heading_depth = _outer_heading_depth

    def as_html_doc(self) -> str:
        '''serialize as a full html document: the doctype line, then the root element
        '''
        return '\n'.join((HTML_DOCTYPE, str(self)))

    def __str__(self) -> str:
        # html serialization of the root element, as text (no doctype)
        return etree_tostring(self.root_element, encoding='unicode', method='html')

    def __bytes__(self) -> bytes:
        # html serialization of the root element, utf-8 encoded (no doctype)
        return etree_tostring(self.root_element, encoding='utf-8', method='html')
def json_strs(
    json_val: JsonValue,
    path: JsonPath,
    value_key_options: Iterable[str] = JSONLD_VALUE_KEYS,
    coerce_str: bool = False,
) -> Generator[str]:
    '''yield each string among the primitives that `json_prims` finds at `path` in `json_val`

    non-string primitives are skipped, unless `coerce_str` is true --
    then each non-null primitive is yielded as `str(...)` of itself
    '''
    for _primitive in json_prims(json_val, path, value_key_options):
        if _primitive is None:
            continue  # null is never yielded, coerced or not
        if isinstance(_primitive, str):
            yield _primitive
        elif coerce_str:
            yield str(_primitive)
trove_exceptions.InvalidQueryParamName(_qp_name) - else: - _prefixmap[_shortname] = _iri + _prefixmap[_shortname] = _iri _shorthand = cls._default_shorthand() if _prefixmap: _shorthand = _shorthand.with_update(_prefixmap) diff --git a/trove/util/xml.py b/trove/util/xml.py new file mode 100644 index 000000000..79ca0f972 --- /dev/null +++ b/trove/util/xml.py @@ -0,0 +1,66 @@ +from __future__ import annotations +from collections.abc import Generator +import contextlib +import dataclasses +from xml.etree.ElementTree import ( + Element, + SubElement, + tostring as etree_tostring, +) + +from primitive_metadata import primitive_rdf as rdf + + +__all__ = ('XmlBuilder',) + + +@dataclasses.dataclass +class XmlBuilder: + '''XmlBuilder: for building XML (an alternate convenience wrapper around xml.etree) + + >>> _xb = XmlBuilder('foo') + >>> with _xb.nest('bar', {'blib': 'bloz'}): + ... _xb.leaf('baz', text='hello') + ... _xb.leaf('boz', {'blib': 'blab'}, text='world') + >>> str(_xb) + ''' + root_tag_name: str + root_attrs: dict = dataclasses.field(default_factory=dict) + _: dataclasses.KW_ONLY + _nested_elements: list[Element] = dataclasses.field(repr=False, init=False) + + def __post_init__(self) -> None: + self._nested_elements = [Element(self.root_tag_name, self.root_attrs)] + + @property + def root_element(self) -> Element: + return self._nested_elements[0] + + @property + def current_element(self) -> Element: + return self._nested_elements[-1] + + @contextlib.contextmanager + def nest(self, tag_name: str, attrs: dict | None = None) -> Generator[Element]: + _attrs = {**attrs} if attrs else {} + _nested_element = SubElement(self.current_element, tag_name, _attrs) + self._nested_elements.append(_nested_element) + try: + yield self.current_element + finally: + _popped_element = self._nested_elements.pop() + assert _popped_element is _nested_element + + def leaf(self, tag_name: str, attrs: dict | None = None, *, text: str | rdf.Literal | None = None) -> None: + _leaf_element 
= SubElement(self.current_element, tag_name, attrs or {}) + if isinstance(text, rdf.Literal): + # TODO: lang + _leaf_element.text = text.unicode_value + elif text is not None: + _leaf_element.text = text + + def __str__(self) -> str: + return etree_tostring(self.root_element, encoding='unicode') + + def __bytes__(self) -> bytes: + return etree_tostring(self.root_element, encoding='utf-8', xml_declaration=True) diff --git a/trove/views/_base.py b/trove/views/_base.py index 802aa56e2..feede764b 100644 --- a/trove/views/_base.py +++ b/trove/views/_base.py @@ -26,7 +26,7 @@ if TYPE_CHECKING: from django.http import HttpResponse, StreamingHttpResponse, HttpRequest from trove.render import BaseRenderer - from trove.render._rendering import ProtoRendering + from trove.render.rendering import ProtoRendering __all__ = ( @@ -45,7 +45,7 @@ def _render_response_content(self, request, params, renderer_type: type[BaseRend def get(self, request: HttpRequest, **kwargs: str) -> HttpResponse | StreamingHttpResponse: try: - _renderer_type = get_renderer_type(request) + _renderer_type = self._get_renderer_type(request) except trove_exceptions.CannotRenderMediatype as _error: return make_http_error_response( error=_error, @@ -63,6 +63,9 @@ def get(self, request: HttpRequest, **kwargs: str) -> HttpResponse | StreamingHt renderer_type=_renderer_type, ) + def _get_renderer_type(self, request: HttpRequest): + return get_renderer_type(request) + def _parse_params(self, request: HttpRequest): return self.params_type.from_querystring(request.META['QUERY_STRING']) @@ -74,6 +77,8 @@ class GatheredTroveView(BaseTroveView, abc.ABC): focus_type_iris: ClassVar[Container[str]] = () def _render_response_content(self, request, params, renderer_type: type[BaseRenderer], url_kwargs): + '''implement abstract method from BaseTroveView + ''' _focus = self._build_focus(request, params, url_kwargs) _renderer = self._gather_to_renderer(_focus, params, renderer_type) return _renderer.render_document() @@ 
-123,6 +128,8 @@ def cached_static_triples(cls, focus_iri): return cls.get_static_triples(focus_iri) def _render_response_content(self, request, params, renderer_type: type[BaseRenderer], url_kwargs): + '''implement abstract method from BaseTroveView + ''' _focus_iri = self.get_focus_iri() _triples = self.cached_static_triples(_focus_iri) _focus = gather.Focus.new( diff --git a/trove/views/_responder.py b/trove/views/_responder.py index 1d3365742..cada5e74d 100644 --- a/trove/views/_responder.py +++ b/trove/views/_responder.py @@ -5,14 +5,23 @@ from django import http as djhttp from trove.render._base import BaseRenderer -from trove.render._rendering import ( - ProtoRendering, - StreamableRendering, -) +from trove.render.rendering import ProtoRendering +from trove.render.rendering.streamable import StreamableRendering +from trove.render.rendering.html_wrapped import HtmlWrappedRendering from trove.exceptions import TroveError from trove.vocab import mediatypes +_BROWSER_FRIENDLY_MEDIATYPES = { + mediatypes.HTML, + mediatypes.JSON, + mediatypes.JSONLD, + mediatypes.JSONAPI, + mediatypes.ATOM, + mediatypes.RSS, +} + + def make_http_response( *, content_rendering: ProtoRendering, @@ -24,15 +33,26 @@ def make_http_response( if isinstance(content_rendering, StreamableRendering) else djhttp.HttpResponse ) + _download_filename = ( + http_request.GET.get('withFileName') + if http_request is not None + else None + ) + if ( + _download_filename is None + and content_rendering.mediatype not in _BROWSER_FRIENDLY_MEDIATYPES + and http_request is not None + and 'Accept' in http_request.headers + and http_request.accepts(mediatypes.HTML) + ): # when browsing in browser, return html (unless given filename) + content_rendering = HtmlWrappedRendering(content_rendering) _response = _response_type( content_rendering.iter_content(), - content_type=content_rendering.mediatype, + content_type=_make_content_type(content_rendering.mediatype), ) - if http_request is not None: - 
_requested_filename = http_request.GET.get('withFileName') - if _requested_filename is not None: - _file_name = _get_file_name(_requested_filename, content_rendering.mediatype) - _response.headers['Content-Disposition'] = _disposition(_file_name) + if _download_filename is not None: + _file_name = _get_file_name(_download_filename, content_rendering.mediatype) + _response.headers['Content-Disposition'] = _disposition(_file_name) return _response @@ -46,7 +66,7 @@ def make_http_error_response( return djhttp.HttpResponse( _content_rendering.iter_content(), status=error.http_status, - content_type=_content_rendering.mediatype, + content_type=_make_content_type(_content_rendering.mediatype), ) @@ -70,3 +90,13 @@ def _disposition(filename: str) -> bytes: b'filename=' + filename.encode('latin-1', errors='replace'), b"filename*=utf-8''" + filename.encode(), )) + + +def _make_content_type(mediatype: str) -> str: + """make a content-type header value from a mediatype + + currently just adds "charset=utf-8" to text mediatypes that don't already have one + """ + if mediatype.startswith('text/') and ('charset' not in mediatype): + return f'{mediatype};charset=utf-8' + return mediatype diff --git a/trove/views/browse.py b/trove/views/browse.py index 6739b53d7..e50b41721 100644 --- a/trove/views/browse.py +++ b/trove/views/browse.py @@ -47,6 +47,11 @@ def _default_include(cls): _ns.TROVE.usedAtPath, )) + def to_querydict(self): + _querydict = super().to_querydict() + _querydict['iri'] = self.iri + return _querydict + class BrowseIriView(GatheredTroveView): gathering_organizer = trovebrowse diff --git a/trove/views/feeds.py b/trove/views/feeds.py new file mode 100644 index 000000000..ae4b90eb8 --- /dev/null +++ b/trove/views/feeds.py @@ -0,0 +1,48 @@ +from __future__ import annotations +import dataclasses +from typing import TYPE_CHECKING + +from trove.render.cardsearch_rss import CardsearchRssRenderer +from trove.render.cardsearch_atom import CardsearchAtomRenderer +from 
trove.trovesearch.search_params import ( + CardsearchParams, + SortParam, + ValueType, +) +from trove.views.search import CardsearchView +from trove.vocab.namespaces import DCTERMS + +if TYPE_CHECKING: + from django.http import HttpRequest + + +class CardsearchRssView(CardsearchView): + def _get_renderer_type(self, request: HttpRequest): + '''override method from BaseTroveView + + ignore requested mediatype; always render RSS + ''' + return CardsearchRssRenderer + + def _parse_params(self, request: HttpRequest): + '''override method from BaseTroveView + + ignore requested sort; always sort by date created, descending + ''' + _params: CardsearchParams = super()._parse_params(request) + return dataclasses.replace(_params, sort_list=( + SortParam( + value_type=ValueType.DATE, + propertypath=(DCTERMS.created,), + descending=True, + ), + )) + + +class CardsearchAtomView(CardsearchRssView): + def _get_renderer_type(self, request: HttpRequest): + '''override method from BaseTroveView + + ignore requested mediatype; always render Atom + ''' + return CardsearchAtomRenderer diff --git a/trove/views/ingest.py b/trove/views/ingest.py index a6b21590a..4c634bf00 100644 --- a/trove/views/ingest.py +++ b/trove/views/ingest.py @@ -61,9 +61,8 @@ def post(self, request: HttpRequest) -> HttpResponse: except trove_exceptions.DigestiveError as e: logger.exception(str(e)) return http.HttpResponse(str(e), status=HTTPStatus.BAD_REQUEST) - else: - # TODO: include (link to?) extracted card(s) - return http.HttpResponse(status=HTTPStatus.CREATED) + # TODO: include (link to?) 
extracted card(s) + return http.HttpResponse(status=HTTPStatus.CREATED) def delete(self, request: HttpRequest) -> HttpResponse: # TODO: cleaner permissions diff --git a/trove/vocab/mediatypes.py b/trove/vocab/mediatypes.py index 66495683a..24dad5053 100644 --- a/trove/vocab/mediatypes.py +++ b/trove/vocab/mediatypes.py @@ -5,6 +5,8 @@ HTML = 'text/html' TSV = 'text/tab-separated-values' CSV = 'text/csv' +RSS = 'application/rss+xml' +ATOM = 'application/atom+xml' _file_extensions = { @@ -15,11 +17,31 @@ HTML: '.html', TSV: '.tsv', CSV: '.csv', + RSS: '.xml', + ATOM: '.xml', } +_PARAMETER_DELIMITER = ';' + + +def strip_mediatype_parameters(mediatype: str) -> str: + """from a full mediatype that may have parameters, get only the base mediatype + + >>> strip_mediatype_parameters('text/plain;charset=utf-8') + 'text/plain' + >>> strip_mediatype_parameters('text/plain') + 'text/plain' + + note: does not validate that the mediatype exists or makes sense + >>> strip_mediatype_parameters('application/whatever ; blarg=foo') + 'application/whatever' + """ + (_base, _, __) = mediatype.partition(_PARAMETER_DELIMITER) + return _base.strip() + def dot_extension(mediatype: str) -> str: try: - return _file_extensions[mediatype] + return _file_extensions[strip_mediatype_parameters(mediatype)] except KeyError: raise ValueError(f'unrecognized mediatype: {mediatype}') diff --git a/trove/vocab/namespaces.py b/trove/vocab/namespaces.py index c0ebf1cb6..db86e679c 100644 --- a/trove/vocab/namespaces.py +++ b/trove/vocab/namespaces.py @@ -47,6 +47,8 @@ SHAREv2 = rdf.IriNamespace('https://share.osf.io/vocab/2017/sharev2/') # for the OSF metadata application profile (TODO: update to resolvable URL, when there is one) OSFMAP = rdf.IriNamespace('https://osf.io/vocab/2022/') +# non-standard namespace used by OSF for datacite terms (resolves to datacite docs) +DATACITE = rdf.IriNamespace('https://schema.datacite.org/meta/kernel-4/#') # for identifying jsonapi concepts with linked anchors on the 
jsonapi spec (probably fine) JSONAPI = rdf.IriNamespace('https://jsonapi.org/format/1.1/#') @@ -58,6 +60,7 @@ 'jsonapi': JSONAPI, 'oai': OAI, 'oai_dc': OAI_DC, + 'datacite': DATACITE, } if __debug__: # blarg: a nothing namespace for examples and testing diff --git a/trove/vocab/trove.py b/trove/vocab/trove.py index 7dd6d1a9e..5649db6b8 100644 --- a/trove/vocab/trove.py +++ b/trove/vocab/trove.py @@ -1,10 +1,8 @@ import functools -import urllib.parse from typing import Union, Any from uuid import UUID from django.conf import settings -from django.urls import reverse from primitive_metadata.primitive_rdf import ( IriNamespace, IriShorthand, @@ -44,14 +42,6 @@ def _literal_markdown(text: str, *, language: str) -> literal: return literal(text, language=language, mediatype='text/markdown;charset=utf-8') -def trove_browse_link(iri: str) -> str: - _compact = namespaces_shorthand().compact_iri(iri) - return urllib.parse.urljoin( - reverse('trove:browse-iri'), - f'?iri={urllib.parse.quote(_compact)}', - ) - - TROVE_API_THESAURUS: RdfTripleDictionary = { TROVE.search_api: { RDFS.label: {literal('trove search api', language='en')}, @@ -494,7 +484,7 @@ def trove_browse_link(iri: str) -> str: unstable mediatypes (may change or sometimes respond 500): -* `text/html;charset=utf-8`: rdf as browsable html +* `text/html`: rdf as browsable html * `text/turtle`: rdf as [turtle](https://www.w3.org/TR/turtle/) * `application/ld+json`: rdf as [json-ld](https://www.w3.org/TR/json-ld11/)