From 4061cef9a0c5f40436728be6ef2e7adcbd35e04a Mon Sep 17 00:00:00 2001 From: Alex Nizamov Date: Mon, 24 Nov 2025 16:43:45 +0500 Subject: [PATCH 1/2] (#920) Make envelope graphs archive downloadable --- app/api/entities/envelope_download.rb | 10 +- app/api/v1/envelopes.rb | 4 +- app/api/v1/graph.rb | 26 ++ app/models/envelope_community.rb | 2 +- app/models/envelope_download.rb | 9 +- app/services/download_envelopes.rb | 138 ++------- app/services/envelope_dumps/base.rb | 137 +++++++++ .../envelope_dumps/envelope_builder.rb | 13 + app/services/envelope_dumps/graph_builder.rb | 13 + ...23201629_add_type_to_envelope_downloads.rb | 7 + db/structure.sql | 8 +- lib/swagger_docs/sections/graphs.rb | 30 ++ spec/api/v1/envelopes_spec.rb | 6 +- spec/api/v1/graph_spec.rb | 227 +++++++++++++- spec/services/download_envelopes_spec.rb | 279 ++++++------------ .../envelope_dumps/envelope_builder_spec.rb | 204 +++++++++++++ .../envelope_dumps/graph_builder_spec.rb | 160 ++++++++++ 17 files changed, 943 insertions(+), 330 deletions(-) create mode 100644 app/services/envelope_dumps/base.rb create mode 100644 app/services/envelope_dumps/envelope_builder.rb create mode 100644 app/services/envelope_dumps/graph_builder.rb create mode 100644 db/migrate/20251123201629_add_type_to_envelope_downloads.rb create mode 100644 spec/services/envelope_dumps/envelope_builder_spec.rb create mode 100644 spec/services/envelope_dumps/graph_builder_spec.rb diff --git a/app/api/entities/envelope_download.rb b/app/api/entities/envelope_download.rb index c69c2b5d..3ea180f4 100644 --- a/app/api/entities/envelope_download.rb +++ b/app/api/entities/envelope_download.rb @@ -2,24 +2,24 @@ module API module Entities # Presenter for EnvelopeDownload class EnvelopeDownload < Grape::Entity - expose :display_status, as: :status, - documentation: { type: 'string', desc: 'Status of download' } - expose :enqueued_at, documentation: { type: 'string', desc: 'When the download was enqueued' }, if: ->(object) { object.pending? } expose :finished_at, documentation: { type: 'string', desc: 'When the download finished' }, - if: ->(object) { object.finished? } + if: ->(object) { object.failed? || object.finished? } expose :started_at, documentation: { type: 'string', desc: 'When the download started' }, if: ->(object) { object.in_progress? } + expose :status, + documentation: { type: 'string', desc: 'Status of download' } + expose :url, documentation: { type: 'string', desc: 'AWS S3 URL' }, - if: ->(object) { object.finished? } + if: ->(object) { object.failed? || object.finished? } end end end diff --git a/app/api/v1/envelopes.rb b/app/api/v1/envelopes.rb index cef605b7..6915348d 100644 --- a/app/api/v1/envelopes.rb +++ b/app/api/v1/envelopes.rb @@ -73,8 +73,8 @@ class Envelopes < MountableAPI authenticate! authorize Envelope, :index? - @envelope_download = current_community.envelope_download || - current_community.create_envelope_download! + downloads = current_community.envelope_downloads.envelope + @envelope_download = downloads.last || downloads.create! end desc 'Returns the envelope download' diff --git a/app/api/v1/graph.rb b/app/api/v1/graph.rb index a6a68c91..3f4fb8a6 100644 --- a/app/api/v1/graph.rb +++ b/app/api/v1/graph.rb @@ -25,6 +25,32 @@ class Graph < MountableAPI end resource :graph do + resources :download do + before do + authenticate! + authorize Envelope, :index? + + downloads = current_community.envelope_downloads.graph + @envelope_download = downloads.last || downloads.create! + end + + desc 'Returns the envelope download' + get do + present @envelope_download, with: API::Entities::EnvelopeDownload + end + + desc 'Starts an envelope download' + post do + @envelope_download.update!( + enqueued_at: Time.current, + status: :pending + ) + + DownloadEnvelopesJob.perform_later(@envelope_download.id) + present @envelope_download, with: API::Entities::EnvelopeDownload + end + end + namespace do desc 'Return a resource. ' \ 'If the resource is part of a graph, the entire graph is returned.' diff --git a/app/models/envelope_community.rb b/app/models/envelope_community.rb index 59e91aef..3d0330e1 100644 --- a/app/models/envelope_community.rb +++ b/app/models/envelope_community.rb @@ -6,8 +6,8 @@ class EnvelopeCommunity < ActiveRecord::Base include AttributeNormalizer has_one :envelope_community_config - has_one :envelope_download has_many :envelopes + has_many :envelope_downloads has_many :envelope_resources, through: :envelopes has_many :indexed_envelope_resources has_many :versions, class_name: 'EnvelopeVersion' diff --git a/app/models/envelope_download.rb b/app/models/envelope_download.rb index 6b87dbe7..4b17eb62 100644 --- a/app/models/envelope_download.rb +++ b/app/models/envelope_download.rb @@ -1,17 +1,20 @@ # Stores the status and AWS S3 URL of an asynchronously performed envelope download class EnvelopeDownload < ActiveRecord::Base + self.inheritance_column = nil + belongs_to :envelope_community has_many :envelopes, -> { not_deleted }, through: :envelope_community enum :status, { + failed: 'failed', finished: 'finished', in_progress: 'in_progress', pending: 'pending' } - def display_status - return 'failed' if internal_error_message? + enum :type, { envelope: 'envelope', graph: 'graph' } - status + def with_error? + internal_error_message? end end diff --git a/app/services/download_envelopes.rb b/app/services/download_envelopes.rb index 5fdfb74f..850a31ad 100644 --- a/app/services/download_envelopes.rb +++ b/app/services/download_envelopes.rb @@ -1,100 +1,31 @@ -# Dumps an envelope community's envelopes into a ZIP archive and uploads it to S3 -class DownloadEnvelopes # rubocop:todo Metrics/ClassLength - attr_reader :envelope_download, :updated_at +require 'envelope_dumps/envelope_builder' +require 'envelope_dumps/graph_builder' - delegate :envelope_community, to: :envelope_download +# Builds an envelope community's download according to its type +class DownloadEnvelopes + attr_reader :envelope_download, :last_dumped_at def initialize(envelope_download) @envelope_download = envelope_download - @updated_at = envelope_download.started_at + @last_dumped_at = envelope_download.started_at unless envelope_download.with_error? end def self.call(envelope_download:) new(envelope_download).run end - def bucket - ENV.fetch('ENVELOPE_DOWNLOADS_BUCKET') - end - - def create_or_update_entries - FileUtils.mkdir_p(dirname) - - log('Adding recently published envelopes into the dump') - - published_envelopes.find_each do |envelope| - File.write( - File.join(dirname, "#{envelope.envelope_ceterms_ctid}.json"), - API::Entities::Envelope.represent(envelope).to_json - ) - end - end - - def dirname - @dirname ||= [ - envelope_community.name, - Time.current.to_i, - SecureRandom.hex - ].join('_') - end - - def download_file # rubocop:todo Metrics/AbcSize - return unless envelope_download.url? - - log("Downloading the existing dump from #{envelope_download.url}") - - File.open(filename, 'wb') do |file| - URI.parse(envelope_download.url).open do |data| - file.write(data.read) + def builder + builder_class = + case envelope_download.type + when 'envelope' + EnvelopeDumps::EnvelopeBuilder + when 'graph' + EnvelopeDumps::GraphBuilder + else + raise "No dump builder is defined for `#{envelope_download.type}`" end - end - - log("Unarchiving the downloaded dump into #{dirname}") - system("unzip -qq #{filename} -d #{dirname}", exception: true) - rescue StandardError => e - Airbrake.notify(e) - end - - def destroy_envelope_events - @deleted_envelope_ctids = envelope_community - .versions - .where(event: 'destroy') - .where('created_at >= ?', updated_at) - end - - def filename - @filename ||= "#{dirname}.zip" - end - - def log(message) - MR.logger.info(message) - end - - def published_envelopes - @published_envelopes = begin - envelopes = envelope_community - .envelopes - .not_deleted - .includes(:envelope_community, :organization, :publishing_organization) - - envelopes.where!('updated_at >= ?', updated_at) if updated_at - envelopes - end - end - def region - ENV.fetch('AWS_REGION') - end - - def remove_entries - log('Removing recently deleted envelopes from the dump') - - destroy_envelope_events.select(:id, :envelope_ceterms_ctid).find_each do |event| - FileUtils.remove_file( - File.join(dirname, "#{event.envelope_ceterms_ctid}.json"), - true - ) - end + builder_class.new(envelope_download, last_dumped_at) end def run # rubocop:todo Metrics/AbcSize, Metrics/MethodLength @@ -106,44 +37,15 @@ def run # rubocop:todo Metrics/AbcSize, Metrics/MethodLength ) envelope_download.with_lock do - if up_to_date? - log('The dump is up to date.') - return - end - - download_file - create_or_update_entries - remove_entries - envelope_download.url = upload_file + envelope_download.status = :finished + envelope_download.url = builder.run rescue StandardError => e Airbrake.notify(e) envelope_download&.internal_error_backtrace = e.backtrace envelope_download&.internal_error_message = e.message + envelope_download.status = :failed ensure - log('Deleting intermediate files.') - FileUtils.rm_rf(dirname) - FileUtils.rm_f(filename) - envelope_download.update!(finished_at: Time.current, status: :finished) - log('Finished.') + envelope_download.update!(finished_at: Time.current) end end - - def up_to_date? - published_envelopes.none? && destroy_envelope_events.none? - end - - def upload_file - log('Archiving the updated dump.') - - system( - "find #{dirname} -type f -print | zip -FSjqq #{filename} -@", - exception: true - ) - - log('Uploading the updated dump to S3.') - - object = Aws::S3::Resource.new(region:).bucket(bucket).object(filename) - object.upload_file(filename) - object.public_url - end end diff --git a/app/services/envelope_dumps/base.rb b/app/services/envelope_dumps/base.rb new file mode 100644 index 00000000..76d48413 --- /dev/null +++ b/app/services/envelope_dumps/base.rb @@ -0,0 +1,137 @@ +module EnvelopeDumps + # Dumps an envelope community's envelopes or graphs into a ZIP file and uploads it to S3 + class Base # rubocop:todo Metrics/ClassLength + attr_reader :envelope_download, :last_dumped_at + + delegate :envelope_community, to: :envelope_download + + def initialize(envelope_download, last_dumped_at) + @envelope_download = envelope_download + @last_dumped_at = last_dumped_at + end + + def bucket + raise NotImplementedError + end + + def build_content(_envelope) + raise NotImplementedError + end + + def create_or_update_entries + FileUtils.mkdir_p(dirname) + + log('Adding recently published envelopes into the dump') + + published_envelopes.find_each do |envelope| + File.write( + File.join(dirname, "#{envelope.envelope_ceterms_ctid}.json"), + build_content(envelope).to_json + ) + end + end + + def dirname + @dirname ||= [ + envelope_community.name, + Time.current.to_i, + SecureRandom.hex + ].join('_') + end + + def download_file # rubocop:todo Metrics/AbcSize + return unless envelope_download.url? + + log("Downloading the existing dump from #{envelope_download.url}") + + File.open(filename, 'wb') do |file| + URI.parse(envelope_download.url).open do |data| + file.write(data.read) + end + end + + log("Unarchiving the downloaded dump into #{dirname}") + system("unzip -qq #{filename} -d #{dirname}", exception: true) + rescue StandardError => e + Airbrake.notify(e) + end + + def destroy_envelope_events + @destroy_envelope_events ||= envelope_community + .versions + .where(event: 'destroy') + .where('created_at >= ?', last_dumped_at) + end + + def filename + @filename ||= "#{dirname}.zip" + end + + def log(message) + MR.logger.info(message) + end + + def published_envelopes + @published_envelopes ||= begin + envelopes = envelope_community + .envelopes + .not_deleted + .includes(:envelope_community, :organization, :publishing_organization) + + envelopes.where!('updated_at >= ?', last_dumped_at) if last_dumped_at + envelopes + end + end + + def region + ENV.fetch('AWS_REGION') + end + + def remove_entries + log('Removing recently deleted envelopes from the dump') + + destroy_envelope_events.select(:id, :envelope_ceterms_ctid).find_each do |event| + FileUtils.remove_file( + File.join(dirname, "#{event.envelope_ceterms_ctid}.json"), + true + ) + end + end + + def run + if up_to_date? + log('The dump is up to date.') + return + end + + download_file + create_or_update_entries + remove_entries + upload_file + ensure + log('Deleting intermediate files.') + FileUtils.rm_rf(dirname) + FileUtils.rm_f(filename) + log('Finished.') + end + + def up_to_date? + envelope_download.url? && published_envelopes.none? && destroy_envelope_events.none? + end + + def upload_file + log('Archiving the updated dump.') + + system( + "find #{dirname} -type f -print | zip -FSjqq #{filename} -@", + exception: true + ) + + log('Uploading the updated dump to S3.') + + object = Aws::S3::Resource.new(region:).bucket(bucket).object(filename) + object.upload_file(filename) + object.public_url + end + end +end diff --git a/app/services/envelope_dumps/envelope_builder.rb b/app/services/envelope_dumps/envelope_builder.rb new file mode 100644 index 00000000..721c77f3 --- /dev/null +++ b/app/services/envelope_dumps/envelope_builder.rb @@ -0,0 +1,13 @@ +require 'envelope_dumps/base' + +module EnvelopeDumps + class EnvelopeBuilder < Base # rubocop:todo Style/Documentation + def bucket + ENV.fetch('ENVELOPE_DOWNLOADS_BUCKET') + end + + def build_content(envelope) + API::Entities::Envelope.represent(envelope) + end + end +end diff --git a/app/services/envelope_dumps/graph_builder.rb b/app/services/envelope_dumps/graph_builder.rb new file mode 100644 index 00000000..bacb4028 --- /dev/null +++ b/app/services/envelope_dumps/graph_builder.rb @@ -0,0 +1,13 @@ +require 'envelope_dumps/base' + +module EnvelopeDumps + class GraphBuilder < Base # rubocop:todo Style/Documentation + def bucket + ENV.fetch('ENVELOPE_GRAPHS_BUCKET') + end + + def build_content(envelope) + envelope.processed_resource + end + end +end diff --git a/db/migrate/20251123201629_add_type_to_envelope_downloads.rb b/db/migrate/20251123201629_add_type_to_envelope_downloads.rb new file mode 100644 index 00000000..09c90f73 --- /dev/null +++ b/db/migrate/20251123201629_add_type_to_envelope_downloads.rb @@ -0,0 +1,7 @@ +class AddTypeToEnvelopeDownloads < ActiveRecord::Migration[8.0] + def change + add_column :envelope_downloads, :type, :string, default: 'envelope', null: false + remove_index :envelope_downloads, :envelope_community_id, unique: true + add_index :envelope_downloads, %i[envelope_community_id type], unique: true + end +end diff --git a/db/structure.sql b/db/structure.sql index 13067ab7..ccf1f8e3 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -326,7 +326,8 @@ CREATE TABLE public.envelope_downloads ( created_at timestamp(6) without time zone NOT NULL, updated_at timestamp(6) without time zone NOT NULL, status character varying DEFAULT 'pending'::character varying NOT NULL, - enqueued_at timestamp(6) without time zone + enqueued_at timestamp(6) without time zone, + type character varying DEFAULT 'envelope'::character varying NOT NULL ); @@ -1334,10 +1335,10 @@ CREATE INDEX index_envelope_community_configs_on_envelope_community_id ON public -- --- Name: index_envelope_downloads_on_envelope_community_id; Type: INDEX; Schema: public; Owner: - +-- Name: index_envelope_downloads_on_envelope_community_id_and_type; Type: INDEX; Schema: public; Owner: - -- -CREATE UNIQUE INDEX index_envelope_downloads_on_envelope_community_id ON public.envelope_downloads USING btree (envelope_community_id); +CREATE UNIQUE INDEX index_envelope_downloads_on_envelope_community_id_and_type ON public.envelope_downloads USING btree (envelope_community_id, type); -- @@ -1889,6 +1890,7 @@ ALTER TABLE ONLY public.envelopes SET search_path TO "$user", public; INSERT INTO "schema_migrations" (version) VALUES +('20251123201629'), ('20250925025616'), ('20250922224518'), ('20250921174021'), diff --git a/lib/swagger_docs/sections/graphs.rb b/lib/swagger_docs/sections/graphs.rb index 6381f421..4558b5cb 100644 --- a/lib/swagger_docs/sections/graphs.rb +++ b/lib/swagger_docs/sections/graphs.rb @@ -5,6 +5,36 @@ module Graphs # rubocop:todo Style/Documentation extend ActiveSupport::Concern included do + swagger_path '/{community_name}/graph/download' do + operation :get do + key :operationId, 'getApiGraphDownload' + key :description, "Returns the download's status and URL" + key :produces, ['application/json'] + key :tags, ['Graphs'] + + parameter community_name + + response 200 do + key :description, 'Download object' + schema { key :$ref, :EnvelopeDownload } + end + end + + operation :post do + key :operationId, 'postApiGraphDownloads' + key :description, 'Starts a new download' + key :produces, ['application/json'] + key :tags, ['Graphs'] + + parameter community_name + + response 201 do + key :description, 'Download object' + schema { key :$ref, :EnvelopeDownload } + end + end + end + swagger_path '/{community_name}/graph/search' do operation :post do # rubocop:todo Metrics/BlockLength key :operationId, 'postApiGraphSearch' diff --git a/spec/api/v1/envelopes_spec.rb b/spec/api/v1/envelopes_spec.rb index b77d5ab5..132e67b6 100644 --- a/spec/api/v1/envelopes_spec.rb +++ b/spec/api/v1/envelopes_spec.rb @@ -147,7 +147,6 @@ # rubocop:todo RSpec/MultipleMemoizedHelpers context 'GET /:community/envelopes/download' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers let(:finished_at) { nil } - let(:internal_error_message) { nil } let(:started_at) { nil } let(:url) { nil } @@ -195,7 +194,6 @@ :envelope_download, envelope_community:, finished_at:, - internal_error_message:, started_at:, status:, url: @@ -222,8 +220,8 @@ # rubocop:todo RSpec/NestedGroups context 'failed' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups # rubocop:enable RSpec/NestedGroups - let(:internal_error_message) { Faker::Lorem.sentence } - let(:status) { :finished } + let(:finished_at) { Time.current } + let(:status) { :failed } let(:url) { Faker::Internet.url } it 'returns `failed`' do diff --git a/spec/api/v1/graph_spec.rb b/spec/api/v1/graph_spec.rb index 52cbe3a6..a279df79 100644 --- a/spec/api/v1/graph_spec.rb +++ b/spec/api/v1/graph_spec.rb @@ -1,6 +1,10 @@ RSpec.describe API::V1::Graph do + let(:auth_token) { create(:user).auth_token.value } + # rubocop:todo RSpec/MultipleMemoizedHelpers context 'default community' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers + let(:envelope_community) { ec } + let!(:ec) { create(:envelope_community, name: 'ce_registry') } let!(:envelope) { create(:envelope, :from_cer, :with_cer_credential) } let(:resource) { envelope.processed_resource } @@ -145,7 +149,9 @@ expect_status(:ok) expect_json('@id': full_id) expect(json_body).to have_key(:@graph) - expect(json_body[:@graph].map { |o| o[:'ceterms:ctid'] }).to include(competency_id) + expect(json_body[:@graph].map do |o| + o[:'ceterms:ctid'] + end).to include(competency_id) end end # rubocop:enable RSpec/MultipleMemoizedHelpers @@ -160,7 +166,9 @@ expect_status(:ok) expect_json('@id': full_id) expect(json_body).to have_key(:@graph) - expect(json_body[:@graph].map { |o| o[:'ceterms:ctid'] }).to include(competency_id) + expect(json_body[:@graph].map do |o| + o[:'ceterms:ctid'] + end).to include(competency_id) end end # rubocop:enable RSpec/MultipleMemoizedHelpers @@ -251,8 +259,217 @@ end end # rubocop:enable RSpec/MultipleMemoizedHelpers + + context 'GET /:community/graph/download' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers + let(:finished_at) { nil } + let(:internal_error_message) { nil } + let(:started_at) { nil } + let(:url) { nil } + + let(:perform_request) do + get '/graph/download', 'Authorization' => "Token #{auth_token}" + end + + # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:todo RSpec/NestedGroups + context 'with invalid token' do # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:enable RSpec/NestedGroups + let(:auth_token) { 'invalid token' } + + before do + perform_request + end + + it 'returns 401' do + expect_status(:unauthorized) + end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers + + # rubocop:todo RSpec/NestedGroups + context 'with valid token' do # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:enable RSpec/NestedGroups + # rubocop:todo RSpec/NestedGroups + context 'without envelope download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + it 'creates new pending download' do + expect { perform_request }.to change(EnvelopeDownload, :count).by(1) + expect_status(:ok) + + envelope_download = EnvelopeDownload.last + expect(envelope_download.envelope_community).to eq(envelope_community) + expect(envelope_download.status).to eq('pending') + + expect_json_sizes(2) + expect_json('enqueued_at', nil) + expect_json('status', 'pending') + end + end + + # rubocop:todo RSpec/NestedGroups + context 'with envelope download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let!(:envelope_download) do + create( + :envelope_download, + envelope_community:, + finished_at:, + internal_error_message:, + started_at:, + status:, + type: :graph, + url: + ) + end + + # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:todo RSpec/NestedGroups + context 'in progress' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let(:status) { :in_progress } + + it 'returns `in progress`' do + expect { perform_request }.not_to change(EnvelopeDownload, :count) + expect_status(:ok) + expect_json_sizes(2) + expect_json('started_at', envelope_download.started_at.as_json) + expect_json('status', 'in_progress') + end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers + + # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:todo RSpec/NestedGroups + context 'failed' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let(:finished_at) { Time.current } + let(:status) { :failed } + let(:url) { Faker::Internet.url } + + it 'returns `failed`' do + expect { perform_request }.not_to change(EnvelopeDownload, :count) + expect_status(:ok) + expect_json_sizes(3) + expect_json('finished_at', envelope_download.finished_at.as_json) + expect_json('status', 'failed') + expect_json('url', url) + end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers + + # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:todo RSpec/NestedGroups + context 'finished' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let(:finished_at) { Time.current } + let(:status) { :finished } + let(:url) { Faker::Internet.url } + + it 'returns `finished` and URL' do + expect { perform_request }.not_to change(EnvelopeDownload, :count) + expect_status(:ok) + expect_json_sizes(3) + expect_json('finished_at', envelope_download.finished_at.as_json) + expect_json('status', 'finished') + expect_json('url', url) + end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers + + # rubocop:todo RSpec/MultipleMemoizedHelpers + # rubocop:todo RSpec/NestedGroups + context 'pending' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + let(:status) { :pending } + + # rubocop:enable RSpec/NestedGroups + it 'returns `pending`' do + expect { perform_request }.not_to change(EnvelopeDownload, :count) + expect_status(:ok) + expect_json('status', 'pending') + end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers + end + end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers + + # rubocop:todo RSpec/MultipleMemoizedHelpers + context 'POST /:community/graph/download' do # rubocop:todo RSpec/ContextWording + let(:perform_request) do + post '/graph/download', nil, 'Authorization' => "Token #{auth_token}" + end + + # rubocop:todo RSpec/NestedGroups + context 'with invalid token' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let(:auth_token) { 'invalid token' } + + before do + perform_request + end + + it 'returns 401' do + expect_status(:unauthorized) + end + end + + # rubocop:todo RSpec/NestedGroups + context 'with valid token' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let(:now) { Time.current.change(usec: 0) } + + context 'without envelope download' do # rubocop:todo RSpec/NestedGroups + # rubocop:todo RSpec/MultipleExpectations + it 'creates new pending download and enqueues job' do # rubocop:todo RSpec/ExampleLength + # rubocop:enable RSpec/MultipleExpectations + travel_to now do + expect { perform_request }.to change(EnvelopeDownload, :count).by(1) + end + + expect_status(:created) + + envelope_download = EnvelopeDownload.last + expect(envelope_download.envelope_community).to eq(envelope_community) + expect(envelope_download.status).to eq('pending') + + expect_json_sizes(2) + expect_json('enqueued_at', now.as_json) + expect_json('status', 'pending') + + expect(ActiveJob::Base.queue_adapter.enqueued_jobs.size).to eq(1) + + job = ActiveJob::Base.queue_adapter.enqueued_jobs.first + expect(job.fetch('arguments')).to eq([envelope_download.id]) + expect(job.fetch('job_class')).to eq('DownloadEnvelopesJob') + end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers + + # rubocop:todo RSpec/MultipleMemoizedHelpers + context 'with envelope download' do # rubocop:todo RSpec/NestedGroups + let!(:envelope_download) do + create(:envelope_download, :finished, envelope_community:, type: :graph) + end + + it 'enqueues job for existing download' do + travel_to now do + expect { perform_request }.to not_change(EnvelopeDownload, :count) + .and enqueue_job(DownloadEnvelopesJob).with(envelope_download.id) + end + + expect_status(:created) + expect(envelope_download.reload.status).to eq('pending') + + expect_json_sizes(2) + expect_json('enqueued_at', now.as_json) + expect_json('status', 'pending') + end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers + end + end end - # rubocop:enable RSpec/MultipleMemoizedHelpers context 'with community' do let!(:name) { ec.name } @@ -266,6 +483,7 @@ ) end + # rubocop:todo RSpec/MultipleMemoizedHelpers context 'GET /:community_name/graph/:id' do # rubocop:todo RSpec/ContextWording let!(:id) { '123-123-123' } let!(:processed_resource) { attributes_for(:cer_org).merge('@id': id) } @@ -388,7 +606,9 @@ end # rubocop:enable RSpec/MultipleMemoizedHelpers end + # rubocop:enable RSpec/MultipleMemoizedHelpers + # rubocop:todo RSpec/MultipleMemoizedHelpers context 'POST /:community_name/graph/search' do # rubocop:todo RSpec/ContextWording let!(:envelope1) do # rubocop:todo RSpec/IndexedLet create(:envelope, :with_cer_credential, envelope_community: ec) @@ -498,5 +718,6 @@ end # rubocop:enable RSpec/MultipleMemoizedHelpers end + # rubocop:enable RSpec/MultipleMemoizedHelpers end end diff --git a/spec/services/download_envelopes_spec.rb b/spec/services/download_envelopes_spec.rb index 5ec381d6..77b7a9ec 100644 --- a/spec/services/download_envelopes_spec.rb +++ b/spec/services/download_envelopes_spec.rb @@ -1,14 +1,9 @@ RSpec.describe DownloadEnvelopes do # rubocop:todo RSpec/MultipleMemoizedHelpers - let(:bucket) { double('bucket') } # rubocop:todo RSpec/VerifiedDoubles - let(:bucket_name) { 'envelope-downloads-bucket-test' } - let(:envelope_download) { create(:envelope_download, envelope_community:) } - let(:entries) { {} } - let(:hex) { Faker::Lorem.characters.first(32) } - let(:key) { "ce_registry_#{now.to_i}_#{hex}.zip" } - let(:now) { Time.current.change(usec: 0) } - let(:region) { 'aws-region-test' } - let(:resource) { double('resource') } # rubocop:todo RSpec/VerifiedDoubles - let(:s3_object) { double('s3_object') } # rubocop:todo RSpec/VerifiedDoubles + let(:builder) { double('builder') } # rubocop:todo RSpec/VerifiedDoubles + let(:envelope_download) { create(:envelope_download, type:) } + let(:error) { StandardError.new(error_message) } + let(:error_message) { Faker::Lorem.sentence } + let(:now) { Date.current } let(:url) { Faker::Internet.url } let(:download_envelopes) do @@ -17,212 +12,114 @@ end end - let(:envelope_community) do - EnvelopeCommunity.find_or_create_by!(name: 'ce_registry') - end - - let!(:envelope1) do # rubocop:todo RSpec/IndexedLet - create(:envelope, :from_cer) - end - - let!(:envelope2) do # rubocop:todo RSpec/IndexedLet - create(:envelope, :from_cer) - end - - let!(:envelope3) do # rubocop:todo RSpec/IndexedLet - create(:envelope, :from_cer) - end - before do - allow(ENV).to receive(:fetch).with('AWS_REGION').and_return(region) - - allow(ENV).to receive(:fetch) - .with('ENVELOPE_DOWNLOADS_BUCKET') - .and_return(bucket_name) - - allow(Aws::S3::Resource).to receive(:new) - .with(region:) - .and_return(resource) - - allow(SecureRandom).to receive(:hex).and_return(hex) - - allow(resource).to receive(:bucket).with(bucket_name).and_return(bucket) - allow(bucket).to receive(:object).with(key).and_return(s3_object) + allow(builder_class).to receive(:new) + .with(envelope_download, envelope_download.started_at) + .and_return(builder) end describe '.call' do # rubocop:todo RSpec/MultipleMemoizedHelpers - context 'without error' do # rubocop:todo RSpec/MultipleMemoizedHelpers - before do - allow(s3_object).to receive(:upload_file) do |path| - Zip::InputStream.open(path) do |stream| - loop do - entry = stream.get_next_entry - break unless entry - - entries[entry.name] = JSON(stream.read) - end - end - end - - allow(s3_object).to receive(:public_url).and_return(url) - end + context 'with envelope builder' do # rubocop:todo RSpec/MultipleMemoizedHelpers + let(:builder_class) { EnvelopeDumps::EnvelopeBuilder } + let(:type) { :envelope } # rubocop:todo RSpec/NestedGroups - context 'without previous download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + context 'with error' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups # rubocop:enable RSpec/NestedGroups - # rubocop:todo RSpec/MultipleExpectations - it 'creates a new download' do # rubocop:todo RSpec/ExampleLength - # rubocop:enable RSpec/MultipleExpectations - download_envelopes - expect(entries.size).to eq(3) - - entry1 = entries.fetch("#{envelope1.envelope_ceterms_ctid}.json") - entry2 = entries.fetch("#{envelope2.envelope_ceterms_ctid}.json") - entry3 = entries.fetch("#{envelope3.envelope_ceterms_ctid}.json") - - expect(entry1.fetch('envelope_ceterms_ctid')).to eq( - envelope1.envelope_ceterms_ctid - ) - expect(entry1.fetch('decoded_resource')).to eq( - envelope1.processed_resource - ) - expect(entry1.fetch('updated_at').to_time).to be_within(1.second).of( - envelope1.updated_at - ) - - expect(entry2.fetch('envelope_ceterms_ctid')).to eq( - envelope2.envelope_ceterms_ctid - ) - expect(entry2.fetch('decoded_resource')).to eq( - envelope2.processed_resource - ) - expect(entry2.fetch('updated_at').to_time).to be_within(1.second).of( - envelope2.updated_at - ) - - expect(entry3.fetch('envelope_ceterms_ctid')).to eq( - envelope3.envelope_ceterms_ctid - ) - expect(entry3.fetch('decoded_resource')).to eq( - envelope3.processed_resource - ) - expect(entry3.fetch('updated_at').to_time).to be_within(1.second).of( - envelope3.updated_at - ) + before do + allow(builder).to receive(:run).and_raise(error) + end - expect(envelope_download.internal_error_message).to be_nil - expect(envelope_download.status).to eq('finished') - expect(envelope_download.url).to eq(url) + it 'stores error message' do + expect do + download_envelopes + envelope_download.reload + end.to change(envelope_download, :status).to('failed') + .and change(envelope_download, + # rubocop:todo Layout/LineLength + :internal_error_message).to(error_message) + # rubocop:enable Layout/LineLength + .and change(envelope_download, + :started_at).to(now) + .and change(envelope_download, + :finished_at).to(now) end end # rubocop:todo RSpec/NestedGroups - context 'with previous download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + context 'without error' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups # rubocop:enable RSpec/NestedGroups - let(:dump) do - buffer = StringIO.new - - Zip::OutputStream.write_buffer(buffer) do |stream| - [envelope1, envelope2, envelope3].each do |envelope| - stream.put_next_entry("#{envelope.envelope_ceterms_ctid}.json") - stream.puts('{}') - end - end - - buffer.string + before do + allow(builder).to receive(:run).and_return(url) end - let(:envelope_download) do - create( - :envelope_download, - envelope_community:, - started_at: now + 1.second, - url: Faker::Internet.url - ) + it 'stores URL' do # rubocop:todo RSpec/ExampleLength + expect do + download_envelopes + envelope_download.reload + end.to change(envelope_download, :status).to('finished') + .and change(envelope_download, :url).to(url) + .and change( + # rubocop:todo Layout/LineLength + envelope_download, :started_at + # rubocop:enable Layout/LineLength + ).to(now) + .and change( + envelope_download, :finished_at + ).to(now) end + end + end - let!(:envelope4) do - create(:envelope, :from_cer, updated_at: envelope_download.started_at) - end + context 'with graph builder' do # rubocop:todo RSpec/MultipleMemoizedHelpers + let(:builder_class) { EnvelopeDumps::GraphBuilder } + let(:type) { :graph } + # rubocop:todo RSpec/NestedGroups + context 'with error' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups before do - PaperTrail.enabled = true - - envelope2.update_column(:updated_at, envelope_download.started_at) - - travel_to envelope_download.started_at do - envelope3.destroy - end - - stub_request(:get, envelope_download.url).to_return(body: dump) + allow(builder).to receive(:run).and_raise(error) end - after do - PaperTrail.enabled = false - end - - # rubocop:todo RSpec/MultipleExpectations - it 'updates the existing download' do # rubocop:todo RSpec/ExampleLength, RSpec/MultipleExpectations - # rubocop:enable RSpec/MultipleExpectations - download_envelopes - expect(entries.size).to eq(3) - - entry1 = entries.fetch("#{envelope1.envelope_ceterms_ctid}.json") - entry2 = entries.fetch("#{envelope2.envelope_ceterms_ctid}.json") - entry3 = entries.fetch("#{envelope4.envelope_ceterms_ctid}.json") - - expect(entry1).to eq({}) - - expect(entry2.fetch('envelope_ceterms_ctid')).to eq( - envelope2.envelope_ceterms_ctid - ) - expect(entry2.fetch('decoded_resource')).to eq( - envelope2.processed_resource - ) - expect(entry2.fetch('updated_at').to_time).to be_within(1.second).of( - envelope2.updated_at - ) - - expect(entry3.fetch('envelope_ceterms_ctid')).to eq( - envelope4.envelope_ceterms_ctid - ) - expect(entry3.fetch('decoded_resource')).to eq( - envelope4.processed_resource - ) - expect(entry3.fetch('updated_at').to_time).to be_within(1.second).of( - envelope4.updated_at - ) - - expect(envelope_download.internal_error_message).to be_nil - expect(envelope_download.status).to eq('finished') - expect(envelope_download.url).to eq(url) + it 'stores error message' do + expect do + download_envelopes + envelope_download.reload + end.to change(envelope_download, :status).to('failed') + .and change(envelope_download, + # rubocop:todo Layout/LineLength + :internal_error_message).to(error_message) + # rubocop:enable Layout/LineLength + .and change(envelope_download, + :started_at).to(now) + .and change(envelope_download, + :finished_at).to(now) end end - end - - context 'with error' do # rubocop:todo RSpec/MultipleMemoizedHelpers - let(:error) { StandardError.new(error_message) } - let(:error_message) { Faker::Lorem.sentence } - it 'notifies Airbrake and persists error' do # rubocop:todo RSpec/ExampleLength - allow(Airbrake).to receive(:notify).with(error) - allow(s3_object).to receive(:upload_file).and_raise(error) + # rubocop:todo RSpec/NestedGroups + context 'without error' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + before do + allow(builder).to receive(:run).and_return(url) + end - expect do - download_envelopes - envelope_download.reload - end.to change(envelope_download, :finished_at).to(now) - .and change(envelope_download, - # rubocop:todo Layout/LineLength - :internal_error_message).to(error_message) - # rubocop:enable Layout/LineLength - # rubocop:todo Layout/LineLength - .and not_change { - # rubocop:enable Layout/LineLength - # rubocop:todo Layout/LineLength - envelope_download.url - # rubocop:enable Layout/LineLength - } + it 'stores URL' do # rubocop:todo RSpec/ExampleLength + expect do + download_envelopes + envelope_download.reload + end.to change(envelope_download, :status).to('finished') + .and change(envelope_download, :url).to(url) + .and change( + # rubocop:todo Layout/LineLength + envelope_download, :started_at + # rubocop:enable Layout/LineLength + ).to(now) + .and change( + envelope_download, :finished_at + ).to(now) + end end end end diff --git a/spec/services/envelope_dumps/envelope_builder_spec.rb b/spec/services/envelope_dumps/envelope_builder_spec.rb new file mode 100644 index 00000000..7f89e937 --- /dev/null +++ b/spec/services/envelope_dumps/envelope_builder_spec.rb @@ -0,0 +1,204 @@ +RSpec.describe EnvelopeDumps::EnvelopeBuilder do # rubocop:todo RSpec/MultipleMemoizedHelpers + let(:bucket) { double('bucket') } # rubocop:todo RSpec/VerifiedDoubles + let(:bucket_name) { 'envelope-downloads-bucket-test' } + let(:envelope_download) { create(:envelope_download, envelope_community:) } + let(:entries) { {} } + let(:hex) { Faker::Lorem.characters.first(32) } + let(:key) { "ce_registry_#{now.to_i}_#{hex}.zip" } + let(:now) { Time.current.change(usec: 0) } + let(:region) { 'aws-region-test' } + let(:resource) { double('resource') } # rubocop:todo RSpec/VerifiedDoubles + let(:s3_object) { double('s3_object') } # rubocop:todo RSpec/VerifiedDoubles + let(:url) { Faker::Internet.url } + + let(:build_dump) do + travel_to now do + described_class.new(envelope_download, envelope_download.started_at).run + end + end + + let(:envelope_community) do + EnvelopeCommunity.find_or_create_by!(name: 'ce_registry') + end + + let!(:envelope1) do # rubocop:todo RSpec/IndexedLet + create(:envelope, :from_cer, updated_at: now) + end + + let!(:envelope2) do # rubocop:todo RSpec/IndexedLet + create(:envelope, :from_cer, updated_at: now) + end + + let!(:envelope3) do # rubocop:todo RSpec/IndexedLet + create(:envelope, :from_cer, updated_at: now) + end + + before do + allow(ENV).to receive(:fetch).with('AWS_REGION').and_return(region) + + allow(ENV).to receive(:fetch) + .with('ENVELOPE_DOWNLOADS_BUCKET') + .and_return(bucket_name) + + allow(Aws::S3::Resource).to receive(:new) + .with(region:) + .and_return(resource) + + allow(SecureRandom).to receive(:hex).and_return(hex) + + allow(resource).to receive(:bucket).with(bucket_name).and_return(bucket) + allow(bucket).to receive(:object).with(key).and_return(s3_object) + end + + describe '.call' do # rubocop:todo RSpec/MultipleMemoizedHelpers + context 'without error' do # rubocop:todo RSpec/MultipleMemoizedHelpers + before do + allow(s3_object).to receive(:upload_file) do |path| + Zip::InputStream.open(path) do |stream| + loop do + entry = stream.get_next_entry + break unless entry + + entries[entry.name] = JSON(stream.read) + end + end + end + + allow(s3_object).to receive(:public_url).and_return(url) + end + + # rubocop:todo RSpec/NestedGroups + context 'without previous download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + # rubocop:todo RSpec/MultipleExpectations + it 'creates a new download' do # rubocop:todo RSpec/ExampleLength + # rubocop:enable RSpec/MultipleExpectations + build_dump + expect(entries.size).to eq(3) + + entry1 = entries.fetch("#{envelope1.envelope_ceterms_ctid}.json") + entry2 = entries.fetch("#{envelope2.envelope_ceterms_ctid}.json") + entry3 = entries.fetch("#{envelope3.envelope_ceterms_ctid}.json") + + expect(entry1.fetch('envelope_ceterms_ctid')).to eq( + envelope1.envelope_ceterms_ctid + ) + expect(entry1.fetch('decoded_resource')).to eq( + envelope1.processed_resource + ) + expect(entry1.fetch('updated_at').to_time).to be_within(1.second).of( + envelope1.updated_at + ) + + expect(entry2.fetch('envelope_ceterms_ctid')).to eq( + envelope2.envelope_ceterms_ctid + ) + expect(entry2.fetch('decoded_resource')).to eq( + envelope2.processed_resource + ) + expect(entry2.fetch('updated_at').to_time).to be_within(1.second).of( + envelope2.updated_at + ) + + expect(entry3.fetch('envelope_ceterms_ctid')).to eq( + envelope3.envelope_ceterms_ctid + ) + expect(entry3.fetch('decoded_resource')).to eq( + envelope3.processed_resource + ) + expect(entry3.fetch('updated_at').to_time).to be_within(1.second).of( + envelope3.updated_at + ) + end + end + + # rubocop:todo RSpec/NestedGroups + context 'with previous download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let(:dump) do + buffer = StringIO.new + + Zip::OutputStream.write_buffer(buffer) do |stream| + [envelope1, envelope2, envelope3].each do |envelope| + stream.put_next_entry("#{envelope.envelope_ceterms_ctid}.json") + stream.puts('{}') + end + end + + buffer.string + end + + let(:envelope_download) do + create( + :envelope_download, + envelope_community:, + started_at: now + 1.second, + url: Faker::Internet.url + ) + end + + let!(:envelope4) do + create(:envelope, :from_cer, updated_at: envelope_download.started_at) + end + + before do + PaperTrail.enabled = true + + envelope2.update_column(:updated_at, envelope_download.started_at) + + travel_to envelope_download.started_at do + envelope3.destroy + end + + stub_request(:get, envelope_download.url).to_return(body: dump) + end + + after do + PaperTrail.enabled = false + end + + # rubocop:todo RSpec/MultipleExpectations + it 'updates the existing download' do # rubocop:todo RSpec/ExampleLength, RSpec/MultipleExpectations + # rubocop:enable RSpec/MultipleExpectations + build_dump + expect(entries.size).to eq(3) + + entry1 = entries.fetch("#{envelope1.envelope_ceterms_ctid}.json") + entry2 = entries.fetch("#{envelope2.envelope_ceterms_ctid}.json") + entry3 = entries.fetch("#{envelope4.envelope_ceterms_ctid}.json") + + expect(entry1).to eq({}) + + expect(entry2.fetch('envelope_ceterms_ctid')).to eq( + envelope2.envelope_ceterms_ctid + ) + expect(entry2.fetch('decoded_resource')).to eq( + envelope2.processed_resource + ) + expect(entry2.fetch('updated_at').to_time).to be_within(1.second).of( + envelope2.updated_at + ) + + expect(entry3.fetch('envelope_ceterms_ctid')).to eq( + envelope4.envelope_ceterms_ctid + ) + expect(entry3.fetch('decoded_resource')).to eq( + envelope4.processed_resource + ) + expect(entry3.fetch('updated_at').to_time).to be_within(1.second).of( + envelope4.updated_at + ) + end + end + end + + context 'with error' do # rubocop:todo RSpec/MultipleMemoizedHelpers + let(:error) { StandardError.new } + + it 'notifies Airbrake and persists error' do + allow(s3_object).to receive(:upload_file).and_raise(error) + expect { build_dump }.to raise_error(error) + end + end + end +end diff --git a/spec/services/envelope_dumps/graph_builder_spec.rb b/spec/services/envelope_dumps/graph_builder_spec.rb new file mode 100644 index 00000000..ae235e7c --- /dev/null +++ b/spec/services/envelope_dumps/graph_builder_spec.rb @@ -0,0 +1,160 @@ +RSpec.describe EnvelopeDumps::GraphBuilder do # rubocop:todo RSpec/MultipleMemoizedHelpers + let(:bucket) { double('bucket') } # rubocop:todo RSpec/VerifiedDoubles + let(:bucket_name) { 'graph-downloads-bucket-test' } + let(:envelope_download) { create(:envelope_download, envelope_community:) } + let(:entries) { {} } + let(:hex) { Faker::Lorem.characters.first(32) } + let(:key) { "ce_registry_#{now.to_i}_#{hex}.zip" } + let(:now) { Time.current.change(usec: 0) } + let(:region) { 'aws-region-test' } + let(:resource) { double('resource') } # rubocop:todo RSpec/VerifiedDoubles + let(:s3_object) { double('s3_object') } # rubocop:todo RSpec/VerifiedDoubles + let(:url) { Faker::Internet.url } + + let(:build_dump) do + travel_to now do + described_class.new(envelope_download, envelope_download.started_at).run + end + end + + let(:envelope_community) do + EnvelopeCommunity.find_or_create_by!(name: 'ce_registry') + end + + let!(:envelope1) do # rubocop:todo RSpec/IndexedLet + create(:envelope, :from_cer, updated_at: now) + end + + let!(:envelope2) do # rubocop:todo RSpec/IndexedLet + create(:envelope, :from_cer, updated_at: now) + end + + let!(:envelope3) do # rubocop:todo RSpec/IndexedLet + create(:envelope, :from_cer, updated_at: now) + end + + before do + allow(ENV).to receive(:fetch).with('AWS_REGION').and_return(region) + + allow(ENV).to receive(:fetch) + .with('ENVELOPE_GRAPHS_BUCKET') + .and_return(bucket_name) + + allow(Aws::S3::Resource).to receive(:new) + .with(region:) + .and_return(resource) + + allow(SecureRandom).to receive(:hex).and_return(hex) + + allow(resource).to receive(:bucket).with(bucket_name).and_return(bucket) + allow(bucket).to receive(:object).with(key).and_return(s3_object) + end + + describe '.call' do # rubocop:todo RSpec/MultipleMemoizedHelpers + context 'without error' do # rubocop:todo RSpec/MultipleMemoizedHelpers + before do + allow(s3_object).to receive(:upload_file) do |path| + Zip::InputStream.open(path) do |stream| + loop do + entry = stream.get_next_entry + break unless entry + + entries[entry.name] = JSON(stream.read) + end + end + end + + allow(s3_object).to receive(:public_url).and_return(url) + end + + # rubocop:todo RSpec/NestedGroups + context 'without previous download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + # rubocop:todo RSpec/MultipleExpectations + it 'creates a new download' do + # rubocop:enable RSpec/MultipleExpectations + build_dump + expect(entries.size).to eq(3) + + entry1 = entries.fetch("#{envelope1.envelope_ceterms_ctid}.json") + entry2 = entries.fetch("#{envelope2.envelope_ceterms_ctid}.json") + entry3 = entries.fetch("#{envelope3.envelope_ceterms_ctid}.json") + + expect(entry1).to eq(envelope1.processed_resource) + expect(entry2).to eq(envelope2.processed_resource) + expect(entry3).to eq(envelope3.processed_resource) + end + end + + # rubocop:todo RSpec/NestedGroups + context 'with previous download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + # rubocop:enable RSpec/NestedGroups + let(:dump) do + buffer = StringIO.new + + Zip::OutputStream.write_buffer(buffer) do |stream| + [envelope1, envelope2, envelope3].each do |envelope| + stream.put_next_entry("#{envelope.envelope_ceterms_ctid}.json") + stream.puts('{}') + end + end + + buffer.string + end + + let(:envelope_download) do + create( + :envelope_download, + envelope_community:, + started_at: now + 1.second, + url: Faker::Internet.url + ) + end + + let!(:envelope4) do + create(:envelope, :from_cer, updated_at: envelope_download.started_at) + end + + before do + PaperTrail.enabled = true + + envelope2.update_column(:updated_at, envelope_download.started_at) + + travel_to envelope_download.started_at do + envelope3.destroy + end + + stub_request(:get, envelope_download.url).to_return(body: dump) + end + + after do + PaperTrail.enabled = false + end + + # rubocop:todo RSpec/MultipleExpectations + it 'updates the existing download' do # rubocop:todo RSpec/MultipleExpectations + # rubocop:enable RSpec/MultipleExpectations + build_dump + expect(entries.size).to eq(3) + + entry1 = entries.fetch("#{envelope1.envelope_ceterms_ctid}.json") + entry2 = entries.fetch("#{envelope2.envelope_ceterms_ctid}.json") + entry3 = entries.fetch("#{envelope4.envelope_ceterms_ctid}.json") + + expect(entry1).to eq({}) + expect(entry2).to eq(envelope2.processed_resource) + expect(entry3).to eq(envelope4.processed_resource) + end + end + end + + context 'with error' do # rubocop:todo RSpec/MultipleMemoizedHelpers + let(:error) { StandardError.new } + + it 'notifies Airbrake and persists error' do + allow(s3_object).to receive(:upload_file).and_raise(error) + expect { build_dump }.to raise_error(error) + end + end + end +end From 171f4239ae728c7c14c0f0e6198df074902bd359 Mon Sep 17 00:00:00 2001 From: Alex Nizamov Date: Tue, 9 Dec 2025 23:06:03 +0500 Subject: [PATCH 2/2] (#920) Download envelope and graph dumps using S3 client --- app/services/envelope_dumps/base.rb | 29 +++++++++---------- .../envelope_dumps/envelope_builder.rb | 2 +- app/services/envelope_dumps/graph_builder.rb | 2 +- .../envelope_dumps/envelope_builder_spec.rb | 12 ++++++-- .../envelope_dumps/graph_builder_spec.rb | 10 ++++++- 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/app/services/envelope_dumps/base.rb b/app/services/envelope_dumps/base.rb index 76d48413..bbb4abc3 100644 --- a/app/services/envelope_dumps/base.rb +++ b/app/services/envelope_dumps/base.rb @@ -3,7 +3,7 @@ module EnvelopeDumps class Base # rubocop:todo Metrics/ClassLength attr_reader :envelope_download, :last_dumped_at - delegate :envelope_community, to: :envelope_download + delegate :envelope_community, :url, to: :envelope_download def initialize(envelope_download, last_dumped_at) @envelope_download = envelope_download @@ -11,6 +11,12 @@ def initialize(envelope_download, last_dumped_at) end def bucket + @bucket ||= Aws::S3::Resource + .new(region: ENV.fetch('AWS_REGION')) + .bucket(bucket_name) + end + + def bucket_name raise NotImplementedError end @@ -40,15 +46,12 @@ def dirname end def download_file # rubocop:todo Metrics/AbcSize - return unless envelope_download.url? - - log("Downloading the existing dump from #{envelope_download.url}") + return unless url.present? - File.open(filename, 'wb') do |file| - URI.parse(envelope_download.url).open do |data| - file.write(data.read) - end - end + log("Downloading the existing dump from #{url}") + previous_filename = url.split('/').last + object = bucket.object(previous_filename) + object.get(response_target: filename) log("Unarchiving the downloaded dump into #{dirname}") system("unzip -qq #{filename} -d #{dirname}", exception: true) @@ -83,10 +86,6 @@ def published_envelopes end end - def region - ENV.fetch('AWS_REGION') - end - def remove_entries log('Removing recently deleted envelopes from the dump') @@ -116,7 +115,7 @@ def run end def up_to_date? - envelope_download.url? && published_envelopes.none? && destroy_envelope_events.none? + url.present? && published_envelopes.none? && destroy_envelope_events.none? end def upload_file @@ -129,7 +128,7 @@ def upload_file log('Uploading the updated dump to S3.') - object = Aws::S3::Resource.new(region:).bucket(bucket).object(filename) + object = bucket.object(filename) object.upload_file(filename) object.public_url end diff --git a/app/services/envelope_dumps/envelope_builder.rb b/app/services/envelope_dumps/envelope_builder.rb index 721c77f3..c19eaba1 100644 --- a/app/services/envelope_dumps/envelope_builder.rb +++ b/app/services/envelope_dumps/envelope_builder.rb @@ -2,7 +2,7 @@ module EnvelopeDumps class EnvelopeBuilder < Base # rubocop:todo Style/Documentation - def bucket + def bucket_name ENV.fetch('ENVELOPE_DOWNLOADS_BUCKET') end diff --git a/app/services/envelope_dumps/graph_builder.rb b/app/services/envelope_dumps/graph_builder.rb index bacb4028..1c695079 100644 --- a/app/services/envelope_dumps/graph_builder.rb +++ b/app/services/envelope_dumps/graph_builder.rb @@ -2,7 +2,7 @@ module EnvelopeDumps class GraphBuilder < Base # rubocop:todo Style/Documentation - def bucket + def bucket_name ENV.fetch('ENVELOPE_GRAPHS_BUCKET') end diff --git a/spec/services/envelope_dumps/envelope_builder_spec.rb b/spec/services/envelope_dumps/envelope_builder_spec.rb index 7f89e937..2131f2df 100644 --- a/spec/services/envelope_dumps/envelope_builder_spec.rb +++ b/spec/services/envelope_dumps/envelope_builder_spec.rb @@ -115,6 +115,8 @@ # rubocop:todo RSpec/NestedGroups context 'with previous download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups # rubocop:enable RSpec/NestedGroups + let(:previous_key) { Faker::Lorem.characters.first(32) } + let(:dump) do buffer = StringIO.new @@ -133,7 +135,7 @@ :envelope_download, envelope_community:, started_at: now + 1.second, - url: Faker::Internet.url + url: "#{Faker::Internet.url}/#{previous_key}" ) end @@ -144,13 +146,17 @@ before do PaperTrail.enabled = true + allow(bucket).to receive(:object).with(previous_key).and_return(s3_object) + + allow(s3_object).to receive(:get).with(response_target: key) do + File.write(key, dump) + end + envelope2.update_column(:updated_at, envelope_download.started_at) travel_to envelope_download.started_at do envelope3.destroy end - - stub_request(:get, envelope_download.url).to_return(body: dump) end after do diff --git a/spec/services/envelope_dumps/graph_builder_spec.rb b/spec/services/envelope_dumps/graph_builder_spec.rb index ae235e7c..7450f42e 100644 --- a/spec/services/envelope_dumps/graph_builder_spec.rb +++ b/spec/services/envelope_dumps/graph_builder_spec.rb @@ -88,6 +88,8 @@ # rubocop:todo RSpec/NestedGroups context 'with previous download' do # rubocop:todo RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups + let(:previous_key) { Faker::Lorem.characters.first(32) } + # rubocop:enable RSpec/NestedGroups let(:dump) do buffer = StringIO.new @@ -107,7 +109,7 @@ :envelope_download, envelope_community:, started_at: now + 1.second, - url: Faker::Internet.url + url: "#{Faker::Internet.url}/#{previous_key}" ) end @@ -118,6 +120,12 @@ before do PaperTrail.enabled = true + allow(bucket).to receive(:object).with(previous_key).and_return(s3_object) + + allow(s3_object).to receive(:get).with(response_target: key) do + File.write(key, dump) + end + envelope2.update_column(:updated_at, envelope_download.started_at) travel_to envelope_download.started_at do