diff --git a/.rubocop.yml b/.rubocop.yml index 5a32e87da..ae1fb40de 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -44,7 +44,9 @@ RSpec/MultipleMemoizedHelpers: RSpec/NestedGroups: Max: 7 RSpec/ExampleLength: - Max: 15 + Max: 20 +RSpec/MultipleExpectations: + Max: 5 Style/NumericLiterals: Enabled: false @@ -78,4 +80,4 @@ Style/HashTransformValues: AllCops: NewCops: enable - TargetRailsVersion: 4.2 + TargetRailsVersion: 7.2 diff --git a/app/workers/recents_cleanup_worker.rb b/app/workers/recents_cleanup_worker.rb new file mode 100644 index 000000000..a5541304a --- /dev/null +++ b/app/workers/recents_cleanup_worker.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +class RecentsCleanupWorker + include Sidekiq::Worker + + sidekiq_options queue: :default, + retry: 0, + congestion: { + interval: 1.hour, + max_in_interval: 1, + reject_with: :cancel + } + + def perform + # Delete all older than 90 days + Recent.where('created_at < ?', 90.days.ago).in_batches(of: 5000).delete_all + + # Identify users active in the past 2 hours + recently_active_pairs = Recent.where('created_at > ?', 2.hours.ago) + .distinct + .pluck(:user_id, :project_id) + + # Clean up any recents over 20 per user/project for recently active users + recently_active_pairs.each do |user_id, project_id| + scope = Recent.where(user_id: user_id, project_id: project_id) + + next unless scope.count > 20 + + ids_to_keep = scope.order(created_at: :desc) + .limit(20) + .pluck(:id) + + scope.where.not(id: ids_to_keep).delete_all + end + end +end diff --git a/db/migrate/20260428222525_delete_old_recents.rb b/db/migrate/20260428222525_delete_old_recents.rb new file mode 100644 index 000000000..9ff31778c --- /dev/null +++ b/db/migrate/20260428222525_delete_old_recents.rb @@ -0,0 +1,132 @@ +# frozen_string_literal: true + +class DeleteOldRecents < ActiveRecord::Migration[7.2] + disable_ddl_transaction! + + def up + safety_assured do + cutoff_date = 90.days.ago.to_fs(:db) + current_time = Time.current.to_fs(:db) + + say 'Step 1: Creating new table from existing and loading recent recents...' + + execute <<-SQL + CREATE TABLE recents_new (LIKE recents INCLUDING DEFAULTS INCLUDING CONSTRAINTS); + + INSERT INTO recents_new + SELECT * FROM recents + WHERE created_at >= '#{cutoff_date}' + AND created_at < '#{current_time}'; + SQL + + say 'Step 2: Building indexes and FKs on new table with temporary names...' + + execute 'ALTER TABLE recents_new ADD PRIMARY KEY (id);' + + execute 'CREATE INDEX index_recents_new_on_workflow_id ON recents_new (workflow_id);' + execute 'CREATE INDEX index_recents_new_on_project_id ON recents_new (project_id);' + execute 'CREATE INDEX index_recents_new_on_user_id ON recents_new (user_id);' + execute 'CREATE INDEX index_recents_new_on_subject_id ON recents_new (subject_id);' + execute 'CREATE INDEX index_recents_new_on_created_at ON recents_new (created_at);' + + # New compound index for user/project/created_at lookups + execute 'CREATE INDEX index_recents_on_user_project_and_created ON recents_new (user_id, project_id, created_at DESC);' + + execute <<-SQL + ALTER TABLE recents_new + ADD CONSTRAINT fk_recents_classifications + FOREIGN KEY (classification_id) REFERENCES classifications(id); + + ALTER TABLE recents_new + ADD CONSTRAINT fk_recents_subjects + FOREIGN KEY (subject_id) REFERENCES subjects(id); + SQL + + say 'Step 3: Executing the table swap...' + + execute <<-SQL + BEGIN; + + -- Lock table prevent incoming writes + LOCK TABLE recents IN ACCESS EXCLUSIVE MODE; + + -- Catch up any records created during above operations + INSERT INTO recents_new + SELECT * FROM recents + WHERE created_at >= '#{current_time}'; + + -- Swap the tables + ALTER TABLE recents RENAME TO recents_old; + ALTER TABLE recents_new RENAME TO recents; + + -- Clean up index names so structure.sql looks untouched + ALTER INDEX recents_pkey RENAME TO recents_old_pkey; + ALTER INDEX recents_new_pkey RENAME TO recents_pkey; + + ALTER INDEX index_recents_on_workflow_id RENAME TO index_recents_old_on_workflow_id; + ALTER INDEX index_recents_new_on_workflow_id RENAME TO index_recents_on_workflow_id; + + ALTER INDEX index_recents_on_project_id RENAME TO index_recents_old_on_project_id; + ALTER INDEX index_recents_new_on_project_id RENAME TO index_recents_on_project_id; + + ALTER INDEX index_recents_on_user_id RENAME TO index_recents_old_on_user_id; + ALTER INDEX index_recents_new_on_user_id RENAME TO index_recents_on_user_id; + + ALTER INDEX index_recents_on_subject_id RENAME TO index_recents_old_on_subject_id; + ALTER INDEX index_recents_new_on_subject_id RENAME TO index_recents_on_subject_id; + + ALTER INDEX index_recents_on_created_at RENAME TO index_recents_old_on_created_at; + ALTER INDEX index_recents_new_on_created_at RENAME TO index_recents_on_created_at; + + -- Transfer sequence ownership + ALTER SEQUENCE recents_id_seq OWNED BY recents.id; + + COMMIT; + SQL + + say 'Step 4: Updating database statistics for the new table...' + execute 'ANALYZE recents;' + + say 'Recents swap complete.' + end + end + + def down + safety_assured do + execute <<-SQL + BEGIN; + LOCK TABLE recents IN ACCESS EXCLUSIVE MODE; + + -- Swap tables back + ALTER TABLE recents RENAME TO recents_new; + ALTER TABLE recents_old RENAME TO recents; + + -- Revert Sequence + ALTER SEQUENCE recents_id_seq OWNED BY recents.id; + + -- Revert index names to original state + ALTER INDEX recents_pkey RENAME TO recents_new_pkey; + ALTER INDEX recents_old_pkey RENAME TO recents_pkey; + + ALTER INDEX index_recents_on_workflow_id RENAME TO index_recents_new_on_workflow_id; + ALTER INDEX index_recents_old_on_workflow_id RENAME TO index_recents_on_workflow_id; + + ALTER INDEX index_recents_on_project_id RENAME TO index_recents_new_on_project_id; + ALTER INDEX index_recents_old_on_project_id RENAME TO index_recents_on_project_id; + + ALTER INDEX index_recents_on_user_id RENAME TO index_recents_new_on_user_id; + ALTER INDEX index_recents_old_on_user_id RENAME TO index_recents_on_user_id; + + ALTER INDEX index_recents_on_subject_id RENAME TO index_recents_new_on_subject_id; + ALTER INDEX index_recents_old_on_subject_id RENAME TO index_recents_on_subject_id; + + ALTER INDEX index_recents_on_created_at RENAME TO index_recents_new_on_created_at; + ALTER INDEX index_recents_old_on_created_at RENAME TO index_recents_on_created_at; + + COMMIT; + SQL + + execute 'DROP TABLE recents_new;' + end + end +end diff --git a/db/structure.sql b/db/structure.sql index 5d09146fd..83666867c 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -1128,6 +1128,24 @@ CREATE SEQUENCE public.recents_id_seq ALTER SEQUENCE public.recents_id_seq OWNED BY public.recents.id; +-- +-- Name: recents_old; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.recents_old ( + id integer DEFAULT nextval('public.recents_id_seq'::regclass) NOT NULL, + classification_id integer, + subject_id integer, + created_at timestamp without time zone NOT NULL, + updated_at timestamp without time zone NOT NULL, + project_id integer, + workflow_id integer, + user_id integer, + user_group_id integer, + mark_remove boolean DEFAULT false +); + + -- -- Name: schema_migrations; Type: TABLE; Schema: public; Owner: - -- @@ -2495,6 +2513,14 @@ ALTER TABLE ONLY public.projects ADD CONSTRAINT projects_pkey PRIMARY KEY (id); +-- +-- Name: recents_old recents_old_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.recents_old + ADD CONSTRAINT recents_old_pkey PRIMARY KEY (id); + + -- -- Name: recents recents_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- @@ -3231,6 +3257,41 @@ CREATE INDEX index_projects_on_state ON public.projects USING btree (state) WHER CREATE INDEX index_projects_on_tsv ON public.projects USING gin (tsv); +-- +-- Name: index_recents_old_on_created_at; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_old_on_created_at ON public.recents_old USING btree (created_at); + + +-- +-- Name: index_recents_old_on_project_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_old_on_project_id ON public.recents_old USING btree (project_id); + + +-- +-- Name: index_recents_old_on_subject_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_old_on_subject_id ON public.recents_old USING btree (subject_id); + + +-- +-- Name: index_recents_old_on_user_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_old_on_user_id ON public.recents_old USING btree (user_id); + + +-- +-- Name: index_recents_old_on_workflow_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_old_on_workflow_id ON public.recents_old USING btree (workflow_id); + + -- -- Name: index_recents_on_created_at; Type: INDEX; Schema: public; Owner: - -- @@ -3252,6 +3313,13 @@ CREATE INDEX index_recents_on_project_id ON public.recents USING btree (project_ CREATE INDEX index_recents_on_subject_id ON public.recents USING btree (subject_id); +-- +-- Name: index_recents_on_user_and_created; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_on_user_and_created ON public.recents USING btree (user_id, created_at DESC); + + -- -- Name: index_recents_on_user_id; Type: INDEX; Schema: public; Owner: - -- @@ -3804,10 +3872,10 @@ ALTER TABLE ONLY public.gold_standard_annotations -- --- Name: recents fk_rails_1e54468460; Type: FK CONSTRAINT; Schema: public; Owner: - +-- Name: recents_old fk_rails_1e54468460; Type: FK CONSTRAINT; Schema: public; Owner: - -- -ALTER TABLE ONLY public.recents +ALTER TABLE ONLY public.recents_old ADD CONSTRAINT fk_rails_1e54468460 FOREIGN KEY (classification_id) REFERENCES public.classifications(id); @@ -3884,10 +3952,10 @@ ALTER TABLE ONLY public.user_project_preferences -- --- Name: recents fk_rails_5244e2cc55; Type: FK CONSTRAINT; Schema: public; Owner: - +-- Name: recents_old fk_rails_5244e2cc55; Type: FK CONSTRAINT; Schema: public; Owner: - -- -ALTER TABLE ONLY public.recents +ALTER TABLE ONLY public.recents_old ADD CONSTRAINT fk_rails_5244e2cc55 FOREIGN KEY (subject_id) REFERENCES public.subjects(id); @@ -4235,6 +4303,22 @@ ALTER TABLE ONLY public.users ADD CONSTRAINT fk_rails_fedc809cf8 FOREIGN KEY (project_id) REFERENCES public.projects(id) ON UPDATE CASCADE ON DELETE RESTRICT; +-- +-- Name: recents fk_recents_classifications; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.recents + ADD CONSTRAINT fk_recents_classifications FOREIGN KEY (classification_id) REFERENCES public.classifications(id); + + +-- +-- Name: recents fk_recents_subjects; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.recents + ADD CONSTRAINT fk_recents_subjects FOREIGN KEY (subject_id) REFERENCES public.subjects(id); + + -- -- PostgreSQL database dump complete -- @@ -4242,6 +4326,7 @@ ALTER TABLE ONLY public.users SET search_path TO "$user", public; INSERT INTO "schema_migrations" (version) VALUES +('20260428222525'), ('20260407184533'), ('20260323120200'), ('20260323120100'), diff --git a/spec/workers/recents_cleanup_worker_spec.rb b/spec/workers/recents_cleanup_worker_spec.rb new file mode 100644 index 000000000..a4bc22d60 --- /dev/null +++ b/spec/workers/recents_cleanup_worker_spec.rb @@ -0,0 +1,113 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RecentsCleanupWorker, type: :worker do + describe '#perform' do + let(:worker) { described_class.new } + + context 'when running a temporal sweep' do + let(:user) { create(:user) } + + it 'deletes recents older than 90 days' do + old_cls = create(:classification, user: user, created_at: 100.days.ago) + old_recent = create(:recent, classification: old_cls) + + # Make sure the recent's created_at isn't overridden by the factory + old_recent.update_column(:created_at, old_cls.created_at) + + new_cls = create(:classification, user: user, created_at: 1.day.ago) + new_recent = create(:recent, classification: new_cls) + new_recent.update_column(:created_at, new_cls.created_at) + + expect { + worker.perform + }.to change { Recent.exists?(old_recent.id) }.from(true).to(false) + + expect(Recent.exists?(new_recent.id)).to be true + end + end + + context 'when running a volume sweep' do + let(:user) { create(:user) } + let(:project_a) { create(:project) } + let(:project_b) { create(:project) } + + it 'keeps the 20 newest recents and deletes the rest for a recently active user', :aggregate_failures do + old_recents = [] + 5.times { + old_cls = create(:classification, project: project_a, user: user, created_at: 15.days.ago) + old_recent = create(:recent, classification: old_cls) + old_recent.update_column(:created_at, old_cls.created_at) + old_recents << old_recent + } + + new_recents = [] + 20.times { + new_cls = create(:classification, project: project_a, user: user, created_at: 30.minutes.ago) + new_recent = create(:recent, classification: new_cls) + new_recent.update_column(:created_at, new_cls.created_at) + new_recents << new_recent + } + + expect(Recent.where(user_id: user.id).count).to eq(25) + expect { + worker.perform + }.to change(Recent, :count).by(-5) + + # Confirm it was the 5 older ones that were deleted + old_records = Recent.where('created_at < ?', 10.days.ago) + expect(old_records).to be_empty + end + + it 'finds the 20 newest recents per user per project and deletes the rest', :aggregate_failures do + 25.times do + cls = create(:classification, user: user, project: project_a) + r = create(:recent, classification: cls) + r.update_column(:created_at, 30.minutes.ago) + end + + 15.times do + cls = create(:classification, user: user, project: project_b) + r = create(:recent, classification: cls) + r.update_column(:created_at, 30.minutes.ago) + end + + expect(Recent.where(user_id: user.id).count).to eq(40) + expect { + worker.perform + }.to change(Recent, :count).by(-5) + + expect(Recent.where(user_id: user.id, project_id: project_a.id).count).to eq(20) + expect(Recent.where(user_id: user.id, project_id: project_b.id).count).to eq(15) + end + + it 'does not delete anything if a recently active user has 20 or fewer recents' do + 15.times { + cls = create(:classification, project: project_a, user: user, created_at: 30.minutes.ago) + rec = create(:recent, classification: cls) + rec.update_column(:created_at, cls.created_at) + } + + expect { + worker.perform + }.not_to change(Recent, :count) + end + + it 'ignores users who have not been active in the last hour' do + # The user hasn't been active in the past hour, so these recents are ignored + # They'll be cleaned up by the temporal sweep when they're older than 90 days + + 25.times { + cls = create(:classification, project: project_a, user: user, created_at: 2.hours.ago) + rec = create(:recent, classification: cls) + rec.update_column(:created_at, cls.created_at) + } + + expect { + worker.perform + }.not_to change(Recent, :count) + end + end + end +end