From 718ee675baf61187b36f646499b1f1733ebb5671 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Wed, 6 May 2026 17:51:57 -0500 Subject: [PATCH 01/11] Recents table swap --- .../20260428222525_delete_old_recents.rb | 130 ++++++++++++++++++ db/structure.sql | 93 ++++++++++++- 2 files changed, 219 insertions(+), 4 deletions(-) create mode 100644 db/migrate/20260428222525_delete_old_recents.rb diff --git a/db/migrate/20260428222525_delete_old_recents.rb b/db/migrate/20260428222525_delete_old_recents.rb new file mode 100644 index 000000000..4029ce84f --- /dev/null +++ b/db/migrate/20260428222525_delete_old_recents.rb @@ -0,0 +1,130 @@ +class DeleteOldRecents < ActiveRecord::Migration[7.2] + disable_ddl_transaction! + + def up + safety_assured do + cutoff_date = 14.days.ago.to_fs(:db) + current_time = Time.current.to_fs(:db) + + say "Step 1: Creating new table from existing and loading recent recents..." + + execute <<-SQL + CREATE TABLE recents_new (LIKE recents INCLUDING DEFAULTS INCLUDING CONSTRAINTS); + + INSERT INTO recents_new + SELECT * FROM recents + WHERE created_at >= '#{cutoff_date}' + AND created_at < '#{current_time}'; + SQL + + say "Step 2: Building indexes and FKs on new table with temporary names..." 
+ + execute "ALTER TABLE recents_new ADD PRIMARY KEY (id);" + + execute "CREATE INDEX index_recents_new_on_workflow_id ON recents_new (workflow_id);" + execute "CREATE INDEX index_recents_new_on_project_id ON recents_new (project_id);" + execute "CREATE INDEX index_recents_new_on_user_id ON recents_new (user_id);" + execute "CREATE INDEX index_recents_new_on_subject_id ON recents_new (subject_id);" + execute "CREATE INDEX index_recents_new_on_created_at ON recents_new (created_at);" + + # New compound index for user/created_at lookups + execute "CREATE INDEX index_recents_on_user_and_created ON recents_new (user_id, created_at DESC);" + + execute <<-SQL + ALTER TABLE recents_new + ADD CONSTRAINT fk_recents_classifications + FOREIGN KEY (classification_id) REFERENCES classifications(id); + + ALTER TABLE recents_new + ADD CONSTRAINT fk_recents_subjects + FOREIGN KEY (subject_id) REFERENCES subjects(id); + SQL + + say "Step 3: Executing the table swap..." + + execute <<-SQL + BEGIN; + + -- Lock the table to prevent incoming writes + LOCK TABLE recents IN ACCESS EXCLUSIVE MODE; + + -- Catch up any records created during the above operations + INSERT INTO recents_new + SELECT * FROM recents + WHERE created_at >= '#{current_time}'; + + -- Swap the tables + ALTER TABLE recents RENAME TO recents_old; + ALTER TABLE recents_new RENAME TO recents; + + -- Clean up index names so structure.sql looks untouched + ALTER INDEX recents_pkey RENAME TO recents_old_pkey; + ALTER INDEX recents_new_pkey RENAME TO recents_pkey; + + ALTER INDEX index_recents_on_workflow_id RENAME TO index_recents_old_on_workflow_id; + ALTER INDEX index_recents_new_on_workflow_id RENAME TO index_recents_on_workflow_id; + + ALTER INDEX index_recents_on_project_id RENAME TO index_recents_old_on_project_id; + ALTER INDEX index_recents_new_on_project_id RENAME TO index_recents_on_project_id; + + ALTER INDEX index_recents_on_user_id RENAME TO index_recents_old_on_user_id; + ALTER INDEX index_recents_new_on_user_id RENAME 
TO index_recents_on_user_id; + + ALTER INDEX index_recents_on_subject_id RENAME TO index_recents_old_on_subject_id; + ALTER INDEX index_recents_new_on_subject_id RENAME TO index_recents_on_subject_id; + + ALTER INDEX index_recents_on_created_at RENAME TO index_recents_old_on_created_at; + ALTER INDEX index_recents_new_on_created_at RENAME TO index_recents_on_created_at; + + -- Transfer sequence ownership + ALTER SEQUENCE recents_id_seq OWNED BY recents.id; + + COMMIT; + SQL + + say "Step 4: Updating database statistics for the new table..." + execute "ANALYZE recents;" + + say "Recents swap complete." + end + end + + def down + safety_assured do + execute <<-SQL + BEGIN; + LOCK TABLE recents IN ACCESS EXCLUSIVE MODE; + + -- Swap tables back + ALTER TABLE recents RENAME TO recents_new; + ALTER TABLE recents_old RENAME TO recents; + + -- Revert Sequence + ALTER SEQUENCE recents_id_seq OWNED BY recents.id; + + -- Revert index names to original state + ALTER INDEX recents_pkey RENAME TO recents_new_pkey; + ALTER INDEX recents_old_pkey RENAME TO recents_pkey; + + ALTER INDEX index_recents_on_workflow_id RENAME TO index_recents_new_on_workflow_id; + ALTER INDEX index_recents_old_on_workflow_id RENAME TO index_recents_on_workflow_id; + + ALTER INDEX index_recents_on_project_id RENAME TO index_recents_new_on_project_id; + ALTER INDEX index_recents_old_on_project_id RENAME TO index_recents_on_project_id; + + ALTER INDEX index_recents_on_user_id RENAME TO index_recents_new_on_user_id; + ALTER INDEX index_recents_old_on_user_id RENAME TO index_recents_on_user_id; + + ALTER INDEX index_recents_on_subject_id RENAME TO index_recents_new_on_subject_id; + ALTER INDEX index_recents_old_on_subject_id RENAME TO index_recents_on_subject_id; + + ALTER INDEX index_recents_on_created_at RENAME TO index_recents_new_on_created_at; + ALTER INDEX index_recents_old_on_created_at RENAME TO index_recents_on_created_at; + + COMMIT; + SQL + + execute "DROP TABLE recents_new;" + end + end +end \ 
No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index ade9f7e0f..4b7cfa172 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -1126,6 +1126,24 @@ CREATE SEQUENCE public.recents_id_seq ALTER SEQUENCE public.recents_id_seq OWNED BY public.recents.id; +-- +-- Name: recents_old; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.recents_old ( + id integer DEFAULT nextval('public.recents_id_seq'::regclass) NOT NULL, + classification_id integer, + subject_id integer, + created_at timestamp without time zone NOT NULL, + updated_at timestamp without time zone NOT NULL, + project_id integer, + workflow_id integer, + user_id integer, + user_group_id integer, + mark_remove boolean DEFAULT false +); + + -- -- Name: schema_migrations; Type: TABLE; Schema: public; Owner: - -- @@ -2493,6 +2511,14 @@ ALTER TABLE ONLY public.projects ADD CONSTRAINT projects_pkey PRIMARY KEY (id); +-- +-- Name: recents_old recents_old_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.recents_old + ADD CONSTRAINT recents_old_pkey PRIMARY KEY (id); + + -- -- Name: recents recents_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- @@ -3229,6 +3255,41 @@ CREATE INDEX index_projects_on_state ON public.projects USING btree (state) WHER CREATE INDEX index_projects_on_tsv ON public.projects USING gin (tsv); +-- +-- Name: index_recents_old_on_created_at; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_old_on_created_at ON public.recents_old USING btree (created_at); + + +-- +-- Name: index_recents_old_on_project_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_old_on_project_id ON public.recents_old USING btree (project_id); + + +-- +-- Name: index_recents_old_on_subject_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_old_on_subject_id ON public.recents_old USING btree (subject_id); + + +-- +-- Name: index_recents_old_on_user_id; Type: INDEX; Schema: 
public; Owner: - +-- + +CREATE INDEX index_recents_old_on_user_id ON public.recents_old USING btree (user_id); + + +-- +-- Name: index_recents_old_on_workflow_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_old_on_workflow_id ON public.recents_old USING btree (workflow_id); + + -- -- Name: index_recents_on_created_at; Type: INDEX; Schema: public; Owner: - -- @@ -3250,6 +3311,13 @@ CREATE INDEX index_recents_on_project_id ON public.recents USING btree (project_ CREATE INDEX index_recents_on_subject_id ON public.recents USING btree (subject_id); +-- +-- Name: index_recents_on_user_and_created; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_recents_on_user_and_created ON public.recents USING btree (user_id, created_at DESC); + + -- -- Name: index_recents_on_user_id; Type: INDEX; Schema: public; Owner: - -- @@ -3802,10 +3870,10 @@ ALTER TABLE ONLY public.gold_standard_annotations -- --- Name: recents fk_rails_1e54468460; Type: FK CONSTRAINT; Schema: public; Owner: - +-- Name: recents_old fk_rails_1e54468460; Type: FK CONSTRAINT; Schema: public; Owner: - -- -ALTER TABLE ONLY public.recents +ALTER TABLE ONLY public.recents_old ADD CONSTRAINT fk_rails_1e54468460 FOREIGN KEY (classification_id) REFERENCES public.classifications(id); @@ -3882,10 +3950,10 @@ ALTER TABLE ONLY public.user_project_preferences -- --- Name: recents fk_rails_5244e2cc55; Type: FK CONSTRAINT; Schema: public; Owner: - +-- Name: recents_old fk_rails_5244e2cc55; Type: FK CONSTRAINT; Schema: public; Owner: - -- -ALTER TABLE ONLY public.recents +ALTER TABLE ONLY public.recents_old ADD CONSTRAINT fk_rails_5244e2cc55 FOREIGN KEY (subject_id) REFERENCES public.subjects(id); @@ -4233,6 +4301,22 @@ ALTER TABLE ONLY public.users ADD CONSTRAINT fk_rails_fedc809cf8 FOREIGN KEY (project_id) REFERENCES public.projects(id) ON UPDATE CASCADE ON DELETE RESTRICT; +-- +-- Name: recents fk_recents_classifications; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER 
TABLE ONLY public.recents + ADD CONSTRAINT fk_recents_classifications FOREIGN KEY (classification_id) REFERENCES public.classifications(id); + + +-- +-- Name: recents fk_recents_subjects; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.recents + ADD CONSTRAINT fk_recents_subjects FOREIGN KEY (subject_id) REFERENCES public.subjects(id); + + -- -- PostgreSQL database dump complete -- @@ -4240,6 +4324,7 @@ ALTER TABLE ONLY public.users SET search_path TO "$user", public; INSERT INTO "schema_migrations" (version) VALUES +('20260428222525'), ('20260323120200'), ('20260323120100'), ('20260323120000'), From e183999a1545e7c5143873cd3d68dac15dc60994 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Wed, 6 May 2026 17:52:07 -0500 Subject: [PATCH 02/11] Recents cleanup worker & spec Co-authored-by: Copilot --- app/workers/recents_cleanup_worker.rb | 35 ++++++++ spec/workers/recents_cleanup_worker_spec.rb | 88 +++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 app/workers/recents_cleanup_worker.rb create mode 100644 spec/workers/recents_cleanup_worker_spec.rb diff --git a/app/workers/recents_cleanup_worker.rb b/app/workers/recents_cleanup_worker.rb new file mode 100644 index 000000000..c60366ee3 --- /dev/null +++ b/app/workers/recents_cleanup_worker.rb @@ -0,0 +1,35 @@ +class RecentsCleanupWorker + include Sidekiq::Worker + + sidekiq_options queue: :default, + retry: 0, + congestion: { + interval: 1.hour, + max_in_interval: 1, + reject_with: :cancel + } + + def perform + # Delete all older than 14 days + Recent.where('created_at < ?', 14.days.ago).in_batches(of: 5000).delete_all + + # Identify users active in the past 2 hours + recently_active_user_ids = Recent.where('created_at > ?', 2.hours.ago) + .distinct + .pluck(:user_id) + + # Delete all but the 20 newest recents for each recently active user + recently_active_user_ids.each do |user_id| + next unless Recent.where(user_id: user_id).count > 20 + + ids_to_keep = 
Recent.where(user_id: user_id) + .order(created_at: :desc) + .limit(20) + .pluck(:id) + + Recent.where(user_id: user_id) + .where.not(id: ids_to_keep) + .delete_all + end + end +end \ No newline at end of file diff --git a/spec/workers/recents_cleanup_worker_spec.rb b/spec/workers/recents_cleanup_worker_spec.rb new file mode 100644 index 000000000..03fc1f5c7 --- /dev/null +++ b/spec/workers/recents_cleanup_worker_spec.rb @@ -0,0 +1,88 @@ +require 'spec_helper' + +RSpec.describe RecentsCleanupWorker, type: :worker do + describe '#perform' do + let(:worker) { described_class.new } + + context 'Temporal Sweep' do + let(:user) { create(:user)} + + it 'deletes recents older than 14 days' do + old_cls = create(:classification, user: user, created_at: 15.days.ago) + old_recent = create(:recent, classification: old_cls) + + # Make sure the recent's created_at isn't overridden by the factory + old_recent.update_column(:created_at, old_cls.created_at) + + new_cls = create(:classification, user: user, created_at: 1.days.ago) + new_recent = create(:recent, classification: new_cls) + new_recent.update_column(:created_at, new_cls.created_at) + + + expect { + worker.perform + }.to change { Recent.exists?(old_recent.id) }.from(true).to(false) + + expect(Recent.exists?(new_recent.id)).to be true + end + end + + context 'Volume sweep' do + let(:user) { create(:user)} + + it 'keeps the 20 newest recents and deletes the rest for a recently active user' do + old_recents = [] + 5.times { + old_cls = create(:classification, user: user, created_at: 15.days.ago) + old_recent = create(:recent, classification: old_cls) + old_recent.update_column(:created_at, old_cls.created_at) + old_recents << old_recent + } + + new_recents = [] + 20.times { + new_cls = create(:classification, user: user, created_at: 30.minutes.ago) + new_recent = create(:recent, classification: new_cls) + new_recent.update_column(:created_at, new_cls.created_at) + new_recents << new_recent + } + + 
expect(Recent.where(user_id: user.id).count).to eq(25) + expect { + worker.perform + }.to change { Recent.count }.by(-5) + + # Confirm it was the 5 older ones that were deleted + old_records = Recent.where('created_at < ?', 10.days.ago) + expect(old_records).to be_empty + end + + it 'does not delete anything if a recently active user has 20 or fewer recents' do + 15.times { + cls = create(:classification, user: user, created_at: 30.minutes.ago) + rec = create(:recent, classification: cls) + rec.update_column(:created_at, cls.created_at) + } + + expect { + worker.perform + }.not_to change { Recent.count } + end + + it 'ignores users who have not been active in the last hour' do + # The user hasn't been active in the past hour, so these recents are ignored + # They'll be cleaned up by the temporal sweep when they're older than 14 days + + 25.times { + cls = create(:classification, user: user, created_at: 2.hours.ago) + rec = create(:recent, classification: cls) + rec.update_column(:created_at, cls.created_at) + } + + expect { + worker.perform + }.not_to change { Recent.count } + end + end + end +end \ No newline at end of file From 74c78272561230f6d0be8389c4f321fa9b1b89a9 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Wed, 6 May 2026 18:21:46 -0500 Subject: [PATCH 03/11] newlines --- app/workers/recents_cleanup_worker.rb | 2 +- db/migrate/20260428222525_delete_old_recents.rb | 2 +- spec/workers/recents_cleanup_worker_spec.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/workers/recents_cleanup_worker.rb b/app/workers/recents_cleanup_worker.rb index c60366ee3..c1c7c6f9f 100644 --- a/app/workers/recents_cleanup_worker.rb +++ b/app/workers/recents_cleanup_worker.rb @@ -32,4 +32,4 @@ def perform .delete_all end end -end \ No newline at end of file +end diff --git a/db/migrate/20260428222525_delete_old_recents.rb b/db/migrate/20260428222525_delete_old_recents.rb index 4029ce84f..1631e98a6 100644 --- 
a/db/migrate/20260428222525_delete_old_recents.rb +++ b/db/migrate/20260428222525_delete_old_recents.rb @@ -127,4 +127,4 @@ def down execute "DROP TABLE recents_new;" end end -end \ No newline at end of file +end diff --git a/spec/workers/recents_cleanup_worker_spec.rb b/spec/workers/recents_cleanup_worker_spec.rb index 03fc1f5c7..8df0e10c5 100644 --- a/spec/workers/recents_cleanup_worker_spec.rb +++ b/spec/workers/recents_cleanup_worker_spec.rb @@ -85,4 +85,4 @@ end end end -end \ No newline at end of file +end From f3638e573a32472a26231a195cc1ca2b31315a59 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Wed, 6 May 2026 18:34:07 -0500 Subject: [PATCH 04/11] Add frozen_string_literal magic comment to recents cleanup worker --- app/workers/recents_cleanup_worker.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/workers/recents_cleanup_worker.rb b/app/workers/recents_cleanup_worker.rb index c1c7c6f9f..62fd043c0 100644 --- a/app/workers/recents_cleanup_worker.rb +++ b/app/workers/recents_cleanup_worker.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + class RecentsCleanupWorker include Sidekiq::Worker From 9c9c0d6cbd20d6bb0ce9ffb9b39c637628e14629 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Wed, 6 May 2026 18:35:10 -0500 Subject: [PATCH 05/11] Add frozen_string_literal magic comment to recents migration --- db/migrate/20260428222525_delete_old_recents.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/db/migrate/20260428222525_delete_old_recents.rb b/db/migrate/20260428222525_delete_old_recents.rb index 1631e98a6..e0e93d151 100644 --- a/db/migrate/20260428222525_delete_old_recents.rb +++ b/db/migrate/20260428222525_delete_old_recents.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + class DeleteOldRecents < ActiveRecord::Migration[7.2] disable_ddl_transaction! 
From fc2d8f8f4a377e87a80119ead68314fa64a7f811 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 26 May 2026 13:24:12 -0500 Subject: [PATCH 06/11] Hound cleanup Co-authored-by: Copilot --- .../20260428222525_delete_old_recents.rb | 28 +++++++++---------- spec/workers/recents_cleanup_worker_spec.rb | 17 +++++------ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/db/migrate/20260428222525_delete_old_recents.rb b/db/migrate/20260428222525_delete_old_recents.rb index e0e93d151..f927932b0 100644 --- a/db/migrate/20260428222525_delete_old_recents.rb +++ b/db/migrate/20260428222525_delete_old_recents.rb @@ -8,7 +8,7 @@ def up cutoff_date = 14.days.ago.to_fs(:db) current_time = Time.current.to_fs(:db) - say "Step 1: Creating new table from existing and loading recent recents..." + say 'Step 1: Creating new table from existing and loading recent recents...' execute <<-SQL CREATE TABLE recents_new (LIKE recents INCLUDING DEFAULTS INCLUDING CONSTRAINTS); @@ -19,18 +19,18 @@ def up AND created_at < '#{current_time}'; SQL - say "Step 2: Building indexes and FKs on new table with temporary names..." + say 'Step 2: Building indexes and FKs on new table with temporary names...' 
- execute "ALTER TABLE recents_new ADD PRIMARY KEY (id);" + execute 'ALTER TABLE recents_new ADD PRIMARY KEY (id);' - execute "CREATE INDEX index_recents_new_on_workflow_id ON recents_new (workflow_id);" - execute "CREATE INDEX index_recents_new_on_project_id ON recents_new (project_id);" - execute "CREATE INDEX index_recents_new_on_user_id ON recents_new (user_id);" - execute "CREATE INDEX index_recents_new_on_subject_id ON recents_new (subject_id);" - execute "CREATE INDEX index_recents_new_on_created_at ON recents_new (created_at);" + execute 'CREATE INDEX index_recents_new_on_workflow_id ON recents_new (workflow_id);' + execute 'CREATE INDEX index_recents_new_on_project_id ON recents_new (project_id);' + execute 'CREATE INDEX index_recents_new_on_user_id ON recents_new (user_id);' + execute 'CREATE INDEX index_recents_new_on_subject_id ON recents_new (subject_id);' + execute 'CREATE INDEX index_recents_new_on_created_at ON recents_new (created_at);' # New compound index for user/created_at lookups - execute "CREATE INDEX index_recents_on_user_and_created ON recents_new (user_id, created_at DESC);" + execute 'CREATE INDEX index_recents_on_user_and_created ON recents_new (user_id, created_at DESC);' execute <<-SQL ALTER TABLE recents_new @@ -42,7 +42,7 @@ def up FOREIGN KEY (subject_id) REFERENCES subjects(id); SQL - say "Step 3: Executing the table swap..." + say 'Step 3: Executing the table swap...' execute <<-SQL BEGIN; @@ -84,10 +84,10 @@ def up COMMIT; SQL - say "Step 4: Updating database statistics for the new table..." - execute "ANALYZE recents;" + say 'Step 4: Updating database statistics for the new table...' + execute 'ANALYZE recents;' - say "Recents swap complete." + say 'Recents swap complete.' 
end end @@ -126,7 +126,7 @@ def down COMMIT; SQL - execute "DROP TABLE recents_new;" + execute 'DROP TABLE recents_new;' end end end diff --git a/spec/workers/recents_cleanup_worker_spec.rb b/spec/workers/recents_cleanup_worker_spec.rb index 8df0e10c5..8c6c236f2 100644 --- a/spec/workers/recents_cleanup_worker_spec.rb +++ b/spec/workers/recents_cleanup_worker_spec.rb @@ -1,10 +1,12 @@ +# frozen_string_literal: true + require 'spec_helper' RSpec.describe RecentsCleanupWorker, type: :worker do describe '#perform' do let(:worker) { described_class.new } - context 'Temporal Sweep' do + context 'when running a temporal sweep' do let(:user) { create(:user)} it 'deletes recents older than 14 days' do @@ -14,11 +16,10 @@ # Make sure the recent's created_at isn't overridden by the factory old_recent.update_column(:created_at, old_cls.created_at) - new_cls = create(:classification, user: user, created_at: 1.days.ago) + new_cls = create(:classification, user: user, created_at: 1.day.ago) new_recent = create(:recent, classification: new_cls) new_recent.update_column(:created_at, new_cls.created_at) - expect { worker.perform }.to change { Recent.exists?(old_recent.id) }.from(true).to(false) @@ -27,8 +28,8 @@ end end - context 'Volume sweep' do - let(:user) { create(:user)} + context 'when running a volume sweep' do + let(:user) { create(:user) } it 'keeps the 20 newest recents and deletes the rest for a recently active user' do old_recents = [] @@ -50,7 +51,7 @@ expect(Recent.where(user_id: user.id).count).to eq(25) expect { worker.perform - }.to change { Recent.count }.by(-5) + }.to change(Recent, :count).by(-5) # Confirm it was the 5 older ones that were deleted old_records = Recent.where('created_at < ?', 10.days.ago) @@ -66,7 +67,7 @@ expect { worker.perform - }.not_to change { Recent.count } + }.not_to change(Recent, :count) end it 'ignores users who have not been active in the last hour' do @@ -81,7 +82,7 @@ expect { worker.perform - }.not_to change { Recent.count } + 
}.not_to change(Recent, :count) end end end From 08b5172e3cfb0dc6c90a336ba9bf6ef63c2b3a5f Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 26 May 2026 13:26:46 -0500 Subject: [PATCH 07/11] Hound: fix brace spacing in recents cleanup worker spec --- spec/workers/recents_cleanup_worker_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/workers/recents_cleanup_worker_spec.rb b/spec/workers/recents_cleanup_worker_spec.rb index 8c6c236f2..d59394fda 100644 --- a/spec/workers/recents_cleanup_worker_spec.rb +++ b/spec/workers/recents_cleanup_worker_spec.rb @@ -7,7 +7,7 @@ let(:worker) { described_class.new } context 'when running a temporal sweep' do - let(:user) { create(:user)} + let(:user) { create(:user) } it 'deletes recents older than 14 days' do old_cls = create(:classification, user: user, created_at: 15.days.ago) From 57b9192feb3a4fdaa03caa785f36839fd6290146 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 26 May 2026 15:22:03 -0500 Subject: [PATCH 08/11] Keep recents per user per project, increase keep time to 90 days Co-authored-by: Copilot --- app/workers/recents_cleanup_worker.rb | 38 +++++++++--------- .../20260428222525_delete_old_recents.rb | 4 +- spec/workers/recents_cleanup_worker_spec.rb | 40 +++++++++++++++---- 3 files changed, 53 insertions(+), 29 deletions(-) diff --git a/app/workers/recents_cleanup_worker.rb b/app/workers/recents_cleanup_worker.rb index 62fd043c0..58e0d71d8 100644 --- a/app/workers/recents_cleanup_worker.rb +++ b/app/workers/recents_cleanup_worker.rb @@ -12,26 +12,26 @@ class RecentsCleanupWorker } def perform - # Delete all older than 14 days - Recent.where('created_at < ?', 14.days.ago).in_batches(of: 5000).delete_all + # Delete all older than 90 days + Recent.where('created_at < ?', 90.days.ago).in_batches(of: 5000).delete_all # Identify users active in the past 2 hours - recently_active_user_ids = Recent.where('created_at > ?', 2.hours.ago) - .distinct - .pluck(:user_id) - - # Delete all but the 20 newest recents for each recently 
active user - recently_active_user_ids.each do |user_id| - next unless Recent.where(user_id: user_id).count > 20 - - ids_to_keep = Recent.where(user_id: user_id) - .order(created_at: :desc) - .limit(20) - .pluck(:id) - - Recent.where(user_id: user_id) - .where.not(id: ids_to_keep) - .delete_all + recently_active_pairs = Recent.where('created_at > ?', 2.hours.ago) + .distinct + .pluck(:user_id, :project_id) + + + # Clean up any recents over 20 per user/project for recently active users + recently_active_pairs.each do |user_id, project_id| + scope = Recent.where(user_id: user_id, project_id: project_id) + + next unless scope.count > 20 + + ids_to_keep = scope.order(created_at: :desc) + .limit(20) + .pluck(:id) + + scope.where.not(id: ids_to_keep).delete_all end end -end +end \ No newline at end of file diff --git a/db/migrate/20260428222525_delete_old_recents.rb b/db/migrate/20260428222525_delete_old_recents.rb index f927932b0..bc77ed4ad 100644 --- a/db/migrate/20260428222525_delete_old_recents.rb +++ b/db/migrate/20260428222525_delete_old_recents.rb @@ -29,8 +29,8 @@ def up execute 'CREATE INDEX index_recents_new_on_subject_id ON recents_new (subject_id);' execute 'CREATE INDEX index_recents_new_on_created_at ON recents_new (created_at);' - # New compound index for user/created_at lookups - execute 'CREATE INDEX index_recents_on_user_and_created ON recents_new (user_id, created_at DESC);' + # New compound index for user/project/created_at lookups + execute 'CREATE INDEX index_recents_on_user_project_and_created ON recents_new (user_id, project_id, created_at DESC);' execute <<-SQL ALTER TABLE recents_new diff --git a/spec/workers/recents_cleanup_worker_spec.rb b/spec/workers/recents_cleanup_worker_spec.rb index d59394fda..754e5d9d8 100644 --- a/spec/workers/recents_cleanup_worker_spec.rb +++ b/spec/workers/recents_cleanup_worker_spec.rb @@ -9,8 +9,8 @@ context 'when running a temporal sweep' do let(:user) { create(:user) } - it 'deletes recents older than 14 days' 
do - old_cls = create(:classification, user: user, created_at: 15.days.ago) + it 'deletes recents older than 90 days' do + old_cls = create(:classification, user: user, created_at: 100.days.ago) old_recent = create(:recent, classification: old_cls) # Make sure the recent's created_at isn't overridden by the factory @@ -30,11 +30,13 @@ context 'when running a volume sweep' do let(:user) { create(:user) } + let(:project_a) { create(:project) } + let(:project_b) { create(:project) } - it 'keeps the 20 newest recents and deletes the rest for a recently active user' do + it 'keeps the 20 newest recents and deletes the rest for a recently active user', :aggregate_failures do old_recents = [] 5.times { - old_cls = create(:classification, user: user, created_at: 15.days.ago) + old_cls = create(:classification, project: project_a, user: user, created_at: 15.days.ago) old_recent = create(:recent, classification: old_cls) old_recent.update_column(:created_at, old_cls.created_at) old_recents << old_recent @@ -42,7 +44,7 @@ new_recents = [] 20.times { - new_cls = create(:classification, user: user, created_at: 30.minutes.ago) + new_cls = create(:classification, project: project_a, user: user, created_at: 30.minutes.ago) new_recent = create(:recent, classification: new_cls) new_recent.update_column(:created_at, new_cls.created_at) new_recents << new_recent @@ -58,9 +60,31 @@ expect(old_records).to be_empty end + it 'finds the 20 newest recents per user per project and deletes the rest', :aggregate_failures do + 25.times do + cls = create(:classification, user: user, project: project_a) + r = create(:recent, classification: cls) + r.update_column(:created_at, 30.minutes.ago) + end + + 15.times do + cls = create(:classification, user: user, project: project_b) + r = create(:recent, classification: cls) + r.update_column(:created_at, 30.minutes.ago) + end + + expect(Recent.where(user_id: user.id).count).to eq(40) + expect { + worker.perform + }.to change { Recent.count }.by(-5) + + 
expect(Recent.where(user_id: user.id, project_id: project_a.id).count).to eq(20) + expect(Recent.where(user_id: user.id, project_id: project_b.id).count).to eq(15) + end + it 'does not delete anything if a recently active user has 20 or fewer recents' do 15.times { - cls = create(:classification, user: user, created_at: 30.minutes.ago) + cls = create(:classification, project: project_a, user: user, created_at: 30.minutes.ago) rec = create(:recent, classification: cls) rec.update_column(:created_at, cls.created_at) } @@ -72,10 +96,10 @@ it 'ignores users who have not been active in the last hour' do # The user hasn't been active in the past hour, so these recents are ignored - # They'll be cleaned up by the temporal sweep when they're older than 14 days + # They'll be cleaned up by the temporal sweep when they're older than 90 days 25.times { - cls = create(:classification, user: user, created_at: 2.hours.ago) + cls = create(:classification, project: project_a, user: user, created_at: 2.hours.ago) rec = create(:recent, classification: cls) rec.update_column(:created_at, cls.created_at) } From e270d9e1fd35019b2ffdfb395edfdb4f92162366 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 2 Jun 2026 12:21:38 -0500 Subject: [PATCH 09/11] Update rubocop rails version, loosen rspec guidelines --- .rubocop.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 5a32e87da..ae1fb40de 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -44,7 +44,9 @@ RSpec/MultipleMemoizedHelpers: RSpec/NestedGroups: Max: 7 RSpec/ExampleLength: - Max: 15 + Max: 20 +RSpec/MultipleExpectations: + Max: 5 Style/NumericLiterals: Enabled: false @@ -78,4 +80,4 @@ Style/HashTransformValues: AllCops: NewCops: enable - TargetRailsVersion: 4.2 + TargetRailsVersion: 7.2 From 39662f9d952b51172fdf076223896ab967c12418 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 2 Jun 2026 12:21:55 -0500 Subject: [PATCH 10/11] Hound --- 
app/workers/recents_cleanup_worker.rb | 3 +-- spec/workers/recents_cleanup_worker_spec.rb | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/app/workers/recents_cleanup_worker.rb b/app/workers/recents_cleanup_worker.rb index 58e0d71d8..a5541304a 100644 --- a/app/workers/recents_cleanup_worker.rb +++ b/app/workers/recents_cleanup_worker.rb @@ -20,7 +20,6 @@ def perform .distinct .pluck(:user_id, :project_id) - # Clean up any recents over 20 per user/project for recently active users recently_active_pairs.each do |user_id, project_id| scope = Recent.where(user_id: user_id, project_id: project_id) @@ -34,4 +33,4 @@ def perform scope.where.not(id: ids_to_keep).delete_all end end -end \ No newline at end of file +end diff --git a/spec/workers/recents_cleanup_worker_spec.rb b/spec/workers/recents_cleanup_worker_spec.rb index 754e5d9d8..a4bc22d60 100644 --- a/spec/workers/recents_cleanup_worker_spec.rb +++ b/spec/workers/recents_cleanup_worker_spec.rb @@ -76,7 +76,7 @@ expect(Recent.where(user_id: user.id).count).to eq(40) expect { worker.perform - }.to change { Recent.count }.by(-5) + }.to change(Recent, :count).by(-5) expect(Recent.where(user_id: user.id, project_id: project_a.id).count).to eq(20) expect(Recent.where(user_id: user.id, project_id: project_b.id).count).to eq(15) From ef2682ea39cda309edce3c69f8f943f3c4d0dab7 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Thu, 4 Jun 2026 11:28:22 -0500 Subject: [PATCH 11/11] Fix cutoff date in migration --- db/migrate/20260428222525_delete_old_recents.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/migrate/20260428222525_delete_old_recents.rb b/db/migrate/20260428222525_delete_old_recents.rb index bc77ed4ad..9ff31778c 100644 --- a/db/migrate/20260428222525_delete_old_recents.rb +++ b/db/migrate/20260428222525_delete_old_recents.rb @@ -5,7 +5,7 @@ class DeleteOldRecents < ActiveRecord::Migration[7.2] def up safety_assured do - cutoff_date = 14.days.ago.to_fs(:db) + 
cutoff_date = 90.days.ago.to_fs(:db) current_time = Time.current.to_fs(:db) say 'Step 1: Creating new table from existing and loading recent recents...'