diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index 580c9d32b..a6fecb38e 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -48,10 +48,12 @@ This will prune remote posts older than 90 days (configurable with [`config :ple ### Options -- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread is kept (e.g. because one of the posts has seen recent activity). +- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also won't delete posts when at least one of the posts in that thread has seen recent activity. - `--keep-non-public` - Keep non-public posts like DM's and followers-only, even if they are remote. - `--limit` - limits how many remote posts get pruned. This limit does **not** apply to any of the follow up jobs. If wanting to keep the database load in check it is thus advisable to run the standalone `prune_orphaned_activities` task with a limit afterwards instead of passing `--prune-orphaned-activities` to this task. - `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size. +- `--prune-pinned` - Also prune pinned posts; keeping pinned posts does not suffice to protect their threads from pruning, even when using `--keep-threads`. + Note, if using this option and pinned posts are pruned, they and their threads will just be refetched on the next user update. 
Therefore it usually doesn't bring much gain while incurring a heavy fetch load after pruning. - `--vacuum` - Run `VACUUM FULL` after the objects are pruned. This should not be used on a regular basis, but is useful if your instance has been running for a long time before pruning. ## Prune orphaned activities from the database diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index d44ece13a..a746dc8fe 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -120,6 +120,21 @@ def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do del_single + del_array end + defp query_pinned_object_apids() do + Pleroma.User + |> select([u], %{ap_id: fragment("jsonb_object_keys(?)", u.pinned_objects)}) + end + + defp query_pinned_object_ids() do + # If this additional level of subquery is omitted and we directly supply AP ids + # to the final query, it appears to overexert PostgreSQL(17)'s planner leading + # to a very inefficient query with enormous memory and time consumption. + # By supplying database IDs it ends up quite cheap however. + Object + |> where([o], fragment("?->>'id' IN ?", o.data, subquery(query_pinned_object_apids()))) + |> select([o], o.id) + end + defp deletable_objects_keeping_threads(time_deadline, limit_cnt, options) do # We want to delete objects from threads where # 1. 
the newest post is still old @@ -262,6 +277,7 @@ def run(["prune_objects" | args]) do keep_threads: :boolean, keep_non_public: :boolean, prune_orphaned_activities: :boolean, + prune_pinned: :boolean, limit: :integer ] ) @@ -276,6 +292,7 @@ def run(["prune_objects" | args]) do "Pruning objects older than #{deadline} days" |> maybe_concat(Keyword.get(options, :keep_non_public), ", keeping non public posts") |> maybe_concat(Keyword.get(options, :keep_threads), ", keeping threads intact") + |> maybe_concat(Keyword.get(options, :prune_pinned), ", pruning pinned posts") |> maybe_concat( Keyword.get(options, :prune_orphaned_activities), ", pruning orphaned activities" @@ -293,6 +310,13 @@ def run(["prune_objects" | args]) do else deletable_objects_breaking_threads(time_deadline, limit_cnt, options) end + |> then(fn q -> + if Keyword.get(options, :prune_pinned) do + q + else + where(q, [o], o.id not in subquery(query_pinned_object_ids())) + end + end) |> Repo.delete_all(timeout: :infinity) Logger.info("Deleted #{del_obj} objects...") diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index 4f97a978a..0b9a9e75f 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -88,6 +88,74 @@ test "it prunes old objects from the database", %{old_insert_date: old_insert_da refute Object.get_by_id(note_remote_non_public_id) end + test "it retains pinned posts by default", %{old_insert_date: old_insert_date} do + insert(:note) + + pin_user = insert(:user, local: false) + + %{id: note_remote_pinned_id, data: note_remote_pinned_data} = + :note + |> insert(user: pin_user) + |> Ecto.Changeset.change(%{updated_at: old_insert_date}) + |> Repo.update!() + + User.add_pinned_object_id(pin_user, note_remote_pinned_data["id"]) + + note_remote_non_public = + %{id: note_remote_non_public_id, data: note_remote_non_public_data} = + :note + |> insert() + + note_remote_non_public + |> Ecto.Changeset.change(%{ + 
updated_at: old_insert_date, + data: note_remote_non_public_data |> update_in(["to"], fn _ -> [] end) + }) + |> Repo.update!() + + assert length(Repo.all(Object)) == 3 + + Mix.Tasks.Pleroma.Database.run(["prune_objects"]) + + assert length(Repo.all(Object)) == 2 + assert Object.get_by_id(note_remote_pinned_id) + refute Object.get_by_id(note_remote_non_public_id) + end + + test "it prunes pinned posts with --prune-pinned", %{old_insert_date: old_insert_date} do + insert(:note) + + pin_user = insert(:user, local: false) + + %{id: note_remote_pinned_id, data: note_remote_pinned_data} = + :note + |> insert(user: pin_user) + |> Ecto.Changeset.change(%{updated_at: old_insert_date}) + |> Repo.update!() + + User.add_pinned_object_id(pin_user, note_remote_pinned_data["id"]) + + note_remote_non_public = + %{id: note_remote_non_public_id, data: note_remote_non_public_data} = + :note + |> insert() + + note_remote_non_public + |> Ecto.Changeset.change(%{ + updated_at: old_insert_date, + data: note_remote_non_public_data |> update_in(["to"], fn _ -> [] end) + }) + |> Repo.update!() + + assert length(Repo.all(Object)) == 3 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-pinned"]) + + assert length(Repo.all(Object)) == 1 + refute Object.get_by_id(note_remote_pinned_id) + refute Object.get_by_id(note_remote_non_public_id) + end + test "it cleans up bookmarks", %{old_insert_date: old_insert_date} do user = insert(:user) {:ok, old_object_activity} = CommonAPI.post(user, %{status: "yadayada"})