Also allow limiting the initial prune_objects run
This may sometimes be helpful to get a more predictable runtime than with just an age-based limit. The subquery for the non-keep-threads path is required since delete_all does not directly accept limit(). Again, most of the diff is just adjusting indentation; it is best to hide whitespace-only changes with git diff -w or similar.
This commit is contained in:
parent
e64f031167
commit
225f87ad62
3 changed files with 45 additions and 18 deletions
|
@ -107,6 +107,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
- ability to auto-approve follow requests from users you are already following
|
- ability to auto-approve follow requests from users you are already following
|
||||||
- The SimplePolicy MRF can now strip user backgrounds from selected remote hosts
|
- The SimplePolicy MRF can now strip user backgrounds from selected remote hosts
|
||||||
- New standalone `prune_orphaned_activities` mix task with configurable batch limit
|
- New standalone `prune_orphaned_activities` mix task with configurable batch limit
|
||||||
|
- The `prune_objects` mix task now accepts a `--limit` parameter for initial object pruning
|
||||||
|
|
||||||
## Changed
|
## Changed
|
||||||
- OTP builds are now built on erlang OTP26
|
- OTP builds are now built on erlang OTP26
|
||||||
|
|
|
@ -50,6 +50,7 @@ This will prune remote posts older than 90 days (configurable with [`config :ple
|
||||||
|
|
||||||
- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread is kept (e.g. because one of the posts has seen recent activity).
|
- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread is kept (e.g. because one of the posts has seen recent activity).
|
||||||
- `--keep-non-public` - Keep non-public posts like DM's and followers-only, even if they are remote.
|
- `--keep-non-public` - Keep non-public posts like DM's and followers-only, even if they are remote.
|
||||||
|
- `--limit` - Limits how many remote posts get pruned. This limit does **not** apply to any of the follow-up jobs. To keep the database load in check, it is thus advisable to run the standalone `prune_orphaned_activities` task with a limit afterwards instead of passing `--prune-orphaned-activities` to this task.
|
||||||
- `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size.
|
- `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size.
|
||||||
- `--vacuum` - Run `VACUUM FULL` after the objects are pruned. This should not be used on a regular basis, but is useful if your instance has been running for a long time before pruning.
|
- `--vacuum` - Run `VACUUM FULL` after the objects are pruned. This should not be used on a regular basis, but is useful if your instance has been running for a long time before pruning.
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,14 @@ defmodule Mix.Tasks.Pleroma.Database do
|
||||||
@shortdoc "A collection of database related tasks"
|
@shortdoc "A collection of database related tasks"
|
||||||
@moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md")
|
@moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md")
|
||||||
|
|
||||||
|
defp maybe_limit(query, limit_cnt) do
|
||||||
|
if is_number(limit_cnt) and limit_cnt > 0 do
|
||||||
|
limit(query, [], ^limit_cnt)
|
||||||
|
else
|
||||||
|
query
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def prune_orphaned_activities(limit \\ 0) when is_number(limit) do
|
def prune_orphaned_activities(limit \\ 0) when is_number(limit) do
|
||||||
limit_arg =
|
limit_arg =
|
||||||
if limit > 0 do
|
if limit > 0 do
|
||||||
|
@ -148,7 +156,8 @@ def run(["prune_objects" | args]) do
|
||||||
vacuum: :boolean,
|
vacuum: :boolean,
|
||||||
keep_threads: :boolean,
|
keep_threads: :boolean,
|
||||||
keep_non_public: :boolean,
|
keep_non_public: :boolean,
|
||||||
prune_orphaned_activities: :boolean
|
prune_orphaned_activities: :boolean,
|
||||||
|
limit: :integer
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -157,6 +166,8 @@ def run(["prune_objects" | args]) do
|
||||||
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days])
|
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days])
|
||||||
time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400))
|
time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400))
|
||||||
|
|
||||||
|
limit_cnt = Keyword.get(options, :limit, 0)
|
||||||
|
|
||||||
log_message = "Pruning objects older than #{deadline} days"
|
log_message = "Pruning objects older than #{deadline} days"
|
||||||
|
|
||||||
log_message =
|
log_message =
|
||||||
|
@ -188,6 +199,13 @@ def run(["prune_objects" | args]) do
|
||||||
log_message
|
log_message
|
||||||
end
|
end
|
||||||
|
|
||||||
|
log_message =
|
||||||
|
if limit_cnt > 0 do
|
||||||
|
log_message <> ", limiting to #{limit_cnt} rows"
|
||||||
|
else
|
||||||
|
log_message
|
||||||
|
end
|
||||||
|
|
||||||
Logger.info(log_message)
|
Logger.info(log_message)
|
||||||
|
|
||||||
if Keyword.get(options, :keep_threads) do
|
if Keyword.get(options, :keep_threads) do
|
||||||
|
@ -221,11 +239,13 @@ def run(["prune_objects" | args]) do
|
||||||
|> having([a], max(a.updated_at) < ^time_deadline)
|
|> having([a], max(a.updated_at) < ^time_deadline)
|
||||||
|> having([a], not fragment("bool_or(?)", a.local))
|
|> having([a], not fragment("bool_or(?)", a.local))
|
||||||
|> having([_, b], fragment("max(?::text) is null", b.id))
|
|> having([_, b], fragment("max(?::text) is null", b.id))
|
||||||
|
|> maybe_limit(limit_cnt)
|
||||||
|> select([a], fragment("? ->> 'context'::text", a.data))
|
|> select([a], fragment("? ->> 'context'::text", a.data))
|
||||||
|
|
||||||
Pleroma.Object
|
Pleroma.Object
|
||||||
|> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context))
|
|> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context))
|
||||||
else
|
else
|
||||||
|
deletable =
|
||||||
if Keyword.get(options, :keep_non_public) do
|
if Keyword.get(options, :keep_non_public) do
|
||||||
Pleroma.Object
|
Pleroma.Object
|
||||||
|> where(
|
|> where(
|
||||||
|
@ -246,6 +266,11 @@ def run(["prune_objects" | args]) do
|
||||||
[o],
|
[o],
|
||||||
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
|
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
|
||||||
)
|
)
|
||||||
|
|> maybe_limit(limit_cnt)
|
||||||
|
|> select([o], o.id)
|
||||||
|
|
||||||
|
Pleroma.Object
|
||||||
|
|> where([o], o.id in subquery(deletable))
|
||||||
end
|
end
|
||||||
|> Repo.delete_all(timeout: :infinity)
|
|> Repo.delete_all(timeout: :infinity)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue