From 8e5defe6ca1421e41e543f507a9a65c0617c7571 Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 11 Dec 2024 03:03:14 +0100 Subject: [PATCH] stats: estimate remote user count This value is currently only used by Prometheus metrics but is (after optimising the peer query in the preceding commit) the most costly part of instance stats. --- CHANGELOG.md | 2 + lib/pleroma/stats.ex | 20 +++++----- ...00_remote_user_count_estimate_function.exs | 38 +++++++++++++++++++ 3 files changed, 49 insertions(+), 11 deletions(-) create mode 100644 priv/repo/migrations/20241211000000_remote_user_count_estimate_function.exs diff --git a/CHANGELOG.md b/CHANGELOG.md index b0448b0d8..ced89d2b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Changed - Dropped obsolete `ap_enabled` indicator from user table and associated buggy logic +- The remote user count in prometheus metrics is now an estimate instead of an exact number + since the latter proved unreasonably costly to obtain for a merely nice-to-have statistic ## 2025.01.01 diff --git a/lib/pleroma/stats.ex b/lib/pleroma/stats.ex index 7bbe089f8..f33c378dd 100644 --- a/lib/pleroma/stats.ex +++ b/lib/pleroma/stats.ex @@ -79,24 +79,22 @@ def calculate_stat_data do status_count = Repo.aggregate(User.Query.build(%{local: true}), :sum, :note_count) - users_query = + # there are few enough local users for postgres to use an index scan + # (also here an exact count is a bit more important) + user_count = from(u in User, where: u.is_active == true, where: u.local == true, where: not is_nil(u.nickname), where: not u.invisible ) + |> Repo.aggregate(:count, :id) - remote_users_query = - from(u in User, - where: u.is_active == true, - where: u.local == false, - where: not is_nil(u.nickname), - where: not u.invisible - ) - - user_count = Repo.aggregate(users_query, :count, :id) - remote_user_count = Repo.aggregate(remote_users_query, :count, :id) + # but 
mostly numerous remote users leading to a full table scan + # (ecto currently doesn't allow building queries without explicit table) + %{rows: [[remote_user_count]]} = + "SELECT estimate_remote_user_count();" + |> Pleroma.Repo.query!() %{ peers: peers, diff --git a/priv/repo/migrations/20241211000000_remote_user_count_estimate_function.exs b/priv/repo/migrations/20241211000000_remote_user_count_estimate_function.exs new file mode 100644 index 000000000..010f068a5 --- /dev/null +++ b/priv/repo/migrations/20241211000000_remote_user_count_estimate_function.exs @@ -0,0 +1,38 @@ +# Akkoma: Magically expressive social media +# Copyright © 2024 Akkoma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Repo.Migrations.RemoteUserCountEstimateFunction do + use Ecto.Migration + + @function_name "estimate_remote_user_count" + + def up() do + # yep, this EXPLAIN (ab)use is blessed by the PostgreSQL wiki: + # https://wiki.postgresql.org/wiki/Count_estimate + """ + CREATE OR REPLACE FUNCTION #{@function_name}() + RETURNS integer + LANGUAGE plpgsql AS $$ + DECLARE plan jsonb; + BEGIN + EXECUTE ' + EXPLAIN (FORMAT JSON) + SELECT * + FROM public.users + WHERE local = false AND + is_active = true AND + invisible = false AND + nickname IS NOT NULL; + ' INTO plan; + RETURN plan->0->'Plan'->'Plan Rows'; + END; + $$; + """ + |> execute() + end + + def down() do + execute("DROP FUNCTION IF EXISTS #{@function_name}()") + end +end