Postgresql

<< Back to wiki homepage

Table of contents:

Cache usage

Dig what is in shared buffer

Needs pg_sharedbuffer extension:

SELECT c.relname, count(*) AS buffers
    FROM pg_buffercache b INNER JOIN pg_class c
    ON b.relfilenode = pg_relation_filenode(c.oid) AND
    b.reldatabase IN (0, (SELECT oid FROM pg_database
                            WHERE datname = current_database()))
    GROUP BY c.relname
    ORDER BY 2 DESC
    LIMIT 10;

If you want to see only dirty buffers, add where isdirty = 't' after datname.

Find cache utilization of tables

WITH hitratio as (
    SELECT
        relname,
        round(cast(heap_blks_hit as numeric) / (heap_blks_hit + heap_blks_read) * 100, 2) AS hit_pct,
        heap_blks_hit as "from cache",
        heap_blks_read as "from disk"
    FROM
        pg_statio_user_tables
    WHERE
        (heap_blks_hit + heap_blks_read)>0
)
    SELECT * FROM hitratio WHERE hit_pct < 80 ORDER BY "from disk" DESC;

Index

Check unused indexes

SELECT relname, indexrelname, idx_scan
    FROM pg_catalog.pg_stat_user_indexes
    WHERE schemaname = 'public'
    ORDER BY idx_scan ASC;

Check invalid indexes

SELECT i.relname as index_name,
       idx.indrelid::regclass as location,
       idx.indexrelid as relation_id, am.amname as type,
       idx.indkey as index_keys,
       ARRAY(
       SELECT pg_get_indexdef(idx.indexrelid, k + 1, true)
       FROM generate_subscripts(idx.indkey, 1) as k
       ORDER BY k
       ) as index_key_names
FROM   pg_index as idx
JOIN   pg_class as i
ON     i.oid = idx.indexrelid
JOIN   pg_am as am
ON     i.relam = am.oid where idx.indisvalid is FALSE;

Compare sequential and index scans / summarize

SELECT
    relname,
    seq_scan,
    idx_scan,
    seq_tup_read,
    idx_tup_fetch,
    round(cast(idx_tup_fetch AS numeric) / (idx_tup_fetch + seq_tup_read) * 100, 2) AS index_read_pct,
    pg_size_pretty(pg_relation_size(to_regclass(relname))) as size_on_disk
FROM
    pg_stat_user_tables
WHERE
    (idx_tup_fetch + seq_tup_read)>0
        and
    cast(idx_tup_fetch AS numeric) / (idx_tup_fetch + seq_tup_read) < 0.8
        and
    pg_relation_size(to_regclass(relname))>800000
ORDER BY
    seq_tup_read desc;

Show INSERT/UPDATE/DELETE ratio of tables

SELECT
    relname,
    round(cast(n_tup_ins AS numeric) / (n_tup_ins + n_tup_upd + n_tup_del) * 100, 2) AS ins_pct,
    round(cast(n_tup_upd AS numeric) / (n_tup_ins + n_tup_upd + n_tup_del) * 100, 2) AS upd_pct,
    round(cast(n_tup_del AS numeric) / (n_tup_ins + n_tup_upd + n_tup_del) * 100, 2) AS del_pct
FROM
    pg_stat_user_tables
WHERE
    (n_tup_ins + n_tup_upd + n_tup_del) > 0
ORDER BY relname;

Show all indexes with their definitions

SELECT indexname,indexdef FROM pg_indexes WHERE schemaname = 'public';

Rebuild index

Postgres 13 and above

rebuild index concurrently ${index_name};

Postgres 12 and lower

Get index definition first (see above)

CREATE INDEX CONCURRENTLY ${index_name}_NEW ${index_definition};
ALTER INDEX ${index_name} RENAME TO ${index_name}_OLD;
ALTER INDEX ${index_name}_NEW RENAME TO ${index_name};
DROP INDEX ${index_name}_OLD;

List index size

    SELECT
    t.tablename,
    indexname,
    c.reltuples AS num_rows,
    pg_size_pretty(pg_relation_size(quote_ident(t.tablename)::text)) AS table_size,
    pg_size_pretty(pg_relation_size(quote_ident(indexrelname)::text)) AS index_size,
    CASE WHEN indisunique THEN 'Y'
       ELSE 'N'
    END AS UNIQUE,
    idx_scan AS number_of_scans,
    idx_tup_read AS tuples_read,
    idx_tup_fetch AS tuples_fetched
FROM pg_tables t
LEFT OUTER JOIN pg_class c ON t.tablename=c.relname
LEFT OUTER JOIN
    ( SELECT c.relname AS ctablename, ipg.relname AS indexname, x.indnatts AS number_of_columns, idx_scan, idx_tup_read, idx_tup_fetch, indexrelname, indisunique FROM pg_index x
           JOIN pg_class c ON c.oid = x.indrelid
           JOIN pg_class ipg ON ipg.oid = x.indexrelid
           JOIN pg_stat_all_indexes psai ON x.indexrelid = psai.indexrelid AND psai.schemaname = 'public' )
    AS foo
    ON t.tablename = foo.ctablename
WHERE t.schemaname='public'
ORDER BY 1,2;

Show duplicate indexes

SELECT pg_size_pretty(sum(pg_relation_size(idx))::bigint) as size,
       (array_agg(idx))[1] as idx1, (array_agg(idx))[2] as idx2,
       (array_agg(idx))[3] as idx3, (array_agg(idx))[4] as idx4
FROM (
    SELECT indexrelid::regclass as idx, (indrelid::text ||E'\n'|| indclass::text ||E'\n'|| indkey::text ||E'\n'||
                                         coalesce(indexprs::text,'')||E'\n' || coalesce(indpred::text,'')) as key
    FROM pg_index) sub
GROUP BY key HAVING count(*)>1
ORDER BY sum(pg_relation_size(idx)) DESC;

Show indexed columns with high null fractions

Which shows that where partial index without nulls can save you space.

Nice read about it here.

SELECT
    c.oid,
    c.relname AS index,
    pg_size_pretty(pg_relation_size(c.oid)) AS index_size,
    i.indisunique AS unique,
    a.attname AS indexed_column,
    CASE s.null_frac
        WHEN 0 THEN ''
        ELSE to_char(s.null_frac * 100, '999.00%')
    END AS null_frac,
    pg_size_pretty((pg_relation_size(c.oid) * s.null_frac)::bigint) AS expected_saving
    -- Uncomment to include the index definition
    --, ixs.indexdef

FROM
    pg_class c
    JOIN pg_index i ON i.indexrelid = c.oid
    JOIN pg_attribute a ON a.attrelid = c.oid
    JOIN pg_class c_table ON c_table.oid = i.indrelid
    JOIN pg_indexes ixs ON c.relname = ixs.indexname
    LEFT JOIN pg_stats s ON s.tablename = c_table.relname AND a.attname = s.attname

WHERE
    -- Primary key cannot be partial
    NOT i.indisprimary

    -- Exclude already partial indexes
    AND i.indpred IS NULL

    -- Exclude composite indexes
    AND array_length(i.indkey, 1) = 1

    -- Larger than 10MB
    AND pg_relation_size(c.oid) > 10 * 1024 ^ 2

ORDER BY
    pg_relation_size(c.oid) * s.null_frac DESC;

Materialized views

List materialized views

select schemaname as schema_name,
       matviewname as view_name,
       matviewowner as owner,
       ispopulated as is_populated,
       definition
from pg_matviews
order by schema_name,
         view_name;

Temp files

Check what is using temp files most

You might want to reset the stats first, if that's something currently going on

SELECT total_exec_time,
to_char(calls, 'FM999G999G999G990') AS ncalls,
total_exec_time / calls AS avg_exec_time_ms,
interval '1 millisecond' * (blk_read_time + blk_write_time) AS sync_io_time,
pg_size_pretty(temp_blks_written * 8192) as total_tmp_size,
query AS query
FROM pg_stat_statements
WHERE temp_blks_written > 0
ORDER BY temp_blks_written DESC
LIMIT 20;

WAL

Force switch to a new WAL

select * from pg_switch_wal();

Check current WAL info

select * from pg_ls_waldir();

Bloat

Show all bloat

Both tables and indexes are shown, somewhat complicated:

SELECT
  current_database(), schemaname, tablename, /*reltuples::bigint, relpages::bigint, otta,*/
  ROUND((CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages::FLOAT/otta END)::NUMERIC,1) AS tbloat,
  CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::BIGINT END AS wastedbytes,
  iname, /*ituples::bigint, ipages::bigint, iotta,*/
  ROUND((CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages::FLOAT/iotta END)::NUMERIC,1) AS ibloat,
  CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes
FROM (
  SELECT
    schemaname, tablename, cc.reltuples, cc.relpages, bs,
    CEIL((cc.reltuples*((datahdr+ma-
      (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::FLOAT)) AS otta,
    COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
    COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::FLOAT)),0) AS iotta -- very rough approximation, assumes all cols
  FROM (
    SELECT
      ma,bs,schemaname,tablename,
      (datawidth+(hdr+ma-(CASE WHEN hdr%ma=0 THEN ma ELSE hdr%ma END)))::NUMERIC AS datahdr,
      (maxfracsum*(nullhdr+ma-(CASE WHEN nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
    FROM (
      SELECT
        schemaname, tablename, hdr, ma, bs,
        SUM((1-null_frac)*avg_width) AS datawidth,
        MAX(null_frac) AS maxfracsum,
        hdr+(
          SELECT 1+COUNT(*)/8
          FROM pg_stats s2
          WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
        ) AS nullhdr
      FROM pg_stats s, (
        SELECT
          (SELECT current_setting('block_size')::NUMERIC) AS bs,
          CASE WHEN SUBSTRING(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
          CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
        FROM (SELECT version() AS v) AS foo
      ) AS constants
      GROUP BY 1,2,3,4,5
    ) AS foo
  ) AS rs
  JOIN pg_class cc ON cc.relname = rs.tablename
  JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
  LEFT JOIN pg_index i ON indrelid = cc.oid
  LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
) AS sml
ORDER BY wastedbytes DESC;

Show table bloat

From pgexperts:

-- new table bloat query
-- still needs work; is often off by +/- 20%
WITH constants AS (
    -- define some constants for sizes of things
    -- for reference down the query and easy maintenance
    SELECT current_setting('block_size')::numeric AS bs, 23 AS hdr, 8 AS ma
),
no_stats AS (
    -- screen out table who have attributes
    -- which dont have stats, such as JSON
    SELECT table_schema, table_name,
        n_live_tup::numeric as est_rows,
        pg_table_size(relid)::numeric as table_size
    FROM information_schema.columns
        JOIN pg_stat_user_tables as psut
           ON table_schema = psut.schemaname
           AND table_name = psut.relname
        LEFT OUTER JOIN pg_stats
        ON table_schema = pg_stats.schemaname
            AND table_name = pg_stats.tablename
            AND column_name = attname
    WHERE attname IS NULL
        AND table_schema NOT IN ('pg_catalog', 'information_schema')
    GROUP BY table_schema, table_name, relid, n_live_tup
),
null_headers AS (
    -- calculate null header sizes
    -- omitting tables which dont have complete stats
    -- and attributes which aren't visible
    SELECT
        hdr+1+(sum(case when null_frac <> 0 THEN 1 else 0 END)/8) as nullhdr,
        SUM((1-null_frac)*avg_width) as datawidth,
        MAX(null_frac) as maxfracsum,
        schemaname,
        tablename,
        hdr, ma, bs
    FROM pg_stats CROSS JOIN constants
        LEFT OUTER JOIN no_stats
            ON schemaname = no_stats.table_schema
            AND tablename = no_stats.table_name
    WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
        AND no_stats.table_name IS NULL
        AND EXISTS ( SELECT 1
            FROM information_schema.columns
                WHERE schemaname = columns.table_schema
                    AND tablename = columns.table_name )
    GROUP BY schemaname, tablename, hdr, ma, bs
),
data_headers AS (
    -- estimate header and row size
    SELECT
        ma, bs, hdr, schemaname, tablename,
        (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
        (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
    FROM null_headers
),
table_estimates AS (
    -- make estimates of how large the table should be
    -- based on row and page size
    SELECT schemaname, tablename, bs,
        reltuples::numeric as est_rows, relpages * bs as table_bytes,
    CEIL((reltuples*
            (datahdr + nullhdr2 + 4 + ma -
                (CASE WHEN datahdr%ma=0
                    THEN ma ELSE datahdr%ma END)
                )/(bs-20))) * bs AS expected_bytes,
        reltoastrelid
    FROM data_headers
        JOIN pg_class ON tablename = relname
        JOIN pg_namespace ON relnamespace = pg_namespace.oid
            AND schemaname = nspname
    WHERE pg_class.relkind = 'r'
),
estimates_with_toast AS (
    -- add in estimated TOAST table sizes
    -- estimate based on 4 toast tuples per page because we dont have
    -- anything better.  also append the no_data tables
    SELECT schemaname, tablename,
        TRUE as can_estimate,
        est_rows,
        table_bytes + ( coalesce(toast.relpages, 0) * bs ) as table_bytes,
        expected_bytes + ( ceil( coalesce(toast.reltuples, 0) / 4 ) * bs ) as expected_bytes
    FROM table_estimates LEFT OUTER JOIN pg_class as toast
        ON table_estimates.reltoastrelid = toast.oid
            AND toast.relkind = 't'
),
table_estimates_plus AS (
-- add some extra metadata to the table data
-- and calculations to be reused
-- including whether we cant estimate it
-- or whether we think it might be compressed
    SELECT current_database() as databasename,
            schemaname, tablename, can_estimate,
            est_rows,
            CASE WHEN table_bytes > 0
                THEN table_bytes::NUMERIC
                ELSE NULL::NUMERIC END
                AS table_bytes,
            CASE WHEN expected_bytes > 0
                THEN expected_bytes::NUMERIC
                ELSE NULL::NUMERIC END
                    AS expected_bytes,
            CASE WHEN expected_bytes > 0 AND table_bytes > 0
                AND expected_bytes <= table_bytes
                THEN (table_bytes - expected_bytes)::NUMERIC
                ELSE 0::NUMERIC END AS bloat_bytes
    FROM estimates_with_toast
    UNION ALL
    SELECT current_database() as databasename,
        table_schema, table_name, FALSE,
        est_rows, table_size,
        NULL::NUMERIC, NULL::NUMERIC
    FROM no_stats
),
bloat_data AS (
    -- do final math calculations and formatting
    select current_database() as databasename,
        schemaname, tablename, can_estimate,
        table_bytes, round(table_bytes/(1024^2)::NUMERIC,3) as table_mb,
        expected_bytes, round(expected_bytes/(1024^2)::NUMERIC,3) as expected_mb,
        round(bloat_bytes*100/table_bytes) as pct_bloat,
        round(bloat_bytes/(1024::NUMERIC^2),2) as mb_bloat,
        table_bytes, expected_bytes, est_rows
    FROM table_estimates_plus
)
-- filter output for bloated tables
SELECT databasename, schemaname, tablename,
    can_estimate,
    est_rows,
    pct_bloat, mb_bloat,
    table_mb
FROM bloat_data
-- this where clause defines which tables actually appear
-- in the bloat chart
-- example below filters for tables which are either 50%
-- bloated and more than 20mb in size, or more than 25%
-- bloated and more than 4GB in size
WHERE ( pct_bloat >= 50 AND mb_bloat >= 10 )
    OR ( pct_bloat >= 25 AND mb_bloat >= 1000 )
ORDER BY mb_bloat DESC;

Show index bloat

Also from pgexperts:

-- btree index stats query
-- estimates bloat for btree indexes
WITH btree_index_atts AS (
    SELECT nspname,
        indexclass.relname as index_name,
        indexclass.reltuples,
        indexclass.relpages,
        indrelid, indexrelid,
        indexclass.relam,
        tableclass.relname as tablename,
        regexp_split_to_table(indkey::text, ' ')::smallint AS attnum,
        indexrelid as index_oid
    FROM pg_index
    JOIN pg_class AS indexclass ON pg_index.indexrelid = indexclass.oid
    JOIN pg_class AS tableclass ON pg_index.indrelid = tableclass.oid
    JOIN pg_namespace ON pg_namespace.oid = indexclass.relnamespace
    JOIN pg_am ON indexclass.relam = pg_am.oid
    WHERE pg_am.amname = 'btree' and indexclass.relpages > 0
         AND nspname NOT IN ('pg_catalog','information_schema')
    ),
index_item_sizes AS (
    SELECT
    ind_atts.nspname, ind_atts.index_name,
    ind_atts.reltuples, ind_atts.relpages, ind_atts.relam,
    indrelid AS table_oid, index_oid,
    current_setting('block_size')::numeric AS bs,
    8 AS maxalign,
    24 AS pagehdr,
    CASE WHEN max(coalesce(pg_stats.null_frac,0)) = 0
        THEN 2
        ELSE 6
    END AS index_tuple_hdr,
    sum( (1-coalesce(pg_stats.null_frac, 0)) * coalesce(pg_stats.avg_width, 1024) ) AS nulldatawidth
    FROM pg_attribute
    JOIN btree_index_atts AS ind_atts ON pg_attribute.attrelid = ind_atts.indexrelid AND pg_attribute.attnum = ind_atts.attnum
    JOIN pg_stats ON pg_stats.schemaname = ind_atts.nspname
          -- stats for regular index columns
          AND ( (pg_stats.tablename = ind_atts.tablename AND pg_stats.attname = pg_catalog.pg_get_indexdef(pg_attribute.attrelid, pg_attribute.attnum, TRUE))
          -- stats for functional indexes
          OR   (pg_stats.tablename = ind_atts.index_name AND pg_stats.attname = pg_attribute.attname))
    WHERE pg_attribute.attnum > 0
    GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9
),
index_aligned_est AS (
    SELECT maxalign, bs, nspname, index_name, reltuples,
        relpages, relam, table_oid, index_oid,
        coalesce (
            ceil (
                reltuples * ( 6
                    + maxalign
                    - CASE
                        WHEN index_tuple_hdr%maxalign = 0 THEN maxalign
                        ELSE index_tuple_hdr%maxalign
                      END
                    + nulldatawidth
                    + maxalign
                    - CASE /* Add padding to the data to align on MAXALIGN */
                        WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign
                        ELSE nulldatawidth::integer%maxalign
                      END
                )::numeric
              / ( bs - pagehdr::NUMERIC )
              +1 )
         , 0 )
      as expected
    FROM index_item_sizes
),
raw_bloat AS (
    SELECT current_database() as dbname, nspname, pg_class.relname AS table_name, index_name,
        bs*(index_aligned_est.relpages)::bigint AS totalbytes, expected,
        CASE
            WHEN index_aligned_est.relpages <= expected
                THEN 0
                ELSE bs*(index_aligned_est.relpages-expected)::bigint
            END AS wastedbytes,
        CASE
            WHEN index_aligned_est.relpages <= expected
                THEN 0
                ELSE bs*(index_aligned_est.relpages-expected)::bigint * 100 / (bs*(index_aligned_est.relpages)::bigint) 
            END AS realbloat,
        pg_relation_size(index_aligned_est.table_oid) as table_bytes,
        stat.idx_scan as index_scans
    FROM index_aligned_est
    JOIN pg_class ON pg_class.oid=index_aligned_est.table_oid
    JOIN pg_stat_user_indexes AS stat ON index_aligned_est.index_oid = stat.indexrelid
),
format_bloat AS (
SELECT dbname as database_name, nspname as schema_name, table_name, index_name,
        round(realbloat) as bloat_pct, round(wastedbytes/(1024^2)::NUMERIC) as bloat_mb,
        round(totalbytes/(1024^2)::NUMERIC,3) as index_mb,
        round(table_bytes/(1024^2)::NUMERIC,3) as table_mb,
        index_scans
FROM raw_bloat
)
-- final query outputting the bloated indexes
-- change the where and order by to change
-- what shows up as bloated
SELECT *
FROM format_bloat
WHERE ( bloat_pct > 50 and bloat_mb > 10 )
ORDER BY bloat_pct DESC;

Ownership & roles

Re-assign owners to some other user

REASSIGN OWNED BY old_role [, ...] TO new_role;

Change owner of selected tables

Just print what commands are needed:

select 'ALTER TABLE ' || t.tablename || ' OWNER TO new_user;'
 from  pg_tables t
where t.tablename like '%my_filter%'; # or any valid where case

Actually do the change (obviously dangerous if you don't set your filter restrictive):

DO
$do$
  DECLARE
    i pg_tables%rowtype;
  BEGIN
  FOR i IN SELECT * FROM pg_catalog.pg_tables where schemaname like 'public' and tablename like '%my_filter%'
  LOOP
    EXECUTE FORMAT(
      $$
        ALTER TABLE %I OWNER TO new_user;
      $$,
      i.tablename
    );
  END LOOP;
  END
$do$;

List all grants/roles for all users

WITH rol AS (
    SELECT oid,
            rolname::text AS role_name
        FROM pg_authid
    UNION
    SELECT 0::oid AS oid,
            'public'::text
),
schemas AS ( -- Schemas
    SELECT oid AS schema_oid,
            n.nspname::text AS schema_name,
            n.nspowner AS owner_oid,
            'schema'::text AS object_type,
            coalesce ( n.nspacl, acldefault ( 'n'::"char", n.nspowner ) ) AS acl
        FROM pg_catalog.pg_namespace n
        WHERE n.nspname !~ '^pg_'
            AND n.nspname <> 'information_schema'
),
classes AS ( -- Tables, views, etc.
    SELECT schemas.schema_oid,
            schemas.schema_name AS object_schema,
            c.oid,
            c.relname::text AS object_name,
            c.relowner AS owner_oid,
            CASE
                WHEN c.relkind = 'r' THEN 'table'
                WHEN c.relkind = 'v' THEN 'view'
                WHEN c.relkind = 'm' THEN 'materialized view'
                WHEN c.relkind = 'c' THEN 'type'
                WHEN c.relkind = 'i' THEN 'index'
                WHEN c.relkind = 'S' THEN 'sequence'
                WHEN c.relkind = 's' THEN 'special'
                WHEN c.relkind = 't' THEN 'TOAST table'
                WHEN c.relkind = 'f' THEN 'foreign table'
                WHEN c.relkind = 'p' THEN 'partitioned table'
                WHEN c.relkind = 'I' THEN 'partitioned index'
                ELSE c.relkind::text
                END AS object_type,
            CASE
                WHEN c.relkind = 'S' THEN coalesce ( c.relacl, acldefault ( 's'::"char", c.relowner ) )
                ELSE coalesce ( c.relacl, acldefault ( 'r'::"char", c.relowner ) )
                END AS acl
        FROM pg_class c
        JOIN schemas
            ON ( schemas.schema_oid = c.relnamespace )
        WHERE c.relkind IN ( 'r', 'v', 'm', 'S', 'f', 'p' )
),
cols AS ( -- Columns
    SELECT c.object_schema,
            null::integer AS oid,
            c.object_name || '.' || a.attname::text AS object_name,
            'column' AS object_type,
            c.owner_oid,
            coalesce ( a.attacl, acldefault ( 'c'::"char", c.owner_oid ) ) AS acl
        FROM pg_attribute a
        JOIN classes c
            ON ( a.attrelid = c.oid )
        WHERE a.attnum > 0
            AND NOT a.attisdropped
),
procs AS ( -- Procedures and functions
    SELECT schemas.schema_oid,
            schemas.schema_name AS object_schema,
            p.oid,
            p.proname::text AS object_name,
            p.proowner AS owner_oid,
            CASE p.prokind
                WHEN 'a' THEN 'aggregate'
                WHEN 'w' THEN 'window'
                WHEN 'p' THEN 'procedure'
                ELSE 'function'
                END AS object_type,
            pg_catalog.pg_get_function_arguments ( p.oid ) AS calling_arguments,
            coalesce ( p.proacl, acldefault ( 'f'::"char", p.proowner ) ) AS acl
        FROM pg_proc p
        JOIN schemas
            ON ( schemas.schema_oid = p.pronamespace )
),
udts AS ( -- User defined types
    SELECT schemas.schema_oid,
            schemas.schema_name AS object_schema,
            t.oid,
            t.typname::text AS object_name,
            t.typowner AS owner_oid,
            CASE t.typtype
                WHEN 'b' THEN 'base type'
                WHEN 'c' THEN 'composite type'
                WHEN 'd' THEN 'domain'
                WHEN 'e' THEN 'enum type'
                WHEN 't' THEN 'pseudo-type'
                WHEN 'r' THEN 'range type'
                WHEN 'm' THEN 'multirange'
                ELSE t.typtype::text
                END AS object_type,
            coalesce ( t.typacl, acldefault ( 'T'::"char", t.typowner ) ) AS acl
        FROM pg_type t
        JOIN schemas
            ON ( schemas.schema_oid = t.typnamespace )
        WHERE ( t.typrelid = 0
                OR ( SELECT c.relkind = 'c'
                        FROM pg_catalog.pg_class c
                        WHERE c.oid = t.typrelid ) )
            AND NOT EXISTS (
                SELECT 1
                    FROM pg_catalog.pg_type el
                    WHERE el.oid = t.typelem
                        AND el.typarray = t.oid )
),
fdws AS ( -- Foreign data wrappers
    SELECT null::oid AS schema_oid,
            null::text AS object_schema,
            p.oid,
            p.fdwname::text AS object_name,
            p.fdwowner AS owner_oid,
            'foreign data wrapper' AS object_type,
            coalesce ( p.fdwacl, acldefault ( 'F'::"char", p.fdwowner ) ) AS acl
        FROM pg_foreign_data_wrapper p
),
fsrvs AS ( -- Foreign servers
    SELECT null::oid AS schema_oid,
            null::text AS object_schema,
            p.oid,
            p.srvname::text AS object_name,
            p.srvowner AS owner_oid,
            'foreign server' AS object_type,
            coalesce ( p.srvacl, acldefault ( 'S'::"char", p.srvowner ) ) AS acl
        FROM pg_foreign_server p
),
all_objects AS (
    SELECT schema_name AS object_schema,
            object_type,
            schema_name AS object_name,
            null::text AS calling_arguments,
            owner_oid,
            acl
        FROM schemas
    UNION
    SELECT object_schema,
            object_type,
            object_name,
            null::text AS calling_arguments,
            owner_oid,
            acl
        FROM classes
    UNION
    SELECT object_schema,
            object_type,
            object_name,
            null::text AS calling_arguments,
            owner_oid,
            acl
        FROM cols
    UNION
    SELECT object_schema,
            object_type,
            object_name,
            calling_arguments,
            owner_oid,
            acl
        FROM procs
    UNION
    SELECT object_schema,
            object_type,
            object_name,
            null::text AS calling_arguments,
            owner_oid,
            acl
        FROM udts
    UNION
    SELECT object_schema,
            object_type,
            object_name,
            null::text AS calling_arguments,
            owner_oid,
            acl
        FROM fdws
    UNION
    SELECT object_schema,
            object_type,
            object_name,
            null::text AS calling_arguments,
            owner_oid,
            acl
        FROM fsrvs
),
acl_base AS (
    SELECT object_schema,
            object_type,
            object_name,
            calling_arguments,
            owner_oid,
            ( aclexplode ( acl ) ).grantor AS grantor_oid,
            ( aclexplode ( acl ) ).grantee AS grantee_oid,
            ( aclexplode ( acl ) ).privilege_type AS privilege_type,
            ( aclexplode ( acl ) ).is_grantable AS is_grantable
        FROM all_objects
)
SELECT acl_base.object_schema,
        acl_base.object_type,
        acl_base.object_name,
        acl_base.calling_arguments,
        owner.role_name AS object_owner,
        grantor.role_name AS grantor,
        grantee.role_name AS grantee,
        acl_base.privilege_type,
        acl_base.is_grantable
    FROM acl_base
    JOIN rol owner
        ON ( owner.oid = acl_base.owner_oid )
    JOIN rol grantor
        ON ( grantor.oid = acl_base.grantor_oid )
    JOIN rol grantee
        ON ( grantee.oid = acl_base.grantee_oid )
    WHERE acl_base.grantor_oid <> acl_base.grantee_oid ;

Sequences / counters

List all sequences / counters

SELECT * FROM information_schema.sequences;

Reset a sequence / counter

ALTER SEQUENCE ${counter_name} RESTART WITH 100; -- or any other value you'd like to put it

Stats

Reset all statistics for DB

select pg_stat_reset(); select pg_stat_statements_reset();

For archiver only:

SELECT pg_stat_reset_shared('archiver');

See when the stats are reset last

select datname,stats_reset from pg_stat_database;

Hugepages

Check if postgres is allocating hugepages

sudo pmap $(pgrep postgres) |  grep -E -- "-s- .*deleted" | sort -u

If you see anon_hugepage, postgres is using it

Calculate needed hugepages

Get the size of current usage:

~> sudo pmap $(pgrep postgres) | awk '/rw-s/ && /zero/ {print $2}' | sort -u
4410584K

So we have 4410584K here, let's check how big one hugepage is:

~> grep Hugepagesize /proc/meminfo
Hugepagesize:       2048 kB

4410584/2048 is 2153, so we need to set vm.nr_hugepages to at least 2153. I'd recommend adding a bit to this number (e.g. 10%) for various reasons, but be aware this space will be dead to the processes which can't use hugepages.

Misc

List all triggers

select * from information_schema.triggers;

Check current timeline id

select timeline_id from pg_control_checkpoint();

Repeat last command

\g

Watch last command

\watch {seconds}

Check the size (as in disk space) of all databases

SELECT d.datname AS Name, pg_catalog.pg_get_userbyid(d.datdba) AS Owner,
  CASE WHEN pg_catalog.has_database_privilege(d.datname, 'CONNECT')
    THEN pg_catalog.pg_size_pretty(pg_catalog.pg_database_size(d.datname))
    ELSE 'No Access'
  END AS SIZE
FROM pg_catalog.pg_database d
ORDER BY
  CASE WHEN pg_catalog.has_database_privilege(d.datname, 'CONNECT')
    THEN pg_catalog.pg_database_size(d.datname)
    ELSE NULL
  END;

Check the size (as in disk space) of each table

WITH RECURSIVE pg_inherit(inhrelid, inhparent) AS
    (select inhrelid, inhparent
    FROM pg_inherits
    UNION
    SELECT child.inhrelid, parent.inhparent
    FROM pg_inherit child, pg_inherits parent
    WHERE child.inhparent = parent.inhrelid),
pg_inherit_short AS (SELECT * FROM pg_inherit WHERE inhparent NOT IN (SELECT inhrelid FROM pg_inherit))
SELECT table_schema
    , TABLE_NAME
    , est_row_count
    , pg_size_pretty(total_bytes) AS total
    , pg_size_pretty(index_bytes) AS INDEX
    , pg_size_pretty(toast_bytes) AS toast
    , pg_size_pretty(table_bytes) AS TABLE
  FROM (
    SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes
    FROM (
         SELECT c.oid
              , nspname AS table_schema
              , relname AS TABLE_NAME
              , CEIL(SUM(c.reltuples) OVER (partition BY parent)) AS est_row_count
              , SUM(pg_total_relation_size(c.oid)) OVER (partition BY parent) AS total_bytes
              , SUM(pg_indexes_size(c.oid)) OVER (partition BY parent) AS index_bytes
              , SUM(pg_total_relation_size(reltoastrelid)) OVER (partition BY parent) AS toast_bytes
              , parent
          FROM (
                SELECT pg_class.oid
                    , reltuples
                    , relname
                    , relnamespace
                    , pg_class.reltoastrelid
                    , COALESCE(inhparent, pg_class.oid) parent
                FROM pg_class
                    LEFT JOIN pg_inherit_short ON inhrelid = oid
                WHERE relkind IN ('r', 'p')
             ) c
             LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
  ) a
  WHERE oid = parent
) a
ORDER BY total_bytes DESC;

Show all foreign keys

SELECT
    tc.table_schema,
    tc.constraint_name,
    tc.table_name,
    kcu.column_name,
    ccu.table_schema AS foreign_table_schema,
    ccu.table_name AS foreign_table_name,
    ccu.column_name AS foreign_column_name
FROM
    information_schema.table_constraints AS tc
    JOIN information_schema.key_column_usage AS kcu
      ON tc.constraint_name = kcu.constraint_name
      AND tc.table_schema = kcu.table_schema
    JOIN information_schema.constraint_column_usage AS ccu
      ON ccu.constraint_name = tc.constraint_name
      AND ccu.table_schema = tc.table_schema
WHERE tc.constraint_type = 'FOREIGN KEY';

# AND tc.table_name='mytable';

Export output of a query to CSV file

\COPY (SELECT * from users) To '/tmp/output.csv' With CSV;

Drop multiple databases quickly

Since you can't run "drop database" in a function, we'll cheat by exporting a file, and running it:

Export the drop database command for DBs which has a prefix for example:

\copy (select 'DROP DATABASE ' || db.datname || ' ;' from pg_catalog.pg_database db where db.datname like '%myfilter%') to '/tmp/dropdball.sql' ;

Then execute via:

\i /tmp/dropdball.sql

Show IOD of a table

SELECT '${TABLE_NAME}'::regclass::oid;

Find out physical folder id for databases

Just find and run oid2name command (it's sometimes not in path)

Define a global search function

Taken from here, works really nice.

CREATE OR REPLACE FUNCTION global_search(
    search_term text,
    progress text default null, -- 'tables','hits','all'
    param_tables text[] default '{}',
    param_schemas text[] default '{public}'
)
RETURNS table(schemaname text, tablename text, columnname text, rowctid tid)
AS $$
    declare
    query text;
    hit boolean;
begin
FOR schemaname,tablename IN
    SELECT table_schema, table_name
    FROM information_schema.tables t
    WHERE (t.table_name=ANY(param_tables) OR param_tables='{}')
    AND t.table_schema=ANY(param_schemas)
    AND t.table_type='BASE TABLE'
    LOOP
        IF (progress in ('tables','all')) THEN
            raise info '%', format('Searching globally in table: %I.%I',
            schemaname, tablename);
        END IF;
        query := format('SELECT ctid FROM %I.%I AS t WHERE strpos(cast(t.* as text), %L) > 0',
        schemaname,
        tablename,
        search_term);
        FOR rowctid IN EXECUTE query
        LOOP
            FOR columnname IN
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name=tablename
            AND table_schema=schemaname
            LOOP
                query := format('SELECT true FROM %I.%I WHERE cast(%I as text)=%L AND ctid=%L',
                schemaname, tablename, columnname, search_term, rowctid);
                EXECUTE query INTO hit;
                IF hit THEN
                    IF (progress in ('hits', 'all')) THEN
                        raise info '%', format('Found in %I.%I.%I at ctid %s',
                        schemaname, tablename, columnname, rowctid);
                    END IF;
                    RETURN NEXT;
                END IF;
            END LOOP; -- for columnname
        END LOOP; -- for rowctid
    END LOOP; -- for table
END;
$$ language plpgsql;

Functions

Show definition of functions

\df+

Locks / connections / transactions

See all locks

    SELECT t.relname, l.locktype, page, virtualtransaction, pid, mode, granted
    FROM pg_locks l, pg_stat_all_tables t
    WHERE l.relation = t.relid ORDER BY relation asc;

Detailed blocked/blocking list

SELECT blocked_locks.pid     AS blocked_pid,
    blocked_activity.usename  AS blocked_user,
    blocking_locks.pid     AS blocking_pid,
    blocking_activity.usename AS blocking_user,
    blocked_activity.query    AS blocked_statement,
    blocking_activity.query   AS current_statement_in_blocking_process
FROM pg_catalog.pg_locks         blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity  ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks         blocking_locks
    ON blocking_locks.locktype = blocked_locks.locktype
    AND blocking_locks.DATABASE IS NOT DISTINCT FROM blocked_locks.DATABASE
    AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
    AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
    AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
    AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
    AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
    AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
    AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
    AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
    AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.GRANTED;

List all transactions running longer than N

SELECT
    pid
    ,datname
    ,usename
    ,application_name
    ,client_hostname
    ,client_port
    ,backend_start
    ,query_start
    ,query
    ,state
FROM pg_stat_activity where (now() - query_start) > interval '6 hours'; -- or whatever you'd like

Show all active connections

SELECT
    pid
    ,datname
    ,usename
    ,application_name
    ,client_hostname
    ,client_port
    ,backend_start
    ,query_start
    ,query
    ,state
FROM pg_stat_activity
WHERE state = 'active';

Terminate a connection

SELECT pg_terminate_backend(${PID})

Terminate all queries which is running longer than X

SELECT
    pg_terminate_backend(pid) as killed,
    query
FROM pg_stat_activity
WHERE (now() - pg_stat_activity.query_start) > interval '30 minutes';

Backup

Check if DB is in backup mode

select pg_is_in_backup();

Restore

Pick only parts of the backup archive to restore

If you have an archive backup and you'd like to only restore single/few table(s) and related indexes/sequences.

First dump what is where for that archive:

pg_restore -l ${archive_folder} > list

Then edit the list and keep only the parts you'd like to restore, e.g. something like this stays in the list:

459; 1259 18463 TABLE public mytable mydb
5266; 0 0 ACL public TABLE mytable mydb
5142; 0 18463 TABLE DATA public mytable mydb

Now you can restore only these via -L:

pg_restore -L list ${archive_folder} -d ${target_db_name}

Barman

Error: "please make sure WAL shipping is setup"

barman switch-wal --force --archive ${hostname}

Replication

Rebuild secondary (repmgr)

# stop postgres
repmgr -h ${MASTER_SERVER} -c -F -U repmgr -d repmgr --without-barman standby clone
# start postgres
repmgr standby unregister
repmgr standby register

Check replication status from DB

select client_addr, state, sent_lsn, write_lsn,
    flush_lsn, replay_lsn from pg_stat_replication;

Show replication lag by seconds

SELECT
    pg_last_wal_receive_lsn() AS receive,
    pg_last_wal_replay_lsn() AS replay,
    COALESCE(ROUND(EXTRACT(epoch FROM now() - pg_last_xact_replay_timestamp())),0) AS seconds;

Replication slots

Create:

select pg_create_physical_replication_slot('slotname');

Check:

select * from pg_replication_slots;

Remove:

pg_drop_replication_slot('slotname');

JSON

Query a first level attribute

SELECT * FROM table WHERE column_name->>'1st_element' = 'value';

Query a 2nd level attribute

SELECT * FROM table WHERE column_name->'1st_element'->>'2nd_element' = "value";

Configuration

Show current vacuum settings

select name, setting FROM pg_settings WHERE "name" ~* 'vac' ORDER BY "name";

Show per-table setting overrides

select relname, reloptions from pg_class;

To show only changed ones:

SELECT relname, reloptions FROM pg_class where reloptions is not null and relname not like 'pg_stats%';

Check if any change is waiting for restart

select name from pg_settings where pending_restart = 't';

List all variables which requires server restart to change

select name from pg_settings where context = 'postmaster';

Reload the config

select * from pg_reload_conf();

Large files

List large files

\lo_list

Export a large file

\lo_export ${lo_id} ${file_path}

Vacuum unused large files

vacuumlo ${db_name}

Autovacuum

Read a nice explanation from Cybertec here

Show last (auto)vacuum / (auto)analyze of tables

Also shows which tables are expected to get autovacuum soon

WITH rel_set AS
(
    SELECT
        oid,
        CASE split_part(split_part(array_to_string(reloptions, ','), 'autovacuum_vacuum_threshold=', 2), ',', 1)
            WHEN '' THEN NULL
        ELSE split_part(split_part(array_to_string(reloptions, ','), 'autovacuum_vacuum_threshold=', 2), ',', 1)::BIGINT
        END AS rel_av_vac_threshold,
        CASE split_part(split_part(array_to_string(reloptions, ','), 'autovacuum_vacuum_scale_factor=', 2), ',', 1)
            WHEN '' THEN NULL
        ELSE split_part(split_part(array_to_string(reloptions, ','), 'autovacuum_vacuum_scale_factor=', 2), ',', 1)::NUMERIC
        END AS rel_av_vac_scale_factor
    FROM pg_class
)
SELECT
    PSUT.relname,
    to_char(PSUT.last_vacuum, 'YYYY-MM-DD HH24:MI')     AS last_vacuum,
    to_char(PSUT.last_autovacuum, 'YYYY-MM-DD HH24:MI') AS last_autovacuum,
    to_char(C.reltuples, '9G999G999G999')               AS n_tup,
    to_char(PSUT.n_dead_tup, '9G999G999G999')           AS dead_tup,
    to_char(coalesce(RS.rel_av_vac_threshold, current_setting('autovacuum_vacuum_threshold')::BIGINT) + coalesce(RS.rel_av_vac_scale_factor, current_setting('autovacuum_vacuum_scale_factor')::NUMERIC) * C.reltuples, '9G999G999G999') AS av_threshold,
    CASE
        WHEN (coalesce(RS.rel_av_vac_threshold, current_setting('autovacuum_vacuum_threshold')::BIGINT) + coalesce(RS.rel_av_vac_scale_factor, current_setting('autovacuum_vacuum_scale_factor')::NUMERIC) * C.reltuples) < PSUT.n_dead_tup
        THEN '*'
    ELSE ''
    END AS expect_av
FROM
    pg_stat_user_tables PSUT
    JOIN pg_class C
        ON PSUT.relid = C.oid
    JOIN rel_set RS
        ON PSUT.relid = RS.oid
ORDER BY C.reltuples DESC;

Show what is currently being processed by autovacuum workers

SELECT
    p.pid,
    now() - a.xact_start AS duration,
    coalesce(wait_event_type ||'.'|| wait_event, 'f') AS waiting,
    CASE
        WHEN a.query ~*'^autovacuum.*to prevent wraparound' THEN 'wraparound'
        WHEN a.query ~*'^vacuum' THEN 'user'
    ELSE 'regular'
    END AS mode,
    p.datname AS database,
    p.relid::regclass AS table,
    p.phase,
    pg_size_pretty(p.heap_blks_total * current_setting('block_size')::int) AS table_size,
    pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
    pg_size_pretty(p.heap_blks_scanned * current_setting('block_size')::int) AS scanned,
    pg_size_pretty(p.heap_blks_vacuumed * current_setting('block_size')::int) AS vacuumed,
    round(100.0 * p.heap_blks_scanned / p.heap_blks_total, 1) AS scanned_pct,
    round(100.0 * p.heap_blks_vacuumed / p.heap_blks_total, 1) AS vacuumed_pct,
    p.index_vacuum_count,
    round(100.0 * p.num_dead_tuples / p.max_dead_tuples,1) AS dead_pct
FROM pg_stat_progress_vacuum p
JOIN pg_stat_activity a using (pid)
ORDER BY now() - a.xact_start DESC;

Check how far we're away from a "wraparound" vacuum

SELECT
       oid::regclass::text AS table,
       age(relfrozenxid) AS xid_age,
       mxid_age(relminmxid) AS mxid_age,
       least(
(SELECT setting::int
            FROM    pg_settings
            WHERE   name = 'autovacuum_freeze_max_age') - age(relfrozenxid),
(SELECT setting::int
            FROM    pg_settings
            WHERE   name = 'autovacuum_multixact_freeze_max_age') - mxid_age(relminmxid)
) AS tx_before_wraparound_vacuum,
pg_size_pretty(pg_total_relation_size(oid)) AS size,
pg_stat_get_last_autovacuum_time(oid) AS last_autovacuum
FROM    pg_class
WHERE   relfrozenxid != 0
AND oid > 16384
ORDER BY tx_before_wraparound_vacuum;

Tuple/row internals - xmin/xmax stuff

A nice blog post about this section from cybertec.

Check the physical location of a row

SELECT ctid, xmin, xmax FROM ${table_name}; -- and ofc add WHERE as needed

Get hint bits of a tuple

Needs pageinspect extension

SELECT lp,
       t_ctid AS ctid,
       t_xmin AS xmin,
       t_xmax AS xmax,
       (t_infomask & 128)::boolean AS xmax_is_lock,
       (t_infomask & 1024)::boolean AS xmax_committed,
       (t_infomask & 2048)::boolean AS xmax_rolled_back,
       (t_infomask & 4096)::boolean AS xmax_multixact
FROM heap_page_item_attrs(
        get_raw_page('${table_name', ${tuple_number}),
        '${table_name}'
     );

You might need to send a SELECT to update the latest xmax value first.

Check max row age for all tables

Relevant for vacuum investigation also

SELECT c.oid::regclass as table_name,
       greatest(age(c.relfrozenxid),age(t.relfrozenxid)) as age
FROM pg_class c
LEFT JOIN pg_class t ON c.reltoastrelid = t.oid
WHERE c.relkind IN ('r', 'm') order by 2 desc;

Show current connections with their xmin value

That means they will block the marking of tuples (which has bigger xmax value) as dead

SELECT pid, datname, usename, state, backend_xmin
FROM pg_stat_activity
WHERE backend_xmin IS NOT NULL
ORDER BY age(backend_xmin) DESC;

Query examples

Get the number of duplicates for each given value

SELECT ${column_name}, count(*) FROM ${table_name} GROUP BY ${column_name} ORDER BY count DESC;