create table rides ( id bigserial not null primary key, tenant_id varchar(20) not null, ride_id varchar(36) not null, created_at timestamp with time zone not null, metadata jsonb -- Probably more columns and indexes coming here );
CREATE OR REPLACE FUNCTION insert_row() RETURNS TRIGGER AS $BODY$ DECLARE partition_env TEXT; partition_date TIMESTAMP; partition_name TEXT; sql TEXT; BEGIN -- construct partition name partition_env := lower(NEW.tenant_id); partition_date := date_trunc('month', NEW.created_at AT TIME ZONE 'UTC'); partition_name := format('%s_%s_%s', TG_TABLE_NAME, partition_env, to_char(partition_date, 'YYYY_MM')); -- create partition, if necessary IF NOT EXISTS(SELECT relname FROM pg_class WHERE relname = partition_name) THEN PERFORM create_new_partition(TG_TABLE_NAME, NEW.tenant_id, partition_date, partition_name); END IF; select format('INSERT INTO %s values ($1.*)', partition_name) into sql; -- Propagate insert EXECUTE sql USING NEW; RETURN NEW; -- RETURN NULL; if no ORM END; $BODY$ LANGUAGE plpgsql;
PERFORM
is useful if we want to call a function that returns nothing. Usually, in the examples, all the logic is tried to be put into one function, but we try to be careful.
USING NEW
indicates that in this query we are using values ​​from the string we tried to add.
$1.*
expands all the values ​​of the new line In fact, this can be translated into NEW.*
. What translates to NEW.ID, NEW.TENANT_ID, …
PERFORM
, will create a new partition if it does not already exist. This will happen once a period for each tenant.
CREATE OR REPLACE FUNCTION create_new_partition(parent_table_name text, env text, partition_date timestamp, partition_name text) RETURNS VOID AS $BODY$ DECLARE sql text; BEGIN -- Notifying RAISE NOTICE 'A new % partition will be created: %', parent_table_name, partition_name; select format('CREATE TABLE IF NOT EXISTS %s (CHECK ( tenant_id = ''%s'' AND created_at AT TIME ZONE ''UTC'' > ''%s'' AND created_at AT TIME ZONE ''UTC'' <= ''%s'')) INHERITS (%I)', partition_name, env, partition_date, partition_date + interval '1 month', parent_table_name) into sql; -- New table, inherited from a master one EXECUTE sql; PERFORM index_partition(partition_name); END; $BODY$ LANGUAGE plpgsql;
INHERITS
to create a table similar to parent, and CHECK
to determine which data should go there.
RAISE NOTICE
simply prints the string to the console. If we run the INSERT
from psql now, we can see if the partition has been created.
INHERITS
does not inherit indexes. For this, we have two solutions:
CREATE TABLE LIKE
, and then ALTER TABLE INHERITS
CREATE OR REPLACE FUNCTION index_partition(partition_name text) RETURNS VOID AS $BODY$ BEGIN -- Ensure we have all the necessary indices in this partition; EXECUTE 'CREATE INDEX IF NOT EXISTS ' || partition_name || '_tenant_timezone_idx ON ' || partition_name || ' (tenant_id, timezone(''UTC''::text, created_at))'; -- More indexes here... END; $BODY$ LANGUAGE plpgsql;
CREATE TRIGGER before_insert_row_trigger BEFORE INSERT ON rides FOR EACH ROW EXECUTE PROCEDURE insert_row();
UPDATE
, and at the application level, always do DELETE+INSERT
UPDATE
, which will transfer data to the correct partition.
CREATE OR REPLACE FUNCTION delete_parent_row() RETURNS TRIGGER AS $BODY$ DECLARE BEGIN delete from only rides where id = NEW.ID; RETURN null; END; $BODY$ LANGUAGE plpgsql;
CREATE TRIGGER after_insert_row_trigger AFTER INSERT ON rides FOR EACH ROW EXECUTE PROCEDURE delete_parent_row();
DO $script$ DECLARE year_start_epoch bigint := extract(epoch from '20170101'::timestamptz at time zone 'UTC'); delta bigint := extract(epoch from '20171231 23:59:59'::timestamptz at time zone 'UTC') - year_start_epoch; tenant varchar; tenants varchar[] := array['tenant_a', 'tenant_b', 'tenant_c', 'tenant_d']; BEGIN FOREACH tenant IN ARRAY tenants LOOP FOR i IN 1..100000 LOOP insert into rides (tenant_id, created_at, ride_id) values (tenant, to_timestamp(random() * delta + year_start_epoch) at time zone 'UTC', i); END LOOP; END LOOP; END $script$;
explain select * from rides where tenant_id = 'tenant_a' and created_at AT TIME ZONE 'UTC' > '20171102' and created_at AT TIME ZONE 'UTC' <= '20171103';
Append (cost=0.00..4803.76 rows=4 width=196) -> Seq Scan on rides (cost=0.00..4795.46 rows=3 width=196) Filter: (((created_at)::timestamp without time zone > '2017-11-02 00:00:00'::timestamp without time zone) AND ((created_at)::timestamp without time zone <= '2017-11-03 00:00:00'::timestamp without time zone) AND ((tenant_id)::text = 'tenant_a'::text)) -> Index Scan using rides_tenant_a_2017_11_tenant_timezone_idx on rides_tenant_a_2017_11 (cost=0.28..8.30 rows=1 width=196) Index Cond: (((tenant_id)::text = 'tenant_a'::text) AND ((created_at)::timestamp without time zone > '2017-11-02 00:00:00'::timestamp without time zone) AND ((created_at)::timestamp without time zone <= '2017-11-03 00:00:00'::timestamp without time zone)) (5 rows)
show CONSTRAINT_EXCLUSION
Source: https://habr.com/ru/post/422753/