content
- documents.content_keyword_ref
- keywords that are present in the document. CREATE TABLE content ( id integer NOT NULL DEFAULT nextval('content_id_seq'::regclass), some_data character varying(1000) NOT NULL, CONSTRAINT content_pkey PRIMARY KEY (id), ); CREATE TABLE content_keyword_ref ( keyword_id integer NOT NULL, content_id integer NOT NULL, CONSTRAINT content_keyword_ref_pkey PRIMARY KEY (keyword_id, content_id), CONSTRAINT content_keyword_ref_content_id_foreign FOREIGN KEY (content_id) REFERENCES content (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE CASCADE, CONSTRAINT content_keyword_ref_keyword_id_foreign FOREIGN KEY (keyword_id) REFERENCES keywords (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE CASCADE ); CREATE INDEX content_keyword_ref_content_id_index ON content_keyword_ref USING btree (content_id); CREATE INDEX content_keyword_ref_keyword_id_index ON content_keyword_ref USING btree (keyword_id); CREATE INDEX content_keyword_ref_keyword_content_idx ON content_keyword_ref USING btree (keyword_id, content_id);
EXPLAIN ANALYZE
and display the plan): EXPLAIN ANALYSE SELECT c.id FROM content c JOIN content_keyword_ref r ON r.content_id = c.id AND r.keyword_id IN (4713, 5951) GROUP BY c.id LIMIT 1000
Limit (cost = 21454.94..34933.16 rows = 1000 width = 4) (actual time = 6.777..199.735 rows = 1000 loops = 1) -> Group (cost = 21454.94..100235.11 rows = 5845 width = 4) (actual time = 6.775..199.641 rows = 1000 loops = 1) Group Key: c.id -> Merge Join (cost = 21454.94..100220.49 rows = 5845 width = 4) (actual time = 6.774..199.389 rows = 1141 loops = 1) Merge Cond: (c.id = r.content_id) -> Index Only using content_pkey on content c (cost = 0.43..73221.47 rows = 2182736 width = 4) (actual time = 0.013..131.942 rows = 1339506 loops = 1) Heap Fetches: 0 -> Sort (cost = 21454.51..21469.13 rows = 5845 width = 4) (actual time = 6.662..6.792 rows = 1141 loops = 1) Sort Key: r.content_id Sort Method: quicksort Memory: 143kB -> Bitmap Heap Scan on content_keyword_ref r (cost = 118.16..21088.82 rows = 5845 width = 4) (actual time = 0.470..6.273 rows = 2007 loops = 1) Recheck Cond: (keyword_id = ANY ('{4713,5951}' :: integer [])) Heap Blocks: exact = 1781 -> Bitmap Index Scan on content_keyword_ref_keyword_content_idx (cost = 0.00..116.70 rows = 5845 width = 0) (actual time = 0.239..0.239 rows = 2007 loops = 1) Index Cond: (keyword_id = ANY ('{4713,5951}' :: integer [])) Planning time: 0.277 ms Execution time: 199.805 ms
EXPLAIN ANALYSE SELECT DISTINCT c.id FROM content c JOIN content_keyword_ref r ON r.content_id = c.id AND r.keyword_id IN (4713, 5951) LIMIT 1000
Limit (cost = 21454.94..34933.16 rows = 1000 width = 4) (actual time = 2.824..187.619 rows = 1000 loops = 1) -> Unique (cost = 21454.94..100235.11 rows = 5845 width = 4) (actual time = 2.824..187.519 rows = 1000 loops = 1) -> Merge Join (cost = 21454.94..100220.49 rows = 5845 width = 4) (actual time = 2.823..187.351 rows = 1141 loops = 1) Merge Cond: (c.id = r.content_id) -> Index Only content using content_pkey on content c (cost = 0.43..73221.47 rows = 2182736 width = 4) (actual time = 0.011..120.481 rows = 1339506 loops = 1) Heap Fetches: 0 -> Sort (cost = 21454.51..21469.13 rows = 5845 width = 4) (actual time = 2.693..2.805 rows = 1141 loops = 1) Sort Key: r.content_id Sort Method: quicksort Memory: 143kB -> Bitmap Heap Scan on content_keyword_ref r (cost = 118.16..21088.82 rows = 5845 width = 4) (actual time = 0.463..2.321 rows = 2007 loops = 1) Recheck Cond: (keyword_id = ANY ('{4713,5951}' :: integer [])) Heap Blocks: exact = 1781 -> Bitmap Index Scan on content_keyword_ref_keyword_content_idx (cost = 0.00..116.70 rows = 5845 width = 0) (actual time = 0.235..0.235 rows = 2007 loops = 1) Index Cond: (keyword_id = ANY ('{4713,5951}' :: integer [])) Planning time: 0.264 ms Execution time: 187.727 ms
content
leave. But links with keywords will be stored in an array. To quickly select data by condition on an array, we also create a GiST index. For information on which array operators are supported by indexes, see the PostgreSQL documentation . CREATE TABLE document ( content_id integer NOT NULL, -- , content_keyword_ref keyword_ids integer[] NOT NULL ); -- GiST CREATE INDEX document_keyword_ids_index ON document USING GiST(keyword_ids gist__intbig_ops);
CREATE INDEX document_content_id_index ON public.document USING btree (content_id); -- INSERT INTO document (content_id, keyword_ids) SELECT c.id, ARRAY( SELECT r.keyword_id FROM content_keyword_ref r WHERE r.content_id = c.id ) FROM content c GROUP BY c.id;
EXPLAIN ANALYZE SELECT c.id FROM content c JOIN document d ON d.content_id = c.id AND d.keyword_ids && ARRAY[4713, 5951] limit 1000
Limit (cost = 387.80..7540.27 rows = 1000 width = 4) (actual time = 8.799..12.935 rows = 1000 loops = 1) -> Nested Loop (cost = 387.80..14177.77 rows = 1928 width = 4) (actual time = 8.799..12.880 rows = 1000 loops = 1) -> Bitmap Heap Scan on document d (cost = 387.37..6246.79 rows = 1930 width = 4) (actual time = 8.786..10.599 rows = 1000 loops = 1) Recheck Cond: (keyword_ids && '{4713,5951}' :: integer []) Rows Removed by Index Recheck: 107 Heap Blocks: exact = 1008 -> Bitmap Index scan on document_keyword_ids_index (cost = 0.00 ..386.89 rows = 1930 width = 0) (actual time = 8.560..8.560 rows = 1977 loops = 1) Index Cond: (keyword_ids && {4713,5951} ':: integer []) -> Index Only using content_pkey on content c (cost = 0.43..4.10 rows = 1 width = 4) (actual time = 0.002..0.002 rows = 1 loops = 1000) Index Cond: (id = d.content_id) Heap Fetches: 0 Planning time: 0.184 ms Execution time: 12.994 ms
GROUP BY:
SELECT COUNT(1) FROM ( SELECT c.id FROM content c JOIN content_keyword_ref r ON r.content_id = c.id AND r.keyword_id IN (4713, 5951) GROUP BY c.id ) t;
DISTINCT
: SELECT COUNT(1) FROM ( SELECT DISTINCT(c.id) FROM content c JOIN content_keyword_ref r ON r.content_id = c.id AND r.keyword_id IN (4713, 5951) ) t;
COUNT (DISTINCT columns)
: SELECT COUNT(DISTINCT c.id) FROM content c JOIN content_keyword_ref r ON r.content_id = c.id AND r.keyword_id IN (4713, 5951)
SELECT COUNT(1) OVER() FROM content c JOIN content_keyword_ref r ON r.content_id = c.id AND r.keyword_id IN (4713, 5951) GROUP BY c.id LIMIT 1
count()
already in PHP. Therefore, most likely you will have to redefine the method of calculating the number of records so that the entire sample is not read from the database each time. SELECT COUNT(1) FROM document d WHERE d.keyword_ids && ARRAY[4713, 5951]
SELECT c.id FROM content c JOIN content_keyword_ref r1 ON r1.content_id = c.id AND r1.keyword_id = 5388 JOIN content_keyword_ref r2 ON r2.content_id = c.id AND r2.keyword_id = 5951 LIMIT 1000
@>
operator. Then the query looks more neat: SELECT c.id FROM content c JOIN document d ON d.content_id = c.id AND d.keyword_ids @> ARRAY[5388, 5951] LIMIT 1000
Source: https://habr.com/ru/post/317980/
All Articles