sql >> Database teknologi >> RDS >> PostgreSQL

Aggregeringsforespørgsel på en tabel med 50M+ rækker i PostgreSQL

Første trin: udfør præ-aggregering i underforespørgslen:

-- Step 1: pre-aggregate event_statistics per (action_name, day) inside a
-- subquery, then LEFT JOIN that small result onto the full
-- calendar x action_name grid so that every (day, action) pair is reported,
-- including pairs without any events (their SUM is NULL).
EXPLAIN
SELECT cal.theday, act.action_name, SUM(sub.the_count)
FROM generate_series(current_date - interval '1 week', now(), interval '1 day') AS cal(theday) -- calendar pseudo-table
CROSS JOIN (VALUES
          ('page_open')
        , ('product_add')
        , ('product_buy')
        , ('product_event')
        , ('product_favourite')
        , ('product_open')
        , ('product_share')
        , ('session_start')
        ) AS act(action_name)
LEFT JOIN (
        -- pre-aggregation: one row per (action_name, day) for the selected client
        SELECT es.action_name
                , date_trunc('day', es.date_update) AS theday
                , COUNT(DISTINCT es.id) AS the_count
        FROM event_statistics AS es
        WHERE es.client_id = (SELECT c.id FROM clients AS c WHERE c.client_name = 'client name')
          AND (es.date_update BETWEEN (current_date - interval '1 week') AND now())
        GROUP BY 1, 2
        ) sub ON cal.theday = sub.theday AND act.action_name = sub.action_name
GROUP BY act.action_name, cal.theday
ORDER BY act.action_name, cal.theday;

Næste trin: Læg VALUES i en CTE, og referer også til den i den aggregerende underforespørgsel. (Gevinsten afhænger af antallet af action-navne, der kan springes over.)

-- Step 2: move the action_name list into a CTE and reference it twice:
-- once to build the calendar x action_name grid, and once inside the
-- aggregating subquery (EXISTS) so that rows with other action names are
-- skipped before aggregation.
EXPLAIN
WITH act(action_name) AS (
        VALUES
          ('page_open')
        , ('product_add')
        , ('product_buy')
        , ('product_event')
        , ('product_favourite')
        , ('product_open')
        , ('product_share')
        , ('session_start')
        )
SELECT cal.theday, act.action_name, SUM(sub.the_count)
FROM generate_series(current_date - interval '1 week', now(), interval '1 day') AS cal(theday)
CROSS JOIN act
LEFT JOIN (
        SELECT es.action_name
                , date_trunc('day', es.date_update) AS theday
                , COUNT(DISTINCT es.id) AS the_count
        FROM event_statistics AS es
        WHERE es.date_update BETWEEN (current_date - interval '1 week') AND now()
          AND EXISTS (SELECT * FROM clients cli WHERE cli.id = es.client_id AND cli.client_name = 'client name')
          AND EXISTS (SELECT * FROM act WHERE act.action_name = es.action_name)
        GROUP BY 1, 2
        ) sub ON cal.theday = sub.theday AND act.action_name = sub.action_name
GROUP BY act.action_name, cal.theday
ORDER BY act.action_name, cal.theday;

OPDATERING: Brug af en fysisk (temp) tabel vil resultere i bedre estimater.

-- Last attempt: materialize the cartesian product (time series * action_name)
-- in a temporary table
CREATE TEMP TABLE grid AS (
        SELECT act.action_name, cal.theday
        FROM generate_series(current_date - interval '1 week', now(), interval '1 day') AS cal(theday)
        CROSS JOIN (VALUES
                  ('page_open')
                , ('product_add')
                , ('product_buy')
                , ('product_event')
                , ('product_favourite')
                , ('product_open')
                , ('product_share')
                , ('session_start')
                ) act(action_name)
        );
CREATE UNIQUE INDEX ON grid(action_name, theday);

-- Index will force statistics to be collected
-- , and will generate better estimates for the number of rows
CREATE INDEX iii ON event_statistics (action_name, date_update);
VACUUM ANALYZE grid;
VACUUM ANALYZE event_statistics;

-- Same report as before, but driven by the materialized grid instead of
-- generate_series() x VALUES.
EXPLAIN
SELECT grid.action_name, grid.theday, SUM(sub.the_count) AS the_count
FROM grid
LEFT JOIN (
        SELECT es.action_name
                , date_trunc('day', es.date_update) AS theday
                , COUNT(*) AS the_count
        FROM event_statistics AS es
        WHERE es.date_update BETWEEN (current_date - interval '1 week') AND now()
          AND EXISTS (SELECT * FROM clients cli WHERE cli.id = es.client_id AND cli.client_name = 'client name')
          -- AND EXISTS (SELECT * FROM grid WHERE grid.action_name = es.action_name)
        GROUP BY 1, 2
        ORDER BY 1, 2 --nonsense! (the outer query sorts the final result anyway)
        ) sub ON grid.theday = sub.theday AND grid.action_name = sub.action_name
GROUP BY grid.action_name, grid.theday
ORDER BY grid.action_name, grid.theday;

Opdatering nr. 3 (beklager, jeg opretter indekser på basistabellen/-tabellerne her; du må selv tilpasse det. Jeg har også fjernet enkeltkolonne-indekset på tidsstemplet)

-- attempt #4:
-- - materialize the cartesian product (time series * action_name)
-- - sanitize date interval -logic
CREATE TEMP TABLE grid AS (
        SELECT act.action_name, cal.theday::date
        FROM generate_series(current_date - interval '1 week', now(), interval '1 day') AS cal(theday)
        CROSS JOIN (VALUES
                  ('page_open')
                , ('product_add')
                , ('product_buy')
                , ('product_event')
                , ('product_favourite')
                , ('product_open')
                , ('product_share')
                , ('session_start')
                ) act(action_name)
        );

-- Index will force statistics to be collected
-- , and will generate better estimates for the number of rows
-- Earlier column order, left disabled:
-- CREATE UNIQUE INDEX ON grid(action_name, theday);
-- CREATE INDEX iii ON event_statistics (action_name, date_update);
CREATE UNIQUE INDEX ON grid(theday, action_name);
CREATE INDEX iii ON event_statistics (date_update, action_name);
VACUUM ANALYZE grid;
VACUUM ANALYZE event_statistics;

-- Join the grid directly to the base table instead of to a pre-aggregated
-- subquery.
-- NOTE(review): the plain JOIN on clients references es.client_id, so grid
-- rows without a matching event are dropped again; the LEFT JOIN therefore
-- behaves like an inner join. Confirm whether that is intended.
EXPLAIN
SELECT gr.action_name, gr.theday
        , COUNT(*) AS the_count
FROM grid gr
LEFT JOIN event_statistics AS es
        ON es.action_name = gr.action_name
        AND date_trunc('day', es.date_update)::date = gr.theday
        AND es.date_update BETWEEN (current_date - interval '1 week') AND current_date
JOIN clients cli ON cli.id = es.client_id AND cli.client_name = 'client name'
GROUP BY gr.action_name, gr.theday
ORDER BY 1, 2;
                                                                        QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------
 GroupAggregate  (cost=8.33..8.35 rows=1 width=17)
   Group Key: gr.action_name, gr.theday
   ->  Sort  (cost=8.33..8.34 rows=1 width=17)
         Sort Key: gr.action_name, gr.theday
         ->  Nested Loop  (cost=1.40..8.33 rows=1 width=17)
               ->  Nested Loop  (cost=1.31..7.78 rows=1 width=40)
                     Join Filter: (es.client_id = cli.id)
                     ->  Index Scan using clients_client_name_key on clients cli  (cost=0.09..2.30 rows=1 width=4)
                           Index Cond: (client_name = 'client name'::text)
                     ->  Bitmap Heap Scan on event_statistics es  (cost=1.22..5.45 rows=5 width=44)
                           Recheck Cond: ((date_update >= (('now'::cstring)::date - '7 days'::interval)) AND (date_update <= ('now'::cstring)::date))
                           ->  Bitmap Index Scan on iii  (cost=0.00..1.22 rows=5 width=0)
                                 Index Cond: ((date_update >= (('now'::cstring)::date - '7 days'::interval)) AND (date_update <= ('now'::cstring)::date))
               ->  Index Only Scan using grid_theday_action_name_idx on grid gr  (cost=0.09..0.54 rows=1 width=17)
                     Index Cond: ((theday = (date_trunc('day'::text, es.date_update))::date) AND (action_name = es.action_name))
(15 rows)



  1. MySQL matematiske funktioner (fuld liste)

  2. Hvordan finder man ud af, hvilke kolonner der ikke har nogen data (alle værdier er NULL)?

  3. Sådan laver du en JOIN på flere kolonner

  4. Hibernate og PostgreSQL: problem med SELECT FOR UPDATE og outer join