sql >> Base de Datos >  >> RDS >> PostgreSQL

Consulta distinta de selección lenta en postgres

BEGIN; 
CREATE TABLE dist ( x INTEGER NOT NULL ); 
INSERT INTO dist SELECT random()*50 FROM generate_series( 1, 5000000 ); 
COMMIT;
CREATE INDEX dist_x ON dist(x);


VACUUM ANALYZE dist;
EXPLAIN ANALYZE SELECT DISTINCT x FROM dist;

HashAggregate  (cost=84624.00..84624.51 rows=51 width=4) (actual time=1840.141..1840.153 rows=51 loops=1)
   ->  Seq Scan on dist  (cost=0.00..72124.00 rows=5000000 width=4) (actual time=0.003..573.819 rows=5000000 loops=1)
 Total runtime: 1848.060 ms

PG no puede (todavía) usar un índice para distinguir (saltar los valores idénticos), pero puede hacer esto:

CREATE OR REPLACE FUNCTION distinct_skip_foo()
RETURNS SETOF INTEGER
LANGUAGE plpgsql STABLE 
AS $$
DECLARE
    _x  INTEGER;
BEGIN
    _x := min(x) FROM dist;
    WHILE _x IS NOT NULL LOOP
        RETURN NEXT _x;
        _x := min(x) FROM dist WHERE x > _x;
    END LOOP;
END;
$$ ;

EXPLAIN ANALYZE SELECT * FROM distinct_skip_foo();
Function Scan on distinct_skip_foo  (cost=0.00..260.00 rows=1000 width=4) (actual time=1.629..1.635 rows=51 loops=1)
 Total runtime: 1.652 ms