Creating a test case for bloat and understanding how it works in Greenplum

posted Apr 28, 2017, 11:53 AM by Sachchida Ojha
create table bloattest (
    id int4,
    int_1 int4,
    int_2 int4,
    int_3 int4,
    ts_1 timestamptz,
    ts_2 timestamptz,
    ts_3 timestamptz,
    text_1 text,
    text_2 text,
    text_3 text
)
distributed by (id);

insert into bloattest
select
    i,
    cast(random() * 10000000 as int4),
    cast(random()*10000000 as int4),
    cast(random()*10000000 as int4),
    now() - '2 years'::interval * random(),
    now() - '2 years'::interval * random(),
    now() - '2 years'::interval * random(),
    repeat('text_1 ', cast(10 + random() * 100 as int4)),
    repeat('text_2 ', cast(10 + random() * 100 as int4)),
    repeat('text_2 ', cast(10 + random() * 100 as int4))
from generate_series(1, 1000000) i;

select gp_segment_id,count(*) from bloattest group by gp_segment_id;

select * from gp_toolkit.gp_bloat_diag;

delete from bloattest where random() < 0.95;

sachi=# create table bloattest (                                                                                                                             
sachi(#     id int4,
sachi(#     int_1 int4,
sachi(#     int_2 int4,
sachi(#     int_3 int4,
sachi(#     ts_1 timestamptz,
sachi(#     ts_2 timestamptz,
sachi(#     ts_3 timestamptz,
sachi(#     text_1 text,
sachi(#     text_2 text,
sachi(#     text_3 text
sachi(# )
sachi-# distributed by (id);
CREATE TABLE


sachi=# insert into bloattest
sachi-# select
sachi-#     i,
sachi-#     cast(random() * 10000000 as int4),
sachi-#     cast(random()*10000000 as int4),
sachi-#     cast(random()*10000000 as int4),
sachi-#     now() - '2 years'::interval * random(),
sachi-#     now() - '2 years'::interval * random(),
sachi-#     now() - '2 years'::interval * random(),
sachi-#     repeat('text_1 ', cast(10 + random() * 100 as int4)),
sachi-#     repeat('text_2 ', cast(10 + random() * 100 as int4)),
sachi-#     repeat('text_2 ', cast(10 + random() * 100 as int4))
sachi-# from generate_series(1, 1000000) i;

INSERT 0 1000000
sachi=# select gp_segment_id,count(*) from bloattest group by gp_segment_id;
 gp_segment_id | count
---------------+--------
             1 | 499999
             0 | 500001
(2 rows)

sachi=# select * from gp_toolkit.gp_bloat_diag;
 bdirelid | bdinspname |  bdirelname   | bdirelpages | bdiexppages |              bdidiag
----------+------------+---------------+-------------+-------------+------------------------------------
     1259 | pg_catalog | pg_class      |          36 |           9 | moderate amount of bloat suspected
     2606 | pg_catalog | pg_constraint |          44 |          11 | moderate amount of bloat suspected
(2 rows)


sachi=# delete from bloattest where random() < 0.95;
DELETE 950138

sachi=# \i bloattablelist.sql
 dbname | relid | schemaname |  tablename   | bloat_ratio | expected_size | relation_size | wasted_space | relative_size |  concern
--------+-------+------------+--------------+-------------+---------------+---------------+--------------+---------------+------------
 sachi  |  1249 | pg_catalog | pg_attribute |         2.6 | 1696 kB       | 4416 kB       | 2720 kB      |        1.0000 | 1.60377358
(1 row)

sachi=# analyze bloattest;
ANALYZE
sachi=# \i bloattablelist.sql
 dbname | relid | schemaname |  tablename   | bloat_ratio | expected_size | relation_size | wasted_space | relative_size |   concern
--------+-------+------------+--------------+-------------+---------------+---------------+--------------+---------------+-------------
 sachi  | 83678 | public     | bloattest    |        19.6 | 67 MB         | 1312 MB       | 1245 MB      |        0.9967 | 18.52354263
 sachi  |  1249 | pg_catalog | pg_attribute |         2.6 | 1696 kB       | 4416 kB       | 2720 kB      |        0.0033 |  0.00525366
(2 rows)

sachi=# select * from gp_toolkit.gp_bloat_diag;
 bdirelid | bdinspname |  bdirelname   | bdirelpages | bdiexppages |                bdidiag
----------+------------+---------------+-------------+-------------+---------------------------------------
     1259 | pg_catalog | pg_class      |          36 |           9 | moderate amount of bloat suspected
     2606 | pg_catalog | pg_constraint |          44 |          11 | moderate amount of bloat suspected
    83678 | public     | bloattest     |       41989 |        2130 | significant amount of bloat suspected
(3 rows)

sachi=# delete from bloattest where random() < 0.95;
DELETE 950138
sachi=# \i bloattablelist.sql
 dbname | relid | schemaname |  tablename   | bloat_ratio | expected_size | relation_size | wasted_space | relative_size |  concern
--------+-------+------------+--------------+-------------+---------------+---------------+--------------+---------------+------------
 sachi  |  1249 | pg_catalog | pg_attribute |         2.6 | 1696 kB       | 4416 kB       | 2720 kB      |        1.0000 | 1.60377358
(1 row)

sachi=# analyze bloattest;
ANALYZE
sachi=# \i bloattablelist.sql
 dbname | relid | schemaname |  tablename   | bloat_ratio | expected_size | relation_size | wasted_space | relative_size |   concern
--------+-------+------------+--------------+-------------+---------------+---------------+--------------+---------------+-------------
 sachi  | 83678 | public     | bloattest    |        19.6 | 67 MB         | 1312 MB       | 1245 MB      |        0.9967 | 18.52354263
 sachi  |  1249 | pg_catalog | pg_attribute |         2.6 | 1696 kB       | 4416 kB       | 2720 kB      |        0.0033 |  0.00525366
(2 rows)

[gpadmin@sachi myscripts]$ psql -d sachi
psql (8.2.15)
Type "help" for help.

sachi=# \i bloattablelist.sql
 dbname | relid | schemaname |  tablename   | bloat_ratio | expected_size | relation_size | wasted_space | relative_size |  concern
--------+-------+------------+--------------+-------------+---------------+---------------+--------------+---------------+------------
 sachi  |  1249 | pg_catalog | pg_attribute |         2.6 | 1696 kB       | 4416 kB       | 2720 kB      |        1.0000 | 1.60377358
(1 row)

[gpadmin@sachi myscripts]$ vi bloattest.sql
[gpadmin@sachi myscripts]$ psql -d sachi -f bloatcatalogtablelist.sql
        ?column?         
-------------------------
 pg_catalog.pg_attribute
(1 row)

[gpadmin@sachi myscripts]$ psql -d sachi -f bloattablelist.sql
     ?column?     
------------------
 public.bloattest
(1 row)

[gpadmin@sachi myscripts]$ 
Comments