DROP TABLE IF EXISTS dynamic_analyze_config; CREATE TABLE dynamic_analyze_config ( schemaname TEXT NOT NULL, -- Text representation of schema name tablename TEXT NOT NULL, -- Text representation of table name columns TEXT[], -- Array of column names represented as text -- objects listing the subset of columns -- to analyze; if this field is empty, all -- columns will be analyzed threshold INTERVAL NOT NULL -- Interval of time after which statistics -- for this table are considered stale ) WITH (APPENDONLY=FALSE) DISTRIBUTED RANDOMLY ; -- This code is provided on an "as is" basis as an example of pothential -- functionality, without warntees or conditions of any kind, either express or -- implied, including, without limitation, any warranties or conditions of -- title, non-infringement, merchantability, or fitness for a particular -- purpose. As the user of this code you are solely responsible for determining -- the appropriateness of using this code and mitigating risks associated with -- it. -- Usage forms: -- -- SELECT * FROM dynamic_analyze_targeted(schema_name, max_analyzes); -- - Run ANALYZE against at most max_analyzes tables belonging to the schema -- named by schema_name that are configured in the dynamic_analyze_config -- table. If max_analyzes is not a positive integer, no such limit will -- be observed. Table analysis will be prioritized based upon how much -- time has elapsed since each table has been analyzed relative to the -- staleness threshold configured in dynamic_analyze_config. -- -- SELECT * FROM dynamic_analyze_targeted(schema_name, max_analyzes, runtime_limit); -- - Run ANALYZE against tables belonging to the schema named by schema_name -- that are configured in the dynamic_analyze_config table until either: -- 1) the number of tables that have been analyzed equals/exceeds -- max_analyzes (assuming max_analyzes is a positive integer) or -- 2) the total runtime after the completion of the most recent ANALYZE -- has exceeded the runtime_limit parameter (interpreted as an -- INTERVAL) -- As with the other usage form, table analysis will be prioritized based -- upon how much time has elapsed since each table has been analyzed -- relative to the staleness threshold configured in -- dynamic_analyze_config. CREATE OR REPLACE FUNCTION dynamic_analyze_targeted ( schema_name TEXT, size int ) RETURNS TABLE( tablename TEXT, analyze_start TIMESTAMP, analyze_stop TIMESTAMP, time_minutes NUMERIC ) AS $$ BEGIN FOR tablename, analyze_start, analyze_stop, time_minutes IN SELECT * FROM dynamic_analyze_targeted ( schema_name, size, '-1 SECONDS' ) LOOP RETURN NEXT; END LOOP; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION dynamic_analyze_targeted ( schema_name TEXT, size int, runtime_limit_text TEXT ) RETURNS TABLE( tablename TEXT, analyze_start TIMESTAMP, analyze_stop TIMESTAMP, time_minutes NUMERIC ) AS $$ DECLARE cmd TEXT; runtime_limit INTERVAL; start_ts TIMESTAMP; columns TEXT[]; columnstring TEXT; tablesdone INTEGER; BEGIN tablesdone := 0; start_ts := CLOCK_TIMESTAMP(); runtime_limit := runtime_limit_text::INTERVAL; FOR tablename, columns IN SELECT n.nspname||'.'||c.relname, tgts.columns FROM -- Assemble a list of all objects known to the catalog pg_class c INNER JOIN pg_namespace n ON (c.relnamespace = n.oid) -- Determine when each object has last been analyzed LEFT OUTER JOIN ( SELECT DISTINCT objid, statime FROM pg_stat_last_operation WHERE staactionname in ('ANALYZE') ) a ON (a.objid = c.oid) -- Get a list of objects explicitly named in the dynamic_analyze_config table LEFT OUTER JOIN dynamic_analyze_config tgts ON ( n.nspname = tgts.schemaname AND c.relname = tgts.tablename ) -- Consider only tables in the named schema that have never been analyzed, or -- for whom the timestamp of the last ANALYZE operation is outside of the -- staleness interval set in dynamic_analyze_config WHERE n.nspname = schema_name AND ( a.statime IS NULL OR NOW()-a.statime-tgts.threshold > '0 SECONDS'::INTERVAL ) -- Consider only tables explicitly named in dynamic_analyze_config AND tgts.tablename IS NOT NULL -- Prioritize tables whose statistics are more out-of-date ORDER BY NOW()-a.statime-tgts.threshold DESC -- Keep running until no tables remain to analyze, until the runtime -- threshold has been exceeded, or until the table maximum has been hit. LOOP EXIT WHEN runtime_limit > '0 SECONDS'::INTERVAL AND CLOCK_TIMESTAMP() - start_ts > runtime_limit; EXIT WHEN size > 0 AND tablesdone >= size; -- Build and execute the analyze statement, taking into account whether a column -- subset was configured in dynamic_analyze_config. IF columns IS NULL OR columns = '{}' THEN columnstring := ''; ELSE columnstring := '('||array_to_string(columns, ',')||')'; END IF; cmd := 'ANALYZE '||tablename||' '||columnstring||';'; analyze_start := CLOCK_TIMESTAMP(); EXECUTE cmd; analyze_stop := CLOCK_TIMESTAMP(); tablesdone := tablesdone + 1; time_minutes := EXTRACT(EPOCH FROM analyze_stop - analyze_start) / 60.0; RETURN NEXT; END LOOP; END; $$ LANGUAGE plpgsql; |