Schema Check sum report

posted Feb 22, 2013, 2:49 PM by Sachchida Ojha
set serveroutput on
set feedback off
set verify off
set linesize 10000

create or replace type chksumType as VARRAY(3000) of NUMBER;
/
declare
  cursor c0 is
    select OWNER, TABLE_NAME from ALL_TABLES where OWNER='&1' ORDER BY TABLE_NAME;
  TYPE refType is REF CURSOR;
  tbCursor refType;
  chksumCursor refType;
  dtCursor refType;
  tableName VARCHAR(64);
  owner VARCHAR(64);
  COLUMN_NAME VARCHAR(64);
  DATA_TYPE VARCHAR(64);
  NULLABLE VARCHAR(64);
  colArray chksumType;
  sqlStatement CLOB;
  TYPE clobArray is VARRAY(10) of CLOB;
  selectListArray clobArray;
  colNum NUMBER;
  qryNum INTEGER;
  i INTEGER;
  j INTEGER;
  outLine CLOB;
  dt VARCHAR2(12);
  numDt NUMBER;
begin
  for tab_rec in c0 loop
    begin
    tableName := tab_rec.TABLE_NAME;
owner := tab_rec.OWNER;

    -- Build the query for "non-DATE and NULL" --
qryNum := 1;
    selectListArray := clobArray('count(*)');
    colNum := 2;
open tbCursor for 'select COLUMN_NAME, DATA_TYPE, NULLABLE from ALL_TAB_COLUMNS where OWNER='''|| owner ||''' and TABLE_NAME=''' || tableName || ''' order by COLUMN_NAME';
    loop
 fetch tbCursor into COLUMN_NAME, DATA_TYPE, NULLABLE;
 exit when tbCursor%NOTFOUND;

 -- Check whether the number of columns is more than max allowed
 if colNum > 950
 then
   qryNum := qryNum + 1;
colNum := 1;

selectListArray.EXTEND;
selectListArray(qryNum) := '';
 end if;

      case
 when DATA_TYPE in ('NUMBER', 'FLOAT', 'DOUBLE') then
        if colNum > 1
   then
     selectListArray(qryNum) := selectListArray(qryNum) || ', ';
   end if;
        selectListArray(qryNum) := selectListArray(qryNum) || 'min(' || COLUMN_NAME || ')' || ', max(' || COLUMN_NAME || ')' || ', avg(' || COLUMN_NAME || ')';
        colNum := colNum + 3;
 when DATA_TYPE in ('VARCHAR', 'VARCHAR2', 'NVARCHAR', 'NVARCHAR2') then
        if colNum > 1
   then
     selectListArray(qryNum) := selectListArray(qryNum) || ', ';
   end if;
        selectListArray(qryNum) := selectListArray(qryNum) || 'min(nvl(length(' || COLUMN_NAME || '),0))' || ', max(nvl(length(' || COLUMN_NAME || '),0))' || ', avg(nvl(length(' || COLUMN_NAME || '),0))';
        colNum := colNum + 3;
 when DATA_TYPE in ('CHAR', 'NCHAR') then
        if colNum > 1
   then
     selectListArray(qryNum) := selectListArray(qryNum) || ', ';
   end if;
        selectListArray(qryNum) := selectListArray(qryNum) || 'min(nvl(length(trim(' || COLUMN_NAME || ')),0))' || ', max(nvl(length(trim(' || COLUMN_NAME || ')),0))' || ', avg(nvl(length(trim(' || COLUMN_NAME || ')),0))';
        colNum := colNum + 3;
 when DATA_TYPE in ('CLOB', 'BLOB', 'NCLOB', 'LONG', 'BFILE', 'RAW') then
        if colNum > 1
   then
     selectListArray(qryNum) := selectListArray(qryNum) || ', ';
   end if;
        selectListArray(qryNum) := selectListArray(qryNum) || 'min(nvl(dbms_lob.getlength(' || COLUMN_NAME || '),0))' || ', max(nvl(dbms_lob.getlength(' || COLUMN_NAME || '),0))' || ', avg(nvl(dbms_lob.getlength(' || COLUMN_NAME || '),0))';
        colNum := colNum + 3;
 else
   selectListArray(qryNum) := selectListArray(qryNum);
 end case;
 -- Add the NULL count
 if NULLABLE = 'Y'
 then
        if colNum > 1
   then
     selectListArray(qryNum) := selectListArray(qryNum) || ', ';
   end if;
        selectListArray(qryNum) := selectListArray(qryNum) || 'sum(nvl2(' || COLUMN_NAME || ',0,1))';
        colNum := colNum + 1;
 end if;
    end loop;
close tbCursor;

j := 951;
i := 0;
open tbCursor for 'select COLUMN_NAME, DATA_TYPE, NULLABLE from ALL_TAB_COLUMNS where OWNER='''|| owner ||''' and TABLE_NAME=''' || tableName || ''' order by COLUMN_NAME';
    loop
 fetch tbCursor into COLUMN_NAME, DATA_TYPE, NULLABLE;
 exit when tbCursor%NOTFOUND;

 if j > 950
 then
   i := i + 1;
        sqlStatement := 'select chksumType(' || selectListArray(i) || ') from ' || owner || '.' || tableName;
        --  dbms_output.put_line(sqlStatement);

-- Execute the "non-DATE and NULL" query --
   open chksumCursor for sqlStatement;

-- Output the "non-DATE and NULL" query results
   fetch chksumCursor into colArray;
close chksumCursor;

if i = 1
then
          -- Output the number of records
          dbms_output.put_line(tableName || ',*,' || colArray(1));
  
          j := 2;
else
 j := 1;
end if;
 end if;

 -- Output the rest of the data
      case
 when DATA_TYPE in ('NUMBER', 'FLOAT', 'DOUBLE', 'VARCHAR', 'VARCHAR2', 'NVARCHAR', 'NVARCHAR2', 'CHAR', 'NCHAR', 'CLOB', 'BLOB', 'NCLOB', 'LONG', 'BFILE', 'RAW') then
   outLine := tableName || ',' || COLUMN_NAME || ',' || DATA_TYPE || ',' || NULLABLE || ',' || colArray(j) || ',' || colArray(j+1) || ',' || colArray(j+2);
   j := j + 3;

        -- Add the NULL count
   if NULLABLE = 'Y'
   then
     outLine := outLine || ',' || colArray(j);
 j := j + 1;
   end if;
   dbms_output.put_line(outLine);
      else
   if NULLABLE = 'Y'
   then
     dbms_output.put_line(tableName || ',' || COLUMN_NAME || ',' || DATA_TYPE || ',' || NULLABLE || ',' || colArray(j));
 j := j + 1;
   end if;
 end case;
end loop;
close tbCursor;

    -- Loop through date/time column
open tbCursor for 'select COLUMN_NAME, DATA_TYPE, NULLABLE from ALL_TAB_COLUMNS where OWNER='''|| owner ||''' and TABLE_NAME=''' || tableName || ''' order by COLUMN_NAME';
    loop
 fetch tbCursor into COLUMN_NAME, DATA_TYPE, NULLABLE;
 exit when tbCursor%NOTFOUND;
      if DATA_TYPE in ('DATE', 'TIMESTAMP')
 then
   -- Create the query to get the distribution of dates
        sqlStatement := 'select to_char(' || COLUMN_NAME || ',''YYYYMMDD''), count(*) from ' || owner || '.' || tableName || ' group by to_char(' || COLUMN_NAME || ',''YYYYMMDD'')';
-- dbms_output.put_line(sqlStatement);

   -- Execute the date distribution query
   open dtCursor for sqlStatement;
   loop
     fetch dtCursor into dt, numDt;
 exit when dtCursor%NOTFOUND;
 dbms_output.put_line(tableName || ',' || COLUMN_NAME || ',' || DATA_TYPE || ',*,' || dt || ',' || numDt);
   end loop;
        close dtCursor;
 end if;
    end loop;
close tbCursor;

  exception
  when others then
    dbms_output.put_line(tableName || ',#ERR#,' || SQLCODE || ':' || SQLERRM);
if dtCursor%isopen
then
 close dtCursor;
end if;
if tbCursor%isopen
then
 close tbCursor;
end if;
if chksumCursor%isopen
then
 close chksumCursor;
end if;
  end;
  end loop;
end;
/
exit;

Here is a description of the output that's generated by the script.  For each table that the script runs the following content is generated in a text file.

1. The SQL query that was used to genarate the counts

2. One line that contains the tablename and total record count in the following format
[tablename],*,[row count]

3. For each non-date column in the table there is a line containing the following information depending on the datatype.
  if datatype is a int, float, numeric type:
[tablename], [column name], [data type], [Nullable Y/N], [Minimum value], [Maximum value], [Average value], [Count of Nulls]
  if datatype is a character or bytea type:
[tablename], [column name], [data type], [Nullable Y/N], [Minimum value length], [Maximum value length], [Average value length], [Count of Nulls]

4. For each date or timestamp column there will be a distribution of all the date values in the table.
[tablename], [column name], [data type],*, [yyyymmdd], [row count] 
Comments