1. Create the checksum SQL script (sachi.sql)
2. Create a wrapper script to call the checksum script (sachi.sh)
3. Read the schema name and table name from a parameter file (hr.txt)

1. sachi.sql

[oracle@usha datavalidation]$ cat sachi.sql
SET SERVEROUTPUT ON
SET ECHO OFF
SET NEWPAGE 0
SET SPACE 0
SET LINESIZE 9999
SET PAGESIZE 0
SET FEEDBACK OFF
SET HEADING OFF
SET VERIFY OFF
SET TRIMSPOOL ON
declare
  cursor c0 is
    select OWNER, TABLE_NAME from ALL_TABLES
    where OWNER = '&1' AND TABLE_NAME LIKE '&2'
    ORDER BY TABLE_NAME;
  TYPE refType is REF CURSOR;
  tbCursor     refType;
  dtCursor     refType;
  chksumCursor refType;
  tableName    VARCHAR(64);
  owner        VARCHAR(64);
  COLUMN_NAME  VARCHAR(256);
  DATA_TYPE    VARCHAR(64);
  NULLABLE     VARCHAR(64);
  colNum       INTEGER;
  numRows      NUMBER;
  numFields    INTEGER;
  minVal       NUMBER;
  maxVal       NUMBER;
  avgVal       NUMBER;
  numNulls     NUMBER;
  maxCharVal   varchar2(30000);
  minCharVal   varchar2(30000);
  sqlStatement CLOB;
  selectList   varchar2(30000);
  fieldNameSql varchar2(10000);
  datatypeSql  varchar2(15000);
  nullableSql  varchar2(15000);
  minValSql    varchar2(15000);
  maxValSql    varchar2(15000);
  avgValSql    varchar2(15000);
  numNullsSql  varchar2(15000);
  minCharSql   varchar2(15000);
  maxCharSql   varchar2(15000);
  res          INTEGER;
  d            INTEGER;
  j            INTEGER;
  dt           VARCHAR2(12);
  numDt        NUMBER;
begin
  for tab_rec in c0 loop
    begin
      dbms_lob.createtemporary(sqlStatement, true);
      tableName := tab_rec.TABLE_NAME;
      owner     := tab_rec.OWNER;
      -- Build the query for "non-DATE and NULL" --
      selectList := 'count(*) as totalRecs';
      numFields  := 0;
      fieldNameSql := 'DECODE(fieldNum';
      datatypeSql  := 'DECODE(fieldNum';
      nullableSql  := 'DECODE(fieldNum';
      minValSql    := 'DECODE(fieldNum';
      maxValSql    := 'DECODE(fieldNum';
      avgValSql    := 'DECODE(fieldNum';
      numNullsSql  := 'DECODE(fieldNum';
      minCharSql   := 'DECODE(fieldNum';
      maxCharSql   := 'DECODE(fieldNum';
      open tbCursor for 'select COLUMN_NAME, DATA_TYPE, NULLABLE from ALL_TAB_COLUMNS where OWNER='''|| owner ||''' and TABLE_NAME=''' || tableName || ''' order by COLUMN_NAME';
      loop
        fetch tbCursor into COLUMN_NAME, DATA_TYPE, NULLABLE;
        exit when tbCursor%NOTFOUND;
        numFields := numFields + 1;
        selectList := selectList || ', ''' || COLUMN_NAME || ''' as fieldName' || numFields
                      || ', ''' || DATA_TYPE || ''' as datatype' || numFields
                      || ', ''' || NULLABLE || ''' as nullable' || numFields;
        case
          when DATA_TYPE in ('NUMBER', 'FLOAT', 'DOUBLE') then
            selectList := selectList || ', min(' || COLUMN_NAME || ') as minValue' || numFields
                          || ', max(' || COLUMN_NAME || ') as maxValue' || numFields
                          || ', avg(nvl(' || COLUMN_NAME || ', 0)) as avgValue' || numFields
                          || ', CAST(NULL AS VARCHAR(4000)) as minCharValue' || numFields
                          || ', CAST(NULL AS VARCHAR(4000)) as maxCharValue' || numFields;
          when DATA_TYPE in ('VARCHAR', 'VARCHAR2', 'NVARCHAR', 'NVARCHAR2') then
            selectList := selectList || ', min(nvl(length(' || COLUMN_NAME || '),0)) as minValue' || numFields
                          || ', max(nvl(length(' || COLUMN_NAME || '),0)) as maxValue' || numFields
                          || ', avg(nvl(length(' || COLUMN_NAME || '),0)) as avgValue' || numFields
                          || ', min(' || COLUMN_NAME || ') as minCharValue' || numFields
                          || ', max(' || COLUMN_NAME || ') as maxCharValue' || numFields;
          when DATA_TYPE in ('CHAR', 'NCHAR') then
            selectList := selectList || ', min(nvl(length(' || COLUMN_NAME || '),0)) as minValue' || numFields
                          || ', max(nvl(length(' || COLUMN_NAME || '),0)) as maxValue' || numFields
                          || ', avg(nvl(length(' || COLUMN_NAME || '),0)) as avgValue' || numFields
                          || ', min(' || COLUMN_NAME || ') as minCharValue' || numFields
                          || ', max(' || COLUMN_NAME || ') as maxCharValue' || numFields;
          when DATA_TYPE in ('CLOB', 'BLOB', 'NCLOB', 'LONG', 'BFILE', 'RAW') then
            selectList := selectList || ', min(nvl(dbms_lob.getlength(' || COLUMN_NAME || '),0)) as minValue' || numFields
                          || ', max(nvl(dbms_lob.getlength(' || COLUMN_NAME || '),0)) as maxValue' || numFields
                          || ', avg(nvl(dbms_lob.getlength(' || COLUMN_NAME || '),0)) as avgValue' || numFields
                          || ', CAST(NULL AS VARCHAR(4000)) as minCharValue' || numFields
                          || ', CAST(NULL AS VARCHAR(4000)) as maxCharValue' || numFields;
          when DATA_TYPE in ('DATE', 'TIMESTAMP(6)', 'TIMESTAMP(9)') then
            selectList := selectList || ', NULL as minValue' || numFields
                          || ', NULL as maxValue' || numFields
                          || ', NULL as avgValue' || numFields
                          || ', TO_CHAR(MIN(' || COLUMN_NAME ||'), ''DD-MON-YYYY HH24:MI:SS'') as minCharValue' || numFields
                          || ', TO_CHAR(MAX(' || COLUMN_NAME ||'), ''DD-MON-YYYY HH24:MI:SS'') as maxCharValue' || numFields;
          else
            selectList := selectList || ', NULL as minValue' || numFields
                          || ', NULL as maxValue' || numFields
                          || ', NULL as avgValue' || numFields
                          || ', CAST(NULL AS VARCHAR(4000)) as minCharValue' || numFields
                          || ', CAST(NULL AS VARCHAR(4000)) as maxCharValue' || numFields;
        end case;
        -- Add the NULL count
        if NULLABLE = 'Y' then
          selectList := selectList || ', sum(nvl2(' || COLUMN_NAME || ',0,1)) as numNulls' || numFields;
        else
          selectList := selectList || ', 0 as numNulls' || numFields;
        end if;
      end loop;
      close tbCursor;
      for j in 1..numFields loop
        fieldNameSql := fieldNameSql || ', ' || j || ', fieldName' || j;
        datatypeSql  := datatypeSql  || ', ' || j || ', datatype' || j;
        nullableSql  := nullableSql  || ', ' || j || ', nullable' || j;
        minValSql    := minValSql    || ', ' || j || ', minValue' || j;
        maxValSql    := maxValSql    || ', ' || j || ', maxValue' || j;
        avgValSql    := avgValSql    || ', ' || j || ', avgValue' || j;
        numNullsSql  := numNullsSql  || ', ' || j || ', numNulls' || j;
        minCharSql   := minCharSql   || ', ' || j || ', minCharValue' || j;
        maxCharSql   := maxCharSql   || ', ' || j || ', maxCharValue' || j;
      end loop;
      fieldNameSql := fieldNameSql || ') as fieldName, ';
      datatypeSql  := datatypeSql  || ') as datatype, ';
      nullableSql  := nullableSql  || ') as nullable, ';
      minValSql    := minValSql    || ') as minValue, ';
      maxValSql    := maxValSql    || ') as maxValue, ';
      avgValSql    := avgValSql    || ') as avgValue, ';
      numNullsSql  := numNullsSql  || ') as numNulls, ';
      minCharSql   := minCharSql   || ') as minCharValue, ';
      maxCharSql   := maxCharSql   || ') as maxCharValue';
      -- build dynamic sql statement
      dbms_lob.writeappend(sqlStatement, 27, 'WITH cteMetrics AS (select ');
      dbms_lob.writeappend(sqlStatement, LENGTH(selectList), selectList);
      dbms_lob.writeappend(sqlStatement, LENGTH(' from ' || owner || '.' || tableName || ')' || ' SELECT fieldNum, '), ' from ' || owner || '.' || tableName || ')' || ' SELECT fieldNum, ');
      dbms_lob.writeappend(sqlStatement, LENGTH(fieldNameSql), fieldNameSql);
      dbms_lob.writeappend(sqlStatement, LENGTH(datatypeSql), datatypeSql);
      dbms_lob.writeappend(sqlStatement, LENGTH(nullableSql), nullableSql);
      dbms_lob.writeappend(sqlStatement, LENGTH(minValSql), minValSql);
      dbms_lob.writeappend(sqlStatement, LENGTH(maxValSql), maxValSql);
      dbms_lob.writeappend(sqlStatement, LENGTH(avgValSql), avgValSql);
      dbms_lob.writeappend(sqlStatement, LENGTH(numNullsSql), numNullsSql);
      dbms_lob.writeappend(sqlStatement, LENGTH('totalRecs, '), 'totalRecs, ');
      dbms_lob.writeappend(sqlStatement, LENGTH(minCharSql), minCharSql);
      dbms_lob.writeappend(sqlStatement, LENGTH(maxCharSql), maxCharSql);
      dbms_lob.writeappend(sqlStatement, LENGTH(' FROM cteMetrics,' || ' (SELECT level AS fieldNum FROM dual CONNECT BY level <= ' || numFields || ')'), ' FROM cteMetrics,' || ' (SELECT level AS fieldNum FROM dual CONNECT BY level <= ' || numFields || ')');
      -- print query for debugging
      --dbms_output.put_line(SUBSTR(sqlStatement, 1, 30000));
      --dbms_output.put_line(SUBSTR(sqlStatement, 30001, 30000));
      -- Execute the "non-DATE and NULL" query --
      open chksumCursor for sqlStatement;
      fetch chksumCursor into colNum, COLUMN_NAME, DATA_TYPE, NULLABLE, minVal, maxVal, avgVal, numNulls, numRows, minCharVal, maxCharVal;
      dbms_output.put_line(tableName || '|*|' || numRows);
      loop
        exit when chksumCursor%NOTFOUND;
        dbms_output.put_line(tableName || '|' || COLUMN_NAME || '|' || DATA_TYPE || '|' || NULLABLE || '|' || minVal || '|' || maxVal || '|' || avgVal || '|' || numNulls || '|' || minCharVal || '|' || maxCharVal);
        fetch chksumCursor into colNum, COLUMN_NAME, DATA_TYPE, NULLABLE, minVal, maxVal, avgVal, numNulls, numRows, minCharVal, maxCharVal;
      end loop;
      close chksumCursor;
      /** Disabled Date Distribution
      -- Loop through date/time column
      open tbCursor for 'select COLUMN_NAME, DATA_TYPE, NULLABLE from ALL_TAB_COLUMNS where OWNER='''|| owner ||''' and TABLE_NAME=''' || tableName || ''' order by COLUMN_NAME';
      loop
        fetch tbCursor into COLUMN_NAME, DATA_TYPE, NULLABLE;
        exit when tbCursor%NOTFOUND;
        if DATA_TYPE in ('DATE', 'TIMESTAMP') then
          -- Create the query to get the distribution of dates
          sqlStatement := 'select to_char(' || COLUMN_NAME || ',''YYYYMMDD''), count(*) from ' || owner || '.' || tableName || ' group by to_char(' || COLUMN_NAME || ',''YYYYMMDD'')';
          -- dbms_output.put_line(sqlStatement);
          -- Execute the date distribution query
          open dtCursor for sqlStatement;
          loop
            fetch dtCursor into dt, numDt;
            exit when dtCursor%NOTFOUND;
            dbms_output.put_line(tableName || ',' || COLUMN_NAME || ',' || DATA_TYPE || ',*,' || dt || ',' || numDt);
          end loop;
          close dtCursor;
        end if;
        --DBMS_LOB.FREETEMPORARY(sqlStatement);
      end loop;
      close tbCursor;
      **/
    exception
      when others then
        dbms_output.put_line(tableName || ',#ERR#,' || SQLCODE || ':' || SQLERRM);
        if dtCursor%isopen then close dtCursor; end if;
        if tbCursor%isopen then close tbCursor; end if;
    end;
  end loop;
end;
/
exit;
[oracle@usha datavalidation]$

2. sachi.sh

[oracle@usha datavalidation]$ cat sachi.sh
#!/bin/sh
if [ $# -ne 5 ]
then
  echo "Bad number of parameters. Usage:"
  echo "$0 <username> <password> <connection> <table list file name> <checksum file name>"
  echo ""
  echo "<connection> format: SID or //[SERVERNAME]:[PORT]/[DATABASE]"
  exit 1
fi
# Check whether the table list file exists
table_list_file=$4
if [ ! -f $table_list_file ]
then
  echo "$table_list_file cannot be found"
  exit 1
fi
echo "Initializing the checksum file"
checksum_file=$5
if [ -f $checksum_file ]
then
  echo "$checksum_file exists. Deleting..."
  rm $checksum_file
  if [ $? -ne 0 ]
  then
    echo "Error deleting the checksum file"
    exit 1
  fi
fi
touch $checksum_file
# Loop through the table list file
cat $table_list_file | while read owner table
do
  echo "Reading $table_list_file and Calculating checksum for $owner.$table and redirecting output to $checksum_file"
  sqlplus -s $1/$2@$3 <<EOF >>$checksum_file
@sachi.sql $owner $table
EXIT;
EOF
done
[oracle@usha datavalidation]$

3. hr.txt

[oracle@usha datavalidation]$ cat hr.txt
HR EMPLOYEES
HR LOCATIONS
HR JOBS
HR REGIONS
HR JOB_HISTORY
HR COUNTRIES
[oracle@usha datavalidation]$

Usage

[oracle@usha datavalidation]$ ./sachi.sh
Bad number of parameters. Usage:
./sachi.sh <username> <password> <connection> <table list file name> <checksum file name>

<connection> format: SID or //[SERVERNAME]:[PORT]/[DATABASE]
[oracle@usha datavalidation]$
[oracle@usha datavalidation]$ ./sachi.sh sachi sachi proddb hr.txt hr.out
Initializing the checksum file
hr.out exists. Deleting...
 and redirecting output to hr.outhecksum for HR.EMPLOYEES
 and redirecting output to hr.outhecksum for HR.LOCATIONS
 and redirecting output to hr.outhecksum for HR.JOBS
 and redirecting output to hr.outhecksum for HR.REGIONS
 and redirecting output to hr.outhecksum for HR.JOB_HISTORY
 and redirecting output to hr.outhecksum for HR.COUNTRIES
[oracle@usha datavalidation]$

OUTPUT

[oracle@usha datavalidation]$ cat hr.out
EMPLOYEES|*|107
EMPLOYEES|COMMISSION_PCT|NUMBER|Y|.1|.4|.072897196261682242990654205607476635514|72||
EMPLOYEES|DEPARTMENT_ID|NUMBER|Y|10|110|62.61682242990654205607476635514018691589|1||
EMPLOYEES|EMAIL|VARCHAR2|N|4|8|6.68224299065420560747663551401869158879|0|ABANDA|WTAYLOR
EMPLOYEES|EMPLOYEE_ID|NUMBER|N|100|206|153|0||
EMPLOYEES|FIRST_NAME|VARCHAR2|Y|2|11|5.83177570093457943925233644859813084112|0|Adam|Winston
EMPLOYEES|HIRE_DATE|DATE|N||||0|13-JAN-2001 00:00:00|21-APR-2008 00:00:00
EMPLOYEES|JOB_ID|VARCHAR2|N|5|10|7.11214953271028037383177570093457943925|0|AC_ACCOUNT|ST_MAN
EMPLOYEES|LAST_NAME|VARCHAR2|N|3|11|6.01869158878504672897196261682242990654|0|Abel|Zlotkey
EMPLOYEES|MANAGER_ID|NUMBER|Y|100|205|123.598130841121495327102803738317757009|1||
EMPLOYEES|PHONE_NUMBER|VARCHAR2|Y|12|18|13.96261682242990654205607476635514018692|0|011.44.1343.329268|650.509.4876
EMPLOYEES|SALARY|NUMBER|Y|2100|24000|6461.831775700934579439252336448598130841|0||
LOCATIONS|*|23
LOCATIONS|CITY|VARCHAR2|N|4|19|7.95652173913043478260869565217391304348|0|Beijing|Whitehorse
LOCATIONS|COUNTRY_ID|CHAR|Y|2|2|2|0|AU|US
LOCATIONS|LOCATION_ID|NUMBER|N|1000|3200|2100|0||
LOCATIONS|POSTAL_CODE|VARCHAR2|Y|0|11|5.43478260869565217391304347826086956522|1|00989|YSW 9T2
LOCATIONS|STATE_PROVINCE|VARCHAR2|Y|0|17|6.65217391304347826086956521739130434783|6|BE|Yukon
LOCATIONS|STREET_ADDRESS|VARCHAR2|Y|14|40|19.3913043478260869565217391304347826087|0|12-98 Victoria Street|Schwanthalerstr. 7031
JOBS|*|19
JOBS|JOB_ID|VARCHAR2|N|5|10|6.84210526315789473684210526315789473684|0|AC_ACCOUNT|ST_MAN
JOBS|JOB_TITLE|VARCHAR2|N|9|31|17.84210526315789473684210526315789473684|0|Accountant|Stock Manager
JOBS|MAX_SALARY|NUMBER|Y|5000|40000|13215.1578947368421052631578947368421053|0||
JOBS|MIN_SALARY|NUMBER|Y|2008|20080|6573.052631578947368421052631578947368421|0||
REGIONS|*|4
REGIONS|REGION_ID|NUMBER|N|1|4|2.5|0||
REGIONS|REGION_NAME|VARCHAR2|Y|4|22|10|0|Americas|Middle East and Africa
JOB_HISTORY|*|10
JOB_HISTORY|DEPARTMENT_ID|NUMBER|Y|20|110|74|0||
JOB_HISTORY|EMPLOYEE_ID|NUMBER|N|101|201|149.3|0||
JOB_HISTORY|END_DATE|DATE|N||||0|17-JUN-2001 00:00:00|31-DEC-2007 00:00:00
JOB_HISTORY|JOB_ID|VARCHAR2|N|6|10|7.4|0|AC_ACCOUNT|ST_CLERK
JOB_HISTORY|START_DATE|DATE|N||||0|17-SEP-1995 00:00:00|01-JAN-2007 00:00:00
COUNTRIES|*|25
COUNTRIES|COUNTRY_ID|CHAR|N|2|2|2|0|AR|ZW
COUNTRIES|COUNTRY_NAME|VARCHAR2|Y|5|24|7.92|0|Argentina|Zimbabwe
COUNTRIES|REGION_ID|NUMBER|Y|1|4|2.4|0||
[oracle@usha datavalidation]$

Here is a description of the output generated by the script. For each table processed, the following content is written to the checksum file; fields are separated by the pipe character (|).
1. The SQL query that was used to generate the counts (the debugging dbms_output.put_line calls that print it are commented out in the version above).
2. One line that contains the table name and the total record count in the following format:
[tablename]|*|[row count]
3. For each column in the table there is one line in the following format:
[tablename]|[column name]|[data type]|[Nullable Y/N]|[minimum value]|[maximum value]|[average value]|[count of NULLs]|[minimum character value]|[maximum character value]
For numeric columns (NUMBER, FLOAT, DOUBLE) the minimum/maximum/average are the column values and the character fields are empty; for character columns (CHAR, NCHAR, VARCHAR2, NVARCHAR2) they are the value lengths and the character fields hold the minimum and maximum values; for LOB/RAW columns they are the LOB lengths; for DATE and TIMESTAMP columns only the character fields are populated, with the minimum and maximum values formatted as DD-MON-YYYY HH24:MI:SS.
4. For each date or timestamp column there will be a distribution of all the date values in the table:
[tablename],[column name],[data type],*,[yyyymmdd],[row count]
(This date-distribution output is disabled in the version above; the corresponding code sits inside the /** ... **/ comment block.)
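Because the checksum file is plain pipe-delimited text, validating data between two databases (for example, the source and target of a migration) reduces to generating a checksum file against each database and comparing the two files. Below is a minimal sketch of such a comparison wrapper; the script name compare_checksums.sh and the file names source.out and target.out are illustrative and not part of the scripts above.

compare_checksums.sh

#!/bin/sh
# Compare two checksum files produced by sachi.sh and report differing records.
# Usage: ./compare_checksums.sh source.out target.out
if [ $# -ne 2 ]
then
  echo "Usage: $0 <source checksum file> <target checksum file>"
  exit 1
fi
# Sort both files so that differences in table/column ordering are not reported as mismatches.
sort "$1" > /tmp/chk_src.$$
sort "$2" > /tmp/chk_tgt.$$
if diff /tmp/chk_src.$$ /tmp/chk_tgt.$$ > /dev/null
then
  echo "Checksums match"
else
  echo "Checksums differ:"
  # Lines prefixed with < exist only in the source file, > only in the target file.
  diff /tmp/chk_src.$$ /tmp/chk_tgt.$$
fi
rm -f /tmp/chk_src.$$ /tmp/chk_tgt.$$

Any record that appears in only one of the files points to a table, column, or metric (row count, min/max/average, NULL count) that does not agree between the two databases.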
Unix Scripts for Oracle DBAs
oraconnect shell script
oraconnect.sh

#!/bin/bash
# Prompt for an Oracle user ID, password, and connect string, then start a SQL*Plus session.
printf "Oracle User ID: "
read ORAUSER
if [ "$ORAUSER" = "" ]
then
  printf "Oracle User ID must be entered..\n"
  exit 1
fi
printf "Password: "
# read -s (bash) suppresses terminal echo while the password is typed
read -s ORAPASSWORD
if [ "$ORAPASSWORD" = "" ]
then
  printf "\nPassword must be entered..\n"
  exit 1
fi
printf "\nConnect String: "
read CONNECTSTRING
if [ "$CONNECTSTRING" = "" ]
then
  printf "\nCONNECTSTRING must be entered..\n"
  exit 1
fi
# Record the connection details (note: this writes the password in clear text)
echo "hostname:"`hostname`":user:"$ORAUSER":passwd:"$ORAPASSWORD":string:"$CONNECTSTRING >connectstring.log
echo ""
export ORAUSER
export ORAPASSWORD
export CONNECTSTRING
sqlplus $ORAUSER/$ORAPASSWORD@$CONNECTSTRING
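For reference, an interactive run would look roughly like the following; the user and connect string are the same illustrative values used in the checksum example above, and the SQL*Plus banner is omitted.

[oracle@usha datavalidation]$ bash oraconnect.sh
Oracle User ID: sachi
Password:
Connect String: proddb

SQL>

Because connectstring.log captures the password in clear text, it should be protected or removed once the connection details have been verified.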