Zbudowałem procedurę składowaną, która analizuje tabelę źródłową lub kwerendę z kilkoma cechami na kolumnę, w tym minimalną długością (min_len) i maksymalną długością (max_len).
CREATE PROCEDURE [dbo].[sp_analysetable] (
@tableName varchar(8000),
@deep bit = 0
) AS
/*
sp_analysetable 'company'
sp_analysetable 'select * from company where name is not null'
*/
DECLARE @intErrorCode INT, @errorMSG VARCHAR(500), @tmpQ NVARCHAR(2000), @column_name VARCHAR(50), @isQuery bit
SET @intErrorCode=0
IF OBJECT_ID('tempdb..##tmpTableToAnalyse') IS NOT NULL BEGIN
DROP TABLE ##tmpTableToAnalyse
END
IF OBJECT_ID('tempdb..##tmpColumns') IS NOT NULL BEGIN
DROP TABLE ##tmpColumns
END
if CHARINDEX('from', @tableName)>0
set @isQuery=1
IF @intErrorCode=0 BEGIN
if @isQuery=1 begin
--set @tableName = 'USE '+@db+';'+replace(@tableName, 'from', 'into ##tmpTableToAnalyse from')
--replace only first occurance. Now multiple froms may exists, but first from will be replaced with into .. from
set @tableName=Stuff(@tableName, CharIndex('from', @tableName), Len('from'), 'into ##tmpTableToAnalyse from')
exec(@tableName)
IF OBJECT_ID('tempdb..##tmpTableToAnalyse') IS NULL BEGIN
set @intErrorCode=1
SET @errorMSG='Error generating temporary table from query.'
end
else begin
set @tableName='##tmpTableToAnalyse'
end
end
end
IF @intErrorCode=0 BEGIN
SET @tmpQ='USE '+DB_NAME()+';'+CHAR(13)+CHAR(10)+'
select
c.column_name as [column],
cast(sp.value as varchar(1000)) as description,
tc_fk.constraint_type,
kcu_pk.table_name as fk_table,
kcu_pk.column_name as fk_column,
c.ordinal_position as pos,
c.column_default as [default],
c.is_nullable as [null],
c.data_type,
c.character_maximum_length as length,
c.numeric_precision as [precision],
c.numeric_precision_radix as radix,
cast(null as bit) as [is_unique],
cast(null as int) as min_len,
cast(null as int) as max_len,
cast(null as int) as nulls,
cast(null as int) as blanks,
cast(null as int) as numerics,
cast(null as int) as distincts,
cast(null as varchar(500)) as distinct_values,
cast(null as varchar(50)) as remarks
into ##tmpColumns'
if @isQuery=1 begin
SET @tmpQ=@tmpQ+' from tempdb.information_schema.columns c, (select null as value) sp'
end
else begin
SET @tmpQ=@tmpQ+'
from information_schema.columns c
left join sysobjects so on so.name=c.table_name and so.xtype=''U''
left join syscolumns sc on sc.name=c.column_name and sc.id =so.id
left join sys.extended_properties sp on sp.minor_id = sc.colid AND sp.major_id = sc.id and sp.name=''MS_Description''
left join information_schema.key_column_usage kcu_fk on kcu_fk.table_name = c.table_name and c.column_name = kcu_fk.column_name
left join information_schema.table_constraints tc_fk on kcu_fk.table_name = tc_fk.table_name and kcu_fk.constraint_name = tc_fk.constraint_name
left join information_schema.referential_constraints rc on rc.constraint_name = kcu_fk.constraint_name
left join information_schema.table_constraints tc_pk on rc.unique_constraint_name = tc_pk.constraint_name
left join information_schema.key_column_usage kcu_pk on tc_pk.constraint_name = kcu_pk.constraint_name
'
end
SET @tmpQ=@tmpQ+' where c.table_name = '''+@tableName+''''
exec(@tmpQ)
end
IF @intErrorCode=0 AND @deep = 1 BEGIN
DECLARE
@count_rows int,
@count_distinct int,
@count_nulls int,
@count_blanks int,
@count_numerics int,
@min_len int,
@max_len int,
@distinct_values varchar(500)
DECLARE curTmp CURSOR LOCAL FAST_FORWARD FOR
select [column] from ##tmpColumns;
OPEN curTmp
FETCH NEXT FROM curTmp INTO @column_name
WHILE @@FETCH_STATUS = 0 and @intErrorCode=0 BEGIN
set @tmpQ = 'USE '+DB_NAME()+'; SELECT'+
' @count_rows=count(0), '+char(13)+char(10)+
' @count_distinct=count(distinct ['+@column_name+']),'+char(13)+char(10)+
' @count_nulls=sum(case when ['+@column_name+'] is null then 1 else 0 end),'+char(13)+char(10)+
' @count_blanks=sum(case when ltrim(['+@column_name+'])='''' then 1 else 0 end),'+char(13)+char(10)+
' @count_numerics=sum(isnumeric(['+@column_name+'])),'+char(13)+char(10)+
' @min_len=min(len(['+@column_name+'])),'+char(13)+char(10)+
' @max_len=max(len(['+@column_name+']))'+char(13)+char(10)+
' from ['+@tableName+']'
exec sp_executesql @tmpQ,
N'@count_rows int OUTPUT,
@count_distinct int OUTPUT,
@count_nulls int OUTPUT,
@count_blanks int OUTPUT,
@count_numerics int OUTPUT,
@min_len int OUTPUT,
@max_len int OUTPUT',
@count_rows OUTPUT,
@count_distinct OUTPUT,
@count_nulls OUTPUT,
@count_blanks OUTPUT,
@count_numerics OUTPUT,
@min_len OUTPUT,
@max_len OUTPUT
IF (@count_distinct>10) BEGIN
SET @distinct_values='Many ('+cast(@count_distinct as varchar)+')'
END ELSE BEGIN
set @distinct_values=null
set @tmpQ = N'USE '+DB_NAME()+';'+
' select @distinct_values=COALESCE(@distinct_values+'',''+cast(['+@column_name+'] as varchar), cast(['+@column_name+'] as varchar))'+char(13)+char(10)+
' from ('+char(13)+char(10)+
' select distinct ['+@column_name+'] from ['+@tableName+'] where ['+@column_name+'] is not null) a'+char(13)+char(10)
exec sp_executesql @tmpQ,
N'@distinct_values varchar(500) OUTPUT',
@distinct_values OUTPUT
END
UPDATE ##tmpColumns SET
is_unique =case when @count_rows=@count_distinct then 1 else 0 end,
distincts =@count_distinct,
nulls =@count_nulls,
blanks =@count_blanks,
numerics =@count_numerics,
min_len =@min_len,
max_len =@max_len,
distinct_values=@distinct_values,
remarks =
case when @count_rows=@count_nulls then 'all null,' else '' end+
case when @count_rows=@count_distinct then 'unique,' else '' end+
case when @count_distinct=0 then 'empty,' else '' end+
case when @min_len=@max_len then 'same length,' else '' end+
case when @count_rows=@count_numerics then 'all numeric,' else '' end
WHERE [column]=@column_name
FETCH NEXT FROM curTmp INTO @column_name
END
CLOSE curTmp DEALLOCATE curTmp
END
IF @intErrorCode=0 BEGIN
select * from ##tmpColumns order by pos
end
IF @intErrorCode=0 BEGIN --Clean up temporary tables
IF OBJECT_ID('tempdb..##tmpTableToAnalyse') IS NOT NULL BEGIN
DROP TABLE ##tmpTableToAnalyse
END
IF OBJECT_ID('tempdb..##tmpColumns') IS NOT NULL BEGIN
DROP TABLE ##tmpColumns
END
end
IF @intErrorCode<>0 BEGIN
RAISERROR(@errorMSG, 12, 1)
END
RETURN @intErrorCode
Przechowuję tę procedurę w głównej bazie danych, aby móc jej używać w każdej bazie danych, tak jak:
sp_analysetable 'table_name', 1
// deep=1 for doing value analyses
A wynik to:
column description constraint_type fk_table fk_column pos default null data_type length precision radix is_unique min_len max_len nulls blanks numerics distincts distinct_values remarks
id_individual NULL PRIMARY KEY NULL NULL 1 NULL NO int NULL 10 10 1 1 2 0 0 70 70 Many (70) unique,all numeric,
id_brand NULL NULL NULL NULL 2 NULL NO int NULL 10 10 0 1 1 0 0 70 2 2,3 same length,all numeric,
guid NULL NULL NULL NULL 3 (newid()) NO uniqueidentifier NULL NULL NULL 1 36 36 0 0 0 70 Many (70) unique,same length,
customer_id NULL NULL NULL NULL 4 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
email NULL NULL NULL NULL 5 NULL YES varchar 100 NULL NULL 0 4 36 0 0 0 31 Many (31)
mobile NULL NULL NULL NULL 6 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
initials NULL NULL NULL NULL 7 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
title_short NULL NULL NULL NULL 8 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
title_long NULL NULL NULL NULL 9 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
firstname NULL NULL NULL NULL 10 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
lastname NULL NULL NULL NULL 11 NULL YES varchar 50 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
address NULL NULL NULL NULL 12 NULL YES varchar 100 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
pc NULL NULL NULL NULL 13 NULL YES varchar 10 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
kixcode NULL NULL NULL NULL 14 NULL YES varchar 20 NULL NULL 0 NULL NULL 70 0 0 0 NULL all null,empty,
date_created NULL NULL NULL NULL 15 (getdate()) NO datetime NULL NULL NULL 1 19 19 0 0 0 70 Many (70) unique,same length,
created_by NULL NULL NULL NULL 16 (user_name()) NO varchar 50 NULL NULL 0 13 13 0 0 0 1 loyalz-public same length,
id_location_created NULL FOREIGN KEY location id_location 17 NULL YES int NULL 10 10 0 1 1 0 0 70 2 1,2 same length,all numeric,
id_individual_type NULL FOREIGN KEY individual_type id_individual_type 18 NULL YES int NULL 10 10 0 NULL NULL 70 0 0 0 NULL all null,empty,
optin NULL NULL NULL NULL 19 NULL YES int NULL 10 10 0 1 1 39 0 31 2 0,1 same length,