为每个数据库的每个表中的每一列选择一个非空值



可能有更好的方法可以做到这一点。但我正在尝试查找可能包含个人信息的列。

问题是表格的名称很差(非英语,缩写(。所以我正在运行这个动态脚本,它将返回所有数据库中的所有表及其列。

USE master;
DECLARE @SQL varchar(max)
SET @SQL=';WITH cteCols (dbName, colName) AS (SELECT NULL, NULL '
SELECT @SQL=@SQL+'UNION
SELECT
'''+d.name COLLATE Czech_CI_AS +'.''+sh.name COLLATE Czech_CI_AS +''.''+o.name COLLATE Czech_CI_AS  ''dbSchTab''
, c.name COLLATE Czech_CI_AS ''colName''
FROM ['+d.name+'].sys.columns   c 
    JOIN ['+d.name+'].sys.objects o ON c.object_id=o.object_id 
    JOIN ['+d.name+'].sys.schemas sh ON o.schema_id=sh.schema_id
WHERE o.[type] = ''U'' COLLATE Czech_CI_AS'
FROM sys.databases d
SET @SQL = @SQL + ')
SELECT
    *
FROM cteCols cs
ORDER BY 1;'
EXEC (@SQL);

结果:

+---------------------+------------+
| DatabaseSchemaTable | ColumnName |
+---------------------+------------+
| dev1.dbo.Users      | Col1       |
| dev1.dbo.Users      | Col2       |
| dev1.dbo.Users      | Col3       |
| dev1.dbo.Users      | Col4       |
+---------------------+------------+

但是由于列命名不佳,我无法判断这些列中的内容。我想从每列中选择一个 TOP (1( 非 NULL 值,但我正在努力。

所需结果:

+---------------------+------------+--------------+
| DatabaseSchemaTable | ColumnName | ColumnValue  |
+---------------------+------------+--------------+
| dev1.dbo.Users      | Col1       | 20           |
| dev1.dbo.Users      | Col2       | 2018-02-06   |
| dev1.dbo.Users      | Col3       | 202-555-0133 |
| dev1.dbo.Users      | Col4       | John Doe     |
+---------------------+------------+--------------+

我的想法:

  • 我需要转置每个表(可能不是 枢轴的工作(
  • 我可以动态连接表格,只显示当前列。但是我不能在相关子查询中使用动态列。

有什么想法吗?

我会创建一个临时表,例如 ##cols,然后使用此临时表遍历表,对表本身运行更新查询。请注意,我们的字段名称中有很多空格和其他可能麻烦的字符。因此,我在字段/表/模式/数据库名称周围使用一些 QUOTENAME 更新了您的 cte。

USE master;
DECLARE @SQL varchar(max);
SET @SQL=';WITH cteCols (dbName, colName, top1Value) AS (SELECT NULL, NULL, CAST(NULL AS VARCHAR(MAX)) '
SELECT @SQL=@SQL+' UNION
SELECT
'''+QUOTENAME(d.[name]) COLLATE Czech_CI_AS +'.''+QUOTENAME(sh.name) COLLATE Czech_CI_AS +''.''+QUOTENAME(o.name) COLLATE Czech_CI_AS  ''dbSchTab''
, QUOTENAME(c.name) COLLATE Czech_CI_AS ''colName'', CAST(NULL AS VARCHAR(MAX)) AS ''top1Value''
FROM ['+d.[name]+'].sys.columns   c 
    JOIN ['+d.[name]+'].sys.objects o ON c.object_id=o.object_id 
    JOIN ['+d.[name]+'].sys.schemas sh ON o.schema_id=sh.schema_id
WHERE o.[type] = ''U'' COLLATE Czech_CI_AS'
FROM sys.databases d;
SET @SQL = @SQL + ')
SELECT
    *
INTO ##Cols
FROM cteCols cs
ORDER BY 1;'
EXEC (@SQL);
DECLARE @colName VARCHAR(255), @dbName VARCHAR(255), @SQL2 NVARCHAR(MAX);
DECLARE C CURSOR FOR SELECT [colName],[dbName] FROM ##Cols;
    OPEN C;
    FETCH NEXT FROM C INTO @colName, @dbName;
    WHILE @@FETCH_STATUS=0
    BEGIN
        SET @SQL2='UPDATE ##Cols SET [top1Value] = (SELECT TOP 1 x.'+@colName+' FROM '+@dbName+' x WHERE x.'+@colName+' IS NOT NULL) WHERE [colName]='''+@colName+''' AND [dbName]='''+@dbName+''''
        EXEC sp_executesql @SQL2
        FETCH NEXT FROM C INTO @colName, @dbName
    END;
    CLOSE C;
DEALLOCATE C;
SELECT * FROM ##Cols;

它并不漂亮,但它适合您的需求。

你可以试试这个:

--In this table we write our findings
CREATE TABLE ##TargetTable(ID INT IDENTITY, TableName VARCHAR(500), FirstRowXML XML);
--the undocumented sp "MSforeachtable" allows to create a statement where the 
--question mark is a place holder for the actual table
--(SELECT TOP 1 * FROM ? FOR XML PATH('row')) will create one single XML with all first row's values
EXEC sp_MSforeachtable 'INSERT INTO ##TargetTable(TableName,FirstRowXML) SELECT ''?'', (SELECT TOP 1 * FROM ? FOR XML PATH(''row''))';
--Now it is easy to get what you want
SELECT ID 
      ,TableName
      ,col.value('local-name(.)','nvarchar(max)') AS colname
      ,col.value('text()[1]','nvarchar(max)') AS colval
FROM ##TargetTable
CROSS APPLY FirstRowXML.nodes('/row/*') A(col);
GO
DROP TABLE ##TargetTable

只需使用 SELECT TOP X 即可获得多行...

更新

下面将创建一个表,其中包含所有数据库的所有表的所有列,并每行获取一个值。

CREATE TABLE ##TargetTable(ID INT IDENTITY
                           ,TABLE_CATALOG VARCHAR(300),TABLE_SCHEMA VARCHAR(300),TABLE_NAME VARCHAR(300),COLUMN_NAME VARCHAR(300)
                           ,DATA_TYPE VARCHAR(300),CHARACTER_MAXIMUM_LENGTH INT, IS_NULLABLE VARCHAR(10),Command VARCHAR(MAX),OneValue NVARCHAR(MAX));
EXEC sp_MSforeachdb
'USE ?; 
INSERT INTO ##TargetTable(TABLE_CATALOG,TABLE_SCHEMA,TABLE_NAME,COLUMN_NAME,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,IS_NULLABLE,Command)
SELECT ''?''
      ,c.TABLE_SCHEMA
      ,c.TABLE_NAME
      ,c.COLUMN_NAME
      ,c.DATA_TYPE
      ,c.CHARACTER_MAXIMUM_LENGTH
      ,c.IS_NULLABLE
      , CASE WHEN c.IS_NULLABLE=''YES''
             THEN ''SELECT CAST(MAX('' + QUOTENAME(c.COLUMN_NAME) + '') AS NVARCHAR(MAX))'' 
             ELSE ''SELECT TOP 1 CAST('' + QUOTENAME(c.COLUMN_NAME) + '' AS NVARCHAR(MAX))''
        END
      + '' FROM '' + QUOTENAME(''?'') + ''.'' + QUOTENAME(c.TABLE_SCHEMA) + ''.'' + QUOTENAME(c.TABLE_NAME) 
FROM INFORMATION_SCHEMA.COLUMNS c 
INNER JOIN INFORMATION_SCHEMA.TABLES t ON c.TABLE_CATALOG=t.TABLE_CATALOG AND c.TABLE_SCHEMA=t.TABLE_SCHEMA AND c.TABLE_NAME=T.TABLE_NAME AND t.TABLE_TYPE=''BASE TABLE''
WHERE c.DATA_TYPE NOT IN(''BINARY'',''VARBINARY'',''IMAGE'',''NTEXT'')';
DECLARE @ID INT,@Command VARCHAR(MAX);
DECLARE cur CURSOR FOR SELECT ID,Command FROM ##TargetTable
OPEN cur;
FETCH NEXT FROM cur INTO @ID,@Command;
WHILE @@FETCH_STATUS=0
BEGIN
    SET @Command = 'UPDATE ##TargetTable SET OneValue=(' + @Command + ') WHERE ID=' + CAST(@ID AS VARCHAR(100))
    PRINT @command;
    EXEC(@Command);
    FETCH NEXT FROM cur INTO @ID,@Command;
END
CLOSE cur;
DEALLOCATE cur;
GO
SELECT * FROM ##TargetTable;
GO
DROP TABLE ##TargetTable;

最新更新