I wrote this to determine the ASCII values being stored inside my data. It builds upon
Determine ASCII Values of a string and produces a breakdown of all the ASCII values in a column.
To cut a long story short I had an issue where NUL characters (ASCII value 0) were stored and were producing unexpected results.
Code for the results table and procedure is here -
IF NOT EXISTS (SELECT 1 FROM sys.tables WHERE object_id = OBJECT_ID('dbo.charactercount'))
CREATE TABLE dbo.charactercount
(
[id] int identity(1,1)
,[table_schema] sysname
,[table_name] sysname
,[column_name] sysname
,[character] int
,[column_count] int
)
GO
IF EXISTS (SELECT 1 FROM sys.procedures WHERE name = 'charactercount_insert')
DROP PROCEDURE dbo.charactercount_insert
GO
CREATE PROCEDURE dbo.charactercount_insert
(@table_schema sysname
,@table_name sysname
,@column_name sysname
,@sample_rows int = NULL)
AS
BEGIN
SET NOCOUNT ON
DECLARE @ColumnData table
(
UniqueRowID int IDENTITY (1, 1) Primary key NOT NULL ,
testeddata nvarchar(MAX)
)
DECLARE @sql NVARCHAR(MAX)
DECLARE @CurrentRow int
DECLARE @TotalRows int
DECLARE @Index INT
DECLARE @IntASCII INT
DECLARE @StatusMessage varchar (100)
DECLARE @currentdata NVARCHAR(MAX)
DECLARE @currentrowlength INT
IF @sample_rows IS NULL
SET @sql = 'SELECT [' + @column_name + '] FROM [' + @table_schema + '].[' + @table_name + ']'
ELSE
SET @sql = 'SELECT TOP (' + CAST(@sample_rows AS VARCHAR(10)) + ') [' + @column_name + '] FROM [' + @table_schema + '].[' + @table_name + ']'
SET @StatusMessage = 'Fetching: [' + @column_name + '] FROM [' + @table_schema + '].[' + @table_name + ']'
RAISERROR (@StatusMessage , 10, 1) WITH NOWAIT
INSERT INTO @ColumnData (testeddata)
EXEC(@sql)
SELECT @TotalRows = MAX(UniqueRowID) FROM @ColumnData
SET @StatusMessage = CAST(@TotalRows AS VARCHAR(20)) + ' rows fetched'
RAISERROR (@StatusMessage , 10, 1) WITH NOWAIT
SELECT @CurrentRow = 1
SET @StatusMessage = 'Counting Characters'
RAISERROR (@StatusMessage , 10, 1) WITH NOWAIT
IF EXISTS (SELECT * FROM tempdb..sysobjects WHERE name LIKE '#charactercount')
DROP TABLE #charactercount
CREATE TABLE #charactercount
(
[id] int identity(1,1)
,[table_schema] sysname
,[table_name] sysname
,[column_name] sysname
,[character] int
,[column_count] int
)
WHILE @CurrentRow <= @TotalRows
BEGIN
SET @Index = 1
SELECT @currentdata = testeddata
FROM @ColumnData
WHERE [UniqueRowID] = @CurrentRow
SELECT @currentrowlength = LEN(@currentdata)
IF EXISTS (SELECT * FROM tempdb..sysobjects WHERE name LIKE '#charactercount')
TRUNCATE TABLE #charactercount
WHILE @Index < 1 +(@currentrowlength)
BEGIN
SELECT @IntASCII = ASCII(SUBSTRING(@currentdata, @Index, 1))
FROM @ColumnData
WHERE [UniqueRowID] = @CurrentRow
INSERT INTO #charactercount
(table_schema ,table_name ,column_name ,character,column_count )
SELECT @table_schema AS Table_Schema
,@table_name AS Table_Name
,@column_name AS Column_Name
,@IntASCII AS [Character]
,1 AS Number
SET @Index = @Index + 1
END
-- At end of text, update the character count
MERGE [dbo].[charactercount] AS [Target]
USING (SELECT table_schema AS Table_Schema
,table_name AS Table_Name
,column_name AS Column_Name
,character AS character
,SUM(column_count) AS Number
FROM #charactercount
GROUP BY table_schema, table_name, column_name,character) AS [Source]
ON Source.Table_Schema = Target.Table_Schema
AND Source.Table_Name = Target.Table_Name
AND Source.Column_Name = Target.Column_Name
AND Source.character = Target.Character
WHEN MATCHED
THEN UPDATE SET [Target].[column_count] = [Target].[column_count] + Source.Number
WHEN NOT MATCHED
THEN INSERT ([table_schema]
,[table_name]
,[column_name]
,[character]
,[column_count])
VALUES
(Source.table_schema
,Source.table_name
,Source.column_name
,Source.Character
,Source.Number);
SELECT @CurrentRow = @CurrentRow + 1
END
END
SET @StatusMessage = 'Done - Check dbo.charactercount table'
RAISERROR (@StatusMessage , 10, 1) WITH NOWAIT
GO
Code to use this procedure is as follows ...
--Remember to clear down the results table before you run
TRUNCATE TABLE dbo.charactercount
GO
-- Provide the name of a column, like this
-- The final parameter is the number of rows to test.
EXEC charactercount_insert 'schemaname', 'tablename', 'columnname', 200
GO
-- View the data like this
SELECT * FROM charactercount ORDER BY [character]
GO
-- Dynamically generate statements to interogate all columns, as follows
SELECT command = 'EXEC charactercount_insert ''' + TABLE_SCHEMA + ''',''' + TABLE_NAME+ ''',''' + COLUMN_NAME + ''',200'
FROM INFORMATION_SCHEMA.COLUMNS
WHERE DATA_TYPE LIKE '%char%'