Monday, 17 November 2008

First Look... SQL 2008 : CDC - Change Data Capture

Playing with SQL 2008's CDC - Change Data Capture with the AdventureWorks 2008 database.

CDC has excellent potential in load systems e.g. for data warehousing etc.
It can be used to capture changes on tables or just columns.

exec sys.sp_cdc_enable_table
[ @source_schema = ] 'source_schema',
[ @source_name = ] 'source_name' ,
[ @role_name = ] 'role_name'
[,[ @capture_instance = ] 'capture_instance' ]
[,[ @supports_net_changes = ] supports_net_changes ]
[,[ @index_name = ] 'index_name' ]
[,[ @captured_column_list = ] 'captured_column_list' ]
[,[ @filegroup_name = ] 'filegroup_name' ]
[,[ @partition_switch = ] 'partition_switch' ]

USE [AdventureWorks2008]
GO

-- create a test table
CREATE TABLE [HumanResources].[Employee_Sync](
[BusinessEntityID] [int] NOT NULL,
[NationalIDNumber] [nvarchar](15) NOT NULL,
[LoginID] [nvarchar](256) NOT NULL,
[OrganizationNode] [hierarchyid] NULL,
[OrganizationLevel]  AS ([OrganizationNode].[GetLevel]()),
[JobTitle] [nvarchar](50) NOT NULL,
[BirthDate] [date] NOT NULL,
[MaritalStatus] [nchar](1) NOT NULL,
[Gender] [nchar](1) NOT NULL,
[HireDate] [date] NOT NULL,
[SalariedFlag] [dbo].[Flag] NOT NULL,
[VacationHours] [smallint] NOT NULL,
[SickLeaveHours] [smallint] NOT NULL,
[CurrentFlag] [dbo].[Flag] NOT NULL,
[ModifiedDate] [datetime] NOT NULL)

GO

-- Enable CDC on the current database
exec sys.sp_cdc_enable_db

-- Check CDC has been enabled...
select name, is_cdc_enabled from sys.databases

-- Enable CDC for my test table...
exec sys.sp_cdc_enable_table
@source_schema = 'HumanResources',
@source_name = 'Employee_Sync' ,
@role_name = 'CDCRole',
@supports_net_changes = 0
-- note @supports_net_changes can only be set if there is a primary key set on the table

-- Check CDC has been enabled on the table...
select name, type, type_desc, is_tracked_by_cdc from sys.tables
where is_tracked_by_cdc = 1

-- Disable CDC for my test table...
exec sys.sp_cdc_disable_table
@source_schema = 'HumanResources',
@source_name = 'Employee_Sync',
@capture_instance = 'all'

-- Disable CDC on the current database
exec sys.sp_cdc_disable_db
-- Check CDC has been disabled...
select name, is_cdc_enabled from sys.databases

INSERT INTO [HumanResources].[Employee_Sync]
([BusinessEntityID]
,[NationalIDNumber]
,[LoginID]
,[OrganizationNode]
,[JobTitle]
,[BirthDate]
,[MaritalStatus]
,[Gender]
,[HireDate]
,[SalariedFlag]
,[VacationHours]
,[SickLeaveHours]
,[CurrentFlag]
,[ModifiedDate])

SELECT [BusinessEntityID]
,[NationalIDNumber]
,[LoginID]
,[OrganizationNode]
,[JobTitle]
,[BirthDate]
,[MaritalStatus]
,[Gender]
,[HireDate]
,[SalariedFlag]
,[VacationHours]
,[SickLeaveHours]
,[CurrentFlag]
,[ModifiedDate]
FROM [AdventureWorks2008].[HumanResources].[Employee]

-- delete 50 rows
SET ROWCOUNT 50
DELETE FROM [HumanResources].[Employee_Sync]
SET ROWCOUNT 0

-- CDC utilises the sql transaction log. The 'log sequence number' or LSN identifies transactions in the log.
-- You query the changes in CDC via LSNs.
declare @startLSN binary(10), @endLSN binary(10)
-- get the first LSN for table changes
select @startLSN = sys.fn_cdc_get_min_lsn('HumanResources_Employee_Sync')
-- get the last LSN for table changes
select @endLSN = sys.fn_cdc_get_max_lsn()

-- get net changes; group changes in the range by the pk
-- get individual changes in the range
select * from cdc.fn_cdc_get_all_changes_HumanResources_Employee_Sync(@startLSN, @endLSN, 'all');

-- if had set '@supports_net_changes' to 1, then this function works...
--select * from cdc.fn_cdc_get_net_changes_HumanResources_EmployeeSync(@startLSN, @endLSN, 'all');

Good summary of CDC objects >
http://weblogs.sqlteam.com/derekc/archive/2008/01/28/60469.aspx

No comments: