1 |
originally posted 2013-9-7 |
There are many more female names in the census data from which this dataset was generated, so statistically the resultset is likely to be lop-sided. If this matters, then extra steps should be taken to balance the M to F ratio.
“Random” SQL database backup (random.zip)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
/* Random generation of person records inserted into a [GeneratedPeople] table when [dbo].[spGeneratePeople] is executed. The source [Address], [Domain], [First], and [Last] tables must first be populated with seed values. */ /* Seed with address data [Id] field must be a unique sequential integer */ CREATE TABLE [dbo].[Address]( [Id] [int] NOT NULL, [City] [nvarchar](64) NOT NULL, [State] [nvarchar](2) NOT NULL, [Zip] [nvarchar](5) NOT NULL, [County] [nvarchar](255) NOT NULL, [AreaCode] [nvarchar](16) NOT NULL, CONSTRAINT [PK_Address] PRIMARY KEY CLUSTERED ( [Id] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] GO /* Seed with domain names, e.g. google.com [Id] field must be a unique sequential integer */ CREATE TABLE [dbo].[Domain]( [Id] [int] NOT NULL, [Name] [nvarchar](100) NOT NULL, CONSTRAINT [PK_Domain] PRIMARY KEY CLUSTERED ( [Id] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] GO /* Seed with person first names and gender [Id] field must be a unique sequential integer (a good source is the U.S. census) */ CREATE TABLE [dbo].[First]( [Id] [int] NOT NULL, [Name] [nvarchar](255) NOT NULL, [Gender] [nvarchar](1) NOT NULL, CONSTRAINT [PK_First] PRIMARY KEY CLUSTERED ( [Id] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] GO /* Seed with person surnames [Id] field must be a unique sequential integer (a good source is the U.S. census) */ CREATE TABLE [dbo].[Last]( [Id] [int] NOT NULL, [Name] [nvarchar](255) NOT NULL, CONSTRAINT [PK_Last] PRIMARY KEY CLUSTERED ( [Id] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] /* This table is created and populated with person records when the [dbo].[spGeneratePeople] procedure is executed */ CREATE TABLE [dbo].[GeneratedPeople]( [Guid] [uniqueidentifier] NULL, [Id] [int] IDENTITY(1,1) NOT NULL, [FirstName] [nvarchar](100) NULL, [LastName] [nvarchar](100) NULL, [Gender] [nvarchar](2) NULL, [DateOfBirth] [date] NULL, [Email] [nvarchar](100) NULL, [Phone] [nvarchar](12) NULL, [City] [nvarchar](64) NULL, [State] [nvarchar](2) NULL, [Zip] [nvarchar](5) NULL, [County] [nvarchar](255) NULL ) ON [PRIMARY] GO ALTER TABLE [dbo].[GeneratedPeople] ADD DEFAULT (newsequentialid()) FOR [Guid] GO /* Called by [dbo].[spGeneratePeople] to generate a birth date */ create procedure [dbo].[spGenerateDateOfBirth](@maxPastYears int = 100, @date date out) as begin set @date = null; if(@maxPastYears is null or @maxPastYears < 0) set @maxPastYears = 100 declare @year int = 0; declare @value nvarchar(10) = null; while(isdate(@value)=0) begin while( @year < year(getdate())-@maxPastYears or @year > year(getdate())) select @year = case when ceiling(rand(convert(varbinary,newid())) * 2) = 1 then '19' else '20' end + right('0'+rtrim(ceiling(rand(CONVERT([varbinary],newid(),0))*(99))),(2)) declare @month int = right('0'+rtrim(ceiling(rand(CONVERT([varbinary],newid(),0))*(12))),(2)) declare @day int = right('0'+rtrim(ceiling(rand(CONVERT([varbinary],newid(),0))*(31))),(2)) set @value = convert(nvarchar(4),@year) + '-' + convert(nvarchar(2),@month) + '-' + convert(nvarchar(2),@day) end set @date = convert(date,@value); return; end GO /* Called by [dbo].[spGeneratePeople] to create a subset of the records in the Address table */ create function [dbo].[tfGetAddresses](@state nvarchar(2) = null, @county nvarchar(255) = null, @city nvarchar(255) = null) returns @addrs table ( [Id] int ,[City] nvarchar(64) ,[State] nvarchar(2) ,[Zip] nvarchar(5) ,[County] nvarchar(255) ,[AreaCode] nvarchar(3) ) as begin insert @addrs select [Id] = ROW_NUMBER() OVER( ORDER BY [Zip]),[City],[State],[Zip],[County],[AreaCode] from [Address] where ((@state is not null and [State]= @state) or (@state is null)) and ((@county is not null and [County]= @county) or (@county is null)) and ((@city is not null and [City]= @city) or (@city is null)) return; end GO /* Generates people records into a [GeneratedPeople] table (which is dropped and re-created each time) @number - the number of records to generate @state - if not null, filters addresses to the two char state code @county - if not null, filters addresses to the county @city - if not null, filters addresses to the city */ create procedure [dbo].[spGeneratePeople](@number int, @state nvarchar(2), @county nvarchar(255),@city nvarchar(255)) as begin declare @count int = 0; if(@number is null or @number<=0) set @number = 1 if exists (select name from sys.tables where name = 'GeneratedPeople') drop table [GeneratedPeople] create table [GeneratedPeople] ( [Guid] uniqueidentifier default newsequentialid() ,[Id] int identity(1,1) ,[FirstName] nvarchar(100) ,[LastName] nvarchar(100) ,[Gender] nvarchar(2) ,[DateOfBirth] date ,[Email] nvarchar(100) ,[Phone] nvarchar(12) ,[City] nvarchar(64) ,[State] nvarchar(2) ,[Zip] nvarchar(5) ,[County] nvarchar(255) ) declare @addrs table ( [Id] int ,[City] nvarchar(64) ,[State] nvarchar(2) ,[Zip] nvarchar(5) ,[County] nvarchar(255) ,[AreaCode] nvarchar(3) ) insert @addrs select * from tfGetAddresses(@state,@county,@city) declare @maxAddrs int = (select max([Id]) from @addrs) declare @maxFirsts int = (select max([Id]) from [First]) declare @maxLasts int = (select max([Id]) from [Last]) declare @maxDomains int = (select max([Id]) from [Domain]) while(@count<@number) begin declare @date date = null exec dbo.[spGenerateDateOfBirth] default,@date out declare @rnd float =rand(convert(varbinary,newid())); insert [GeneratedPeople] ([FirstName],[LastName],[Gender],[DateOfBirth],[Email],[Phone],[City],[State],[Zip],[County]) select [FirstName] = [First].[Name] ,[LastName] = [Last].[Name] ,[Gender] = [First].[Gender] ,[DateOfBirth] = @date ,[Email] = lower([First].[Name] + '_' + [Last].[Name] + '@' + [Domain].[Name]) ,[Phone] = ((((a.[AreaCode] +'-')+right('0'+rtrim(ceiling(rand(CONVERT([varbinary],newid(),0))*(999))),(3)))+'-')+right('0'+rtrim(ceiling(rand(CONVERT([varbinary],newid(),0))*(9999))),(4))) ,a.[City] ,a.[State] ,a.[Zip] ,a.[County] from [First],[Last],[Domain],@addrs a where [First].[Id] = (select ceiling(@rnd * @maxFirsts)) and [Last].[Id] = (select ceiling(@rnd * @maxLasts)) and [Domain].[Id] = (select ceiling(@rnd * @maxDomains)) and a.[Id] = (select ceiling(@rnd * @maxAddrs)) set @count=@count+1 end end GO |