################################################################################
##
##  Intro STATS : CREATING AND MANAGING RANDOMIZED DATA
##  Henry Glick (JDRS)   (Last updated Sept. 8, 2017)
##  http://www.reuningscherer.net/stat10x/r/
##
################################################################################

##  Replicate the randomization process on page 168, in which 100 people are
##  assigned to take one of three surveys.

# Make a sequence of values from 1 to 100 by 1 and store it as the first
# column of a data frame. Or, more simply, just: mySequence <- 1:100
myDataFrame <- data.frame(seq(1, 100, 1))

# You must specify a seed (start) value if you want the "random" results to be
# the same on future runs. If you want them to be as random as the computer can
# make them, just don't set the seed.
set.seed(14)

# Take a sample of size 100 (the same size as our original sequence), without
# replacement. This is the same as randomly sorting the order of the values.
myDataFrame$mySample <- sample(myDataFrame[, 1], 100, replace = FALSE)

# Create a new variable with categorical values for each sample record:
# rows 1-40 get "A", rows 41-70 get "B" and rows 71-100 get "C". Because the
# sampled values are in random order, pairing each one with the label in its
# row yields a random assignment of 40/30/30 to the three surveys.
myDataFrame$Code <- c(rep("A", 40), rep("B", 30), rep("C", 30))

# Rename the variables as desired
colnames(myDataFrame) <- c("ID", "Sample", "Survey")

# Look at the data you just created
myDataFrame

################################################################################

################################################################################
##  Generate data (random binomials) and make histograms (pages 218-220)

# Perform simulation using samples of size 10

# 1000 times, take a sample of size 10 from a binomial distribution with a
# probability of success being 0.8 (following the example on page 217).
randomData <- rbinom(n = 1000, size = 10, prob = 0.8)

# Scale the results (counts of successes) to a 0-1 probability range
randomData <- randomData / 10

# Make a histogram of the random data. The 'breaks' argument sets the plot up
# to show the entire x axis with bins of 0.1 units.
hist(randomData, breaks = seq(0, 1, 0.1), col = "gray")

# Plot the mean as a line
abline(v = mean(randomData), col = "red")

# See how the mean represents the true mean probability of success (0.8).
mean(randomData)

# Repeat the above while taking a sample 10x larger
# (the bins are narrowed to 0.01 units here)
randomData <- rbinom(n = 1000, size = 100, prob = 0.8)
randomData <- randomData / 100
hist(randomData, breaks = seq(0, 1, 0.01), col = "gray")
abline(v = mean(randomData), col = "red")
mean(randomData)

# Repeat the above while taking a sample 100x larger
randomData <- rbinom(n = 1000, size = 1000, prob = 0.8)
randomData <- randomData / 1000
hist(randomData, breaks = seq(0, 1, 0.01), col = "gray")
abline(v = mean(randomData), col = "red")
mean(randomData)

# Notice that a sample of size 1000 does not really produce a more accurate
# estimate of the mean probability than a sample of size 100

##  END SCRIPT  ##