#####################################################################################
##
##  Intro STATS : CREATING AND MANAGING RANDOMIZED DATA
##  Henry Glick (JDRS) (Last updated Sept. 8, 2017)
##  http://www.reuningscherer.net/stat10x/r/
##
#####################################################################################


## Replicate randomization process on page 168, in which 100 people are   ##
## assigned to take one of three surveys                                  ##

# Fix the seed (start value) so the "random" shuffle below is reproducible on
# future runs. Drop this line to get a different shuffle every time.
set.seed(14)

# One row per person: ID holds the values 1 to 100 in order.
myDataFrame <- data.frame(ID = seq(1, 100, 1), stringsAsFactors = FALSE)

# Drawing all 100 IDs without replacement is the same as randomly re-ordering them.
myDataFrame$Sample <- sample(myDataFrame$ID, size = nrow(myDataFrame), replace = FALSE)

# Categorical survey label per row: the first 40 rows get "A", the next 30 "B",
# and the last 30 "C".
myDataFrame$Survey <- rep(c("A", "B", "C"), times = c(40, 30, 30))

# Look at the data you just created.
myDataFrame


#####################################################################################
#####################################################################################


## Generate data (random binomials) and make histograms as on pages 218-220 ##

# Perform simulation using samples of size 10.
# 1000 times, count the successes in a sample of size 10 drawn from a binomial
# distribution with success probability 0.8 (following example on Page 217).
randomData <- rbinom(n = 1000, size = 10, prob = 0.8)
randomData <- randomData / 10 # Scale the counts to a 0-1 proportion range

# A sample of size 10 can only produce the 11 proportions 0, 0.1, ..., 1.
# Center one 0.1-wide bin on each of them (edges at -0.05, 0.05, ..., 1.05).
# Putting the edges at 0, 0.1, ..., 1 would leave only 10 bins for 11 values
# (0 and 0.1 would share the first bin) and would draw every bar to the left
# of the value it counts, so the mean line would sit on a bar edge.
binBreaks <- seq(-0.5, 10.5, by = 1) / 10
hist(randomData, breaks = binBreaks, col = "gray") # Histogram over the whole 0-1 range
abline(v = mean(randomData), col = "red") # Plot the mean as a line
mean(randomData) # See how the mean represents the true probability of success (0.8).

# Repeat the above while taking a sample 10x larger (size 100).
# 1000 times, count the successes in a sample of size 100 with success
# probability 0.8, then scale the counts to a 0-1 proportion range.
randomData <- rbinom(n = 1000, size = 100, prob = 0.8)
randomData <- randomData / 100

# A sample of size 100 can only produce the 101 proportions 0, 0.01, ..., 1.
# Center one 0.01-wide bin on each of them (edges at -0.005, 0.005, ..., 1.005).
# Edges at 0, 0.01, ..., 1 would leave only 100 bins for 101 values (0 and 0.01
# would share the first bin) and would draw every bar to the left of its value.
binBreaks <- seq(-0.5, 100.5, by = 1) / 100
hist(randomData, breaks = binBreaks, col = "gray")
abline(v = mean(randomData), col = "red") # Plot the mean as a line
mean(randomData) # Compare with the true probability of success (0.8)

# Same simulation once more, now with samples 100 x larger than the first run
# (size 1000): draw 1000 binomial counts and rescale them to proportions in
# a single step.
randomData <- rbinom(n = 1000, size = 1000, prob = 0.8) / 1000

# Histogram across the full 0-1 range using bins 0.01 wide.
hist(randomData, breaks = seq(from = 0, to = 1, by = 0.01), col = "gray")

# Mark the mean of the simulated proportions with a red vertical line.
abline(v = mean(randomData), col = "red")

# Notice that a sample of size 1000 does not really produce a more accurate
# estimate of the mean probability than a sample of size 100.
mean(randomData)

## END SCRIPT ##