##############################################################################
#
#   Global Indices of Spatial Autocorrelation
#   JDRS
#
##############################################################################


# First an exploration of weighting schemes
# examples taken from Local Models for Spatial Analysis by C. D. Lloyd

cat(" Results from global_indices.r --- ", format(Sys.time(),
    "%A, %d %B %Y"), "\n",
 "==================================================================\n" )

 # from p. 62 of 1st edition
 
#create a 3x3 matrix

y.mat <- matrix(nrow=3, ncol=3, byrow=T,
             data=c(47, 48, 44, 42, 40, 38, 43, 37, 35) )
y.mat
 
y <- c(47, 48, 44, 42, 40, 38, 43, 37, 35)
N <- length(y)
 
y.ave <- mean(y)
#Get sum of squares
y.ssq <- sum(y^2) - N*y.ave^2
 
 Y.ij <- matrix(nrow=9, ncol=9, byrow=T, data=rep(0, 81) )
 for(i in 1:9) {
   for(j in 1:9) Y.ij[i,j] <- (y[i] - y.ave) * (y[j] - y.ave)
  }
round(Y.ij ,2)

#Look at WEIGHT MAtrix, W

W.rook <- matrix(nrow=9, ncol=9, byrow=T, data=rep(0, 81) )
   W.rook[1,] <- c(0,1,0, 1,0,0, 0,0,0)
   W.rook[2,] <- c(1,0,1, 0,1,0, 0,0,0)
   W.rook[3,] <- c(0,1,0, 0,0,1, 0,0,0)
   W.rook[4,] <- c(1,0,0, 0,1,0, 1,0,0)
   W.rook[5,] <- c(0,1,0, 1,0,1, 0,1,0)
   W.rook[6,] <- c(0,0,1, 0,1,0, 0,0,1)
   W.rook[7,] <- c(0,0,0, 1,0,0, 0,1,0)
   W.rook[8,] <- c(0,0,0, 0,1,0, 1,0,1)
   W.rook[9,] <- c(0,0,0, 0,0,1, 0,1,0)

W.rook
sum(W.rook)

( I.rook <- sum(N * W.rook * Y.ij) / (sum(W.rook) * y.ssq) )

#-------------------------------------------------------------------------

W.bishop <- matrix(nrow=9, ncol=9, byrow=T, data=rep(0, 81) )
   W.bishop[1,] <- c(0,0,0, 0,1,0, 0,0,0)
   W.bishop[2,] <- c(0,0,0, 1,0,1, 0,0,0)
   W.bishop[3,] <- c(0,0,0, 0,1,0, 0,0,0)
   W.bishop[4,] <- c(0,1,0, 0,0,0, 0,1,0)
   W.bishop[5,] <- c(1,0,1, 0,0,0, 1,0,1)
   W.bishop[6,] <- c(0,1,0, 0,0,0, 0,1,0)
   W.bishop[7,] <- c(0,0,0, 0,1,0, 0,0,0)
   W.bishop[8,] <- c(0,0,0, 1,0,1, 0,0,0)
   W.bishop[9,] <- c(0,0,0, 0,1,0, 0,0,0)

W.bishop
sum(W.bishop)

( I.bishop <- sum(N * W.bishop * Z.ij) / (sum(W.bishop) * z.ssq) )

#-------------------------------------------------------------------------

W.queen <- matrix(nrow=9, ncol=9, byrow=T, data=rep(0, 81) )
   W.queen[1,] <- c(0,1,0, 1,1,0, 0,0,0)
   W.queen[2,] <- c(1,0,1, 1,1,1, 0,0,0)
   W.queen[3,] <- c(0,1,0, 0,1,1, 0,0,0)
   W.queen[4,] <- c(1,1,0, 0,1,0, 1,1,0)
   W.queen[5,] <- c(1,1,1, 1,0,1, 1,1,1)
   W.queen[6,] <- c(0,1,1, 0,1,0, 0,1,1)
   W.queen[7,] <- c(0,0,0, 1,1,0, 0,1,0)
   W.queen[8,] <- c(0,0,0, 1,1,1, 1,0,1)
   W.queen[9,] <- c(0,0,0, 0,1,1, 0,1,0)

W.queen
sum(W.queen)

( I.queen <- sum(N * W.queen * Y.ij) / (sum(W.queen) * y.ssq) )



##############################################################################
#
#  New York Leukemia Data, 1978-1982.   W&G page 98
#
##############################################################################

#Have to get in data from folder on CANVAS under Files--> Data --> NYData.  Download ALL FILES!


#New York Data from Waller and Gotway
#Code modified from the following sources
#    http://www.bias-project.org.uk/ASDARcourse/unit6_slides.pdf
#    http://tw.rpi.edu/media/latest/ex9.pdf
#    http://rstudio-pubs-static.s3.amazonaws.com/8495_3eca534268ea4c89b6b3d57dcf2638c2.html


library(rgdal)
library(maptools)
library(spdep)
library(ggplot2)

#Set path below to wherever you locally stored NYData folder.

setwd("C:\\Users\\scherer\\Documents\\Classes\\Classes 16-17\\Spatial Stats\\Syllabus\\data\\NYData")

# Bring in shapefiles from ArcGIS
NY8 <- readOGR(".", "NY8_utm18")  #readOGR is in rgdal
#below is waste sites
TCE <- readOGR(".", "TCE")
#below is major cities
cities <- readOGR(".", "NY8cities")

#make plot of study area
#note that the coordinates() function gets the centroid of each county
par(mar=c(5,5,5,5), las=0,cex=.7)
plot(NY8, border="grey60", axes=TRUE, asp=1, 
    main="Central New York census tracts", cex=.5)
points(cities, col="red", pch=19)
text(coordinates(cities), labels=as.character(cities$names),
     font=2, pos=4)
points(TCE, col="blue", pch=19)
text(coordinates(TCE), labels=as.character(TCE$name),
     font=2, pos=4)



#INVESTIGATE NEIGHBORS
#Identify which counties share a common border
NY_nb <- poly2nb(NY8, queen = FALSE)   #queen=F means cant just touch at one corner
summary(NY_nb)
# Plot boundaries and connections
plot(NY8, border = "grey60", axes = TRUE)
title("Spatial Connectivity")
plot(NY_nb, coordinates(NY8), pch = 19, cex = 0.6, add = TRUE)

#Focus just on Syracuse
Syracuse <- NY8[NY8$AREANAME == "Syracuse city", ] 
coords <- coordinates(Syracuse)
IDs <- row.names(as(Syracuse, "data.frame"))

#Neighbors, Queen (single point touch allowed )
Synb_QN <- poly2nb(Syracuse)
plot(Syracuse, border = "grey60", axes = TRUE)
title("Spatial Connectivity")
plot(Synb_QN, coords, pch = 19, cex = 0.6, add = TRUE)

#Neighbors, Rook (single point touch NOT allowed) - counties that meet at corner are no longer neighbors
Synb_RK <- poly2nb(Syracuse, queen=FALSE)
plot(Syracuse, border = "grey60", axes = TRUE)
title("Spatial Connectivity")
plot(Synb_RK, coords, pch = 19, cex = 0.6, add = TRUE)

#K Nearest Neighbors, calculated from centroids
Synb_01 <- knn2nb(knearneigh(coords, k = 1), row.names = IDs) 
Synb_02 <- knn2nb(knearneigh(coords, k = 2), row.names = IDs) 
Synb_04 <- knn2nb(knearneigh(coords, k = 4), row.names = IDs)


#1 NN
plot(Syracuse, border = "grey60", axes = TRUE)
plot(Synb_01, coords, pch = 19, cex = 0.6, add = TRUE)

#2 NN
plot(Syracuse, border = "grey60", axes = TRUE)
plot(Synb_02, coords, pch = 19, cex = 0.6, add = TRUE)

#4 NN
plot(Syracuse, border = "grey60", axes = TRUE)
plot(Synb_04, coords, pch = 19, cex = 0.6, add = TRUE)


#Distance based neighbors
#First, figure out average distances of one nearest neighbor
dsts <- unlist(nbdists(Synb_01, coords))
summary(dsts)
#Get Max Dist
max_1nn <- max(dsts)
max_1nn
#Define circles based on percent of max distance
Synb_dist1 <- dnearneigh(coords, d1 = 0, d2 = 0.75 * max_1nn, row.names = IDs) 
Synb_dist2 <- dnearneigh(coords, d1 = 0, d2 = 1 * max_1nn, row.names = IDs) 
Synb_dist3 <- dnearneigh(coords, d1 = 0, d2 = 1.5 * max_1nn, row.names = IDs) 

#Make Distance based neightbor plots
plot(Syracuse, border = "grey60", axes = TRUE)
plot(Synb_dist1, coords, pch = 19, cex = 0.6, add = TRUE)

plot(Syracuse, border = "grey60", axes = TRUE)
plot(Synb_dist2, coords, pch = 19, cex = 0.6, add = TRUE)

plot(Syracuse, border = "grey60", axes = TRUE)
plot(Synb_dist3, coords, pch = 19, cex = 0.6, add = TRUE)

#make histogram of distribution of number of neighbors based on distance
Neighbors=matrix(0,63*3,1)
for (i in 1:63){
    Neighbors[i]=length(Synb_dist1[[i]])
    Neighbors[i+63]=length(Synb_dist2[[i]])  
    Neighbors[i+(63*2)]=length(Synb_dist3[[i]])  
}
Neighbors=data.frame(data.frame(Neighbors),as.character(c(rep("Small",63),rep("Medium",63),rep("Large",63))))
colnames(Neighbors)=c("Neighbors","Distance")
boxplot(Neighbors~Distance,data=Neighbors, col='red',lwd=2, horizontal=TRUE, main="Number of Neighbors")

#Distance based neighbors for entire NY dataset

#First, figure out average distances of one nearest neighbor
IDsNY <- row.names(as(NY8, "data.frame"))
coordsNY=coordinates(NY8)
NY8NN_01 <- knn2nb(knearneigh(coordsNY, k = 1), row.names = IDsNY) 
dsts <- unlist(nbdists(NY8NN_01, coordsNY))
summary(dsts)
#Get Max Dist
max_1nn <- max(dsts)
max_1nn
NY_dist1 <- dnearneigh(coordsNY, d1 = 0, d2 = 1.5* max_1nn, row.names = IDsNY) 
plot(NY8, border = "grey60", axes = TRUE)
plot(NY_dist1, coordsNY, pch = 19, cex = 0.6, add = TRUE)





####################################################
#   Weight Calculations
####################################################


#WEIGHTS
#First for entire NY dataset
help(nb2listw)
# Construct row-standardized spatial weights
NY_W <- nb2listw(NY_nb, style = "W", zero.policy = TRUE)
# Construct unstandardized spatial weights
NY_B <- nb2listw(NY_nb, style = "B", zero.policy = TRUE)


#Now Just Syracuse
# Construct row-standardized spatial weights
SY_W <- nb2listw(Synb_QN, style = "W", zero.policy = TRUE)
# Construct unstandardized spatial weights
SY_B <- nb2listw(Synb_QN, style = "B", zero.policy = TRUE)


#NOW - Leukemia
#Make plot of Number of Leukemia Cases
spplot(NY8, "Cases")
#same, just for Syracuse
spplot(Syracuse, "Cases")


########### Calculate Moran's I for all NY 
help(moran.test)
#Moran Test, standardized
moran.test(NY8$Cases, NY_W)  #row-standardized
#Moran Test, unstandardized
moran.test(NY8$Cases, NY_B)  # unstandardized

#Calculate possible values on I in this case
1/range(eigenw(NY_W))
1/range(eigenw(NY_B))

#Now Just Syracuse
#Moran Test, standardized
moran.test(Syracuse$Cases, SY_W)  #row-standardized
#Moran Test, unstandardized
moran.test(Syracuse$Cases, SY_B)  # unstandardized

#Monte Carlo Test (no distributional assumptions)
moran.mc(NY8$Cases, NY_W, nsim=1000)


#Make Moran Plots
#Input is Y's (leukemia counts), and neighbor information, inthis case, standardized values
#Note that dotted lines are at means

moran.plot(NY8$Cases, NY_W, pch=19, col='blue')
moran.plot(Syracuse$Cases, SY_W, pch=19, col='blue')


########### Calculate Geary's C 
help("geary.test")
#Moran Test, standardized
geary.test(NY8$Cases, NY_W)  #row-standardized
#Moran Test, unstandardized
geary.test(NY8$Cases, NY_B)  # unstandardized

#Monte Carlo Test (no distributional assumptions)
geary.mc(NY8$Cases, NY_W, nsim=1000)

#Now Just Syracuse
#Moran Test, standardized
geary.test(Syracuse$Cases, SY_W)  #row-standardized
#Moran Test, unstandardized
geary.test(Syracuse$Cases, SY_B)  # unstandardized


########### Calculate Global Getis Ord G 
help("globalG.test")
#G test, standardized - NOT RECOMMENDED
globalG.test(NY8$Cases, NY_W)  #row-standardized
#G Test, unstandardized
globalG.test(NY8$Cases, NY_B)  # unstandardized


#Now Just Syracuse
#G Test, unstandardized
globalG.test(Syracuse$Cases, SY_B)  # unstandardized




####################################################
#   Moran's I fitted to model residuals
####################################################


require(maptools)
columbus <- readShapePoly(system.file("etc/shapes/columbus.shp",package="spdep")[1])
plot(columbus)
spplot(columbus, "CRIME")
col.gal.nb <- read.gal(system.file("etc/weights/columbus.gal", package="spdep")[1])
plot(columbus)
plot(col.gal.nb)

#Fit constant model (basically, take out overall mean)
oldcrime01.lm <- lm(CRIME ~ 1, data = columbus) 

#Now, fit model where use Home Value and household income
oldcrime02.lm <- lm(CRIME ~ HOVAL + INC, data = COL.OLD) 

#Get Moran Test, no model at all
moran.test(columbus$CRIME,nb2listw(COL.nb, style="W"))
#Get Moran Test for flat model - basically the same
lm.morantest(oldcrime01.lm, nb2listw(COL.nb, style="W"))

#Get Moran Test for model after accounting for Home Value and Income
lm.morantest(oldcrime02.lm, nb2listw(COL.nb, style="W"))


lm.LMtests(oldcrime02.lm, nb2listw(COL.nb, style="W")) 
lm.morantest(oldcrime02.lm, nb2listw(COL.nb, style="S")) 
lm.morantest(oldcime02, nb2listw(COL.nb, style="W")) 


oldcrime.wlm <- lm(CRIME ~ HOVAL + INC, data = COL.OLD, weights = I(1/AREA_PL)) 
lm.morantest(oldcrime.wlm, nb2listw(COL.nb, style="W"), resfun=weighted.residuals) 
lm.morantest(oldcrime.wlm, nb2listw(COL.nb, style="W"), resfun=rstudent)


###################################################################
#
#  Distance Based Moran's I, Etc.
#
###################################################################


#First, do for Moran's I
NYcorrel=sp.correlogram(NY_dist1,var=NY8$Cases, order=5,method='I', zero.policy = TRUE)
print(NYcorrel)
#use Bonferroni correction for multiple comparisons
print(NYcorrel,"bonferroni" )
#plot correlegram with confidence intervals
plot(NYcorrel)

#Geary's c
NYcorrel=sp.correlogram(NY_dist1,var=NY8$Cases, order=5,method='C', zero.policy = TRUE)
print(NYcorrel,"bonferroni" )
#plot correlegram with confidence intervals
plot(NYcorrel)

#Straight up Correlation
NYcorrel=sp.correlogram(NY_dist1,var=NY8$Cases, order=5,method='corr', zero.policy = TRUE)
print(NYcorrel,"bonferroni" )
#plot correlegram with confidence intervals
plot(NYcorrel)


####################################################
#   Local Moran's I
####################################################

#Make plot of Number of Leukemia Cases
spplot(NY8, "Cases")

#Leukemia Data entire NY dataset
#Unstandardized
LocalMNYC=localmoran(NY8$Cases, NY_B) 
NY8_2=NY8
NY8_2$Cases=LocalMNYC[,1]
spplot(NY8_2, "Cases")
summary(LocalMNYC)

#Leukemia Data entire NY dataset
#Standardized Row Weights
LocalMNYC=localmoran(NY8$Cases, NY_B) 
NY8_2=NY8
NY8_2$Cases=LocalMNYC[,1]
spplot(NY8_2, "Cases")
summary(LocalMNYC)

#Try doing for 8 nearest neighbors
NYnb_08 <- knn2nb(knearneigh(coordsNY, k = 8), row.names = IDsNY)
NY_NWeight <- nb2listw(NYnb_08, style = "B", zero.policy = TRUE)
LocalMNYC=localmoran(NY8$Cases, NY_NWeight) 
NY8_2=NY8
NY8_2$Cases=LocalMNYC[,1]
spplot(NY8_2, "Cases")
summary(LocalMNYC)

#same plot with truncation
LocalMNYC[,1][LocalMNYC[,1]>3]=3
LocalMNYC[,1][LocalMNYC[,1]<(-1)]=-1
NY8_2=NY8
NY8_2$Cases=LocalMNYC[,1]
spplot(NY8_2, "Cases")