"The
purpose of this document is to consolidate and improve the various R scripts
used to perform the cited analyses."
Table of Contents
# 1- DATA IMPORT AND BASIC STATISTICS:
# 2- DATA QUALITY VERIFICATION:
# 7- ROTATED AND STANDARDIZED PCA:
# 8- HIERARCHICAL CLASSIFICATION ANALYSIS:
# 10- DISCORDANCE, HETEROGENEITY AND HOMOGENEITY OF CLUSTERS (WARD METHOD):
# 12- PERFORM A RELATIVE WEIGHTS ANALYSIS (RWA):
# 13- EMPIRICAL ORTHOGONAL FUNCTION ANALYSIS (EOF):
# 1- DATA IMPORT AND BASIC STATISTICS:
rm(list=ls()) # Clear the working environment
#
data=read.table(file("clipboard"),header=TRUE,
sep="\t", dec=".", row.names=1)
# data =read.table(file.choose()),header=TRUE,
sep="\t", dec=".")
str(data)
dim(data)
cbind(names(data))
# 2- DATA QUALITY VERIFICATION:
names(data)
attach(data)
dim(data)
table(is.na(data))
round(table(is.na(data))[2]/(table(is.na(data))[1]+table(is.na(data))[2])*100,2)
# % TRUE, i.e. the share of missing values
# data=data[,-1]
datan=na.omit(data)
names(datan)
str(data)
datu= unique(datan)
datd = datan[!duplicated(datan), ]
library(questionr) # see footnote [1]
freq(is.na(data))
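# Equivalently, the share of missing cells can be computed in one step
# (a minimal alternative sketch, same result as the round(...) line above):
round(mean(is.na(data)) * 100, 2) # percent of NA cells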
# 3- DESCRIPTIVE STATISTICS:
library(pastecs)
round(stat.desc(data),2)
library(prettyR)
describe(data,num.desc=c("mean","max","min","sd","var","median","valid.n"),xname=NA,horizontal=TRUE)
#
str(data)
summary(data)
library(Hmisc); describe(data)
#
#
library(psych); lowerCor(data)
round(cor(data, use="complete.obs", method="pearson"), 3)
round(cor(na.omit(data)),3)
#
quantile(data$Fmg, 0.75) # third quartile
IQR(data$Fmg)
lapply(data[, 1:4], sd)
by(data, data$Fmg, summary)
# datan = na.omit(data)
library(gclus)
corr <- abs(cor(datan))
colors <- dmat.color(corr)
order <- order.single(corr)
#
x11(); cpairs(data, order, panel.colors = colors, border.color = "grey70", gap = 0.45, main = "", show.points = TRUE, pch = 21, bg = rainbow(3))
#
# Example: points can be coloured by a grouping factor, e.g. bg = rainbow(3)[iris$Species]
x11(); pairs(data, labels = colnames(data), pch = 21, bg = rainbow(3), col = rainbow(3), main = "", row1attop = TRUE, gap = 1, cex.labels = NULL, font.labels = 1)
# https://r-coder.com/correlation-plot-r/
#
require(Hmisc)
library(psych)
library(corrplot)
#
x11(); corPlot(data, cex = 0.3)
cor_test_mat <- corr.test(data)$p
corrplot(cor_test_mat)
corrplot(cor(datan), p.mat = cor_test_mat, insig = "p-value", cex = 0.1)
# https://r-coder.com/correlation-plot-r/
# https://statisticsglobe.com/add-p-values-correlation-matrix-plot-r
#
require(Hmisc)
col <- colorRampPalette(c("blue", "cyan", "green"))(2000)
col <- colorRampPalette(c("yellow", "orange", "red"))(2000)
heatmap(x = cor(data), col = col, symm = TRUE)
x11(); corrplot(cor(data, use="complete.obs", method="pearson"), type="upper", order="hclust", tl.col="black", tl.srt=45)
#
M <- cor(data, use="complete.obs", method="pearson")
ord <- corrMatOrder(M, order = "AOE")
M2 <- M[ord, ord]
col1 <- colorRampPalette(c("#7F0000", "red", "#FF7F00", "yellow", "white", "cyan", "#007FFF", "blue", "#00007F"))
col2 <- colorRampPalette(c("#67001F", "#B2182B", "#D6604D", "#F4A582", "#FDDBC7", "#FFFFFF", "#D1E5F0", "#92C5DE", "#4393C3", "#2166AC", "#053061"))
col3 <- colorRampPalette(c("red", "white", "blue"))
col4 <- colorRampPalette(c("#7F0000", "red", "#FF7F00", "yellow", "#7FFF7F", "cyan", "#007FFF", "blue", "#00007F"))
col5 <- colorRampPalette(c("blue", "cyan", "green", "white", "yellow", "orange", "red"))
whiteblack <- c("white", "black")
corrplot.mixed(M2)
cor.test(data$H, data$I, method="pearson", alternative="two.sided")
# 5-DATA TRANSFORMATION :
library(compositions)
# centered log-ratio transformation
clr(X)
# back-transformation
clrInv(clr(X))
# isometric log-ratio transformation (D-1)
ilr(X)
# back-transformation
ilrInv(ilr(X))
# additive log-ratio transformation (D-1)
alr(X)
# back-transformation
alrInv(alr(X))
# https://www.geo.fu-berlin.de/en/v/soga-r/Advances-statistics/Feature-scales/Logratio_Transformations/index.html#:~:text=In%20R%2Dpackage%20compositions%20the,then%20taken%20as%20a%20composition).
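# The clr/ilr/alr calls above use a placeholder X. A minimal self-contained sketch,
# assuming a small synthetic compositional matrix (parts of a whole) just for illustration:
library(compositions)
Xc <- acomp(matrix(c(0.2, 0.3, 0.5,
                     0.1, 0.6, 0.3), nrow = 2, byrow = TRUE)) # toy composition (assumption)
clr(Xc)          # centered log-ratio scores
clrInv(clr(Xc))  # back-transformation recovers the closed composition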
library(SpatialNP)
library(performance)
library(EFAtools)
library(factoextra)
library(FactoMineR)
# KMO (Kaiser-Meyer-Olkin): is the data suitable for PCA?
KMO(data[,-1], use = c("na.or.complete"), cor_method = c("kendall"))
res.pca <- PCA(data, scale.unit=TRUE, graph=FALSE) # , quali.sup = 1
res.pca$eig
var <- get_pca_var(res.pca) ;var
var$coord
ind <- get_pca_ind(res.pca); ind
fviz_contrib(res.pca, choice = "var", axes = 1, top = 10)
x11(); fviz_contrib(res.pca, choice = "ind", axes = 1, top = 10)
x11(); fviz_pca_ind(res.pca, col.ind="cos2")
fviz_pca_var(res.pca, col.var="cos2", repel = TRUE, axes = c(1,3), select.var = list(cos2 = 16)) + theme_grey()
x11(); fviz_pca_biplot(res.pca,cex=0.5)
# https://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/
# Geographically Weighted Principal Components Analysis (GWPCA):
library(GWmodel)      # GW models
library(sp)           # data management
library(spdep)        # spatial autocorrelation
library(gstat)        # geostatistics
library(RColorBrewer) # visualization
library(classInt)     # class intervals
library(raster)       # spatial data
library(gridExtra)    # multiple plots
library(ggplot2)      # multiple plots
library(GWnnegPCA)
#
# Import shapefile & CSV data
dataFolder <- "D:\\Dropbox\\Spatial Data Analysis and Processing in R\\Data_GWR\\"
COUNTY <- shapefile(paste0(dataFolder, "COUNTY_ATLANTIC.shp"))
df <- read.csv(paste0(dataFolder, "data_atlantic_1998_2012.csv"), header=T)
#
SPDF<-merge(COUNTY,df, by="FIPS")
#
pepper.df <- as.data.frame(rcrop) # rcrop: raster object prepared in the tutorial linked below
is.na(pepper.df)
head(pepper.df)
pca <- prcomp(pepper.df, center = TRUE, scale. = TRUE)
(pca$sdev^2 / sum(pca$sdev^2)) * 100
pca$rotation
#
# https://cran.r-project.org/web/packages/GWnnegPCA/GWnnegPCA.pdf
# https://zia207.github.io/geospatial-r-github.io/geographically-weighted-principal-components-analysis.html
#
# 7- ROTATED AND STANDARDIZED PCA:
library(ade4)
library(psych)
#
acp.varimax <- principal(r=scale(data[,-1]), nfactors=5, rotate="varimax", scores=T)
acp.varimax
x11(); s.corcircle(acp.varimax$loadings[1:length(data[,-1]),], xax = 1, yax = 2)
x11(); s.corcircle(acp.varimax$loadings[1:length(data[,-1]),], xax = 1, yax = 3)
x11(); s.corcircle(acp.varimax$loadings[1:length(data[,-1]),], xax = 1, yax = 4)
x11(); biplot(principal(datan[,-1], nfactors = 2, rotate = "varimax"), labels=paste0(1:1)); abline(v=0, h=0)
# Varimax
library(psych)
varimax7 <- principal(r=na.omit(data), nfactors=dim(data)[2], rotate="varimax", scores=FALSE)
# varimax7 <- varimax(pca_cars$rotation) # alternative on a prcomp object (pca_cars is not defined here)
promaxx <- principal(r=na.omit(data), nfactors=dim(data)[2], rotate="promax", scores=FALSE)
# 8- HIERARCHICAL CLASSIFICATION ANALYSIS:
data = read.table(file("clipboard"), header=TRUE, sep="\t", dec=".", row.names=1)
str(data)
dim(data)
cbind(names(data))
library(caret)
library(DescTools)
library(ggplot2)
library(fpc)
library(FactoMineR)
library(cluster)
library(factoextra)
library('WeightedCluster')
# Choose the optimal number of clusters to retain
library(NbClust)
res.nbclust <- NbClust(scale(data), distance = "euclidean", min.nc = 2, max.nc = 9, method = "complete", index = "all")
cbind(names(data))
dd <- dist(scale(data), method = "euclidean")
hc <- hclust(dd, method = "ward.D2")
hcd <- as.dendrogram(hc)
#
hc1 <- hclust(dd, method = "complete")
hc2 <- hclust(dd, method = "average")
hc3 <- hclust(dd, method = "single")
hc4 <- hclust(dd, method = "centroid")
hc5 <- hclust(dd, method = "mcquitty")
hc6 <- hclust(dd, method = "median")
#
x11()
# par(new=TRUE)
fviz_dend(hc, lwd = 1.1, k = 4, cex = 0.55, ylab = "Euclidean Distance", main = "",
          k_colors = c("red", "blue", "green", "yellow"), horiz = FALSE,
          labels_track_height = 1, ggtheme = theme_test())
#
res.pca <- PCA(data, ncp = 3, graph = FALSE)
# Compute hierarchical clustering on the principal components
res.hcpc <- HCPC(res.pca, graph = FALSE)
res.hcpc1 <- HCPC(res.pca, nb.clust = 4, graph = FALSE)
#
# HCA map or factor map:
x11(); fviz_dend(res.hcpc,
                 cex = 0.7,                     # label size
                 palette = "jco",               # color palette, see ?ggpubr::ggpar
                 rect = TRUE, rect_fill = TRUE, # add rectangles around groups
                 rect_border = "jco",           # rectangle color
                 labels_track_height = 0.8)     # add room for the labels
x11(); fviz_cluster(res.hcpc1,
                    repel = TRUE,              # avoid label overlapping
                    show.clust.cent = TRUE,    # show cluster centers
                    palette = c("red", "blue", "green", "yellow"), # see ?ggpubr::ggpar
                    ggtheme = theme_minimal(),
                    main = "Factor map")
#
library(FactoMineR)
data(decathlon)
res.pca = PCA(decathlon, quanti.sup=11:12, quali.sup=13)
library(Factoshiny)
resshiny = PCAshiny(res.pca)
resshiny = PCAshiny(resshiny)
# http://factominer.free.fr/graphs/factoshiny-fr.html
#-EXTRACT SUBGROUP
#
x11(); plot(hc, main="", cex=0.25, xlab="", sub="", hang=-1, las=1)
kk = 3; box(lwd=2, lty=1)
# grid(lwd=2, col="grey", lty=1)
x = rect.hclust(hc, k=kk, border = 1:3)
# par(new=TRUE); dev.off()
#
kk = 3
gr=cutree(hc,k= kk ); cbind(gr)
table(names(gr))
library(questionr); freq(names(gr))
# Copy the group names; in Excel, replace the spaces with commas.
G = subset(gr, gr %in% c(3))
names(G)
cbind(G)
write.table(G, "G1.txt",col.names=FALSE,
row.names=TRUE); getwd()
#
H = rep('@', length(G))
G = cbind(names(G), H)
# then replace '@' with a comma
write.table(G, "G1.txt", col.names=FALSE, row.names=FALSE); getwd()
#
# Clustering Large Applications
data = read.table(file("clipboard"), header=TRUE, sep="\t", dec=".")
#
dataa = read.table(file("clipboard"), header=TRUE, sep="\t", dec=".")
str(data)
dim(data)
attach(data)
summary(data)
cbind(names(data))
#
require(cluster)
# CLARA
clarax <- clara(data , 2)
clarax <- clara(data, 2, metric = "euclidean", stand = TRUE, samples = 50, pamLike = TRUE)
dd <- cbind(data, cluster = clarax$cluster)
head(dd, n = 4)
x11(); plot(data, col = clarax$clustering, main = "CLARA Clustering")
points(data[clarax$medoids,], col = 1:2, pch = 10, cex = 2)
# PAM
pam_result <- pam(data, k = 2)
x11(); plot(data, col = pam_result$clustering, main = "PAM Clustering")
points(data[pam_result$medoids,], col = 1:2, pch = 8, cex = 2)
# https://www.datanovia.com/en/lessons/clara-in-r-clustering-large-applications/#:~:text=the%20next%20section.-,CLARA%20Algorithm,set%20to%20the%20closest%20medoid.
kmeans_result <- kmeans(data, centers = 2, nstart = 25) # k-means solution used below
x11(); plot(data, col = kmeans_result$cluster, main = "K-means Clustering")
points(kmeans_result$centers, col = 1:2, pch = 8, cex = 2)
# https://rpubs.com/folwalsh/UL
# 9-K-MEANS :
library(factoextra)
my_data <- scale(datan) # NA-free data from above
x11(); fviz_nbclust(my_data, kmeans, method = "gap_stat") # optimal number of clusters for k-means
# 1. Loading and preparing data
df <- scale(datan[,-1])
set.seed(123)
km.res <- kmeans(scale(datan[,-1]), 3, nstart = 25)
# 3. Visualize
library("factoextra")
x11()
fviz_cluster(km.res, data = df,
palette = c("#00AFBB",
"#E7B800", "#FC4E07"),
ggtheme =
theme_minimal(),
main = "Partitioning Clustering Plot"
)+ theme_gray()
# Kmean Method 1
library(factoextra)
set.seed(111)
datam=cbind(sapply(na.omit(data), median))
df <- na.omit(datam); df <- scale(df)
distance <- get_dist(df); fviz_dist(distance, gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))
k2 <- kmeans(df, centers = 2, nstart = 25); k2
fviz_cluster(k2, data = df) # the dimension of the data here is < 2, so this step may fail
#
## Kmean Method 2
library(factoextra)
res.hk <-hkmeans(scale(data), kk)
res.hk <-hkmeans(scale(t(datan)),kk)
#
fviz_dend(res.hk, cex = 0.5, palette = "jco", rect = TRUE, rect_border = "jco", rect_fill = TRUE)
x11(); fviz_cluster(res.hk, palette = "jco", repel = TRUE, ggtheme = theme_classic())
# x11()
library("factoextra")
df <- scale(t(datan))
km.res <- eclust(df, "kmeans", k = kk, nstart = 25, graph = FALSE); km.res$cluster
fviz_cluster(km.res, frame.type = "norm", frame.level = 0.68)
#
library(mclust); fit = Mclust(scale(t(datan)))
#
# K-means Method 3 (manual transposition)
res.hk <- hkmeans(scale(datan), 2)
fviz_cluster(res.hk, axes = c(1, 2)) + theme_bw() # check how the parameters are split across clusters
fviz_cluster(res.hk, ellipse.type = "n", k_colors = c("#b30000", "#0000FF")) + theme_bw() # , geom = "point"
fviz_cluster(res.hk, ellipse.type = "norm") + theme_bw() # , geom = c("point", "text")
cbind(res.hk$cluster)
# hkmeans_tree(res.hk, cex = 0.6)
res.hk$tot.withinss/(res.hk$tot.withinss+res.hk$betweenss)*100
res.hk$betweenss/(res.hk$tot.withinss+res.hk$betweenss)*100
# 10- DISCORDANCE, HETEROGENEITY AND HOMOGENEITY OF CLUSTERS (WARD METHOD):
# Group of Cluster
cbind(names(data))
cbind(row.names(data))
cbind(data.frame(res.hk$cluster)[,1]);
table(data.frame(res.hk$cluster)[,1])
#
grp1 = data[, c(3,5,8,9,12,14,16,27,26,34)]; names(grp1); length(grp1)
grp2 = data[, -c(3,5,8,9,12,14,16,27,26,34)]; names(grp2); length(grp2)
# grp3 = data[-c(3,5,8,9,12,14,16,27,26,34,20,32,24), ]; names(grp3); length(grp3) # Severity Duration
#
grp11 = data[, c(3,5,8,9,12,14,16,6,34,10,15,17,28,19,7)]; cbind(names(grp11)); length(grp11)
grp22 = data[, -c(3,5,8,9,12,14,16,6,34,10,15,17,28,19,7)]; cbind(names(grp22)); length(grp22)
#
grp11s = data[, c(5,8,15,12,14,16,6,34,19,17,28)]; cbind(names(grp11s)); length(grp11s)
grp22s = data[, -c(3,9,5,8,15,12,14,16,6,34,19,17,28)]; cbind(names(grp22s)); length(grp22s)
#
#
library(lmomco);library(lmomRFA)
summary(regtst(regsamlmu(grp1))); regtst(regsamlmu(grp1))
summary(regtst(regsamlmu(grp2))); regtst(regsamlmu(grp2))
#summary(regtst(regsamlmu(grp3)))
summary(regtst(regsamlmu(grp11))); regtst(regsamlmu(grp11))
summary(regtst(regsamlmu(grp22))); regtst(regsamlmu(grp22))
#
summary(regtst(regsamlmu(grp11s))); regtst(regsamlmu(grp11s))
summary(regtst(regsamlmu(grp22s))); regtst(regsamlmu(grp22s))
#
#-----Homogeneity Test H value :
Vi1 = regsamlmu(grp1)[2]*(regsamlmu(grp1)[3]-sapply(regsamlmu(grp1)[3], mean))^2 / regsamlmu(grp1)[2]
Vi2 = regsamlmu(grp2)[2]*(regsamlmu(grp2)[3]-sapply(regsamlmu(grp2)[3], mean))^2 / regsamlmu(grp2)[2]
#
V1=sqrt(sum(Vi1))
V2=sqrt(sum(Vi2))
VBAR=mean(c(V1,V2)); VSD=sd(c(V1,V2))
#
H1=(V1-VBAR)/VSD; H1
H2=(V2-VBAR)/VSD; H2
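# For reference: the usual Hosking-Wallis dispersion weights each site by its record length
# and divides by the total, and lmomRFA::regtst() reports the heterogeneity measures H
# directly from Monte-Carlo simulation. A minimal sketch, assuming grp1 holds the
# at-site series as columns (as above):
rl <- regsamlmu(grp1)                               # at-site sample L-moments (n, l_1, t, t_3, t_4)
tR <- sum(rl$n * rl$t) / sum(rl$n)                  # record-length-weighted regional mean L-CV
V1w <- sqrt(sum(rl$n * (rl$t - tR)^2) / sum(rl$n)); V1w
regtst(rl, nsim = 500)$H                            # H >= 2 is usually read as "definitely heterogeneous"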
#-----Homogeneity Test H value
Vi11 = regsamlmu(grp11)[2]*(regsamlmu(grp11)[3]-sapply(regsamlmu(grp11)[3], mean))^2 / regsamlmu(grp11)[2]
Vi22 = regsamlmu(grp22)[2]*(regsamlmu(grp22)[3]-sapply(regsamlmu(grp22)[3], mean))^2 / regsamlmu(grp22)[2]
#
V11=sqrt(sum(Vi11))
V22=sqrt(sum(Vi22))
VBAR=mean(c(V11,V22)); VSD=sd(c(V11,V22))
#
H11=(V11-VBAR)/VSD; H11
H22=(V22-VBAR)/VSD; H22
#-----Homogeneity Test H value
Vi11s = regsamlmu(grp11s)[2]*(regsamlmu(grp11s)[3]-sapply(regsamlmu(grp11s)[3], mean))^2 / regsamlmu(grp11s)[2]
Vi22s = regsamlmu(grp22s)[2]*(regsamlmu(grp22s)[3]-sapply(regsamlmu(grp22s)[3], mean))^2 / regsamlmu(grp22s)[2]
#
V11s=sqrt(sum(Vi11s))
V22s=sqrt(sum(Vi22s))
VBAR=mean(c(V11s,V22s)); VSD=sd(c(V11s,V22s))
#
H11s=(V11s-VBAR)/VSD; round(H11s,3)
H22s=(V22s-VBAR)/VSD; round(H22s,3)
#-#-#-#-#
regd <- as.regdata(regsamlmu(grp3)) # a "regional" data structure (kept separate from 'data')
Dhosking <- sort(regtst(regd)$D, decreasing=TRUE) # discordancy
Dlmomco <- lmrdiscord(site=regd$name, t2=regd$t, t3=regd$t_3, t4=regd$t_4)
Dasquith <- Dlmomco$D
Dasquith
# Now show the site id and the two discordancy computations
print(data.frame(NAME=regd$name, Dhosking=Dhosking, Dasquith=Dasquith))
# page 266: https://cran.csiro.au/web/packages/lmomco/lmomco.pdf
#
lmrd(samlmu(t(data[,1])))
lmrd(samlmu(t(data[,1])), xaxs="i", yaxs="i", las=1)
aa <- apply(t(data), 1, samlmu)
aaa=regsamlmu(data)
mean=cbind(sapply(data, mean,na.rm = TRUE))
n=cbind(sapply(data, length))
# bb=t(aa)
# name=row.names(t(data))
# cc<-data.frame(name=name, n=n,mean=mean)
# dd=cbind(aa,cc)
regtst(aaa, nsim=500)
regtst(regsamlmu(data))
#https://cran.r-project.org/web/packages/lmomRFA/lmomRFA.pdf
#https://rdrr.io/cran/lmomRFA/man/regtst.html
#--------------------------- Graph regst
#https://rdrr.io/cran/lmomco/man/plotlmrdia.html
#https://rdrr.io/cran/lmom/man/lmrd.html
#------------------Discordance or dissimilarity
#https://cran.rstudio.com/web/packages/fasthplus/readme/README.html
#https://cran.r-project.org/web/packages/fasthplus/vignettes/fasthplus.html
#
# Bivariate Discordancy Test
library(lmomco);library(lmomRFA)
cbind(row.names(data))
grp3=data[c(32,35,1,18,34,29,2,25,23,24,9,31),]
grp4=data[-c(32,35,1,18,34,29,2,25,23,24,9,31),]
# L-comoment ratios (L-covariance, L-coskewness and L-cokurtosis) in the bivariate case
D <- data.frame(X1=rnorm(30), X2=rnorm(30), X3=rnorm(30))
L1 <- Lcomoment.matrix(D, k=1)
L2 <- Lcomoment.matrix(D, k=2)
L3 <- Lcomoment.matrix(D, k=3)
LkLCV <- Lcomoment.coefficients(L1, L2)
LkTAU3 <- Lcomoment.coefficients(L3,L2)
Lcomoment.correlation(L2)
X1=rnorm(101);X2=rnorm(101)+X1
Lcoskew12=Lcomoment.Lk12(X1,X2)
Lcorr12=Lcomoment.Lk12(X1,X2,k=2)/Lcomoment.Lk12(X1,X1,k=2)
rhop12=cor(X1,X2, method="pearson")
print(Lcorr12-rhop12)
#
X=rnorm(200);Y=X+rexp(200)
z=lcomoms2(data.frame(X=X, Y=Y),diag=TRUE)
zz=lcomoms2(data.frame(X=X, Y=Y),opdiag=TRUE)
#
X=grp3[,1];Y=grp3[,2]
z=lcomoms2(grp3,diag=TRUE)
zz = lcomoms2(data.frame(X=X, Y=Y), opdiag=TRUE)
lmrdiscord(site=row.names(grp3), t2=zz$T2, t3=zz$T3, t4=zz$T4)
#
regtst(regsamlmu(grp3))
# dd <- data.frame(
#   name       = c("site 1", "site 2", "site 3"),
#   n          = c(20, 30, 40),
#   mean       = c(100, 110, 120),
#   LCV        = c(0.20, 0.25, 0.30),
#   L_skewness = c(0.15, 0.20, 0.25),
#   L_kurtosis = c(0.10, 0.15, 0.20),
#   t_5        = c(0.10, 0.12, 0.14))
# Convert to class "regdata"
# rdd <- as.regdata(dd)
# regtst(rdd, nsim=1000)
#https://cran.rstudio.org/web/packages/lmomco/lmomco.pdf
#
#
# Quality measures of a clustering with WeightedCluster
??WeightedCluster
library(WeightedCluster)
library(TraMineR) # sequence tools used below (seqdef, seqdist)
str(mvad)
mvad
names(mvad)
#
mvad.alphabet <- c("employment", "FE", "HE", "joblessness", "school", "training")
mvad.labels <- c("Employment", "Further Education", "Higher Education", "Joblessness", "School", "Training")
mvad.scodes <- c("EM", "FE", "HE", "JL", "SC", "TR")
mvadseq <- seqdef(mvad[, 17:86], alphabet = mvad.alphabet, states = mvad.scodes,
                  labels = mvad.labels, weights = mvad$weight, xtstep = 6)
subm.custom <- matrix(c(0, 1, 1, 2, 1, 1,
                        1, 0, 1, 2, 1, 2,
                        1, 1, 0, 3, 1, 2,
                        2, 2, 3, 0, 3, 1,
                        1, 1, 1, 3, 0, 2,
                        1, 2, 2, 1, 2, 0), nrow = 6, ncol = 6, byrow = TRUE)
mvaddist <- seqdist(mvadseq, method = "OM", indel = 1.5, sm = subm.custom) # optimal-matching distances used below
wardCluster <- hclust(as.dist(mvaddist), method="ward.D", members=mvad$weight)
wardTree <- as.seqtree(wardCluster, seqdata=mvadseq, diss=mvaddist, ncluster=6)
seqtreedisplay(wardTree, type="d", border=NA, showdepth=TRUE)
#
#
#
# Homogeneity test between clusterings (as 2 vectors)
gr1=cbind(c(data[,3],data[,5],data[,8],data[,9],data[,12],data[,14],data[,16]
,data[,34],data[,10],data[,15],data[,17],data[,28],data[,19],data[,7]))
gr2=cbind(c(data[,23],data[,24],data[,25],data[,26],data[,27],data[,29],data[,30],data[,31],data[,32],data[,33],data[,1],data[,2],data[,4],data[,6],data[,11],data[,13],data[,18],data[,20],data[,21],data[,22]))
n1=rep("grp1",length(gr1))
n2=rep("grp2",length(gr2))
m=cbind(c(gr1,gr2))
mm=cbind(c(n1,n2))
ndata=data.frame(m,mm)
library(car);leveneTest(m ~ mm, data = ndata)
library(car);fligner.test(m ~ mm, data = ndata)
#https://www.datanovia.com/en/lessons/homogeneity-of-variance-test-in-r
#
library(clv)
datas = scale(t(datan), center=TRUE); km = kmeans(datas, 2); km
cls.scatt.data(datas, res.hk$cluster)
aggregate(t(datan), list(res.hk$cluster), mean)
#
# Homogeneity between clusterings (as 2 vectors) BUT WITH THE SAME LENGTH
library(clevr)
#true <- c(1,1,1,2,2) # ground truth clustering
#pred <- c(1,1,2,2,2) # predicted clustering
#
grp11s = data[, c(5,8,15,12,14,16,6,34,19,17,28)]; cbind(names(grp11s)); length(grp11s)
grp22s = data[, -c(3,9,5,8,15,12,14,16,6,34,19,17,28)]; cbind(names(grp22s)); length(grp22s)
#
true = cbind(c(data[,5],data[,8],data[,15],data[,12],data[,14],data[,16],data[,6],data[,34],data[,19],data[,17],data[,28]))
pred = cbind(c(data[,34],data[,10],data[,15],data[,17],data[,28],data[,19],data[,7]))
homogeneity(true, pred)
#
# The homogeneity ranges between 0 and 1, where 1 indicates perfect homogeneity
#https://rdrr.io/cran/clevr/man/homogeneity.html
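# A tiny runnable illustration of clevr::homogeneity() with the commented example
# vectors above (ground-truth vs. predicted labels of the same length):
library(clevr)
true_lab <- c(1, 1, 1, 2, 2)
pred_lab <- c(1, 1, 2, 2, 2)
homogeneity(true_lab, pred_lab) # closer to 1 = each predicted cluster contains mostly one true class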
#
# Homogeneity: calculate within-cluster (within-group) homogeneity
library(cata)
homogeneity(grp1)
homogeneity(bread$cata[1:7,,1:6])
# https://rdrr.io/cran/cata/man/homogeneity.html
# SUB-ANNEX
# ANOVA analysis in clustering (as 2 vectors)
lm=lm(m ~ mm, data = ndata)
summary(anova(lm))
#
cbind(names(data))
gr1=data[,c(5,8,9,12,14, 16,17,26,34)]; dim(gr1)[2]
gr2=data[,-c(5,8,9,12,14, 16,17,26,34)]; dim(gr2)[2]
#
names(gr1)
names(gr2)
#
gr11=cbind(c(gr1[,1],gr1[,2],gr1[,3],gr1[,4],gr1[,5],gr1[,6],gr1[,7],gr1[,8],gr1[,9]))
#
gr22=cbind(c(gr2[,1],gr2[,2],gr2[,3],gr2[,4],gr2[,5],gr2[,6],gr2[,7],gr2[,8],gr2[,9],gr2[,10],
gr2[,11],gr2[,12],gr2[,13],gr2[,14],gr2[,15],gr2[,16],gr2[,17],gr2[,18],gr2[,19],gr2[,20],
gr2[,21] ,gr2[,22],gr2[,23],gr2[,24],gr2[,25],gr2[,26] ))
# gr11 = cbind(sapply(gr1, function(v) median(na.omit(v)))) # per-column medians of group 1
# gr22 = cbind(sapply(gr2, function(v) median(na.omit(v)))) # per-column medians of group 2
#
gr12=rbind(gr11,gr22)
#
grp11 = data[, c(3,5,8,9,12,14,16,34,10,15,17,28,19,7)]
grp22 = data[, -c(3,5,8,9,12,14,16,34,10,15,17,28,19,7)]
group = c(2,2,1,2,1,2,1,1,1,1,2,1,2,1,1,1,1,2,1,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2)
# positions 1 to 35 (station order)
#
gr1=cbind(c(data[,3],data[,5],data[,8],data[,9],data[,12],data[,14],data[,16],data[,34],data[,10],data[,15],data[,17],data[,28],data[,19],data[,7]))
gr2=cbind(c(data[,23],data[,24],data[,25],data[,26],data[,27],data[,29],data[,30],data[,31],data[,32],data[,33],data[,1],data[,2],data[,4],data[,6],data[,11],data[,13],data[,18],data[,20],data[,21],data[,22]))
ybar = mean(na.omit(c(gr1, gr2)))
eff1 = length(gr1); hybar1 = mean(na.omit(gr1)); var1 = var(na.omit(gr1))
eff2 = length(gr2); hybar2 = mean(na.omit(gr2)); var2 = var(na.omit(gr2))
#
# within-cluster sums of squares: minimize
# Between-cluster sums of squares: maximize
var_inter = ((eff1*(hybar1^2)) + (eff2*(hybar2^2))) / (eff1+eff2) - (ybar^2); var_inter
var_intra = ((eff1*var1) + (eff2*var2)) / (eff1+eff2); var_intra
var_tot = var_inter + var_intra; var_tot
var_intra/var_tot*100
var_inter/var_tot*100
#https://www.univ-montp3.fr/miap/ens/site/uploads/Main.StatL2S4/Cours11E46XP3.pdf
SSW = sum((na.omit(gr1)-hybar1)^2) + sum((na.omit(gr2)-hybar2)^2); SSW
SSB = sum((rep(hybar1,length(gr1))-ybar)^2) + sum((rep(hybar2,length(gr2))-ybar)^2); SSB
SST = sum((na.omit(gr1)-ybar)^2) + sum((na.omit(gr2)-ybar)^2); SST
SSW/SST*100 # minimize
SSB/SST*100 # maximize
#https://towardsdatascience.com/explain-ml-in-a-simple-way-k-means-clustering-e925d019743b
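# As a cross-check, kmeans() reports the same within/between decomposition directly;
# a minimal sketch on the scaled, NA-free version of the data table used above:
km_chk <- kmeans(scale(na.omit(data)), centers = 2, nstart = 25)
km_chk$tot.withinss / km_chk$totss * 100 # within-cluster share: minimize
km_chk$betweenss   / km_chk$totss * 100  # between-cluster share: maximize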
ybar= mean(na.omit(c(gr12)))
eff1=length(na.omit(gr11));
eff2=length(na.omit(gr22));
s11=sum(na.omit(gr11));s22=sum(na.omit(gr22));s11q=sum(na.omit(gr11^2));s22q=sum(na.omit(gr22^2))
hybar1=mean(na.omit(gr11)); hybar2=mean(na.omit(gr22))
var1=(s11q/eff1)-(hybar1^2);var2=(s22q/eff2)-(hybar2^2)
#var(na.omit(gr11)) var2=var(na.omit(gr22))
moycond1=(hybar1^2)*eff1;moycond2=(hybar2^2)*eff2
varcond1=(var1)*eff1;varcond2=(var2)*eff2
efft=eff1+eff2; moytot=sum(s11+s22)/efft;
moytotq=sum(s11q+s22q)/efft
Moymoy=sum(hybar1+hybar2)/efft;
Varmoy=sum(var1+var2)/efft
Moymoycond=sum(moycond1+moycond2)/efft;Moyvarcond=sum(varcond1+varcond2)/efft
#
var_inter = ((eff1*(hybar1^2)) + (eff2*(hybar2^2))) / (eff1+eff2) - (ybar^2)
var_intra = ((eff1*var1) + (eff2*var2)) / (eff1+eff2)
var_intra2 = eff1*((sum(na.omit(gr11^2))/eff1)-(hybar1^2)) + eff2*((sum(na.omit(gr22^2))/eff2)-(hybar2^2))
var_tot = (sum(na.omit(gr11)^2)/eff1) - ybar^2 + (sum(na.omit(gr22)^2)/eff2) - ybar^2
var_tot2 = var_inter + var_intra2
var_intra/var_tot*100 # within: minimize
var_inter/var_tot*100 # between: maximize
#
vartot=moytotq-(moytot^2)
varinter=Moymoycond-(moytot^2)
varintra=Moyvarcond
varintra/vartot*100 # within: minimize
varinter/vartot*100 # between: maximize
set.seed(111)
library(tidyverse) # needed below for %>%, group_by(), nest(), map_dbl()
calc_SS <- function(df) sum(as.matrix(dist(df)^2)) / (2 * nrow(df))
df2 <- data
cbind(row.names(df2))
kmeans <- kmeans(as.matrix(df2), 2)
#
df2$cluster <- kmeans$cluster
df2 %>%
group_by(cluster) %>%
nest() %>%
mutate(within_SS = map_dbl(data, ~calc_SS(.x))) %>%
arrange(cluster)
kmeans$betweenss/ kmeans$totss*100
kmeans$tot.withinss/ kmeans$totss*100
# 11-SPATIAL AUTOCORRELATION:
library(terra)
library(rgeoda)
library(ape)
library(spdep)
library(sp)
library(rgdal)
library(cartography)
library(dplyr)
library(deldir)
library(tripack)
library(ggplot2)
library(tidyr)
library(zip)
library(tidyverse)
library(sf)
library(rgeos)
library(tmap)
library(tmaptools)
library(spgwr)
library(grid)
library(gridExtra)
library(spData)
library(spatstat)
library(sf)
library(mapview)
require(DCluster)
library(ggvoronoi)
library(gstat)
knitr::opts_chunk$set(echo = TRUE)
#
Census.Data = read.table(file("clipboard"), header=TRUE, sep="\t", dec=".", row.names=1)
# Census.Data = read.table(file("clipboard"), header=TRUE, sep="\t", dec=".")
summary(Census.Data)
# Census.Data =Census.Data[1:6,]
str(Census.Data)
names(Census.Data)
attach(Census.Data)
dim(Census.Data)
glimpse(Census.Data)
#
Census.Dat=na.omit(Census.Data)
summary(Census.Dat)
table(is.na(Census.Dat))
Census.Datu= unique(Census.Dat)
# Census.Datd= Census.Dat[!duplicated(Census.Dat)]
#
# OUTLIER VALUES___________________________________
# lower fence: Q1 - 3.0 x IQR
# upper fence: Q3 + 3.0 x IQR
# summary(data[,1])
X=datan[, 15 ]
Q1= quantile(X, probs = c(0.25)); Q1
Q3= quantile(X, probs = c(0.75)); Q3
#
IQR= Q3-Q1; IQR
B1=Q1-(3*IQR); B2=Q3+(3*IQR); rbind(B1,B2)
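# A minimal sketch flagging the observations that fall outside the fences defined above:
out_idx <- which(X < B1 | X > B2)
cbind(index = out_idx, value = X[out_idx])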
#
# NORMALITY AND HOMOGENEITY TEST___________________
library(tidyverse)
library(ggpubr)
library(car)
library(LaplacesDemon)
library(vGWAS)
library(onewaytests)
library(caret)
# install.packages('vGWAS', repos = 'http://r-forge.r-project.org')
cbind(names(Census.Data))
dim(Census.Data)
Z=Census.Data[,4]
#
shapiro.test(Z)
nortest::sf.test(Z)
brown.forsythe.test(y = Census.Dat$F, group = Census.Dat$sample)
leveneTest(Census.Dat$F ~ Census.Dat$sample, Census.Dat)
#
Census.Dat %>%
  group_by(sample) %>%
  summarize(Variance = var(F, na.rm = TRUE))
bf.test(Census.Dat$F ~ Census.Dat$sample, data = Census.Dat)
#
# TRANSFORMATION BOXCOX()__________________________
data = Census.Dat[,4]
bc_trans <- BoxCoxTrans(data)
transformed_data <- predict(bc_trans, data)
x11();hist(transformed_data)
x11();qqnorm(transformed_data);
qqline(transformed_data)
# Minimal deviations from the straight line indicate an approximately normal distribution.
# The Box-Cox transformation plays a role comparable to a log transform when judged on the qqnorm plot.
# https://stats.stackexchange.com/questions/52293/r-qqplot-how-to-see-whether-data-are-normally-distributed
# https://www.geeksforgeeks.org/how-to-use-boxcox-transformation-in-the-caret-package-in-r/
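# An alternative sketch for choosing the Box-Cox lambda with MASS::boxcox()
# (assumes the variable is strictly positive; 'data' here is the vector defined above):
library(MASS)
bc_prof <- boxcox(lm(data ~ 1), lambda = seq(-2, 2, 0.1)) # profile log-likelihood of lambda
lambda_hat <- bc_prof$x[which.max(bc_prof$y)]; lambda_hat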
# Importing geospatial data (shapefile): the Thiessen polygons were already built in ArcGIS and merged
# with the country outline; alternatively, use the "th.clp" object built directly in R below.
#
hunan <- readOGR(dsn = "C:/Abdi-Basid ADAN/12.Doc.IST.CERD/EAU [En cours]/arcgis/thiessenpolygonwater.shp")
names(hunan ) # Name Layers
gdppc <- read.table(file("clipboard"), header=TRUE, sep="\t"); str(gdppc) # read_csv("data/attributeTable/Hunan_GDPPC.csv")
table(is.na(gdppc)); summary(gdppc); gdppc1 = na.omit(gdppc)
# Moran's I: check for spatial autocorrelation
F.dists <- as.matrix(dist(cbind(gdppc1$Longdec, gdppc1$Latdec)))
F.dists.inv <- 1/F.dists ; F.dists.inv[1:2, 1:2]
diag(F.dists.inv) <- 0
Moran.I(gdppc1$F, F.dists.inv)
# Binary distance matrix: pairs whose distance is at most d are treated as connected,
# pairs whose distance exceeds d are not.
F.dists.bin <- (F.dists > 0 & F.dists <= .75)
Moran.I(gdppc1$F, F.dists.bin)
# If $p.value is below alpha = 0.05, we reject the null hypothesis of zero spatial
# autocorrelation (i.e. the variable is not randomly dispersed).
# https://stats.oarc.ucla.edu/r/faq/how-can-i-calculate-morans-i-in-r
# hunan@data <- left_join(hunan@data, gdppc)
x11(); qtm(hunan)
x11(); spplot(hunan, "Shape_Area")
# vignette("tmap-getstarted")
tmap_tip()
# plot map: https://www.kaggle.com/code/umeshnarayanappa/quickstart-guide-shapefiles-and-r-tmap
#
# Deriving the spatial weight matrix (creation of the weight matrix):
wm_q <- poly2nb(hunan, queen = TRUE); summary(wm_q)
x11(); plot(hunan, border = 'lightgrey'); plot(wm_q, coordinates(hunan), pch = 19, cex = 0.6, add = TRUE, col = "red")
rswm_q <- nb2listw(wm_q, zero.policy = TRUE) # row-standardised weights matrix
knn8 <- knn2nb(knearneigh(coordinates(hunan), k = 8, longlat = TRUE), row.names = row.names(hunan$GDPPC2012))
knn_lw <- nb2listw(knn8, style = "B")
summary(knn8)
#
# Points/stations as a shapefile for the Queen contiguity weight matrix / K-nearest neighbour weights
queen_w <- queen_weights(iris.dj1, order=1, include_lower_order = FALSE, precision_threshold = 0)
get_neighbors(queen_w, idx = 1)
#
# Computing local Moran's I (for Queen contiguity neighbours):
fips <- order(gdppc$sample)
localMI <- localmoran(as.vector(hunan$F), rswm_q)
head(localMI)
# Computing local Moran's I (for the adaptive weight matrix, 8 neighbours):
fips1 <- order(gdppc$F)
localMI1 <- localmoran(as.vector(hunan$F), knn_lw)
print(localMI1); View(localMI1)
printCoefmat(data.frame(localMI1[fips1], row.names = hunan$sample[fips1]), check.names = FALSE)
# Ii:     the local Moran's I statistic
# E.Ii:   the expectation of the local Moran statistic under the randomisation hypothesis
# Var.Ii: the variance of the local Moran statistic under the randomisation hypothesis
# Z.Ii:   the standard deviate of the local Moran statistic
# Pr():   the p-value of the local Moran statistic
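# A small sketch pulling out the locations whose local Moran's I is significant at 5%
# (the fifth column of the localmoran() output is the p-value):
sig_idx <- which(localMI1[, 5] < 0.05)
head(data.frame(site = hunan$sample[sig_idx], localMI1[sig_idx, ]))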
#
# Mapping the local Moran's I:
hunan.localMI <- cbind(hunan, localMI)
x11(); tm_shape(hunan.localMI) +
  tm_fill(col = "Ii", style = "pretty", palette = "RdBu", title = "local moran statistics") +
  tm_borders(alpha = 0.5)
# Mapping local Moran's I p-values:
x11(); tm_shape(hunan.localMI) +
  tm_fill(col = "Ii", breaks = c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf), palette = "-Blues", title = "local Moran's I p-values") +
  tm_borders(alpha = 0.5)
# Mapping both local Moran's I values & p-values
x11()
localMI.map <- tm_shape(hunan.localMI) +
  tm_fill(col = "Ii", style = "pretty", title = "local moran statistics") +
  tm_borders(alpha = 0.5)
pvalue.map <- tm_shape(hunan.localMI) +
  tm_fill(col = "Ii", breaks = c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf), palette = "-Blues", title = "local Moran's I p-values") +
  tm_borders(alpha = 0.5)
tmap_arrange(localMI.map, pvalue.map, asp = 1, ncol = 2)
#
# Creating the LISA cluster map:
#---------------------------
# The LISA cluster map shows the significant locations colour-coded by the type of spatial autocorrelation:
# 1) plot the Moran scatterplot, then 2) generate the LISA cluster map.
# 1) plot the Moran scatterplot:
X11(); nci <- moran.plot(hunan$F, rswm_q, labels = as.character(hunan$sample), xlab = "F", ylab = "Spatially Lag F")
# The plot is split into 4 quadrants; the top-right quadrant corresponds to areas with high values
# whose neighbours also have above-average values (high-high locations).
#
# Plotting the Moran scatterplot with the standardised variable:
hunan$Z.F <- scale(hunan$F) %>% as.vector
knitr::kable(head(hunan, n=5))
X11(); nci2 <- moran.plot(hunan$Z.F, rswm_q, labels = as.character(hunan$sample), xlab = "F", ylab = "Spatially Lag F")
# high-high: Quadrant 4 - cluster / positive autocorrelation
# low-low:   Quadrant 1 - cluster / positive autocorrelation
# low-high:  Quadrant 2 - outlier / negative autocorrelation
# high-low:  Quadrant 3 - outlier / negative autocorrelation
# non-significant Moran (the p-value of localMI is compared to the significance level)
#
# 2) generate the LISA cluster map
quadrant <- vector(mode = "numeric", length = nrow(localMI))
DV <- hunan$F - mean(hunan$F)
C_mI <- localMI[,1] - mean(localMI[,1])
signif <- 0.05
#| D1=-LH | G1=+HH | D2=+LL | G2=-HL : -outliers | +cluster
quadrant[DV > 0 & C_mI > 0] <- 4
quadrant[DV < 0 & C_mI < 0] <- 1
quadrant[DV < 0 & C_mI > 0] <- 2
quadrant[DV > 0 & C_mI < 0] <- 3
quadrant[localMI[,5] > signif] <- 0
#
hunan.localMI$quadrant <- quadrant
colors <- c("#ffffff", "#2c7bb6", "#abd9e9", "#fdae61", "#d7191c")
clusters <- c("insignificant", "low-low", "low-high", "high-low", "high-high")
knitr::kable(head(hunan.localMI, n=5))
#
x11(); tm_shape(hunan.localMI) +
  tm_fill(col = "quadrant", style = "cat", palette = colors[c(sort(unique(quadrant)))+1],
          labels = clusters[c(sort(unique(quadrant)))+1], popup.vars = c("Postal.Code")) +
  tm_view(set.zoom.limits = c(11,17)) +
  tm_borders(alpha = 0.5)
# low-low = cold spot | high-high = hot spot.
# A high-high cluster indicates positive autocorrelation.
# A low-high cluster indicates negative autocorrelation / outliers.
#
# Hot Spot & Cold Spot Analysis: computing the Gi statistics
# Hot spot area: G > 0 (also high-high in the local Moran's I values)
# Cold spot area: G < 0 (also low-low in the local Moran's I values)
# 1) derive the spatial weight matrix | 2) compute the Gi statistics | 3) map the Gi statistics
dnb <- dnearneigh(coordinates(hunan), 0, 85, longlat = TRUE); dnb
x11(); plot(hunan, border = "lightgrey"); plot(dnb, coordinates(hunan), add = TRUE, col = "red")
dnb_lw <- nb2listw(dnb, style = "B"); summary(dnb_lw)
#
coords <- coordinates(hunan)
knb <- knn2nb(knearneigh(coords, k = 8, longlat = TRUE), row.names = row.names(hunan$sample))
knb_lw <- nb2listw(knb, style = "B"); summary(knb_lw)
x11(); plot(hunan, border = "lightgrey"); plot(knb, coordinates(hunan), pch = 19, cex = 0.6, add = TRUE, col = "red")
#
fips <- order(hunan$sample)
gi.fixed <- localG(hunan$F, dnb_lw)
hunan.gi <- cbind(hunan, as.matrix(gi.fixed))
names(hunan.gi)[dim(hunan.gi)[2]] <- "gstat"
head(hunan.gi@data)
#
fips <- order(hunan$sample)
gi.adaptive <- localG(hunan$F, knb_lw)
hunan.gi <- cbind(hunan.gi, as.matrix(gi.adaptive))
names(hunan.gi)[dim(hunan.gi)[2]] <- "gstat_adaptive"
head(hunan.gi@data)
# Mapping local Gi statistics: local Gi values with fixed distance weights
x11(); tm_shape(hunan.gi) +
  tm_fill(col = "gstat", style = "pretty", palette = "-RdBu", title = "local Gi value") +
  tm_borders(alpha = 0.5) +
  tm_text("sample", just = "left", xmod = .5, size = 0.7) +
  tm_legend(legend.outside = TRUE)
#
# Mapping local Gi values with adaptive distance weights
x11(); tm_shape(hunan.gi) +
  tm_fill(col = "gstat_adaptive", style = "pretty", palette = "-RdBu", title = "local Gi value") +
  tm_borders(alpha = 0.5) +
  tm_text("sample", just = "left", xmod = .5, size = 0.5) +
  tm_legend(legend.outside = TRUE)
#
# Interpretation: https://rpubs.com/erikaaldisa/spatialclustering
# Exporting
class(hunan.gi)
st_crs(hunan.gi)
s.df <- data.frame(hunan.gi)
writeOGR(hunan.gi, dsn = '.', layer = 'Rspatial', driver = "ESRI Shapefile"); getwd()
# Exporting raster: writeRaster(hunan.gi, "elev_out.tif", gdal = "GTiff")
# Exporting sf class: st_write(hunan.gi, "shapefile_out.shp", driver = "ESRI Shapefile") # create a shapefile
# Exporting sp class: writeOGR(hunan.gi, dsn = '.', layer = 'Rspatial', driver = "ESRI Shapefile")
# Export/import shapefile: https://stackoverflow.com/questions/13926811/export-a-polygon-from-an-r-plot-as-a-shapefile
# Export/import shapefile: https://mgimond.github.io/Spatial/reading-and-writing-spatial-data-in-r.html
# TYPES OF WEIGHTS MATRIX:
# 1| Contiguity-based weights: queen_weights(), rook_weights()
# 2| Distance-based weights: distance_weights()
# 3| K-nearest neighbour weights: knn_weights()
# 4| Kernel weights: distance_weights() and knn_weights() with kernel parameters
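# A hedged sketch of the other rgeoda weight constructors listed above
# (assuming the same sf object iris.dj1 passed to queen_weights() above; k = 6 is arbitrary):
library(rgeoda)
knn_w  <- knn_weights(iris.dj1, k = 6)                    # k-nearest-neighbour weights
dthres <- min_distthreshold(iris.dj1)                     # smallest distance so every unit has a neighbour
dist_w <- distance_weights(iris.dj1, dist_thres = dthres) # distance-band weights
summary(knn_w); summary(dist_w)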
#
# saveRDS(iris.dj[4], "iris.dj[4].rds")
# saveRDS(iris.dj1[1], "iris.dj1[1].rds")
# getwd()
# z <- gzcon(url("https://github.com/mgimond/Spatial/raw/main/Data/texas.rds"))
# P <- readRDS("C:/Abdi-Basid ADAN/12.Doc.IST.CERD/EAU [En cours]/arcgis/iris.dj1[1].rds")
# p <- st_as_sf(P)
#
iris.dj <- read_sf("C:/Users/hp/Documents/ArcGIS 10.8/Djibouti for mask/MASK SHAPEFILE Djibouti/Regions.shp")
w <- st_as_sf(iris.dj[4]) # ; crs(iris.dj1) <- "+proj=longlat +datum=WGS84 +no_defs"
iris.dj1 <- read_sf("C:/Abdi-Basid ADAN/12.Doc.IST.CERD/EAU [En cours]/arcgis/water.shp")
p <- st_as_sf(iris.dj1)
x11(); plot(iris.dj1[4], pch=10)
x11(); plot(iris.dj[4])
dev.off(); dev.off()
coords <- st_coordinates(st_centroid(iris.dj))
#
# Queen contiguity weights:
queen_w <- queen_weights(iris.dj1, order=1, include_lower_order = FALSE, precision_threshold = 0)
summary(queen_w)
weights_sparsity(queen_w)
has_isolates(queen_w)
#
# Details of the weights, e.g. list the neighbours of a specified observation:
nbrs <- get_neighbors(queen_w, idx = 1)
cat("\nNeighbors of the 1-st observation are:", nbrs)
#
# https://cran.r-project.org/web/packages/spNetwork/vignettes/SpatialWeightMatrices.html
#
# Create Voronoi polygons with simple features in R:
#
x11()
tm_shape(w) + tm_polygons() +
  tm_shape(p) +
  tm_dots(col = "F", palette = "RdBu", auto.palette.mapping = FALSE, title = "Sample water \n(mg/L)", size = 0.7) +
  tm_text("sample", just = "left", xmod = .5, size = 0.7) +
  tm_legend(legend.outside = TRUE)
# p <- st_as_sf( p[,c(4)] )
# p <- st_union(p)
# p <- st_set_crs(p, 4326)
# p <- st_geometry(p)
p <- p[, c(4)]
st_crs(p) <- "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" # or "+proj=longlat +datum=WGS84 +no_defs"
# crs(p1)
st_crs(w) <- "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" # or "+proj=longlat +datum=WGS84 +no_defs"
# crs(p1)
p <- st_transform(p, "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs")
# polygon map: https://mgimond.github.io/Spatial/coordinate-systems-in-r.html
#
th <- dirichlet(as.ppp(p)) |> st_as_sfc() |> st_as_sf()
st_crs(th) <- st_crs(p)
th2 <- st_join(th, p, fn = mean)
th.clp <- st_intersection(th2, w)
#
x11(); tm_shape(th.clp) +
  tm_polygons(col = "F", palette = "RdBu", auto.palette.mapping = FALSE, title = "Water \n(in inches)") +
  tm_legend(legend.outside = TRUE)
# Exporting sf class
class(th.clp)
st_crs(th.clp) <- "+proj=longlat +datum=WGS84 +no_defs"
st_write(th.clp, "shapefile_out1.shp", driver = "ESRI Shapefile") # create a shapefile
#
# Thiessen polygons: https://mgimond.github.io/Spatial/interpolation-in-r.html
Census.Data %>%
  st_as_sf(coords = c("Longdec", "Latdec"), crs = sp::CRS("+proj=longlat +datum=WGS84")) %>%
  mapview()
m1 = cbind(Census.Data$Longdec, Census.Data$Latdec) %>% st_multipoint()
#
voronoi_grid <- st_voronoi(m1)
x11();plot(voronoi_grid, col = NA)
plot(m1, add = TRUE, col = "blue", pch = 16)
#
attach(Census.Data)
x <- Census.Dat$Longdec; table(duplicated(x))
y <- Census.Dat$Latdec; table(duplicated(y))
tesselation <- deldir(x, y)
tiles <- tile.list(tesselation)
plot(tiles, pch = 19, number = TRUE, cex=0.5)
plot(tiles,showpoints = FALSE)
plot(tiles, close = TRUE)
x11(); plot(tiles, pch = 19, border = "white", fillcol = hcl.colors(50, "Sunset"))
# s <- seq(0, 2 * pi, length.out = 3000); circle <- list(x = 0.5 * (1 + cos(s)), y = 0.5 * (1 + sin(s)))
# x11(); plot(tiles, pch = 19, col.pts = "white", border = "white", fillcol = hcl.colors(50, "viridis"), clipp = circle)
#
box <- st_sfc(bbox_polygon(iris.dj1)) # bbox_polygon(): helper from the Stack Overflow post linked below
v <- st_voronoi(iris.dj1, box)
plot(v)
#
points <- data.frame(x, y, distance = sqrt((x-100)^2 + (y-100)^2))
ggplot(points) + geom_voronoi(aes(x, y, fill = distance))
ggplot(points, aes(x, y)) + stat_voronoi(geom = "path") + geom_point()
# Interpolation in R: https://mgimond.github.io/Spatial/interpolation-in-r.html
# Voronoi diagram with deldir: https://r-charts.com/part-whole/voronoi-diagram/#:~:text=A%20Voronoi%20diagram%20(or%20Thiessen,Voronoi)%20tessellation%20with%20the%20tile.
# Voronoi diagrams with ggvoronoi: https://cran.r-project.org/web/packages/ggvoronoi/vignettes/ggvoronoi.html
# Create Voronoi polygons with simple features in R: https://stackoverflow.com/questions/45719790/create-voronoi-polygon-with-simple-feature-in-r
# Interactive Voronoi diagram generator using R and x11(): https://www.r-bloggers.com/2021/10/little-useless-useful-r-functions-interactive-voronoi-diagram-generator-using-r-and-x11/
# R function for Thiessen polygons: https://gis.stackexchange.com/questions/136542/r-function-for-thiessen-polygons
# How to build Voronoi polygons for point coordinates in R: https://stackoverflow.com/questions/66946196/how-to-build-voronoi-polygons-for-point-coordinates-in-r
# How to create Thiessen polygons from points using R packages: https://stackoverflow.com/questions/9403660/how-to-create-thiessen-polygons-from-points-using-r-packages
#
irisdj.nb <- poly2nb(iris.dj)
irisdj.lw <- nb2listw(irisdj.nb, zero.policy = TRUE) # creation of the weight matrix
#
# moran.plot(x, listw, zero.policy=NULL, spChk=NULL, labels=NULL,
#            xlab=NULL, ylab=NULL, quiet=NULL, plot=TRUE, return_df=TRUE, ...)
#
is.vector(Census.Dat$F)
x11(); moran.plot(as.vector(Census.Dat$F), irisdj.lw, labels = FALSE, xlab = "", ylab = "")
# x11(); moran.plot(as.vector(Census.Dat$F), irisdj.lw, labels = as.character(row.names(Census.Dat)), pch = 19)
x11(); moran.plot(as.vector(scale(Census.Dat$F)), xlim = c(-2, 4), ylim = c(-2, 4), irisdj.lw, labels = as.character(row.names(Census.Dat)), pch = 19)
# spChk quiet return_df
#
mp <- moran.plot(as.vector(scale(Census.Dat$F)), xlim = c(-2, 4), ylim = c(-2, 4), irisdj.lw, labels = as.character(row.names(Census.Dat)), pch = 19)
if (require(ggplot2, quietly=TRUE)) {
  xname <- attr(mp, "xname")
  ggplot(mp, aes(x=x, y=wx)) + geom_point(shape=1) +
    geom_smooth(formula=y ~ x, method="lm") +
    geom_hline(yintercept=mean(mp$wx), lty=2) +
    geom_vline(xintercept=mean(mp$x), lty=2) + theme_minimal() +
    geom_point(data=mp[mp$is_inf,], aes(x=x, y=wx), shape=9) +
    geom_text(data=mp[mp$is_inf,], aes(x=x, y=wx, label=labels, vjust=1.5)) +
    xlab(xname) + ylab(paste0("Spatially lagged ", xname))
}
# https://www.e-education.psu.edu/geog586/node/672
# DOI: 10.1016/j.scitotenv.2020.139584 # INTERPRETATION PLOT
# Output.Areas <- readOGR(".", "C: ")
# Output.Areas2 <- read_sf("C:/Users/hp/Documents/ArcGIS 10.8/Djibouti for mask/MASK SHAPEFILE Djibouti/Regions.shp")
# Shapefile of the stations and their coordinates, joined by merging
Output.Areas2 <- read_sf("C:/Abdi-Basid ADAN/12.Doc.IST.CERD/EAU [En cours]/arcgis/stationwater.shp")
glimpse(Output.Areas2)
OA.Census <- merge(Output.Areas2, Census.Data, by.x = "Latdec", by.y = "Longdec")
OA.Census_sf <- st_as_sf(OA.Census)
#
neighbours <- poly2nb(OA.Census)
neighbours
# https://rpubs.com/quarcs-lab/spatial-autocorrelation
#
ozone.dists <- as.matrix(dist(cbind(ozone$Lon, ozone$Lat)))
ozone.dists.inv <- 1/ozone.dists
diag(ozone.dists.inv) <- 0
#
Moran.I(ozone$Av8top, ozone.dists.inv)
ozone.dists.bin <- (ozone.dists > 0 & ozone.dists <= .75)
Moran.I(ozone$Av8top, ozone.dists.bin)
# https://stats.oarc.ucla.edu/r/faq/how-can-i-calculate-morans-i-in-r/
#
#| Import/export a shapefile
# mtq <- read_sf("data/mtq/martinique.shp")
# mtq <- st_read("data/mtq/martinique.shp")
# write_sf(mtq, "data/mtq/martinique.gpkg", delete_layer = TRUE)
# st_write(mtq, "data/mtq/martinique.gpkg", delete_layer = TRUE)
# st_crs(iris.75)
# download.file("https://github.com/comeetie/satRday/blob/master/exercises/data.zip?raw=true", destfile = "data.zip")
# unzip("data.zip", exdir = ".")
# https://antuki.github.io/satRday2019_workshop/exercises/exercises.html
#
iris75 <- st_read("C:/Abdi-Basid ADAN/12.Doc.IST.CERD/EAU [En cours]/arcgis/shapefile/data/iris_75.shp", stringsAsFactors = F)
x11(); plot(iris75)
#
iris75.nb <- poly2nb(iris75)  # extraction of the list of neighbours (by default, Queen contiguity)
iris75.lw <- nb2listw(iris75.nb, zero.policy = TRUE) # creation of the weight matrix
#
# Calculation of the standardised median income:
iris75.data <- as.data.frame(iris75); names(iris75.data)
iris75.data$med_revenu_std <- scale(iris75.data$med_revenu_std)
#
#| Moran's diagram
is.vector(iris75.data$med_revenu_std)
#
x11(); moran.plot(as.vector(iris75.data$med_revenu_std), iris75.lw, labels = FALSE, xlab = "", ylab = "")
x11(); moran.plot(as.vector(iris75.data$med_revenu_std), iris75.lw, labels = as.character(iris75.data$CODE_COM), pch = 19)
#
# https://r-spatial.github.io/spdep/reference/moran.plot.html
#
#| Moran's I test
moran.test(iris75.data$med_revenu_std, iris75.lw, zero.policy = TRUE, randomisation = FALSE)
moran.test(s$Income, lw, alternative = "greater")
#
# Monte-Carlo simulation of Moran's I
MC<- moran.mc(s$Income, lw, nsim=999, alternative="greater")
plot(MC)
# https://mgimond.github.io/simple_moransI_example/
# https://www.paulamoraga.com/book-spatial/spatial-autocorrelation.html
#
# Calculation of the range of Moran's I
moran.range <- function(lw) {
  wmat <- listw2mat(lw)
  return(range(eigen((wmat + t(wmat))/2)$values))
}
moran.range(iris75.lw)
#
lisa_revenus <- localmoran(as.vector(iris75.data$med_revenu_std), iris75.lw, zero.policy = TRUE)
#
# https://www.insee.fr/en/statistiques/fichier/3635545/imet131-g-chapitre-3.pdf
# https://comeetie.github.io/quantilille/lecture/lecture.html
#
map <- st_read(system.file("shapes/boston_tracts.shp", package = "spData"), quiet = TRUE)
map$vble <- map$MEDV
mapview(map, zcol = "vble")
#
# https://r-spatial.org/book/15-Measures.html
# https://mgimond.github.io/Spatial/spatial-autocorrelation-in-r.html
# https://cran.r-project.org/web/packages/lctools/vignettes/SpatialAutocorrelation.html
# https://www.efgs.info/wp-content/uploads/informationbase/introduction/Handbook_of_Spatial_Analysis_INSEE_EUROSTAT_2018.pdf
#
# Local Indicators of Spatial Association (LISA)
# Regionalization with dynamically constrained agglomerative clustering and partitioning
# Automatic zoning procedure (AZP)
# Max-p greedy
# https://geodacenter.github.io/rgeoda/articles/rgeoda_tutorial.html
#
# Conditional Permutations with sfdep
library(sfdep)
nb <- st_contiguity(guerry)
wt <- st_weights(nb)
x <- guerry$crime_pers
p_nb <- cond_permute_nb(nb)
p_wt <- st_weights(p_nb)
observed <- global_c(x, p_nb, p_wt)
reps <- replicate(199, {
  p_nb <- cond_permute_nb(nb)
  p_wt <- st_weights(p_nb)
  global_c(x, p_nb, p_wt)[["C"]]
})
# simulated p-value: pseudo p-value using the formula (M+1)/(R+1):
(sum(observed[["C"]] <= reps) + 1) / (199 + 1)
# https://cran.r-project.org/web/packages/sfdep/vignettes/conditional-permutation.html
#
library(rspat)
auck <- spat_data("auctb"); names(auck)
w <- relate(auck, relation = "rook") # relation="touches"; compute the spatial weights
Gi <- autocor(auck$TB, w, "Gi") # compute the Getis Gi
grays <- rev(gray(seq(0, 1, .2)))
auck$Gi <- Gi
plot(auck, "Gi", col = grays)
diag(w) <- TRUE
auck$Gistar <- autocor(auck$TB, w, "Gi*")
plot(auck, "Gistar", main = "Gi*", col = grays)
auck$loc_mean <- apply(w, 1, function(i) mean(auck$TB[i]))
plot(auck, "loc_mean", main = "Local mean", col = grays)
library(spdep)
sfauck <- sf::st_as_sf(auck)
wr <- poly2nb(sfauck, row.names = sfauck$Id, queen = FALSE)
lstw <- nb2listw(wr, style = "B")
auck$Ii <- localmoran(auck$TB, lstw)
## Warning: [[[<-,SpatVector] only using the first column
plot(auck, "Ii", main = "Local Moran", col = grays)
auck$TBdens <- 10000 * auck$TB / expanse(auck)
# References
#https://rspatial.org/raster/analysis/analysis.pdf
#https://geodacenter.github.io/rgeoda/articles/rgeoda_tutorial.html#install-rgeoda
# https://www.statisticshowto.com/morans-i/
# https://rspatial.org/rosu/Chapter8.html
# 12- PERFORM A RELATIVE WEIGHTS ANALYSIS (RWA):
rm(list=ls())
#
data=read.table(file("clipboard"),header=TRUE,
sep="\t", dec=".", row.names=1)
str(data)
cbind(names(data))
library(rwa)
library(tidyverse)
library(ggplot2)
data %>%
  rwa(outcome = "A",
      predictors = c("X", "Y", "Z"),
      applysigns = TRUE) %>%
  plot_rwa()
library(relaimpo)
scaled_ratings <- data.frame(scale(data[,-1]))
ols.sat <- lm(A ~ X + Y + Z, data = data)
summary(ols.sat)
calc.relimp(ols.sat, type = c("lmg"), rela = TRUE)
#
relimp <- calc.relimp(ols.sat, type = "lmg", rela = TRUE)
str(relimp)
plot(relimp)
# Bootstrapped confidence intervals for the relative importance:
boot_rel <- boot.relimp(ols.sat, type = "lmg", nboot = 1000)
boot_eval <- booteval.relimp(boot_rel)
plot(boot_eval)
# https://www.r-bloggers.com/2012/08/the-relative-importance-of-predictors-let-the-games-begin/
#https://martinctc.github.io/rwa/
#https://core.ecu.edu/wuenschk/MV/multReg/Relative_Weights_Analysis.pdf
# 13- EMPIRICAL ORTHOGONAL FUNCTION ANALYSIS (EOF):
require(wql)
# Create an annual matrix time series
chla1 <- aggregate(sfbayChla, 1, mean, na.rm = TRUE)
chla1 <- chla1[, 1:12] # remove stations with missing years
# eofNum (see examples) suggests n = 1
eof(chla1, 1)
#
# REOF:      a matrix with rotated EOFs
# amplitude: a matrix with amplitude time series of the REOFs
# eigen.pct: all eigenvalues of the correlation matrix as a percent of total variance
# variance:  variance explained by the retained EOFs
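# Minimal sketch: store the eof() result and inspect the components described above:
e <- eof(chla1, n = 1)
str(e, max.level = 1) # REOF, amplitude, eigen.pct, variance
head(e$amplitude)
e$variance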
#
# Create an annual time series data matrix from the sfbay chlorophyll data
# Average over each year
chla1 <- aggregate(sfbayChla, 1, mean, na.rm = TRUE)
chla1 <- chla1[, 1:12] # remove stations with missing years
x11();eofNum(chla1)
# These stations appear to act as one with respect to chlorophyll variability
# on the annual scale because there is one dominant EOF.
#
# Create an annual matrix time series
chla1 <- aggregate(sfbayChla, 1, mean, na.rm = TRUE)
chla1 <- chla1[, 1:12] # remove stations with missing years
# eofNum (see examples) suggests n = 1
e1 <- eof(chla1, n = 1)
eofPlot(e1, type = 'coef')
eofPlot(e1, type = 'amp')
"Have fun!
If you find any errors or codes that do not work,
please feel free to write to me at abdi-basid@outlook.com."
Abdi-Basid ADAN
[1] If a package does not load in your environment, install it with the command install.packages('package_name'). The same applies to all other packages.
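# A small convenience sketch: install a package only when it is not already available
# (the package names below are just examples):
pkgs <- c("FactoMineR", "factoextra", "spdep")
for (p in pkgs) {
  if (!requireNamespace(p, quietly = TRUE)) install.packages(p)
}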