Saturday, August 24, 2024

Some Preliminary Multivariate Statistical Methods in a Geographical Application using R Programming Language: Principal Component Analysis (PCA rotated, PCA unrotated), Hierarchical Cluster Analysis (HCA), K-means, Spatial Autocorrelation, Relative Weight Analysis (RWA) and Empirical Orthogonal Function Analysis (EOF)

 

 

Abdi-Basid ADAN

"The purpose of this document is to consolidate and improve the various R scripts used to perform the cited analyses."

 

Table of Contents

 

# 1- DATA IMPORT AND BASIC STATISTICS

# 2- DATA QUALITY VERIFICATION

# 3- DESCRIPTIVE STATISTICS

# 4- BIVARIATE ANALYSIS

# 5- DATA TRANSFORMATION

# 6- STANDARDIZED PCA

# 7- ROTATED AND STANDARDIZED PCA

# 8- HIERARCHICAL CLASSIFICATION ANALYSIS

# 9- K-MEANS

# 10- DISCORDANCE, HETEROGENEITY AND HOMOGENEITY OF CLUSTERS (WARD METHOD)

# 11- SPATIAL AUTOCORRELATION

# 12- PERFORM A RELATIVE WEIGHTS ANALYSIS (RWA)

# 13- EMPIRICAL ORTHOGONAL FUNCTION ANALYSIS (EOF)

 

# 1- DATA IMPORT AND BASIC STATISTICS:

 

rm(list=ls()) # Clear the workspace

#

data=read.table(file("clipboard"),header=TRUE, sep="\t", dec=".", row.names=1)

# data = read.table(file.choose(), header=TRUE, sep="\t", dec=".")

str(data)

dim(data)

cbind(names(data))
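# The scripts below assume a numeric data frame named 'data' (sites in rows,
# variables in columns). A minimal hypothetical example, so the code can be
# tested without clipboard input (skip it if you imported your own data above;
# all variable names here are placeholders):
set.seed(1)
data <- data.frame(F  = rnorm(30, 1.2, 0.4),   # e.g. fluoride (mg/L)
                   Na = rnorm(30, 150, 40),
                   Ca = rnorm(30, 80, 20),
                   pH = rnorm(30, 7.5, 0.3))
row.names(data) <- paste0("site", 1:30)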

 

 

# 2- DATA QUALITY VERIFICATION:

 

names(data)

attach(data)

dim(data)

table(is.na(data))

round(table(is.na(data))[2]/(table(is.na(data))[1]+table(is.na(data))[2])*100,2) # % TRUE = % of missing cells (fails if there are no NAs; see the sketch below)

# data=data[,-1]

datan=na.omit(data)

names(datan)

str(data)

datu= unique(datan)

datd = datan[!duplicated(datan), ] # drop duplicated rows

library(questionr) # [1]

freq(is.na(data))
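# A more robust sketch for the percentage of missing values (the table()-based
# line above breaks when the data contain no NA at all):
round(mean(is.na(data)) * 100, 2)      # overall % of missing cells
round(colMeans(is.na(data)) * 100, 2)  # % of missing cells per variable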

 

 

 

 

 

 

 

# 3- DESCRIPTIVE STATISTICS:

 

library(pastecs)

 round(stat.desc(data),2)

library(prettyR)

describe(data,num.desc=c("mean","max","min","sd","var","median","valid.n"),xname=NA,horizontal=TRUE)

#

str(data)

summary(data)

library(Hmisc); describe(data)

#

#

library(psych); lowerCor(data)

round(cor(data, use="complete.obs", method="pearson"),3)

round(cor(na.omit(data)),3)

#

quantile(data$Fmg, 0.75) # third quartile

IQR(data$Fmg)

lapply(data[, 1:4], sd)

by(data, data$Fmg, summary)
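# A compact base-R alternative to the packages above (a sketch, assuming all
# columns are numeric):
desc <- function(x) c(n = sum(!is.na(x)), mean = mean(x, na.rm = TRUE),
                      sd = sd(x, na.rm = TRUE), min = min(x, na.rm = TRUE),
                      median = median(x, na.rm = TRUE), max = max(x, na.rm = TRUE))
round(t(sapply(data, desc)), 2)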

 

 

# 4- BIVARIATE ANALYSIS:

 

# datan = na.omit(data)

library(gclus)

corr <- abs(cor(datan))

colors <- dmat.color(corr)

order <- order.single(corr)

#

x11() ;cpairs(data,order,panel.colors = colors,border.color = "grey70", gap = 0.45,main = "", show.points = TRUE,pch = 21,bg = rainbow(3))

 # rainbow(3)[iris$Species]

x11(); pairs(data, labels = colnames(data),  pch = 21,  bg = rainbow(3), 

 col = rainbow(3),main = "",row1attop = TRUE,gap = 1, cex.labels = NULL,font.labels = 1)        

# https://r-coder.com/correlation-plot-r/

#

require(Hmisc)

library(psych)

library(corrplot)

#

x11();corPlot(data, cex = 0.3)

cor_test_mat <- corr.test(data)$p; corrplot(cor_test_mat)

corrplot(cor(datan),p.mat = cor_test_mat,insig = "p-value", cex=0.1)

# https://r-coder.com/correlation-plot-r/

# https://statisticsglobe.com/add-p-values-correlation-matrix-plot-r

#

require(Hmisc)

# col <- colorRampPalette(c("blue","cyan","green"))(2000) # alternative palette

col <- colorRampPalette(c("yellow", "orange", "red"))(2000)

heatmap(x = cor(data), col = col, symm = TRUE)

x11();corrplot(cor(data,use="complete.obs", method="pearson"), type="upper", order="hclust", tl.col="black", tl.srt=45)

#

M <- cor(data,use="complete.obs", method="pearson")

ord <- corrMatOrder(M, order = "AOE")

M2 <- M[ord,ord]

 

col1 <- colorRampPalette(c("#7F0000", "red", "#FF7F00", "yellow", "white", "cyan", "#007FFF", "blue", "#00007F"))

col2 <- colorRampPalette(c("#67001F", "#B2182B", "#D6604D", "#F4A582", "#FDDBC7", "#FFFFFF", "#D1E5F0", "#92C5DE", "#4393C3", "#2166AC", "#053061"))

col3 <- colorRampPalette(c("red", "white", "blue"))

col4 <- colorRampPalette(c("#7F0000", "red", "#FF7F00", "yellow", "#7FFF7F", "cyan", "#007FFF", "blue", "#00007F"))

col5<- colorRampPalette(c("blue","cyan","green","white","yellow", "orange", "red"))

whiteblack <- c("white", "black")

 

corrplot.mixed(M2)

cor.test(data$H, data$I, method="pearson", alternative="two.sided")
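# All pairwise Pearson tests at once, with Holm-adjusted p-values in the upper
# triangle (a sketch using psych::corr.test, loaded above):
ct <- corr.test(datan, method = "pearson", adjust = "holm")
round(ct$r, 2)  # correlation matrix
round(ct$p, 3)  # p-values (upper triangle: adjusted for multiple testing)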

 

# 5- DATA TRANSFORMATION:

 

library(compositions)

# centered log-ratio transformation

clr(X)

# back-transformation

clrInv(clr(X))

# isometric log-ratio transformation (D-1)

ilr(X)

# back-transformation

ilrInv(ilr(X))

# additive log-ratio transformation (D-1)

alr(X)

# back-transformation

alrInv(alr(X))

# https://www.geo.fu-berlin.de/en/v/soga-r/Advances-statistics/Feature-scales/Logratio_Transformations/index.html#:~:text=In%20R%2Dpackage%20compositions%20the,then%20taken%20as%20a%20composition).
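# 'X' above stands for a compositional data matrix (parts of a whole, e.g.
# percentages). A minimal hypothetical 3-part composition to make the calls runnable:
library(compositions)
X <- acomp(data.frame(sand = c(30, 50, 20),
                      silt = c(40, 30, 30),
                      clay = c(30, 20, 50)))
clr(X)          # D clr coordinates per sample
ilr(X)          # D-1 ilr coordinates per sample
clrInv(clr(X))  # back-transformation returns the closed composition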

 

 

 

 

# 6- STANDARDIZED PCA:

 

library(SpatialNP)

library(performance)

library(EFAtools)

library(factoextra)

library(FactoMineR)

 

# KMO (Kaiser-Meyer-Olkin) : Data suitable for PCA?

KMO( data[,-1], use = c("na.or.complete"),cor_method = c("kendall"))

 

res.pca <- PCA(data, scale.unit=TRUE, graph=FALSE) # ,quali.sup = 1

res.pca$eig

var <- get_pca_var(res.pca) ;var

var$coord

ind <- get_pca_ind(res.pca); ind

 

fviz_contrib(res.pca, choice = "var", axes = 1, top = 10)

x11(); fviz_contrib(res.pca, choice = "ind", axes = 1, top = 10)

x11(); fviz_pca_ind(res.pca, col.ind="cos2")

fviz_pca_var(res.pca, col.var="cos2", repel = TRUE, axes = c(1,3),

             select.var = list(cos2 = 16)) + theme_grey()

x11(); fviz_pca_biplot(res.pca,cex=0.5)

# https://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/
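# How many components to keep, from res.pca$eig computed above (a sketch):
eig <- res.pca$eig
sum(eig[, 1] > 1)          # Kaiser criterion: eigenvalues > 1
which(eig[, 3] >= 80)[1]   # first component reaching 80% cumulative variance
x11(); fviz_eig(res.pca, addlabels = TRUE)  # scree plot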

 

# Geographically Weighted Principal Components Analysis :

library(GWmodel)      # GW models

library(sp)           # Data management

library(spdep)        # Spatial autocorrelation

library(gstat)        # Geostatistics

library(RColorBrewer) # Visualization

library(classInt)     # Class intervals

library(raster)       # spatial data

library(gridExtra)    # Multiple plot

library(ggplot2)      # Multiple plot

library(GWnnegPCA)

#

# Import shapefile & CSV data

dataFolder<-"D:\\Dropbox\\Spatial Data Analysis and Processing in R\\Data_GWR\\"

COUNTY<-shapefile(paste0(dataFolder,"COUNTY_ATLANTIC.shp"))

df<-read.csv(paste0(dataFolder,"data_atlantic_1998_2012.csv"), header=T)

#

SPDF<-merge(COUNTY,df, by="FIPS")

#

pepper.df <- as.data.frame(rcrop) # 'rcrop' is the crop raster from the linked tutorial (not created here)

table(is.na(pepper.df))

head(pepper.df)

pca <- prcomp(pepper.df, scale. = TRUE) # centering is on by default; 'cor' is a princomp() argument, not prcomp()

(pca$sdev^2 / sum(pca$sdev^2)) * 100

pca$rotation

#

# https://cran.r-project.org/web/packages/GWnnegPCA/GWnnegPCA.pdf

# https://zia207.github.io/geospatial-r-github.io/geographically-weighted-principal-components-analysis.html

 

 

 

 

 

 

 

 

 

 

# 7- ROTATED AND STANDARDIZED PCA:

 

library(ade4)

library(psych)

#

acp.varimax <- principal(r=scale(data[,-1]), nfactors=5, rotate="varimax", scores=T)

acp.varimax

x11();s.corcircle(acp.varimax$loadings[1:length(data[,-1]),], xax = 1, yax = 2)

x11();s.corcircle(acp.varimax$loadings[1:length(data[,-1]),], xax = 1, yax = 3)

x11();s.corcircle(acp.varimax$loadings[1:length(data[,-1]),], xax = 1, yax = 4)

x11(); biplot(principal(datan[,-1], nfactors = 2, rotate = "varimax"), labels = row.names(datan)); abline(v=0, h=0)

# Varimax

library(psych)

varimax7 <- principal(r=na.omit(data), nfactors=dim(data)[2], rotate="varimax", scores=FALSE)

# varimax7 <- varimax(pca_cars$rotation) # alternative on a prcomp() result named 'pca_cars' (not defined here)

promaxx <- principal(r=na.omit(data), nfactors=dim(data)[2], rotate="promax", scores=FALSE)

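# Self-contained sketch comparing unrotated and varimax-rotated loadings on a
# built-in dataset (USArrests), to visualise what the rotation does:
library(psych)
p_unrot <- principal(scale(USArrests), nfactors = 2, rotate = "none")
p_varim <- principal(scale(USArrests), nfactors = 2, rotate = "varimax")
print(p_unrot$loadings, cutoff = 0.3)
print(p_varim$loadings, cutoff = 0.3)  # loadings concentrate on fewer variables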

 

# 8- HIERARCHICAL CLASSIFICATION ANALYSIS:

 

data = read.table(file("clipboard"),header=TRUE, sep="\t", dec=".", row.names=1)

str(data)

dim(data)

cbind(names(data))

 

library(caret)

library(DescTools)

library(ggplot2)

library(fpc)

library(FactoMineR)

library(cluster)

library(factoextra)

library('WeightedCluster')

 

# Choosing the optimal number of clusters to retain

library(NbClust)

res.nbclust <- NbClust(scale(data), distance = "euclidean", min.nc = 2, max.nc = 9,  method = "complete", index ="all")

cbind(names(data))

dd <- dist(scale(data ), method = "euclidean")

hc <- hclust(dd, method = "ward.D2")

hcd <- as.dendrogram(hc)

#

hc1 <- hclust(dd, method = "complete")

hc2 <- hclust(dd, method = "average")

hc3 <- hclust(dd, method = "single")

hc4 <- hclust(dd, method = "centroid")

hc5 <- hclust(dd, method = "mcquitty")

hc6 <- hclust(dd, method = "median")
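# Average silhouette width for several k, as a complement to NbClust (a sketch
# reusing the distance matrix 'dd' and the Ward tree 'hc' defined above):
library(cluster)
for (k in 2:6) {
  sil <- silhouette(cutree(hc, k = k), dd)
  cat("k =", k, " mean silhouette width =", round(mean(sil[, 3]), 3), "\n")
}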

 

#

x11()

# par(new=TRUE)

N <- 4 # number of clusters to display (matches the four k_colors below)

fviz_dend(hc, lwd = 1.1, k = N, cex = 0.55, ylab = "Euclidean Distance", main = "",

          k_colors = c("red", "blue", "green", "yellow"), horiz = FALSE,

          labels_track_height = 1, ggtheme = theme_test())

#

res.pca <- PCA(data, ncp = 3, graph = FALSE)

# Compute hierarchical clustering on principal components

res.hcpc <- HCPC(res.pca, graph = FALSE)

res.hcpc1 <- HCPC(res.pca, nb.clust = 4, graph = FALSE)

#

# HCA map or factor map:

x11(); fviz_dend(res.hcpc,

          cex = 0.7,                     # Label size

          palette = "jco",               # Color palette see ?ggpubr::ggpar

          rect = TRUE, rect_fill = TRUE, # Add rectangle around groups

          rect_border = "jco",           # Rectangle color

          labels_track_height = 0.8      # Augment the room for labels

)

x11(); fviz_cluster(res.hcpc1,

             repel = TRUE,             # Avoid label overlapping

             show.clust.cent = TRUE,   # Show cluster centers

             palette = c("red", "blue", "green", "yellow"), # Color palette, see ?ggpubr::ggpar

             ggtheme = theme_minimal(),

             main = "Factor map")

#

library(FactoMineR)

data(decathlon)

res.pca = PCA(decathlon, quanti.sup=11:12,quali.sup=13)

library(Factoshiny)

resshiny = PCAshiny(res.pca)

resshiny = PCAshiny(resshiny)

# http://factominer.free.fr/graphs/factoshiny-fr.html

#-EXTRACT SUBGROUP

#

x11();plot(hc, main="",cex=0.25,xlab="",sub="",hang=-1, las=1); kk= 3; box(lwd=2, lty=1) 

# grid(lwd=2, col="grey", lty=1)

x=rect.hclust(hc,k=kk,border =1:3)

# par(new=TRUE); dev.off()

#

kk = 3

gr=cutree(hc,k= kk ); cbind(gr)

table(names(gr))

library(questionr); freq(names(gr))

# Copy the group names and replace the spaces with commas in Excel.

G = subset(gr, gr %in% c(3)) # keep the sites belonging to cluster 3

names(G)

cbind(G)

write.table(G, "G1.txt",col.names=FALSE, row.names=TRUE); getwd()

#

H=rep('@',length(G))

G=cbind(names(G),H)   # '@' will be replaced by a comma later

write.table(G, "G1.txt",col.names=FALSE, row.names=FALSE); getwd()

#

# Clustering Large Applications   

data = read.table(file("clipboard"),header=TRUE, sep="\t", dec=".")

# dataa=read.table(file("clipboard"),header=TRUE, sep="\t", dec=".")

str(data)

dim(data)

attach(data)

summary(data)

cbind(names(data))

#

require(cluster)

#  CLARA

clarax <- clara(data , 2)

clarax <-clara(data, 2, metric = "euclidean", stand = TRUE,

               samples = 50, pamLike = TRUE)

dd <- cbind(data, cluster = clarax$cluster)

head(dd, n = 4)

x11(); plot(data, col = clarax$clustering, main = "CLARA Clustering")

points(data[clarax$medoids,], col = 1:2, pch = 10, cex = 2)

#  PAM

pam_result <- pam(data, k = 2)

x11();plot(data, col = pam_result$clustering, main = "PAM Clustering")

points(data[pam_result$medoids,], col = 1:2, pch = 8, cex = 2)

# https://www.datanovia.com/en/lessons/clara-in-r-clustering-large-applications/#:~:text=the%20next%20section.-,CLARA%20Algorithm,set%20to%20the%20closest%20medoid.

kmeans_result <- kmeans(data, centers = 2) # k-means, for comparison with CLARA/PAM

x11(); plot(data, col = kmeans_result$cluster, main = "K-means Clustering")

points(kmeans_result$centers, col = 1:2, pch = 8, cex = 2)

# https://rpubs.com/folwalsh/UL

 

# 9- K-MEANS:

 

 

library(factoextra)

my_data <- scale(datan)  # Optimal number of clusters for k-means

x11(); fviz_nbclust(my_data, kmeans, method = "gap_stat")

 

# 1. Loading and preparing data

df <- scale(datan[,-1])

set.seed(123)

km.res <- kmeans(scale(datan[,-1]), 3, nstart = 25)

# 3. Visualize

library("factoextra")

x11()

fviz_cluster(km.res, data = df,

             palette = c("#00AFBB", "#E7B800", "#FC4E07"),

             ggtheme = theme_minimal(),

             main = "Partitioning Clustering Plot"

)+ theme_gray()

# K-means method 1

library(factoextra)

set.seed(111)

datam=cbind(sapply(na.omit(data), median))

df <- na.omit(datam);df <- scale(df)

distance <- get_dist(df);fviz_dist(distance, gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))

km2 <- kmeans(df, centers = 2, nstart = 25); km2

fviz_cluster(km2, data = df)  # needs data with at least 2 columns

#

## K-means method 2

library(factoextra)

res.hk <-hkmeans(scale(data), kk)

res.hk <-hkmeans(scale(t(datan)),kk)

#

fviz_dend(res.hk, cex = 0.5, palette = "jco",  rect = TRUE, rect_border = "jco", rect_fill = TRUE)

x11();fviz_cluster(res.hk, palette = "jco", repel = TRUE,  ggtheme = theme_classic())

# x11()

library("factoextra")

df <- scale(t(datan))

km.res <- eclust(df, "kmeans", k = kk, nstart = 25, graph = FALSE);km.res$cluster

fviz_cluster(km.res,  frame.type = "norm", frame.level = 0.68)

#

library(mclust);fit  = Mclust(scale(t(datan)))

#

# K-means method 3 (manual transposition)

res.hk <-hkmeans(scale(datan), 2)

fviz_cluster(res.hk, axes = c(1, 2)) + theme_bw()  # check how the parameters are split between clusters

fviz_cluster(res.hk,ellipse.type = "n", k_colors = c("#b30000", "#0000FF"))+ theme_bw() # ,geom = "point"

fviz_cluster(res.hk,ellipse.type = "norm")+ theme_bw() # ,geom = c("point", "text")

cbind(res.hk$cluster)

# hkmeans_tree(res.hk, cex = 0.6)

res.hk$tot.withinss/(res.hk$tot.withinss+res.hk$betweenss)*100

res.hk$betweenss/(res.hk$tot.withinss+res.hk$betweenss)*100
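# Elbow method, as a complement to fviz_nbclust above (a sketch): total
# within-cluster sum of squares for k = 1..8 on the scaled data.
wss <- sapply(1:8, function(k) kmeans(scale(datan), centers = k, nstart = 25)$tot.withinss)
x11(); plot(1:8, wss, type = "b", xlab = "Number of clusters k",
            ylab = "Total within-cluster sum of squares")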

# 10- DISCORDANCE, HETEROGENEITY AND HOMOGENEITY OF CLUSTERS (WARD METHOD):

 

# Group of Cluster

cbind(names(data))

cbind(row.names(data))

cbind(data.frame(res.hk$cluster)[,1]); table(data.frame(res.hk$cluster)[,1])

#

grp1=data[,c(3,5,8,9,12,14,16,27,26,34)]; names(grp1); length(grp1)

grp2=data[,-c(3,5,8,9,12,14,16,27,26,34)];names(grp2); length(grp2)

#grp3=data[-c(3,5,8,9,12,14,16,27,26,34,20,32,24),];names(grp3); length(grp3) # Severity Duration

 

#

grp11=data[,c(3,5,8,9,12,14,16,6,34,10,15,17,28,19,7)];  cbind(names(grp11));length(grp11)

grp22=data[,-c(3,5,8,9,12,14,16,6,34,10,15,17,28,19,7)]; cbind(names(grp22));length(grp22)

#

grp11s=data[,c(       5,8,  15, 12,14,16,6,34,  19,  17,28  )]; cbind(names(grp11s));length(grp11s)

grp22s=data[,-c(3,9,  5,8,  15, 12,14,16,6,34,  19,  17,28  )];cbind(names(grp22s));length(grp22s)

#

#

library(lmomco);library(lmomRFA)

summary(regtst(regsamlmu(grp1))); regtst(regsamlmu(grp1))

summary(regtst(regsamlmu(grp2))); regtst(regsamlmu(grp2))

#summary(regtst(regsamlmu(grp3)))

summary(regtst(regsamlmu(grp11)));  regtst(regsamlmu(grp11))

summary(regtst(regsamlmu(grp22)));  regtst(regsamlmu(grp22))

#

summary(regtst(regsamlmu(grp11s))); regtst(regsamlmu(grp11s))

summary(regtst(regsamlmu(grp22s))); regtst(regsamlmu(grp22s))

#

#-----Homogeneity Test H value :

Vi1=regsamlmu(grp1)[2]*(regsamlmu(grp1)[3]-sapply(regsamlmu(grp1)[3], mean))^2/ regsamlmu(grp1)[2]

Vi2=regsamlmu(grp2)[2]*(regsamlmu(grp2)[3]-sapply(regsamlmu(grp2)[3], mean))^2/ regsamlmu(grp2)[2]

#

V1=sqrt(sum(Vi1))

V2=sqrt(sum(Vi2))

VBAR=mean(c(V1,V2)); VSD=sd(c(V1,V2))

#

H1=(V1-VBAR)/VSD; H1

H2=(V2-VBAR)/VSD; H2

#-----Homogeneity Test H value

Vi11=regsamlmu(grp11)[2]*(regsamlmu(grp11)[3]-sapply(regsamlmu(grp11)[3], mean))^2/ regsamlmu(grp11)[2]

Vi22=regsamlmu(grp22)[2]*(regsamlmu(grp22)[3]-sapply(regsamlmu(grp22)[3], mean))^2/ regsamlmu(grp22)[2]

#

V11=sqrt(sum(Vi11))

V22=sqrt(sum(Vi22))

VBAR=mean(c(V11,V22)); VSD=sd(c(V11,V22))

#

H11=(V11-VBAR)/VSD; H11

H22=(V22-VBAR)/VSD; H22

#-----Homogeneity Test H value

Vi11s=regsamlmu(grp11s)[2]*(regsamlmu(grp11s)[3]-sapply(regsamlmu(grp11s)[3], mean))^2/ regsamlmu(grp11s)[2]

Vi22s=regsamlmu(grp22s)[2]*(regsamlmu(grp22s)[3]-sapply(regsamlmu(grp22s)[3], mean))^2/ regsamlmu(grp22s)[2]

#

V11s=sqrt(sum(Vi11s))

V22s=sqrt(sum(Vi22s))

VBAR=mean(c(V11s,V22s)); VSD=sd(c(V11s,V22s))

#

H11s=(V11s-VBAR)/VSD; round(H11s,3)

H22s=(V22s-VBAR)/VSD; round(H22s,3)
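# Note: regtst() computes the Hosking-Wallis H statistics directly, simulating
# VBAR and VSD from a fitted kappa distribution, so the manual computation
# above is only a rough approximation. A sketch:
rt1 <- regtst(regsamlmu(grp1), nsim = 500)
rt1$H  # H[1] is based on V (L-CV), H[2] and H[3] on V2 and V3
rt1$D  # discordancy measure for each site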

#-#-#-#-#

rdat <- as.regdata(regsamlmu(grp3)) # a "regional" data structure; do not overwrite 'data' (grp3 is defined in the bivariate discordancy block below)

Dhosking <- sort(regtst(rdat)$D, decreasing=TRUE) # Discordancy

Dlmomco <- lmrdiscord(site=rdat$name, t2=rdat$t, t3=rdat$t_3, t4=rdat$t_4)

Dasquith <- Dlmomco$D

Dasquith

# Now show the site id and the two discordancy computations

print(data.frame(NAME=rdat$name, Dhosking=Dhosking, Dasquith=Dasquith))

#-page-266 https://cran.csiro.au/web/packages/lmomco/lmomco.pdf

#

lmrd(samlmu(t(data[,1])))

lmrd(samlmu(t(data[,1])), xaxs="i", yaxs="i", las=1)

aa <- apply(t(data), 1, samlmu)

aaa=regsamlmu(data)

mean=cbind(sapply(data, mean,na.rm = TRUE))

n=cbind(sapply(data, length))

# bb=t(aa)

# name=row.names(t(data))

# cc<-data.frame(name=name, n=n,mean=mean)

# dd=cbind(aa,cc)

regtst(aaa, nsim=500)

regtst(regsamlmu(data))

#https://cran.r-project.org/web/packages/lmomRFA/lmomRFA.pdf

#https://rdrr.io/cran/lmomRFA/man/regtst.html

#---------------------------        Graph regst

#https://rdrr.io/cran/lmomco/man/plotlmrdia.html

#https://rdrr.io/cran/lmom/man/lmrd.html

#------------------Discordance or dissimilarity

#https://cran.rstudio.com/web/packages/fasthplus/readme/README.html

#https://cran.r-project.org/web/packages/fasthplus/vignettes/fasthplus.html

#

#  Bivariate Discrdancy Test

library(lmomco);library(lmomRFA)

cbind(row.names(data))

grp3=data[c(32,35,1,18,34,29,2,25,23,24,9,31),]

grp4=data[-c(32,35,1,18,34,29,2,25,23,24,9,31),]

# L-comoment ratios (L-covariance, L-coskewness and L-cokurtosis) in the bivariate case

D <- data.frame(X1=rnorm(30), X2=rnorm(30), X3=rnorm(30))

L1 <-     Lcomoment.matrix(D,k=1)

L2 <-     Lcomoment.matrix(D,k=2)

L3 <-     Lcomoment.matrix(D,k=3)

LkLCV <-  Lcomoment.coefficients(L1,L2)

LkTAU3 <- Lcomoment.coefficients(L3,L2)

Lcomoment.correlation(L2)

X1=rnorm(101);X2=rnorm(101)+X1

Lcoskew12=Lcomoment.Lk12(X1,X2)

Lcorr12=Lcomoment.Lk12(X1,X2,k=2)/Lcomoment.Lk12(X1,X1,k=2)

rhop12=cor(X1,X2, method="pearson")

print(Lcorr12-rhop12)

#

X=rnorm(200);Y=X+rexp(200)

z=lcomoms2(data.frame(X=X, Y=Y),diag=TRUE)

zz=lcomoms2(data.frame(X=X, Y=Y),opdiag=TRUE)

#

X=grp3[,1];Y=grp3[,2]

z=lcomoms2(grp3,diag=TRUE)

zz=lcomoms2(data.frame(X=X, Y=Y),opdiag=TRUE)

lmrdiscord(site=row.names(grp3), t2=zz$T2, t3=zz$T3, t4=zz$T4)

#

regtst(regsamlmu(grp3))

#dd<-data.frame(

#  name      =c("site 1", "site 2", "site 3"),

#  n         =c(  20,   30,   40),

#  mean      =c( 100,  110,  120),

#  LCV       =c(0.20, 0.25, 0.30),

#  L_skewness=c(0.15, 0.20, 0.25),

#  L_kurtosis=c(0.10, 0.15, 0.20),

#  t_5       =c(0.10, 0.12, 0.14))

# Convert to class "regdata"

# rdd<-as.regdata(dd)

# regtst(rdd, nsim=1000)

#https://cran.rstudio.org/web/packages/lmomco/lmomco.pdf

#

#

# Quality Measure of Cluster with Weight Cluster

??WeightedCluster

library(WeightedCluster)

str(mvad)

mvad

names(mvad)

#

mvad.alphabet <- c("employment", "FE", "HE", "joblessness", "school", "training")

mvad.labels <- c("Employment", "Further Education", "Higher Education","Joblessness", "School", "Training")

mvad.scodes <- c("EM", "FE", "HE", "JL", "SC", "TR")

mvadseq <- seqdef(mvad[, 17:86], alphabet = mvad.alphabet,   states = mvad.scodes, labels = mvad.labels,weights = mvad$weight, xtstep = 6)

subm.custom <- matrix(

  c(0, 1, 1, 2, 1, 1,

    1, 0, 1, 2, 1, 2,

    1, 1, 0, 3, 1, 2,

    2, 2, 3, 0, 3, 1,

    1, 1, 1, 3, 0, 2,

    1, 2, 2, 1, 2, 0),nrow = 6, ncol = 6, byrow = TRUE)

mvaddist <- seqdist(mvadseq, method = "OM", indel = 1.5, sm = subm.custom) # optimal-matching distance matrix

wardCluster <- hclust(as.dist(mvaddist), method="ward.D", members=mvad$weight)

wardTree <- as.seqtree(wardCluster, seqdata=mvadseq, diss=mvaddist, ncluster=6)

seqtreedisplay(wardTree, type="d", border=NA, showdepth=TRUE)

#

#

#

# Homogeneity test between clusterings (as 2 vectors)

gr1=cbind(c(data[,3],data[,5],data[,8],data[,9],data[,12],data[,14],data[,16] ,data[,34],data[,10],data[,15],data[,17],data[,28],data[,19],data[,7]))

gr2=cbind(c(data[,23],data[,24],data[,25],data[,26],data[,27],data[,29],data[,30],data[,31],data[,32],data[,33],data[,1],data[,2],data[,4],data[,6],data[,11],data[,13],data[,18],data[,20],data[,21],data[,22]))

 

n1=rep("grp1",length(gr1))

n2=rep("grp2",length(gr2))

m=cbind(c(gr1,gr2))

mm=cbind(c(n1,n2))

ndata=data.frame(m,mm)

library(car);leveneTest(m ~ mm, data = ndata)

library(car);fligner.test(m ~ mm, data = ndata)

#https://www.datanovia.com/en/lessons/homogeneity-of-variance-test-in-r

#

library(clv)

datas=scale(t(datan),center=TRUE); km=kmeans(datas,2); km

cls.scatt.data(datas,res.hk$cluster); aggregate(t(datan), list(res.hk$cluster), mean)

#

# Homogeneity between clusterings (as 2 vectors), which MUST have the same length

library(clevr)

#true <- c(1,1,1,2,2)  # ground truth clustering

#pred <- c(1,1,2,2,2)  # predicted clustering

#

grp11s=data[,c(      5,8,  15, 12,14,16,6,34,  19,  17,28  )]; cbind(names(grp11s));length(grp11s)

grp22s=data[,-c(3,9, 5,8,  15, 12,14,16,6,34,  19,  17,28  )];cbind(names(grp22s));length(grp22s)

#

true=cbind(c(data[,5],data[,8],data[,15],data[,12],data[,14],data[,16],data[,6],data[,34],data[,19],data[,17],data[,28]) )

pred=cbind(c(data[,34],data[,10],data[,15],data[,17],data[,28],data[,19],data[,7]))  

homogeneity(true, pred) # NB: clevr::homogeneity() expects two equal-length vectors of cluster labels

#

# The homogeneity score ranges between 0 and 1, where 1 indicates perfect homogeneity

#https://rdrr.io/cran/clevr/man/homogeneity.html

#

# Homogeneity: Calculate within-cluster

library(cata)

# homogeneity(grp1) # cata::homogeneity() expects a 3-way array (see the bread example below)

homogeneity(bread$cata[1:7,,1:6])

# https://rdrr.io/cran/cata/man/homogeneity.html

#SOUS ANNEXE

# ANOVA analysis in clustering (as 2 vectors)

fit = lm(m ~ mm, data = ndata) # avoid naming the model 'lm' (it masks the function)

anova(fit)

#

cbind(names(data))

gr1=data[,c(5,8,9,12,14,  16,17,26,34)]; dim(gr1)[2]

gr2=data[,-c(5,8,9,12,14, 16,17,26,34)]; dim(gr2)[2]

#

names(gr1)

names(gr2)

#

gr11=cbind(c(gr1[,1],gr1[,2],gr1[,3],gr1[,4],gr1[,5],gr1[,6],gr1[,7],gr1[,8],gr1[,9]))

#

gr22=cbind(c(gr2[,1],gr2[,2],gr2[,3],gr2[,4],gr2[,5],gr2[,6],gr2[,7],gr2[,8],gr2[,9],gr2[,10],

             gr2[,11],gr2[,12],gr2[,13],gr2[,14],gr2[,15],gr2[,16],gr2[,17],gr2[,18],gr2[,19],gr2[,20],

             gr2[,21] ,gr2[,22],gr2[,23],gr2[,24],gr2[,25],gr2[,26] ))

#gr11=cbind(c(median(na.omit(gr1[,1])), median(na.omit(gr1[,2])), median(na.omit(gr1[,3])), median(na.omit( gr1[,4])), median(na.omit(gr1[,5])), median(na.omit(gr1[,6])), median(na.omit(gr1[,7])), median(na.omit(gr1[,8])), median(na.omit(gr1[,9]))  ))

#gr22=cbind(c(median(na.omit(gr2[,1])), median(na.omit(gr2[,2])), median(na.omit(gr2[,3])), median(na.omit(gr2[,4])),  median(na.omit(gr2[,5])), median(na.omit(gr2[,6])), median(na.omit(gr2[,7])), median(na.omit(gr2[,8])), median(na.omit(gr2[,9])), median(na.omit(gr2[,10])), median(na.omit(gr2[,11])),median(na.omit(gr2[,12])),median(na.omit(gr2[,13])),median(na.omit(gr2[,14])),median(na.omit(gr2[,15])),median(na.omit(gr2[,16])),median(na.omit(gr2[,17])),median(na.omit(gr2[,18])),median(na.omit(gr2[,19])),median(na.omit(gr2[,20])), median(na.omit(gr2[,21])),median(na.omit(gr2[,22])),median(na.omit(gr2[,23])),median(na.omit(gr2[,24])),median(na.omit(gr2[,25])),median(na.omit(gr2[,26])) ))

#

gr12=rbind(gr11,gr22)

#

grp11=data[,c(3,5,8,9,12,14,16        ,34,    10,15,17,28,19,7)]

grp22=data[,-c(3,5,8,9,12,14,16       ,34,    10,15,17,28,19,7)]

group= c(2,2,1,2,1,2,1,1,1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2)

#        1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35

#

gr1=cbind(c(data[,3],data[,5],data[,8],data[,9],data[,12],data[,14],data[,16] ,data[,34],data[,10],data[,15],data[,17],data[,28],data[,19],data[,7]))

gr2=cbind(c(data[,23],data[,24],data[,25],data[,26],data[,27],data[,29],data[,30],data[,31],data[,32],data[,33],data[,1],data[,2],data[,4],data[,6],data[,11],data[,13],data[,18],data[,20],data[,21],data[,22]))

ybar= mean(na.omit(c(gr1,gr2)))

eff1=length(gr1);hybar1=mean(na.omit(gr1)); var1=var(na.omit(gr1))

eff2=length(gr2);hybar2=mean(na.omit(gr2)); var2=var(na.omit(gr2))

#

# within-cluster sums of squares:  minimize

# Between-cluster sums of squares: maximize

var_inter=((eff1*(hybar1^2))+(eff2*(hybar2^2)))/(eff1+eff2)-(ybar^2);var_inter # note the parentheses: the whole weighted sum is divided by the total size

var_intra =(((eff1*var1)+(eff2*var2))/sum(eff1+eff2));var_intra

var_tot=var_inter+var_intra;var_tot

var_intra/var_tot*100

var_inter/var_tot*100

#https://www.univ-montp3.fr/miap/ens/site/uploads/Main.StatL2S4/Cours11E46XP3.pdf

SSW=sum((na.omit(gr1)-hybar1)^2) +sum((na.omit(gr2)-hybar2)^2);SSW

SSB=sum((rep(hybar1,length(gr1))-ybar)^2) + sum((rep(hybar2,length(gr2))-ybar)^2);SSB

SST=sum((na.omit(gr1)-ybar)^2) +sum((na.omit(gr2)-ybar)^2);SST

SSW/SST*100  # minimize

SSB/SST*100  # maximize

#https://towardsdatascience.com/explain-ml-in-a-simple-way-k-means-clustering-e925d019743b

ybar= mean(na.omit(c(gr12)))

eff1=length(na.omit(gr11)); eff2=length(na.omit(gr22)); s11=sum(na.omit(gr11));s22=sum(na.omit(gr22));s11q=sum(na.omit(gr11^2));s22q=sum(na.omit(gr22^2))

hybar1=mean(na.omit(gr11)); hybar2=mean(na.omit(gr22))

var1=(s11q/eff1)-(hybar1^2);var2=(s22q/eff2)-(hybar2^2) #var(na.omit(gr11)) var2=var(na.omit(gr22))

moycond1=(hybar1^2)*eff1;moycond2=(hybar2^2)*eff2

varcond1=(var1)*eff1;varcond2=(var2)*eff2

efft=eff1+eff2; moytot=sum(s11+s22)/efft; moytotq=sum(s11q+s22q)/efft

Moymoy=sum(hybar1+hybar2)/efft; Varmoy=sum(var1+var2)/efft

Moymoycond=sum(moycond1+moycond2)/efft;Moyvarcond=sum(varcond1+varcond2)/efft

#

var_inter  =((eff1*(hybar1^2))+(eff2*(hybar2^2)))/(eff1+eff2)-(ybar^2)

var_intra  =(((eff1*var1)+(eff2*var2))/sum(eff1+eff2))

var_intra2 = eff1*((sum(na.omit(gr11^2))/eff1)-(hybar1^2)) +  eff2*((sum(na.omit(gr22^2))/eff2)-(hybar2^2))

var_tot    =  (sum(na.omit(gr11)^2)/eff1)-ybar^2+ (sum(na.omit(gr22)^2)/eff2)-ybar^2

var_tot2   =var_inter+var_intra2

var_intra/var_tot*100    # minimize

var_inter/var_tot*100    # maximize

#

vartot=moytotq-(moytot^2)

varinter=Moymoycond-(moytot^2)

varintra=Moyvarcond

varintra/vartot*100    # within:  minimize

varinter/vartot*100    # between: maximize

set.seed(111)

calc_SS <- function(df) sum(as.matrix(dist(df)^2)) / (2 * nrow(df))

df2 <- data

cbind(row.names(df2))

km <- kmeans(as.matrix(df2), 2) # avoid naming the result 'kmeans' (it masks the function)

#

df2$cluster <- km$cluster

library(dplyr); library(tidyr); library(purrr) # needed for %>%, group_by, nest, map_dbl

df2 %>%

  group_by(cluster) %>%

  nest() %>%

  mutate(within_SS = map_dbl(data, ~calc_SS(.x))) %>%

  arrange(cluster)

km$betweenss/ km$totss*100

km$tot.withinss/ km$totss*100

 

 

 

 

# 11- SPATIAL AUTOCORRELATION:

 

library(terra)

library(rgeoda)

library(ape)

library(spdep)

library(sp)

library(rgdal)

library(cartography)

library(dplyr)

library(deldir)

library(tripack)

library(ggplot2)

library(tidyr)

library(zip)

library(tidyverse) 

library(sf)

library(rgeos)

library(tmap)

library(tmaptools)

library(spgwr)

library(grid)

library(gridExtra)

library(spData)

library(spatstat)


library(mapview)

require(DCluster)

library(ggvoronoi)

library(gstat)


#

Census.Data = read.table(file("clipboard"),header=TRUE, sep="\t", dec=".", row.names=1)

# Census.Data = read.table(file("clipboard"),header=TRUE, sep="\t", dec=".")

summary(Census.Data)

# Census.Data =Census.Data[1:6,]

str(Census.Data)

names(Census.Data)

attach(Census.Data)

dim(Census.Data)

glimpse(Census.Data)

#

Census.Dat=na.omit(Census.Data)

summary(Census.Dat)

table(is.na(Census.Dat))

Census.Datu= unique(Census.Dat)

# Census.Datd= Census.Dat[!duplicated(Census.Dat)]

#

# OUTLIER VALUES (Tukey fences) ___________________

# lower fence: Q1 - 3.0 x IQR

# upper fence: Q3 + 3.0 x IQR

# summary(data[,1])

X=datan[, 15 ]

Q1= quantile(X, probs = c(0.25)); Q1

Q3= quantile(X, probs = c(0.75)); Q3

#

IQR= Q3-Q1; IQR

B1=Q1-(3*IQR); B2=Q3+(3*IQR); rbind(B1,B2)
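# Flag the observations falling outside the fences (a sketch):
X[X < B1 | X > B2]       # outlying values
which(X < B1 | X > B2)   # their positions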

#

# NORMALITY AND HOMOGENEITY TEST___________________

library(tidyverse)

library(ggpubr)

library(car)

library(LaplacesDemon)

library(vGWAS)

library(onewaytests)

library(caret)

# install.packages('vGWAS', repos = 'http://r-forge.r-project.org')

cbind(names(Census.Data))

dim(Census.Data)

Z=Census.Data[,4]

#

shapiro.test(Z)

nortest::sf.test(Z)

brown.forsythe.test(y = Census.Dat$F, group = Census.Dat$sample)

leveneTest(Census.Dat$F ~ Census.Dat$sample, Census.Dat)

#

Census.Dat %>%

  group_by(sample) %>%

  summarize(Variance = var(F))  # use the bare column name so the grouping applies

bf.test(Census.Dat$F ~ Census.Dat$sample, data = Census.Dat)

#

# TRANSFORMATION BOXCOX()__________________________

data = Census.Dat[,4]

bc_trans <- BoxCoxTrans(data)

transformed_data <- predict(bc_trans, data)

x11();hist(transformed_data)

x11();qqnorm(transformed_data); qqline(transformed_data)

# Minimal deviation from the straight line indicates an approximately normal distribution.

# On the qqnorm plot, the Box-Cox transformation plays a role comparable to a log transformation.

# https://stats.stackexchange.com/questions/52293/r-qqplot-how-to-see-whether-data-are-normally-distributed

# https://www.geeksforgeeks.org/how-to-use-boxcox-transformation-in-the-caret-package-in-r/

 

# Importing geospatial data (shapefile): the Thiessen polygons were already built in ArcGIS and merged with the country outline, OR build them directly in R (see the "th.clp" object below)

#

hunan <- readOGR(dsn = "C:/Abdi-Basid ADAN/12.Doc.IST.CERD/EAU   [En cours]/arcgis/thiessenpolygonwater.shp")

names(hunan ) # Name Layers

gdppc <- read.table(file("clipboard"), header=TRUE, sep="\t"); str(gdppc) # read_csv("data/attributeTable/Hunan_GDPPC.csv")

table(is.na(gdppc)); summary(gdppc); gdppc1=na.omit(gdppc)

 

# MORAN'S I: checking for spatial autocorrelation:

F.dists <- as.matrix(dist(cbind(gdppc1$Longdec, gdppc1$Latdec)))

F.dists.inv <- 1/F.dists ; F.dists.inv[1:2, 1:2]

diag(F.dists.inv) <- 0

Moran.I(gdppc1$F, F.dists.inv)

# Binary distance matrix: pairs closer than d are treated as connected, pairs farther apart than d are not.

F.dists.bin <- (F.dists > 0 & F.dists <= .75)

Moran.I(gdppc1$F, F.dists.bin)

# We can reject the null hypothesis of zero spatial autocorrelation (values randomly distributed in space)

# when $p.value is below alpha = 0.05.

# https://stats.oarc.ucla.edu/r/faq/how-can-i-calculate-morans-i-in-r
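# Self-contained sanity check of ape::Moran.I() on hypothetical random points
# (no spatial structure, so a large p-value is expected):
library(ape)
set.seed(2)
xy <- cbind(lon = runif(25), lat = runif(25))
v  <- rnorm(25)
dm <- as.matrix(dist(xy)); dm.inv <- 1/dm; diag(dm.inv) <- 0
Moran.I(v, dm.inv)  # observed should be close to expected = -1/(n-1)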

 

# hunan@data <- left_join(hunan@data, gdppc)

x11(); qtm(hunan)

x11(); spplot(hunan, "Shape_Area")

# vignette("tmap-getstarted")

tmap_tip()

# plot map : https://www.kaggle.com/code/umeshnarayanappa/quickstart-guide-shapefiles-and-r-tmap

#

# Deriving the spatial weight matrix:

wm_q <- poly2nb(hunan, queen = TRUE); summary(wm_q)

x11();  plot(hunan, border = 'lightgrey'); plot(wm_q, coordinates(hunan), pch = 19, cex = 0.6, add = TRUE, col = "red")

rswm_q <- nb2listw(wm_q, zero.policy = TRUE) # Row-standardised weights matrix

knn8   <- knn2nb(knearneigh(coordinates(hunan), k =8, longlat = TRUE), row.names = row.names(hunan$GDPPC2012))

knn_lw <- nb2listw(knn8, style = "B")

summary(knn8)

#

# Points/stations as a shapefile for the queen contiguity weight matrix / k-nearest-neighbor weights ('iris.dj1' is read further below)

queen_w <- queen_weights(iris.dj1, order=1, include_lower_order = FALSE, precision_threshold = 0)

get_neighbors(queen_w, idx = 1)

#

# Computing Local Moran's I  (for Queen Contiguity neighbours) :

fips <- order(gdppc$sample)

localMI <- localmoran(as.vector(hunan$F), rswm_q)

head(localMI)

#  Computing Local Moran's I (for adaptive weight matrix 8 neighbours):

fips1 <- order(gdppc $F)

localMI1 <- localmoran(as.vector(hunan$F), knn_lw)

print(localMI1); View(localMI1)

printCoefmat(data.frame( localMI1[fips1], row.names = hunan$sample[fips1] ), check.names = FALSE)

# Ii:      the local Moran's I statistics

# E.Ii:    the expectation of local moran statistic under the randomisation hypothesis

# Var.Ii:  the variance of local moran statistic under the randomisation hypothesis

# Z.Ii:    the standard deviate of local moran statistic

# Pr():    the p-value of local moran statistic

#

#  Mapping the Local Moran's I :

hunan.localMI <- cbind(hunan, localMI)

x11();tm_shape(hunan.localMI) +

  tm_fill(col = "Ii",

          style = "pretty",

          palette = "RdBu",

          title = "local moran statistics")+

  tm_borders(alpha = 0.5)

# Mapping Local Moran's I p-values :

x11();tm_shape(hunan.localMI) +

  tm_fill(col = "Ii",

          breaks=c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),

          palette = "-Blues",

          title = "local Moran's I p-values")+

  tm_borders(alpha = 0.5)

#  Mapping both local Moran's I values & p-values

x11()

localMI.map <- tm_shape(hunan.localMI) +

  tm_fill(col = "Ii",

          style = "pretty",

          title = "local moran statistics")+

  tm_borders(alpha = 0.5)

pvalue.map <- tm_shape(hunan.localMI) +

  tm_fill(col = "Ii",

          breaks=c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),

          palette = "-Blues",

          title = "local Moran's I p-values")+

  tm_borders(alpha = 0.5)

tmap_arrange(localMI.map, pvalue.map, asp = 1, ncol = 2)

#

# Creating LISA Cluster map :

#---------------------------

# LISA cluster map shows the significant locations color coded by type of spatial autocorrelation :

# 1  plot the Moran scatterplot + 2  generate LISA cluster map :

# 1  plot the Moran scatterplot :

X11(); nci <- moran.plot(hunan$F, rswm_q, labels = as.character(hunan$sample), xlab = "F", ylab = "Spatially Lag F")

# The plot is split into 4 quadrants; the top-right corner contains areas with high values whose neighbors also have above-average values (high-high locations).

#

# Plotting Moran scatterplot with standardised variable :

hunan$Z.F <- scale(hunan$F) %>% as.vector

knitr::kable(head(hunan, n=5))

X11(); nci2 <- moran.plot(hunan$Z.F, rswm_q, labels = as.character(hunan$sample), xlab = "F", ylab = "Spatially Lag F")

# high-high Quadrant 4 - Cluster/Positive autocorrelation

# low-low Quadrant 1 - Cluster/Positive autocorrelation

# low-high Quadrant 2 - Outlier/Negative autocorrelation

# high-low Quadrant 3 - Outlier/Negative autocorrelation

# non-significant Moran (p-value of localMI is compared to the significance level)

#

# 2  generate LISA cluster map

quadrant <- vector(mode = "numeric", length = nrow(localMI))

DV <- hunan$F - mean(hunan$F)

C_mI <- localMI[,1] - mean(localMI[,1])

signif <- 0.05

#| D1=-LH | G1=+HH | D2=+LL | G2=-HL : -outliers | +cluster

quadrant[DV >0 & C_mI>0] <- 4     

quadrant[DV <0 & C_mI<0] <- 1     

quadrant[DV <0 & C_mI>0] <- 2

quadrant[DV >0 & C_mI<0] <- 3

quadrant[localMI[,5]>signif] <- 0

#

hunan.localMI$quadrant <- quadrant

colors <- c("#ffffff", "#2c7bb6", "#abd9e9", "#fdae61", "#d7191c")

clusters <- c("insignificant", "low-low", "low-high", "high-low", "high-high")

knitr::kable(head(hunan.localMI, n=5))

#

x11();tm_shape(hunan.localMI) +

  tm_fill(col = "quadrant", style = "cat", palette = colors[c(sort(unique(quadrant)))+1], labels = clusters[c(sort(unique(quadrant)))+1], popup.vars = c("Postal.Code")) +

  tm_view(set.zoom.limits = c(11,17)) +

  tm_borders(alpha=0.5)

# low low = cold spot | High High = hot spot.

# high-high cluster indicating positive autocorrelation.

# low-high cluster indicating negative autocorrelation/outliers

#

# Hot Spot & Cold Spot Analysis : Computing Gi statistics

#    Hot spot area =  G>0, it's also High-high in Local Moran's I values

#    Cold spot area = G<0, it's also Low-low in Local Moran's I values

# 1/Deriving spatial weight matrix | 2/Computing Gi statistics | 3/Mapping Gi statistics

dnb <- dnearneigh(coordinates(hunan), 0, 85, longlat = TRUE); dnb

x11(); plot(hunan, border = "lightgrey"); plot(dnb, coordinates(hunan), add = TRUE, col = "red")

dnb_lw <- nb2listw(dnb, style = "B"); summary(dnb_lw)

#

coords <- coordinates(hunan)

knb <- knn2nb(knearneigh(coords, k = 8, longlat = TRUE), row.names = row.names(hunan$sample))

knb_lw <- nb2listw(knb, style = "B"); summary(knb_lw)

x11(); plot(hunan, border = "lightgrey"); plot(knb, coordinates(hunan), pch = 19, cex = 0.6, add = TRUE, col = "red")

#

fips <- order(hunan$sample)

gi.fixed <- localG(hunan$F, dnb_lw)

hunan.gi <- cbind(hunan, as.matrix(gi.fixed))

names(hunan.gi)[dim(hunan.gi)[2]] <- "gstat"

head(hunan.gi@data)

#

fips <- order(hunan$sample)

gi.adaptive <- localG(hunan$F, knb_lw)

hunan.gi <- cbind(hunan.gi, as.matrix(gi.adaptive))

names(hunan.gi)[dim(hunan.gi)[2]] <- "gstat_adaptive"

head(hunan.gi@data)

#  Mapping local Gi statistics : Mapping local Gi values with fixed distance weights

x11(); tm_shape(hunan.gi)+

  tm_fill(col = "gstat",

          style = "pretty",

          palette = "-RdBu",

          title = "local Gi value")  +

  tm_borders(alpha = 0.5)+

  tm_text("sample", just="left", xmod=.5, size = 0.7) +

  tm_legend(legend.outside=TRUE)

#

# Mapping local Gi values with adaptive distance weights

x11();tm_shape(hunan.gi)+ 

  tm_fill(col = "gstat_adaptive",

          style = "pretty",

          palette = "-RdBu",

          title = "local Gi value") +

  tm_borders(alpha = 0.5) +

  tm_text("sample", just="left", xmod=.5, size = 0.5) +

  tm_legend(legend.outside=TRUE)

#

# Interpretation : https://rpubs.com/erikaaldisa/spatialclustering

# Exporting

class(hunan.gi)

st_crs(hunan.gi)

s.df <- data.frame(hunan.gi)

writeOGR(hunan.gi, dsn = '.', layer = 'Rspatial', driver = "ESRI Shapefile"); getwd()

# Exporting raster  :    writeRaster(hunan.gi, "elev_out.tif", gdal = "GTiff" )

# Exporting sf class:    st_write(hunan.gi, "shapefile_out.shp", driver = "ESRI Shapefile")  # create to a shapefile

# Exporting sp class:    writeOGR(hunan.gi, dsn = '.', layer = 'Rspatial', driver = "ESRI Shapefile")

# Export/import shapefile :  https://stackoverflow.com/questions/13926811/export-a-polygon-from-an-r-plot-as-a-shapefile

# Export/import shapefile :  https://mgimond.github.io/Spatial/reading-and-writing-spatial-data-in-r.html

 

# TYPE WEIGHTS MATRIX:

#1|Contiguity Based Weights:       queen_weights(), rook_weights()

#2|Distance Based Weights:         distance_weights()

#3|K-Nearest Neighbor Weights:     knn_weights()

#4|Kernel Weights:                 distance_weights() and knn_weights() with kernel parameters

#

# saveRDS(iris.dj[4], "iris.dj[4].rds")

# saveRDS(iris.dj1[1], "iris.dj1[1].rds")

# getwd()

# z <- gzcon(url("https://github.com/mgimond/Spatial/raw/main/Data/texas.rds"))

# P <- readRDS("C:/Abdi-Basid ADAN/12.Doc.IST.CERD/EAU   [En cours]/arcgis/iris.dj1[1].rds")

# p <- st_as_sf(P)

#

iris.dj <- read_sf("C:/Users/hp/Documents/ArcGIS 10.8/Djibouti for mask/MASK SHAPEFILE Djibouti/Regions.shp")

w <- st_as_sf(iris.dj[4]) # ;  crs(iris.dj1) <- "+proj=longlat +datum=WGS84 +no_defs"

iris.dj1 <- read_sf("C:/Abdi-Basid ADAN/12.Doc.IST.CERD/EAU   [En cours]/arcgis/water.shp")

p <- st_as_sf(iris.dj1)

x11(); plot(iris.dj1[4],pch=10) ; x11();plot(iris.dj[4])

dev.off();dev.off()

coords <- st_coordinates(st_centroid(iris.dj))

#

# Queen Contiguity Weights:

queen_w <- queen_weights(iris.dj1, order=1, include_lower_order = FALSE, precision_threshold = 0)

summary(queen_w)

weights_sparsity(queen_w)

has_isolates(queen_w)

#

# details of the weights: e.g. list the neighbors of a specified observation:

nbrs <- get_neighbors(queen_w, idx = 1)

cat("\nNeighbors of the 1-st observation are:", nbrs)

#

# https://cran.r-project.org/web/packages/spNetwork/vignettes/SpatialWeightMatrices.html

#

# Create voronoi polygon with simple feature in R:

#

x11()

tm_shape(w) + tm_polygons() +

  tm_shape(p) +

  tm_dots(col="F", palette = "RdBu", auto.palette.mapping = FALSE,

          title="Sample water \n(mg/L)", size=0.7) +

  tm_text("sample", just="left", xmod=.5, size = 0.7) +

  tm_legend(legend.outside=TRUE)

# p <- st_as_sf( p[,c(4)] )

# p <- st_union(p)

# p <- st_set_crs(p, 4326)

# p <- st_geometry(p)

p <- p[,c(4)]

st_crs(p) <-  "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"     # "+proj=longlat +datum=WGS84 +no_defs"  # crs(p1)

st_crs(w) <-  "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"     # "+proj=longlat +datum=WGS84 +no_defs"  # crs(p1)

p <-st_transform(p,"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs")

# polygon map: https://mgimond.github.io/Spatial/coordinate-systems-in-r.html

#

th  <- dirichlet(as.ppp(p)) |> st_as_sfc() |> st_as_sf()

st_crs(th) <- st_crs(p)

th2     <- st_join(th, p, fn=mean)

th.clp   <- st_intersection(th2, w)

#

x11();tm_shape(th.clp) +

  tm_polygons(col="F", palette="RdBu", auto.palette.mapping=FALSE,

              title="Water  \n(in inches)") +

  tm_legend(legend.outside=TRUE)

# Exporting sf class

class(th.clp)

st_crs(th.clp) <- "+proj=longlat +datum=WGS84 +no_defs"

st_write(th.clp, "shapefile_out1.shp", driver = "ESRI Shapefile")  # create to a shapefile

#

# Thiessen polygons :| https://mgimond.github.io/Spatial/interpolation-in-r.html

 

Census.Data%>%

  st_as_sf(coords = c("Longdec", "Latdec"),

  crs = sp::CRS("+proj=longlat +datum=WGS84")) %>%

mapview()

m1 = cbind(Census.Data$Longdec, Census.Data$Latdec) %>% st_multipoint() # coordinate matrix, then multipoint

#

voronoi_grid <- st_voronoi(m1)

x11();plot(voronoi_grid, col = NA)

plot(m1, add = TRUE, col = "blue", pch = 16)

#

attach(Census.Data)

x <- Census.Dat$Longdec;  table(duplicated(x))

y <- Census.Dat$Latdec;   table(duplicated(y))

tesselation <- deldir(x, y)

tiles <- tile.list(tesselation)

plot(tiles, pch = 19, number = TRUE, cex=0.5)

plot(tiles,showpoints = FALSE)

plot(tiles, close = TRUE)

x11();plot(tiles, pch = 19,border = "white",fillcol = hcl.colors(50, "Sunset"))

# s <- seq(0, 2 * pi, length.out = 3000); circle <- list(x = 0.5 * (1 + cos(s)),y = 0.5 * (1 + sin(s)))

# x11();plot(tiles, pch = 19,  col.pts = "white",border = "white", fillcol = hcl.colors(50, "viridis"),clipp = circle)

#

box <- st_sfc(bbox_polygon(iris.dj1)) # bbox_polygon() is a helper from the sf vignette, not exported by sf

v <- st_voronoi(iris.dj1, box)

plot(v)

#

points <- data.frame(x, y,distance = sqrt((x-100)^2 + (y-100)^2))

ggplot(points) +

                     geom_voronoi(aes(x,y,fill=distance))

ggplot(points,aes(x,y)) +

                     stat_voronoi(geom="path") +

                      geom_point()

# Interpolation in R:  https://mgimond.github.io/Spatial/interpolation-in-r.html

# Voronoi diagram with deldir : https://r-charts.com/part-whole/voronoi-diagram/#:~:text=A%20Voronoi%20diagram%20(or%20Thiessen,Voronoi)%20tessellation%20with%20the%20tile.

# Voronoi Diagrams with ggvoronoi: https://cran.r-project.org/web/packages/ggvoronoi/vignettes/ggvoronoi.html

# Create voronoi polygon with simple feature in R: https://stackoverflow.com/questions/45719790/create-voronoi-polygon-with-simple-feature-in-r

# Little useless-useful R functions - Interactive Voronoi diagram generator using R and x11(): https://www.r-bloggers.com/2021/10/little-useless-useful-r-functions-interactive-voronoi-diagram-generator-using-r-and-x11/

# R function for Thiessen Polygons: https://gis.stackexchange.com/questions/136542/r-function-for-thiessen-polygons

# How to build Voronoi Polygons for point coordinates in R?: https://stackoverflow.com/questions/66946196/how-to-build-voronoi-polygons-for-point-coordinates-in-r

# How to create thiessen polygons from points using R packages? https://stackoverflow.com/questions/9403660/how-to-create-thiessen-polygons-from-points-using-r-packages

#

irisdj.nb <- poly2nb(iris.dj)

irisdj.lw <- nb2listw(irisdj.nb,zero.policy=TRUE) # Creation of weight matrix

#

# moran.plot(x, listw, zero.policy=NULL, spChk=NULL, labels=NULL,

#           xlab=NULL, ylab=NULL, quiet=NULL, plot=TRUE, return_df=TRUE, ...)

#

is.vector(Census.Dat$F)

x11(); moran.plot(as.vector(Census.Dat$F), irisdj.lw, labels=FALSE,xlab="", ylab="")

# x11(); moran.plot(as.vector(Census.Dat$F), irisdj.lw, labels=as.character(row.names(Census.Dat) ), pch=19)

x11(); moran.plot(as.vector(scale(Census.Dat$F)),xlim=c(-2, 4), ylim=c(-2,4), irisdj.lw, labels=as.character(row.names(Census.Dat) ), pch=19)

# spChk quiet return_df

#

mp <- moran.plot(as.vector(scale(Census.Dat$F)),xlim=c(-2, 4), ylim=c(-2,4), irisdj.lw, labels=as.character(row.names(Census.Dat) ), pch=19)

if (require(ggplot2, quietly=TRUE)) {

  xname <- attr(mp, "xname")

  ggplot(mp, aes(x=x, y=wx)) + geom_point(shape=1) +

    geom_smooth(formula=y ~ x, method="lm") +

    geom_hline(yintercept=mean(mp$wx), lty=2) +

    geom_vline(xintercept=mean(mp$x), lty=2) + theme_minimal() +

    geom_point(data=mp[mp$is_inf,], aes(x=x, y=wx), shape=9) +

    geom_text(data=mp[mp$is_inf,], aes(x=x, y=wx, label=labels, vjust=1.5)) +

    xlab(xname) + ylab(paste0("Spatially lagged ", xname))

}

# https://www.e-education.psu.edu/geog586/node/672

#  DOI: 10.1016/j.scitotenv.2020.139584 # INTERPRETATION PLOT

# Output.Areas <- readOGR(".", "C: ")

# Output.Areas2 <- read_sf("C:/Users/hp/Documents/ArcGIS 10.8/Djibouti for mask/MASK SHAPEFILE Djibouti/Regions.shp")

# shapefile station et  coordinate by merging

Output.Areas2 <- read_sf("C:/Abdi-Basid ADAN/12.Doc.IST.CERD/EAU   [En cours]/arcgis/stationwater.shp")

glimpse(Output.Areas2)

OA.Census <- merge(Output.Areas2, Census.Data, by.x="Latdec", by.y="Longdec")

OA.Census_sf <- st_as_sf(OA.Census)

#

neighbours <- poly2nb(OA.Census)

neighbours

# https://rpubs.com/quarcs-lab/spatial-autocorrelation

#

# 'ozone' is the example dataset from the UCLA FAQ linked below

ozone.dists <- as.matrix(dist(cbind(ozone$Lon, ozone$Lat)))

ozone.dists.inv <- 1/ozone.dists

diag(ozone.dists.inv) <- 0

#

Moran.I(ozone$Av8top, ozone.dists.inv)

ozone.dists.bin <- (ozone.dists > 0 & ozone.dists <= .75)

Moran.I(ozone$Av8top, ozone.dists.bin)

# https://stats.oarc.ucla.edu/r/faq/how-can-i-calculate-morans-i-in-r/

#

#| Import/export shapefile

# mtq <- read_sf("data/mtq/martinique.shp")

# mtq <- st_read("data/mtq/martinique.shp")

# write_sf(mtq,"data/mtq/martinique.gpkg",delete_layer = TRUE)

# st_write(mtq,"data/mtq/martinique.gpkg",delete_layer = TRUE)

# st_crs(iris.75)

# download.file("https://github.com/comeetie/satRday/blob/master/exercises/data.zip?raw=true",destfile = "data.zip")

# unzip("data.zip",exdir=".")

# https://antuki.github.io/satRday2019_workshop/exercises/exercises.html

#

iris75 <- st_read("C:/Abdi-Basid ADAN/12.Doc.IST.CERD/EAU   [En cours]/arcgis/shapefile/data/iris_75.shp", stringsAsFactors = F)

x11(); plot(iris75)

#

iris75.nb <- poly2nb(iris75) # Extraction of list of neighbours (defined by default with Queen contiguity

iris75.lw <- nb2listw(iris75.nb,zero.policy=TRUE) # Creation of weight matrix

#

# Calculation of standardised median income:

iris75.data <- as.data.frame(iris75); names(iris75.data )

iris75.data$med_revenu_std <- scale(iris75.data$med_revenu) # assuming the raw income column is named 'med_revenu'; check names(iris75.data)

#

#| Moran's diagram

is.vector(iris75.data$med_revenu_std)

#

x11(); moran.plot(as.vector(iris75.data$med_revenu_std), iris75.lw, labels=FALSE,xlab="", ylab="")

x11(); moran.plot(as.vector(iris75.data$med_revenu_std), iris75.lw, labels=as.character(iris75.data$CODE_COM), pch=19)

#

# https://r-spatial.github.io/spdep/reference/moran.plot.html

#

#| Moran's I test

moran.test(as.vector(iris75.data$med_revenu_std), iris75.lw, zero.policy=TRUE, randomisation=FALSE)

moran.test(s$Income, lw, alternative="greater") # 's' and 'lw' come from the mgimond tutorial linked below

#

#  Monte-Carlo simulation of Moran I

MC<- moran.mc(s$Income, lw, nsim=999, alternative="greater")

plot(MC)

# https://mgimond.github.io/simple_moransI_example/

# https://www.paulamoraga.com/book-spatial/spatial-autocorrelation.html

#

# Calculation of the range of Moran's I

moran.range <- function(lw) {

  wmat <- listw2mat(lw)

  return(range(eigen((wmat+t(wmat))/2)$values))

}

moran.range(iris75.lw)

#

lisa_revenus <- localmoran(as.vector(iris75.data$med_revenu_std), iris75.lw, zero.policy= TRUE)

#

# https://www.insee.fr/en/statistiques/fichier/3635545/imet131-g-chapitre-3.pdf

# https://comeetie.github.io/quantilille/lecture/lecture.html

#

map <- st_read(system.file("shapes/boston_tracts.shp",package = "spData"), quiet = TRUE)

map$vble <- map$MEDV

mapview(map, zcol = "vble")

#

# https://r-spatial.org/book/15-Measures.html

# https://mgimond.github.io/Spatial/spatial-autocorrelation-in-r.html

# https://cran.r-project.org/web/packages/lctools/vignettes/SpatialAutocorrelation.html

# https://www.efgs.info/wp-content/uploads/informationbase/introduction/Handbook_of_Spatial_Analysis_INSEE_EUROSTAT_2018.pdf

#

#  Local Indicators of Spatial Association-LISA

#  Regionalization with dynamically constrained agglomerative clustering and partitioning

#  Automatic zoning procedure (AZP)

#  Max-p greedy

#  https://geodacenter.github.io/rgeoda/articles/rgeoda_tutorial.html

#

# Conditional Permutations with sfdep

library(sfdep)

nb <- st_contiguity(guerry)

wt <- st_weights(nb)

x <- guerry$crime_pers

p_nb <- cond_permute_nb(nb)

p_wt <- st_weights(p_nb)

observed <- global_c(x, p_nb, p_wt)

reps <- replicate(199, {

  p_nb <- cond_permute_nb(nb)

  p_wt <- st_weights(p_nb)

  global_c(x, p_nb, p_wt)[["C"]]

})

# simulated (pseudo) p-value, using (M+1)/(R+1), where M = replicates at least as extreme as the observed value and R = number of replicates:

(sum(observed[["C"]] <= reps) + 1) / (199 + 1)

# https://cran.r-project.org/web/packages/sfdep/vignettes/conditional-permutation.html

#

library(rspat)

auck <- spat_data("auctb"); names(auck)

w <- relate(auck, relation="rook")  # relation="touches" compute the spatial weights.

Gi <- autocor(auck$TB, w, "Gi")     # Compute the Getis Gi

grays <- rev(gray(seq(0,1,.2)))

auck$Gi <- Gi

plot(auck, "Gi", col=grays)

diag(w) <- TRUE

auck$Gistar <- autocor(auck$TB, w, "Gi*")

plot(auck, "Gistar", main="Gi*", col=grays)

auck$loc_mean <- apply(w, 1, function(i) mean(auck$TB[i]))

plot(auck, "loc_mean", main="Local mean", col=grays)

library(spdep)

sfauck <- sf::st_as_sf(auck)

wr <- poly2nb(sfauck, row.names=sfauck$Id, queen=FALSE)

lstw <- nb2listw(wr, style="B")

auck$Ii <- localmoran(auck$TB, lstw)

## Warning: [[[<-,SpatVector] only using the first column

plot(auck, "Ii", main="Local Moran", col=grays)

auck$TBdens <- 10000 * auck$TB / expanse(auck)

 

# References

#https://rspatial.org/raster/analysis/analysis.pdf

#https://geodacenter.github.io/rgeoda/articles/rgeoda_tutorial.html#install-rgeoda

# https://www.statisticshowto.com/morans-i/

# https://rspatial.org/rosu/Chapter8.html

 

 

# 12- PERFORM A RELATIVE WEIGHTS ANALYSIS (RWA):

 

rm(list=ls())

#

data=read.table(file("clipboard"),header=TRUE, sep="\t", dec=".", row.names=1)

str(data)

cbind(names(data))

library(rwa)

library(tidyverse)

library(ggplot2)

 

data %>%

  rwa(outcome = "A",

      predictors = c("X", "Y","Z"),

      applysigns = TRUE) %>%

  plot_rwa()
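# Runnable sketch of the same pipeline on a built-in dataset (mtcars), since
# the column names above ("A", "X", "Y", "Z") are placeholders:
mtcars %>%
  rwa(outcome = "mpg",
      predictors = c("cyl", "disp", "hp", "wt"),
      applysigns = TRUE) %>%
  plot_rwa()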

 

library(relaimpo)

scaled_ratings <- data.frame(scale(data[,-1]))

ols.sat <- lm(A ~ X + Y + Z, data = data) # 'A', 'X', 'Y', 'Z' are placeholder column names

summary(ols.sat)

calc.relimp(ols.sat, type = c("lmg"), rela = TRUE)

#

relimp <- calc.relimp(ols.sat, type = "lmg", rela = TRUE)

str(relimp)

plot(relimp)

# Bootstrapped confidence intervals for the relative importance:

boot_rel <- boot.relimp(ols.sat, type = "lmg",nboot = 1000)

boot_eval <- booteval.relimp(boot_rel)

plot(boot_eval)

# https://www.r-bloggers.com/2012/08/the-relative-importance-of-predictors-let-the-games-begin/

#https://martinctc.github.io/rwa/

#https://core.ecu.edu/wuenschk/MV/multReg/Relative_Weights_Analysis.pdf

 

# 13- EMPIRICAL ORTHOGONAL FUNCTION ANALYSIS (EOF):

 

require(wql)

# Create an annual matrix time series

chla1 <- aggregate(sfbayChla, 1, mean, na.rm = TRUE)

chla1 <- chla1[, 1:12]  # remove stations with missing years

# eofNum (see examples) suggests n = 1

eof(chla1, 1)

#

# REOF     :      a matrix with rotated EOFs

# amplitude:   a matrix with amplitude time series of REOFs

# eigen.pct:   all eigenvalues of correlation matrix as percent of total variance

# variance :    variance explained by retained EOFs

#

# Create an annual time series data matrix from sfbay chlorophyll data

# Average over each year

chla1 <- aggregate(sfbayChla, 1, mean, na.rm = TRUE) 

chla1 <- chla1[, 1:12]  # remove stations with missing years

x11();eofNum(chla1)

# These stations appear to act as one with respect to chlorophyll

# variability on the annual scale because there's one dominant EOF.

#

# chla1 as created above

 

# eofNum (see examples) suggests n = 1

e1 <- eof(chla1, n = 1)

eofPlot(e1, type = 'coef')

eofPlot(e1, type = 'amp')

 

 

"Have fun!

If you find any errors or codes that do not work, please feel free to write to me at abdi-basid@outlook.com."

 

Abdi-Basid ADAN



[1] If the package does not load in your environment, then use the command install.packages('package_name'). The same applies to all other packages.
