--- title: "Cluster Analysis" author: "Sebastian Hönel" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Cluster Analysis} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` > In this vignette, we are evaluating neighborhood-search, network-centralities and vicinities using Bayesian estimators as implemented in *mmb*. # Vicinity Example Using some gradient-coloring: ```{r} library(mmb) library(Rtsne) library(ggplot2) library(ggpubr) set.seed(1) chooseSample <- 1 df <- iris[sample(rownames(iris)), ] #df <- iris[,] tsne <- Rtsne::Rtsne(df[, 1:4], check_duplicates = FALSE) # Attach: df$X <- tsne$Y[, 1] df$Y <- tsne$Y[, 2] # Let's pick one sample to calculate the vicinity for: s <- df[chooseSample, 1:4] vics <- mmb::vicinitiesForSample(doEcdf = FALSE, shiftAmount = 1, df = df[, 1:4], sampleFromDf = s, selectedFeatureNames = colnames(s), retainMinValues = 5) # Attach to df: df$vics <- vics$vicinity # TEMP TEMP TEMP: Discretize vics mmbd <- mmb::discretizeVariableToRanges(df$vics, numRanges = length(levels(df$Species))) df$vicsD <- sapply(df$vics, function(v) { for (i in 1:length(mmbd)) { r <- mmbd[[i]] if (v >= r[1] && v < r[2]) return(paste("R", i, sep = "_")) } }) # Also, create a binary classification: df$vicsB <- sapply(df$vics, function(v) { return(if (v > 0.1) "P" else "N") }) # Additionally, compute the Euclidean-distance: df$vicsE <- as.vector(philentropy::distance(df[, 1:4])[1,]) df$vicsE <- max(df$vicsE) - df$vicsE pointCommon <- geom_point(shape=1, size=3, color="#000000", data=df[chooseSample,], mapping=aes(x=df[chooseSample,]$X, y=df[chooseSample,]$Y)) g1 <- ggplot(df, aes(x=X, y=Y, color=Species)) + geom_point() + pointCommon + stat_ellipse() g2 <- ggplot(df, aes(x=X, y=Y)) + geom_point(aes(color=vics)) + #stat_ellipse() + scale_color_gradient(low="blue", high="red") # TEMP TEMP TEMP g3 <- ggplot(df, aes(x=X, y=Y, color=vicsD)) + geom_point() #+ #stat_ellipse() g4 <- ggplot(df, aes(x=X, y=Y, color=vicsB)) + geom_point() + stat_ellipse() g5 <- ggplot(df, aes(x=X, y=Y)) + geom_point(aes(color=vicsE)) + #stat_ellipse() + scale_color_gradient(low="blue", high="red") ggarrange(g1, g2, g3, g4, g5) ```