Parameters from Cluster Models (k-means, ...)
Source: R/methods_dbscan.R, R/methods_hclust.R, R/methods_kmeans.R, and 2 more
model_parameters.kmeans.Rd
Format cluster models obtained, for example, by kmeans().
Usage
# S3 method for class 'dbscan'
model_parameters(model, data = NULL, clusters = NULL, ...)
# S3 method for class 'hclust'
model_parameters(model, data = NULL, clusters = NULL, ...)
# S3 method for class 'pvclust'
model_parameters(model, data = NULL, clusters = NULL, ci = 0.95, ...)
# S3 method for class 'kmeans'
model_parameters(model, ...)
# S3 method for class 'hkmeans'
model_parameters(model, ...)
# S3 method for class 'Mclust'
model_parameters(model, data = NULL, clusters = NULL, ...)
# S3 method for class 'pam'
model_parameters(model, data = NULL, clusters = NULL, ...)
Examples
# \donttest{
# DBSCAN ---------------------------
if (require("dbscan", quietly = TRUE)) {
model <- dbscan::dbscan(iris[1:4], eps = 1.45, minPts = 10)
rez <- model_parameters(model, iris[1:4])
rez
# Get clusters
predict(rez)
# Cluster centers in long form
attributes(rez)$means
# Between and Total Sum of Squares
attributes(rez)$Sum_Squares_Total
attributes(rez)$Sum_Squares_Between
# HDBSCAN
model <- dbscan::hdbscan(iris[1:4], minPts = 10)
model_parameters(model, iris[1:4])
}
#> # Clustering Solution
#>
#> The 2 clusters accounted for 77.26% of the total variance of the original data.
#>
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1 | 100 | 139.80 | 6.26 | 2.87 | 4.91 | 1.68
#> 2 | 50 | 15.15 | 5.01 | 3.43 | 1.46 | 0.25
# }
#
# Hierarchical clustering (hclust) ---------------------------
data <- iris[1:4]
model <- hclust(dist(data))
clusters <- cutree(model, 3)
rez <- model_parameters(model, data, clusters)
rez
#> # Clustering Solution
#>
#> The 3 clusters accounted for 86.86% of the total variance of the original data.
#>
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1 | 50 | 15.15 | 5.01 | 3.43 | 1.46 | 0.25
#> 2 | 72 | 64.62 | 6.55 | 2.96 | 5.27 | 1.85
#> 3 | 28 | 9.75 | 5.53 | 2.64 | 3.96 | 1.23
# Get clusters
predict(rez)
#> [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#> [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 3 2 3 2 3 2 3 3 3 3 2 3 2 3 3 2 3 2 3 2 2
#> [75] 2 2 2 2 2 3 3 3 3 2 3 2 2 2 3 3 3 2 3 3 3 3 3 2 3 3 2 2 2 2 2 2 3 2 2 2 2
#> [112] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [149] 2 2
# Cluster centers in long form
attributes(rez)$means
#> Cluster n_Obs Sum_Squares Variable Mean
#> 1 1 50 15.151000 Sepal.Length 5.006000
#> 2 1 50 15.151000 Sepal.Width 3.428000
#> 3 1 50 15.151000 Petal.Length 1.462000
#> 4 1 50 15.151000 Petal.Width 0.246000
#> 5 2 72 64.624722 Sepal.Length 6.545833
#> 6 2 72 64.624722 Sepal.Width 2.963889
#> 7 2 72 64.624722 Petal.Length 5.273611
#> 8 2 72 64.624722 Petal.Width 1.850000
#> 9 3 28 9.749286 Sepal.Length 5.532143
#> 10 3 28 9.749286 Sepal.Width 2.635714
#> 11 3 28 9.749286 Petal.Length 3.960714
#> 12 3 28 9.749286 Petal.Width 1.228571
# Between and Total Sum of Squares
attributes(rez)$Sum_Squares_Total
#> [1] 681.3706
attributes(rez)$Sum_Squares_Between
#> [1] 591.8456
# \donttest{
#
# pvclust (finds "significant" clusters) ---------------------------
if (require("pvclust", quietly = TRUE)) {
data <- iris[1:4]
# NOTE: pvclust works on transposed data
model <- pvclust::pvclust(datawizard::data_transpose(data, verbose = FALSE),
method.dist = "euclidean",
nboot = 50,
quiet = TRUE
)
rez <- model_parameters(model, data, ci = 0.90)
rez
# Get clusters
predict(rez)
# Cluster centers in long form
attributes(rez)$means
# Between and Total Sum of Squares
attributes(rez)$Sum_Squares_Total
attributes(rez)$Sum_Squares_Between
}
#> [1] 134.5687
# }
# \donttest{
#
# K-means -------------------------------
model <- kmeans(iris[1:4], centers = 3)
rez <- model_parameters(model)
rez
#> # Clustering Solution
#>
#> The 3 clusters accounted for 88.43% of the total variance of the original data.
#>
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1 | 62 | 39.82 | 5.90 | 2.75 | 4.39 | 1.43
#> 2 | 50 | 15.15 | 5.01 | 3.43 | 1.46 | 0.25
#> 3 | 38 | 23.88 | 6.85 | 3.07 | 5.74 | 2.07
# Get clusters
predict(rez)
#> [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [38] 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#> [75] 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 3 3 3 3 1 3 3 3 3
#> [112] 3 3 1 1 3 3 3 3 1 3 1 3 1 3 3 1 1 3 3 3 3 3 1 3 3 3 3 1 3 3 3 1 3 3 3 1 3
#> [149] 3 1
# Cluster centers in long form
attributes(rez)$means
#> Cluster n_Obs Sum_Squares Variable Mean
#> 1 1 62 39.82097 Sepal.Length 5.901613
#> 2 1 62 39.82097 Sepal.Width 2.748387
#> 3 1 62 39.82097 Petal.Length 4.393548
#> 4 1 62 39.82097 Petal.Width 1.433871
#> 5 2 50 15.15100 Sepal.Length 5.006000
#> 6 2 50 15.15100 Sepal.Width 3.428000
#> 7 2 50 15.15100 Petal.Length 1.462000
#> 8 2 50 15.15100 Petal.Width 0.246000
#> 9 3 38 23.87947 Sepal.Length 6.850000
#> 10 3 38 23.87947 Sepal.Width 3.073684
#> 11 3 38 23.87947 Petal.Length 5.742105
#> 12 3 38 23.87947 Petal.Width 2.071053
# Between and Total Sum of Squares
attributes(rez)$Sum_Squares_Total
#> [1] 681.3706
attributes(rez)$Sum_Squares_Between
#> [1] 602.5192
# }
# \donttest{
#
# Hierarchical K-means (factoextra::hkmeans) ----------------------
if (require("factoextra", quietly = TRUE)) {
data <- iris[1:4]
model <- factoextra::hkmeans(data, k = 3)
rez <- model_parameters(model)
rez
# Get clusters
predict(rez)
# Cluster centers in long form
attributes(rez)$means
# Between and Total Sum of Squares
attributes(rez)$Sum_Squares_Total
attributes(rez)$Sum_Squares_Between
}
#> [1] 602.5192
# }
if (require("mclust", quietly = TRUE)) {
model <- mclust::Mclust(iris[1:4], verbose = FALSE)
model_parameters(model)
}
#> # Clustering Solution
#>
#> The 2 clusters accounted for 77.26% of the total variance of the original data.
#>
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1 | 50 | 15.15 | 5.01 | 3.43 | 1.46 | 0.25
#> 2 | 100 | 139.80 | 6.26 | 2.87 | 4.91 | 1.68
# \donttest{
#
# K-Medoids (PAM and HPAM) ==============
if (require("cluster", quietly = TRUE)) {
model <- cluster::pam(iris[1:4], k = 3)
model_parameters(model)
}
#> # Clustering Solution
#>
#> The 3 clusters accounted for 88.43% of the total variance of the original data.
#>
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1 | 50 | 15.15 | 5.01 | 3.43 | 1.46 | 0.25
#> 2 | 62 | 39.82 | 5.90 | 2.75 | 4.39 | 1.43
#> 3 | 38 | 23.88 | 6.85 | 3.07 | 5.74 | 2.07
if (require("fpc", quietly = TRUE)) {
model <- fpc::pamk(iris[1:4], criterion = "ch")
model_parameters(model)
}
#> # Clustering Solution
#>
#> The 3 clusters accounted for 88.43% of the total variance of the original data.
#>
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1 | 50 | 15.15 | 5.01 | 3.43 | 1.46 | 0.25
#> 2 | 62 | 39.82 | 5.90 | 2.75 | 4.39 | 1.43
#> 3 | 38 | 23.88 | 6.85 | 3.07 | 5.74 | 2.07
# }