Parameters from Cluster Models (k-means, ...) — model

Format cluster models obtained, for example, by kmeans().

Usage

# S3 method for class 'dbscan'
model_parameters(model, data = NULL, clusters = NULL, ...)

# S3 method for class 'hclust'
model_parameters(model, data = NULL, clusters = NULL, ...)

# S3 method for class 'pvclust'
model_parameters(model, data = NULL, clusters = NULL, ci = 0.95, ...)

# S3 method for class 'kmeans'
model_parameters(model, ...)

# S3 method for class 'hkmeans'
model_parameters(model, ...)

# S3 method for class 'Mclust'
model_parameters(model, data = NULL, clusters = NULL, ...)

# S3 method for class 'pam'
model_parameters(model, data = NULL, clusters = NULL, ...)

Arguments

model: Cluster model.
data: A data.frame.
clusters: A vector of cluster assignments (must have the same length as the number of rows in data).
...: Arguments passed to or from other methods.
ci: Confidence Interval (CI) level. Defaults to 0.95 (95%).

Examples

# \donttest{
# DBSCAN ---------------------------
# Runs only when the optional 'dbscan' package is available.
if (require("dbscan", quietly = TRUE)) {
  model <- dbscan::dbscan(iris[1:4], eps = 1.45, minPts = 10)

  # The data used for fitting is passed alongside the model
  rez <- model_parameters(model, iris[1:4])
  rez

  # Get clusters
  predict(rez)

  # Cluster centers in long form
  attributes(rez)$means

  # Between and Total Sum of Squares
  attributes(rez)$Sum_Squares_Total
  attributes(rez)$Sum_Squares_Between

  # HDBSCAN
  model <- dbscan::hdbscan(iris[1:4], minPts = 10)
  # Last expression of the block: its value is the only output shown below
  model_parameters(model, iris[1:4])
}
#> # Clustering Solution
#> 
#> The 2 clusters accounted for 77.26% of the total variance of the original data.
#> 
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1       |   100 |      139.80 |         6.26 |        2.87 |         4.91 |        1.68
#> 2       |    50 |       15.15 |         5.01 |        3.43 |         1.46 |        0.25
# }
#
# Hierarchical clustering (hclust) ---------------------------
# The hclust method takes the data and the cluster assignments as extra
# arguments; here the assignments come from cutting the tree into 3 groups.
data <- iris[1:4]
model <- hclust(dist(data))
clusters <- cutree(model, 3)

rez <- model_parameters(model, data, clusters)
rez
#> # Clustering Solution
#> 
#> The 3 clusters accounted for 86.86% of the total variance of the original data.
#> 
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1       |    50 |       15.15 |         5.01 |        3.43 |         1.46 |        0.25
#> 2       |    72 |       64.62 |         6.55 |        2.96 |         5.27 |        1.85
#> 3       |    28 |        9.75 |         5.53 |        2.64 |         3.96 |        1.23

# Get clusters
predict(rez)
#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#>  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 3 2 3 2 3 2 3 3 3 3 2 3 2 3 3 2 3 2 3 2 2
#>  [75] 2 2 2 2 2 3 3 3 3 2 3 2 2 2 3 3 3 2 3 3 3 3 3 2 3 3 2 2 2 2 2 2 3 2 2 2 2
#> [112] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [149] 2 2

# Cluster centers in long form
attributes(rez)$means
#>    Cluster n_Obs Sum_Squares     Variable     Mean
#> 1        1    50   15.151000 Sepal.Length 5.006000
#> 2        1    50   15.151000  Sepal.Width 3.428000
#> 3        1    50   15.151000 Petal.Length 1.462000
#> 4        1    50   15.151000  Petal.Width 0.246000
#> 5        2    72   64.624722 Sepal.Length 6.545833
#> 6        2    72   64.624722  Sepal.Width 2.963889
#> 7        2    72   64.624722 Petal.Length 5.273611
#> 8        2    72   64.624722  Petal.Width 1.850000
#> 9        3    28    9.749286 Sepal.Length 5.532143
#> 10       3    28    9.749286  Sepal.Width 2.635714
#> 11       3    28    9.749286 Petal.Length 3.960714
#> 12       3    28    9.749286  Petal.Width 1.228571

# Between and Total Sum of Squares
# The attributes are named Sum_Squares_Total / Sum_Squares_Between; the
# reversed spellings (Total_Sum_Squares, ...) do not exist and yield NULL.
attributes(rez)$Sum_Squares_Total
#> [1] 681.3706
attributes(rez)$Sum_Squares_Between
#> [1] 591.8456
# \donttest{
#
# pvclust (finds "significant" clusters) ---------------------------
# Runs only when the optional 'pvclust' package is available.
if (require("pvclust", quietly = TRUE)) {
  data <- iris[1:4]
  # NOTE: pvclust works on transposed data
  model <- pvclust::pvclust(datawizard::data_transpose(data, verbose = FALSE),
    method.dist = "euclidean",
    nboot = 50,
    quiet = TRUE
  )

  # The untransposed data is passed here; ci sets the confidence level
  # (0.90 instead of the default 0.95)
  rez <- model_parameters(model, data, ci = 0.90)
  rez

  # Get clusters
  predict(rez)

  # Cluster centers in long form
  attributes(rez)$means

  # Between and Total Sum of Squares
  attributes(rez)$Sum_Squares_Total
  # Last expression of the block: its value is the only output shown below
  attributes(rez)$Sum_Squares_Between
}
#> [1] 134.5687
# }
# \donttest{
#
# K-means -------------------------------
# NOTE: kmeans() picks its starting centers at random when `centers` is a
# number and no seed is set here, so cluster numbering may differ between runs.
model <- kmeans(iris[1:4], centers = 3)
# The kmeans method needs only the fitted model
rez <- model_parameters(model)
rez
#> # Clustering Solution
#> 
#> The 3 clusters accounted for 88.43% of the total variance of the original data.
#> 
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1       |    62 |       39.82 |         5.90 |        2.75 |         4.39 |        1.43
#> 2       |    50 |       15.15 |         5.01 |        3.43 |         1.46 |        0.25
#> 3       |    38 |       23.88 |         6.85 |        3.07 |         5.74 |        2.07

# Get clusters
predict(rez)
#>   [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [38] 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#>  [75] 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 3 3 3 3 1 3 3 3 3
#> [112] 3 3 1 1 3 3 3 3 1 3 1 3 1 3 3 1 1 3 3 3 3 3 1 3 3 3 3 1 3 3 3 1 3 3 3 1 3
#> [149] 3 1

# Cluster centers in long form
attributes(rez)$means
#>    Cluster n_Obs Sum_Squares     Variable     Mean
#> 1        1    62    39.82097 Sepal.Length 5.901613
#> 2        1    62    39.82097  Sepal.Width 2.748387
#> 3        1    62    39.82097 Petal.Length 4.393548
#> 4        1    62    39.82097  Petal.Width 1.433871
#> 5        2    50    15.15100 Sepal.Length 5.006000
#> 6        2    50    15.15100  Sepal.Width 3.428000
#> 7        2    50    15.15100 Petal.Length 1.462000
#> 8        2    50    15.15100  Petal.Width 0.246000
#> 9        3    38    23.87947 Sepal.Length 6.850000
#> 10       3    38    23.87947  Sepal.Width 3.073684
#> 11       3    38    23.87947 Petal.Length 5.742105
#> 12       3    38    23.87947  Petal.Width 2.071053

# Between and Total Sum of Squares
attributes(rez)$Sum_Squares_Total
#> [1] 681.3706
attributes(rez)$Sum_Squares_Between
#> [1] 602.5192
# }
# \donttest{
#
# Hierarchical K-means (factoextra::hkmeans) ----------------------
# Runs only when the optional 'factoextra' package is available.
if (require("factoextra", quietly = TRUE)) {
  data <- iris[1:4]
  model <- factoextra::hkmeans(data, k = 3)

  # The hkmeans method needs only the fitted model
  rez <- model_parameters(model)
  rez

  # Get clusters
  predict(rez)

  # Cluster centers in long form
  attributes(rez)$means

  # Between and Total Sum of Squares
  attributes(rez)$Sum_Squares_Total
  # Last expression of the block: its value is the only output shown below
  attributes(rez)$Sum_Squares_Between
}
#> [1] 602.5192
# }
# Mclust (mclust::Mclust) ---------------------------
# Runs only when the optional 'mclust' package is available.
if (require("mclust", quietly = TRUE)) {
  model <- mclust::Mclust(iris[1:4], verbose = FALSE)
  # Called with the model only; data and clusters keep their NULL defaults
  model_parameters(model)
}
#> # Clustering Solution
#> 
#> The 2 clusters accounted for 77.26% of the total variance of the original data.
#> 
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1       |    50 |       15.15 |         5.01 |        3.43 |         1.46 |        0.25
#> 2       |   100 |      139.80 |         6.26 |        2.87 |         4.91 |        1.68
# \donttest{
#
# K-Medoids (PAM and HPAM) ==============
# PAM with a fixed number of clusters (k = 3); needs the 'cluster' package.
if (require("cluster", quietly = TRUE)) {
  model <- cluster::pam(iris[1:4], k = 3)
  model_parameters(model)
}
#> # Clustering Solution
#> 
#> The 3 clusters accounted for 88.43% of the total variance of the original data.
#> 
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1       |    50 |       15.15 |         5.01 |        3.43 |         1.46 |        0.25
#> 2       |    62 |       39.82 |         5.90 |        2.75 |         4.39 |        1.43
#> 3       |    38 |       23.88 |         6.85 |        3.07 |         5.74 |        2.07
# fpc::pamk() is called without a fixed k; needs the 'fpc' package.
# The printed solution below has 3 clusters.
if (require("fpc", quietly = TRUE)) {
  model <- fpc::pamk(iris[1:4], criterion = "ch")
  model_parameters(model)
}
#> # Clustering Solution
#> 
#> The 3 clusters accounted for 88.43% of the total variance of the original data.
#> 
#> Cluster | n_Obs | Sum_Squares | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width
#> ---------------------------------------------------------------------------------------
#> 1       |    50 |       15.15 |         5.01 |        3.43 |         1.46 |        0.25
#> 2       |    62 |       39.82 |         5.90 |        2.75 |         4.39 |        1.43
#> 3       |    38 |       23.88 |         6.85 |        3.07 |         5.74 |        2.07
# }