This code was used by the articles presented in bayestestR to simulate statistical models.

Functions

Packages

library(tidyverse)
library(logspline)
library(bayestestR)
library(parameters)
library(see)
library(rstanarm)

Features extraction

compute_indices <- function(models){

  params <- parameters::model_parameters(models$frequentist)[2, ] %>%
      dplyr::select(beta, SE, CI_low_95_freq = CI_low, CI_high_95_freq = CI_high, p_value = p)
  

  posterior <- as.data.frame(models$bayesian)$x
  
  algorithm <- insight::find_algorithm(models$bayesian)
  params$chains <- algorithm$chains
  params$iterations <- algorithm$iterations
  params$warmup <- algorithm$warmup / params$iterations
  params$samples <- nrow(posterior)
  
  prior <- bayestestR::describe_prior(models$bayesian)[2, ]
  params$Prior_Distribution <- prior$Prior_Distribution
  params$Prior_Location <- prior$Prior_Location
  params$Prior_Scale <- prior$Prior_Scale
  
  params$Median <- median(posterior)
  params$Mean <- mean(posterior)
  map <- map_estimate(posterior)
  params$MAP <- map
  
  params$MAP_density <- attributes(map)$MAP_density
  params$p_direction <- p_direction(posterior, method="direct")
  params$p_direction_AUC <- p_direction(posterior, method="logspline")
  params$p_MAP <- p_map(posterior)
  
  range <- rope_range(models$bayesian)
  params$ROPE_range <- range[[2]]
  params$ROPE_90 <- rope(posterior, range = range, ci=0.90)$ROPE_Percentage
  params$ROPE_95 <- rope(posterior, range = range, ci=0.95)$ROPE_Percentage
  params$ROPE_full <- rope(posterior, range = range, ci=1)$ROPE_Percentage
  params$p_ROPE <- tryCatch({
  p_rope(posterior, range = range)
  }, error = function(error_condition) {
      NA
  })
  # Bayesfactor
  # This doens't really work
  # params$BF <- as.numeric(bayesfactor_savagedickey(models$bayesian,
  #                                   verbose=FALSE)$BF)
  
  params$BF <- as.numeric(bayesfactor_savagedickey(posterior,
                                    prior = distribution_normal(length(posterior),
                                                                mean=params$Prior_Location,
                                                                sd=params$Prior_Scale),
                                    hypothesis = 0,
                                    verbose=FALSE))
  
  # CI
  ci_95 <- hdi(posterior, ci=0.95)
  params$CI_low_95_hdi <- ci_95$CI_low
  params$CI_high_95_hdi <- ci_95$CI_high
  
  ci_90 <- hdi(posterior, ci=0.90)
  params$CI_low_90_hdi <- ci_90$CI_low
  params$CI_high_90_hdi <- ci_90$CI_high
  
  ci_95 <- ci(posterior, ci=0.95)
  params$CI_low_95_quantile <- ci_95$CI_low
  params$CI_high_95_quantile <- ci_95$CI_high
  
  ci_90 <- ci(posterior, ci=0.90)
  params$CI_low_90_quantile <- ci_90$CI_low
  params$CI_high_90_quantile <- ci_90$CI_high
    
  
  params
}

Study 1 - Sample Size and Error

Method

The simulation aimed at modulating the following characteristics:

  • Model type: linear or logistic.
  • “True” effect (original regression coefficient from which data is drawn): Can be 1 or 0 (no effect).
  • Sample size: From 20 to 100 by steps of 10.
  • Error: Gaussian noise applied to the predictor with SD uniformly spread between 0.33 and 6.66 (with 1000 different values).

We generated a dataset for each combination of these characteristics, resulting in a total of 2 * 2 * 9 * 1000 = 36000 Bayesian and frequentist models.

Visualise data

Example data

effects <- c(0, 1)
sample_sizes <- c(240)
outcome_types <- c("binary", "linear")
errors <- seq(0.33, 6.66, length.out = 3)

example_data <- data.frame()
for(outcome_type in outcome_types){
  for(true_effect in effects){
    for(error in errors){
      for(sample_size in sample_sizes){
        data <- generate_data(true_effect, outcome_type, sample_size, error)
        example_data <- rbind(
          example_data,
          mutate(data,
                 true_effect=true_effect,
                 outcome_type=outcome_type,
                 sample_size=sample_size,
                 error=error))
      }
    }
  }
}

linear_noeffect <- example_data %>% 
  filter(outcome_type == "linear",
         true_effect==0) %>% 
  mutate(sample_size = as.factor(sample_size),
         error = as.factor(error)) %>% 
  ggplot(aes(x=x, y=y, color=error)) +
  geom_point2(alpha=0.2, size=3) +
  geom_smooth(method="lm")+
  theme_modern() +
  scale_color_material_d() +
  facet_wrap(~error, scale="free", nrow=3)
linear_effect <- example_data %>% 
  filter(outcome_type == "linear",
         true_effect==1) %>% 
  mutate(sample_size = as.factor(sample_size),
         error = as.factor(error)) %>% 
  ggplot(aes(x=x, y=y, color=error)) +
  geom_point2(alpha=0.2, size=3) +
  geom_smooth(method="lm")+
  theme_modern() +
  scale_color_material_d() +
  facet_wrap(~error, scale="free", nrow=3)
binary_noeffect <- example_data %>% 
  filter(outcome_type == "binary",
         true_effect==0) %>% 
  mutate(true_effect = as.factor(true_effect),
         sample_size = as.factor(sample_size),
         error = as.factor(error)) %>% 
  ggplot(aes(x=x, y=y, color=error)) +
  geom_point2(alpha=0.2, size=3) +
  geom_smooth(method="glm")+
  theme_modern() +
  scale_color_material_d() +
  facet_wrap(~true_effect*error, scale="free", nrow=3)
binary_effect <- example_data %>% 
  filter(outcome_type == "binary",
         true_effect==1) %>% 
  mutate(true_effect = as.factor(true_effect),
         sample_size = as.factor(sample_size),
         error = as.factor(error)) %>% 
  ggplot(aes(x=x, y=y, color=error)) +
  geom_point2(alpha=0.2, size=3) +
  geom_smooth(method="glm")+
  theme_modern() +
  scale_color_material_d() +
  facet_wrap(~true_effect*error, scale="free", nrow=3)

plots(linear_effect, linear_noeffect, 
      binary_effect, binary_noeffect, 
      nrow = 2,
      tags=c("Linear - Effect", "Linear - No effect",
             "Binary - Effect", "Binary - No effect"))

Study 2 - Sampling Characteristics

Method

The simulation aimed at modulating the following characteristics:

  • Model type: linear or logistic.
  • “True” effect (original regression coefficient from which data is drawn): Can be 1 or 0 (no effect).
  • draws: from 10 to 5000 by step of 5 (1000 iterations).
  • warmup: Ratio of warmup iterations. from 1/10 to 9/10 by step of 0.1 (9 iterations).

We generated 3 datasets for each combination of these characteristics, resulting in a total of 2 * 2 * 1000 * 9 = 34560 Bayesian and frequentist models.

Study 3 - Priors Specification

The simulation aimed at modulating the following characteristics:

  • Model type: linear or logistic.
  • prior direction: either “congruent” (1), “uninformative” (0) or “opposite” (-1).
  • prior precision: prior SD from 0.5 to 1.5.

Session

References