Generate Bootstrapped Data

Convenience Functions

# Simulate a "parent" population of `n` observations.
#
# `y` is drawn from a standard normal distribution; `x` is `y` plus
# independent Gaussian noise with standard deviation `noise`.
#
# Args:
#   noise: standard deviation of the noise added to `y` to obtain `x`.
#   n: number of observations to draw.
#
# Returns a data frame with columns `x`, `y` and `noise`, the last one
# holding the `noise` argument repeated on every row.
generate_parent_population <- function(noise=0.1, n=10000){
  # Draw order matters for reproducibility under a fixed seed: the outcome
  # is generated first, the perturbation second.
  outcome <- rnorm(n)
  perturbation <- rnorm(n, sd = noise)
  data.frame(x = outcome + perturbation, y = outcome, noise = noise)
}

# Draw a simple random sample of `n` rows (without replacement) from `data`.
#
# Args:
#   data: a data frame (or matrix) to sample rows from.
#   n: number of rows to draw; `sample()` raises an error when `n` exceeds
#      `nrow(data)`.
#
# Returns the selected rows, in the order they were drawn.
extract_sample <- function(data, n=50){
  rows <- sample(nrow(data), n)
  # drop = FALSE keeps the result two-dimensional even when `data` has a
  # single column; without it the subset would collapse to a bare vector.
  data[rows, , drop = FALSE]
}

# Collect estimates for one model term from the regular parameter table and
# from two bootstrapped parameter tables.
#
# Args:
#   model: a fitted model accepted by parameters::model_parameters().
#
# Returns the column-wise combination of three one-row tables: the plain
# estimates, and the bootstrapped estimates whose column names carry the
# suffixes "_1000" and "_4000".
#
# NOTE(review): the cell selection is positional (row 2, columns 2 and 3);
# presumably this is the slope term and its first two numeric summaries, but
# that depends on the layout returned by the installed `parameters` version
# -- confirm.
# NOTE(review): `n` is passed as the bootstrap size; verify that the installed
# `parameters` version honours an argument of that name.
extract_parameters <- function(model){
  term_row <- 2
  kept_cols <- c(2, 3)

  # Bootstrapped table for the selected term, with `suffix` appended to every
  # column name so the different runs can sit side by side.
  bootstrapped <- function(n_boot, suffix){
    tab <- parameters::model_parameters(model, bootstrap=TRUE, n=n_boot)[term_row, kept_cols]
    names(tab) <- paste0(names(tab), suffix)
    tab
  }

  # Evaluation order (plain, then 1000, then 4000) is kept so that results
  # under a fixed seed do not change.
  params <- parameters::model_parameters(model)[term_row, kept_cols]
  boot1 <- bootstrapped(1000, "_1000")
  boot2 <- bootstrapped(4000, "_4000")

  cbind(params, boot1, boot2)
}

Creating Data

# Simulation settings: noise levels to explore, number of samples drawn per
# noise level, and number of rows in each sample.
noises <- c(0.25, 0.5, 1, 1.5, 2, 2.5)
iterations <- 1000
sample_size <- 50

# One row per (noise level, iteration), accumulated across noise levels.
final_data <- data.frame()
for(noise in noises){
  print(noise)

  # Reference fit on the full simulated population for this noise level.
  # NOTE(review): [2, c(2, 3)] selects by position; presumably the `x` term and
  # its first two numeric columns -- confirm against the installed
  # `parameters` version.
  data <- generate_parent_population(noise, n=10000)
  parent_model <- lm(y ~ x, data=data)
  parent_params <- parameters::model_parameters(parent_model)[2, c(2, 3)]
  names(parent_params) <- paste0("Parent_", names(parent_params))

  # Preallocate the per-iteration results and bind them once per noise level;
  # growing `final_data` with rbind() on every iteration recopies the whole
  # table each time.
  noise_results <- vector("list", iterations)
  # seq_len() yields an empty loop when iterations is 0 (1:0 would run twice).
  for(i in seq_len(iterations)){
    cat(".")
    sample <- extract_sample(data, n=sample_size)

    model <- lm(y ~ x, data=sample)
    params <- cbind(parent_params, extract_parameters(model))
    params$Noise <- noise

    noise_results[[i]] <- params
  }
  final_data <- rbind(final_data, do.call(rbind, noise_results))

  # Rewritten after every noise level, so the file always holds the results
  # accumulated so far.
  write.csv(final_data, "../data/bootstrapped.csv", row.names = FALSE)
}