Simulate data with specific characteristics.

simulate_correlation(n = 100, r = 0.5, mean = 0, sd = 1, names = NULL, ...)

simulate_ttest(n = 100, d = 0.5, names = NULL, ...)

simulate_difference(n = 100, d = 0.5, names = NULL, ...)

Arguments

n

The number of observations to be generated.

r

A value or vector corresponding to the desired correlation coefficients.

mean

A value or vector corresponding to the mean of the variables.

sd

A value or vector corresponding to the SD of the variables.

names

A character vector of desired variable names.

...

Arguments passed to or from other methods.

d

A value or vector corresponding to the desired difference between the groups.

Examples

# Correlation --------------------------------
data <- simulate_correlation(r = 0.5)
plot(data$V1, data$V2)
cor.test(data$V1, data$V2)
#>
#>  Pearson's product-moment correlation
#>
#> data:  data$V1 and data$V2
#> t = 5.7155, df = 98, p-value = 1.18e-07
#> alternative hypothesis: true correlation is not equal to 0
#> 95 percent confidence interval:
#>  0.3366433 0.6341398
#> sample estimates:
#> cor
#> 0.5
#>
summary(lm(V2 ~ V1, data = data))
#>
#> Call:
#> lm(formula = V2 ~ V1, data = data)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -1.9245 -0.5906 -0.1631  0.3893  2.5629
#>
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -1.110e-17  8.704e-02   0.000        1
#> V1           5.000e-01  8.748e-02   5.715 1.18e-07 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Residual standard error: 0.8704 on 98 degrees of freedom
#> Multiple R-squared: 0.25, Adjusted R-squared: 0.2423
#> F-statistic: 32.67 on 1 and 98 DF, p-value: 1.18e-07
#>
# Specify mean and SD
data <- simulate_correlation(r = 0.5, n = 50, mean = c(0, 1), sd = c(0.7, 1.7))
cor.test(data$V1, data$V2)
#>
#>  Pearson's product-moment correlation
#>
#> data:  data$V1 and data$V2
#> t = 4, df = 48, p-value = 0.000218
#> alternative hypothesis: true correlation is not equal to 0
#> 95 percent confidence interval:
#>  0.2574879 0.6832563
#> sample estimates:
#> cor
#> 0.5
#>
round(c(mean(data$V1), sd(data$V1)), 1)
#> [1] 0.0 0.7
round(c(mean(data$V2), sd(data$V2)), 1)
#> [1] 1.0 1.7
summary(lm(V2 ~ V1, data = data))
#>
#> Call:
#> lm(formula = V2 ~ V1, data = data)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -3.2338 -1.0836  0.1107  1.1077  3.2119
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)   1.0000     0.2104   4.754 1.86e-05 ***
#> V1            1.2143     0.3036   4.000 0.000218 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Residual standard error: 1.487 on 48 degrees of freedom
#> Multiple R-squared: 0.25, Adjusted R-squared: 0.2344
#> F-statistic: 16 on 1 and 48 DF, p-value: 0.000218
#>
# Generate multiple variables
cor_matrix <- matrix(
  c(
    1.0, 0.2, 0.4,
    0.2, 1.0, 0.3,
    0.4, 0.3, 1.0
  ),
  nrow = 3
)
data <- simulate_correlation(r = cor_matrix, names = c("y", "x1", "x2"))
cor(data)
#>      y  x1  x2
#> y  1.0 0.2 0.4
#> x1 0.2 1.0 0.3
#> x2 0.4 0.3 1.0
summary(lm(y ~ x1, data = data))
#>
#> Call:
#> lm(formula = y ~ x1, data = data)
#>
#> Residuals:
#>      Min       1Q   Median       3Q      Max
#> -1.99442 -0.73220  0.00153  0.66102  2.60331
#>
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 4.441e-18  9.848e-02   0.000    1.000
#> x1          2.000e-01  9.897e-02   2.021    0.046 *
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#>
#> Residual standard error: 0.9848 on 98 degrees of freedom
#> Multiple R-squared: 0.04, Adjusted R-squared: 0.0302
#> F-statistic: 4.083 on 1 and 98 DF, p-value: 0.04604
#>
# t-test --------------------------------
data <- simulate_ttest(n = 30, d = 0.3)
plot(data$V1, data$V0)
round(c(mean(data$V1), sd(data$V1)), 1)
#> [1] 0.0 0.9
diff(t.test(data$V1 ~ data$V0)$estimate)
#> mean in group 1
#>       0.5464734
summary(lm(V1 ~ V0, data = data))
#>
#> Call:
#> lm(formula = V1 ~ V0, data = data)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -1.5789 -0.8196  0.1068  0.5207  1.5425
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)  -0.2550     0.2213  -1.153    0.259
#> V01           0.5465     0.3239   1.687    0.103
#>
#> Residual standard error: 0.885 on 28 degrees of freedom
#> Multiple R-squared: 0.09229, Adjusted R-squared: 0.05987
#> F-statistic: 2.847 on 1 and 28 DF, p-value: 0.1027
#>
summary(glm(V0 ~ V1, data = data, family = "binomial"))
#>
#> Call:
#> glm(formula = V0 ~ V1, family = "binomial", data = data)
#>
#> Deviance Residuals:
#>     Min       1Q   Median       3Q      Max
#> -1.4764  -1.0728  -0.6724   1.0838   1.5624
#>
#> Coefficients:
#>             Estimate Std. Error z value Pr(>|z|)
#> (Intercept)  -0.1472     0.3845  -0.383    0.702
#> V1            0.7340     0.4586   1.601    0.109
#>
#> (Dispersion parameter for binomial family taken to be 1)
#>
#>     Null deviance: 41.455 on 29 degrees of freedom
#> Residual deviance: 38.578 on 28 degrees of freedom
#> AIC: 42.578
#>
#> Number of Fisher Scoring iterations: 4
#>
# Difference --------------------------------
data <- simulate_difference(n = 30, d = 0.3)
plot(data$V1, data$V0)
round(c(mean(data$V1), sd(data$V1)), 1)
#> [1] 0.0 0.9
diff(t.test(data$V1 ~ data$V0)$estimate)
#> mean in group 1
#>             0.3
summary(lm(V1 ~ V0, data = data))
#>
#> Call:
#> lm(formula = V1 ~ V0, data = data)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -1.5011 -0.6212  0.0000  0.6212  1.5011
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)  -0.1500     0.2206  -0.680    0.502
#> V01           0.3000     0.3119   0.962    0.344
#>
#> Residual standard error: 0.8542 on 28 degrees of freedom
#> Multiple R-squared: 0.03198, Adjusted R-squared: -0.00259
#> F-statistic: 0.9251 on 1 and 28 DF, p-value: 0.3444
#>
summary(glm(V0 ~ V1, data = data, family = "binomial"))
#>
#> Call:
#> glm(formula = V0 ~ V1, family = "binomial", data = data)
#>
#> Deviance Residuals:
#>    Min      1Q  Median      3Q     Max
#> -1.436  -1.143   0.000   1.143   1.436
#>
#> Coefficients:
#>               Estimate Std. Error z value Pr(>|z|)
#> (Intercept) -3.116e-16  3.711e-01   0.000    1.000
#> V1           4.369e-01  4.515e-01   0.968    0.333
#>
#> (Dispersion parameter for binomial family taken to be 1)
#>
#>     Null deviance: 41.589 on 29 degrees of freedom
#> Residual deviance: 40.618 on 28 degrees of freedom
#> AIC: 44.618
#>
#> Number of Fisher Scoring iterations: 4
#>