<- function (palette = "black_and_white", base_size = 14, base_family = "sans",
mytheme base_fontface = "plain", base_line_size = base_size/20, base_rect_size = base_size/14,
axis_text_angle = 0, border = FALSE) {
<- function(x) {
is_bool is_logical(x, n = 1) && !is.na(x)
} <- axis_text_angle[1]
angle if (!angle %in% c(0, 45, 90, 270))
stop(sprintf("'axis_text_angle' must be one of [%s]",
paste(c(0, 45, 90, 270), collapse = ", ")), ".\nFor other angles, use the guide_axis() function in ggplot2 instead",
call. = FALSE)
if (!palette %in% names(ggprism::ggprism_data$themes)) {
stop("The palette ", paste(palette), " does not exist.\n See names(ggprism_data$themes) for valid palette names")
}<- tibble::deframe(ggprism::ggprism_data$themes[[palette]])
colours if (!is_bool(border)) {
stop("border must be either: TRUE or FALSE")
}else {
if (border) {
<- element_rect(fill = NA)
panel.border <- element_blank()
axis.line
}else if (!border) {
<- element_blank()
panel.border <- element_line()
axis.line
}
}<- theme(line = element_line(colour = colours["axisColor"],
t size = base_line_size, linetype = 1, lineend = "square"),
rect = element_rect(fill = "white", colour = colours["axisColor"],
size = base_rect_size, linetype = 1), text = element_text(family = base_family,
face = base_fontface, colour = colours["graphTitleColor"],
size = base_size, lineheight = 0.9, hjust = 0.5,
vjust = 0.5, angle = 0, margin = margin(), debug = FALSE),
prism.ticks.length = unit(base_size/50, "pt"), axis.line = axis.line,
axis.line.x = NULL, axis.line.y = NULL, axis.text = element_text(size = rel(0.95),
colour = colours["axisLabelColor"]), axis.text.x = element_text(margin = margin(t = 0.8 *
/4), angle = axis_text_angle, hjust = ifelse(axis_text_angle %in%
base_sizec(45, 90, 270), 1, 0.5), vjust = ifelse(axis_text_angle %in%
c(0, 90, 270), 0.5, 1)), axis.text.x.top = element_text(margin = margin(b = 0.8 *
/4), vjust = 0), axis.text.y = element_text(margin = margin(r = 0.5 *
base_size/4), hjust = 1), axis.text.y.right = element_text(margin = margin(l = 0.5 *
base_size/4), hjust = 0), axis.ticks = element_line(),
base_sizeaxis.ticks.length = unit(3, "points"), axis.ticks.length.x = NULL,
axis.ticks.length.x.top = NULL, axis.ticks.length.x.bottom = NULL,
axis.ticks.length.y = NULL, axis.ticks.length.y.left = NULL,
axis.ticks.length.y.right = NULL, axis.title = element_text(colour = colours["axisTitleColor"]),
axis.title.x = element_text(margin = margin(t = base_size *
0.6), vjust = 1), axis.title.x.top = element_text(margin = margin(b = base_size *
0.6), vjust = 0), axis.title.y = element_text(angle = 90,
margin = margin(r = base_size * 0.6), vjust = 1),
axis.title.y.right = element_text(angle = -90, margin = margin(l = base_size *
0.6), vjust = 0), legend.background = element_blank(),
legend.spacing = unit(base_size, "pt"), legend.spacing.x = NULL,
legend.spacing.y = NULL, legend.margin = margin(base_size/2,
/2, base_size/2, base_size/2), legend.key = element_blank(),
base_sizelegend.key.size = unit(1.2, "lines"), legend.key.height = NULL,
legend.key.width = unit(base_size * 1.8, "pt"), legend.text = element_text(size = rel(0.8),
face = "plain"), legend.text.align = NULL, legend.title = element_blank(),
legend.title.align = NULL, legend.position = "right",
legend.direction = NULL, legend.justification = "center",
legend.box = NULL, legend.box.margin = margin(0, 0, 0,
0, "cm"), legend.box.background = element_blank(),
legend.box.spacing = unit(base_size, "pt"), panel.background = element_rect(fill = ifelse(palette ==
"office", colours["plottingAreaColor"], NA), colour = NA),
panel.border = panel.border, panel.grid = element_blank(),
panel.grid.minor = element_blank(), panel.spacing = unit(base_size/2,
"pt"), panel.spacing.x = NULL, panel.spacing.y = NULL,
panel.ontop = FALSE, strip.background = element_blank(),
strip.text = element_text(colour = colours["axisTitleColor"],
size = rel(0.8), margin = margin(base_size/2.5, base_size/2.5,
/2.5, base_size/2.5)), strip.text.x = element_text(margin = margin(b = base_size/3)),
base_sizestrip.text.y = element_text(angle = -90, margin = margin(l = base_size/3)),
strip.text.y.left = element_text(angle = 90), strip.placement = "inside",
strip.placement.x = NULL, strip.placement.y = NULL, strip.switch.pad.grid = unit(base_size/4,
"pt"), strip.switch.pad.wrap = unit(base_size/4,
"pt"), plot.background = element_rect(fill = colours["pageBackgroundColor"],
colour = NA), plot.title = element_text(size = rel(1.2),
hjust = 0.5, vjust = 1, margin = margin(b = base_size)),
plot.title.position = "panel", plot.subtitle = element_text(hjust = 0.5,
vjust = 1, margin = margin(b = base_size/2)), plot.caption = element_text(size = rel(0.8),
hjust = 1, vjust = 1, margin = margin(t = base_size/2)),
plot.caption.position = "panel", plot.tag = element_text(size = rel(1.2),
hjust = 0.5, vjust = 0.5), plot.tag.position = "topleft",
plot.margin = margin(base_size/2, base_size/2, base_size/2,
/2), complete = TRUE)
base_size::ggprism_data$themes[["all_null"]] %+replace% t
ggprism }
# Load libraries
library(dplyr)
library(purrr)
library(stringr)
library(ggplot2)
library(ggpubr)
library(jcolors)
library(cowplot)
library(ggprism)
library(tidybayes)
library(reshape2)
library(parallel)
library(rstan)
= c("Paper Effect Size", "Paper Effect Size/2", "Paper Effect Size/3",
effect_size "Paper Effect Size/4")
# Functions
<- function(b) exp(c(0, b))/sum(exp(c(0, b)))
psoftmax
<- function(variable, words_to_replace, replace_with = "") {
words_replace
library(dplyr)
if (!require(dplyr)) {
install.packages("dplyr")
library(dplyr)
}
# variable <- enquo(variable) var <- select(data, !!variable) %>% .[,]
if (!is.character(variable))
<- as.character(variable)
variable
for (word_i in 1:length(words_to_replace)) {
<- which(str_detect(variable, fixed(words_to_replace[word_i])))
word_to_rm_index
if (any(word_to_rm_index)) {
<- words_to_replace[word_i]
words_to_replace_i <- sapply(seq_along(word_to_rm_index), function(i) {
new_words <- gsub(words_to_replace_i, replace_with,
variable[word_to_rm_index][i] fixed = TRUE)
variable[word_to_rm_index][i],
})<- new_words
variable[word_to_rm_index] else {
} warning(paste0("Sting '", words_to_replace, "' not found"))
}
}
return(variable)
}
<- function(x) {
seq_vector <- c(which(diff(x) != 0), length(x))
idx <- c(idx[1], diff(idx))
time rep(x = 1:length(idx), times = time)
}
# Load data
<- readRDS("../Pilot/Short Version/Data/data_short.rds")$data %>%
data_short mutate(subject = 1:n()) %>%
rename(age = Age., A3 = A3.)
<- readRDS("../Pilot/Short Version/Data/data_short.rds")$question
questions
# ----- Mental Accounting ----- # Remove final questionnaires
<- data_short[, -(48:75)]
data_short_MA # Remove response time
<- str_detect(names(data_short_MA), "time_")
idx_time <- data_short_MA[, !idx_time]
data_short_MA
<- data_short_MA %>%
data_short_MA rename_with(~gsub(".", "", .x, fixed = TRUE)) %>%
rename_with(toupper, .cols = -contains("subject"))
<- nrow(data_short_MA) n_sbj
Here we conduct a simulation-based power analysis using the software environment R and the probabilistic programming language Stan for Bayesian statistical inference. For each scenario, we define the target effect size as half of the effect size found in the original reference papers in order to minimize the risk of performing the analysis on inflated effect sizes. Our goal is to obtain 0.8 power to detect the target effect size for each scenario at the standard 0.05 alpha error probability. We estimate the power for each sample ranging from 50 to 300 participants in increments of 50. We find that a sample of 250 participants per country is needed to achieve 80% power in all scenarios.
functions {
/*
* Args:
* y: response value 0: A, 1: B, 2: No difference
* ds: different (0|1) - same (2) probability
* cA: conditional (different) A probability
* Returns:
* a scalar to be added to the log posterior
*/
real mental_accounting_lpdf(real y, real ds, real cA) {
if (y == 0) { // Stay (Explore)
return bernoulli_lpmf(1 | ds) + bernoulli_lpmf(1 | cA);
else if (y == 1) { // Switch (Explore)
} return bernoulli_lpmf(1 | ds) + bernoulli_lpmf(0 | cA);
else { // Select
} return bernoulli_lpmf(0 | ds);
}
}
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K_ds; // number of population-level effects
matrix[N, K_ds] X_ds; // population-level design matrix
int<lower=1> K_cA; // number of population-level effects
matrix[N, K_cA] X_cA; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}parameters {
vector[K_ds] b_ds; // population-level effects
vector[K_cA] b_cA; // population-level effects
}
transformed parameters {
}
model {
// likelihood including constants
if (!prior_only) {
// initialize different - same linear predictor term
vector[N] ds = X_ds * b_ds;
// initialize linear predictor term
vector[N] cA = X_cA * b_cA;
for (n in 1:N) {
// apply the inverse link function
ds[n] = inv_logit(ds[n]);
}for (n in 1:N) {
// apply the inverse link function
cA[n] = inv_logit(cA[n]);
}for (n in 1:N) {
target += mental_accounting_lpdf(Y[n] | ds[n], cA[n]);
}
}// priors including constants
target += normal_lpdf(b_ds | 0, 5);
target += normal_lpdf(b_cA | 0, 5);
}
# ---- Power Calculation ---- #
<- 4
nModels <- 2
nChains <- nModels*nChains
nClusters
<- 200
n_sim <- c(10, 50, 100, 150)
n_sbj_range
# Calculate the probability of responding 'different'
= function(p) log(p/(1-p))
logit
# Effect found in the paper Thaler 1985
= 87
n_sbj = 1:4
scenario = c(56, 66, 22, 19)
A = c(16, 14, 61, 63)
B = (A+B)/n_sbj # Probability to choose Different
ds = A/(A+B)
cA
= logit(ds)
effect_ds = logit(cA)
effect_cA
<- function(sbj, mu_ds, mu_cA){
sim_y
= 0.5
std = 87
n_sbj = 1:4
scenario = c(56, 66, 22, 19)
A = c(16, 14, 61, 63)
B = (A+B)/n_sbj # Probability to choose Different
ds = A/(A+B)
cA
# Probability of 'Different' and Conditional Probability of 'A'
<- plogis(mu_ds)
p_ds <- plogis(mu_cA)
p_cA
<- sapply( scenario, function(i){
response <- rbinom( 1, 1, p_ds[i])
y_ds if( y_ds==1 ){
= rbinom(1, 1, (1-cA[i]) )
response else {
} = 2
response
}
response
})
return( data.frame(subject=sbj, response, scenario=paste0('s', scenario)) )
}
<- function(x){
samplingfunction
#Simulate data
<- purrr::map_dfr(seq_len(n),
data_sim mu_ds=effect_ds,
sim_y, mu_cA=effect_cA)
## Create Stan Data
<- model.matrix(~ 0 + scenario, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X_ds = X,
X_cA = X,
K_ds = ncol(X),
K_cA = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
# Fit the model
<- rstan::sampling(mixture_scenario_1,
fit iter = 2000,
cores = 2,
chains = 2,
pars = c('b_ds', 'b_cA'),
data = stan_data,
refresh = 0,
save_warmup = FALSE)
<- rstan::extract(fit)
efit colnames(efit$b_cA) <- c(
'gain-gain VS gain',
'loss-loss VS loss',
'gain-loss VS gain',
'loss-gain VS loss'
)
<- as.data.frame(efit$b_cA)
df <- with(df, `gain-gain VS gain` - `gain-loss VS gain`)
gain <- with(df, `loss-loss VS loss` - `loss-gain VS loss`)
loss
rbind(
data.frame(
estimate = mean(gain),
lwr = quantile(gain, probs=0.025),
upr = quantile(gain, probs=0.975),
type = 'gain',
N = n
),
data.frame(
estimate = mean(loss),
lwr = quantile(loss, probs=0.025),
upr = quantile(loss, probs=0.975),
type = 'loss',
N = n
)
)
}
<- function(x){
samplingfunction2
#Simulate data
<- purrr::map_dfr(seq_len(n),
data_sim mu_ds=effect_ds/2,
sim_y, mu_cA=effect_cA/2)
## Create Stan Data
<- model.matrix(~ 0 + scenario, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X_ds = X,
X_cA = X,
K_ds = ncol(X),
K_cA = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
# Fit the model
<- rstan::sampling(mixture_scenario_1,
fit iter = 2000,
cores = 2,
chains = 2,
pars = c('b_ds', 'b_cA'),
data = stan_data,
refresh = 0,
save_warmup = FALSE)
<- rstan::extract(fit)
efit colnames(efit$b_cA) <- c(
'gain-gain VS gain',
'loss-loss VS loss',
'gain-loss VS gain',
'loss-gain VS loss'
)
<- as.data.frame(efit$b_cA)
df <- with(df, `gain-gain VS gain` - `gain-loss VS gain`)
gain <- with(df, `loss-loss VS loss` - `loss-gain VS loss`)
loss
rbind(
data.frame(
estimate = mean(gain),
lwr = quantile(gain, probs=0.025),
upr = quantile(gain, probs=0.975),
type = 'gain',
N = n
),
data.frame(
estimate = mean(loss),
lwr = quantile(loss, probs=0.025),
upr = quantile(loss, probs=0.975),
type = 'loss',
N = n
)
)
}
<- data.frame()
power for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("mixture_scenario_1", "n_sbj_range", "sim_y", "effect_ds",
parallel"effect_cA", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction)
parallel
})<- rbind(power, out)
power
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power2 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("mixture_scenario_1", "n_sbj_range", "sim_y", "effect_ds",
parallel"effect_cA", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction2)
parallel
})<- rbind(power2, out)
power2
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- power %>%
pw1 mutate(sim_n = rep(rep(1:n_sim, each = 2), length(n_sbj_range))) %>%
mutate(is_sign = ifelse(lwr > 0, TRUE, FALSE)) %>%
group_by(sim_n, N) %>%
mutate(is_significant = all(is_sign)) %>%
filter(row_number() == 1) %>%
group_by(N) %>%
summarise(power = mean(is_significant)) %>%
mutate(effsize = "Paper Effect Size")
<- power2 %>%
pw2 mutate(sim_n = rep(rep(1:n_sim, each = 2), length(n_sbj_range))) %>%
mutate(is_sign = ifelse(lwr > 0, TRUE, FALSE)) %>%
group_by(sim_n, N) %>%
mutate(is_significant = all(is_sign)) %>%
filter(row_number() == 1) %>%
group_by(N) %>%
summarise(power = mean(is_significant)) %>%
mutate(effsize = "Paper Effect Size/2")
rbind(pw1, pw2) %>%
ggplot(aes(N, power, color = effsize, linetype = effsize)) + geom_line(size = 1) +
geom_point(size = 3) + geom_hline(yintercept = 0.8, linetype = 2) + scale_y_continuous(limits = c(0,
1), labels = scales::percent) + scale_x_continuous(breaks = n_sbj_range) + labs(x = "Sample Size",
y = "1-β", title = "Original Paper Effect Size") + scale_color_jcolors(palette = "pal6") +
mytheme()
Mr. A was given two tickets to the Regional lottery. He won $50 in one
lottery and $25 in the other.
Mr. B was given a ticket to a
single, larger Regional lottery. He won $75.
Who is happier?
A/B/No difference
Mr. A received a letter from the IRS saying that he made a minor
arithmetical mistake on his tax return and owed $100. He received a
similar letter the same day from his state income tax authority saying
he owed $50. There were no other repercussions from either mistake.
Mr. B received a letter from the IRS saying he made a minor
arithmetical mistake on his tax return and owed $150. There were no
other repercussions from this mistake.
Who more upset?
A/B/No difference
Mr. A bought his first National lottery ticket and won $100. Also, in a
freak accident, he damaged the rug in his apartment and had to pay the
landlord $80.
Mr. B bought his first National lottery ticket
and won $20.
Who is happier?
A/B/No difference
Mr. A’s car was damaged in a parking lot. He had to spend $200 to repair
the damage. The same day the car was damaged, he won $25 in the office
holiday raffle.
Mr. B’s car was damaged in a parking lot. He
had to spend $175 to repair the damage.
Who is more upset?
A/B/No difference
functions {
/*
* Args:
* y: response value 0: A, 1: B, 2: No difference
* ds: different (0|1) - same (2) probability
* cA: conditional (different) A probability
* Returns:
* a scalar to be added to the log posterior
*/
real mental_accounting_lpdf(real y, real ds, real cA) {
if (y == 0) { // Stay (Explore)
return bernoulli_lpmf(1 | ds) + bernoulli_lpmf(1 | cA);
} else if (y == 1) { // Switch (Explore)
return bernoulli_lpmf(1 | ds) + bernoulli_lpmf(0 | cA);
} else { // Select
return bernoulli_lpmf(0 | ds);
}
}
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K_ds; // number of population-level effects
matrix[N, K_ds] X_ds; // population-level design matrix
int<lower=1> K_cA; // number of population-level effects
matrix[N, K_cA] X_cA; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}
parameters {
vector[K_ds] b_ds; // population-level effects
vector[K_cA] b_cA; // population-level effects
}
transformed parameters {
}
model {
// likelihood including constants
if (!prior_only) {
// initialize different - same linear predictor term
vector[N] ds = X_ds * b_ds;
// initialize linear predictor term
vector[N] cA = X_cA * b_cA;
for (n in 1:N) {
// apply the inverse link function
ds[n] = inv_logit(ds[n]);
}
for (n in 1:N) {
// apply the inverse link function
cA[n] = inv_logit(cA[n]);
}
for (n in 1:N) {
target += mental_accounting_lpdf(Y[n] | ds[n], cA[n]);
}
}
// priors including constants
target += normal_lpdf(b_ds | 0, 5);
target += normal_lpdf(b_cA | 0, 5);
}
<- 4
nModels <- 2
nChains <- nModels*nChains
nClusters
<- 200
n_sim <- c(10, 50, 100, 150, 200)
n_sbj_range
<- rbind(
data_paper # Buyer
rbind(
# Market Value = 5
data.frame(
market_value = 5, buyer = 'Friend',
response = c(rep(0, round(31*0.68)), rep(5, round(31*0.26)), rep(10, round(31*0.03)))
),data.frame(
market_value = 5, buyer = 'Friend',
response = c(rep(0, round(28*0.14)), rep(5, round(28*0.79)), rep(10, round(28*0.0)))
),data.frame(
market_value = 5, buyer = 'Friend',
response = c(rep(0, round(26*0.0)), rep(5, round(26*0.69)), rep(10, round(26*0.23)))
),# Market Value = 10
data.frame(
market_value = 10, buyer = 'Friend',
response = c(rep(0, round(31*0.66)), rep(5, round(31*0.26)), rep(10, round(31*0.06)))
),data.frame(
market_value = 10, buyer = 'Friend',
response = c(rep(0, round(28*0.07)), rep(5, round(28*0.79)), rep(10, round(28*0.04)))
),data.frame(
market_value = 10, buyer = 'Friend',
response = c(rep(0, round(26*0.0)), rep(5, round(26*0.15)), rep(10, round(26*0.69)))
)
),# Stranger
rbind(
# Market Value = 5
data.frame(
market_value = 5, buyer = 'Stranger',
response = c(rep(0, round(31*0.06)), rep(5, round(31*0.77)), rep(10, round(31*0.10)))
),data.frame(
market_value = 5, buyer = 'Stranger',
response = c(rep(0, round(28*0.0)), rep(5, round(28*0.79)), rep(10, round(28*0.07)))
),data.frame(
market_value = 5, buyer = 'Stranger',
response = c(rep(0, round(26*0.0)), rep(5, round(26*0.42)), rep(10, round(26*0.46)))
),# Market Value = 10
data.frame(
market_value = 10, buyer = 'Stranger',
response = c(rep(0, round(31*0.06)), rep(5, round(31*0.16)), rep(10, round(31*0.58)))
),data.frame(
market_value = 10, buyer = 'Stranger',
response = c(rep(0, round(28*0.0)), rep(5, round(28*0.14)), rep(10, round(28*0.57)))
),data.frame(
market_value = 10, buyer = 'Stranger',
response = c(rep(0, round(26*0.0)), rep(5, round(26*0.0)), rep(10, round(26*0.73)))
)
)%>%
) mutate(response = ifelse(response >= market_value, 1, 0))
= function(p) log(p/(1-p))
logit = data_paper %>% group_by(buyer) %>% summarise(mean(response)) %>% .[,2,drop=T]
p_effect <- logit( p_effect )
linear_effect = p_effect[2] - p_effect[1]
eff_size = linear_effect[2] - linear_effect[1]
eff_size
<- function(sbj, eff_size){
sim_y
= function(p) log(p/(1-p))
logit
= c(0.48, 0.9)
p_effect = c('Friend', 'Stranger')
buyer <- logit( p_effect )
linear_effect 2] <- linear_effect[1]+eff_size
linear_effect[
# Population Effect
<- plogis(linear_effect)
theta = rbinom(2, 1, theta)
response
data.frame(subject=sbj, buyer, response)
}
# -- Run Models in Parallel -- #
<-function(x){
samplingfunction
= c('Friend', 'Stranger')
buyer #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size)
data_sim
## Create Stan Data
<- model.matrix(~ buyer, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_4,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(buyer[1], ' - ', buyer[2]),
N = n
# Rhat_ok = all(base::summary(fit)[['summary']][,'Rhat']<1.05)
)
}
<-function(x){
samplingfunction2
= c('Friend', 'Stranger')
buyer #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/2)
data_sim
## Create Stan Data
<- model.matrix(~ buyer, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_4,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(buyer[1], ' - ', buyer[2]),
N = n
# Rhat_ok = all(base::summary(fit)[['summary']][,'Rhat']<1.05)
)
}
<- data.frame()
power for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_4", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction)
parallel
})<- rbind(power, out)
power
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power2 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n)
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_4", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction2)
parallel
})<- rbind(power2, out)
power2
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
rbind(power %>%
mutate(effsize = "Paper Effect Size"), power2 %>%
mutate(effsize = "Paper Effect Size/2")) %>%
mutate(effsize = factor(effsize, levels = effect_size)) %>%
mutate(is_significant = ifelse(lwr > 0, TRUE, FALSE)) %>%
group_by(N, effsize) %>%
summarise(power = mean(is_significant)) %>%
ungroup() %>%
ggplot(aes(N, power)) + geom_line(aes(linetype = effsize, color = effsize), size = 1) +
geom_point(aes(color = effsize), size = 3) + geom_hline(yintercept = 0.8, linetype = 2) +
scale_y_continuous(limits = c(0, 1), labels = scales::percent) + scale_x_continuous(breaks = n_sbj_range) +
labs(x = "Sample Size", y = "1-β") + scale_color_jcolors(palette = "pal6") +
mytheme() + theme(legend.position = "right")
You are going to a sold-out game of your favorite local sport-team, and
you have an extra ticket. The price marked on the ticket is $5 but [you
were given your tickets for free by a friend / which is what you paid
for each ticket / but you paid $10 for each ticket when you bought them
from another student].
You want to get rid of the extra ticket
and you know the going price is $5. You find someone who wants the
ticket. There is no law against charging more than the price marked on
the ticket.
What price do you ask for if…
a) He is a friend
b) He is a
stranger
What would you have said if you found out that the going market price
was $10 instead?
a) He is a friend
b) He is a stranger
data {
int<lower=1> N; // total number of observations
int Y[N]; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}
parameters {
vector[K] b; // population-level effects
}
transformed parameters {
}
model {
// likelihood including constants
if (!prior_only) {
vector[N] mu = X * b;
target += bernoulli_logit_lpmf(Y | mu);
}
// priors including constants
target += normal_lpdf(b | 0, 5);
}
# ---- Power Calculation ---- #
<- 5
nModels <- 2
nChains <- nModels*nChains
nClusters
<- 200
n_sim <- c(10, 50, 100, 150, 200, 250, 300)
n_sbj_range
# Effect found in the paper Thaler 1985
# n_sbj_paper = 87
= c('Resort Hotel', 'Grocery Store')
store = 2.65
resort = 1.5
grocery
= resort - grocery
eff_size
<- function(sbj, eff_size){
sim_y
= 2.65
resort <- c( log(resort), log(resort-eff_size) )
linear_effect
# Calculate the probability of responding 'different'
= function(linear_predictor, shape) shape*exp(-linear_predictor)
inv_link
= c('Resort Hotel', 'Grocery Store')
store
# Population Effect
<- 25
shape_gamma <- inv_link(linear_effect, shape_gamma)
rate_gamma
data.frame(subject=sbj, store, response = rgamma(2, shape_gamma, rate_gamma))
}
# -- Run Models in Parallel -- #
<-function(x){
samplingfunction
= c('Resort Hotel', 'Grocery Store')
store #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size)
data_sim
## Create Stan Data
<- model.matrix(~ store, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(gamma_scenario_3,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)# s <- summary(fit)
# if( all(s$summary[,'Rhat']<1.05) ){
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(store[1], ' - ', store[2]),
N = n
)# }
}
<-function(x){
samplingfunction2
= c('Resort Hotel', 'Grocery Store')
store
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/2)
data_sim
## Create Stan Data
<- model.matrix(~ store, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(gamma_scenario_3,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
# s <- summary(fit)
# if( all(s$summary[,'Rhat']<1.05) ){
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(store[1], ' - ', store[2]),
N = n
)# }
}
<-function(x){
samplingfunction3
= c('Resort Hotel', 'Grocery Store')
store
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/3)
data_sim
## Create Stan Data
<- model.matrix(~ store, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(gamma_scenario_3,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
# s <- summary(fit)
# if( all(s$summary[,'Rhat']<1.05) ){
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(store[1], ' - ', store[2]),
N = n
)# }
}
<-function(x){
samplingfunction4
= c('Resort Hotel', 'Grocery Store')
store
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/4)
data_sim
## Create Stan Data
<- model.matrix(~ store, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(gamma_scenario_3,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
# s <- summary(fit)
# if( all(s$summary[,'Rhat']<1.05) ){
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(store[1], ' - ', store[2]),
N = n
)# }
}
<- data.frame()
power for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("gamma_scenario_3", "n_sbj_range", "sim_y", "eff_size",
parallel"n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction)
parallel
})<- rbind(power, out)
power
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power2 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("gamma_scenario_3", "n_sbj_range", "sim_y", "eff_size",
parallel"n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction2)
parallel
})<- rbind(power2, out)
power2
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power3 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("gamma_scenario_3", "n_sbj_range", "sim_y", "eff_size",
parallel"n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction3)
parallel
})<- rbind(power3, out)
power3
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power4 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("gamma_scenario_3", "n_sbj_range", "sim_y", "eff_size",
parallel"n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction4)
parallel
})<- rbind(power4, out)
power4
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- rbind(power %>%
power_line_plot mutate(effsize = "Paper Effect Size"), power2 %>%
mutate(effsize = "Paper Effect Size/2"), power3 %>%
mutate(effsize = "Paper Effect Size/3"), power4 %>%
mutate(effsize = "Paper Effect Size/4")) %>%
mutate(effsize = factor(effsize, levels = effect_size)) %>%
mutate(is_significant = ifelse(lwr > 0, TRUE, FALSE)) %>%
group_by(N, effsize) %>%
summarise(power = mean(is_significant)) %>%
ungroup() %>%
ggplot(aes(N, power)) + geom_line(aes(linetype = effsize, color = effsize), size = 1) +
geom_point(aes(color = effsize), size = 3) + geom_hline(yintercept = 0.8, linetype = 2) +
scale_y_continuous(limits = c(0, 1), labels = scales::percent) + scale_x_continuous(breaks = n_sbj_range) +
labs(x = "Sample Size", y = "1-β") + scale_color_jcolors(palette = "pal6") +
mytheme() + theme(legend.position = "right")
# power_quantile_plot <- rbind( power %>% mutate(effsize = 'Paper Effect
# Size'), power2 %>% mutate(effsize = 'Paper Effect Size/2'), power3 %>%
# mutate(effsize = 'Paper Effect Size/3'), power4 %>% mutate(effsize = 'Paper
# Effect Size/4') ) %>% ggplot(aes(N, lwr, color = effsize)) +
# stat_pointinterval() + mytheme() + theme(legend.position = 'none') +
# geom_hline(yintercept = 0, linetype=2) + geom_line(data = rbind( power %>%
# mutate(effsize = 'Paper Effect Size'), power2 %>% mutate(effsize = 'Paper
# Effect Size/2'), power3 %>% mutate(effsize = 'Paper Effect Size/3'), power4
# %>% mutate(effsize = 'Paper Effect Size/4')) %>% group_by(N, effsize) %>%
# summarise(m=median(lwr)), aes(N, m, color=effsize)) +
# scale_x_continuous(breaks = NULL) + labs(x='Sample Size', y='2.5% Quantile
# Posterior') + scale_color_jcolors(palette = 'pal6') + #
# scale_color_manual(values = c('gray80', '#6B244C')) + facet_wrap(~effsize)
# plot_grid(plot_grid(NULL, power_line_plot, NULL, rel_widths = c(0.2, 1, 0.2),
# nrow = 1), NULL, power_quantile_plot, rel_heights = c(0.6, 0.1, 1), nrow = 3
# )
power_line_plot
You are on the beach on a hot day, and you would really like a cold bottle of beer. A friend offers to go buy the beer from [a fancy resort hotel / a small, run-down grocery store]. They ask how much you are willing to pay for the beer: they will only buy it if it costs as much or less than the price you state. You trust your friend, and there is no possibility of bargaining with the bartender.
What price do you tell them?
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}
parameters {
vector[K] b; // population-level effects
real<lower=0> shape; // shape parameter
}
transformed parameters {
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = X * b;
for (n in 1:N) {
// apply the inverse link function
mu[n] = shape * exp(-(mu[n]));
}target += gamma_lpdf(Y | shape, mu);
}// priors including constants
target += normal_lpdf(b | 0, 5);
target += gamma_lpdf(shape | 0.01, 0.01);
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}
parameters {
vector[K] b; // population-level effects
real<lower=0> shape; // shape parameter
}
transformed parameters {
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = X * b;
for (n in 1:N) {
// apply the inverse link function
mu[n] = shape * exp(-(mu[n]));
}
target += gamma_lpdf(Y | shape, mu);
}
// priors including constants
target += normal_lpdf(b | 0, 5);
target += gamma_lpdf(shape | 0.01, 0.01);
}
# ---- Power Calculation ---- #
<- 5
nModels <- 2
nChains <- nModels*nChains
nClusters
<- 200
n_sim <- c(10, 50, 100, 150, 200, 250, 300)
n_sbj_range
# Effect found in the paper Thaler 1985
= function(p) log(p/(1-p))
logit = c(0.29, 0.68)
p_effect <- logit( p_effect )
linear_effect = p_effect[2] - p_effect[1]
eff_size = linear_effect[2] - linear_effect[1]
eff_size
<- function(sbj, eff_size){
sim_y
= function(p) log(p/(1-p))
logit
= c(0.29, 0.68)
p_effect = c('High', 'Low')
price <- logit( p_effect )
linear_effect 2] <- linear_effect[1]+eff_size
linear_effect[
# Population Effect
<- plogis(linear_effect)
theta = rbinom(2, 1, theta)
response
data.frame(subject=sbj, price, response)
}
# -- Run Models in Parallel -- #
<-function(x){
samplingfunction
= c('Low', 'High')
price #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size)
data_sim
## Create Stan Data
<- model.matrix(~ price, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_4,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(price[1], ' - ', price[2]),
N = n
# Rhat_ok = all(base::summary(fit)[['summary']][,'Rhat']<1.05)
)
}
<-function(x){
samplingfunction2
= c('Low', 'High')
price #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/2)
data_sim
## Create Stan Data
<- model.matrix(~ price, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_4,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)# s <- summary(fit)
# if( all(s$summary[,'Rhat']<1.05) ){
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(price[1], ' - ', price[2]),
N = n
)# }
}
<-function(x){
samplingfunction3
= c('Low', 'High')
price #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/3)
data_sim
## Create Stan Data
<- model.matrix(~ price, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_4,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)# s <- summary(fit)
# if( all(s$summary[,'Rhat']<1.05) ){
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(price[1], ' - ', price[2]),
N = n
)# }
}
<-function(x){
samplingfunction4
= c('Low', 'High')
price #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/4)
data_sim
## Create Stan Data
<- model.matrix(~ price, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_4,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)# s <- summary(fit)
# if( all(s$summary[,'Rhat']<1.05) ){
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(price[1], ' - ', price[2]),
N = n
)# }
}
<- data.frame()
power for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_4", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction)
parallel
})<- rbind(power, out)
power
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power2 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n)
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_4", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction2)
parallel
})<- rbind(power2, out)
power2
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power3 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_4", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction3)
parallel
})<- rbind(power3, out)
power3
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power4 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_4", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction4)
parallel
})<- rbind(power4, out)
power4
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- rbind(power %>%
power_line_plot mutate(effsize = "Paper Effect Size"), power2 %>%
mutate(effsize = "Paper Effect Size/2"), power3 %>%
mutate(effsize = "Paper Effect Size/3"), power4 %>%
mutate(effsize = "Paper Effect Size/4")) %>%
mutate(effsize = factor(effsize, levels = effect_size)) %>%
mutate(is_significant = ifelse(lwr > 0, TRUE, FALSE)) %>%
group_by(N, effsize) %>%
summarise(power = mean(is_significant)) %>%
ungroup() %>%
ggplot(aes(N, power)) + geom_line(aes(linetype = effsize, color = effsize), size = 1) +
geom_point(aes(color = effsize), size = 3) + geom_hline(yintercept = 0.8, linetype = 2) +
scale_y_continuous(limits = c(0, 1), labels = scales::percent) + scale_x_continuous(breaks = n_sbj_range) +
labs(x = "Sample Size", y = "1-β") + scale_color_jcolors(palette = "pal6") +
mytheme() + theme(legend.position = "right")
# power_quantile_plot <- rbind( power %>% mutate(effsize = 'Paper Effect
# Size'), power2 %>% mutate(effsize = 'Paper Effect Size/2'), power3 %>%
# mutate(effsize = 'Paper Effect Size/3'), power4 %>% mutate(effsize = 'Paper
# Effect Size/4') ) %>% ggplot(aes(N, lwr, color = effsize)) +
# stat_pointinterval() + mytheme() + theme(legend.position = 'none') +
# geom_hline(yintercept = 0, linetype=2) + geom_line(data = rbind( power %>%
# mutate(effsize = 'Paper Effect Size'), power2 %>% mutate(effsize = 'Paper
# Effect Size/2'), power3 %>% mutate(effsize = 'Paper Effect Size/3'), power4
# %>% mutate(effsize = 'Paper Effect Size/4')) %>% group_by(N, effsize) %>%
# summarise(m=median(lwr)), aes(N, m, color=effsize)) +
# scale_x_continuous(breaks = NULL) + labs(x='Sample Size', y='2.5% Quantile
# Posterior') + scale_color_jcolors(palette = 'pal6') + #
# scale_color_manual(values = c('gray80', '#6B244C')) + facet_wrap(~effsize)
# plot_grid(plot_grid(power_line_plot, rel_widths = c(0.2, 1, 0.2), nrow = 1),
# NULL, power_quantile_plot, rel_heights = c(0.6, 0.1, 1), nrow = 3 )
power_line_plot
You are about to purchase a jacket for [$125 / $15] and a Bluetooth speaker for [$15 / $125]. The salesman informs you that the speaker you wish to buy is on sale for [$10 / $120] at the other branch store, located 20 minutes away.
Would you make the trip to the other store? a) Yes a) No
Imagine that you are about to purchase a jacket for [$125 / $15] and a calculator for [$15 / $125]. The salesman informs you that the calculator you wish to buy is on sale for [$10 / $120] at the other branch store, located 20 minutes away.
Would you make the trip to the other store? a) Yes a) No
<- rstan::stan_model("bernoulli_scenario_4.stan") bernoulli_scenario_4
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DBOOST_NO_AUTO_PTR -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/usr/local/include -fPIC -Wall -g -O2 -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
namespace Eigen {
^
/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
namespace Eigen {
^
;
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
#include <complex>
^~~~~~~~~
3 errors generated.
make: *** [foo.o] Error 1
data {
int<lower=1> N; // total number of observations
int Y[N]; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}
parameters {
vector[K] b; // population-level effects
}
transformed parameters {
}
model {
// likelihood including constants
if (!prior_only) {
vector[N] mu = X * b;
target += bernoulli_logit_lpmf(Y | mu);
}
// priors including constants
target += normal_lpdf(b | 0, 5);
}
# ---- Power Calculation ---- #
<- 5
nModels <- 2
nChains <- nModels*nChains
nClusters
<- 200
n_sim <- c(10, 50, 100, 150, 200, 250, 300)
n_sbj_range
# Effect found in the paper Thaler 1985
= function(p) log(p/(1-p))
logit = c('Cash', 'Ticket')
loss = c(0.88, 0.46)
p_effect
<- logit( p_effect )
linear_effect = linear_effect[2] - linear_effect[1]
eff_size
<- function(sbj, eff_size){
sim_y
= function(p) log(p/(1-p))
logit
= c('Cash', 'Ticket')
loss = c(0.88, 0.46)
p_effect <- logit( p_effect )
linear_effect 2] <- linear_effect[1]+eff_size
linear_effect[
# Population Effect
<- plogis(linear_effect)
theta = rbinom(2, 1, theta)
response
data.frame(subject=sbj, loss, response)
}
# -- Run Models in Parallel -- #
<-function(x){
samplingfunction
= c('Cash', 'Ticket')
loss #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size)
data_sim
## Create Stan Data
<- model.matrix(~ loss, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_4,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(loss[2], ' - ', loss[1]),
N = n
# Rhat_ok = all(base::summary(fit)[['summary']][,'Rhat']<1.05)
)
}
<-function(x){
samplingfunction2
= c('Cash', 'Ticket')
loss #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/2)
data_sim
## Create Stan Data
<- model.matrix(~ loss, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_4,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)# s <- summary(fit)
# if( all(s$summary[,'Rhat']<1.05) ){
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(loss[2], ' - ', loss[1]),
N = n
)# }
}
<-function(x){
samplingfunction3
= c('Cash', 'Ticket')
loss #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/3)
data_sim
## Create Stan Data
<- model.matrix(~ loss, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_4,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)# s <- summary(fit)
# if( all(s$summary[,'Rhat']<1.05) ){
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(loss[2], ' - ', loss[1]),
N = n
)# }
}
<-function(x){
samplingfunction4
= c('Cash', 'Ticket')
loss #Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/4)
data_sim
## Create Stan Data
<- model.matrix(~ loss, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
N_sbj = n,
J_sbj = data_sim$subject,
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_4,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)# s <- summary(fit)
# if( all(s$summary[,'Rhat']<1.05) ){
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0(loss[2], ' - ', loss[1]),
N = n
)# }
}
# ---- Power Calculation ---- #
<- data.frame()
power for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_4", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction)
parallel
})<- rbind(power, out)
power
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
# ---- Power Calculation ---- #
<- data.frame()
power2 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n)
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_4", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction2)
parallel
})<- rbind(power2, out)
power2
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
# ---- Power Calculation ---- #
<- data.frame()
power3 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_4", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction3)
parallel
})<- rbind(power3, out)
power3
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power4 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_4", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction4)
parallel
})<- rbind(power4, out)
power4
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- rbind(power %>%
power_line_plot mutate(effsize = "Paper Effect Size"), power2 %>%
mutate(effsize = "Paper Effect Size/2"), power3 %>%
mutate(effsize = "Paper Effect Size/3"), power4 %>%
mutate(effsize = "Paper Effect Size/4")) %>%
mutate(effsize = factor(effsize, levels = effect_size)) %>%
mutate(is_significant = ifelse(upr < 0, TRUE, FALSE)) %>%
group_by(N, effsize) %>%
summarise(power = mean(is_significant)) %>%
ungroup() %>%
ggplot(aes(N, power)) + geom_line(aes(linetype = effsize, color = effsize), size = 1) +
geom_point(aes(color = effsize), size = 3) + geom_hline(yintercept = 0.8, linetype = 2) +
scale_y_continuous(limits = c(0, 1), labels = scales::percent) + scale_x_continuous(breaks = n_sbj_range) +
labs(x = "Sample Size", y = "1-β") + scale_color_jcolors(palette = "pal6") +
mytheme() + theme(legend.position = "right")
# power_quantile_plot <- rbind( power %>% mutate(effsize = 'Paper Effect
# Size'), power2 %>% mutate(effsize = 'Paper Effect Size/2'), power3 %>%
# mutate(effsize = 'Paper Effect Size/3'), power4 %>% mutate(effsize = 'Paper
# Effect Size/4') ) %>% ggplot(aes(N, upr, color = effsize)) +
# stat_pointinterval() + mytheme() + theme(legend.position = 'none') +
# geom_hline(yintercept = 0, linetype=2) + geom_line(data = rbind( power %>%
# mutate(effsize = 'Paper Effect Size'), power2 %>% mutate(effsize = 'Paper
# Effect Size/2'), power3 %>% mutate(effsize = 'Paper Effect Size/3'), power4
# %>% mutate(effsize = 'Paper Effect Size/4')) %>% group_by(N, effsize) %>%
# summarise(m=median(upr)), aes(N, m, color=effsize)) +
# scale_x_continuous(breaks = NULL) + labs(x='Sample Size', y='2.5% Quantile
# Posterior') + scale_color_jcolors(palette = 'pal6') + #
# scale_color_manual(values = c('gray80', '#6B244C')) + facet_wrap(~effsize)
# plot_grid(plot_grid(power_line_plot, rel_widths = c(0.2, 1, 0.2), nrow = 1),
# NULL, power_quantile_plot, rel_heights = c(0.6, 0.1, 1), nrow = 3 )
power_line_plot
Imagine you are on your way to a play with [a pair of tickets for which you have paid $40 / the intention of buying two tickets worth $40 in total]. On entering the theater, you discover that [you have lost the tickets / you have lost $40 in cash].
Would you pay $40 for another pair of tickets? a) Yes b) No
data {
int<lower=1> N; // total number of observations
int Y[N]; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}
parameters {
vector[K] b; // population-level effects
}
transformed parameters {
}
model {
// likelihood including constants
if (!prior_only) {
vector[N] mu = X * b;
target += bernoulli_logit_lpmf(Y | mu);
}
// priors including constants
target += normal_lpdf(b | 0, 5);
}
# ---- Power Calculation ---- #
<- 5
nModels <- 2
nChains <- nModels*nChains
nClusters
<- 200
n_sim <- c(10, 50, 100, 150, 200, 250, 300)
n_sbj_range
# Effect found in the paper Thaler 1985
= function(p) log(p/(1-p))
logit = c(0, 1) # Yearly - Per-session
frame = c(0.18, 0.78)
p_effect
<- logit( p_effect )
linear_effect = linear_effect[2] - linear_effect[1]
eff_size
<- function(sbj, eff_size){
sim_y
= function(p) log(p/(1-p))
logit
= c(0, 1) # Yearly - Per-session
frame = c(0.18, 0.78)
p_effect <- logit( p_effect )
linear_effect 2] <- linear_effect[1]+eff_size
linear_effect[= 0.5
std
# Random Effect
<- rnorm(1, 0, std)
re # Population Effect
<- plogis(linear_effect+re)
theta = rbinom(2, 1, theta)
response
data.frame(subject=sbj, frame, response)
}
# -- Run Models in Parallel -- #
<-function(x){
samplingfunction
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size)
data_sim
## Create Stan Data
<- model.matrix(~ frame, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_6,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0('Per-session - Yearly'),
N = n
)
}
<-function(x){
samplingfunction2
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/2)
data_sim
## Create Stan Data
<- model.matrix(~ frame, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_6,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0('Per-session - Yearly'),
N = n
)
}
<-function(x){
samplingfunction3
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/3)
data_sim
## Create Stan Data
<- model.matrix(~ frame, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_6,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0('Per-session - Yearly'),
N = n
)
}
<-function(x){
samplingfunction4
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/4)
data_sim
## Create Stan Data
<- model.matrix(~ frame, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_6,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0('Per-session - Yearly'),
N = n
)
}
# ---- Power Calculation ---- #
<- data.frame()
power for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_6", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction)
parallel
})<- rbind(power, out)
power
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
# ---- Power Calculation ---- #
<- data.frame()
power2 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n)
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_6", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction2)
parallel
})<- rbind(power2, out)
power2
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
# ---- Power Calculation ---- #
<- data.frame()
power3 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_6", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction3)
parallel
})<- rbind(power3, out)
power3
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power4 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_6", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction4)
parallel
})<- rbind(power4, out)
power4
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- rbind(power %>%
power_line_plot mutate(effsize = "Paper Effect Size"), power2 %>%
mutate(effsize = "Paper Effect Size/2"), power3 %>%
mutate(effsize = "Paper Effect Size/3"), power4 %>%
mutate(effsize = "Paper Effect Size/4")) %>%
mutate(effsize = factor(effsize, levels = effect_size)) %>%
mutate(is_significant = ifelse(lwr > 0, TRUE, FALSE)) %>%
group_by(N, effsize) %>%
summarise(power = mean(is_significant)) %>%
ungroup() %>%
ggplot(aes(N, power)) + geom_line(aes(linetype = effsize, color = effsize), size = 1) +
geom_point(aes(color = effsize), size = 3) + geom_hline(yintercept = 0.8, linetype = 2) +
scale_y_continuous(limits = c(0, 1), labels = scales::percent) + scale_x_continuous(breaks = n_sbj_range) +
labs(x = "Sample Size", y = "1-β") + scale_color_jcolors(palette = "pal6") +
mytheme() + theme(legend.position = "right")
# power_quantile_plot <- rbind( power %>% mutate(effsize = 'Paper Effect
# Size'), power2 %>% mutate(effsize = 'Paper Effect Size/2'), power3 %>%
# mutate(effsize = 'Paper Effect Size/3'), power4 %>% mutate(effsize = 'Paper
# Effect Size/4') ) %>% ggplot(aes(N, lwr, color = effsize)) +
# stat_pointinterval() + mytheme() + theme(legend.position = 'none') +
# geom_hline(yintercept = 0, linetype=2) + geom_line(data = rbind( power %>%
# mutate(effsize = 'Paper Effect Size'), power2 %>% mutate(effsize = 'Paper
# Effect Size/2'), power3 %>% mutate(effsize = 'Paper Effect Size/3'), power4
# %>% mutate(effsize = 'Paper Effect Size/4')) %>% group_by(N, effsize) %>%
# summarise(m=median(lwr)), aes(N, m, color=effsize)) +
# scale_x_continuous(breaks = NULL) + labs(x='Sample Size', y='2.5% Quantile
# Posterior') + scale_color_jcolors(palette = 'pal6') + #
# scale_color_manual(values = c('gray80', '#6B244C')) + facet_wrap(~effsize)
# plot_grid(plot_grid(power_line_plot, rel_widths = c(0.2, 1, 0.2), nrow = 1),
# NULL, power_quantile_plot, rel_heights = c(0.6, 0.1, 1), nrow = 3 )
power_line_plot
You have a gym membership in a nearby town that you travel to on a regular basis. You go to this gym every single Monday night. The membership allows you to pay [$20 (non-refundable) per session / $1000 a year (i.e., roughly $20 per visit)]. One Monday, just after you have arrived and changed into your gym clothes, you receive a phone call that requires you to leave and forego your exercise that evening.
Which of the following statements better captures your feelings about the cost of the missed workout? a) I feel like I wasted $20 b) I feel like I wasted nothing, since my visit had already been paid for c) I feel like I wasted something, but no specific amount or measure comes to mind
<- rstan::stan_model("bernoulli_scenario_6.stan") bernoulli_scenario_6
data {
int<lower=1> N; // total number of observations
int Y[N]; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}
parameters {
vector[K] b; // population-level effects
}
transformed parameters {
}
model {
// likelihood including constants
if (!prior_only) {
vector[N] mu = X * b;
target += bernoulli_logit_lpmf(Y | mu);
}
// priors including constants
target += normal_lpdf(b | 0, 5);
}
# ---- Power Calculation ---- #
<- 5
nModels <- 2
nChains <- nModels*nChains
nClusters
<- 200
n_sim <- c(10, 50, 100, 150, 200, 250, 300)
n_sbj_range
# Effect found in the paper Thaler 1985
= function(p) log(p/(1-p))
logit = c(0, 1) # Free - Purchased
coupon = c(0.05, 0.25)
p_effect
<- logit( p_effect )
linear_effect = linear_effect[2] - linear_effect[1]
eff_size
<- function(sbj, eff_size){
sim_y
= function(p) log(p/(1-p))
logit
= c(0, 1) # Free - Purchased
coupon = c(0.05, 0.25)
p_effect <- logit( p_effect )
linear_effect 2] <- linear_effect[1]+eff_size
linear_effect[= 0.5
std
# Random Effect
<- rnorm(1, 0, std)
re # Population Effect
<- plogis(linear_effect+re)
theta = rbinom(2, 1, theta)
response
data.frame(subject=sbj, coupon, response)
}
# -- Run Models in Parallel -- #
<-function(x){
samplingfunction
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size)
data_sim
## Create Stan Data
<- model.matrix(~ coupon, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_6,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0('Purchased - Free'),
N = n
)
}
<-function(x){
samplingfunction2
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/2)
data_sim
## Create Stan Data
<- model.matrix(~ coupon, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_6,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0('Purchased - Free'),
N = n
)
}
<-function(x){
samplingfunction3
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/3)
data_sim
## Create Stan Data
<- model.matrix(~ coupon, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_6,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0('Purchased - Free'),
N = n
)
}
<-function(x){
samplingfunction4
#Simulate data
<- purrr::map_dfr(seq_len(n), sim_y, eff_size=eff_size/4)
data_sim
## Create Stan Data
<- model.matrix(~ coupon, data_sim)
X
#Stan Data
<- list(
stan_data N = nrow(data_sim),
Y = data_sim$response,
X = X,
K = ncol(X),
prior_only = 0
)
<- rstan::sampling(bernoulli_scenario_6,
fit iter = 2000,
chains = 2,
cores = 2,
pars = 'b',
refresh = 0,
data = stan_data,
save_warmup = FALSE
)
<- rstan::extract(fit)
efit
data.frame(
estimate = apply(efit$b, 2, mean)[2],
lwr = apply(efit$b, 2, quantile, probs=0.025)[2],
upr = apply(efit$b, 2, quantile, probs=0.975)[2],
store = paste0('Purchased - Free'),
N = n
)
}
# ---- Power Calculation ---- #
<- data.frame()
power for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_6", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction)
parallel
})<- rbind(power, out)
power
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
# ---- Power Calculation ---- #
<- data.frame()
power2 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n)
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_6", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction2)
parallel
})<- rbind(power2, out)
power2
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
# ---- Power Calculation ---- #
<- data.frame()
power3 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_6", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction3)
parallel
})<- rbind(power3, out)
power3
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- data.frame()
power4 for (n in n_sbj_range) {
message("\n\nN Subjects: ", n, "\n")
message("Setting up Clusters...")
<- parallel::makeCluster(nClusters, outfile = "")
cl ::clusterExport(cl, c("bernoulli_scenario_6", "n_sbj_range", "sim_y",
parallel"eff_size", "n"))
<- map_dfr(seq_len(round(n_sim/nModels)), function(i) {
out message("\r", "Simulation: ", i * nModels, "/", n_sim, appendLF = FALSE)
::parLapply(cl, 1:nModels, samplingfunction4)
parallel
})<- rbind(power4, out)
power4
message("Shutting down Clusters...")
::stopCluster(cl)
parallel }
<- rbind(power %>%
power_line_plot mutate(effsize = "Paper Effect Size"), power2 %>%
mutate(effsize = "Paper Effect Size/2"), power3 %>%
mutate(effsize = "Paper Effect Size/3"), power4 %>%
mutate(effsize = "Paper Effect Size/4")) %>%
mutate(effsize = factor(effsize, levels = effect_size)) %>%
mutate(is_significant = ifelse(lwr > 0, TRUE, FALSE)) %>%
group_by(N, effsize) %>%
summarise(power = mean(is_significant)) %>%
ungroup() %>%
ggplot(aes(N, power)) + geom_line(aes(linetype = effsize, color = effsize), size = 1) +
geom_point(aes(color = effsize), size = 3) + geom_hline(yintercept = 0.8, linetype = 2) +
scale_y_continuous(limits = c(0, 1), labels = scales::percent) + scale_x_continuous(breaks = n_sbj_range) +
labs(x = "Sample Size", y = "1-β") + scale_color_jcolors(palette = "pal6") +
mytheme() + theme(legend.position = "right")
# power_quantile_plot <- rbind( power %>% mutate(effsize = 'Paper Effect
# Size'), power2 %>% mutate(effsize = 'Paper Effect Size/2'), power3 %>%
# mutate(effsize = 'Paper Effect Size/3'), power4 %>% mutate(effsize = 'Paper
# Effect Size/4') ) %>% ggplot(aes(N, lwr, color = effsize)) +
# stat_pointinterval() + mytheme() + theme(legend.position = 'none') +
# geom_hline(yintercept = 0, linetype=2) + geom_line(data = rbind( power %>%
# mutate(effsize = 'Paper Effect Size'), power2 %>% mutate(effsize = 'Paper
# Effect Size/2'), power3 %>% mutate(effsize = 'Paper Effect Size/3'), power4
# %>% mutate(effsize = 'Paper Effect Size/4')) %>% group_by(N, effsize) %>%
# summarise(m=median(lwr)), aes(N, m, color=effsize)) +
# scale_x_continuous(breaks = NULL) + labs(x='Sample Size', y='2.5% Quantile
# Posterior') + scale_color_jcolors(palette = 'pal6') + #
# scale_color_manual(values = c('gray80', '#6B244C')) + facet_wrap(~effsize)
# plot_grid(plot_grid(power_line_plot, rel_widths = c(0.2, 1, 0.2), nrow = 1),
# NULL, power_quantile_plot, rel_heights = c(0.6, 0.1, 1), nrow = 3 )
power_line_plot
You are flying with a friend who has upgraded their ticket to business class and has two coupons, each worth $35, which can be used to upgrade your ticket/seat from economy to business class. They purchased one coupon for the standard price of $35 and received the other one for free. Just one coupon is required to upgrade your class on the current flight, and your friend offers you the coupon [which he purchased / which he got for free] so that you can upgrade as well.
What do you think is the most appropriate thing to do? a) Pay your friend $35 for the coupon. b) Consider it a gift and not pay for the coupon. c) Pay some, but not the full amount for the coupon (for example, half the price).
data {
int<lower=1> N; // total number of observations
int Y[N]; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}
parameters {
vector[K] b; // population-level effects
}
transformed parameters {
}
model {
// likelihood including constants
if (!prior_only) {
vector[N] mu = X * b;
target += bernoulli_logit_lpmf(Y | mu);
}
// priors including constants
target += normal_lpdf(b | 0, 5);
}