0. Reference
1. Sample dataset
2. ARR
3. Interpretation
0. Reference
- https://en.wikipedia.org/wiki/Risk_difference
1. Sample dataset
library(dplyr)
library(data.table)
# Fix the RNG seed so the toy dataset below is reproducible.
set.seed(230706)

# Toy data: 2000 simulated patients. Every column is drawn independently, so
# there is no real association between `group` and any outcome.
# NOTE: the columns are sampled in the order listed; reordering them changes
# the random draws obtained from the seed above.
dt <- data.table(
  # Unique patient ids (sampled without replacement).
  pid = sample(1:5000, 2000, replace = FALSE),
  # Composite outcome indicator (1 with probability 0.2).
  mace = sample(0:1, 2000, replace = TRUE, prob = c(0.8, 0.2)),
  # Outcome sub-type label, five equally likely categories.
  mace_sub = sample(
    c("acute_mi", "is", "cr", "death", "cvd_death"),
    2000,
    replace = TRUE,
    prob = rep(0.2, 5)
  ),
  # Binary 0/1 flags, each 1 with probability 0.2.
  dm = sample(0:1, 2000, replace = TRUE, prob = c(0.8, 0.2)),
  dementia = sample(0:1, 2000, replace = TRUE, prob = c(0.8, 0.2)),
  myalgia = sample(0:1, 2000, replace = TRUE, prob = c(0.8, 0.2)),
  age = sample(0:99, 2000, replace = TRUE),
  # Treatment group: 80% Atorvastatin, 20% Pravastatin.
  group = sample(
    c("Atorvastatin", "Pravastatin"),
    2000,
    replace = TRUE,
    prob = c(0.8, 0.2)
  ),
  from = sample(
    c("0", "1", "2", "3", "4", "5", "6", "7", "8"),
    2000,
    replace = TRUE
  ),
  index_year = sample(2006:2022, 2000, replace = TRUE),
  event_tars_year = sample(0:15, 2000, replace = TRUE)
)

# Quick structural overview of the simulated table.
glimpse(dt)
2. ARR
# Expand `mace_sub` into one 0/1 indicator column per sub-type, so each
# sub-type can be analysed as its own binary outcome.
dt2 <- mutate(
  dt,
  is = as.numeric(mace_sub == "is"),
  acute_mi = as.numeric(mace_sub == "acute_mi"),
  cr = as.numeric(mace_sub == "cr"),
  cvd_death = as.numeric(mace_sub == "cvd_death"),
  death = as.numeric(mace_sub == "death")
)
#' Risk difference between two groups with a Wald confidence interval
#'
#' Computes the absolute risk difference for a binary outcome, defined as the
#' risk in the exposed group minus the risk in the unexposed group, together
#' with a normal-approximation (Wald) confidence interval.
#'
#' Missing values are not removed: an `NA` outcome inside either group
#' propagates to the result. If a group has no rows its risk is `NaN`.
#'
#' @param dt A data.frame or data.table containing a `group` column and the
#'   outcome column.
#' @param outcome Character scalar naming the outcome column; rows where the
#'   column equals 1 are counted as events.
#' @param exposed_level Value of `group` identifying the exposed group.
#' @param unexposed_level Value of `group` identifying the unexposed group.
#' @param conf_level Confidence level of the interval (default 0.95).
#' @return A character scalar formatted as `"RD [lower;upper]"`, with every
#'   number rounded to 3 decimals.
risk_difference <- function(dt, outcome,
                            exposed_level = "Pravastatin",
                            unexposed_level = "Atorvastatin",
                            conf_level = 0.95) {
  if (!is.character(outcome) || length(outcome) != 1L ||
        !outcome %in% names(dt)) {
    stop("`outcome` must name a single column of `dt`.", call. = FALSE)
  }
  if (!"group" %in% names(dt)) {
    stop("`dt` must contain a `group` column.", call. = FALSE)
  }

  # `[[` extracts by exact name for both data.frame and data.table, and
  # cannot be confused by a column that happens to be called "outcome" or by
  # an object of the same name in an enclosing environment.
  group <- dt[["group"]]
  events <- dt[[outcome]]

  is_exposed <- group == exposed_level
  is_unexposed <- group == unexposed_level
  n_exposed <- sum(is_exposed)
  n_unexposed <- sum(is_unexposed)

  # Risk (event proportion) within each group and their difference.
  risk_exposed <- sum(is_exposed & events == 1) / n_exposed
  risk_unexposed <- sum(is_unexposed & events == 1) / n_unexposed
  rd <- risk_exposed - risk_unexposed

  # Standard error of the difference of two independent proportions.
  se_difference <- sqrt(
    risk_exposed * (1 - risk_exposed) / n_exposed +
      risk_unexposed * (1 - risk_unexposed) / n_unexposed
  )

  # Two-sided critical value, e.g. qnorm(0.975) for a 95% interval.
  z_value <- qnorm((1 + conf_level) / 2)
  ci <- rd + c(-1, 1) * z_value * se_difference

  paste0(round(rd, 3), " [", round(ci[1], 3), ";", round(ci[2], 3), "]")
}
# Smoke test: risk difference for the composite MACE outcome.
paste0("Test:", risk_difference(dt, "mace"))

# Build the result table in one step: the overall MACE row followed by one
# row per MACE sub-type. Iterating with vapply() avoids growing the table
# with rbind() inside a loop and is safe when there are no sub-types
# (1:length(x) would iterate over c(1, 0) in that case).
mace_sub_names <- unique(dt2$mace_sub)
dt_rd_all <- rbind(
  data.table(sub_name = "MACE", Risk_Diff = risk_difference(dt, "mace")),
  data.table(
    sub_name = mace_sub_names,
    Risk_Diff = vapply(
      mace_sub_names,
      function(nm) risk_difference(dt2, nm),
      character(1),
      USE.NAMES = FALSE
    )
  )
)

# Parentheses force the table to be printed.
(dt_rd_all)
3. Interpretation
- Risk Diff 값은 두 군 간 절대 발생률(위험)의 차이(Pravastatin − Atorvastatin)이다. 비율 단위로 출력되므로 100을 곱하면 %p 단위가 된다.
- 95% CI가 0을 포함하면 두 군 간 차이는 통계적으로 유의하지 않다.