#
1. coefplot#
// Import data (analysis-ready NHANES table built by the nhmerge script)
use 18_nhtable102feb2023.dta
// Write model: logistic regression of female on SBP, age, race, education,
// income, diabetes, smoking, and BMI (reports odds ratios by default)
logistic female sbp age i.race i.educ income dm smk bmi
// Command for figure
// eform exponentiates coefficients so markers are odds ratios, matching the
// log-scaled x axis and the reference line at 1
coefplot, drop(_cons) eform /// this is the most important part
title("Title name here", size(med) margin(medsmall) color(black)) ///
xline(1, lwidth(thin) lpattern(dash) lcolor(red%50)) ///
xscale(log) ///
xtitle("Odds Ratios and 95% Confidence Intervals", margin(medsmall)) ///
mlabel format(%9.2f) mlabposition(12) mlabgap(*.5) ///
legend(off) ciopts(lwidth(*1 *5)) ///
levels(95) msym(s) mfcolor(white) ///
note("put anything you want" ///
,span size(vsmall)) ///
plotregion(margin(zero)) ///
graphregion(color(white))
graph export "model_example.png", as(png) replace
2. matchrun#
*Purpose: Pull match run sequence number for organs (designed for lung) that were transplanted
*Designed by Mary Grace Bowring, adapted by Jess Ruck (any mistakes are definitely Jess's)
*Last updated: 1/18/23
capture log close
clear
*paths and date windows are globals so users only edit this header
global DATA /dcs04/legacy-dcs01-igm/segevlab/data/srtr/srtr2211/saf/stata
global FILE /users/ //update with your file path
global LAS /users/ //update with your file path
global START mdy(01,01,2015) //update this for your start date
global END mdy(12,31,2021) //update this for your end date (last transplant date included)
global CENS mdy(11,01,2022) //update this with your censoring date
global OFFERS1 /dcs04/legacy-dcs01-igm/segevlab/data/srtr/srtr2106/ptr //this folder has all of 2015-2020
global OFFERS2 /dcs04/legacy-dcs01-igm/segevlab/data/srtr/srtr2208/ptr/stata //this folder has all of 2021 to part of 2022. NOTE: missing heart match run data for 2021
*load only the needed transplant variables to keep memory small
use rec_tx_dt px_id donor_id rec_tx_org_ty rec_age_at_tx ///
don_ty rec_tx_procedure_ty using ${DATA}/tx_lu.dta, clear
//inclusion/exclusion
keep if inrange(rec_tx_dt, $START, $END)
keep if rec_tx_org_ty=="LU" //lung; update as needed
keep if rec_age_at>=18 //adults only; update as needed
keep if don_ty=="C" //deceased donor transplants only
drop if inlist(rec_tx_procedure,605,606) //drop lobar transplants for lung
keep donor_id px_id rec_tx_procedure_ty rec_tx_dt
*tempfile of eligible recipient-donor pairs, re-used for every offer-year merge
tempfile lu_tx
save `lu_tx',replace
***************************************************************************
//2021 offer data: attach match-run info to each transplanted pair
merge 1:m px_id donor_id using $OFFERS2/ptr_lu_20210101_20211231_pub.dta, keep(master match) keepusing(match_id match_submit_dt ptr_sequence_num ptr_offer_id ptr_offer_acpt ptr_org_placed )
//checking that those that didn't match were outside of offer data dates
codebook rec_tx_dt if _merge==1
codebook rec_tx_dt if _merge==3
list rec_tx_dt if _merge==1 & inrange(rec_tx_dt, mdy(12,25,2020),mdy(1,09,2022))
tab rec_tx_proc if _merge==1 & inrange(rec_tx_dt, mdy(12,25,2020),mdy(1,09,2022))
//9 on the fringe- hopefully in the other data
drop if _merge==1
drop _merge
//at this point ptr_seq isn't missing
//but there might be multiple match_id/match runs for each pair
codebook ptr_seq
sort match_id px_id donor_id
bys px_id donor_id (match_id) : gen n=_n
order n match_id px_id donor_id ptr_seq ptr_offer_id ptr_offer_acpt
tab n
bys px_id donor_id : gen N=_N
order N
list if N==3,sepby(px_id)
list if N==2,sepby(px_id)
//keep the larger match_id for each pair
codebook px_id
codebook donor_id
//keeps largest match run (n==N is the row with the largest match_id)
keep if N==n
codebook px_id
codebook donor_id
//FIX: was "ptr_sequence_numa" (typo; no such variable, keep would abort).
//The 2015-2020 loop below keeps ptr_sequence_num, and the year-by-year
//merges later match on ptr_seq, so this file must carry the same variable.
keep px_id donor_id ptr_sequence_num rec_tx_dt
tempfile offers_2021
save `offers_2021',replace
***************************************************************************
***************************************************************************
//now a loop for other years: same pipeline as the 2021 section above,
//run once per offer-data year 2015-2020
forvalues i=2015/2020 {
use `lu_tx',clear
merge 1:m px_id donor_id using $OFFERS1/ptr_lu_`i'0101_`i'1231.dta, keep(master match) keepusing(match_id match_submit_dt ptr_sequence_num ptr_offer_id ptr_offer_acpt ptr_org_placed )
//checking that those that didn't match were outside of offer data dates
sum rec_tx_dt if _merge==3
local min = r(min)
local max = r(max)
list rec_tx_dt if _merge==1 & inrange(rec_tx_dt, `min',`max')
tab rec_tx_proc if _merge==1 & inrange(rec_tx_dt, `min', `max')
drop if _merge==1
drop _merge
//at this point ptr_seq isn't missing
//but there might be multiple match_id/match runs for each pair
codebook ptr_seq
sort match_id px_id donor_id
bys px_id donor_id (match_id) : gen n=_n
order n match_id px_id donor_id ptr_seq ptr_offer_id ptr_offer_acpt
tab n
bys px_id donor_id : gen N=_N
order N
list if N==3,sepby(px_id)
list if N==2,sepby(px_id)
//at this point i think reasonable to keep the larger match_id for each pair
codebook px_id
codebook donor_id
//keeps largest match run (n==N selects the row with the largest match_id)
keep if N==n
codebook px_id
codebook donor_id
keep px_id donor_id ptr_sequence_num rec_tx_dt
tempfile offers_`i'
save `offers_`i'',replace
}
***************************************************************************
***************************************************************************
//stack the yearly files; keys never overlap across years, so each 1:1
//merge effectively appends the next year's pairs (nogen drops _merge)
use `offers_2015',clear
merge 1:1 px_id donor_id ptr_seq using `offers_2016', nogen
merge 1:1 px_id donor_id ptr_seq using `offers_2017', nogen
merge 1:1 px_id donor_id ptr_seq using `offers_2018', nogen
merge 1:1 px_id donor_id ptr_seq using `offers_2019', nogen
merge 1:1 px_id donor_id ptr_seq using `offers_2020', nogen
merge 1:1 px_id donor_id ptr_seq using `offers_2021', nogen
save $FILE/lung_offers.dta,replace
*******************************************************************************
//the method above may provide slightly higher ptr_seq number (bypass are included in that count)
//below we recalculate ptr_seq for each donor before merging with transplants
//(kept for reference; disabled because it loses ~50 patients/donors - see note)
/*
use match_id px_id donor_id match_submit_dt ptr_sequence_num ptr_offer_id ptr_offer_acpt ptr_org_placed using $OFFERS2/ptr_lu_20210101_20211231_pub.dta,clear
sort match_id donor_id ptr_seq
bys donor_id : egen final_match_id = max(match_id)
order final_match_id
drop if final_match_id !=match_id
//drop if ptr_offer_acpt=="B"
isid match_id px_id donor_id
bys match_id (ptr_seq) : gen new_sequence = _n
compare ptr_seq new_seq
merge m:1 px_id donor_id using `lu_tx',keep(using match)
sort rec_tx_dt
list rec_tx_dt rec_tx_pro if _merge==2 & inrange(rec_tx_dt, mdy(12,25,2020),mdy(1,09,2022)) ,sep(100)
codebook px_id donor_id if _merge==3
//we're losing ~50 patients and donors with this latter method- best to just do the first
//median seq numbers aren't that different after dropping bypasses anyway
*/
3. lungdx#
*Standardized Lung Diagnosis Recoding for TRC
*Input from OPTN guidelines and Dr. Christian Merlo
*Last updated: 1/18/23
*collapse OPTN rec_dgn codes into 5 standardized diagnosis groups (new var dgn);
*code lists per OPTN guidelines and Dr. Merlo - do not edit without clinical review
recode rec_dgn (100 103 105 107 109 111 113 114 116 214 410 412 433 450 452 1603 1606/1608 1611 = 1 "Obstructive") ///
(200 202 203 205 206 208/210 212 216 218 220 442 1500/1502 1517 1548 1549 1600 1601 1614 1615 = 2 "Pulm Vascular") ///
(300 302 303 305 1602 = 3 "CF and Immunodeficiency") ///
(106 213 215 217 219 400/411 413/420 422/424 432/434 437 438 440/442 444 446/449 451/454 1518/1525 1550/1557 1599 1604 1605 1609 1610 1612 1613 1616/1617 = 4 "Restrictive") ///
(1997 1999 = 5 "Other"), gen(dgn)
lab var dgn "Recipient Diagnosis"
tab dgn,m //check for unrecoded/missing codes
4. nhdemo#
cls
*! demographics
*! essence: #41
*! downloads NHANES III (fixed-width) and continuous NHANES (XPT) demographics,
*! stacks all 11 surveys, and saves 02_nhdemo$date.dta
quietly {
capture log close
global date: di %td date(c(current_date),"DMY")
quietly log using "03_nhdemo$date.log", replace
version 12
global nh3 https://wwwn.cdc.gov/nchs/data/nhanes3/1a/
global start = clock(c(current_time),"hms")/10^3
//1988-1994: NHANES III fixed-column ASCII; column positions from the codebook
noi di "loading... 1988-1994 from $nh3"
infix seqn 1-5 dmpstat 11 dmarethn 12 dmaethnr 14 hssex 15 hsageir 18-19 ///
hfa8r 1256-1257 hff19r 1409-1410 sdpphase 42 using ${nh3}adult.DAT, clear
tempfile surv1
gen surv=1
save `surv1'
//1999-2017: continuous NHANES cycles, filenames carry a letter suffix (_B..._J)
global nhanes "https://wwwn.cdc.gov/Nchs/Nhanes"
tokenize "`c(ALPHA)'" //i.e., "`2'" -> B, "`3'" -> C, etc.
local Letter=1 //incremented each cycle; Letter=2 -> "B" for 2001, etc.
local N=2 //surveys: 1999-2017
foreach y of numlist 1999 2001 2003 2005 2007 2009 2011 2013 2015 2017 {
local yplus1=`y' + 1
if `y'==1999 {
local letter: di "" //1999-2000 files have no suffix
}
if `y' >2000 {
local letter: di "_``Letter''"
}
import sasxport5 "$nhanes/`y'-`yplus1'/DEMO`letter'.XPT", clear
noi di "loading... `y'-`yplus1' from $nhanes"
tempfile surv`N'
gen surv=`N'
save `surv`N''
local N=`N' + 1
local Letter=`Letter' + 1
}
clear
noi di "..."
noi di "LOADED!"
forvalues i=1/11 {
append using `surv`i''
}
compress
local sample=100
sample `sample' //100 = keep everyone; lower for a quick test run
lab dat "`sample'% of NHANES 1988-2018, Demographics"
save "02_nhdemo$date", replace
noi di ""
noi di ""
noi di "02_nhdemo$date.dta saved to " word("`c(pwd)'",1)
//big data!
noi di ""
noi di "rows x columns:"
noi count
ds
noi di wordcount(r(varlist))
global finish = clock(c(current_time),"hms")/10^3
global duration = round(($finish - $start)/60,1)
noi di "runtime = $duration minute(s)"
log close
}
5. nhexam#
cls
*! assemble datasets
*! exam, lab, q&a
*! do so for all nh surveys
quietly {
capture log close
global date: di %td date(c(current_date),"DMY")
log using "06_nhexam$date.log", replace
version 12
global nh3 https://wwwn.cdc.gov/nchs/data/nhanes3/1a/
global nhanes "https://wwwn.cdc.gov/Nchs/Nhanes"
global start = clock(c(current_time),"hms")/10^3
//1988-1994
noi di "processing... 1988-1994/EXAM.DAT"
infix seqn 1-5 pepmnk1r 1423-1425 pepmnk5r 1428-1430 bmpbmi 1524-1527 ///
using ${nh3}exam.dat, clear
tempfile exam
save `exam'
noi di "processing... 1988-1994/LAB.DAT"
infix seqn 1-5 ghp 1861-1864 ubp 1965-1970 urp 1956-1960 sgpsi 1761-1765 ///
cep 1784-1787 using ${nh3}lab.dat, clear
tempfile lab
save `lab'
noi di "processing... 1988-1994/ADULT.DAT"
infix seqn 1-5 hae4d2 1605 had1 1561 had6 1568 had10 1578 hak1 1855 ///
hae2 1598 hae5a 1610 hat1met 2393-2395 har1 2281 ///
using ${nh3}adult.DAT, clear
tempfile qa
save `qa'
use `exam', clear
merge 1:1 seqn using `lab', nogen
merge 1:1 seqn using `qa', nogen
gen surv=1
tempfile exam_0
save `exam_0'
//1999-2018
tokenize "`c(ALPHA)'" //i.e., "`2'" -> B, "`3'" -> C, etc.
local surv=2
local y=1999
forvalues i=1/10 { //nhanes survey
local yp1=`y'+1
forvalues j=1/11 { //specific datafile
local DATA1 BPX BMX LAB10 LAB16 LAB18 ALQ DIQ KIQ BPQ PAQ SMQ
local DATA2 BPX BMX L10 L16 L40 ALQ DIQ KIQ_U BPQ PAQ SMQ
local DATA3 BPX BMX GHB ALB_CR BIOPRO ALQ DIQ KIQ_U BPQ PAQ SMQ
if `i'==1 {
local letter: di ""
local DATA `DATA1'
}
if inlist(`i', 2, 3) {
local letter: di "_``i''"
local DATA `DATA2'
}
if inrange(`i', 4,11) {
local letter: di "_``i''"
local DATA `DATA3'
}
local DATA: di word("`DATA'",`j') //jth datafile
import sasxport5 "$nhanes/`y'-`yp1'/`DATA'`letter'.XPT", clear
noi di "processing... `y'-`yp1'/`DATA'`letter'.XPT"
tempfile dat`j'
save `dat`j''
}
use `dat1', clear
forvalues k=2/11 { //merge k datafiles from 1 year
merge 1:1 seqn using `dat`k'', gen(merge`k')
}
gen surv=`surv'
local surv=`surv'+1
tempfile exam_`i'
save `exam_`i'' //one datafile from ith survey
local y=`y'+2
}
clear
use `exam_0' //1988-1994: nhanes 3
forvalues i=1/10 {
append using `exam_`i'' //1999-2018: continuous nhanes
}
lab dat "NHANES 1988-2018, Q&A/Exam/Labs"
lookfor smq
save "05_nhexam$date", replace
noi di ""
noi di ""
noi di "05_nhexam$date.dta saved to " word("`c(pwd)'",1)
//big data!
noi di ""
noi di "rows x columns:"
noi count
ds
noi di wordcount(r(varlist))
global finish = clock(c(current_time),"hms")/10^3
global duration = round(($finish - $start)/60,1)
noi di "runtime = $duration minute(s)"
log close
}
*! quietly, loops, indent
*! collapsing, debugging
*! this is a pro-tip :)
6. nhmerge#
cls
macro drop _all
*! merge nhdemo & nhexam
*! rename variables
*! selection criteria
quietly {
capture log close
global date: di %td date(c(current_date),"DMY")
log using "09_nhtable$date.log", replace
version 12
global start = clock(c(current_time),"hms")/10^3
//1.data
use 05_nhexam$date, clear
//NHANES III seqn overlaps continuous NHANES seqn; negate for surv==1 so the
//1:1 merge key is unique across all surveys
replace seqn=-1*seqn if surv==1 //restore unique seqn across surveys
tempfile 05_nhexam
save `05_nhexam'
use 02_nhdemo$date, clear
replace seqn=-1*seqn if surv==1
merge 1:1 seqn using `05_nhexam' //enforce a strictly 1:1 merge
keep if _merge==3
}
quietly {
//2.variables
//seqn
isid seqn
//survey
g year=surv
lab var year "Year of Donation, %"
label define Year ///
1 "1988-1998" ///
2 "1999-2000" ///
3 "2001-2002" ///
4 "2003-2004" ///
5 "2005-2006" ///
6 "2007-2008" ///
7 "2009-2010" ///
8 "2011-2012" ///
9 "2013-2014" ///
10 "2015-2016" ///
11 "2017-2018"
label values year Year
//eligibility
//1988-1998
drop if inlist(dmpstat,1,3)
//1999-2018
drop if ridstatr==1
//race
//1988-1998
g race=dmarethn
label var race "Race/ethnicity, %"
label define Race 1 "White" 2 "Black" 3 "Hispanic" 4 "Other"
label values race Race
//1999-2018
recode ridreth1 (3=1)(4=2)(1 2=3)(5=4)
replace race=ridreth1 if inrange(year,2,11)
recode ridreth3 (3=1)(4=2)(1 2=3)(6=4)(7=5),gen(race2)
label var race2 "Race/ethnicity, %"
label define Race2 1 "White" 2 "Black" 3 "Hispanic" 4 "Asian" 5 "Other"
label values race2 Race2
//sex
//1988-1998
recode hssex (1=0)(2=1),gen(female)
label var female "Female, %"
//1999-2018
recode riagendr (1=0)(2=1)
replace female=riagendr if inrange(year,2,11)
//age
//1988-1998
g age=hsageir
label var age "Age, y, median [IQR]"
//1999-2018
replace age=ridageyr if inrange(year,2,11)
//eligibility
drop if !inrange(age,18,90)
//education
//1988-1998
recode hfa8r ///
(0/8=1) ///k-8
(9/12 13=2) ///highsch, diploma
(14/15=3) ///some coll/assoc
(16/17=4) ///coll/above
(66/99=.), gen(educ)
//1999-2018
recode dmdeduc2 ///
(2 3=2) ///
(4=3) ///
(5=4) ///
(7 9=.)
replace educ=dmdeduc2 if inrange(year,2,11)
label define Educ ///
1 "K-8" ///
2 "High school, Diploma/equivalent" ///
3 "Some college/associate" ///
4 "College graduate/above"
label values educ Educ
label var educ "Education, %"
//income
//1988-1998
g income=hff19r
label var income "Income, $, median [IQR]"
replace income=. if inlist(income,88,99)
replace income=round(rweibull(2, 5000, 0)) if inrange(income,0,2)
forvalues i=3/20 {
replace income=round(runiform((`i'000-1000),`i'000)) if income==`i'
}
local lb=20000
forvalues i=21/26 {
local ub=`lb' + 4999
di "`lb'" " `ub'"
replace income=round(runiform(`lb',`ub')) if income==`i'
local lb=`ub' + 1
}
replace income=round(rweibull(1.09, 25000, 50000)) if income==27
//1999-2018
replace income=indhhinc if inrange(year,2,5)
replace income=indhhin2 if inrange(year,6,11)
local lb=0
foreach i of numlist 1 2 3 4 5 {
local ub=`lb' + 4999
replace income=runiform(`lb',`ub') if income==`i' & year!=1
local lb=`ub' + 1
}
local lb=25000
foreach i of numlist 6 7 8 9 10 11 {
local ub=`lb' + 9999
replace income=runiform(`lb',`ub') if income==`i' & year!=1
local lb=`ub' + 1
}
//dm
//1988-1998
recode had1 (2=0)(8 9=.), gen(dm)
label var dm "Diabetes, %"
//1999-2018
recode diq010 (1 3=1)(2=0)(7 9=.)
replace dm=diq010 if inrange(year,2,11)
//htn
//1988-1998
recode hae2 (2=0)(8 9=.), gen(htn)
label var htn "Hypertension, %"
//1999-2018
recode bpq020 (2=0)(9=.)
replace htn=bpq020 if inrange(year,2,11)
//ckd
//1988-1998
***
//1999-2018
recode kiq020 (2=0)(7 9=.) if inlist(year,2),gen(ckd)
recode kiq022 (2=0)(7 9=.) if inrange(year,3,11)
replace ckd=kiq022 if inrange(year,3,11)
label var ckd "CKD, %"
//smoke
//1988-1998
recode har1 (2=0),gen(smk)
label var smk "Smoke, %"
//1999-2018
recode smq020 (2=0)(7 9=.)
replace smk=smq020 if inrange(year,2,11)
//bp
//1988-1998
g sbp=pepmnk1r
label var sbp "SBP, mmHg, median [IQR]"
g dbp=pepmnk5r
label var dbp "DBP, mmHg, median [IQR]"
//1999-2018
replace sbp=bpxsar if inrange(year,2,4)
replace dbp=bpxdar if inrange(year,2,4)
replace sbp=bpxsy1 if inrange(year,5,11)
replace dbp=bpxdi1 if inrange(year,5,11)
//bmi
//1988-1998
g bmi=bmpbmi
label var bmi "BMI, kg/m2, median [IQR]"
//1999-2018
replace bmi=bmxbmi if inrange(year,2,11)
//hba1c
//1988-1998
g hba1c=ghp
label var hba1c "HBA1c, %, median [IQR]"
//1988-1998
replace hba1c=lbxgh if inrange(year,2,11)
//uacr
//1988-1998
g acr=(ubp*1/10^-3)/(urp*1/10^-1)
label var acr "uACR, mg/g, median [IQR]"
//1999-2018
replace acr=(urxuma*1/10^-3)/(urxucr*1/10^-1) if inrange(year,2,11)
g logacr=log(acr)
label var logacr "Log uACR, log(mg/g), median [IQR]"
//glucose
//1988-1998
g glucose=sgpsi*18/10
label var glucose "Glucose, mg/L, median [IQR]"
//1999-2018
replace glucose=lbxsgl*1/10^1 if inrange(year,2,11)
//creatinine
//1988-1998
g creat=cep
label var creat "Serum Creatinine, mg/dL, median [IQR]" //FIX: label typo "[IAR]"
//1999-2018
replace creat=lbdscrsi/88.42 if inrange(year,2,11) //umol/L -> mg/dL
//ckd-epi (2009 creatinine equation)
replace creat=0.960*creat-0.184 if year==1
//FIX: the originals `local k=cond(female==1,...)' and `local alpha=...'
//evaluate `female' for the FIRST observation only, so a single kappa/alpha
//was applied to the whole sample; use per-observation tempvars instead
tempvar k alpha
g `k'=cond(female==1,0.7,0.9)
g `alpha'=cond(female==1,-0.329,-0.411)
g egfr=141*min(creat/`k',1)^`alpha'* ///
max(creat/`k',1)^-1.209* ///
0.993^age* ///
1.018^female* ///
1.159^(race==2) if !missing(creat)
label var egfr "eGFR, ml/min/1.73m2, median [IQR]" //FIX: label typo "[IAR]"
}
quietly {
//3.keep variables: final analysis set only (derived vars; raw NHANES dropped)
keep seqn age female race race2 educ income dm htn smk ///
sbp dbp bmi hba1c glucose acr logacr creat egfr year
}
lab dat "NHANES 1988-2018, Demographic & Health Characteristics"
save "08_nhtable$date", replace
quietly {
//big data! report dimensions and runtime to the log
noi di ""
noi di "rows x columns:"
noi count
ds
noi di wordcount(r(varlist))
global finish = clock(c(current_time),"hms")/10^3
global duration = round(($finish - $start)/60,1)
noi di "runtime = $duration minute(s)"
log close
}
7. nhmort#
cls
clear
*! mortality linkage
*! cruxes at lines 35 & 47
quietly {
capture log close
global date: di %td date(c(current_date),"DMY")
log using "12_nhmort$date.log", replace
version 12
global start = clock(c(current_time),"hms")/10^3
//1.data
global nchs https://ftp.cdc.gov/pub/Health_Statistics/NCHS/
global linkage datalinkage/linked_mortality/
global nh3 NHANES_III
global mort _MORT_2019_PUBLIC.dat
global varlist ///
seqn 1-6 ///
eligstat 15 ///
mortstat 16 ///
ucod_leading 17-19 ///
diabetes 20 ///
hyperten 21 ///
permth_int 43-45 ///
permth_exm 46-48
//1988-1994
infix $varlist using $nchs$linkage$nh3$mort, clear
replace seqn=-1*seqn
tempfile dat0
save `dat0'
//1999-2018
local N=1
foreach y of numlist 1999 2001 2003 2005 2007 2009 2011 2013 2015 2017 {
local yplus1=`y' + 1
local nhanes NHANES_`y'_`yplus1'
infix $varlist using $nchs$linkage`nhanes'$mort, clear
tempfile dat`N'
save `dat`N''
local N=`N'+1
}
use `dat0', clear
forvalues i=1/10 {
append using `dat`i''
}
}
quietly {
//2.values
lab def premiss .z "Missing"
lab def eligfmt 1 "Eligible" 2 "Under age 18, not available for public release" 3 "Ineligible"
lab def mortfmt 0 "Assumed alive" 1 "Assumed deceased" .z "Ineligible or under age 18"
lab def flagfmt 0 "No - Condition not listed as a multiple cause of death" ///
1 "Yes - Condition listed as a multiple cause of death" ///
.z "Assumed alive, under age 18, ineligible for mortality follow-up, or MCOD not available"
lab def qtrfmt 1 "January-March" ///
2 "April-June" ///
3 "July-September" ///
4 "October-December" ///
.z "Ineligible, under age 18, or assumed alive"
lab def dodyfmt .z "Ineligible, under age 18, or assumed alive"
lab def ucodfmt 1 "Diseases of heart (I00-I09, I11, I13, I20-I51)"
lab def ucodfmt 2 "Malignant neoplasms (C00-C97)" , add
lab def ucodfmt 3 "Chronic lower respiratory diseases (J40-J47)" , add
lab def ucodfmt 4 "Accidents (unintentional injuries) (V01-X59, Y85-Y86)" , add
lab def ucodfmt 5 "Cerebrovascular diseases (I60-I69)" , add
lab def ucodfmt 6 "Alzheimer's disease (G30)" , add
lab def ucodfmt 7 "Diabetes mellitus (E10-E14)" , add
lab def ucodfmt 8 "Influenza and pneumonia (J09-J18)" , add
lab def ucodfmt 9 "Nephritis, nephrotic syndrome and nephrosis (N00-N07, N17-N19, N25-N27)" , add
lab def ucodfmt 10 "All other causes (residual)" , add
lab def ucodfmt .z "Ineligible, under age 18, assumed alive, or no cause of death data" , add
replace mortstat = .z if mortstat >=.
replace ucod_leading = .z if ucod_leading >=.
replace diabetes = .z if diabetes >=.
replace hyperten = .z if hyperten >=.
replace permth_int = .z if permth_int >=.
replace permth_exm = .z if permth_exm >=.
}
quietly {
//3.define
label var seqn "NHANES Respondent Sequence Number"
label var eligstat "Eligibility Status for Mortality Follow-up"
label var mortstat "Final Mortality Status"
label var ucod_leading "Underlying Cause of Death: Recode"
label var diabetes "Diabetes flag from Multiple Cause of Death"
label var hyperten "Hypertension flag from Multiple Cause of Death"
label var permth_int "Person-Months of Follow-up from NHANES Interview date"
label var permth_exm "Person-Months of Follow-up from NHANES Mobile Examination Center (MEC) Date"
label values eligstat eligfmt
label values mortstat mortfmt
label values ucod_leading ucodfmt
label values diabetes flagfmt
label values hyperten flagfmt
label value permth_int premiss
label value permth_exm premiss
noi des
//eligibility
drop if inlist(eligstat,2,3)
duplicates drop
keep seqn mortstat permth_int permth_exm
save "11_nhmort$date", replace
//big data!
noi di ""
noi di "rows x columns:"
noi count
ds
noi di wordcount(r(varlist))
global finish = clock(c(current_time),"hms")/10^3
global duration = round(($finish - $start)/60,1)
noi di "runtime = $duration minute(s)"
log close
}
8. nhsurv#
cls
clear
*! baseline 08_nhtable.dta
*! outcome 11_nhmort.dta
*! km curve 30y mortality
quietly {
capture log close
global date: di %td date(c(current_date),"DMY")
log using "14_nhsurv$date.log", replace
version 15
global start = clock(c(current_time),"hms")/10^3
//0.final eligibility criterion: keep only participants with mortality linkage
use 08_nhtable$date, clear
noi merge 1:1 seqn using 11_nhmort$date, keep(matched) nogen
gen died=mortstat
gen years=permth_exm/(12) //follow-up months since MEC exam -> years
//1.healthy nondonor: donor-eligibility-style criteria; any missing component
//leaves healthy missing, which is then recoded to 0 (not healthy)
g healthy=.
replace healthy=1 if ///
inrange(sbp,0,140) & ///
inrange(dbp,50,85) & ///
inrange(bmi,0,30) & ///
inrange(egfr,100,160) & ///
inrange(hba1c,0,7) & ///
inrange(acr,2,10) & ///
dm==0 & ///
htn==0
replace healthy=0 if missing(healthy)
save "15_nhsurv$date", replace
}
if 13 { //any nonzero constant -> always runs; used as an on/off toggle
//big data!
noi di ""
noi di "rows x columns:"
noi count
ds
noi di wordcount(r(varlist))
global finish = clock(c(current_time),"hms")/10^3
global duration = round(($finish - $start)/60,1)
noi di "runtime = $duration minute(s)"
}
quietly {
//2.just short of median survival
stset years, fail(died) //declare survival-time data: time in years, event=died
sts graph, ///
by(healthy) ///
fail per(100) ///
ti("") yti("%",orient(horizontal)) xti("Years") ///
ylab(0(10)50, angle(360) format(%3.0f))
graph export mortality_nondonor.png, replace //FIX: filename typo "morality"
gen inelig=healthy==0
noi stcox inelig //Cox HR for not meeting the healthy-nondonor criteria
sts list, fail by(healthy) saving(16_nhkm$date, replace)
preserve
use 16_nhkm$date, clear
replace failure=failure*100 //proportion -> percent
//see results of segev/jama/2010
noi list healthy time failure if inrange(time,4.9,5.1)
noi list healthy time failure if inrange(time,11.9,12.1)
restore
noi tab healthy
//nonparametric hazard estimate for the healthy group
sts graph if healthy==1, ///
ha by(healthy) ///
ylab(0(.005).02, angle(360)) ///
yti("{&lambda}", size(6) ///
orient(horizontal)) ///
xti("Years") ///
ti("Nonparametric", size(3))
graph save nonparm_ha , replace
preserve
keep if healthy==1
drop _st _d _t _t0 healthy inelig //drop stset system vars before export
save nh_nondonors$date, replace //for donor vs nondonor analysis
restore
}
quietly {
if 0 {
noi streg if healthy==1, d(weibull) //beta = .0012, lambda=1.6, sigma=.6
noi streg if healthy==1, d(ggamma)
predict f_gg, ha
matrix list e(b)
matrix define b=e(b)
local beta: di %2.1f b[1,1]
local lambda: di %2.1f b[1,3]
local sigma: di %2.1f exp(b[1,2])
line f_gg _t, ///
sort ///
ylab(0(.005).02, angle(360)) ///
yti(" ", ///
orient(horizontal)) ///
xti("Years") ///
ti("{&beta}{subscript:gg} = `beta', {&lambda}{subscript:gg} = `lambda', {&sigma}{subscript:gg} = `sigma' ", size(3))
graph save parm_ha, replace
graph combine nonparm_ha.gph parm_ha.gph, ycommon
graph export parmnonparm_ha.png, replace
matrix ggpars=r(table)
//slope: di (.015-.003)/(27-0) = .00044
//dL/dt = .00044; L = .00044t + 0.001
preserve
clear
set obs 1000
g t=_n*30/1000
g L=.00045*t + 0.003 //from slope of non-parametric ha
//.003/12 gives the perioperative mortality!!!
line L t, ///
ylab(0(.005).02, angle(360)) ///
yti(" ", ///
orient(horizontal)) ///
xti("Years") ///
ti("{&lambda}{subscript:nonparm} ~ .00044t + .003", size(3))
graph save L.gph, replace
graph combine nonparm_ha.gph ///
parm_ha.gph ///
L.gph, ycommon row(1)
graph export lambda.png, replace
//consistent with "di (1/1000)*(3/12)" perioperative mortality
//consistent: https://www.cdc.gov/nchs/fastats/deaths.htm
//1 death per 100 population; or hazard of .01 per person
//3.5m deaths/350m population
restore
preserve
//https://en.wikipedia.org/wiki/Gompertz–Makeham_law_of_mortality
clear
set obs 1000
g t=_n*100/1000
g b=.085
g a=1
g A=-1
g l=1
g h=a*exp(b*t)+l
g H=(A*exp(b*t)+l)+5000
line h H t
restore
}
}
if 0 {
//3.table1 user input
global title: di ///
"Table 1 Demographic and health characteristics of NHANES participants"
global excelfile: di "Table1_Nondonors"
global byvar healthy
global contvars age sbp dbp creat egfr bmi hba1c acr logacr glucose income
global binaryvars female dm htn smk
global multivars race race2 educ year
global footnotes1 Age SBP DBP CREAT eGFR BMI HBA1c uACR LoguACR Glucose Income
global footnotes2 Female Diabetes Hypertension Smoke
global footnotes3 Race Race2 Education Cohort
}
if 0 {
//command
which table1_options
table1_options, ///
title("$title") /// by($byvar) ///
cont($contvars) ///
binary($binaryvars) ///
multi($multivars) ///
foot("$footnotes1 $footnotes2 $footnotes3") ///
excel("$excelfile")
clear
svmat ggpars
save 17_ggpars, replace
}
log close
9. table1#
cls
* requires Tasks/006_Ado_Files/table_options.ado, ind_translator.ado
* adapted from Tasks/003_NHANES/13_nhsurv_4trc.do
* copied lines 169-204
* edited line 169, 188
* deleted lines 200-202
* added lines 16-19 below
* these create log file, import dataset
* outputs file called Table1_Nondonors.xlsx
* you may edit output file name at line 27
if 1 { //toggle: user-input section
capture log close
global date: di %td date(c(current_date),"DMY")
log using "02_nhtable1$date.log", replace
use 15_nhsurv30jan2023
//3.table1 user input: globals consumed by table1_options below
global title: di ///
"Table 1 Demographic and health characteristics of NHANES participants"
global excelfile: di "Table1_Nondonors"
global byvar healthy
global contvars age sbp dbp creat egfr bmi hba1c acr logacr glucose income
global binaryvars female dm htn smk
global multivars race race2 educ year
global footnotes1 Age SBP DBP CREAT eGFR BMI HBA1c uACR LoguACR Glucose Income
global footnotes2 Female Diabetes Hypertension Smoke
global footnotes3 Race Race2 Education Cohort
}
if 2 { //toggle: any nonzero constant runs the command section
//command
which table1_options
table1_options, ///
title("$title") ///
by($byvar) ///
cont($contvars) ///
binary($binaryvars) ///
multi($multivars) ///
foot("$footnotes1 $footnotes2 $footnotes3") ///
excel("$excelfile")
}
log close
10. kaplan-meier#
cls
*! adapted from Tasks/003_NHANES/13_nhsurv_4trc.do
*! copied & pasted lines 61-66
*! then added lines 10-14 below
quietly {
//settings
capture log close
global date: di %td date(c(current_date),"DMY")
log using "02_survival_analysis_$date.log", replace
use 15_nhsurv30jan2023, clear
version 15
//exploration: peek at a few rows after sorting by group
noi tab healthy
sort healthy
noi list years died healthy in 60210/60218
//analysis: declare survival data, then KM failure curves by group
stset years, fail(died)
sts graph, ///
by(healthy) ///
fail per(100) ///
ti("") yti("%",orient(horizontal)) xti("Years") ///
ylab(0(10)50, angle(360) format(%3.0f))
log close
}
11. r#
11.1 Intro#
Survival Analysis in R
For TRC
01/31/22
11.2 Methods#
Data Source:
Tasks/003_NHANES/13_nhsurv_4trc.do
Outputs 15_nhsurv30jan2023.dta
11.3 Packages#
# good practice to start clean
# install.packages("haven")     # (fixed: comment had an unclosed quote)
# install.packages("survival")
rm(list = ls())
library(haven)     # read_dta() for Stata .dta files
library(survival)  # Surv(), survfit(), coxph()
11.4 Surv#
# requires library(haven)
dat <- read_dta("15_nhsurv30jan2023.dta")
colnames(dat)
# head(dat)
# View(dat)
# requires library(survival)
# NOTE(review): attach() puts columns in the search path; fine for a demo,
# but dat$years / with(dat, ...) is safer in scripts
attach(dat)
dat1 <- cbind(years,died)
dat1[1:5,]
mysurv <- Surv(years, died)  # survival object: time + event indicator
mysurv[1:5,]
11.5 Results#
# overall K-M
# K-M by group
myKM1 <- survfit(mysurv ~ healthy)
# myKM1
myKM1sum <- summary(myKM1)
#myKM1sum
plot(myKM1,
main=("Kaplan-Meier estimate with 95% confidence bounds"),
fun = "event",
xlab = "years",
ylab = "Survival function",
# NOTE(review): ylim = c(0.5, 0) draws a reversed y-axis; confirm this is
# the intended orientation for the cumulative-event curve
ylim = c(0.5, 0)) # plot with axes and title
# Cox PHM
# data attached so no need to state
myfit1 <- coxph(mysurv ~ healthy,
ties = "breslow")  # Breslow tie handling, matching Stata's default
summary(myfit1)
confint(myfit1)
11.6 Conclusion#
Survival Analysis in R
Just as easy as in Stata
Very similar output
12. table1_options#
*! inspired by 340.600.71
*! by Zhenghao(Vincent) Jin & Abi Muzaale
*! July 5, 2022
*! version 3.01
*! fixed rounding issue for non-by table
*! a space added to between median and IQR for cont vars
capture program drop table1_options
program define table1_options
syntax [if], title(string) ///
[cont(varlist)] ///
[binary(varlist)] ///
[multi(varlist)] ///
[foot(string)] ///
[by(varname)] ///
[excel(string)]
quietly {
// screen whether the if qualifier was supplied
if "`if'" != "" {
local if_helper = substr("`if'", 3, .)
local if_helper : di "& " "`if_helper'"
}
// screen if excel option got called
if "`excel'" == "" {
local excel_name : di "table1_option_output"
}
else {
local excel_name : di "`excel'"
}
// generate an excel file
putexcel set `excel_name', replace
local alphabet "`c(ALPHA)'"
tokenize `alphabet'
if "`by'" == "" {
// generate row and col indicator in excel
local excel_row_c = 1
local excel_col_c = 1
//columns
local col1: di "_col(40)"
capture local col2: di "_col(50)"
//title string
noisily di "`title'"
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`title'"
local excel_row_c = `excel_row_c'
count `if'
local total_N=r(N)
di `col1' "N=`total_N'"
local excel_row_c = `excel_row_c' + 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "N=`total_N'"
foreach v of varlist `cont' {
quietly sum `v' `if', detail
local row_lab_c: variable label `v'
local excel_row_c = `excel_row_c' + 1
local excel_col_c = 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`row_lab_c'"
local D %2.0f
local med_iqr_c: di ///
`D' `col1' r(p50) " [" ///
`D' r(p25) "," ///
`D' r(p75) "]"
local med_iqr_c2: di ///
`D' r(p50) " [" ///
`D' r(p25) "," ///
`D' r(p75) "]"
local row_c: di "`row_lab_c' `med_iqr_c'"
local excel_col_c = `excel_col_c' + 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`med_iqr_c2'"
//continuous varlist
noisily di "`row_c'"
}
foreach v of varlist `binary' {
local excel_row_c = `excel_row_c' + 1
local excel_col_c = 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
quietly sum `v' `if', detail
local row_lab_b: variable label `v'
putexcel $ul_cell = "`row_lab_b'"
local D %2.0f
local percent: di `D' `col1' r(mean)*100
local percent2: di `D' r(mean)*100
local row_b: di "`row_lab_b' `percent'"
local excel_col_c = `excel_col_c' + 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`percent2'"
//binary varlist
noisily di "`row_b'"
}
foreach v of varlist `multi' {
local row_var_lab: variable label `v'
local row_lab_m: value label `v'
// count without missing
quietly count if !missing(`v') `if_helper'
local total = r(N)
quietly levelsof `v' `if', local(levels)
// putexcel the variable name
local excel_row_c = `excel_row_c' + 1
local excel_col_c = 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`row_var_lab'"
//multinomial varlist
noisily di "`row_var_lab'"
local nlevels=r(r)
forvalues l=1/`nlevels' {
local excel_col_c = 1
local excel_row_c = `excel_row_c' + 1
capture local levels: label `row_lab_m' `l'
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = " `levels'"
local excel_col_c = `excel_col_c' + 1
quietly count if `v'==`l' `if_helper'
local num=r(N)
local percent_v: di `D' `col1' `num'*100/`total'
local percent_v2: di `D' `num'*100/`total'
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`percent_v2'"
noisily di " `levels' `percent_v'"
local l=`l'+1
}
}
local oldvarname: di "`cont' `binary' `multi'"
preserve
rename (`oldvarname')(`foot')
local missvar "`foot'"
di ""
foreach v of varlist `missvar' {
local excel_row_c = `excel_row_c' + 1
local excel_col_c = 1
quietly count `if'
local denom=r(N)
quietly count if missing(`v') `if_helper'
local vlab: variable label `v'
local num=r(N)
local missing=`num'*100/`denom'
local missingness: di _col(25) %2.1f `missing'
local missing2 : di %2.1f `missing'
noisily di "`v': `missingness'% missing"
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`v': `missing2'% missing"
}
noisily di "`if'"
local excel_row_c = `excel_row_c' + 1
local excel_col_c = 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`if'"
restore
}
else {
// generate a screener to see if user inputted a by varaible in table 1
// this will not influence the table but will influence the footnote missing value section
local dual_screener = 0
local var_screener `cont' `binary' `multi'
foreach var in `var_screener' {
if "`by'" == "`var'"{
local dual_screener = 1
}
}
if `dual_screener' == 1 {
noisily di "Wrong Input: The stratifying variable should not be inputted as table 1 variable"
}
else {
// generate row and col indicator in excel
local excel_row_c = 1
local excel_col_c = 1
// first detect how many categories the variable has
levelsof(`by')
// save values to a macro
local by_var_val = r(levels)
/*
// count how many values the variable has
local val_count = 0
foreach count in `by_var_val' {
val_count = `val_count' + 1
}
*/
// prepare a spacing factor to separate columns
local col_fac = 20
// prepare a spacing parameter for actual separation
// and the default should be 40 for the first column
local col_sep = 40
// prepare for column heading
local col_lab_m: value label `by'
// display first line
noisily di "`title'"
// put the title line into excel
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`title'"
// di second line (the column heading)
local col_header_count = 0
// for second line, add excel row count for 1
local excel_row_c = `excel_row_c' + 1
foreach col_h in `by_var_val' {
// calculate appropriate identation for each column header
// an identation is calculated based on basic identation for second column + identation factor per column * column number of each heading
local col_sep_temp = `col_sep' + `col_header_count' * `col_fac'
// adjust for the excel column counts to make sure it gets inputted to correct place
local excel_col_c = `excel_col_c' + 1
local col_s "_col(`col_sep_temp')"
ind_translator, row(`excel_row_c') col(`excel_col_c')
capture local col_level : label `col_lab_m' `col_h'
noisily di `col_s' "`col_level'" _continue
// output to excel cells
putexcel $ul_cell = "`col_level'"
// correctly counting for the header numbers
local col_header_count = `col_header_count' + 1
}
// use this to stop _continue
noisily di ""
// reset col_header_count
local col_header_count = 0
// reset excel_col_c to 1 so that new lines can be inputted to first cell of each lines
local excel_col_c = 1
// the third line (N=xxx)
// first set the excel row count to correct numbers
local excel_row_c = `excel_row_c' + 1
foreach col_h in `by_var_val' {
// calculate correct identation
local col_sep_temp = `col_sep' + `col_header_count' * `col_fac'
local col_s "_col(`col_sep_temp')"
// adjust for the excel column counts to make sure it gets inputted to correct place
local excel_col_c = `excel_col_c' + 1
count if `by' == `col_h' `if_helper'
local col_count = r(N)
noisily di `col_s' "N=`col_count'" _continue
// output to excel cells
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "N=`col_count'"
local col_header_count = `col_header_count' + 1
}
// use this to stop _continue
noisily di ""
// for counting variables
foreach v of varlist `cont' {
// reset header count
local col_header_count = 0
// count for excel row numbers
local excel_row_c = `excel_row_c' + 1
// reset excel_col_c to 1 so that new lines can be inputted to first cell of each lines
local excel_col_c = 1
// label name and displaying status
local row_lab_c: variable label `v'
// set format
local D %1.0f
// print variable name
noisily di "`row_lab_c'" _continue
// put variable name into cell
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`row_lab_c'"
// print for each column
foreach col_h in `by_var_val' {
// get correct separation space
local col_sep_temp = `col_sep' + `col_header_count' * `col_fac'
local col_s "_col(`col_sep_temp')"
// count for excel columns
local excel_col_c = `excel_col_c' + 1
// get median and IQR info
quietly sum `v' if `by' == `col_h' `if_helper', detail
// display each column
local col_percent : di `D' r(p50) " [" ///
`D' r(p25) "," ///
`D' r(p75) "]"
noisily di `col_s' "`col_percent'", _continue
// output to excel for median & IQR
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`col_percent'"
local col_header_count = `col_header_count' + 1
}
// finish the line
noisily di ""
}
// for binary variables
foreach v of varlist `binary' {
// reset header count
local col_header_count = 0
// label name and displaying status
local row_lab_b: variable label `v'
// set format
local D %1.0f
// count for excel row numbers
local excel_row_c = `excel_row_c' + 1
// reset excel_col_c to 1 so that new lines can be inputted to first cell of each lines
local excel_col_c = 1
// put variable name into cell
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`row_lab_b'"
// print variable name
noisily di "`row_lab_b'" _continue
// print for each column
foreach col_h in `by_var_val' {
// get correct separation space
local col_sep_temp = `col_sep' + `col_header_count' * `col_fac'
local col_s "_col(`col_sep_temp')"
// count for excel columns
local excel_col_c = `excel_col_c' + 1
quietly sum `v' if `by' == `col_h' `if_helper', detail
local percent: di `D' `col1' r(mean)*100
noisily di `D' `col_s' `percent', _continue
// output to excel for median & IQR
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`percent'"
local col_header_count = `col_header_count' + 1
}
noisily di ""
}
// for categorical variables
foreach v of varlist `multi' {
// set format
local D %1.0f
local row_var_lab: variable label `v'
local row_lab_m: value label `v'
// get levels of the categorical variable
levelsof `v'
local var_level = r(levels)
// count for excel row numbers
local excel_row_c = `excel_row_c' + 1
// reset excel_col_c to 1 so that new lines can be inputted to first cell of each lines
local excel_col_c = 1
// put variable name into cell
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`row_lab_m'"
// display variable name
noisily di "`row_var_lab'"
// get value range in case some variables have extreme values like 0
sum `v' `if', detail
local v_min = r(min)
local v_max = r(max)
// loop from min to max
forvalues v_val = `v_min'/`v_max' {
// when loop from min to max, it is necessary to get rid off non-existing values
// achieve this by count if the variable has the value or not
count if `v' == `v_val' `if_helper'
local v_level_count = r(N)
if `v_level_count' != 0 {
// reset header count
local col_header_count = 0
// reset excel_col_c to 1 so that new lines can be inputted to first cell of each lines
local excel_col_c = 1
// label name and displaying variable value
local v_level: label `row_lab_m' `v_val'
// count for excel row and output variable values
local excel_row_c = `excel_row_c' + 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = " `v_level'"
noisily di _col(5) "`v_level'", _continue
foreach col_h in `by_var_val' {
// get correct separation space
local col_sep_temp = `col_sep' + `col_header_count' * `col_fac'
local col_s "_col(`col_sep_temp')"
// count for excel colmns
local excel_col_c = `excel_col_c' + 1
count if `v'==`v_val' & `by' == `col_h' & !missing(`v') `if_helper'
local num=r(N)
count if `by' == `col_h' & !missing(`v') `if_helper'
local t_num = r(N)
local v_percent = `num' * 100 / `t_num'
local v_percent2 : di `D' `v_percent'
noisily di `D' `col_s' `v_percent', _continue
// output to excel
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`v_percent2'"
local col_header_count = `col_header_count' + 1
}
noisily di ""
}
}
}
// missing values
local oldvarname: di "`cont' `binary' `multi'"
preserve
rename (`oldvarname')(`foot')
local missvar "`foot'"
// a line spacer
noisily di ""
// put the line separator into excel
local excel_col_c = 1
local excel_row_c = `excel_row_c' + 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = ""
foreach v of varlist `missvar' {
noisily di "`v'", _continue
// output variable name into excel
local excel_row_c = `excel_row_c' + 1
local excel_col_c = 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`v'"
// reset col_header_count
local col_header_count = 0
foreach col_h in `by_var_val' {
local col_sep_temp = `col_sep' + `col_header_count' * `col_fac'
local col_s "_col(`col_sep_temp')"
// count for excel col
local excel_col_c = `excel_col_c' + 1
count if `by' == `col_h'
local denom = r(N)
count if missing(`v') & `by' == `col_h'
local neu = r(N)
local per = `neu' / `denom' * 100
local per2 = round(`per' * 10) / 10
// output to excel
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`per2'% missing"
noisily di `col_s' %2.1f `per' "% missing", _continue
local col_header_count = `col_header_count' + 1
}
noisily di ""
}
noisily di "`if'"
local excel_row_c = `excel_row_c' + 1
local excel_col_c = 1
ind_translator, row(`excel_row_c') col(`excel_col_c')
putexcel $ul_cell = "`if'"
restore
}
}
}
end
//BUGFIX: was "capture program drop _all", which also drops every other
//program already in memory (including the table-1 program defined above);
//drop only this program before redefining it
capture program drop ind_translator
program define ind_translator
//Translate a (row, col) pair into an Excel A1-style cell reference
//(e.g. row 2, col 28 -> "AB2") and store it in global macro $ul_cell
//for use with putexcel in the calling program.
//  row() : 1-based spreadsheet row number
//  col() : 1-based spreadsheet column number
syntax, row(int) col(int)
// tokenize the alphabet so `1'="A", `2'="B", ..., `26'="Z"
local alphabet "`c(ALPHA)'"
tokenize `alphabet'
// now translate col: repeated division in bijective base-26,
// prepending each letter to build the column name
local col_helper = `col'
while (`col_helper' > 0) {
local temp_helper2 = (`col_helper' - 1)
local temp_helper = mod(`temp_helper2', 26) + 1
local col_name : di "``temp_helper''" "`col_name'"
local col_helper = (`col_helper' - `temp_helper') / 26
}
// generate a global macro that can be used in main program
global ul_cell "`col_name'`row'"
end
13. irmatch#
//TODO:
// currently does not work with abbreviated variable names
//- is there a way to implement / generalize Dorry's "maxedout" option?
// (if no match, pick somebody with age within 5y, BMI 20-30, BP 100-140)
//TODO: radii are reported incorrectly in posted dataset
//(always reported as maximum allowable radius)
//for example, in ltmatch_k2_without, "init_age" always = 5 even though actual radius is usually 1 or 2
//- k>1 and "without" are IMPLEMENTED!
program define irmatch
//rmatch 0.03
//by Allan Massie and Dorry Segev, Segev lab
//Department of Surgery, Johns Hopkins School of Medicine
//
//Iterative expanding-radius matching. For each case, searches for k matched
//controls, starting at the narrowest radius on each matching variable and
//iteratively widening until enough controls are found or radii are exhausted.
//Syntax: irmatch var1(#min(#step)#max) [var2(...) ...] using filename,
//        case(varname) id(varname) [k(#) seed(#) without replace verbose pedantic]
//  case()  : 0/1 indicator, 1 = case, 0 = control (checked below)
//  id()    : unique record identifier, numeric or string (checked below)
//  k()     : controls to match per case (default 1; must be 1-100)
//  without : match without replacement (each control used at most once)
//Matched pairs are posted (via postfile) to the using file, one row per pair.
syntax anything using, case(varname) id(varname) ///
[k(int 1)] [seed(real 420)] [without] [replace] [verbose] [pedantic]
//without = without replacement
disp in smcl "{hline}"
disp "rmatch 0.03."
disp "When using this code, please cite:"
disp in smcl "Massie et al 2013, {hi:'Stata Journal Article'}"
disp "For a good reference on the radius matching techniques, consider:"
disp in smcl "James et al 2013, {hi:'Iterative Expanding Radius matching for observational studies'}," ///
" PMID {hi:XXXXXX}"
disp in smcl "{hline}"
preserve
//make a local copy of `0' to parse syntax before we actually get started
//local arglist = "`0'"
local arglist "`0'" //hopefully this fixes the max-256-char thing
//PARSE INPUT
local n = 0
local repeatvars = 0 //once they start repeating, you can't add new vars
gettoken curarg arglist:arglist
while "`curarg'" != "using" & `n' < 20 {
local n = `n' + 1
//gettoken curarg arglist:arglist
//curarg = current argument
//which should be of the form varname(n1(n2)n3)
//NOTE: using "tokenize" here will spoil `1', `2', etc.
//does not spoil `0'
tokenize "`curarg'", parse("()")
//now, `1' should be a varname
local badinput = 0
if "`2'`4'`6'`8'" != "(())"{
local badinput = 1
}
capture confirm number `3'
if _rc {
local badinput = 1
}
capture confirm number `5'
if _rc {
local badinput = 1
}
capture confirm number `7'
if _rc {
local badinput = 1
}
//BUGFIX: was "< `7'", which contradicted the error message below ("#1 <= #3")
//and rejected fixed-radius specifications where min == max
capture assert `3' <= `7'
if _rc {
local badinput = 1
}
if `badinput' == 1 {
disp as error in smcl ///
"Error: each argument must be of the form varname(#1(#2)#3)"
disp as error in smcl ///
"Where #1, #2 and #3 are numbers and #1 <= #3"
disp as error in smcl ///
"Bad argument `curarg'"
exit
}
capture confirm variable `1'
if _rc {
disp as error in smcl "Error: variable `1' does not exist"
exit
}
capture confirm numeric variable `1'
if _rc {
disp as error in smcl "Error: variable `1' is not numeric"
exit
}
//input for this variable is valid
if `repeatvars' == 0 {
if "``1'_start'" == "" { //this var has not been listed before
local `1'_start = `3'
local `1'_rad = `3'
local distinct_vars "`distinct_vars' `1'"
}
else { //this var HAS been listed before
local repeatvars = 1
local n_distinct = `n' - 1 //# of distinct variables
}
}
//BUGFIX: macro name previously had a stray trailing space ("``1'_start '"),
//which never resolves to the stored macro and made this guard vacuous
else if "``1'_start'" == "" {
disp as error in smcl ///
"Error: after listing the same variable twice, you can't introduce new variables "
exit
}
local var`n'_name = "`1'"
local var`n'_min = `3'
local var`n'_step = `5'
local var`n'_max = `7'
gettoken curarg arglist:arglist
}
//DONE PARSING INPUT
//if we get this far, then the function was called correctly
//construct the if statements
local n_ifs = 0
//iterate over (possibly repeating) list of variables
forvalues i = 1/`n' {
//iterate stepping from min to max
local curvar "`var`i'_name'"
//disp "`var`i'_min'(`var`i'_step')`var`i'_max'"
forvalues var_step = `var`i'_min'(`var`i'_step')`var`i'_max' {
local `curvar'_rad = `var_step'
//disp "`curvar' ``curvar'_rad'"
local n_ifs = `n_ifs' + 1
local if`n_ifs' = "(1) " //build the `if' string
//iterate over distinct vars to build the `if' string
foreach vv of varlist `distinct_vars' {
//construct if statement (see macrotest.do)
//later define local `vv'_cur = value of vv for current record
//This *seems* to work...
local cur_if "cond(missing(\``vv'_cur\'), 1, abs(`vv'-\``vv'_cur\')<=``vv'_rad')"
local cur_disp = substr("`vv'",1,3)
local cur_disp "`cur_disp' \``vv'_cur\'/``vv'_rad' "
//local age_don_cur "OO"
//disp `"`cur_if'"'
//local age_don_cur "PP"
//disp `"`cur_if'"'
//macval() keeps `age_don_cur' or whatever from being interpreted at this point
//local if`n_ifs' "`if`n_ifs'' & `cur_if'"
local if`n_ifs' "`macval(if`n_ifs')' & `macval(cur_if)'"
local disp`n_ifs' "`macval(disp`n_ifs')' `macval(cur_disp)'"
/*
local age_don_cur "OO"
disp `"`if`n_ifs''"'
local age_don_cur "PP"
disp `"`if`n_ifs''"'
*/
}
}
}
//DONE CONSTRUCTING IF STATEMENTS
//disp "total `n_ifs' if statements"
//disp " original: `0'"
if !inrange(`k', 1, 100) {
disp in smcl as error "Error: k must be between 1 and 100"
exit
}
if "`without'" != "" {
quietly count if `case' == 1
local ncases = r(N)
quietly count if `case' == 0
local ncontrols = r(N)
if ((`ncases' * `k') > `ncontrols') {
disp as error in smcl ///
"Error: controls must exceed k*cases if 'without' option is used"
exit
}
}
quietly count if !inlist(`case', 0, 1)
if r(N) > 0 {
disp as error in smcl ///
"Error: case variable(`case') must be 0 or 1 for all records"
exit
}
quietly duplicates report `id'
if r(N) != r(unique_value) {
disp as error in smcl ///
"Error: ID variable(`id') is not a unique identifier"
exit
}
local NUMERIC_ID = 1
local id_type: type `id'
if substr("`id_type'", 1,3) == "str" {
local NUMERIC_ID = 0
}
set seed `seed'
//sort in random order
tempvar rsort
gen `rsort' = uniform()
//to match c cases on v variables, Dorry creates c*v local macros
//then drops the cases and works with only the controls
quietly count if `case' == 1
local ncases = r(N)
quietly count if `case' == 0
local ncontrols = r(N)
//At this point we have `n_ifs' if statements to run for each record
tempvar to_delete
gen `to_delete' = 0 //used for "without" option
//create c*v local macros which store all the data for the cases
disp "Processing matching variables for cases..."
gsort -`case' `rsort' //list cases first
forvalues i = 1/`ncases' {
local id_`i' = `id'[`i']
foreach vv of varlist `distinct_vars' {
local `vv'`i' = `vv'[`i']
}
}
disp "Done."
quietly keep if `case' == 0
tempvar newcontrols alreadymatched
quietly gen int `newcontrols' = 0 //mark whether a control matches the cur case
quietly gen int `alreadymatched' = 0 //mark whether record has already been matched to cur case
capture postclose postit
postfile postit `id_type' (`id'_ctrl `id'_case) ///
double `distinct_vars' maxedout matchnum ///
`using', `replace'
local step_size = 200
if (`ncases' < 2000) {
local step_size = ceil(`ncases'/10)
}
disp "Starting matching process for `ncases' cases..."
//MAIN LOOP: iterate over the cases
forvalues i=1/`ncases' {
quietly replace `alreadymatched' = 0 //haven't matched to anybody yet
local id_i `id_`i''
local nc_total = 0 //how many controls have been matched to this case
//if var is (e.g.) "age", we need to define "age_cur" = age for this record
foreach vv of varlist `distinct_vars' {
local `vv'_cur = ``vv'`i''
}
//iterate over the ifs; initialization string
local if_i 1
local remaining_matches = `k' //how many MORE matches are required?
//BEGIN ITERATION over each of the "if" strings that is used to expand the radius
while `remaining_matches' > 0 & `if_i' <= `n_ifs' {
quietly count if `if`if_i'' & `alreadymatched' == 0
local nc_total = r(N)
if "`pedantic'" != "" {
//BUGFIX: string was unterminated (missing closing quote), which
//errored out whenever the pedantic option was specified
disp "id `id_i': `disp`if_i'': `nc_total' controls"
// disp " `if`if_i''"
}
//if we identified at least one control...
if `nc_total' > 0 {
quietly replace `newcontrols' = (`if`if_i'') & `alreadymatched' == 0
gsort -`newcontrols' `rsort'
local new_matches = ///
cond(`nc_total' < `remaining_matches', `nc_total', `remaining_matches')
forvalues j=1/`new_matches' {
local nc_num=floor(uniform()*`nc_total')+1
if `alreadymatched'[`nc_num'] == 1 {
local counter = 0
while `alreadymatched'[`nc_num'] == 1 & `counter' < 20 {
local nc_num=floor(uniform()*`nc_total')+1
local ++counter
}
}
if "`verbose'" != "" {
disp "#`i' of `ncases' id `id_i': " ///
"control `j' drawn from control #`nc_num' " ///
"id " `id'[`nc_num']
}
local post_str = ""
foreach vv of varlist `distinct_vars' {
local post_str = "`post_str' (``vv'_rad')"
} //iterate over vars to construct post string
if `NUMERIC_ID' == 1 {
post postit (`id'[`nc_num']) (`id_i') `post_str' ///
(0) (`nc_total') //(0) = not maxed out
}
else {
local mymatch `id'[`nc_num']
post postit (`mymatch') ("`id_i'") `post_str' ///
(0) (`nc_total') //(0) = not maxed out
}
//if w/o replacement, mark this control for deletion
quietly replace `to_delete' = 1 in `nc_num'
quietly replace `alreadymatched' = 1 in `nc_num'
local --remaining_matches
} //loop over the new_matches controls to assign
//if doing w/o replacement, delete any ctrls that have been used
if "`without'" != "" {
quietly drop if `to_delete' == 1
}
}
//move to next "if" statement
if `remaining_matches' > 0 {
local ++if_i
} //if we haven't identified enough matches yet
} //while no matches & keepgoing == 1
//END ITERATING OVER "IF" STRINGS
local maxedout = `remaining_matches' > 0
if `remaining_matches' > 0 { //if we FAILED to identify enough controls
if "`verbose'" != "" {
disp "#`i' of `ncases' id `id_i': " ///
" failed to identify enough controls"
}
local post_str = ""
foreach vv of varlist `distinct_vars' {
local post_str = "`post_str' (``vv'_rad')"
} //iterate over vars to construct post string
//post once for each non-match
forvalues jj = 1/`remaining_matches' {
if `NUMERIC_ID' == 1 {
post postit (.) (`id_i') `post_str' ///
(`maxedout') (`nc_total')
}
else {
post postit ("") ("`id_i'") `post_str' ///
(`maxedout') (`nc_total')
}
}
} //if we failed to identify enough controls
if "`verbose'" == "" & mod(`i',`step_size') == 0 {
disp "Finished `i' of `ncases' cases (" %3.1f `i'*100/`ncases' "%)"
}
} //loop over each case
postclose postit
end
14. simulatepopulation#
/*
extra credit
reverse engineer
polack et al. nejm 2020
to appreciate 340.600.71
from a fresh perspective
NEJM2020;383:2603-15
*/
//1.settings
capture log close
log using BNT162b2.log, replace
set more off
clear all
version 16 //not checked whether it runs on earlier versions
//2.table1
//row1: treatment assignment
set seed 34060071
set obs 37706 //study population
gen bnt=rbinomial(1,.5) //random 1:1 assignment
lab define Bnt ///
0 "Placebo" /// group labels
1 "BNT162b2"
label values bnt Bnt
tab bnt
//row2: sex
gen female=rbinomial(1, .494)
label define Female ///
0 "Male" ///
1 "Female"
label values female Female
tab female
//row3: race/ethnicity
tempvar dem // temporarily need it
gen `dem'=round(runiform(0,100),.1) // equal pr(sampled)
recode `dem' /// but not inform by race:
(0/82.9=0) /// 82.9% white
(83.0/92.1=1) /// 9.3% black
(92.2/96.51=2) /// 4.3% asian
(96.52/97.0=3) /// 0.5% native american or alaskan
(97.1/97.2=4) /// 0.2% native hawaiian/other pacific isl
(97.3/99.41=5) /// 2.3% multiracial
(99.42/100=6) /// 0.6% not reported
, gen(race)
//BUGFIX: value label 3 previously misspelled "Alsak Native"
lab define Race ///
0 "White" ///
1 "Black or African American" ///
2 "Asian" ///
3 "Native American or Alaska Native" ///
4 "Native Hawaiian or other Pacific Islander" ///
5 "Multiracial" ///
6 "Not reported"
label values race Race
tab race
//row4: ethnicity
gen ethnicity=rbinomial(1,0.28)
tostring ethnicity, replace
replace ethnicity="Latinx" if ethnicity=="1"
replace ethnicity="Other" if ethnicity=="0"
//row5: country
tempvar country
gen `country'=round(runiform(0,100), .1)
recode `country' ///
(0/15.3=0) /// 15.3% argentina
(15.4/21.5=1) /// 6.1% brazil
(21.6/23.6=2) /// 2.0% south africa
(23.7/100=3) /// 76.7% united states
, gen(country) // source populations
label define Country ///
0 "Argentina" ///
1 "Brazil" ///
2 "South Africa" ///
3 "United States"
label values country Country
tab country
//row7: age at vaccination
gen age=(rt(_N)*9.25)+52 //estimated mu,sigma from median/range
replace age=runiform(16,91) /// range from table1 ~ 4*sigma
if !inrange(age,16,91)
summ age, detail
local age_med=r(p50)
local age_lb=r(min)
local age_ub=r(max)
gen dob = d(27jul2020) - /// earliest recruitment into trial
(age*365.25) // age at recruitment
gen dor = dob + age*365.25 + runiform(0,4*30.25)
//row6: age group can only be defined after age
gen over55=age>55 // actual population is 5.3% more
tab over55 // suggesting skewness to left
//row8: bmi
gen bmi=rbinomial(1, .351)
tab bmi
//figure 3: time-to-event data
g days=rweibull(.7,17,0) if bnt==0 //timing of covid for placebo
g covid=rbinomial(1, 162/21728) if bnt==0 //freq of covid event
*replace days=rgamma(1/1.8,11) if bnt==1 //timing of covid for bnt
replace days=rweibull(.4,.8,0) if bnt==1 //timing of covid for bnt
*hist days if bnt==1 & days<114
replace covid=rbinomial(1, 14/21772) if bnt==1 //freq of covid for bnt
//key dates
gen eft = dor + days
//date formats
format dob %td
format dor %td
format eft %td
//kaplan-meier curve
stset days, fail(covid)
sts graph, /// kaplan-meier curve
by(bnt) /// stratifying variable
fail per(100) /// yscale in %
tmax(119) /// xscale limit
xlab(0(7)119) ///
ylab(0(.4)2.4, ///
angle(360) /// vertical/easy to read ylabs
format("%3.1f")) /// precision 1 decimal
xti("Days after Dose 1") ///
legend(off) ///
text(2.3 100 "Placebo", ///
col(navy)) ///
text(.5 100 "BNT162b2", ///
col(maroon))
graph export BNT162b2.png, replace
stcox bnt
drop _* age over55 days //to be calculated by the student
//NOTE(review): rounded runiform draws can collide, so bnt_id may
//contain duplicates — confirm that is acceptable for this exercise
g bnt_id=round(runiform(37,37+_N))
compress //change variables from float to byte if possible
//label variables
lab var bnt_id "Participant Identifier"
lab var bnt "Random treatment assignment"
lab var female "Gender at birth"
lab var race "Self-identified race"
lab var ethnicity "Hispanic ethnicity"
lab var country "Country where trial was conducted"
lab var dob "Date of birth"
lab var dor "Date of recruitment into BNT162b2 trial"
lab var eft "Date of exit from BNT162b2 trial"
lab var bmi "Obese"
lab var covid "Covid-19 status on eft date"
//label data
lab data "Safety and Efficacy of the BNT162b2 mRNA Covid-19 Vaccine"
describe
order bnt_id dob female race ethnicity country bmi bnt eft covid
***replace eft=. if eft>d(15dec2020) //some folks lost to followup
save BNT162b2, replace
log close
15. intervalcensoring#
** W. Werbel 7.20.20 HCV project stintreg code - darn I saved over this ESW code, not good
** Adapted from ESW project, curves still have that variable in place - swap out for relevant model and exp/outcome var
** BUGFIX: multi-line commands below previously ended lines with "//" (an
** end-of-line comment, which does NOT continue the command, so the trailing
** options ran as separate, invalid commands); changed to "///" throughout
use steroid_ar_May_2020.dta
** change x axis from days to years
gen r_time_yrs= right_time/365
gen l_time_yrs= left_time/365
** set left time just a fraction beyond zero so that these people can enter interval censoring calculations and are not designated as being censored at time zero.
replace left_time=0.01 if left_time==0
replace l_time_yrs=0.001 if l_time_yrs==0
** example univariable code for rejection
stintreg don_hcv_nat_numeric, interval(l_time_yrs r_time_yrs) distribution(weibull)
** model checking, see if weibull fits ok
estat gofplot
** survival curves; currently set up for ESW analysis, sub HCV!
stcurve, survival at1(Isteroid_maint=0) at2(Isteroid_maint=1) graphregion(c(white)) title("") ///
xtitle("Years post-transplantation") xlabel(0 (1) 6) xsca(titlegap(2)) ytitle("Rejection-free survival" ) ///
ysca(titlegap(2)) ylabel(0 "0%" 0.25 "25%" 0.5 "50%" 0.75 "75%" 1 "100%") lpattern(dash) lwidth(*2) legend(col(1) ///
order( 1 "Early Steroid Withdrawal" 2 "Steroid Continuation") nobox position(7) ring(0) region(lwidth(none)))
** full model, sub HCV model! vars of interest are similar for rejection,
stintreg Isteroid_maint don_age_decade recip_age_decade recip_black_race rec_hcv_numeric ///
living_donor hla_0mismatch decile_rec_cpra dgf Ithymo_ind, interval(left_time right_time) distribution(weibull)
** can flip the axis to make it nicer (no longer "survival" orientation)
stcurve, survival at1(Isteroid_maint=0) at2(Isteroid_maint=1) graphregion(c(white)) title("") xtitle("Years post-transplantation") ///
xlabel(0 (1) 6) xsca(titlegap(2)) ytitle("Acute rejection" ) ysca(titlegap(2)) ysc(reverse) ylabel(1 "0%" 0.9 "10%" 0.8 "20%" 0.7 "30%") ///
lpattern(dash) lwidth(*2) legend(col(1) order( 1 "Early Steroid Withdrawal" 2 "Steroid Continuation") nobox position(10) ring(0) region(lwidth(none)))
** getting cumulative incidences
** (evaluate predicted survival at fixed yearly horizons by overwriting the interval bounds)
replace left_time=365
replace right_time=365*2
predict s1yr s2yr, surv
replace left_time=365*3
replace right_time=365*4
predict s3yr s4yr, surv
replace left_time=365*5
replace right_time=365*6
predict s5yr s6yr, surv
list Isteroid_maint s1yr s2yr s3yr s4yr s5yr s6yr, sepby(Isteroid_maint)
tabstat s1yr s2yr s3yr s4yr s5yr s6yr, by(Isteroid_maint) stats(mean) // subtract from 1
** other subanalyses to separate the curves
** black
stcurve, survival at1(recip_black_race=0) at2(recip_black_race=1) graphregion(c(white)) title("") ///
xtitle("Years post-transplantation") xlabel(0 (1) 6) xsca(titlegap(2)) ytitle("Acute rejection" ) ///
ysca(titlegap(2)) ysc(reverse) ylabel(1 "0%" 0.9 "10%" 0.8 "20%" 0.7 "30%") lpattern(dash) lwidth(*2) ///
legend(col(1) order( 1 "Non-black recipient" 2 "Black recipient") nobox position(10) ring(0) region(lwidth(none)))
** txp era
stcurve, survival at1(txp_era=0) at2(txp_era=1) at3(txp_era=2) graphregion(c(white)) title("") ///
xtitle("Years post-transplantation") xlabel(0 (1) 6) xsca(titlegap(2)) ytitle("Acute rejection" ) ///
ysca(titlegap(2)) ysc(reverse) ylabel(1 "0%" 0.9 "10%" 0.8 "20%" 0.7 "30%") lcolor(blue red black) ///
lwidth(*2) legend(col(1) order( 1 "2000-2007" 2 "2008-2013" 3 "2014-2017") nobox position(10) ring(0) region(lwidth(none)))
** living donor
stcurve, survival at1(living_donor=0) at2(living_donor=1) graphregion(c(white)) title("") ///
xtitle("Years post-transplantation") xlabel(0 (1) 6) xsca(titlegap(2)) ytitle("Acute rejection" ) ///
ysca(titlegap(2)) ysc(reverse) ylabel(1 "0%" 0.9 "10%" 0.8 "20%" 0.7 "30%") lcolor(blue red) lwidth(*2) ///
legend(col(1) order( 1 "Deceased donor" 2 "Living donor") nobox position(10) ring(0) region(lwidth(none)))
** thymo
stcurve, survival at1(Ithymo_ind=0) at2(Ithymo_ind=1) graphregion(c(white)) title("") ///
xtitle("Years post-transplantation") xlabel(0 (1) 6) xsca(titlegap(2)) ytitle("Acute rejection" ) ///
ysca(titlegap(2)) ysc(reverse) ylabel(1 "0%" 0.9 "10%" 0.8 "20%" 0.7 "30%") lcolor(blue red) lwidth(*2) ///
legend(col(1) order( 1 "No thymoglobulin" 2 "Thymoglobulin") nobox position(10) ring(0) region(lwidth(none)))
16. non-semi-para#
* to be presented on 03/14/22 to TRC analysts
* https://www.prb.org/usdata/indicator/deaths/table/
// cls
use 18_nhtable102feb2023.dta, clear
//hazards: conditional risk
//1. empirical
//crude US death rate: 2,854,838 deaths / 328,300,000 population (PRB link above)
global deathrate=(2854838/328300000)
quietly {
//exploration
noi tab female
sort female
sum female
//list an 8-record window around the male/female boundary of the sorted data
global lb: di r(N)*(1-r(mean))-4
global ub: di r(N)*(1-r(mean))+4
//convert follow-up time from months to years
g years=permth_exm/12
noi list years mortstat female in $lb/$ub
//analysis
//declare survival-time data: analysis time = years, event = mortstat
stset years, fail(mortstat)
}
quietly {
//2. nonparametric
// cls
//smoothed hazard functions by sex; lime dashed line marks the crude US death rate
sts graph, ///
ha by(female) ///
ylab(0(.005).03, angle(360)) ///
yti("{&lambda}", size(6) ///
orient(horizontal)) ///
xti("Years") ///
ti("Nonparametric", size(3)) ///
legend(off ring(0) ///
pos(11) ///
order(1 2) ///
row(2) ///
lab(1 "Male") ///
lab(2 "Female") ///
) ///
yline($deathrate, ///
lp(dash) ///
lc(lime) ///
)
graph save ha_nonpar.gph, replace
}
quietly {
//2b. nonparametric failure (cumulative incidence) curves by sex, in percent
// cls
sts graph, ///
by(female) ///
fail ///
per(100) ///
ylab(0(20)100, angle(360) format(%3.0f)) ///
yti("%", size(6) ///
orient(horizontal)) ///
xti("Years") ///
ti("Nonparametric", size(3)) ///
legend(on ring(0) ///
pos(11) ///
order(1 2) ///
row(2) ///
lab(1 "Male") ///
lab(2 "Female") ///
) ///
yline(50, ///
lp(dash) ///
lc(lime) ///
)
graph save surv_nonpar.gph, replace
}
quietly {
//3. semiparametric
// cls
//Cox proportional hazards model of mortality on sex
stcox female
//baseline cumhaz
predict H, basec
sum H
//$H = end-of-follow-up (maximum) baseline cumulative hazard
global H=r(max)
//figure prep
matrix define a_mort=(0,1)
matrix define b=e(b)
matrix define V_mort=e(V)
//BUGFIX: was "a_mort*b'", which is nonconformable (1x2 times 1x1 since
//stcox female has a single coefficient); b'*a_mort yields the 1x2 row
//vector (0, beta) that is indexed as b_mort[1,1] and b_mort[1,2] below
matrix b_mort = b'*a_mort
noi matrix list b_mort
noi matrix list V_mort
//cumulative risk in percent: 1 - exp(-H0 * HR)
noi di (1 - exp(-$H *exp(b_mort[1,1])))*100
//NOTE(review): -1.78 is a hard-coded log-hazard value — confirm its source
noi di (1 - exp(-$H *exp(-1.78)))*100
g risk0 = (1 - exp(-H *exp(b_mort[1,1])))*100
g risk1 = (1 - exp(-H *exp(b_mort[1,2])))*100
local complete=e(N)
//BUGFIX: `studypop' was never defined in this section, leaving the macro
//blank and breaking the display below; use the current observation count
local studypop=_N
local complete: di %2.0f `complete' " of " %2.0f `studypop'
line risk0 risk1 _t, sort ///
ti("Semiparametric ", size(3)) ///
xti("Years") ///
yti("",orient(horizontal)) ///
ylab(0(20)100, angle(360)) ///
xlab(0(10)30) ///
legend(off ///
ring(0) ///
pos(11) ///
col(1) ///
lab(1 "Male") ///
lab(2 "Female") ///
) ///
yline(50, lp(dash) lc(lime))
graph save surv_semipar.gph, replace
}
quietly {
//4. parametric: weibull regression of mortality on sex
streg female, d(weibull)
matrix define h0 = e(b)
//NOTE(review): h0[1,2] indexes the second column of e(b); a variable (_t)
//used inside a macro expression is evaluated at the first observation only —
//confirm this reference value is what is intended for the $h0 yline overlay
global h0 = exp(_t*exp(h0[1,2]))
//g h0 = exp(-1*_t*h0[1,2])
//parametric failure curves by sex
stcurve, fail at(female= (0 1)) ///
ylab(0(.2)1, ///
angle(360)) ///
xlab(0(10)30) ///
yline(.5, lc(lime) lp(dash)) ///
yti("") ///
legend(off ring(0) ///
pos(11) ///
order(1 2) ///
row(2) ///
lab(1 "Male") ///
lab(2 "Female") ///
) ///
ti("Parametric", size(3))
graph save surv_par.gph, replace
}
quietly {
//parametric vs. non-parametric
// cls
//nonparametric hazard with two reference lines: crude death rate
//($deathrate, lime) and the parametric baseline value ($h0, orange)
sts graph, ///
ha by(female) ///
ylab(0(.005).03, angle(360)) ///
yti("", size(6) ///
orient(horizontal)) ///
xti("Years") ///
ti("Nonparametric", size(3)) ///
legend(off ring(0) ///
pos(11) ///
order(1 2) ///
row(2) ///
lab(1 "Male") ///
lab(2 "Female") ///
) ///
yline($deathrate, ///
lp(dash) ///
lc(lime) ///
) ///
yline($h0, ///
lp(dash) ///
lc(orange) ///
)
graph save ha_par.gph, replace
}
//assemble the five saved panels into one two-row figure and export to PDF
graph combine ///
surv_nonpar.gph ///
surv_semipar.gph ///
surv_par.gph ///
ha_nonpar.gph ///
ha_par.gph, ///
row(2)
graph export nh_mort.pdf, replace
17. modelselection#
quietly {
cls
clear
noi di "and the winner is..."
//NOTE: the numeric "if" conditions below act as section toggles —
//any nonzero value runs the block, "if 0" skips it
if 1 {
//import data
use 18_nhtable102feb2023.dta
}
if 2 {
//survival analysis
//convert follow-up time from months to years and declare survival data
g years=permth_exm/12
stset years, fail(mortstat==1)
}
if 0 {
//kaplan-meier
sts graph, ///
fail ///
by(htn) ///
per(100) ///
ylab(, ///
format(%3.0f) angle(360)) ///
ti("Mortality in the United States, 1988-2018") ///
xti("Years") ///
yti("%", ///
orientation(horizontal)) ///
legend(on ///
ring(0) ///
pos(11) ///
col(1) ///
order(2 1) ///
lab(1 "Normotensive") ///
lab(2 "Hypertensive")) ///
caption("Source: NHANES")
}
if 4 {
//nested models: add covariate groups one block at a time
global demo age female i.race i.educ income
global qa dm htn smk
global exam sbp dbp bmi
global lab hba1c acr glucose creat
//cox regression
stcox $demo
est store demo
stcox $demo $qa
est store demoqa
stcox $demo $qa $exam
est store demoqaexam
stcox $demo $qa $exam $lab
est store demoqaexamlab
//rank models (estimates stats reports AIC/BIC in r(S))
est stat demo demoqa demoqaexam demoqaexamlab
//manipulate output
matrix define models=r(S)
noi matrix list models
//load the stats matrix into variables, then keep the AIC-best row
svmat models
keep models*
rename (models1 models2 models3 models4 models5 models6) ///
(N ll0 ll df AIC BIC)
egen bestmodel=min(AIC)
keep if AIC==bestmodel
noi list
}
}
18. stsgraph#
qui {
if 0 { //background (never runs; notes only)
1. aesthetics for sts graph command
2. export .png or pdf and control width
3. use nh3andmort.dta for illustration
}
if 1 { //methods
use nh3andmort.dta, clear
//convert follow-up time from months to years and declare survival data
g years=permth_exm/12
stset years, fail(mortstat)
}
if 2 { //results
//BUGFIX: title previously read "Morality"
#delimit ;
sts graph if inrange(hab1,1,5),
by(hab1)
fail
ti("Mortality in NHANES III",pos(11))
subti("Self-reported health status",pos(11))
yti("%",orientation(horizontal))
xti("Years")
per(100)
ylab(0(20)80,
format(%3.0f)
angle(360)
)
legend(on
size(2)
region(margin(zero))
lpattern(blank)
lab(1 "Excellent")
lab(2 "Good")
lab(3 "Fair")
lab(4 "Bad")
lab(5 "Poor")
ring(0)
pos(11)
col(1)
order(5 4 3 2 1)
)
note("Source: RDC/NCHS/CDC/DHHS",
size(2)
pos(7)
)
risktable(,
title("# at risk",
size(2))
size(2)
order(5 " " 4 " " 3 " " 2 " " 1 " ")
)
risktable(,
group(#1)
col(navy)
)
risktable(,
group(#2)
col(maroon)
)
risktable(,
group(#3)
col(forest_green)
)
risktable(,
group(#4)
col(dkorange)
)
risktable(,
group(#5)
col(teal)
)
;
#delimit cr
}
if 3 { //conclusion
graph export nh3andmort.png,replace width(2400)
stcox i.hab1 if inrange(hab1,1,5)
}
}