生存资料校准曲线calibration curve的绘制
本文首發(fā)于公眾號:醫(yī)學(xué)和生信筆記
“醫(yī)學(xué)和生信筆記,專注R語言在臨床醫(yī)學(xué)中的使用,R語言數(shù)據(jù)分析和可視化。主要分享R語言做醫(yī)學(xué)統(tǒng)計(jì)學(xué)、meta分析、網(wǎng)絡(luò)藥理學(xué)、臨床預(yù)測模型、機(jī)器學(xué)習(xí)、生物信息學(xué)等。
前面我們已經(jīng)講過logistic模型的校準(zhǔn)曲線的畫法,這次我們學(xué)習(xí)生存資料的校準(zhǔn)曲線畫法。
- 加載R包和數(shù)據(jù)
- calibration 方法1
- calibration 方法2
加載R包和數(shù)據(jù)
library(survival)library(rms)
##?Loading?required?package:?Hmisc
##?Loading?required?package:?lattice
##?Loading?required?package:?Formula
##?Loading?required?package:?ggplot2
##?
##?Attaching?package:?'Hmisc'
##?The?following?objects?are?masked?from?'package:base':
##?
##?????format.pval,?units
##?Loading?required?package:?SparseM
##?
##?Attaching?package:?'SparseM'
##?The?following?object?is?masked?from?'package:base':
##?
##?????backsolve
試試使用自帶數(shù)據(jù)lung數(shù)據(jù)集進(jìn)行演示。
大多數(shù)情況下都是使用1代表死亡,0代表刪失,這個(gè)數(shù)據(jù)集用2代表死亡。但有的R包會(huì)報(bào)錯(cuò),需要注意!
rm(list?=?ls())dim(lung)
##?[1]?228??10
str(lung)
##?'data.frame':?228?obs.?of??10?variables:
##??$?inst?????:?num??3?3?3?5?1?12?7?11?1?7?...
##??$?time?????:?num??306?455?1010?210?883?...
##??$?status???:?num??2?2?1?2?2?1?2?2?2?2?...
##??$?age??????:?num??74?68?56?57?60?74?68?71?53?61?...
##??$?sex??????:?num??1?1?1?1?1?1?2?2?1?1?...
##??$?ph.ecog??:?num??1?0?0?1?0?1?2?2?1?2?...
##??$?ph.karno?:?num??90?90?90?90?100?50?70?60?70?70?...
##??$?pat.karno:?num??100?90?90?60?90?80?60?80?80?70?...
##??$?meal.cal?:?num??1175?1225?NA?1150?NA?...
##??$?wt.loss??:?num??NA?15?15?11?0?0?10?1?16?34?...
library(dplyr)
##?
##?Attaching?package:?'dplyr'
##?The?following?objects?are?masked?from?'package:Hmisc':
##?
##?????src,?summarize
##?The?following?objects?are?masked?from?'package:stats':
##?
##?????filter,?lag
##?The?following?objects?are?masked?from?'package:base':
##?
##?????intersect,?setdiff,?setequal,?union
library(tidyr)
df1?<-?lung?%>%?
??mutate(status=ifelse(status?==?1,1,0))
str(lung)
##?'data.frame':?228?obs.?of??10?variables:
##??$?inst?????:?num??3?3?3?5?1?12?7?11?1?7?...
##??$?time?????:?num??306?455?1010?210?883?...
##??$?status???:?num??2?2?1?2?2?1?2?2?2?2?...
##??$?age??????:?num??74?68?56?57?60?74?68?71?53?61?...
##??$?sex??????:?num??1?1?1?1?1?1?2?2?1?1?...
##??$?ph.ecog??:?num??1?0?0?1?0?1?2?2?1?2?...
##??$?ph.karno?:?num??90?90?90?90?100?50?70?60?70?70?...
##??$?pat.karno:?num??100?90?90?60?90?80?60?80?80?70?...
##??$?meal.cal?:?num??1175?1225?NA?1150?NA?...
##??$?wt.loss??:?num??NA?15?15?11?0?0?10?1?16?34?...
calibration 方法1
dd?<-?datadist(df1)options(datadist?=?"dd")
構(gòu)建cox比例風(fēng)險(xiǎn)模型:
#?1年coxfit1?<-?cph(Surv(time,?status)?~?age?+?sex?+?ph.ecog?+?ph.karno?+?pat.karno,
??????????????data?=?df1,?x=T,y=T,surv?=?T,
??????????????time.inc?=?365?#?1?年
??????????????)
#?m=50表示每次計(jì)算50個(gè)樣本,一般取4-6個(gè)點(diǎn),u=365和上面的time.inc對應(yīng)
cal1?<-?calibrate(coxfit1,?cmethod="KM",?method="boot",u=365,m=50,B=500)?
##?Using?Cox?survival?estimates?at?365?Days
然后就是畫圖:
plot(cal1,?????#lwd?=?2,?#?誤差線粗細(xì)
?????lty?=?0,?#?誤差線類型,可選0-6
?????errbar.col?=?c("#2166AC"),?#?誤差線顏色
?????xlim?=?c(0.4,1),ylim=?c(0.4,1),
?????xlab?=?"Nomogram-prediced?OS?(%)",ylab?=?"Observed?OS?(%)",
?????cex.lab=1.2,?cex.axis=1,?cex.main=1.2,?cex.sub=0.6)?#?字體大小
lines(cal1[,c('mean.predicted',"KM")],?
??????type?=?'b',?#?連線的類型,可以是"p","b","o"
??????lwd?=?3,?#?連線的粗細(xì)
??????pch?=?16,?#?點(diǎn)的形狀,可以是0-20
??????col?=?"tomato")?#?連線的顏色
box(lwd?=?2)?#?邊框粗細(xì)
abline(0,1,lty?=?3,?#?對角線為虛線
???????lwd?=?2,?#?對角線的粗細(xì)
???????col?=?"grey70"?#?對角線的顏色
???????)?
unnamed-chunk-6-147478960
再介紹一下多個(gè)校正曲線圖形畫在一起的方法。
#?2年coxfit2?<-?cph(Surv(time,?status)?~?age?+?sex?+?ph.ecog?+?ph.karno?+?pat.karno,
??????????????data?=?df1,?x=T,y=T,surv?=?T,
??????????????time.inc?=?730?#?2?年
??????????????)
cal2?<-?calibrate(coxfit2,?cmethod="KM",?method="boot",u=730,m=50,B=500)?
##?Using?Cox?survival?estimates?at?730?Days
畫圖:
plot(cal1,lwd?=?2,lty?=?0,errbar.col?=?c("#2166AC"),?????xlim?=?c(0,1),ylim=?c(0,1),
?????xlab?=?"Nomogram-prediced?OS?(%)",ylab?=?"Observed?OS?(%)",
?????col?=?c("#2166AC"),
?????cex.lab=1.2,cex.axis=1,?cex.main=1.2,?cex.sub=0.6)
lines(cal1[,c('mean.predicted',"KM")],
??????type?=?'b',?lwd?=?3,?col?=?c("#2166AC"),?pch?=?16)
plot(cal2,lwd?=?2,lty?=?0,errbar.col?=?c("#B2182B"),
?????xlim?=?c(0,1),ylim=?c(0,1),col?=?c("#B2182B"),add?=?T)
lines(cal2[,c('mean.predicted',"KM")],
??????type?=?'b',?lwd?=?3,?col?=?c("#B2182B"),?pch?=?16)
abline(0,1,?lwd?=?2,?lty?=?3,?col?=?c("#224444"))
legend("bottomright",?#圖例的位置
???????legend?=?c("5-year","8-year"),?#圖例文字
???????col?=c("#2166AC","#B2182B"),?#圖例線的顏色,與文字對應(yīng)
???????lwd?=?2,#圖例中線的粗細(xì)
???????cex?=?1.2,#圖例字體大小
???????bty?=?"n")#不顯示圖例邊框
unnamed-chunk-8-147478960
calibration 方法2
不過這種方法是把多個(gè)模型放在一張圖上,不是同一個(gè)模型對應(yīng)多個(gè)時(shí)間點(diǎn)。
這種方法不能有缺失值。
#?刪除缺失值df2?<-?na.omit(df1)
library(survival)
#?構(gòu)建模型
cox_fit1?<-?coxph(Surv(time,?status)?~?age?+?sex?+?ph.ecog?+?ph.karno?+?pat.karno,
??????????????data?=?df2,x?=?T,?y?=?T)
cox_fit2?<-?coxph(Surv(time,?status)?~?age?+?ph.ecog?+?ph.karno,
??????????????data?=?df2,x?=?T,?y?=?T)
#?畫圖
library(riskRegression)
##?riskRegression?version?2022.03.22
set.seed(1)
cox_fit_s?<-?Score(list("fit1"?=?cox_fit1,
????????????????????????"fit2"?=?cox_fit2),
???????????????formula?=?Surv(time,?status)?~?1,
???????????????data?=?df2,
???????????????#metrics?=?c("auc","brier"),
???????????????#summary?=?c("risks","IPA","riskQuantile","ibs"),
???????????????plots?=?"calibration",
???????????????#null.model?=?T,
???????????????conf.int?=?T,
???????????????B?=?500,
???????????????M?=?50,
???????????????times=c(700)?#?limit?the?time?range
???????????????)
plotCalibration(cox_fit_s,
????????????????xlab?=?"Predicted?Risk",
????????????????ylab?=?"Observerd?RISK")
##?The?default?method?for?estimating?calibration?curves?based?on?censored?data?has?changed?for?riskRegression?version?2019-9-8?or?higher
##?Set?cens.method="jackknife"?to?get?the?estimate?using?pseudo-values.
##?However,?note?that?the?option?"jackknife"?is?sensititve?to?violations?of?the?assumption?that?the?censoring?is?independent?of?both?the?event?times?and?the?covariates.
##?Set?cens.method="local"?to?suppress?this?message.
當(dāng)然也是可以用ggplot2畫圖的。
#?獲取數(shù)據(jù)data_all?<-?plotCalibration(cox_fit_s,plot?=?F)
##?The?default?method?for?estimating?calibration?curves?based?on?censored?data?has?changed?for?riskRegression?version?2019-9-8?or?higher
##?Set?cens.method="jackknife"?to?get?the?estimate?using?pseudo-values.
##?However,?note?that?the?option?"jackknife"?is?sensititve?to?violations?of?the?assumption?that?the?censoring?is?independent?of?both?the?event?times?and?the?covariates.
##?Set?cens.method="local"?to?suppress?this?message.
#?數(shù)據(jù)轉(zhuǎn)換
plot_df?<-?bind_rows(data_all$plotFrames)?%>%?
??mutate(fits?=?rep(c("fit1","fit2"),c(56,48)))
#?畫圖
ggplot(plot_df,?aes(Pred,Obs))+
??geom_line(aes(group=fits,color=fits),size=1.2)+
??scale_color_manual(values?=?c("#2166AC","tomato"),name=NULL)+
??scale_x_continuous(limits?=?c(0,1),name?=?"Predicted?Risk")+
??scale_y_continuous(limits?=?c(0,1),name?=?"Observerd?Risk")+
??geom_abline(slope?=?1,intercept?=?0,lty=2)+
??geom_rug(aes(color=fits))+
??theme_bw()
unnamed-chunk-12-147478960
本文首發(fā)于公眾號:醫(yī)學(xué)和生信筆記
“醫(yī)學(xué)和生信筆記,專注R語言在臨床醫(yī)學(xué)中的使用,R語言數(shù)據(jù)分析和可視化。主要分享R語言做醫(yī)學(xué)統(tǒng)計(jì)學(xué)、meta分析、網(wǎng)絡(luò)藥理學(xué)、臨床預(yù)測模型、機(jī)器學(xué)習(xí)、生物信息學(xué)等。
本文由 mdnice 多平臺發(fā)布
總結(jié)
以上是生活随笔為你收集整理的生存资料校准曲线calibration curve的绘制的全部內(nèi)容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 崭新的诺基亚:诺记能否借Windows
- 下一篇: FISCO BCOS工程师常用的性能分析