#ANALYSIS OF GAMLSS RESERVE VARIABILITY rm(list=ls()) library(gamlss) set.seed(123) require(gamlss) options(contrasts=c("contr.treatment","contr.treatment")) con<-gamlss.control( n.cyc = 1000) # this is to read data mainDir='F:/giorgio lavoro/universita/' mainDir='E:/Dropbox/Spedicato-Clemente/' #mainDir='C:/Users/Packard Bell/Documents/My Dropbox/Spedicato-Clemente/naic loss reserving db/NaicLossReserve/NaicLossReserve' subDir='naic loss reserving db' workDir=paste(mainDir,subDir,sep="") setwd(workDir) library(ChainLadder) privatePassengersAutoRawData<-read.csv(file="./Data/ppauto_pos.csv",header=TRUE) dataGr<-subset(privatePassengersAutoRawData,GRCODE==266) dataGr<-dataGr[,c("AccidentYear","DevelopmentYear","DevelopmentLag","CumPaidLoss_B")] #fix negative/equal incrmeental paid for(i in 2:nrow(dataGr)) { if((dataGr$CumPaidLoss_B[i]<=dataGr$CumPaidLoss_B[i-1])&(dataGr$AccidentYear[i]==dataGr$AccidentYear[i-1])) dataGr$CumPaidLoss_B[i]=dataGr$CumPaidLoss_B[i-1]+1 } #get reserve and ultimate payment ultimatePayments<-with(subset(dataGr, DevelopmentLag==10), sum(CumPaidLoss_B)) lastPayments<-with(subset(dataGr, AccidentYear+DevelopmentLag==1998), sum(CumPaidLoss_B)) reserve<-ultimatePayments-lastPayments #31.7 #prepare data ##collect df4Triangles<-subset(dataGr, AccidentYear+DevelopmentLag<=1998) names(df4Triangles)=c("ay","dy","dev","cumpaid") triGr<-as.triangle(df4Triangles,origin="ay",dev="dev",value="cumpaid") #make the triangles incrementalTriangle<-as.data.frame(cum2incr(triGr)) incrementalTriangle$ay<-factor(incrementalTriangle$ay,ordered=TRUE) incrementalTriangle$dev<-factor(incrementalTriangle$dev,ordered=TRUE) triangleDf<-subset(incrementalTriangle, !is.na(value)) predictionDf<-subset(incrementalTriangle, is.na(value)) #take only the unfilled predictionDf<-predictionDf[,c("ay","dev")];rownames(predictionDf)<-NULL #classical methods mackModel<-MackChainLadder(triGr) #30.6 bootModel<-BootChainLadder(triGr,process.dist="gamma",R=100000) #32.6 odpModel<-glmReserve(triGr,mse.method="formula",nsim=100000) #30 clarkModel<-ClarkLDF (triGr) #36 #GAMLSS MODEL myTriangleModel<-gamlss(formula=value~ay+dev, sigma.formula = ~dev, data=triangleDf, family="GA", control=con) residualsNQ<-as.numeric(residuals(object=myTriangleModel, what="z-scores",type="simple")) #normalized quantile #export table library(texreg) htmlreg(l=myTriangleModel, custom.model.names="Triangle 266 GAMLSS") myTriangleModelb<-gamlss(formula=value~ay+dev, sigma.formula = ~ay, data=triangleDf, family="GA", control=con) residualsNQb<-as.numeric(residuals(object=myTriangleModelb, what="z-scores",type="simple")) #normalized quantile myTriangleModelc<-gamlss(formula=value~ay+dev, sigma.formula = ~1, data=triangleDf, family="GA", control=con) residualsNQc<-as.numeric(residuals(object=myTriangleModelb, what="z-scores",type="simple")) #normalized quantile myTriangleModeld<-gamlss(formula=value~ay+dev, sigma.formula = ~ay, data=triangleDf, family="WEI", control=con) residualsNQd<-as.numeric(residuals(object=myTriangleModelb, what="z-scores",type="simple")) #normalized quantile #various functions #VARIOUS FUNCTIONS #FUNCTION TO GIVE PROCESS VARIANCE processSampler<-function(model, dataNew, processVariance=TRUE) #modella la process variance { muPred<-predict(object=model, newdata=dataNew, what="mu",type="response") #fit mu (location) by cell sigmaPred<-predict(object=model, newdata=dataNew, what="sigma",type="response") #fit sigma (shape) by cell reserveDistr<-model$family[1] #estrae distribution family #expression to sample from the cells' distributions given the model samplerEpression<-paste("mapply(r",reserveDistr,",1, mu=muPred, sigma=sigmaPred)", sep="") #this is the random sampler #if needed to simulate (process variance) then simulate else return straing sum of expected value by cell if(processVariance==TRUE) out<-eval(parse(text=samplerEpression)) else out<-muPred return(sum(out)) #outs future payemnts } #FUNCTION TO GENERATE BOOTSTRAP TRIANGLES regenerateIncrementalTriangle<-function(initialModel, residualsType="NQ", residuals, standardize=TRUE,showShapiro=FALSE, upperDf) { #which residual to consider #check normality (optional) if(showShapiro==TRUE) cat("Shapiro normality test p-value", shapiro.test(residuals)$p.value, "\n") if(standardize==TRUE) { #res2boot<-(temp-mean(temp))/sd(temp) res2boot<-(residuals-mean(residuals)) } else res2boot<-residuals mean(res2boot) #samplig of residuals sampledResiduals<-sample(x=res2boot,size=nrow(upperDf),replace=TRUE) #predict the upper triangle (mu and sigma) muPred<-predict(object=initialModel, newdata=upperDf, what="mu",type="response") #predice mu x ogni cella sigmaPred<-predict(object=initialModel, newdata=upperDf, what="sigma",type="response") #predice sigma x ogni cella reserveDistr<-initialModel$family[1] #add the distribution familuy if(residualsType=="NQ") { res2Quantile<-pnorm(sampledResiduals) #add normality standard samplerEpression=paste("mapply(q",reserveDistr,",p=res2Quantile, mu=muPred, sigma=sigmaPred)", sep="") #scrive l'espressione che genera le celle del triangolo val=eval(parse(text=samplerEpression)) #la valuta generando tutte le celle } else if(residualsType=="SC") { #expected*(1+SC) val<-muPred*(1+sampledResiduals) } else if(residualsType=="ST") { #expected + sigma*z val<-muPred+sigmaPred*(sampledResiduals) } out<-upperDf out$value<-val return(out) } #MAKE A BOOTSTRAP SAMPLE #makeABootstapCycle<-function(model,residuals,upperDf,predictionDf) #{ #newTriangle<-regenerateIncrementalTriangle(initialModel=model,residuals=residualsNQ,residualsType="NQ",standardize=TRUE, showShapiro=FALSE, upperDf=triangleDf) #generate a new triangle based on sampled residuals #gamlssModelNew<-tryCatch(gamlss(formula=model$mu.formula, sigma.formula = model$sigma.formula,data=newTriangle, family=model$family, control=con), error = function(e){return(NA)}) #fit the model again on the news triangle #if(!is.gamlss(gamlssModelNew)) out<-NA else out<-processSampler(model=gamlssModelNew,dataNew=predictionDf) #add the process variance #invisible(out) #} nsim<-10000 reserveDistr<-numeric(nsim) #for(i in 1:nsim) reserveDistr[i]<-makeABootstapCycle(model=myTriangleModel,residuals=residualsNQ,upperDf=triangleDf,predictionDf=predictionDf) for(i in 1:nsim) { newTriangle<-regenerateIncrementalTriangle(initialModel=myTriangleModel,residuals=residualsNQ,residualsType="NQ",standardize=TRUE, showShapiro=FALSE, upperDf=triangleDf) #generate a new triangle based on sampled residuals gamlssModelNew<-tryCatch(gamlss(formula=myTriangleModel$mu.formula, sigma.formula = myTriangleModel$sigma.formula,data=newTriangle, family=myTriangleModel$family, control=con), error = function(e){return(NA)}) #fit the model again on the news triangle if(!is.gamlss(gamlssModelNew)) reserveDistr[i]<-NA else reserveDistr[i]<-processSampler(model=gamlssModelNew,dataNew=predictionDf) #add the process variance } #try other models nsim=1000 reserveDistrb<-numeric(nsim) #for(i in 1:nsim) reserveDistr[i]<-makeABootstapCycle(model=myTriangleModel,residuals=residualsNQ,upperDf=triangleDf,predictionDf=predictionDf) for(i in 1:nsim) { newTriangle<-regenerateIncrementalTriangle(initialModel=myTriangleModelb,residuals=residualsNQb,residualsType="NQ",standardize=TRUE, showShapiro=FALSE, upperDf=triangleDf) #generate a new triangle based on sampled residuals gamlssModelNew<-tryCatch(gamlss(formula=myTriangleModelb$mu.formula, sigma.formula = myTriangleModelb$sigma.formula,data=newTriangle, family=myTriangleModelb$family, control=con), error = function(e){return(NA)}) #fit the model again on the news triangle if(!is.gamlss(gamlssModelNew)) reserveDistrb[i]<-NA else reserveDistrb[i]<-processSampler(model=gamlssModelNew,dataNew=predictionDf) #add the process variance } reserveDistrc<-numeric(nsim) for(i in 1:nsim) { newTriangle<-regenerateIncrementalTriangle(initialModel=myTriangleModelc,residuals=residualsNQc,residualsType="NQ",standardize=TRUE, showShapiro=FALSE, upperDf=triangleDf) #generate a new triangle based on sampled residuals gamlssModelNew<-tryCatch(gamlss(formula=myTriangleModelc$mu.formula, sigma.formula = myTriangleModelc$sigma.formula,data=newTriangle, family=myTriangleModelc$family, control=con), error = function(e){return(NA)}) #fit the model again on the news triangle if(!is.gamlss(gamlssModelNew)) reserveDistrc[i]<-NA else reserveDistrc[i]<-processSampler(model=gamlssModelNew,dataNew=predictionDf) #add the process variance } reserveDistrd<-numeric(nsim) #for(i in 1:nsim) reserveDistr[i]<-makeABootstapCycle(model=myTriangleModel,residuals=residualsNQ,upperDf=triangleDf,predictionDf=predictionDf) for(i in 1:nsim) { newTriangle<-regenerateIncrementalTriangle(initialModel=myTriangleModeld,residuals=residualsNQd,residualsType="NQ",standardize=TRUE, showShapiro=FALSE, upperDf=triangleDf) #generate a new triangle based on sampled residuals gamlssModelNew<-tryCatch(gamlss(formula=myTriangleModeld$mu.formula, sigma.formula = myTriangleModeld$sigma.formula,data=newTriangle, family=myTriangleModeld$family, control=con), error = function(e){return(NA)}) #fit the model again on the news triangle if(!is.gamlss(gamlssModelNew)) reserveDistrd[i]<-NA else reserveDistrd[i]<-processSampler(model=gamlssModelNew,dataNew=predictionDf) #add the process variance } outs<-data.frame(simulation=1:nsim, simulatedReserve=reserveDistr) jpeg(file="./RESULTS/resDistribution.jpg") hist(reserveDistr, col="steelblue",breaks =50,prob=TRUE, main="GR 266 Unpaid Claim Distribution") abline(v=mean(reserveDistr,na.rm=TRUE), col="red", lwd=2) abline(v=reserve, col="green",lwd=1) lines(density(reserveDistr),col="dark red") dev.off() #saving save(list=c('reserveDistr','reserveDistrb','reserveDistrc','reserveDistrd'),file="simulazioni.RData") load(file="simulazioni.RData") model.name<-c('Mack', 'BootstrapCL', 'ODP', 'GAMLSS BASE') model.be<-c(30064.72,32635,30065, mean(reserveDistr, na.rm=TRUE)) model.sigma<-c(2516.51,141905,6695, sd(reserveDistr, na.rm=TRUE)) resultsTable1<-data.frame(model=model.name, bestEstimate=model.be, sigma=model.sigma) meansT2=c(mean(reserveDistr,na.rm=TRUE), mean(reserveDistrb,na.rm=TRUE), mean(reserveDistrc,na.rm=TRUE), mean(reserveDistrd,na.rm=TRUE) ) sigmaT2=c(sd(reserveDistr,na.rm=TRUE), sd(reserveDistrb,na.rm=TRUE), sd(reserveDistrc,na.rm=TRUE), sd(reserveDistrd,na.rm=TRUE) ) resultsTable2<-data.frame(model=c('GA, dev','GA, ay','GA, none', 'WEI, ay' ), bestEstimate=meansT2, sigma=sigmaT2) library(xlsx) write.xlsx(x=resultsTable, file="outsModelli.xlsx", sheetName="Tabella1", append=FALSE) write.xlsx(x=resultsTable2, file="outsModelli.xlsx", sheetName="Tabella2", append=TRUE)