#R CODE TO INVESTIGATE THE MOST APPROPRIATE CONDITIONAL DISTRIBUTION ON NAIC TRIANGLES model.grid<-expand.grid(distribution = c("PO", "NBI","GA","IG","WEI","LOGNO") ,variance=c("none", "AY", "DEV"),stringsAsFactors=FALSE) library(gamlss) con<-gamlss.control( n.cyc = 1000) #this function fits a GAMLSS model specifying the conditional distribution and the variance dependency fitModel<-function(df,distribution,variance) { #defines the model mu.formula<-as.formula("value~AY+DEV") if(variance=="AY") sigma.formula<-as.formula("~AY")else { if(variance=="dev") sigma.formula<-as.formula("~DEV") else sigma.formula<-as.formula("~1")} #remove non positive values in case of continuous distriubtion if(distribution %in% c("WEI", "LOGNO", "GA", "IG")) df$value[df$value<=0]<-runif(length(df$value[df$value<=0])) #tries to fit the model gamlssModel<-tryCatch( gamlss(formula=mu.formula, sigma.formula = sigma.formula, data=df, family=distribution, control=con), error = function(e){return(NA)} ) if(!is.gamlss(gamlssModel)) out<-NA else out<-GAIC(gamlssModel) invisible(out) } #function that fits various models in the data set ad picks the best groupFitter<-function(data4Fit, groupId) { data4Fit$AY<-factor(data4Fit$ay,ordered=TRUE) data4Fit$DEV<-factor(data4Fit$dev,ordered=TRUE) #fit various models to the data performances<-numeric(nrow(model.grid)) for(i in 1:nrow(model.grid)) performances[i]<-fitModel(df=data4Fit, distribution=model.grid$distribution[i], variance=model.grid$variance[i]) fitted.grid<-model.grid fitted.grid$performances<-performances #retursn the best model model2output<-subset(fitted.grid, is.finite(performances)) if(nrow(model2output)==0) out=data.frame(distribution="none", variance="none", performances=NA) else { out<-model2output[which(model2output$performances==min(model2output$performances)),] out<-out[1,] } out$group=groupId rownames(out)<-NULL invisible(out) } #########################################ANALYSIS################################# #COMMERCIAL AUTO commercialAutoGroups<-unique(commercialAutoDb$group) privatePassengerAutoGroups<-unique(privatePassengerAutoDb$group) workersCompensationGroups<-unique(workersCompensationDb$group) medicalMalpracticeGroups<-unique(medicalMalpracticeDb$group) otherLiabilityGroups<-unique(otherLiabilityDb$group) productLiabilityGroups<-unique(productLiabilityDb$group) #parallel processing library(foreach) library(doParallel) cores2Use<-max(1,detectCores()) cl<-makeCluster(cores2Use) registerDoParallel(cl) #loop commercialAutoResults<-foreach(i=1:length(commercialAutoGroups),.combine=rbind, .packages = c('gamlss'),.errorhandling='remove') %dopar% { data4Fit<-subset(commercialAutoDb, group==commercialAutoGroups[i]) groupFit<-groupFitter(data4Fit=data4Fit,groupId = commercialAutoGroups[i]) } privatePassengerAutoResults<-foreach(i=1:length(privatePassengerAutoGroups),.combine=rbind, .packages = c('gamlss'),.errorhandling='remove') %dopar% { data4Fit<-subset(privatePassengerAutoDb, group==privatePassengerAutoGroups[i]) groupFit<-groupFitter(data4Fit=data4Fit,groupId = privatePassengerAutoGroups[i]) } workersCompensationResults<-foreach(i=1:length(workersCompensationGroups),.combine=rbind, .packages = c('gamlss'),.errorhandling='remove') %dopar% { data4Fit<-subset(workersCompensationDb, group==workersCompensationGroups[i]) groupFit<-groupFitter(data4Fit=data4Fit,groupId = workersCompensationGroups[i]) } medicalMalpracticeResults<-foreach(i=1:length(medicalMalpracticeGroups),.combine=rbind, .packages = c('gamlss'),.errorhandling='remove') %dopar% { data4Fit<-subset(medicalMalpracticeDb, group==medicalMalpracticeGroups[i]) groupFit<-groupFitter(data4Fit=data4Fit,groupId = medicalMalpracticeGroups[i]) } otherLiabilityResults<-foreach(i=1:length(otherLiabilityGroups),.combine=rbind, .packages = c('gamlss'),.errorhandling='remove') %dopar% { data4Fit<-subset(otherLiabilityDb, group==otherLiabilityGroups[i]) groupFit<-groupFitter(data4Fit=data4Fit,groupId = otherLiabilityGroups[i]) } productLiabilityResults<-foreach(i=1:length(productLiabilityGroups),.combine=rbind, .packages = c('gamlss'),.errorhandling='remove') %dopar% { data4Fit<-subset(productLiabilityDb, group==productLiabilityGroups[i]) groupFit<-groupFitter(data4Fit=data4Fit,groupId = productLiabilityGroups[i]) } print(Sys.time()) stopCluster(cl) save(list = c('productLiabilityResults','otherLiabilityResults','medicalMalpracticeResults', 'workersCompensationResults','privatePassengerAutoResults','commercialAutoResults'), file="results.RData") productLiabilityResults$LoB="ProdLiab" otherLiabilityResults$LoB="OtherLiab" medicalMalpracticeResults$LoB="MedMal" workersCompensationResults$LoB="WC" privatePassengerAutoResults$LoB="PPA" commercialAutoResults$LoB="ComAuto" fullTriangles<-rbind(productLiabilityResults,otherLiabilityResults,medicalMalpracticeResults,workersCompensationResults,privatePassengerAutoResults,commercialAutoResults) #show results library(ggplot2) condDistrPlot <- ggplot(fullTriangles, aes(factor(distribution)))+ geom_bar() + facet_wrap(~ LoB) + ggtitle("Best performing conditional distribution by LoB")+theme(axis.text.x=element_text(angle=90, size=10, hjust=1)) varianceAssumptionPlot <- ggplot(fullTriangles, aes(factor(variance)))+ geom_bar() + facet_wrap(~ LoB) + ggtitle("Best performing variance assumption by LoB")+theme(axis.text.x=element_text(angle=90, size=10, hjust=1)) jpeg(file="./RESULTS/condDist.jpg") plot(condDistrPlot) dev.off() jpeg(file="./RESULTS/varAssumption.jpg") plot(varianceAssumptionPlot) dev.off()