# October 2014
# CAMAR
# Introduction to R
# Simple Cookbook Examples
# David L. Homer, FCAS MAAA CERA
# JLT Towers Re
# The goal of this session is to provide simple examples of how to
# use R with very little knowledge of the entire language. The
# examples are contained in this document and can be reproduced by
# copying from this document and pasting them into an R session.
#### 0. What is R?
# The following description of R is copied from
# http://www.r-project.org/
#-----------------------
# R is a language and environment for statistical computing
# and graphics. It is a GNU project which is similar to the S
# language and environment which was developed at Bell Laboratories
# (formerly AT&T, now Lucent Technologies) by John Chambers and
# colleagues. R can be considered as a different implementation of
# S. There are some important differences, but much code written for
# S runs unaltered under R.
# R provides a wide variety of statistical (linear and
# nonlinear modelling, classical statistical tests, time-series
# analysis, classification, clustering, ...) and graphical
# techniques, and is highly extensible. The S language is often the
# vehicle of choice for research in statistical methodology, and R
# provides an Open Source route to participation in that activity.
# One of R's strengths is the ease with which well-designed
# publication-quality plots can be produced, including mathematical
# symbols and formulae where needed. Great care has been taken over
# the defaults for the minor design choices in graphics, but the
# user retains full control.
# R is available as Free Software under the terms of the
# Free Software Foundation's GNU General Public License in source
# code form. It compiles and runs on a wide variety of UNIX
# platforms and similar systems (including FreeBSD and Linux),
# Windows and MacOS.
#----------------------
#### 1.
# Installation of R
# R installation instructions and software can be obtained at
# http://www.r-project.org/
# Select a download site from the Comprehensive R Archive
# Network (CRAN)
# for example, http://lib.stat.cmu.edu/R/CRAN/.
# Then select "Download R for Windows" and follow the
# instructions.
# You can download the software and follow along during the CAMAR
# session if you like. It is not necessary.

#### 2. Basics. An R Session. Pasting examples and assigning variables.
# A simple variable assignment can be made with the "<-" symbol,
# here using c(), the concatenate function.
# R is an array based language. Vectors, matrices, and arrays are
# natural/common objects in R.
lossratio <- c(0.69, 0.75, 0.7, 0.72, 0.8, 0.67, 0.82, 0.7, 0.77)

# compute the mean and std
lr.stat <- c(mean(lossratio), sd(lossratio))
# individual items can have names in R
names(lr.stat) <- c("mean", "std")

# R allows you to express a sequence with the ":" operator.
year <- 2005:2013 # yields the integers 2005,2006,...,2013

# plot loss ratio versus year
plot(year, lossratio, main = "Loss Ratio vs Year")
# draw a thick, red, horizontal line representing the mean loss
# ratio.
abline(h = mean(lossratio), col = "red", lwd = 4)

# R sees "\" as a control character so either use "/" or "\\" for
# path names.
# NOTE(review): assumes the directory c:/temp/r-camar already exists and
# that the plot is on a screen device savePlot() can copy from.
savePlot("c:/temp/r-camar/plot1.wmf")

### matrices, row and column names,
# R loads down the first column, then down the second, ...
mm <- matrix(1:9, 3, 3)
# ... unless you ask for loading "byrow"
mm <- matrix(1:9, 3, 3, byrow = TRUE)
# a matrix can have row and column names
colnames(mm) <- c("A", "B", "C")
rownames(mm) <- c("R1", "R2", "R3")

### scan copy from excel
# scan is a convenient way to load a simple list of numbers:
# type numbers separated by <space> into the session,
# followed by an extra <enter> (an empty line) to finish.
aa <- scan()

# scan can also read from the clipboard
# copy something to the clipboard before running this.
write("1 3.14 528000", "clipboard")
bb <- scan("clipboard")

### csv files read write
# write to clipboard, to paste into Excel
# (col.names = NA writes an empty header cell above the row names)
write.table(mm, "clipboard", sep = "\t", col.names = NA)
# read from clipboard, after copying from Excel
m2 <- read.table("clipboard", sep = "\t", header = TRUE, row.names = 1)
# m2 is a data frame, which looks like a matrix but has additional
# properties that make it convenient for use with other R functions.
# For example, plot(m2).

# write to csv file (easily read by EXCEL)
write.csv(mm, "c:/temp/r-camar/table1.csv")
# read from the csv file. the row.names=1 is needed to force R to use
# the row names in column 1.
m2 <- read.csv("c:/temp/r-camar/table1.csv", row.names = 1)

#### 3. Simulation
# R does have control structures like loops, but it is generally
# better to avoid them if possible
trials <- 1000
# draw from a Poisson distribution with lambda=3
sim.n <- rpois(trials, 3)
# draw the claim sizes needed from a Gamma with shape=2 and scale=5000
sim.x <- rgamma(sum(sim.n), shape = 2, scale = 5000)
# compute the aggregate losses for each trial:
# sim.cx[k] is the total of the first k-1 claims, and sim.cn holds, for
# each trial boundary, the index into sim.cx of the running total so far;
# differencing those running totals gives one aggregate loss per trial.
sim.cn <- cumsum(c(1, sim.n))
sim.cx <- cumsum(c(0, sim.x))
sim.z <- diff(sim.cx[sim.cn])
mean(sim.z)

#### 4. FFT
# size of discrete transform (powers of 2 help make calculation fast)
buckets <- 2^10
# severity distribution place holder
f <- rep(0, buckets) # rep() function repeats values
# width (entry in f[i] represent the probability of a claim size
# of width*(i-1) )
width <- 1000
# discretized distribution
# using the functions: pgamma(), diff(), and named parameters of
# pgamma(), shape and scale.
# The probability of each interval ((k-1)*width, k*width] is placed at
# its upper end point k*width; size 0 gets probability 0.
f[1:101] <- c(0, diff(pgamma(0:100 * width, shape = 2, scale = 5000)))

# create a function for the probability generating function of the poisson
pgfPoi <- function(f, lambda) {
  # Poisson PGF
  # f      - evaluation vector
  # lambda - poisson parameter
  # returns exp(lambda * (f - 1)), evaluated elementwise on f
  exp(lambda * (f - 1))
}

# compute the aggregate distribution Z=X1 + ...
# + XN arising from
# N claims from a Poisson with lambda=3
# and claim size Xi drawn from a Gamma with shape=2 and scale=5000
# Transform the severity vector, apply the Poisson PGF to the transform,
# then invert. R's fft() does not normalise the inverse, hence the
# division by buckets; Re() keeps the real part of the complex result.
z <- Re(fft(pgfPoi(fft(f), 3), inverse = TRUE) / buckets)

# check mean: lambda * E[X] from the discretized severity versus E[Z]
# from the FFT result (bucket i stands for a loss of width*(i-1)).
check.mean <- c(
  3 * sum(0:(buckets - 1) * f) * width,
  sum(0:(buckets - 1) * z) * width
)
names(check.mean) <- c("EX*EN", "EZ")

# fft versus simulation: empirical CDF of the simulated aggregate losses,
# overlaid with the cumulative FFT probabilities for the first 100 buckets.
plot(ecdf(sim.z))
lines(width * 0:99, cumsum(z[1:100]), col = "red")