Share your functions & code

trinker

ggplot2orBust
At the top of many of my scripts I have a list of the packages I'm going to call. These scripts may be analyses I've done that serve as templates or frequent references. Sometimes I upgrade R and lose packages, so they're no longer installed. It's annoying to reinstall them, so I decided to write a simple wrapper for library() that checks whether you already have a package installed. If not, it installs it and loads it; otherwise it just loads it. It takes a second optional argument that, if set to TRUE, will download the new version of the package even if it's already in your library.

Code:
GL <- function(package, install = FALSE){
    # Load a package, installing it first when it is missing.
    #
    # Args:
    #   package: the package, given unquoted (captured via substitute()).
    #   install: if TRUE, (re)install even when the package is present,
    #            e.g. to pull down the newest version.
    pack <- as.character(substitute(package))
    # `||` (scalar, short-circuiting) instead of the original vector `|`:
    # both operands are length 1 here, and `||` skips the library scan
    # entirely when install = TRUE.
    if (install || !pack %in% list.files(.libPaths())) {
        install.packages(pack)
    }
    library(pack, character.only = TRUE)
}
I now use the GL at the top of my scripts instead of library in scripts that I intend to reuse for similar projects as I think this will be one more step towards efficiency.
 

trinker

ggplot2orBust
I find myself using ggplot more and more and sometimes have the need to remove just the horizontal or vertical grid lines. You can just remove all lines and redraw them at certain intervals unless you use facet_grid and allow scales to be free. I was directed to Winston Chang's website where I found out how to do the removal of just horizontal or vertical grid lines. Rather than wrap a bunch of code around a plot I created a function of Chang's code that I can source and use when needed. The functions simply take a ggplot saved as an object.

Code:
gghorX <- function(ggplot.object, credit = TRUE){
  # Print a saved ggplot object with the horizontal grid lines removed.
  # Works by temporarily swapping ggplot2's internal guide_grid for a
  # version that draws only the vertical (x) grid lines.
  # NOTE(review): relies on old ggplot2 internals (guide_grid,
  # theme_render, ggname) -- confirm they exist in the installed version.
  if (credit) {
    # Bug fix: the URL string began with a stray "\b" backspace escape,
    # which corrupted the printed link.
    URL <- "http://wiki.stdout.org/rcookbook/Graphs/Axes%20(ggplot2)/"
    cat("\nFunction compliments of Winston Chang:\n\n", URL, "\n")
  }
  guide_grid_orig <- ggplot2:::guide_grid
  # Restore the original internal no matter how we exit; the original
  # code left ggplot2 permanently patched if print() errored.
  on.exit(assignInNamespace("guide_grid", guide_grid_orig, ns = "ggplot2"))
  guide_grid_no_hline <- function(theme, x.minor, x.major, y.minor, y.major) {
    x.minor <- setdiff(x.minor, x.major)
    y.minor <- setdiff(y.minor, y.major)
    # Draw only the x (vertical) grid lines over the panel background.
    ggname("grill", grobTree(
      theme_render(theme, "panel.background"),
      if (length(x.minor) > 0) theme_render(
        theme, "panel.grid.minor", name = "x",
        x = rep(x.minor, each = 2), y = rep(0:1, length(x.minor)),
        id.lengths = rep(2, length(x.minor))
      ),
      if (length(x.major) > 0) theme_render(
        theme, "panel.grid.major", name = "x",
        x = rep(x.major, each = 2), y = rep(0:1, length(x.major)),
        id.lengths = rep(2, length(x.major))
      )
    ))
  }
  environment(guide_grid_no_hline) <- environment(ggplot2:::guide_grid)
  assignInNamespace("guide_grid", guide_grid_no_hline, ns = "ggplot2")
  print(ggplot.object)
}
###############################################################
# Example: a flipped bar chart, printed first with all grid
# lines, then via gghorX() with horizontal grid lines removed.
library(ggplot2)
theplot <- ggplot(mtcars, aes(factor(cyl))) + geom_bar() +
    coord_flip()  + theme_bw()
theplot          # original plot, all grid lines drawn
gghorX(theplot)  # same plot without horizontal grid lines
###############################################################

ggvertX <- function(ggplot.object, credit = TRUE){
  # Print a saved ggplot object with the vertical grid lines removed.
  # Works by temporarily swapping ggplot2's internal guide_grid for a
  # version that draws only the horizontal (y) grid lines.
  # NOTE(review): relies on old ggplot2 internals (guide_grid,
  # theme_render, ggname) -- confirm they exist in the installed version.
  if (credit) {
    # Bug fix: the URL string began with a stray "\b" backspace escape,
    # which corrupted the printed link.
    URL <- "http://wiki.stdout.org/rcookbook/Graphs/Axes%20(ggplot2)/"
    cat("\nFunction compliments of Winston Chang:\n\n", URL, "\n")
  }
  guide_grid_orig <- ggplot2:::guide_grid
  # Restore the original internal no matter how we exit; the original
  # code left ggplot2 permanently patched if print() errored.
  on.exit(assignInNamespace("guide_grid", guide_grid_orig, ns = "ggplot2"))
  guide_grid_no_vline <- function(theme, x.minor, x.major, y.minor, y.major) {
    x.minor <- setdiff(x.minor, x.major)
    y.minor <- setdiff(y.minor, y.major)
    # Draw only the y (horizontal) grid lines over the panel background.
    ggname("grill", grobTree(
      theme_render(theme, "panel.background"),
      if (length(y.minor) > 0) theme_render(
        theme, "panel.grid.minor", name = "y",
        x = rep(0:1, length(y.minor)), y = rep(y.minor, each = 2),
        id.lengths = rep(2, length(y.minor))
      ),
      if (length(y.major) > 0) theme_render(
        theme, "panel.grid.major", name = "y",
        x = rep(0:1, length(y.major)), y = rep(y.major, each = 2),
        id.lengths = rep(2, length(y.major))
      )
    ))
  }
  environment(guide_grid_no_vline) <- environment(ggplot2:::guide_grid)
  assignInNamespace("guide_grid", guide_grid_no_vline, ns = "ggplot2")
  print(ggplot.object)
}
###############################################################
# Example: a bar chart of cylinder counts, printed first with
# all grid lines, then via ggvertX() with vertical lines removed.
library(ggplot2)
theplot <- ggplot(mtcars, aes(factor(cyl))) +
    geom_histogram() + theme_bw()
theplot           # original plot, all grid lines drawn
ggvertX(theplot)  # same plot without vertical grid lines
 

Dason

Ambassador to the humans
I might add a "quiet" parameter and wrap the printing out of "Function compliments of..." stuff inside an if statement. That way you can turn off printing out the message if you want. I guess I just don't like it when functions print stuff with cat or the like and don't give you an easy option to suppress that output.
 

trinker

ggplot2orBust
I thought I'd share the contents of my .Last function in my .Rprofile that backs stuff up. It may be a bit inefficient (I'd appreciate tips if you have any), but it makes copies of several key pieces of R code to my Dropbox. For some I just have it save the last version; for others I have it write a new, dated version every time I exit R. I throw a bit in there to delete anything more than two months back (not two literal months — look at it to see what I mean):

Code:
.Last <- function(){
    # Runs automatically when R exits: copies key files to Dropbox,
    # keeps a dated history of the transcript functions, and prunes
    # dated copies older than the previous calendar month.
    v <- "C:/Users/Rinker/Dropbox/Back Up/"
    w1 <- "C:/Users/Rinker/Desktop/PhD Program/R Play Area/.Rprofile"
    w2 <- paste0(v, ".Rprofile")
    x1 <- "C:/Users/Rinker/Desktop/PhD Program/LAI 687-Select Topics/Select Topics Analysis/transcript Functions.R"
    # Dated copy: colons/spaces in date() are not legal in file names.
    x2 <- paste0(v, "transcript_functions/transcript_functions_", gsub("[: ]", "_", date()), ".txt")
    y1 <- "C:/Users/Rinker/Desktop/PhD Program/CEP 523-Stat Meth Ed Inference/R Stuff/.Commands for R.docx"
    y2 <- paste0(v, ".Commands for R.docx")
    z1 <- "C:/Users/Rinker/Desktop/PhD Program/CEP 523-Stat Meth Ed Inference/R Stuff/Scripts/useful functions.txt"
    z2 <- paste0(v, "useful functions.txt")

    file.copy(w1, w2, overwrite = TRUE, copy.mode = TRUE)
    file.copy(x1, x2, overwrite = TRUE, copy.mode = TRUE)
    file.copy(y1, y2, overwrite = TRUE, copy.mode = TRUE)
    file.copy(z1, z2, overwrite = TRUE, copy.mode = TRUE)

    # Map a three-letter month abbreviation ("Jan", ...) to 1:12.
    monnum <- function(X){
        key <- data.frame(mon = month.abb, num = 1:12)
        return(key[key$mon %in% X, 2])
    }

    tf <- paste0(v, "transcript_functions/")
    a <- list.files(tf)
    b <- a[substring(a, 1, 20) %in% "transcript_functions"]
    # Files whose month abbreviation (chars 26-28 of the file name) is
    # more than one month behind the current month.
    # NOTE(review): this arithmetic goes wrong across a year boundary
    # (January minus December is negative) -- confirm that is acceptable.
    d <- b[(as.numeric(monnum(substring(date(), 5, 7))) - as.numeric(sapply(substring(b, 26, 28), monnum))) > 1]
    # Bug fix: the original tested `length(d) > 1`, so a single stale
    # file was never removed. file.remove() is vectorized, so no lapply.
    if (length(d) > 0) {
        file.remove(paste0(tf, d))
    }
    return(cat("Good bye Tyler!"))
}
 

Jake

Cookie Scientist
Recently I wrote a function that implements the Cornfield-Tukey algorithm for deriving the expected values of the mean squares for factorial designs. For all but the most simple factorials, doing this by hand is a real pain in the ass, but this function makes it very easy. The code (with comments) follows:
Code:
# Derive expected mean squares (EMS) for factorial designs via the
# Cornfield-Tukey algorithm.
#
# Args:
#   design: formula; LHS symbol denotes replications per cell, RHS lists
#           all fixed/random factors and permitted interactions.
#   nested: character vector of "A/B" strings, meaning the levels of B
#           are nested under the levels of A.
#   random: single string naming, with no separators, the random factors.
#
# Returns: a noquote character matrix; rows are mean squares, columns are
#   variance components, and each entry is the product of level counts
#   (lower-case factor names, plus "1") entering that expectation.
EMS <- function(design, nested=NULL, random=NULL){
  # build two-way table: one row per model term, one column per factor;
  # a "1" marks that the factor participates in the term
  mat <- t(attr(terms(design), "factors"))
  
  # resolve fixed/random dummies: random-factor cells become "" and
  # fixed-factor cells become "fix" so they can be told apart later
  if (!is.null(random)){
    random <- unlist(strsplit(random,split=""))
    mat[,which(colnames(mat) %in% random)][mat[,
         which(colnames(mat) %in% random)]==1] <- ""
    mat[,which(!colnames(mat) %in% random)][mat[,
         which(!colnames(mat) %in% random)]==1] <- "fix"
  }
  
  # insert 1 in nested rows: terms containing the nested factor get a
  # "1" in the column of the factor it is nested under
  subs <- strsplit(rownames(mat), split=":")
  if(!is.null(nested)){
    nested <- strsplit(nested, split="/")
    for(term in nested){
      rows <- unlist(lapply(subs, function(x) term[2] %in% x))
      cols <- colnames(mat)==term[1]
      mat[rows,cols] <- "1"
    }
  }
  # append the residual-error row (always implicitly in the design)
  mat <- rbind(mat, e=rep("1", ncol(mat)))
  
  # insert numbers of levels for remaining cells: a factor absent from a
  # term contributes its level count, written as its lower-case name
  for(row in seq(nrow(mat))){
    mat[row,][mat[row,]=="0"] <- tolower(colnames(mat)[mat[row,]=="0"])
  }
  
  # construct EMS table (effects x variance components, reversed order)
  ems <- matrix(nrow=nrow(mat), ncol=nrow(mat),
                dimnames=list(Effect=rownames(mat),
                              VarianceComponent=rev(rownames(mat))))
  # add nesting information to subscripts: a nested factor implicitly
  # carries the subscript of the factor it is nested under
  if (!is.null(nested)){
    subs <- lapply(subs, function(x){
      new <- x
      for (nest in seq(length(nested))){
        if (nested[[nest]][2] %in% x) new <- c(new, nested[[nest]][1])
      }
      return(new)
    })
  }
  # the error term carries every subscript except the replication symbol
  subs[["e"]] <- colnames(mat)[-1]
  names(subs) <- rownames(mat)
  # rename #-of-reps variable to 'e' invisibly
  colnames(mat)[1] <- "e"
  # fill in EMS table: Cornfield-Tukey rules -- cover the row effect's
  # subscripts, drop components blocked by a fixed factor, and collapse
  # trivial "1"/"2" placeholders
  for(effect in rownames(ems)){
    for(varcomp in colnames(ems)){
      effectVec <- unlist(strsplit(effect, ":"))
      ans <- mat[varcomp,-1*which(colnames(mat) %in% effectVec)]
      if ("fix" %in% ans) ans <- ""
      if (all(ans=="1")) ans <- "1"
      if (("1" %in% ans | "2" %in% ans) & !all(ans=="1")){
        ans <- ans[!ans %in% c("1","2")]
      }
      varcompVec <- unlist(strsplit(varcomp, ":"))
      if (!all(effectVec %in% subs[[varcomp]])) ans <- ""
      if (effect=="e" & varcomp=="e") ans <- "1"
      ems[effect,varcomp] <- paste(ans, collapse="")
    }
  }

  return(noquote(ems))
}
Arguments:
  • design = A formula object specifying all possible sources of variation in the design (except residual error, which is always implicitly included). The left hand side of the ~ is the symbol that will be used to denote the number of replications per lowest-level factor combination (I usually use "r" or "n"). The right hand side should include all fixed and random factors, and all interactions thereof which are permitted by the design.
  • nested = A character vector, where each element is of the form "A/B", indicating that the levels of factor B are nested under the levels of factor A.
  • random = A character string indicating, without spaces or any separating characters, which of the factors specified in the design are random.

The returned value is a formatted table where the rows represent the mean squares, the columns represent the variance components that comprise the various mean squares, and the entries in each cell represent the terms that are multiplied and summed to form the expectation of the mean square for that row. Each term is either the lower-case version of one of the experimental factors, which indicates the number of levels for that factor, or a "1", which means the variance component for that column contributes to the mean square but is not multiplied by anything else.

To understand this, let's jump right in by examining the expected mean squares for a two-way mixed factorial design. We have a between-subjects factor \(A\) with \(p\) levels; a within-subjects factor \(B\) with \(q\) levels; and the subjects, \(S\), of which there are \(n\) levels. The number of replications per factor combination is \(r\); often this is implicitly equal to 1 but here we keep it general. Factors \(A\) and \(B\) are fixed while \(S\) is random. According to Winer (1971, Statistical Principles in Experimental Design), the expected values of the mean squares for this design are the following:

\(\begin{tabular}{c|c}
MS & E(MS)\\
\hline
A & $\sigma^2_\varepsilon$ + $rq\sigma^2_S$ + $rqn\sigma^2_A$\\
S & $\sigma^2_\varepsilon$ + $rq\sigma^2_S$\\
B & $\sigma^2_\varepsilon$ + $r\sigma^2_{SB}$ + $rpn\sigma^2_B$\\
AB & $\sigma^2_\varepsilon$ + $r\sigma^2_{SB}$ + $rn\sigma^2_{AB}$\\
SB & $\sigma^2_\varepsilon$ + $r\sigma^2_{SB}$\\
$\varepsilon$ & $\sigma^2_\varepsilon$\\
\end{tabular}\)

Testing a particular factor involves constructing an F-ratio such that, under the null hypothesis that the variance due to that factor equals 0, the numerator and denominator of the F-ratio have the same expectation. For example, in this case the null hypothesis that \(\sigma^2_A = 0\) is tested by \(\frac{MS_A}{MS_S}\); we say that the mean square due to subjects serves as the error term for this test. The tests of factor \(B\) and the \(AB\) interaction both use the mean square due to the \(SB\) interaction as their error terms.

We can specify this design using the EMS() function like this:
Code:
> EMS(r ~ A*B + S + S:B, nested="A/S", random="S")
      VarianceComponent
Effect e B:S A:B S  B   A  
   A   1         rb     rbs
   B   1 r          ras    
   S   1         rb        
   A:B 1 r   rs            
   B:S 1 r                 
   e   1
Which agrees with Winer. Note that the notation is slightly different here, as EMS() automatically uses the lower-case versions of the factor labels--a, b, and s--to represent the number of levels for the associated factors, rather than p, q, and n as above.

To specify a fully between-subjects anova, we can do:
Code:
> EMS(r ~ A*B + S, nested=c("A/S","B/S"), random="S")
      VarianceComponent
Effect e A:B S B   A  
   A   1     r     rbs
   B   1     r ras    
   S   1     r        
   A:B 1 rs  r        
   e   1
Note that in addition to adding an extra element to the nested argument, which now says that subjects are nested under both A and B, we also removed the S:B interaction from the design. If subjects are nested under the levels of B, so that each subject gets one and only one level of B, then it is not possible to estimate an S:B interaction.

The function also works with multiple random factors. Consider a linguistics experiment where a sample of subjects give responses to a sample of words, where half of the words are under one level of some treatment and the other half of the words are under the other level of the treatment. Letting S = subjects, W = words, and T = treatment, we can specify this design as:
Code:
> EMS(r ~ (T + W + S)^2 - T:W, nested="T/W", random="WS")
      VarianceComponent
Effect e W:S T:S S   W  T  
   T   1 r   rw      rs rws
   W   1 r           rs    
   S   1 r       rtw       
   T:S 1 r   rw            
   W:S 1 r                 
   e   1
Note that there is no simple F-ratio that will yield an unbiased test of the null hypothesis that \(\sigma^2_T = 0\) in this design. In this case one would construct a quasi-F ratio by using linear combinations of the mean squares. The appropriate quasi-F ratio for testing the factor T here would be \(\frac{MS_T + MS_{WS}}{MS_{TS} + MS_W}\). Substituting in the expected values shows that the numerator exceeds the denominator only if \(\sigma^2_T > 0\).

And this one is for Dason: a split-split-plot design, with fixed factors A, B, and C, and random factor P (whole plots).
Code:
> EMS(r ~ A*B*C + P + P:B + P:B:C, random="P", nested=c("A/P"))
       VarianceComponent
Effect  e B:C:P A:B:C B:P B:C A:C A:B P   C    B    A   
  A     1                             rbc           rbcp
  B     1             rc                       racp     
  C     1 r                               rabp          
  P     1                             rbc               
  A:B   1             rc          rcp                   
  A:C   1 r                   rbp                       
  B:C   1 r               rap                           
  B:P   1             rc                                
  A:B:C 1 r     rp                                      
  B:C:P 1 r                                             
  e     1
I hope others find this function as useful as I have been finding it already. Let me know how it could be improved.
 

Jake

Cookie Scientist
Since any interactions with random factors are themselves random, another way to view this is as having three random factors: whole plots (P), subplots (corresponding to B:p), and sub-subplots (corresponding to B:C:p). The design is taken straight from Winer (it's the only example he gives of a split-split-plot design).
 

Jake

Cookie Scientist
Here's a function I just wrote that generates complete sets of orthogonal contrast codes for arbitrary designs. You input the contrast codes that you care about and that you want included (even just a single code), and it uses singular value decomposition to generate a set of orthogonal codes that complete the set. This could be very useful when working with complicated and/or unconventional coding schemes.

Important note: in general the automatically generated codes will not represent meaningful group comparisons; they are just there so that you can work with a complete, orthogonal set and the nice properties these afford you.

Here's the function:
Code:
orthogCodes <- function(x, keepScale=1){
  # Complete a partial set of contrast codes into a fully orthogonal set
  # via singular value decomposition. Supplied columns keep their
  # comparisons; filler ".orth" columns are generated to complete the set.
  #
  # Args:
  #   x: matrix of contrast codes, one column per supplied contrast.
  #   keepScale: index of the supplied contrast whose scaling survives;
  #              all other columns are rescaled to match it.
  dims <- dim(x)
  if (length(dims) > 2) stop("2 dimensions only please!")
  n_groups <- dims[1]
  n_codes <- dims[2]
  if (n_codes + 1 >= n_groups) {
    stop("Set is already complete or overdetermined.")
  }
  # Label supplied columns, falling back to ".code" names when unnamed.
  labs <- colnames(x)
  if (is.null(labs)) labs <- names(c(.code = seq(n_codes)))
  # Square working matrix: intercept column of 1s, then the supplied
  # codes, padded with zero columns up to n_groups x n_groups.
  n_fill <- n_groups - 1 - n_codes
  square <- matrix(c(rep(1, n_groups), x, numeric(n_fill * n_groups)),
                   nrow = n_groups, ncol = n_groups)
  # Column norms, used below to restore the chosen contrast's scaling
  # (+1 skips past the intercept column).
  norms <- sqrt(apply(square, 2, crossprod))
  decomp <- svd(square)
  # u %*% v is the nearest orthogonal matrix; drop the intercept column
  # and rescale everything to the keepScale-th supplied contrast.
  ans <- (decomp$u %*% decomp$v)[, -1] * norms[keepScale + 1]
  colnames(ans) <- c(labs, names(c(.orth = seq(n_fill))))
  ans
}
Here's an example where we have 6 groups, "a" through "f", and for whatever reason the two comparisons that we really care about are (1) groups a,b,c vs. groups d,e,f, and (2) group a vs. group b.
Code:
> fac <- factor(letters[1:6])
> contrasts(fac) # the default contrasts--boring!
  b c d e f
a 0 0 0 0 0
b 1 0 0 0 0
c 0 1 0 0 0
d 0 0 1 0 0
e 0 0 0 1 0
f 0 0 0 0 1
> codes <- cbind(.ABCvDEF = c(-1,-1,-1,1,1,1),
+                .AvB = c(1,-1,0,0,0,0))
> codes # the codes we care about
     .ABCvDEF .AvB
[1,]       -1    1
[2,]       -1   -1
[3,]       -1    0
[4,]        1    0
[5,]        1    0
[6,]        1    0
> contrasts(fac) <- orthogCodes(codes)
> contrasts(fac) # the completed set
  .ABCvDEF          .AvB     .orth1     .orth2     .orth3
a        1  1.732051e+00 -0.5773503 -0.5773503 -0.5773503
b        1 -1.732051e+00 -0.5773503 -0.5773503 -0.5773503
c        1 -6.798700e-17  1.1547005  1.1547005  1.1547005
d       -1  4.759090e-16 -0.8164966 -0.8164966  1.6329932
e       -1  4.759090e-16  1.6329932 -0.8164966 -0.8164966
f       -1  4.759090e-16 -0.8164966  1.6329932 -0.8164966
By default the function is set up to preserve the scaling of the first code supplied (the others are rescaled to satisfy orthogonality). You can use the keepScale argument to specify which of the supplied contrasts keeps its original scaling; here I indicate the 2nd supplied contrast.
Code:
> contrasts(fac) <- orthogCodes(codes, keepScale=2)
> contrasts(fac)
    .ABCvDEF          .AvB     .orth1     .orth2     .orth3
a  0.5773503  1.000000e+00 -0.3333333 -0.3333333 -0.3333333
b  0.5773503 -1.000000e+00 -0.3333333 -0.3333333 -0.3333333
c  0.5773503 -3.925231e-17  0.6666667  0.6666667  0.6666667
d -0.5773503  2.747662e-16 -0.4714045 -0.4714045  0.9428090
e -0.5773503  2.747662e-16  0.9428090 -0.4714045 -0.4714045
f -0.5773503  2.747662e-16 -0.4714045  0.9428090 -0.4714045
We can verify complete orthogonality of the generated set. The matrix below contains all the pair-wise dot-products of the contrasts (we want all of the off-diagonals to be 0):
Code:
> contrs <- cbind(1, contrasts(fac))
> colnames(contrs)[1] <- "Intercept"
> rownames(contrs) <- colnames(contrs)
> round(crossprod(contrs), 2)
          Intercept .ABCvDEF .AvB .orth1 .orth2 .orth3
Intercept         6        0    0      0      0      0
.ABCvDEF          0        2    0      0      0      0
.AvB              0        0    2      0      0      0
.orth1            0        0    0      2      0      0
.orth2            0        0    0      0      2      0
.orth3            0        0    0      0      0      2
Here's a somewhat more complicated and interesting example, based on the situation that actually motivated the creation of this function. Say we have a time series consisting of data from each election year from 1980 to 2000. One way to help avoid issues of temporal autocorrelation of residuals in the fitted model would be to use orthogonal polynomial contrasts for years rather than treating the years variable as fully continuous. The contr.poly() function in base could usually do this easily for us, but there's a problem: data from one year is completely missing, so the resulting polynomials generated by contr.poly() and applied to the dataset would be neither orthogonal, nor technically would they be contrast codes (i.e., they wouldn't sum to 0). Suppose for now that the linear trend is the only one we are really interested in. Then one solution would be to specify a linear code, where the spacing between codes matches that between years (including the gap), and then use orthogCodes() to fill out the set. Here's what that process looks like:
Code:
> # notice that data from 1998 is missing
> years <- factor(seq(1980,2000, 2)[-10])
> contrasts(years) <- contr.poly(11)[-10,]
> round(contrasts(years), 2)
        .L    .Q    .C    ^4    ^5    ^6    ^7    ^8    ^9
1980 -0.48  0.51 -0.46  0.35 -0.24  0.14 -0.07  0.03 -0.01
1982 -0.38  0.20  0.09 -0.35  0.48 -0.45  0.33 -0.19  0.08
1984 -0.29 -0.03  0.34 -0.35  0.08  0.27 -0.47  0.44 -0.27
1986 -0.19 -0.20  0.35 -0.06 -0.32  0.34  0.03 -0.41  0.49
1988 -0.10 -0.31  0.21  0.24 -0.32 -0.11  0.40 -0.08 -0.43
1990  0.00 -0.34  0.00  0.35  0.00 -0.38  0.00  0.42  0.00
1992  0.10 -0.31 -0.21  0.24  0.32 -0.11 -0.40 -0.08  0.43
1994  0.19 -0.20 -0.35 -0.06  0.32  0.34 -0.03 -0.41 -0.49
1996  0.29 -0.03 -0.34 -0.35 -0.08  0.27  0.47  0.44  0.27
2000  0.48  0.51  0.46  0.35  0.24  0.14  0.07  0.03  0.01
> 
> # the default polynomials have undesirable properties here
> contrs <- cbind(1, contrasts(years))
> colnames(contrs)[1] <- "Intercept"
> rownames(contrs) <- colnames(contrs)
> round(crossprod(contrs), 2)
          Intercept    .L    .Q    .C    ^4    ^5    ^6    ^7
Intercept     10.00 -0.38 -0.20  0.09  0.35  0.48  0.45  0.33
.L            -0.38  0.85 -0.08  0.03  0.14  0.18  0.17  0.13
.Q            -0.20 -0.08  0.96  0.02  0.07  0.10  0.09  0.07
.C             0.09  0.03  0.02  0.99 -0.03 -0.04 -0.04 -0.03
^4             0.35  0.14  0.07 -0.03  0.87 -0.17 -0.16 -0.12
^5             0.48  0.18  0.10 -0.04 -0.17  0.77 -0.22 -0.16
^6             0.45  0.17  0.09 -0.04 -0.16 -0.22  0.79 -0.15
^7             0.33  0.13  0.07 -0.03 -0.12 -0.16 -0.15  0.89
^8             0.19  0.07  0.04 -0.02 -0.07 -0.09 -0.09 -0.06
^9             0.08  0.03  0.02 -0.01 -0.03 -0.04 -0.04 -0.03
             ^8    ^9
Intercept  0.19  0.08
.L         0.07  0.03
.Q         0.04  0.02
.C        -0.02 -0.01
^4        -0.07 -0.03
^5        -0.09 -0.04
^6        -0.09 -0.04
^7        -0.06 -0.03
^8         0.96 -0.02
^9        -0.02  0.99
> 
> # here are the new codes
> contrasts(years) <- orthogCodes(cbind(.lin = seq(1980,2000,2)[-10] - mean(seq(1980,2000,2)[-10])))
> round(contrasts(years), 2)
     .lin .orth1 .orth2 .orth3 .orth4 .orth5 .orth6 .orth7
1980 -9.2  -5.43  -5.18  -4.92  -4.40  -6.21  -6.46  -5.69
1982 -7.2   3.70   5.77   7.83  11.95  -2.48  -4.54   1.64
1984 -5.2  -0.69  -0.38  -0.07   0.55  -1.61  17.30  -1.00
1986 -3.2  -1.10  -0.96  -0.81  -0.53  17.69  -1.68  -1.24
1988 -1.2  -1.51  -1.54  -1.56  -1.60  -1.45  -1.43  -1.49
1990  0.8  -1.93  -2.12  -2.30  -2.68  -1.37  -1.18  17.48
1992  2.8  16.88  -2.69  -3.05  -3.75  -1.29  -0.93  -1.99
1994  4.8  -2.76  15.95  -3.79  -4.82  -1.20  -0.69  -2.24
1996  6.8  -3.17  -3.85  14.69  -5.90  -1.12  -0.44  -2.49
2000 10.8  -4.00  -5.01  -6.02  11.18  -0.96   0.05  -2.98
     .orth8
1980  -5.95
1982  -0.42
1984  -1.30
1986  -1.39
1988  17.75
1990  -1.55
1992  -1.64
1994  -1.72
1996  -1.81
2000  -1.97
> # these are much nicer
> contrs <- cbind(1, contrasts(years))
> colnames(contrs)[1] <- "Intercept"
> rownames(contrs) <- colnames(contrs)
> round(crossprod(contrs), 2)
          Intercept  .lin .orth1 .orth2 .orth3 .orth4 .orth5
Intercept        10   0.0    0.0    0.0    0.0    0.0    0.0
.lin              0 369.6    0.0    0.0    0.0    0.0    0.0
.orth1            0   0.0  369.6    0.0    0.0    0.0    0.0
.orth2            0   0.0    0.0  369.6    0.0    0.0    0.0
.orth3            0   0.0    0.0    0.0  369.6    0.0    0.0
.orth4            0   0.0    0.0    0.0    0.0  369.6    0.0
.orth5            0   0.0    0.0    0.0    0.0    0.0  369.6
.orth6            0   0.0    0.0    0.0    0.0    0.0    0.0
.orth7            0   0.0    0.0    0.0    0.0    0.0    0.0
.orth8            0   0.0    0.0    0.0    0.0    0.0    0.0
          .orth6 .orth7 .orth8
Intercept    0.0    0.0    0.0
.lin         0.0    0.0    0.0
.orth1       0.0    0.0    0.0
.orth2       0.0    0.0    0.0
.orth3       0.0    0.0    0.0
.orth4       0.0    0.0    0.0
.orth5       0.0    0.0    0.0
.orth6     369.6    0.0    0.0
.orth7       0.0  369.6    0.0
.orth8       0.0    0.0  369.6
 

trinker

ggplot2orBust
I want to take a test drive of a function I'm working on. It works for me on my Windows platform. I want to see if it works on Mac OS X and Linux as well. I also want to see if the TeX compiling of packages works both for those who have TeX and for those who don't use it:

The function gets the help manual for a package even if you don't have it installed. It tries to use what's on your machine first, then goes to CRAN to get the help manual.

Code:
p_help <- function(package.name=NULL, pdf=FALSE){
    # Open a package's help manual, even when it is not installed.
    #
    # Args:
    #   package.name: the package, given unquoted; defaults to base.
    #   pdf: if FALSE, open HTML help (or the CRAN PDF if the package is
    #        not installed); if TRUE, open/build a local PDF manual.
    x <- as.character(substitute(package.name))
    if (identical(x, character(0))) x <- "base"
    y <- list.files(.libPaths())
    if (!pdf) {
        if (x %in% y) {
            # Installed: force HTML help, restoring the option on exit.
            j <- options()$help_type
            on.exit(options(help_type = j))
            options(help_type = "html")
            help(package = (x))
        } else {
            # Not installed: open the CRAN manual in the browser.
            z <- "http://cran.r-project.org/web/packages/"
            browseURL(paste0(z, x, "/", x, ".pdf"))
        }
    } else {
        w <- paste0(getwd(), "/", x, ".pdf")
        if (file.exists(w)) {
            # Bug fix: shell.exec() exists only on Windows; branch on the
            # platform so Mac OS X and Linux can open the PDF too.
            if (.Platform$OS.type == "windows") {
                shell.exec(w)
            } else if (Sys.info()[["sysname"]] == "Darwin") {
                system(paste("open", shQuote(w)))
            } else {
                system(paste("xdg-open", shQuote(w)))
            }
        } else {
            # Build the PDF manual with R CMD Rd2pdf (requires LaTeX).
            path <- find.package(x)
            system(paste(shQuote(file.path(R.home("bin"), "R")),
                "CMD", "Rd2pdf", shQuote(path)))
        }
    }
}
########################################
# TRY IT
########################################
p_help(base)          # should retrieve it using your computer
p_help(afc)           # a package you don't have installed (uses the Net)
p_help(plyr, TRUE)    # makes a pdf version using the LaTeX compiler
Any tips, bugs, fixes, ideas etc are welcomed.
 

Lazar

Phineas Packard
Have been doing a MCMC course where the first lessons are generating known distributions using a random uniform distribution. Below is a couple of standard normal distributions one based on clt (which I twigged from MCSM book) and one based on the Box Muller transform.

First on based on central limit theory:
Code:
cltNorm <- function(n) {
    # Approximate n standard-normal draws via the central limit theorem:
    # the sum of 12 iid Uniform(-0.5, 0.5) variables has mean 0 and
    # variance 12 * (1/12) = 1, so each column sum is ~ N(0, 1).
    #
    # n: number of draws to return.
    # Returns a numeric vector of length n.
    x <- matrix(runif(n * 12, -0.5, 0.5), nrow = 12)
    colSums(x)  # vectorized; replaces the slower apply(x, 2, sum)
}
Then one based on the box-muller transform:
Code:
boxNorm <- function(n) {
    # Generate n standard-normal draws via the Box-Muller transform:
    # each pair of Uniform(0,1) draws yields two independent N(0,1)
    # values through the polar-angle construction.
    #
    # n: number of draws to return.
    # Returns a numeric vector of length n.
    #
    # Fixes over the original: the loop is vectorized (the original
    # called runif(1) twice per pair), and odd n is handled correctly
    # (the original truncated fractional indices and left y[n] at 0).
    ns <- ceiling(n / 2)          # number of uniform pairs needed
    x1 <- runif(ns)
    x2 <- runif(ns)
    r <- sqrt(-2 * log(x1))       # shared radius for each pair
    y <- c(r * cos(2 * pi * x2), r * sin(2 * pi * x2))
    y[seq_len(n)]                 # drop the surplus value when n is odd
}
Mine are much slower than the one in R:
Code:
> system.time(cltNorm(10^4));system.time(boxNorm(10^4));system.time(rnorm(10^4))
   user  system elapsed 
   0.03    0.00    0.03 
   user  system elapsed 
   0.08    0.00    0.07 
   user  system elapsed 
      0       0       0
and more biased (particularly the one based on the CLT).
 

trinker

ggplot2orBust
Spent the night making a hangman game. Still a few kinks to work out. Trying to put it on GitHub, but I can't for the moment as I deleted a repo by the same name and I think I have to wait a time period until I can upload hangman again. If you don't have devtools you'll need it, as it makes you download my qdap package (I need a data set from it):


EDIT: download the hangman game:
Code:
library(devtools); install_github("hangman", "trinker")
hangman()
Code:
hangman <- function() {
    # Interactive hangman game: the word is shown masked in the console,
    # guesses are read from stdin with scan(), and a gallows is drawn in
    # the plot window, adding one body part per wrong guess (6 = lose).
    # Requires the DICTIONARY data set from the 'qdap' package.
    # Side effects: reads stdin, draws plots, may install/attach packages.
    if (!"qdap" %in% .packages(all.available = TRUE)) {
        # NOTE(review): this inner condition re-tests qdap rather than
        # devtools, so the stop() below can never fire -- presumably it
        # was meant to check for devtools; confirm before relying on it.
        if (!"qdap" %in% .packages(all.available = TRUE)) {
            stop("please install devtools package")
        }
        require(devtools)
        install_github("qdap", "trinker") 
    }
    require(qdap)
    # Draw one random word and split it into individual letters.
    x1 <- DICTIONARY[sample(1:nrow(DICTIONARY), 1), 1]
    x <- unlist(strsplit(x1, NULL))
    len <- length(x)
    x2 <- rep("_", len)   # masked word displayed to the player
    chance <- 0           # number of wrong guesses so far
    win1 <- 0             # number of letter positions revealed so far
    win <- win1/len       # fraction of the word revealed
    wrong <- character()  # letters guessed that are not in the word
    right <- character()  # letters guessed that are in the word
    print(x2, quote = FALSE)
    hang.plot <- function(){ #plotting function
        # Redraws the whole board: title, masked word, wrong/correct
        # guess lists, the gallows, and one body part per wrong guess.
        plot.new()
        mtext("HANGMAN", col = "blue", cex=2)    
        mtext(paste(x2, collapse = " "), side = 1, cex=1.5) 
        mtext(paste(rep("_", len), collapse = " "), side = 1, cex=1.5) 
        mtext("wrong", side = 3, cex=1.5,
            adj = 0, padj = 1, col = "red") 
        mtext(paste(wrong, collapse = "\n"), side = 3, cex=1.5,
            adj = 0, padj = 2.5)
        mtext(paste(right, collapse = "\n"), side = 3, cex=1.5,
            adj = 1, padj = 2.5) 
        mtext("correct", side = 3, cex=1.5,
            adj = 1, padj = 1, col = "red")
        # Gallows frame: rope, beam, post, and base.
        segments(.365, .77, .365, .83, lwd=2)
        segments(.365, .83, .625, .83, lwd=2)
        segments(.625, .83, .625, .25, lwd=2)
        segments(.57, .25, .675, .25, lwd=2)
        # parts indexes which body pieces to draw (one per wrong guess).
        parts <- seq_len(length(wrong))
        if (identical(wrong, character(0))) {
            parts <- 0
        }
        if (1 %in% parts) {
            # head with a face
            mtext("O", side = 1, cex=4, adj = .365, padj = -7.2)
            mtext("o o", side = 1, cex=1, adj = .3725, padj = -28.2)
            mtext("<", side = 1, cex=1, adj = .373, padj = -27.6)
            mtext("__", side = 1, cex=1, adj = .373, padj = -27.2)
        }
        if (2 %in% parts) {
            # torso
            mtext("I", side = 1, cex=4, adj = .375, padj = -6.25)
            mtext("I", side = 1, cex=4, adj = .375, padj = -5.5)
            mtext("I", side = 1, cex=4, adj = .375, padj = -4.75)
        }
        if (3 %in% parts) {
            # first arm
            segments(.37, .57, .45, .63, lwd=7)
        }
        if (4 %in% parts) {
            # second arm
            segments(.37, .57, .29, .63, lwd=7)
        }
        if (5 %in% parts) {
            # first leg; the mouth is overpainted white then redrawn red
            segments(.37, .426, .43, .3, lwd=7)
            mtext("__", side = 1, cex = 1, adj = .373, 
                padj = -27.2, col = "white")
            mtext("O", side = 1, cex = 1.25, adj = .373, padj = -21.5, 
                col="red")
        }
        if (6 %in% parts) {
            # second leg; eyes become x's and the losing banner is shown
            segments(.37, .426, .31, .3, lwd = 7)
            mtext("o o", side = 1, cex = 1, adj = .3725, 
                padj = -28.2, col="white")
            mtext("x x", side = 1, cex=1, adj = .3725, padj = -28.2)
            mtext("You Lose", side = 1, cex=8, padj = -3, 
                col = "darkgreen")
        }
        if (win1 == len) {
            # every letter revealed: winning banner (drawn twice, offset,
            # for a drop-shadow effect)
            mtext("WINNER!", side = 1, cex=8, padj = -3, 
                col = "green")
            mtext("WINNER!", side = 1, cex=8, adj = .1, padj = -3.1, 
                col = "darkgreen")
        }
    } #end of hang.plot
    guess <- function(){#start of guess function
        # Reads a single letter and updates the game state held in the
        # enclosing hangman() environment via <<-, then redraws the board.
        cat("\n","Choose a letter:","\n") 
        y <- scan(n=1,what = character(0),quiet=T)
        if (y %in% c(right, wrong)) {
            # This stop() is caught by try() in the main loop below, so a
            # repeated guess simply restarts the prompt.
            stop(paste0("You've already guessed ", y))
        }
        if (y %in% x) {
            right <<- c(right, y)
            # Credit every occurrence of the guessed letter in the word.
            win1 <<- sum(win1, sum(x %in% y)) 
            win <<- win1/len 
            message(paste0("Correct!","\n"))
        } else {
            wrong  <<- c(wrong, y)
            chance  <<- length(wrong)
            message(paste0("The word does not contain ", y, "\n"))
        }
        # Unmask every position whose letter has been correctly guessed.
        x2[x %in% right] <<- x[x %in% right]
        print(x2, quote = FALSE)
        hang.plot()
    }#end of guess function
    hang.plot()
    # Main loop: keep prompting until the word is solved or 6 misses.
    while(all(win1 != len & chance < 6)){ 
        try(guess())
    } 
    if (win == 1) {
        outcome <- "Congratulations! You Win!\n"
    } else {
        # NOTE(review): "loose" is a typo for "lose" in this user-facing
        # string; left unchanged here since this edit is comments-only.
        outcome <- paste("Sorry. You loose. The word is:", x1, "\n")
    }
    print(outcome)
}

hangman()
 

trinker

ggplot2orBust
I always have to give love to this forum. Anyway someone recently asked on SO (stack overflow.com) about how to recode a variable. Someone responded you could:

Code:
x[x==2] <- "two"
Lovely, but it got me thinking about recoding some cells in a vector but not all. I also wanted the solution to apply to a dataframe, list, or vector. Here's my code:

Code:
# First (buggy) draft of recoder -- superseded by the corrected version
# later in this thread. The [COLOR="blue"][B]...[/B][/COLOR] tokens below
# are forum-highlight markup preserved from the original post, not R code:
# they flag the defective call.
# Known bug: `x[x == y] <- as.character(...)` coerces the ENTIRE vector to
# character even when zero cells match, so untouched columns of a
# data.frame/list come back as character.
recoder <- function(object, pattern, replacement) {
    # switcher: replace every cell of x equal to y with z.
    switcher <- function(x, y, z){ 
        x[x==y] <- z 
        return(x)
    } 
    # A length-1 replacement is recycled across all patterns.
    if (length(replacement) == 1) {
        replacement <- rep(replacement, length(pattern))
    }
    # superswitcher: apply every pattern/replacement pair to one vector x.
    superswitcher <- function(x, y, z){
        DF <- data.frame(y, z, stringsAsFactors = FALSE)
        z <- x
        if (class(DF[, 2]) %in% c("character", "factor")) {
            lapply(1:nrow(DF), function(i) {
               z <<- switcher(z, DF[i, 1], [COLOR="blue"][B]as.character(DF[i, 2])[/B][/COLOR]) 
            })
        } else {
            lapply(1:nrow(DF), function(i) {
               z <<- switcher(z, DF[i, 1], DF[i, 2]) 
            })
        }
        return(z)
    }
    # Dispatch on container type: plain vector, matrix/data.frame, or list.
    if (is.vector(object) & !is.list(object)) {
        sapply(object, superswitcher, pattern, replacement)
    } else {
        if (is.matrix(object) | is.data.frame(object)) {
            do.call(data.frame, lapply(object, superswitcher, 
                pattern, replacement))
        } else {
            lapply(object, superswitcher, pattern, replacement)
        }
    }
}


#TRY IT OUT
recoder(mtcars$carb, c(1, 0), c("A", "B")) 
recoder(mtcars$carb, c(1, 0), NA)          
recoder(mtcars$carb, c(1, 0), c(3, 4))     
recoder(mtcars, c(1, 0), c("A", "B"))

dfsplitter <- function(df){ # convenience function
    # Convert a data.frame into a named list of its column vectors.
    # The original split(t(df), 1:ncol(df)) route went through a matrix
    # transpose, which coerced mixed-type columns to a single common type;
    # extracting columns directly preserves each column's class.
    LIST <- lapply(seq_len(ncol(df)), function(i) df[[i]])
    names(LIST) <- names(df)
    return(LIST)
}

recoder(dfsplitter(mtcars), c(1, 0), c("A", "B"))
Couple things:

  1. Is there an easier approach/better/faster?
  2. When I supply a list to the function (list or data.frame) it returns an all character vector for the output even if nothing was replaced in that vector. How can I fix this?
 
Simple function that inputs a date (DD/MM/YYYY) and a vector of dates (each element DD/MM/YYYY) and returns a numeric location either of where that date is located in the vector, or if it's not in the vector, the closest adjacent date. Useful if you're doing a big time series project.

Code:
fun_datefind <- function(date, vector)
{
	# Locate a date (DD/MM/YYYY) in a vector of DD/MM/YYYY strings.
	# Returns the index of the exact match, or of the closest date(s)
	# when there is no exact match (ties return multiple indices).
	#
	# Fixes over the original grep() approach: the unanchored pattern
	# could false-match (e.g. "1/2020" inside "11/2020"), it errored
	# when no date shared the target's month/year, and it could only
	# find the nearest date within the same month/year. Parsing with
	# as.Date makes the comparison exact and lets the nearest date
	# come from any month.
	target <- as.Date(date, format = "%d/%m/%Y")
	dates <- as.Date(vector, format = "%d/%m/%Y")
	gaps <- abs(as.numeric(dates - target))
	location <- which(gaps == min(gaps, na.rm = TRUE))
	return(location)
}
 

trinker

ggplot2orBust
Alright I figured it out for myself (Dason made me; always pushing you to do for yourself; self reliance is way over rated) :)

The problem is a call to as character that I highlighted above in blue. Here it is all fixed up:

Code:
recoder <- function(object, pattern, replacement) {
    # Recode values in a vector, matrix, data.frame, or list: every cell
    # equal to pattern[i] becomes replacement[i]. A length-1 replacement
    # is recycled across all patterns. The corrected version of the
    # earlier draft in this thread.
    #
    # switcher: replace every cell of f equal to g with h.
    switcher <- function(f, g, h){ 
        f[f==g] <- h 
        return(f)
    } 
    if (length(replacement) == 1) {
        replacement <- rep(replacement, length(pattern))
    }
    # superswitcher: apply every pattern/replacement pair to one vector x.
    superswitcher <- function(x, y, z){
        DF <- data.frame(y, z, stringsAsFactors = FALSE)
        z <- x
        if (class(DF[, 2]) %in% c("character", "factor")) {
            lapply(1:nrow(DF), function(i) {
               # Key fix: skip pairs with no matching cell. Assigning a
               # character replacement -- even to zero matched positions --
               # would coerce the whole vector to character.
               if (sum(z %in% DF[i, 1]) == 0) {
                   z <<- z
               } else {
                   z <<- switcher(z, DF[i, 1], as.character(DF[i, 2])) 
               }
            })
        } else {
            lapply(1:nrow(DF), function(i) {
               z <<- switcher(z, DF[i, 1], DF[i, 2]) 
            })
        }
        return(z)
    }
    # Dispatch on container type; the output mirrors the input's shape.
    if (is.vector(object) & !is.list(object)) {
        # plain vector: recode element by element
        sapply(object, superswitcher, pattern, replacement)
    } else {
        if (is.matrix(object) | is.data.frame(object)) {
            # recode each column, then rebuild a data.frame
            do.call(data.frame, lapply(object, superswitcher, 
                pattern, replacement))
        } else {
            # generic list: recode each element
            lapply(object, superswitcher, pattern, replacement)
        }
    }
}


#TRY IT OUT
recoder(mtcars$carb, c(1, 0), c("A", "B")) 
recoder(mtcars$carb, c(1, 0), NA)          
recoder(mtcars$carb, c(1, 0), c(3, 4))     
recoder(mtcars, c(1, 0), c("A", "B"))

dfsplitter <- function(df){ # convenience function: data.frame -> list
    # Return the columns of df as a named list. Avoids the matrix
    # coercion in the original split(t(df), 1:ncol(df)) idiom, which
    # forced every column to one common type; direct column extraction
    # keeps each column's original class.
    LIST <- lapply(seq_len(ncol(df)), function(j) df[[j]])
    names(LIST) <- names(df)
    return(LIST)
}

recoder(dfsplitter(mtcars), c(1, 0), c("A", "B"))
 

vinux

Dark Knight
Trinker gave me the idea. If you don't like strawberry, change the flavor.

Code:
library(plotrix)
# Draw one birthday candle on the current plot: a red stick two units
# tall with an orange flame on top. pos = c(x, y) gives the lower-left
# corner of the stick in user coordinates.
candle <- function(pos) {
  cx <- pos[1]
  cy <- pos[2]
  # candle body
  rect(cx, cy, cx + 0.2, cy + 2, col = "red")
  # flame: a small polygon sitting on the top of the stick
  flame_x <- cx + c(0.05, -0.1, 0.1, 0.3, 0.15, 0.05)
  flame_y <- cy + c(2, 2.3, 2.6, 2.3, 2, 2)
  polygon(flame_x, flame_y, col = "orange")
}

cake_colour="#FF3399"  # pink used for every part of the cake body
# Blank canvas with no axes or box; the cake is drawn with raw graphics calls.
plot(c(0,10), c(0,10),type="n", bty="n",xaxt="n",yaxt="n", main="Cake", xlab="",ylab="")
# Bottom of the cake: two nested ellipses (plotrix::draw.ellipse) form the rim.
draw.ellipse(5,2,col=cake_colour,a=4.4,b=1.7,border=1)
draw.ellipse(5,2,col=cake_colour,a=4,b=1.4,border=1)
# Side of the cake: a rectangle joining the bottom rim to the top rim,
# with explicit edge lines on the left and right.
rect(1,2,9,5,col=cake_colour,border=cake_colour)
lines(c(1,1),c(2,5))
lines(c(9,9),c(2,5))
# Top surface of the cake.
draw.ellipse(5,5,col=cake_colour,a=4,b=1.4)

# Six candles at staggered positions across the cake top
# (uses the candle() helper defined above).
candle(c(2.5,4.5))
candle(c(3,5))
candle(c(4,4.5))
candle(c(5,5))
candle(c(6,4.5))
candle(c(7,5.2))
 

Dason

Ambassador to the humans
It's more Windows command line code than R code but... alarm() doesn't actually make any noise on my Windows laptop so I wanted to make a function that actually did. This plays that annoying system notification beep 3 times by default.

Code:
beep <- function(n = 3, sleeptime = .5){
    # Play the system notification sound n times, pausing sleeptime
    # seconds after each beep.
    #
    # n: number of beeps (default 3).
    # sleeptime: pause in seconds between beeps (default 0.5).
    #
    # The rundll32 call only exists on Windows; elsewhere fall back to
    # alarm() (which may still be silent depending on terminal settings).
    for (i in seq_len(n)) {
        if (.Platform$OS.type == "windows") {
            system("rundll32 user32.dll,MessageBeep -1")
        } else {
            alarm()
        }
        Sys.sleep(sleeptime)
    }
}