
# author:  Norm Matloff

# compute the boundary for mean Y = bval against 2 specified predictors
# in X; motivated by classification but can be used for continuous Y and
# regression, in which case one can plot contours of (X1,X2) pairs
# corresponding to given values of the regression function

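# returns the ggplot2 object for plotting the boundary against the
# specified predictors; if a group variable is given, one boundary curve
# per group is drawn on the same plot (note: the current argument list
# has no way to pass in an existing plot to which a boundary is added)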

# also prints a few random data points, with Y values, to identify which
# side of the boundary is in the "positive" (increasing mean Y)
# direction

# method:  the estimated mean Y values are computed at each point; a
# band is formed around the boundary; the boundary band is smoothed into
# a curve

# xyz:  data 
# zvar:  index of the response variable
# xvars:  indices of the 2 predictor variables
# grpvar:  index of the group variable, if any; vector or factor
# bval:  value from which the boundary is defined; default is overall
#    (ungrouped) mean of Y 
# bandhw:  determines width of band around boundary--all points
#    having estimated response mean within bval +/- bandhw*bval; default
#    is 0.2
# k:  number of nearest neighbors; default is square root of the number
#    of observations
# xlb:  optional label for the horizontal axis
# ylb:  optional label for the vertical axis
# clr:  vector of colors for the boundary curves; recycled if too short
# cls:  Snow cluster, if any
# nchunks:  number of chunks; see Smoother.R

boundary <- 
   function(xyz,zvar=1,xvars=2:3, grpvar=NULL,bval=NULL,bandhw=0.2,k=NULL,
      xlb=NULL,ylb=NULL,clr=c("blue","red","darkgreen","brown"),
      cls=NULL,nchunks=NULL) 
{
   # Estimates mean Y at every data point (separately within each group,
   # if grpvar is given), keeps the points whose estimate lies within
   # bandhw*|bval| of bval, and loess-smooths that band into a curve.
   #
   # arguments: see the header comment of this file; note that nchunks
   # is accepted but not used anywhere in this function body
   #
   # value: the ggplot2 object holding one smoothed curve per group; as
   # a side effect, up to 5 randomly chosen data points are printed
   # together with their estimated mean Y
   #
   # stops with "empty band" if some group has no points in the band

   # library() rather than require(), so a missing ggplot2 is reported
   # right here instead of as a "could not find function" error later
   library(ggplot2)
   y <- xyz[,zvar]
   x <- xyz[,xvars]
   if (is.null(bval)) bval <- mean(y)
   # determine group indices
   nrx <- nrow(x)
   if (is.null(grpvar)) {
      # no grouping, so all rows are in one big "group"
      idxs <- list(seq_len(nrx))
   } else {
      idxs <- split(seq_len(nrx),xyz[,grpvar])
   }
   ngrps <- length(idxs)
   # recycle the colors so that every group gets a non-NA color
   clr <- rep_len(clr,ngrps)
   # abs() keeps the band half-width positive even if bval is negative
   tol <- abs(bandhw * bval)
   # estimated mean Y for every row of x, filled in group by group; used
   # for the printout at the end
   eyhatall <- rep(NA_real_,nrx)
   for (i in seq_len(ngrps)) {
      ix <- idxs[[i]]
      # drop=FALSE so that a one-row group stays two-dimensional
      xg <- x[ix,,drop=FALSE]
      yg <- y[ix]
      # default number of neighbors is computed per group; it must not
      # overwrite k, or the first group's value would be reused for all
      # later groups
      kg <- if (is.null(k)) ceiling(sqrt(nrow(xg))) else k
      # find estimated mean Y at all data points in this group
      eyhat <- smoothz(cbind(xg,yg),knnreg,kg,cls=cls)
      eyhatall[ix] <- eyhat
      # find indices of the points in the band around the boundary
      bandpts <- which(abs(eyhat - bval) < tol)
      if (length(bandpts) == 0) stop("empty band")
      # prepare to plot boundary
      x1 <- xg[,1]  # horizontal plotting variable
      x1band <- x1[bandpts]
      x2 <- xg[,2]  # vertical plotting variable
      x2band <- x2[bandpts]
      x12 <- data.frame(x1b=x1band,x2b=x2band)
      clri <- clr[i]
      if (i == 1) {
         p <- ggplot(x12,aes(x1b,x2b)) + 
            geom_smooth(method="loess",colour=clri)
      } else {
         # later groups reuse the x1b/x2b mapping of the first layer
         p <- p + 
            geom_smooth(data=x12,method="loess",colour=clri)
      }
   }
   if (!is.null(xlb)) p <- p + xlab(xlb)
   if (!is.null(ylb)) p <- p + ylab(ylb)
   print("some random points:")
   # at most 5 points, so that small data sets do not make sample fail
   rx <- sample.int(nrx,min(5,nrx),replace=FALSE)
   tmpx <- x[rx,,drop=FALSE]
   # index the all-rows vector, so each value matches its own data point
   # no matter which group the point belongs to
   tmpy <- eyhatall[rx]
   print(cbind(tmpx,tmpy))
   p
}

# project the rows of x onto the first 2 principal component directions
# found by prcomp()

# x:  numeric matrix or data frame, with at least 2 columns

# value:  matrix with one row per row of x and 2 columns; the data are
#    multiplied by the loadings as is, i.e. without first subtracting
#    the column means
pc2 <- function(x) {
   loadings <- prcomp(x)$rotation
   top2 <- loadings[,c(1,2)]
   # as.matrix() since x might be a data frame, which %*% cannot take
   xmat <- as.matrix(x)
   xmat %*% top2
}
