Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ Description: A ggplot2 based biplot. It provides a drop-in
replacement for biplot.princomp(). It implements a
biplot and scree plot using ggplot2.
Depends:
ggplot2, plyr, scales, grid
ggplot2, plyr, scales, grid, ggrepel
License: GPL-2
URL: http://github.com/vqv/ggbiplot
Collate:
'ggbiplot.r'
'ggscreeplot.r'
RoxygenNote: 6.1.1
117 changes: 72 additions & 45 deletions R/ggbiplot.r
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,16 @@
#' @param ellipse.prob size of the ellipse in Normal probability
#' @param labels optional vector of labels for the observations
#' @param labels.size size of the text used for the labels
#' @param arrows.col color of the arrows
#' @param alpha alpha transparency value for the points (0 = transparent, 1 = opaque)
#' @param circle draw a correlation circle? (only applies when prcomp was called with scale = TRUE and when var.scale = 1)
#' @param var.subset vector of labels to show on biplot (NULL = show all labels)
#' @param var.repel whether or not to repel geom_text
#' @param var.axes draw arrows for the variables?
#' @param varname.size size of the text for variable names
#' @param varname.adjust adjustment factor the placement of the variable names, >= 1 means farther from the arrow
#' @param varname.abbrev whether or not to abbreviate the variable names
#' @param varname.col color of the labels (text only)
#'
#' @return a ggplot2 plot
#' @export
Expand All @@ -45,24 +49,26 @@
#' wine.pca <- prcomp(wine, scale. = TRUE)
#' print(ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE))
#'
ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
obs.scale = 1 - scale, var.scale = scale,
groups = NULL, ellipse = FALSE, ellipse.prob = 0.68,
labels = NULL, labels.size = 3, alpha = 1,
var.axes = TRUE,
circle = FALSE, circle.prob = 0.69,
varname.size = 3, varname.adjust = 1.5,
varname.abbrev = FALSE, ...)
ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
obs.scale = 1 - scale, var.scale = scale,
groups = NULL, ellipse = FALSE, ellipse.prob = 0.68,
labels = NULL, labels.size = 3,
alpha = 1, var.axes = TRUE,
circle = FALSE, circle.prob = 0.69,
var.repel = TRUE, var.subset = NULL,
varname.col = "darkred", arrows.col = varname.col,
varname.size = 3, varname.adjust = 1.5,
varname.abbrev = FALSE, ...)
{
library(ggplot2)
library(plyr)
library(scales)
library(grid)

stopifnot(length(choices) == 2)

# Recover the SVD
if(inherits(pcobj, 'prcomp')){
if(inherits(pcobj, 'prcomp')){
nobs.factor <- sqrt(nrow(pcobj$x) - 1)
d <- pcobj$sdev
u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*')
Expand All @@ -78,75 +84,79 @@ ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*')
v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/")
} else if(inherits(pcobj, "lda")) {
nobs.factor <- sqrt(pcobj$N)
d <- pcobj$svd
u <- predict(pcobj)$x/nobs.factor
v <- pcobj$scaling
d.total <- sum(d^2)
nobs.factor <- sqrt(pcobj$N)
d <- pcobj$svd
u <- predict(pcobj)$x/nobs.factor
v <- pcobj$scaling
d.total <- sum(d^2)
} else {
stop('Expected a object of class prcomp, princomp, PCA, or lda')
}

# Scores
choices <- pmin(choices, ncol(u))
df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*'))

# Directions
v <- sweep(v, 2, d^var.scale, FUN='*')
df.v <- as.data.frame(v[, choices])

names(df.u) <- c('xvar', 'yvar')
names(df.v) <- names(df.u)

if(pc.biplot) {
df.u <- df.u * nobs.factor
}

# Scale the radius of the correlation circle so that it corresponds to
# a data ellipse for the standardized PC scores
r <- sqrt(qchisq(circle.prob, df = 2)) * prod(colMeans(df.u^2))^(1/4)

# Scale directions
v.scale <- rowSums(v^2)
df.v <- r * df.v / sqrt(max(v.scale))

# Change the labels for the axes
if(obs.scale == 0) {
u.axis.labs <- paste('standardized PC', choices, sep='')
} else {
u.axis.labs <- paste('PC', choices, sep='')
}

# Append the proportion of explained variance to the axis labels
u.axis.labs <- paste(u.axis.labs,
sprintf('(%0.1f%% explained var.)',
100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2)))

# Score Labels
if(!is.null(labels)) {
df.u$labels <- labels
}

# Grouping variable
if(!is.null(groups)) {
df.u$groups <- groups
}

# Variable Names
if(varname.abbrev) {
df.v$varname <- abbreviate(rownames(v))
} else {
df.v$varname <- rownames(v)
}

# Variables for text label placement
df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar))
df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2)

# Base plot
g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) +
xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal()

xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal()

if(!is.null(var.subset)){
df.v <- df.v[df.v$varname %in% var.subset, ]
}

if(var.axes) {
# Draw circle
if(circle)
Expand All @@ -156,22 +166,22 @@ ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
g <- g + geom_path(data = circle, color = muted('white'),
size = 1/2, alpha = 1/3)
}

# Draw directions
g <- g +
geom_segment(data = df.v,
aes(x = 0, y = 0, xend = xvar, yend = yvar),
arrow = arrow(length = unit(1/2, 'picas')),
color = muted('red'))
color = arrows.col)
}

# Draw either labels or points
if(!is.null(df.u$labels)) {
if(!is.null(df.u$groups)) {
g <- g + geom_text(aes(label = labels, color = groups),
size = labels.size)
} else {
g <- g + geom_text(aes(label = labels), size = labels.size)
g <- g + geom_text(aes(label = labels), size = labels.size)
}
} else {
if(!is.null(df.u$groups)) {
Expand All @@ -180,12 +190,12 @@ ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
g <- g + geom_point(alpha = alpha)
}
}

# Overlay a concentration ellipse if there are groups
if(!is.null(df.u$groups) && ellipse) {
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- cbind(cos(theta), sin(theta))

ell <- ddply(df.u, 'groups', function(x) {
if(nrow(x) <= 2) {
return(NULL)
Expand All @@ -199,22 +209,39 @@ ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
names(ell)[1:2] <- c('xvar', 'yvar')
g <- g + geom_path(data = ell, aes(color = groups, group = groups))
}

# Label the variable axes
if(var.axes) {
g <- g +
geom_text(data = df.v,
aes(label = varname, x = xvar, y = yvar,
angle = angle, hjust = hjust),
color = 'darkred', size = varname.size)
if(var.repel){

g <- g +
geom_text_repel(data = df.v,
aes(label = varname, x = xvar, y = yvar,
#angle = angle,
hjust = hjust),
segment.alpha = .5,
segment.color = varname.col,
colour = varname.col,
size = varname.size)

}else{

g <- g +
geom_text_repel(data = df.v,
aes(label = varname, x = xvar, y = yvar,
angle = angle, hjust = hjust),
colour = varname.col,
size = varname.size)
}

}
# Change the name of the legend for groups
# if(!is.null(groups)) {
# g <- g + scale_color_brewer(name = deparse(substitute(groups)),
# palette = 'Dark2')
# }

# TODO: Add a second set of axes

return(g)
}
1 change: 1 addition & 0 deletions ggbiplot.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ LaTeX: XeLaTeX
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
86 changes: 41 additions & 45 deletions man/ggbiplot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.