A colleague of mine recently asked about computing basic summary statistics from grouped data in R. Here are a couple of examples that I suggested. Additional documentation for the plyr package can be found in the package's reference manual (run help(package = "plyr") in R).
Code Snippets
# load libraries
library(lattice) # nice looking plots
library(plyr) # advanced aggregation functions
# Simulate a small grouped dataset: 100 random obs, 5 treatments, 2 depths.
# The seed is fixed so the random draws are reproducible.
set.seed(1)
n_obs <- 100
x <- rnorm(n_obs)
# generate treatment labels: runs of 4 (a a a a b b b b ...), cycled explicitly
# to one label per observation instead of relying on data.frame() recycling
treatment <- rep(letters[1:5], each = 4, length.out = n_obs)
# generate depth labels: the two depth classes alternate row by row
depth <- rep(c("0-10", "10-20"), length.out = n_obs)
# combine into a single dataframe
# The grouping columns are stored as factors: the `treatment:depth` term in the
# plotting formula further down is only defined for factors, and since R 4.0
# data.frame() would otherwise keep these columns as character.
d <- data.frame(x, treatment, depth, stringsAsFactors = TRUE)
# check out the dataframe:
str(d)
head(d)
# Box-and-whisker plot of x for every treatment/depth combination, as a quick
# visual check of the data.
# `treatment:depth` crosses the two grouping columns; `:` builds an interaction
# only when both columns are factors.
bwplot(
  x ~ treatment:depth,
  data = d,
  xlab = 'Treatment / Group',
  ylab = 'Measured Variable',
  scales = list(
    x = list(rot = 45, cex = 0.75),          # rotate the group labels
    y = list(tick.number = 10, cex = 0.75)   # ask for about 10 y-axis ticks
  )
)
# Median of x within every treatment x depth combination.
# The grouping list is unnamed, so aggregate() names the grouping columns itself.
with(d, aggregate(x, by = list(treatment, depth), FUN = median))
# Columns of the result:
#   Group.1 - the treatment
#   Group.2 - the depth
#   x       - the median
Group.1 Group.2 x
1 a 0-10 0.382152173
2 b 0-10 0.347044867
3 c 0-10 0.384062345
4 d 0-10 0.499198983
5 e 0-10 -0.191705870
6 a 10-20 -0.005618922
7 b 10-20 0.066331780
8 c 10-20 0.328471014
9 d 10-20 -0.049369325
10 e 10-20 -0.097184000
# Alternative using plyr::ddply():
# split d by treatment and depth, run summary() on x within each piece,
# and get the pieces back combined into one data frame.
ddply(
  .data = d,
  .variables = .(treatment, depth),
  .fun = function(grp) summary(grp[["x"]])
)
# the result looks like this:
treatment depth Min. 1st Qu. Median Mean 3rd Qu. Max.
1 a 0-10 -0.8356 -0.46760 0.382200 0.376500 0.8635 2.4020
2 b 0-10 -1.8050 -0.55550 0.347000 0.006561 0.5673 1.0630
3 c 0-10 -0.5425 -0.04595 0.384100 0.371000 0.5507 1.5120
4 d 0-10 -1.3770 -0.38070 0.499200 0.339300 1.1520 1.5870
5 e 0-10 -1.2770 -0.43100 -0.191700 -0.115700 0.4459 1.1000
6 a 10-20 -1.9890 -0.22380 -0.005619 -0.079500 0.4634 1.5950
7 b 10-20 -1.4710 -0.60670 0.066330 0.013510 0.6370 1.4660
8 c 10-20 -0.7099 -0.25470 0.328500 0.360600 0.7653 2.1730
9 d 10-20 -2.2150 -0.80430 -0.049370 -0.126100 0.4917 1.9800
10 e 10-20 -1.0440 -0.54830 -0.097180 -0.057270 0.4457 0.9438
library(lattice) # nice looking plots
library(plyr) # advanced aggregation functions
# Simulate a small grouped dataset: 100 random obs, 5 treatments, 2 depths.
# The seed is fixed so the random draws are reproducible.
set.seed(1)
n_obs <- 100
x <- rnorm(n_obs)
# generate treatment labels: runs of 4 (a a a a b b b b ...), cycled explicitly
# to one label per observation instead of relying on data.frame() recycling
treatment <- rep(letters[1:5], each = 4, length.out = n_obs)
# generate depth labels: the two depth classes alternate row by row
depth <- rep(c("0-10", "10-20"), length.out = n_obs)
# combine into a single dataframe
# The grouping columns are stored as factors: the `treatment:depth` term in the
# plotting formula further down is only defined for factors, and since R 4.0
# data.frame() would otherwise keep these columns as character.
d <- data.frame(x, treatment, depth, stringsAsFactors = TRUE)
# check out the dataframe:
str(d)
head(d)
# Box-and-whisker plot of x for every treatment/depth combination, as a quick
# visual check of the data.
# `treatment:depth` crosses the two grouping columns; `:` builds an interaction
# only when both columns are factors.
bwplot(
  x ~ treatment:depth,
  data = d,
  xlab = 'Treatment / Group',
  ylab = 'Measured Variable',
  scales = list(
    x = list(rot = 45, cex = 0.75),          # rotate the group labels
    y = list(tick.number = 10, cex = 0.75)   # ask for about 10 y-axis ticks
  )
)
# Median of x within every treatment x depth combination.
# The grouping list is unnamed, so aggregate() names the grouping columns itself.
with(d, aggregate(x, by = list(treatment, depth), FUN = median))
# Columns of the result:
#   Group.1 - the treatment
#   Group.2 - the depth
#   x       - the median
Group.1 Group.2 x
1 a 0-10 0.382152173
2 b 0-10 0.347044867
3 c 0-10 0.384062345
4 d 0-10 0.499198983
5 e 0-10 -0.191705870
6 a 10-20 -0.005618922
7 b 10-20 0.066331780
8 c 10-20 0.328471014
9 d 10-20 -0.049369325
10 e 10-20 -0.097184000
# Alternative using plyr::ddply():
# split d by treatment and depth, run summary() on x within each piece,
# and get the pieces back combined into one data frame.
ddply(
  .data = d,
  .variables = .(treatment, depth),
  .fun = function(grp) summary(grp[["x"]])
)
# the result looks like this:
treatment depth Min. 1st Qu. Median Mean 3rd Qu. Max.
1 a 0-10 -0.8356 -0.46760 0.382200 0.376500 0.8635 2.4020
2 b 0-10 -1.8050 -0.55550 0.347000 0.006561 0.5673 1.0630
3 c 0-10 -0.5425 -0.04595 0.384100 0.371000 0.5507 1.5120
4 d 0-10 -1.3770 -0.38070 0.499200 0.339300 1.1520 1.5870
5 e 0-10 -1.2770 -0.43100 -0.191700 -0.115700 0.4459 1.1000
6 a 10-20 -1.9890 -0.22380 -0.005619 -0.079500 0.4634 1.5950
7 b 10-20 -1.4710 -0.60670 0.066330 0.013510 0.6370 1.4660
8 c 10-20 -0.7099 -0.25470 0.328500 0.360600 0.7653 2.1730
9 d 10-20 -2.2150 -0.80430 -0.049370 -0.126100 0.4917 1.9800
10 e 10-20 -1.0440 -0.54830 -0.097180 -0.057270 0.4457 0.9438
No comments:
Post a Comment