For estimating batch effects using SVA, perform the following steps:
- Load the libraries and data:
# Attach sva, which supplies svaseq() for the surrogate variable analysis.
library(sva)
# Build the path to the serialized dataset under the current working
# directory, then read it in.
arab_path <- file.path(getwd(), "datasets", "ch1", "arabidopsis.RDS")
arab <- readRDS(arab_path)
- Filter out rows with too few counts, keeping only rows that have a count greater than 3 in at least two experiments:
# Keep rows whose value exceeds 3 in at least two columns. rowSums() over the
# logical matrix counts the qualifying columns per row in one vectorised call;
# na.rm = TRUE prevents a missing value from being counted as a pass.
keep <- rowSums(arab > 3, na.rm = TRUE) >= 2
# drop = FALSE keeps the result two-dimensional even when only a single row
# survives the filter (plain arab[keep, ] would collapse it to a vector).
arab_filtered <- arab[keep, , drop = FALSE]
- Create the initial design by assigning each sample to its treatment group:
# Treatment factor: three "mock" entries followed by three "hrcc" entries.
# Levels are sorted alphabetically, so "hrcc" is the reference level.
groups <- factor(rep(c("mock", "hrcc"), times = c(3, 3)))
- Set up the test and null models and run SVA:
# Full (test) model: intercept plus the treatment group term.
test_model <- model.matrix(~ groups)
# Null model: the intercept column only. drop = FALSE keeps it a one-column
# matrix instead of letting the subset collapse to a bare numeric vector.
null_model <- test_model[, 1, drop = FALSE]
# Run SVA on the filtered data, requesting a single surrogate variable.
svar <- svaseq(arab_filtered, mod = test_model, mod0 = null_model, n.sv = 1)
- Append the estimated surrogate variable to the test model to create a new design matrix for downstream use:
# New design: the test model with the "sv" element of the SVA result bound on
# as an additional column.
design <- cbind(test_model, svar[["sv"]])