R+Code+-+RNA-Seq

=R Code Examples - RNA-Seq=

For each example, I start with a data frame of count data that looks like the following.
 * || NPL32 || NPL34 || NPL44 || NPL46 || NPL56 || NPL61 || PL20 || PL31 || PL43 || PL55 || PL59 || PL62 ||
 * comp0_c0_seq1 || 1073920 || 1228123 || 5254066 || 1371825 || 967934 || 677390 || 232806 || 1123876 || 2613486 || 630659 || 1231571 || 229342 ||
 * comp1_c0_seq1 || 1918518 || 1726845 || 4206394 || 1314385 || 615831 || 939729 || 343003 || 1570057 || 1481815 || 1227947 || 1273835 || 420144 ||
 * comp2_c0_seq1 || 171037 || 243930 || 712047 || 646413 || 469036 || 166717 || 129706 || 629589 || 1289674 || 564959 || 332828 || 116855 ||
 * comp5_c0_seq1 || 147703 || 41802 || 781225 || 261413 || 299237 || 101581 || 73395 || 175481 || 433382 || 79492 || 304055 || 97796 ||
 * comp5_c1_seq1 || 121391 || 43070 || 656968 || 231221 || 305144 || 81390 || 50360 || 152298 || 430000 || 77287 || 252271 || 62325 ||
 * comp6_c0_seq1 || 342865 || 294385 || 638089 || 228506 || 102413 || 290270 || 121084 || 301103 || 341995 || 234032 || 224490 || 213252 ||

DESeq
code format="rsplus" deseq.sums.countData_l <- sums.count.data[,c(1:6, 13:18)] deseq.sums.conds_l <- factor(c(rep("NPL", 6), rep("PL", 6))) deseq.sums.cds_l <- newCountDataSet(countData = deseq.sums.countData_l, conditions = deseq.sums.conds_l) deseq.sums.cds_l <- estimateSizeFactors(deseq.sums.cds_l) deseq.sums.cds_l <- estimateDispersions(deseq.sums.cds_l) deseq.sums.res_l <- nbinomTest(deseq.sums.cds_l, "NPL", "PL") code
 * 1) Getting the data and setting up the conditional factor vector
 * 1) Creating the DESeq data object
 * 1) Normalizing
 * 1) Doing the variance calculation
 * 1) Running the test and getting the results

DESeq2
code format="rsplus" deseq2.sums.countData_l <- sums.count.data[,c(1:6, 13:18)] deseq2.sums.colData_l <- data.frame(condition=factor(c(rep("NPL", 6), rep("PL", 6))), type=factor(rep("single-read", 12))) rownames(deseq2.sums.colData_l) <- colnames(deseq2.sums.countData_l) deseq2.sums.dds_l <- DESeqDataSetFromMatrix(countData = deseq2.sums.countData_l, colData = deseq2.sums.colData_l, design = ~ condition) deseq2.sums.dds_l <- DESeq(deseq2.sums.dds_l) deseq2.sums.res_l <- results(deseq2.sums.dds_l) deseq2.sums.res_l <- deseq2.sums.res_l[order(rownames(deseq2.sums.res_l)), ] code
 * 1) Extracting the columns I want
 * 1) Setting up the dataframe that describes the data
 * 1) Creating the DESeq2 data object
 * 1) Running the analysis, getting the results and sorting them

edgeR
code format="rsplus" edger.dgel_l <- DGEList(counts=deseq2.sums.countData_l, group=factor(c(rep(1,6), rep(2,6)))) edger.dgel_l <- calcNormFactors(edger.dgel_l) edger.dgel_l <- estimateCommonDisp(edger.dgel_l) edger.dgel_l <- estimateTagwiseDisp(edger.dgel_l) edger.et_l <- exactTest(edger.dgel_l) edger.res_l <- topTags(edger.et_l, n=nrow, sort.by="none") edger.res_l$table <- edger.res_l$table[order(rownames(edger.res_l$table)), ] code
 * 1) Creating the edgeR data object
 * 1) Normalizing the data
 * 1) Some other stuff I'm not sure about
 * 1) Running the tests
 * 1) Getting the results and sorting them

baySeq
code format="rsplus" cl <- makeCluster(c(rep("localhost", 4)), type="SOCK") bayseq.cd_l <- new("countData", data=as.matrix(sums.count.data[,c(1:6, 13:18)]), replicates=c(rep("NPL", 6), rep("PL", 6)), groups=list(NDE=rep(1,12), DE=c(rep(1,6), rep(2,6)))) libsizes(bayseq.cd_l) <- getLibsizes(bayseq.cd_l) bayseq.cd_l@annotation <- data.frame(name=rownames(sums.count.data)) bayseq.cd_l <- getPriors.NB(bayseq.cd_l, samplesize=10000, estimation="QL", cl=cl) bayseq.cd_l <- getLikelihoods.NB(bayseq.cd_l, pET='BIC', cl=cl) bayseq.res_l <- topCounts(bayseq.cd_l, group="DE", number=nrow(sums.count.data)) bayseq.res_l <- bayseq.res_l[order(rownames(bayseq.res_l)), ] code
 * 1) Setting up a 'mini' cluster using the snow library, using 4 cores on the current computer
 * 1) Setting up the data object and description
 * 1) Normalizing
 * 1) Adding in annotation
 * 1) Running the Tests, getting results and sorting them

Plots
code format="rsplus" tmp <- deseq2.sums.res_l plot(tmp$baseMean, tmp$log2FoldChange, pch=20, cex=0.45, ylim=c(-3, 3), log="x", col="darkgray",    main="MA-Plot of NPL vs PL with DESeq2 Significant Results (pval <= 0.05)",     xlab="mean of normalized counts",     ylab="Log2 Fold Change") tmp.sig <- deseq2.sums.res_l[!is.na(deseq2.sums.res_l$padj) & deseq2.sums.res_l$padj <= 0.05, ] points(tmp.sig$baseMean, tmp.sig$log2FoldChange, pch=20, cex=0.45, col="red") abline(h=c(-1,1), col="blue") code
 * 1) The main plot
 * 1) Getting the significant points and plotting them again so they're a different color
 * 1) 2 FC lines