\documentclass{article} \title{Genomic context of (differential) peaks in mouse blasts/tubes} \usepackage[text={178mm,230mm},centering]{geometry} \usepackage{Sweave} \SweaveOpts{keep.source=TRUE,eps=FALSE,pdf=TRUE,width=9,height=10,prefix.string=figs/figs-mousepeaks} \setkeys{Gin}{width=0.98\textwidth} \begin{document} \maketitle \raggedright <>= library(chipseq) library(lattice) library(latticeExtra) set.seed(20081008) @ \section*{Combining myotube and myoblast lanes} <<>>= load("myodMyo.rda") library("BSgenome.Mmusculus.UCSC.mm9") mouse.seqlens <- seqlengths(Mmusculus) seqRanges <- lapply(myodMyo, extendReads) cblasts <- combineLanes(seqRanges[c("1","3","6")]) ctubes <- combineLanes(seqRanges[c("2","4","7")]) covblasts <- laneCoverage(cblasts, mouse.seqlens) covtubes <- laneCoverage(ctubes, mouse.seqlens) @ We will use one of the samples as a reference. An alternative is to combine the lanes and use that as the reference. Both lead to some problems in the subsequent regression (errors in predictor) that we ignore for now. <<>>= peakSummary.blasts.wrt.tubes <- diffPeakSummary(obs.ranges = cblasts, ref.ranges = ctubes, chrom.lens = mouse.seqlens, lower = 10) peakSummary.tubes.wrt.blasts <- diffPeakSummary(obs.ranges = ctubes, ref.ranges = cblasts, chrom.lens = mouse.seqlens, lower = 10) peakSummary.blasts.wrt.tubes <- within(peakSummary.blasts.wrt.tubes, { diffs <- log2(obs.sums)-log2(ref.sums) resids <- # prefer per-chromosome? (diffs - median(diffs)) / mad(diffs) resids.mean <- diffs - mean(diffs[is.finite(diffs)]) }) peakSummary.tubes.wrt.blasts <- within(peakSummary.tubes.wrt.blasts, { diffs <- log2(obs.sums)-log2(ref.sums) resids <- # prefer per-chromosome? (diffs - median(diffs)) / mad(diffs) resids.mean <- diffs - mean(diffs[is.finite(diffs)]) }) xyplot(log2(obs.sums) ~ log2(ref.sums) | chromosome, data = peakSummary.tubes.wrt.blasts, auto.key = TRUE, subset = (chromosome %in% c("chr1", "chr2") & obs.sums > 0), panel = function(x, y, ...) { panel.smoothScatter(x, y, ...) ## panel.xyplot(x, y, ...) panel.abline(median(y - x), 1) }, par.settings = simpleTheme(pch = ".", cex = 3), type = c("p", "g", "r"), col.line = "black", aspect = "iso") @ \newpage \begin{center} <>= plot(trellis.last.object()) @ \end{center} \newpage <<>>= bwplot(chromosome ~ resids, data = peakSummary.blasts.wrt.tubes) @ \begin{center} <>= plot(trellis.last.object()) @ \end{center} \newpage <<>>= bwplot(chromosome ~ resids, data = peakSummary.tubes.wrt.blasts) @ \begin{center} <>= plot(trellis.last.object()) @ \end{center} \newpage \section*{Peaks in genomic context} <>= data(geneMouse) ## gregions.500 <- genomic_regions(genes = geneMouse, proximal = 500) gregions.2000 <- genomic_regions(genes = geneMouse, proximal = 2000) gregions <- gregions.2000 gregions$gene <- as.character(gregions$gene) library(Matrix) library(IRanges) countHits <- function(subject, query) { sum(!is.na(findOverlaps(query, subject, multiple = FALSE))) } doPeakSet <- function(peaks, gregions) { query <- with(peaks, IRanges(start, end)) irangeByType <- function(type = c("promoter", "threeprime", "upstream", "downstream", "gene")) { type <- match.arg(type) istarts <- sprintf("%s.start", type) iends <- sprintf("%s.end", type) keep <- !duplicated(gregions[[istarts]]) ## what's the right thing to do here??? IRanges(start = gregions[[istarts]][keep], end = gregions[[iends]][keep]) } subject <- list(promoter = irangeByType("promoter"), threeprime = irangeByType("threeprime"), upstream = irangeByType("upstream"), downstream = irangeByType("downstream"), gene = irangeByType("gene")) c(total = length(query), sapply(subject, countHits, query = query)) } doChromosome <- function(chr) { gregions.sub <- subset(gregions, chrom == chr) tube.peaks <- subset(peakSummary.blasts.wrt.tubes, chromosome == chr) blast.peaks <- subset(peakSummary.tubes.wrt.blasts, chromosome == chr) ans <- as.data.frame(rbind(tube = doPeakSet(tube.peaks, gregions.sub), blast = doPeakSet(blast.peaks, gregions.sub), tube.only = doPeakSet(subset(tube.peaks, resids < -2), gregions.sub), blast.only = doPeakSet(subset(blast.peaks, resids < -2), gregions.sub))) ans <- cbind(type = factor(rownames(ans), levels = unique(rownames(ans))), ans) ans } ## all.chroms <- levels(peakSummary.blasts.wrt.tubes$chromosome) doAll <- function(chroms = paste("chr", 1:19, sep = "")) { ans <- do.call(make.groups, sapply(chroms, doChromosome, simplify = FALSE)) names(ans)[names(ans) == "which"] <- "chromosome" rownames(ans) <- NULL ans } ans <- doAll() sumtab <- as.data.frame(rbind(total = xtabs(total ~ type, ans), promoter = xtabs(promoter ~ type, ans), threeprime = xtabs(threeprime ~ type, ans), upstream = xtabs(upstream ~ type, ans), downstream = xtabs(downstream ~ type, ans), gene = xtabs(gene ~ type, ans))) sumtab <- within(sumtab, { `tube/blast` <- tube / blast `tube.only/blast.only` <- tube.only / blast.only }) @ <<>>= sumtab[c("tube", "blast", "tube/blast", "tube.only", "blast.only", "tube.only/blast.only")] @ \end{document}