\documentclass{article} \title{Are the different antibodies largely equivalent?} \usepackage[text={178mm,230mm},centering]{geometry} \usepackage{Sweave} \SweaveOpts{keep.source=TRUE,eps=FALSE,pdf=TRUE,width=9,height=10,prefix.string=figs/figs-islands} \setkeys{Gin}{width=0.98\textwidth} \begin{document} \maketitle \raggedright <>= library(lattice) library(chipseq) library(chipseqData) library(BSgenome.Mmusculus.UCSC.mm9) library(robustbase) ## fcorrelation <- function(x) { covMcd(x, cor = TRUE)$cor } ## OR: fcorrelation <- cor fcorrelation <- function(x) { cor(x, method = "spearman") } data(solexa54) data(myodMyo) data(myodFibro) set.seed(20081008) ## promoter information data(geneMouse) gregions <- genomic_regions(genes = geneMouse, proximal = 1000) gregions <- subset(gregions, chrom %in% paste("chr", 1:19, sep = "")) gregions$chrom <- gregions$chrom[drop = TRUE] gpromoters <- gregions[c("chrom", "promoter.start", "promoter.end")] names(gpromoters) <- c("chr", "start", "end") gpromoters.split <- split(gpromoters, gpromoters$chr) summarizeData <- function(edata, peak.ref, peak.cutoff = 6, ref = peak.ref, ref.cutoff = peak.cutoff, include = names(edata)) { peaks <- gdApply(edata[[peak.ref]], function(g, cutoff = peak.cutoff) { print(length(g)) IntervalTree(slice(coverage(g, width = max(end(g)) + 100L), lower = cutoff)) }) ## accumulate per-peak information peakSummary <- sapply(names(peaks), function(chr) { print(chr) chrpeaks <- peaks[[chr]] in.promoter <- !is.na(findOverlaps(chrpeaks, with(gpromoters.split[[chr]], IRanges(start, end)), multiple = FALSE)) countOverlapping <- function(x) { as.numeric(as.table(t(findOverlaps(edata[[x]][[chr]], chrpeaks, multiple = TRUE)))) } ans <- data.frame(start = start(chrpeaks), end = end(chrpeaks), promoter = ifelse(in.promoter, "In promoter", "Not in promoter")) for (nm in names(edata)) ans[[nm]] <- countOverlapping(nm) ans }, simplify = FALSE) peakSummary.df <- do.call(make.groups, peakSummary) rownames(peakSummary.df) <- NULL computeRates <- function(cutoff = 5) { mytab <- function(x) table(factor(x, levels = c(FALSE, TRUE))) ## dsub <- peakSummary.df ## [peakSummary.df[[ref]] >= ref.cutoff, ] dsub.promoter <- subset(peakSummary.df, promoter == "In promoter") dsub <- subset(peakSummary.df, promoter != "In promoter") props <- c(sapply(include, function(x) prop.table(mytab(dsub[[x]] >= cutoff))[2]), sapply(include, function(x) prop.table(mytab(dsub.promoter[[x]] >= cutoff))[2])) counts <- c(sapply(include, function(x) mytab(dsub[[x]] >= cutoff)[2]), sapply(include, function(x) mytab(dsub.promoter[[x]] >= cutoff)[2])) data.frame(cutoff = cutoff, ref = ref, ref.cutoff = ref.cutoff, promoter = rep(c("Not promoter", "Promoter"), each = length(include)), sample = rep(include, 2), proportion = props, counts = counts, stringsAsFactors = FALSE) } props <- do.call(rbind, lapply(1:15, computeRates)) list(peakSummary = peakSummary.df, props = props, peak.cutoff = peak.cutoff, peak.ref = peak.ref, include = include, nreads.ref = sum(unlist(lapply(edata[[peak.ref]], length)))) } plotSamples <- function(ereads, peak.ref, peak.cutoff = 12, ...) { foo <- summarizeData(ereads, peak.ref = peak.ref, peak.cutoff = peak.cutoff, include = names(ereads)) with(foo, xyplot(proportion ~ cutoff | promoter, data = props, type = c("g", "o"), groups = factor(sample, levels = include), auto.key = list(lines = TRUE, points = FALSE, columns = 3), xlab = "cutoff", ylab = "Proportion of peaks with number of overlapping reads >= cutoff ", main = sprintf("%s peaks, depth >= %g [%d reads, %d peaks]", peak.ref, peak.cutoff, nreads.ref, nrow(peakSummary)), ...)) } ## all.reads <- ## GenomeDataList(list(ctubes = combineLaneReads(myodMyo[c("2", "4", "7")]), ## cblasts = combineLaneReads(myodMyo[c("1", "3", "6")]), ## cfibromyod = combineLaneReads(myodFibro[c("2", "4", "7")]))) ## ereads <- gdApply(all.reads, ## function(x, seqLen = 200) { ## sort(extendReads(x, seqLen = seqLen)) ## }) ## all.reads <- myodFibro[c(1, 3, 5, 2, 4, 6, 7)] ## ereads <- gdApply(all.reads, ## function(x, seqLen = 200) { ## sort(extendReads(x, seqLen = seqLen)) ## }) @ <>= ## seemed interesting, but can't really say what caused a departure from the null ## (could be quality, could be different sets of peaks) peakProfile <- function(ereads, chr = "chr1", chrlens = seqlengths(Mmusculus), cutoffs = c(12, 20), nrep = 10L, ...) { ## take random subsets of 'combined' of size 'length(ref)', ## and compute number of peaks as function of cutoff. ## Provides null reference for same thing in 'ref'. refs <- lapply(ereads, "[[", chr) comb <- sort(do.call(c, refs)) all.refs.df <- lapply(refs, function(ref.chr) { nref <- length(ref.chr) ncomb <- length(comb) npeaks <- replicate(nrep, { sub <- comb[sort(sample.int(ncomb, nref))] cov <- coverage(sub, width = chrlens[chr]) data.frame(cutoff = cutoffs, npeaks = sapply(cutoffs, function(lower) length(slice(cov, lower = lower))), type = "subsample") }, simplify = FALSE) npeaks.ref <- { cov <- coverage(ref.chr, width = chrlens[chr]) data.frame(cutoff = cutoffs, npeaks = sapply(cutoffs, function(lower) length(slice(cov, lower = lower))), type = "observed") } do.call(rbind, c(npeaks, list(npeaks.ref))) }) stripplot(reorder(which, npeaks) ~ npeaks | factor(sprintf("cutoff = %g", cutoff)), do.call(make.groups, all.refs.df), jitter = TRUE, groups = type, pch = c(1, 16), xlab = sprintf("Number of peaks (%s)", chr), ...) } all.reads <- c(myodMyo) names(all.reads)[1:7] <- c("blast_1", "tube_1", "blast_2", "tube_2", "blast_3", "tube_3", "preimmune") ereads <- gdApply(all.reads, function(x, seqLen = 200) { sort(extendReads(x, seqLen = seqLen)) }) peakProfile(ereads[c(2, 4, 6)], chr = "chr5") @ <>= ## all.reads <- myodFibro[c(1, 3, 5, 2, 4, 6, 7)] ## names(all.reads)[1:7] <- ## c("fibro_7311", "fibro_6975", "fibro_6196", ## "fibromyod_7311", "fibromyod_6975", "fibromyod_6196", "beadonly") ## ereads <- gdApply(all.reads, ## function(x, seqLen = 200) { ## sort(extendReads(x, seqLen = seqLen)) ## }) ## plot(plotSamples(ereads, peak.ref = "fibromyod_6196", peak.cutoff = 20, ## par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), ## lty = 1:3, lwd = 2))) all.reads <- myodMyo[c(1, 3, 5, 2, 4, 6, 7)] names(all.reads)[1:7] <- c("blast_7311", "blast_6975", "blast_6196", "tube_7311", "tube_6975", "tube_6196", "preimmune") ereads <- gdApply(all.reads, function(x, seqLen = 200) { sort(extendReads(x, seqLen = seqLen)) }) @ We try to address the question in two ways. The first one is as follows: \begin{itemize} \item Choose one lane as the reference, and determine peaks (depth of 20 and 12 used here) \item For various other lanes, draw the folowing curve: \begin{itemize} \item For a given cutoff $k$, compute the proportion of peaks which have at least $k$ overlapping reads. \item Plot the proportion as a function of $k$. \end{itemize} \end{itemize} The results are shown in pages 2-13. For example, the first three plots use the three C2C12 myotube lanes as reference. The first thing we see is that for the pre-immune control lane, the proportion drops to almost 0 for cutoffs beyond 5, which is reassuring. In the second plot, using antibody 6975 as reference, we see that the other myotube lanes (as well as the other myoblast lanes) have fairly high overlap. This suggests that most peaks in myotube 6975 are also enriched in the other lanes. Similarly, the plot for antibody 6196 suggests that most peaks there are also enriched in antibody 7311, but not in antibody 6975. The plot for antibody 7311 suggests that some peaks there missing from both the other antibodies, more so in 6975 than 6196. Together, these suggest that the set of 6975 peaks is a subset of 6196 peaks, which is a further subset of 7311 peaks. \vspace{4mm} Myotubes seem generally more similar to each other than to the myoblasts. \vspace{4mm} The other approach is described later (page 14). \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "tube_7311", peak.cutoff = 20, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "tube_6975", peak.cutoff = 20, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "tube_6196", peak.cutoff = 20, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "blast_7311", peak.cutoff = 20, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "blast_6975", peak.cutoff = 20, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "blast_6196", peak.cutoff = 20, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} %% now for depth 12 \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "tube_7311", peak.cutoff = 12, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "tube_6975", peak.cutoff = 12, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "tube_6196", peak.cutoff = 12, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "blast_7311", peak.cutoff = 12, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "blast_6975", peak.cutoff = 12, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = "blast_6196", peak.cutoff = 12, par.settings = simpleTheme(col = c(2, 2, 2, 4, 4, 4, 3), lty = 1:3, lwd = 2))) @ \end{center} <>= ## Another (simple) idea: look at peaks in combined data, then compute ## correlation in number of overlapping reads overlapData <- function(edata, peak.cutoff = 6, chromosomes = names(edata[[1]]), chrlens = seqlengths(Mmusculus)) { pb <- txtProgressBar(1, length(chromosomes), style = 3) pbi <<- 1 comb <- sapply(chromosomes, function(chr) { setTxtProgressBar(pb, pbi) pbi <<- pbi + 1 components <- lapply(edata, "[[", chr) sort(do.call(c, components)) }, simplify = FALSE) pbi <<- 1 peaks <- sapply(chromosomes, function(chr) { setTxtProgressBar(pb, pbi) pbi <<- pbi + 1 IntervalTree(slice(coverage(comb[[chr]], width = chrlens[chr]), lower = peak.cutoff)) }, simplify = FALSE) ## accumulate per-peak information pbi <<- 1 peakSummary <- sapply(names(peaks), function(chr) { setTxtProgressBar(pb, pbi) pbi <<- pbi + 1 ## print(chr) chrpeaks <- peaks[[chr]] in.promoter <- !is.na(findOverlaps(chrpeaks, with(gpromoters.split[[chr]], IRanges(start, end)), multiple = FALSE)) countOverlapping <- function(x) { as.numeric(as.table(t(findOverlaps( edata[[x]][[chr]], chrpeaks, multiple = TRUE)))) } ans <- data.frame(start = start(chrpeaks), end = end(chrpeaks), promoter = ifelse(in.promoter, "In promoter", "Not in promoter")) for (nm in names(edata)) ans[[nm]] <- sqrt(countOverlapping(nm)) ans }, simplify = FALSE) peakSummary.df <- do.call(make.groups, peakSummary) rownames(peakSummary.df) <- NULL ## compute correlations for subsamples fake.edata <- function() { x <- edata for (chr in chromosomes) { lens <- cumsum(c(0, unlist(lapply(x, function(u) length(u[[chr]]))))) i <- sample.int(length(comb[[chr]]), length(comb[[chr]])) for (k in seq_len(length(x))) { x[[k]][[chr]] <- sort(comb[[chr]][i[ seq(lens[k]+1, lens[k+1]) ]]) } } x } corSummary <- function() { pbi <<- 1 fdata <- fake.edata() simPeakSummary <- sapply(names(peaks), function(chr) { setTxtProgressBar(pb, pbi) pbi <<- pbi + 1 ## generate fake dataset similar to edata chrpeaks <- peaks[[chr]] in.promoter <- !is.na(findOverlaps(chrpeaks, with(gpromoters.split[[chr]], IRanges(start, end)), multiple = FALSE)) countOverlapping <- function(x) { as.numeric(as.table(t(findOverlaps( fdata[[x]][[chr]], chrpeaks, multiple = TRUE)))) } ans <- data.frame(promoter = ifelse(in.promoter, "In promoter", "Not in promoter")) for (nm in names(fdata)) ans[[nm]] <- sqrt(countOverlapping(nm)) ans }, simplify = FALSE) simPeakSummary.df <- do.call(rbind, simPeakSummary) simdsub.promoter <- subset(simPeakSummary.df, promoter == "In promoter")[names(edata)] simdsub.rest <- subset(simPeakSummary.df, promoter != "In promoter")[names(edata)] list(rest = fcorrelation(simdsub.rest), promoter = fcorrelation(simdsub.promoter)) } cors <- replicate(10, corSummary(), simplify = FALSE) close(pb) dsub.promoter <- subset(peakSummary.df, promoter == "In promoter")[names(edata)] dsub.rest <- subset(peakSummary.df, promoter != "In promoter")[names(edata)] list(peakSummary = peakSummary.df, cors.obs = list(rest = fcorrelation(dsub.rest), promoter = fcorrelation(dsub.promoter)), cors.null = cors, peak.cutoff = peak.cutoff) } extract.cors <- function(x) { cor.names <- with(x$cors.obs, outer(rownames(rest), colnames(rest), FUN = paste, sep = ":")[lower.tri(rest)]) null.cors <- sapply(x$cors.null, function(u) { with(u, c(rest[lower.tri(rest)], promoter[lower.tri(promoter)])) }) colnames(null.cors) <- sprintf("null_%g", seq_len(ncol(null.cors))) obs.cors <- with(x$cors.obs, c(rest[lower.tri(rest)], promoter[lower.tri(promoter)])) data.frame(cors = c(obs.cors, null.cors), sample = rep(c("observed", "subsampled"), c(length(obs.cors), length(null.cors))), type = c(sprintf("rest_%s", cor.names), sprintf("promoter_%s", cor.names))) } @ \newpage One limitation of the first approach is that the results are affected by sample size; in particular, more reads would move the curves up, and it is not clear how their shapes would change. This is not easy to address, but we provide a second method which is hopefully more robust to sample size changes. \vspace{4mm} The idea is as follows: When comparing several lanes, \begin{itemize} \item Determine peaks in the combined data. \item For each peak, compute number of overlapping reads \item Compute pairwise correlations \end{itemize} The correlations are generally high, suggesting that there is substantial overlap in the peak sets. To determine if the overlap is comparable to what we would expect under replication, we generate observations from a reference distribution as follows: \begin{itemize} \item Start with all reads in the combined data \item Randomly subsample reads and assign to each lane keeping the total number of reads unchanged (do this on a per-chromosome basis). \item Compute correlations as above from this fake dataset. \item Repeat this several times (10 in the examples shown here). \end{itemize} We then plot the reference values and the actual observed value. In summary, the observed correlations are usually smaller than what we woould expect if the lanes were exact replicates, but the differences are not big. So, even though there are some differences in the sets of binding sites enriched in the different lanes, they are largely similar, and we are justified in combining data from the three antibodies. \vspace{4mm} Similar plots for the primary mouse myotubes are shown later (page 17 onwards). \newpage <>= all.reads <- myodMyo[c(1, 3, 5, 2, 4, 6, 7)] names(all.reads)[1:7] <- c("blast_7311", "blast_6975", "blast_6196", "tube_7311", "tube_6975", "tube_6196", "preimmune") ereads <- gdApply(all.reads, function(x, seqLen = 200) { sort(extendReads(x, seqLen = seqLen)) }) foo <- overlapData(ereads[4:6], peak.cutoff = 20) @ \begin{center} <>= plot(splom(~peakSummary[-c(1, 2, 3, ncol(foo$peakSummary))] | peakSummary$promoter, data = foo, pch = ".", cex = 2, main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff), xlab = "Scatter plot matrix of sqrt(number of overlapping reads)")) @ <>= plot(stripplot(type ~ cors, data = extract.cors(foo), groups = sample, jitter = TRUE, pch = c(16, 1), xlab = "Pairwise correlation of sqrt(number of overlapping reads), observed and subsampled", main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff))) @ \end{center} \newpage <>= foo <- overlapData(ereads[1:3], peak.cutoff = 20) @ \begin{center} <>= plot(splom(~peakSummary[-c(1, 2, 3, ncol(foo$peakSummary))] | peakSummary$promoter, data = foo, pch = ".", cex = 2, main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff), xlab = "Scatter plot matrix of sqrt(number of overlapping reads)")) @ <>= plot(stripplot(type ~ cors, data = extract.cors(foo), groups = sample, jitter = TRUE, pch = c(16, 1), xlab = "Pairwise correlation of sqrt(number of overlapping reads), observed and subsampled", main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff))) @ \end{center} \newpage The next few pages give similar plots for the Primary mouse myotubes. The first two plots seem to say that 6196 peaks are a (small) subset of 6975 peaks. However, note the very large difference in the number of peaks (even though the number of reads is comparable), suggesting some non-biological problem in the 6196 lane. \newpage <>= all.reads <- solexa54[c("7", "8")] names(all.reads) <- c("real_6975", "real_6196") ereads <- gdApply(all.reads, function(x, seqLen = 200) { sort(extendReads(x, seqLen = seqLen)) }) foo <- overlapData(ereads, peak.cutoff = 20) @ \begin{center} <>= plot(plotSamples(ereads, peak.ref = names(ereads)[1], peak.cutoff = 20, ylim = extendrange(c(0, 1)))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = names(ereads)[2], peak.cutoff = 20, ylim = extendrange(c(0, 1)))) @ \end{center} \newpage \begin{center} <>= ## xyplot(real_6975^2 ~ real_6196^2 | ifelse(promoter, "In promoter", "Not in promoter"), ## data = foo$peakSummary, pch = ".", cex = 2, ## main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff), ## xlab = "Number of overlapping reads in real_6196", ## ylab = "Number of overlapping reads in real_6975", alpha = 0.5) ## xtabs(~I(real_6975^2 >= 20) + I(real_6196^2 >= 20), ## data = foo$peakSummary) ## xyplot(real_6975^2 ~ real_6196^2 | ifelse(promoter, "In promoter", "Not in promoter"), ## data = foo$peakSummary, pch = ".", cex = 2, ## panel = panel.smoothScatter, ## main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff), ## xlab = "Number of overlapping reads in real_6196", ## ylab = "Number of overlapping reads in real_6975") plot(splom(~peakSummary[-c(1, 2, 3, ncol(peakSummary))] | peakSummary$promoter, data = foo, pch = ".", cex = 2, main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff), xlab = "Scatter plot matrix of sqrt(number of overlapping reads)")) @ <>= plot(stripplot(type ~ cors, data = extract.cors(foo), groups = sample, jitter = TRUE, pch = c(16, 1), xlab = "Pairwise correlation of sqrt(number of overlapping reads), observed and subsampled", main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff))) @ \end{center} \newpage To address Yi's question: ``Could you compare the real 6196tube and real 6975tube to the 3 separate antibody lanes from C2 myotubes? The figures on pages 18-20 indicate that real 6196 peaks are a subset of real 6975 peaks. I am wondering if that is because real 6975 is just too much better than all other lanes we have run so far, or real 6196 is not of high quality.'' \vspace{4mm} Also included is the combined Fibroblast+MyoD. <>= all.reads <- c(myodMyo[c("2", "4", "7")], solexa54[c("7", "8")], GenomeDataList(list(cfibromyod = combineLaneReads(myodFibro[c("2", "4", "7")])))) names(all.reads) <- c("tube_7311", "tube_6975", "tube_6196", "real_6975", "real_6196", "cfibromyod") ereads <- gdApply(all.reads, function(x, seqLen = 200) { sort(extendReads(x, seqLen = seqLen)) }) @ \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = names(ereads)[1], peak.cutoff = 20, ylim = extendrange(c(0, 1)))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = names(ereads)[2], peak.cutoff = 20, ylim = extendrange(c(0, 1)))) @ \end{center} \newpage \begin{center} <>= plot(plotSamples(ereads, peak.ref = names(ereads)[3], peak.cutoff = 20, ylim = extendrange(c(0, 1)))) @ \end{center} %%%%%%%%%%%%%%% \newpage (Not directly relevant) Comparison of Fibroblast+MyoD with myotubes using correlation. <>= all.reads <- GenomeDataList(list(ctubes = combineLaneReads(myodMyo[c("2", "4", "7")]), cfibromyod = combineLaneReads(myodFibro[c("2", "4", "7")]))) ## primary_6975 = solexa54[["7"]])) ereads <- gdApply(all.reads, function(x, seqLen = 200) { sort(extendReads(x, seqLen = seqLen)) }) foo <- overlapData(ereads, peak.cutoff = 20) @ \begin{center} <>= plot(splom(~peakSummary[-c(1, 2, 3, ncol(peakSummary))] | peakSummary$promoter, data = foo, pch = ".", cex = 2, main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff), xlab = "Scatter plot matrix of sqrt(number of overlapping reads)")) @ <>= plot(splom(~peakSummary[-c(1, 2, 3, ncol(peakSummary))] | peakSummary$promoter, data = foo, pch = ".", cex = 2, panel = panel.smoothScatter, main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff), xlab = "Scatter plot matrix of sqrt(number of overlapping reads)")) @ <>= plot(stripplot(type ~ cors, data = extract.cors(foo), groups = sample, jitter = TRUE, pch = c(16, 1), xlab = "Pairwise correlation of sqrt(number of overlapping reads), observed and subsampled", main = sprintf("Combined data, peaks of depth >= %g", foo$peak.cutoff))) @ \end{center} \end{document}