## The snp137Common track must be downloaded directly, otherwise the
## request will time out:
##
##   wget ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/snp137Common.txt.gz
##   zcat snp137Common.txt.gz | cut -f2,3,4,5,7,14 | gzip > snp137Common.BioC.txt.gz
##
## All operations below work on the above summarized version of the track.
##
library(GenomicRanges)

## Convert a data.frame of genomic intervals into a GRanges object.
##
## Arguments:
##   df            a data.frame with start/end/chr columns.  Column names
##                 containing 'chromStart', 'chromEnd', 'chrom' or 'seqnames'
##                 (case-insensitive) are rewritten to 'start', 'end', 'chr'
##                 and 'chr' respectively before use.
##   keepColumns   if TRUE, every column other than rangename/chr/start/end/
##                 width/strand (and 'name', which becomes the range names)
##                 is attached as element metadata.
##   ignoreStrand  if TRUE, any 'strand' column is ignored when building the
##                 ranges.
##
## Value:
##   a GRanges with one range per row that has a non-missing start and end.
##   Names come from the 'name' column when present, else from the row names.
##   A 'genome' attribute on df, if any, is carried over via genome<-.
##
df2GR <- function(df, keepColumns=FALSE, ignoreStrand=FALSE){ # {{{ KDH

  library(GenomicRanges)
  stopifnot(inherits(df, "data.frame"))

  ## Normalise UCSC-style / GRanges-style column names.  The order matters:
  ## 'chromStart' and 'chromEnd' must be rewritten before the bare 'chrom'.
  subs <- list(chromStart = "start",
               chromEnd = "end",
               chrom = "chr",
               seqnames = "chr")
  for (s in names(subs)) {
    names(df) <- gsub(s, subs[[s]], names(df), ignore.case = TRUE)
  }
  stopifnot(all(c("chr", "start", "end") %in% names(df)))

  g <- if ("genome" %in% names(attributes(df))) attr(df, "genome") else NULL

  ## Prefix sequence names lacking 'chr'.  This is decided per row, so empty
  ## input and mixed input ('1', 'chrX') are both handled; df$chr is left
  ## untouched (including its type) when nothing needs a prefix.
  chr <- as.character(df$chr)
  needs_prefix <- !is.na(chr) & substr(chr, 1, 3) != "chr"
  if (any(needs_prefix)) {
    chr[needs_prefix] <- paste0("chr", chr[needs_prefix])
    df$chr <- chr
  }

  ## Rows without both coordinates cannot form a range.
  df <- df[!is.na(df$start) & !is.na(df$end), , drop = FALSE]

  if (!ignoreStrand && ("strand" %in% names(df))) {
    ## NOTE(review): strandMe() is not defined in the visible portion of this
    ## file; it must be provided elsewhere before numeric strands are used.
    if (is.numeric(df$strand)) df$strand <- strandMe(df$strand)
    GR <- GRanges(df$chr,
                  IRanges(start = df$start, end = df$end),
                  strand = df$strand)
  } else {
    GR <- GRanges(df$chr, IRanges(start = df$start, end = df$end))
  }

  if ("name" %in% names(df)) {
    names(GR) <- df$name
    df$name <- NULL   # already used as range names; keep it out of metadata
  } else {
    names(GR) <- rownames(df)
  }

  if (keepColumns) {
    skipped <- c("rangename", "chr", "start", "end", "width", "strand")
    elementMetadata(GR) <- as(df[, setdiff(names(df), skipped), drop = FALSE],
                              "DataFrame")
  }

  ## A metadata column called 'X' is dropped when entirely NA, otherwise it
  ## is renamed to 'score'.
  if ("X" %in% names(elementMetadata(GR))) {
    if (all(is.na(GR$X))) {
      GR$X <- NULL
    } else {
      names(elementMetadata(GR))[names(elementMetadata(GR)) == "X"] <- "score"
    }
  }

  if (!is.null(g)) genome(GR) <- g
  GR

} # }}}

## Create a GenomicRanges (GR) object of the snp137 common track
##
snp137common.df <- read.table("snp137Common.BioC.txt.gz")
## The summarized file is expected to hold exactly the six fields cut above.
stopifnot(ncol(snp137common.df) == 6L)
## NOTE(review): the last field kept by the cut command is column 14 of the
## UCSC table; it is labelled 'score' here -- confirm against the UCSC schema
## that this is the intended quantity.
names(snp137common.df) <- c("chrom", "chromStart", "chromEnd",
                            "name", "strand", "score")
snp137common.GR <- df2GR(snp137common.df, keepColumns = TRUE)
genome(snp137common.GR) <- "hg19"
show(snp137common.GR)
strand(snp137common.GR) <- "*"  ## UCSC strand annotation seems to be misleading

## UCSC's Single Nucleotide Polymorphisms are 2nt wide.  This will not do.
## (Presumably because the UCSC start coordinate is 0-based -- confirm.)
## The strand was reset to '*' above, so fix = 'end' anchors each range on
## its end coordinate.
##
snp137common.GR <- resize(snp137common.GR, width = 1, fix = "end")

##
## Fixed, now save it:
##
source("GenomicRangesToFeatureDb.R")
FDb.UCSC.snp137common.hg19 <- GenomicRangesToFeatureDb(
  snp137common.GR,
  URL = "http://genome.ucsc.edu/",
  tableName = "snp137common",
  src = "UCSC data table",
  label = "Common (MAF > 0.01) SNPs from dbSNP build 137"
)
saveDb(FDb.UCSC.snp137common.hg19, file = "FDb.UCSC.snp137common.hg19.sqlite")