/* entry_getters.cpp * * Created on: Nov 11, 2009 * Author: Adam Auton * ($Revision: 230 $) */ /* NOTE(review): line breaks in this file appear to have been collapsed and some text between angle brackets dropped; as laid out here every line comment runs to the end of its physical line, and several definitions below are visibly incomplete. Confirm against a pristine copy before relying on any of it. */ #include "entry.h" // Return the CHROMosome name string entry::get_CHROM() const { return CHROM; } // Return the CHROMosome name void entry::get_CHROM(string &out) const { out = CHROM; } /* Returns the POS member. */ int entry::get_POS() const { return POS; } /* Returns ID, or a lone dot when ID is empty. */ string entry::get_ID() const { if (ID.size() == 0) return "."; return ID; } /* Returns REF, or a lone dot when REF is the empty string. */ string entry::get_REF() const { if (REF == "") return "."; else return REF; } /* Asserts parsed_ALT, then builds the ALT string: a lone dot when ALT is empty or holds a single empty string, otherwise starting from ALT[0]. NOTE(review): the text after the loop initializer ui=1 appears to be missing; the code that follows it (ending in out = ALT[allele_num-1]) looks like the tail of a different definition whose beginning is not visible. */ string entry::get_ALT() const { assert(parsed_ALT == true); string out; if (ALT.empty()) out = "."; else if (ALT.size() == 1 && ALT[0] == "") out = "."; else { out = ALT[0]; for (unsigned int ui=1; ui= ALT.size())) out = "."; else out = ALT[allele_num-1]; } /* Asserts parsed_ALT, then maps an allele number to its string: -2 gives the empty string, 0 gives REF, any other negative value or a value past the end of ALT gives a lone dot, and otherwise ALT[allele_num-1] is returned. */ string entry::get_allele(int allele_num) const { assert(parsed_ALT == true); if (allele_num == -2) return ""; else if (allele_num == 0) return REF; else if ((allele_num < 0) || (unsigned(allele_num - 1) >= ALT.size())) return "."; else return ALT[allele_num-1]; } /* Asserts parsed_ALT, then indexes ALT directly: -2 gives the empty string, -1 or an index that is at or beyond ALT.size() after conversion to unsigned gives a lone dot, otherwise ALT[allele_num] is returned. */ string entry::get_ALT_allele(int allele_num) const { assert(parsed_ALT == true); if (allele_num == -2) return ""; else if ((allele_num == -1) || (unsigned(allele_num) >= ALT.size())) return "."; return ALT[allele_num]; } /* Asserts parsed_ALT, then fills out with REF at index 0 followed by a copy of every ALT entry. */ void entry::get_alleles_vector(vector &out) const { assert(parsed_ALT == true); out.resize(ALT.size()+1); out[0] = REF; copy(ALT.begin(), ALT.end(), out.begin()+1); } /* Returns the QUAL member. */ double entry::get_QUAL() const { return QUAL; } /* Asserts parsed_FILTER, then streams a lone dot when FILTER is empty, otherwise starts with FILTER[0]. NOTE(review): the text after the loop initializer ui=1 appears to be missing; the following fragment (out = FILTER) looks like the tail of a separate definition that copies FILTER into out. */ string entry::get_FILTER() const { assert(parsed_FILTER == true); ostringstream out; if (FILTER.empty()) out << "."; else { out << FILTER[0]; for (unsigned int ui=1; ui &out) const { assert(parsed_FILTER == true); out = FILTER; } /* Asserts parsed_INFO and clears a local ostringstream; the loop over INFO is entered only when keep_all_INFO is set or both INFO and INFO_to_keep are non-empty. NOTE(review): the text after the loop initializer ui=0 appears to be missing, including the return type of get_INFO_vector that follows. */ string entry::get_INFO(const set &INFO_to_keep, bool keep_all_INFO) const { assert(parsed_INFO == true); ostringstream sout; sout.str(""); sout.clear(); bool first=true; if ( ( (!INFO.empty()) && (!INFO_to_keep.empty()) ) || keep_all_INFO ) { string key; for (unsigned int ui=0; ui > entry::get_INFO_vector(const set &INFO_to_keep, 
bool keep_all_INFO) { /* get_INFO_vector: asserts parsed_INFO and returns INFO unchanged when keep_all_INFO is set; otherwise INFO is scanned only if both INFO and INFO_to_keep are non-empty. NOTE(review): the text after the loop initializer ui=0 appears to be missing, along with the return type of get_INFO_values below. */ assert(parsed_INFO == true); vector > out_vector; if (keep_all_INFO == true) return INFO; if ( (!INFO.empty()) && (!INFO_to_keep.empty()) ) { string key; for (unsigned int ui=0; ui entry::get_INFO_values(const string &key) const { /* Looks up the key with get_INFO_value and, unless the result equals a single question mark, splits it on commas via header::tokenize into out. */ vector out; string tmp; tmp = get_INFO_value(key); if (tmp != "?") header::tokenize(tmp, ',', out); return out; } /* Asserts parsed_FORMAT and begins building the FORMAT string. NOTE(review): the text after the loop initializer ui=0 appears to be missing; the following fragment (out = FORMAT_binary, guarded by an assert on parsed_FORMAT_binary) looks like the tail of a separate definition. */ string entry::get_FORMAT() const { assert(parsed_FORMAT == true); string out; bool first = true; for (unsigned int ui=0; ui &out) const { assert(parsed_FORMAT_binary == true); out = FORMAT_binary; } // Return the alleles of a genotype as a pair of strings. /* Asserts parsed_GT[indv]; both allele ids of GENOTYPE[indv] are converted with get_allele into two function-static scratch strings, which are then paired into out. */ void entry::get_indv_GENOTYPE_strings(unsigned int indv, pair &out) const { assert(parsed_GT[indv] == true); static string out_allele1, out_allele2; get_allele(GENOTYPE[indv].first, out_allele1); get_allele(GENOTYPE[indv].second, out_allele2); out = make_pair(out_allele1, out_allele2); } /* Asserts parsed_GT[indv] and copies GENOTYPE[indv] into out. */ void entry::get_indv_GENOTYPE_ids(unsigned int indv, pair &out) const { assert(parsed_GT[indv] == true); out = GENOTYPE[indv]; } /* Asserts parsed_GT[indv] and returns PHASE[indv]. */ char entry::get_indv_PHASE(unsigned int indv) const { assert(parsed_GT[indv] == true); return PHASE[indv]; } /* Asserts parsed_DP[indv]; returns -1 when DEPTH is empty, otherwise DEPTH[indv]. */ int entry::get_indv_DEPTH(unsigned int indv) const { assert(parsed_DP[indv] == true); if (DEPTH.empty()) return -1; return DEPTH[indv]; } /* Asserts parsed_GQ[indv]; returns -1 when GQUALITY is empty, otherwise GQUALITY[indv]. */ double entry::get_indv_GQUALITY(unsigned int indv) const { assert(parsed_GQ[indv] == true); if (GQUALITY.empty()) return -1; return GQUALITY[indv]; } /* Asserts parsed_FT[indv]; copies GFILTER[indv] into out, or empties out when GFILTER is empty. */ void entry::get_indv_GFILTER_vector(unsigned int indv, vector &out) const { assert(parsed_FT[indv] == true); if (!GFILTER.empty()) out = GFILTER[indv]; else out.resize(0); } /* Asserts parsed_FT[indv]; when GFILTER[indv] has entries, out is reset to the empty string before being rebuilt. NOTE(review): the text after the loop initializer ui=0 appears to be missing; the following fragment forwards to get_allele_counts with include_indv and include_genotype, which are not declared in the visible text. */ void entry::get_indv_GFILTER(unsigned int indv, string &out) const { assert(parsed_FT[indv] == true); if ((!GFILTER.empty()) && (GFILTER[indv].size()>0)) { out=""; for (unsigned int ui=0; ui &out, unsigned int &N_non_missing_chr_out) const { get_allele_counts(out, N_non_missing_chr_out, include_indv, include_genotype); } // Return the frequency (counts) of each allele. 
/* Counts alleles into a local vector sized by get_N_alleles(): each genotype allele index greater than -1 increments its slot and N_non_missing_chr_out; the tallies are then copied to out. NOTE(review): the text after the loop initializer ui=0 appears to be missing, so the loop condition and the start of its body are not visible. */ void entry::get_allele_counts(vector &out, unsigned int &N_non_missing_chr_out, const vector &include_indv, const vector &include_genotype) const { pair genotype; vector allele_counts(get_N_alleles(), 0); N_non_missing_chr_out = 0; for (unsigned int ui=0; ui -1) { allele_counts[genotype.first]++; N_non_missing_chr_out++; } if (genotype.second > -1) { allele_counts[genotype.second]++; N_non_missing_chr_out++; } } } out = allele_counts; } /* Zeroes the three output counters and calls LOG.error (code 99) when ALT has more than one entry. For genotypes whose two allele indices are both greater than -1: differing indices count as het, both 0 as hom1, both 1 as hom2, and anything else calls LOG.error (code 98). NOTE(review): the text after the loop initializer ui=0 appears to be missing. */ void entry::get_genotype_counts(const vector &include_indv, const vector &include_genotype, unsigned int &out_N_hom1, unsigned int &out_N_het, unsigned int &out_N_hom2) const { out_N_hom1 = 0; out_N_hom2 = 0; out_N_het = 0; pair genotype; if (ALT.size() > 1) LOG.error("Tried to return the genotype counts of a non-biallelic SNP", 99); for (unsigned int ui=0; ui -1) && (genotype.second > -1)) { if (genotype.first != genotype.second) out_N_het++; else if (genotype.first == 0) out_N_hom1++; else if (genotype.first == 1) out_N_hom2++; else LOG.error("Unknown allele in genotype", 98); } } } } /* Resets out_N_hom and out_N_het to ALT.size()+1 zeros. NOTE(review): the text after the loop initializer ui=0 appears to be missing; the following fragment, which stores POS - 1 as the raw bytes of a uint32_t in out, looks like the tail of a separate definition. */ void entry::get_multiple_genotype_counts(const vector &include_indv, const vector &include_genotype, vector &out_N_hom, vector &out_N_het) const { out_N_hom.assign(ALT.size()+1, 0); out_N_het.assign(ALT.size()+1, 0); pair genotype; for (unsigned int ui=0; ui &out) const { out.resize(sizeof(uint32_t)); uint32_t pos = POS - 1; memcpy(&out[0], &pos, sizeof(pos)); } /* Stores an int32_t in out as raw bytes: REF.length(), or 0 when REF is empty, a lone dot, or a single space. */ void entry::get_rlen(vector &out) const { out.resize(sizeof(int32_t)); int32_t rlen; if (REF != "" and REF != "." 
and REF != " ") rlen = (int32_t)REF.length(); else rlen = (int32_t)0; memcpy(&out[0], &rlen, sizeof(rlen)); } /* Stores QUAL, narrowed to float, in out as raw bytes. */ void entry::get_QUAL_binary(vector &out) const { out.resize(sizeof(float)); float qual = (float)QUAL; memcpy(&out[0], &qual, sizeof(qual)); } /* Stores one uint32_t in out as raw bytes: ALT.size()+1 shifted left by 16 bits, ORed with INFO.size()-N_INFO_removed. */ void entry::get_n_allele_info(vector &out) const { out.resize(sizeof(uint32_t)); uint32_t n_allele_info = (uint32_t)ALT.size() + 1; uint32_t n_info = (uint32_t)(INFO.size()-N_INFO_removed); n_allele_info = n_allele_info << 16; n_allele_info = n_allele_info | n_info; memcpy(&out[0], &n_allele_info, sizeof(n_allele_info)); } /* Stores one uint32_t in out as raw bytes: FORMAT.size()-N_FORMAT_removed shifted left by 24 bits, ORed with N_indv. */ void entry::get_n_fmt_sample(vector &out) const { out.resize(sizeof(uint32_t)); uint32_t n_fmt_sample = (uint32_t)(FORMAT.size()-N_FORMAT_removed); uint32_t n_sample = (uint32_t)N_indv; n_fmt_sample = n_fmt_sample << 24; n_fmt_sample = n_fmt_sample | n_sample; memcpy(&out[0], &n_fmt_sample, sizeof(n_fmt_sample)); } /* Delegates to make_typed_string with ID and a true flag. */ void entry::get_ID_binary(vector &out) { make_typed_string(out, ID, true ); } void entry::get_ALLELES_binary(vector &out) { vector tmp; out.resize(0); make_typed_string(tmp, REF, true ); out.insert(out.end(), tmp.begin(), tmp.end()); for (unsigned int ui=0; ui