// NOTE(review): this copy of the file is collapsed onto a few long lines and appears to have lost the text that stood between angle brackets (template arguments, loop conditions, whole spans of code). The inline comments below document only what is still visible; confirm against the upstream file before relying on it.
/* * parameters.cpp * * Created on: Nov 11, 2009 * Author: Adam Auton * ($Revision: 249 $) */ // Class for reading in, checking and storing user parameters #include "parameters.h" /* Constructor. Sets stream_err to false when isatty(STDERR_FILENO) reports a terminal and to true otherwise, then assigns a default value to each option member listed below. NOTE(review): the loop header after string tmp is truncated in this copy; presumably it copies the argv[] entries into a member container via push_back. The numeric_limits::max() calls are also missing their template arguments. */ parameters::parameters(int argc, char *argv[]) { if (isatty(STDERR_FILENO)) stream_err = false; else stream_err = true; string tmp; for (int i=0; iargv.push_back(tmp); } bcf_format = false; BED_exclude = false; BED_file = ""; chrom_map_file = ""; contigs_file = ""; derived = false; diff_discordance_matrix = false; diff_file = ""; diff_file_bcf = false; diff_file_compressed = false; diff_indv = false; diff_indv_discordance = false; diff_indv_map_file = ""; diff_site = false; diff_site_discordance = false; diff_switch_error = false; end_pos = numeric_limits::max(); exclude_positions_file = ""; exclude_positions_overlap_file = ""; fst_window_size = -1; fst_window_step = -1; hapcount_BED = ""; invert_mask = false; keep_only_indels = false; recode_all_INFO = false; ld_bp_window_size = numeric_limits::max(); ld_snp_window_size = numeric_limits::max(); ld_bp_window_min = -1; ld_snp_window_min = -1; min_mac = -1; min_maf = -1.0; mask_file = ""; max_alleles = numeric_limits::max(); max_genotype_depth = numeric_limits::max(); max_mac = numeric_limits::max(); max_maf = numeric_limits::max(); max_mean_depth = numeric_limits::max(); max_missing_call_count = numeric_limits::max(); max_non_ref_ac = numeric_limits::max(); max_non_ref_af = numeric_limits::max(); max_non_ref_ac_any = numeric_limits::max(); max_non_ref_af_any = numeric_limits::max(); max_N_indv = -1; mendel_ped_file = ""; min_alleles = -1; min_genotype_depth = -1; min_genotype_quality = -1.0; min_HWE_pvalue = -1.0; min_interSNP_distance = -1; min_kept_mask_value = 0; min_mean_depth = -1.0; min_quality = -1.0; min_r2 = -1.0; min_site_call_rate = 0; min_non_ref_ac = -1; min_non_ref_af = -1.0; min_non_ref_ac_any = -1; min_non_ref_af_any = -1.0; num_outputs = 0; output_012_matrix = false; output_as_IMPUTE = false; output_as_ldhat_phased = 
false; output_as_ldhat_unphased = false; output_BEAGLE_genotype_likelihoods_GL = false; output_BEAGLE_genotype_likelihoods_PL = false; output_counts = false; output_filter_summary = false; output_freq = false; output_geno_depth = false; output_geno_chisq = false; output_geno_rsq = false; output_hap_rsq = false; output_het = false; output_HWE = false; output_indel_hist = false; output_indv_burden = false; output_indv_depth = false; output_indv_freq_burden = false; output_indv_freq_burden2 = false; output_indv_missingness = false; output_interchromosomal_hap_rsq = false; output_interchromosomal_geno_rsq = false; output_kept_sites = false; output_LROH = false; output_N_PCA_SNP_loadings = -1; output_PCA = false; output_prefix="out"; output_relatedness_Yang = false; output_relatedness_Manichaikul = false; output_removed_sites = false; output_singletons = false; output_site_depth = false; output_site_mean_depth = false; output_site_missingness = false; output_site_pi=false; output_site_quality = false; output_SNP_density_bin_size = 0; output_Tajima_D_bin_size = 0; output_TsTv_bin_size = 0; output_TsTv_by_count = false; output_TsTv_by_qual = false; output_TsTv_summary = false; phased_only = false; PCA_no_normalisation = false; pi_window_size = 0; pi_window_step = 0; plink_output = false; plink_tped_output = false; positions_file = ""; positions_overlap_file = ""; recode = false; recode_bcf = false; remove_all_filtered_genotypes = false; remove_all_filtered_sites = false; remove_indels = false; snps_to_exclude_file = ""; snps_to_keep_file = ""; start_pos = -1; stream_in = false; stream_out = false; suppress_allele_output = false; temp_dir = "/tmp/"; vcf_filename=""; vcf_format = false; vcf_compressed = false; } /* read_parameters. NOTE(review): almost all of this function is missing from this copy. The surviving tail calls error with the message Requested Missing Argument and code 76 and then returns argv[i], which presumably belongs to a separate bounds-checked argument accessor rather than to this void function; confirm against the upstream file. */ void parameters::read_parameters() { unsigned int i=1; string in_str; while (i=argv.size()) error("Requested Missing Argument",76); return argv[i]; } /* print_params. Writes the options in effect through LOG.printLOG. A parameters object built with (0, 0) supplies the default values; scalar and string options are printed only when they differ from that baseline, and set-valued options are printed one line per element. An input filename of - is shown as [stdin]. */ void parameters::print_params() { parameters defaults(0, 0); LOG.printLOG("Parameters as interpreted:\n"); string 
tmp_name = vcf_filename; if (tmp_name == "-") tmp_name = "[stdin]"; if (bcf_format == true) LOG.printLOG("\t--bcf " + tmp_name + "\n"); else if (vcf_format == true && vcf_compressed == false) LOG.printLOG("\t--vcf " + tmp_name + "\n"); else if (vcf_format == true && vcf_compressed == true) LOG.printLOG("\t--gzvcf " + tmp_name + "\n"); if (chrs_to_keep.size() > 0) { for (set::iterator it=chrs_to_keep.begin(); it != chrs_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--chr " + tmp + "\n"); } } if (chrs_to_exclude.size() > 0) { for (set::iterator it=chrs_to_exclude.begin(); it != chrs_to_exclude.end(); ++it) { string tmp = *it; LOG.printLOG("\t--not-chr " + tmp + "\n"); } } if (chrom_map_file != defaults.chrom_map_file) LOG.printLOG("\t--chrom-map " + chrom_map_file + "\n"); if (contigs_file != defaults.contigs_file) LOG.printLOG("\t--contigs " + contigs_file + "\n"); if (derived != defaults.derived) LOG.printLOG("\t--derived\n"); if (end_pos != defaults.end_pos) LOG.printLOG("\t--to-bp " + output_log::int2str(end_pos) + "\n"); if (exclude_positions_file != defaults.exclude_positions_file) LOG.printLOG("\t--exclude-positions " + exclude_positions_file + "\n"); if (exclude_positions_overlap_file != defaults.exclude_positions_overlap_file) LOG.printLOG("\t--exclude-positions-overlap " + exclude_positions_overlap_file + "\n"); if (FORMAT_id_to_extract != defaults.FORMAT_id_to_extract) LOG.printLOG("\t--extract-FORMAT-info " + FORMAT_id_to_extract + "\n"); if (geno_rsq_position_list != defaults.geno_rsq_position_list) LOG.printLOG("\t--geno-r2-positions " + geno_rsq_position_list + "\n"); if (hap_rsq_position_list != defaults.hap_rsq_position_list) LOG.printLOG("\t--hap-r2-positions " + hap_rsq_position_list + "\n"); if (fst_window_size != defaults.fst_window_size) LOG.printLOG("\t--fst-window-size " + output_log::int2str(fst_window_size) + "\n"); if (fst_window_step != defaults.fst_window_step) LOG.printLOG("\t--fst-window-step " + 
output_log::int2str(fst_window_step) + "\n"); /* NOTE(review): the loop that follows is truncated in this copy; the text resumes inside a later size() > 0 test, so the options printed in between are not visible here. */ if (weir_fst_populations.size() != 0) { for (unsigned int ui=0; ui 0) for (set::iterator it=site_filter_flags_to_exclude.begin(); it != site_filter_flags_to_exclude.end(); ++it) { string tmp = *it; LOG.printLOG("\t--remove-filtered " + tmp + "\n"); } if (site_filter_flags_to_keep.size() > 0) for (set::iterator it=site_filter_flags_to_keep.begin(); it != site_filter_flags_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--keep-filtered " + tmp + "\n"); } if (geno_filter_flags_to_exclude.size() > 0) for (set::iterator it=geno_filter_flags_to_exclude.begin(); it != geno_filter_flags_to_exclude.end(); ++it) { string tmp = *it; LOG.printLOG("\t--remove-filtered-geno " + tmp + "\n"); } /* NOTE(review): the INFO_to_extract loop below is truncated in the same way; its body is not visible in this copy. */ if (INFO_to_extract.size() > 0) for (unsigned int ui=0; ui 0) for (set::iterator it=recode_INFO_to_keep.begin(); it != recode_INFO_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--recode-INFO " + tmp + "\n"); } if (site_INFO_flags_to_remove.size() > 0) for (set::iterator it=site_INFO_flags_to_remove.begin(); it != site_INFO_flags_to_remove.end(); ++it) { string tmp = *it; LOG.printLOG("\t--remove-INFO " + tmp + "\n"); } if (site_INFO_flags_to_keep.size() > 0) for (set::iterator it=site_INFO_flags_to_keep.begin(); it != site_INFO_flags_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--keep-INFO " + tmp + "\n"); } /* The BED and mask files are each reported under one of two option names, selected by BED_exclude and invert_mask respectively. */ if (BED_file != defaults.BED_file) { if (BED_exclude == false) LOG.printLOG("\t--bed " + BED_file + "\n"); else LOG.printLOG("\t--exclude-bed " + BED_file + "\n"); } if (mask_file != defaults.mask_file) { if (invert_mask == false) LOG.printLOG("\t--mask " + mask_file + "\n"); else LOG.printLOG("\t--invert-mask " + mask_file + "\n"); } if (snps_to_keep.size() > 0) for (set::iterator it=snps_to_keep.begin(); it != snps_to_keep.end(); ++it) { string tmp = *it; LOG.printLOG("\t--snp " + tmp + "\n"); } if (indv_to_keep.size() > 0) for (set::iterator it=indv_to_keep.begin(); it != indv_to_keep.end(); ++it) { string 
tmp = *it; LOG.printLOG("\t--indv " + tmp + "\n"); } if (indv_to_exclude.size() > 0) for (set::iterator it=indv_to_exclude.begin(); it != indv_to_exclude.end(); ++it) { string tmp = *it; LOG.printLOG("\t--remove-indv " + tmp + "\n"); } LOG.printLOG("\n"); } /* print_help. If argv holds at most one entry, appends --? and calls itself so that the help branch below runs. Otherwise scans argv: --version prints the version string and exits with status 0; any of the help spellings prints the banner, documentation pointers and contact address, then exits with status 0. Returns normally when no such flag is present. */ void parameters::print_help() { unsigned int i; string in_str; if (argv.size() <= 1) { // If there are no user parameters, display help. argv.push_back("--?"); print_help(); } for(i = 0; i < argv.size(); i++) { in_str = argv[i]; if ((in_str == "--version")) { cout << "VCFtools (" << VCFTOOLS_VERSION << ")" << endl; exit(0); } if ((in_str == "-h") || (in_str == "-?") || (in_str == "-help") || (in_str == "--?") || (in_str == "--help") || (in_str == "--h")) { cout << endl << "VCFtools (" << VCFTOOLS_VERSION << ")" << endl; cout << "\u00A9 Adam Auton and Anthony Marcketta 2009" << endl << endl; cout << "Process Variant Call Format files" << endl; cout << endl; cout << "For a list of options, please go to:" << endl; cout << "\thttps://vcftools.github.io/man_latest.html" << endl; cout << endl; cout << "Alternatively, a man page is available, type:" << endl; cout << "\tman vcftools" << endl; cout << endl; cout << "Questions, comments, and suggestions should be emailed to:" << endl; cout << "\tvcftools-help@lists.sourceforge.net" << endl; cout << endl; exit(0); } } } /* check_parameters. Validates the stored options and reports each violation through error() with a message and an exit code. Before the checks it sets stream_in when the input filename is - and increments num_outputs when weir_fst_populations is non-empty. NOTE(review): the stdin-is-a-terminal case uses LOG.error rather than this class error(); confirm that is intended. */ void parameters::check_parameters() { parameters defaults(0, 0); if (vcf_filename == "-") stream_in = true; if (isatty(STDIN_FILENO) && stream_in) LOG.error("No input detected via stream."); if (!weir_fst_populations.empty()) num_outputs++; if (num_outputs > 1) error("Only one output function may be called.",0); if (vcf_filename == "" && !stream_in) error("Input file required.", 0); if (vcf_format == false && bcf_format == false) error("Must specify input file type",0); if (chrs_to_keep.size() > 0 && chrs_to_exclude.size() > 0) error("Cannot specify chromosomes to keep and to exclude", 1); if (end_pos < start_pos) error("End position must be greater than 
Start position.", 1); if (((end_pos != numeric_limits::max()) || (start_pos != -1)) && (chrs_to_keep.size() != 1)) error("Require a single chromosome when specifying a range.", 2); if (max_maf < min_maf) error("Maximum MAF must be not be less than Minimum MAF.", 4); if (max_mac < min_mac) error("Maximum MAC must be not be less than Minimum MAC.", 4); if (min_maf != defaults.min_maf) { if ((min_maf < 0.0) || (min_maf > 1.0)) error("MAF must be between 0 and 1.", 4); } if (max_maf != defaults.max_maf) { if ((max_maf < 0.0) || (max_maf > 1.0)) error("Maximum MAF must be between 0 and 1.", 4); } if (min_non_ref_af != defaults.min_non_ref_af) { if ((min_non_ref_af < 0.0) || (min_non_ref_af > 1.0)) error("Non-Ref Allele Frequency must be between 0 and 1.", 4); } if (min_non_ref_af_any != defaults.min_non_ref_af_any) { if ((min_non_ref_af_any < 0.0) || (min_non_ref_af_any > 1.0)) error("Non-Ref Allele Frequency must be between 0 and 1.", 4); } if (max_non_ref_af < min_non_ref_af) error("Maximum Non-Ref Allele Frequency must not be less that Minimum Non-Ref AF.", 4); if (max_non_ref_ac < min_non_ref_ac) error("Maximum Non-Ref Allele Count must not be less that Minimum Non-Ref AC.", 4); if (max_non_ref_af_any < min_non_ref_af_any) error("Maximum Non-Ref Allele Frequency must not be less that Minimum Non-Ref AF.", 4); if (max_non_ref_ac_any < min_non_ref_ac_any) error("Maximum Non-Ref Allele Count must not be less that Minimum Non-Ref AC.", 4); if (min_site_call_rate > 1) error("Minimum Call rate cannot be greater than 1.", 5); if (max_alleles < min_alleles) error("Max Number of Alleles must be greater than Min Number of Alleles.", 6); if (max_mean_depth < min_mean_depth) error("Max Mean Depth must be greater the Min Mean Depth.", 7); if (max_genotype_depth < min_genotype_depth) error("Max Genotype Depth must be greater than Min Genotype Depth.", 9); if (((output_as_ldhat_phased == true) || (output_as_ldhat_unphased)) && (chrs_to_keep.size() != 1)) error("Require a 
chromosome (--chr) when outputting LDhat format.", 11); if ((output_BEAGLE_genotype_likelihoods_GL == true) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting Beagle likelihoods.", 11); if ((output_BEAGLE_genotype_likelihoods_PL == true) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting Beagle likelihoods.", 11); if (min_kept_mask_value > 9) error("Min Mask value must be between 0 and 9.", 14); if ((output_LROH == true) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting LROH.", 11); /* NOTE(review): the four messages below say > 0 but the tests only reject negative values, so a value of 0 is accepted. */ if (output_TsTv_bin_size < 0) error("TsTv bin size must be > 0",16); if (output_Tajima_D_bin_size < 0) error("Tajima D bin size must be > 0", 17); if (pi_window_size < 0) error("Pi Window size must be > 0", 18); if (output_SNP_density_bin_size < 0) error("SNP density bin size must be > 0", 18); /* Output formats listed here are rejected when stream_out is set. */ if (stream_out) { if (output_012_matrix) error("Cannot output 012 matrix files to stream",19); if (plink_output || plink_tped_output) error("Cannot output Plink files to stream",19); if (output_as_ldhat_phased || output_as_ldhat_unphased) error("Cannot output LDhat files to stream",19); if (output_as_IMPUTE) error("Cannot output IMPUTE files to stream",19); } } /* error. Logs the message, prefixed with Error: and surrounded by blank lines, through LOG.printLOG and then terminates the process with exit(code). */ void parameters::error(string err_msg, int code) { LOG.printLOG("\n\nError: " + err_msg + "\n\n"); exit(code); }