#!/usr/bin/env perl # # Author: petr.danecek@sanger # use strict; use warnings; use Carp; my $opts = parse_params(); sort_vcf($opts); exit; #-------------------------------- sub error { my (@msg) = @_; if ( scalar @msg ) { croak @msg; } die "Usage: vcf-sort > out.vcf\n", " cat file.vcf | vcf-sort > out.vcf\n", "Options:\n", " -c, --chromosomal-order Use natural ordering (1,2,10,MT,X) rather then the default (1,10,2,MT,X). This requires\n", " new version of the unix \"sort\" command which supports the --version-sort option.\n", " -p, --parallel Change the number of sorts run concurrently to \n", " -t, --temporary-directory Use a directory other than /tmp as the temporary directory for sorting.\n", " -h, -?, --help This help message.\n", "\n"; } sub parse_params { my $opts = {}; while (my $arg=shift(@ARGV)) { if ( $arg eq '-p' || $arg eq '--parallel-sort' ) { $$opts{parallel_sort}=shift(@ARGV); next; } if ( $arg eq '-c' || $arg eq '--chromosomal-order' ) { $$opts{chromosomal_order}=1; next; } if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); } if ( $arg eq '-t' || $arg eq '--temporary-directory' ) { $$opts{temp_dir}=shift(@ARGV); next; } if ( -e $arg ) { $$opts{file}=$arg; next } error("Unknown parameter \"$arg\". Run -h for help.\n"); } return $opts; } sub sort_vcf { my ($opts) = @_; my $fh; if ( exists($$opts{file}) ) { if ( $$opts{file}=~/\.gz$/i ) { open($fh,"gunzip -c $$opts{file} |") or error("$$opts{file}: $!"); } else { open($fh,'<',$$opts{file}) or error("$$opts{file}: $!"); } } else { $fh = *STDIN; } my $sort_opts = check_sort_options($opts); my $cmd; if ( exists($$opts{temp_dir}) ) { $cmd = "sort $sort_opts -T $$opts{temp_dir} -k2,2n"; } else { $cmd = "sort $sort_opts -k2,2n"; } print STDERR "$cmd\n"; open(my $sort_fh,"| $cmd") or error("$cmd: $!"); my $unflushed = select(STDOUT); $| = 1; while (my $line=<$fh>) { if ( $line=~/^#/ ) { print $line; next; } print $sort_fh $line; last; } select($unflushed); while (my $line=<$fh>) { print $sort_fh $line; } } sub check_sort_options { my ($opts) = @_; my $sort_opts = join('',`sort --help`); my $has_version_sort = ( $sort_opts=~/\s+--version-sort\s+/ ) ? 1 : 0; my $has_parallel_sort = ( $sort_opts=~/\s+--parallel=/ ) ? 1 : 0; if ( $$opts{chromosomal_order} && !$has_version_sort ) { error("Old version of sort command installed, please run without the -c option.\n"); } if ( $$opts{parallel_sort} && !$has_version_sort ) { error("Old version of sort command installed, please run without the -p option.\n"); } $sort_opts = ( $$opts{chromosomal_order} && $has_version_sort ) ? '-k1,1V' : '-k1,1d'; if ( $$opts{parallel_sort} && $has_parallel_sort ) { $sort_opts .= " --parallel $$opts{parallel_sort}"; } return $sort_opts; }