#!/usr/bin/perl # =========================================================================== # # PUBLIC DOMAIN NOTICE # National Center for Biotechnology Information (NCBI) # # This software/database is a "United States Government Work" under the # terms of the United States Copyright Act. It was written as part of # the author's official duties as a United States Government employee and # thus cannot be copyrighted. This software/database is freely available # to the public for use. The National Library of Medicine and the U.S. # Government do not place any restriction on its use or reproduction. # We would, however, appreciate having the NCBI and the author cited in # any work or product based on this material. # # Although all reasonable efforts have been taken to ensure the accuracy # and reliability of the software and data, the NLM and the U.S. # Government do not and cannot warrant the performance or results that # may be obtained by using this software or data. The NLM and the U.S. # Government disclaim all warranties, express or implied, including # warranties of performance, merchantability or fitness for any particular # purpose. # # =========================================================================== # # File Name: nquire # # Author: Jonathan Kans # # Version Creation Date: 8/20/12 # # ========================================================================== # Entrez Direct - EDirect # use strict; use warnings; my ($LibDir, $ScriptName); use File::Spec; BEGIN { my $Volume; ($Volume, $LibDir, $ScriptName) = File::Spec->splitpath($0); $LibDir = File::Spec->catpath($Volume, $LibDir, ''); if (my $RealPathname = eval {readlink $0}) { do { $RealPathname = File::Spec->rel2abs($RealPathname, $LibDir); ($Volume, $LibDir, undef) = File::Spec->splitpath($RealPathname); $LibDir = File::Spec->catpath($Volume, $LibDir, '') } while ($RealPathname = eval {readlink $RealPathname}); } else { $LibDir = File::Spec->rel2abs($LibDir) } $LibDir .= '/aux/lib/perl5'; } use lib $LibDir; use LWP::UserAgent; use POSIX; use URI::Escape; # definitions use constant false => 0; use constant true => 1; # utility subroutines sub clearflags { %macros = (); $agent = "Nquire/1.0"; $alias = ""; $debug = false; $http = ""; $output = ""; } sub map_macros { $qury = shift (@_); if ( $qury !~ /\(#/ ) { return $qury; } if ( scalar (keys %macros) > 0 ) { for ( keys %macros ) { $ky = $_; $vl = $macros{$_}; $qury =~ s/\((\#$ky)\)/$vl/g; } } return $qury; } sub read_aliases { if ( $alias ne "" ) { if (open (my $PROXY_IN, $alias)) { while ( $thisline = <$PROXY_IN> ) { $thisline =~ s/\r//; $thisline =~ s/\n//; $thisline =~ s/ +/ /g; $thisline =~ s/> 300); $usragnt->agent( "$agent" ); $res = $usragnt->get ( $urlx ); if ( $res->is_success) { $rslt = $res->content; } else { print STDERR $res->status_line . "\n"; } if ( $rslt eq "" ) { print STDERR "No do_get output returned from '$urlx'\n"; } if ( $debug ) { print STDERR "$rslt\n"; } return $rslt; } $usragnt = new LWP::UserAgent (timeout => 300); $usragnt->agent( "$agent" ); $req = new HTTP::Request POST => "$urlx"; $req->content_type('application/x-www-form-urlencoded'); $req->content("$argx"); $res = $usragnt->request ( $req ); if ( $res->is_success) { $rslt = $res->content; } else { print STDERR $res->status_line . "\n"; } if ( $rslt eq "" ) { if ( $argx ne "" ) { $urlx .= "?"; $urlx .= "$argx"; } print STDERR "No do_post output returned from '$urlx'\n"; } if ( $debug ) { print STDERR "$rslt\n"; } return $rslt; } # uri_escape with backslash exceptions sub do_uri_escape { $patx = shift (@_); $rslt = ""; while ( $patx ne "" ) { if ( $patx =~ /^\\\\(.+)/ ) { $rslt .= "\\"; $patx = $1; } elsif ( $patx =~ /^\\(.)(.+)/ ) { $rslt .= $1; $patx = $2; } elsif ( $patx =~ /^(.)(.+)/ ) { $rslt .= uri_escape ($1); $patx = $2; } elsif ( $patx =~ /^(.)/ ) { $rslt .= uri_escape ($1); $patx = ""; } } return $rslt; } # nquire executes an external URL query from command line arguments my $nquire_help = qq{ Query Commands -get Uses HTTP GET instead of POST -url Base URL for external search Examples nquire -get -url "http://collections.mnh.si.edu/services/resolver/resolver.php" \\ -voucher "Birds:625456" | xtract -pattern Result -element ScientificName Country nquire -get -url http://w1.weather.gov/xml/current_obs/KSFO.xml | xtract -pattern current_observation -tab "\\n" \\ -element weather temp_f wind_dir wind_mph nquire -eutils efetch.fcgi -db pubmed -id 2539356 -rettype medline -retmode text }; sub nquire { # nquire -url http://... -tag value -tag value | ... $url = ""; $arg = ""; $pfx = ""; $amp = ""; $pat = ""; @args = @ARGV; $max = scalar @args; if ( $max > 0 and $ARGV[0] eq "-help" ) { print $nquire_help; return; } if ( $max < 2 ) { return; } $i = 0; # if present, -debug must be first argument, only prints generated URL (undocumented) if ( $i < $max ) { $pat = $args[$i]; if ( $pat eq "-debug" ) { $i++; $debug = true; } } # if present, -http get or -get must be next # nquire -get -url "http://collections.mnh.si.edu/services/resolver/resolver.php" -voucher "Birds:625456" if ( $i < $max ) { $pat = $args[$i]; if ( $pat eq "-http" ) { $i++; if ( $i < $max ) { $http = $args[$i]; $i++; } } elsif ( $pat eq "-get" ) { $i++; $http = "get"; } } # if present, -agent must be next argument (undocumented) if ( $i < $max ) { $pat = $args[$i]; if ( $pat eq "-agent" ) { $i++; if ( $i < $max ) { $agent = $args[$i]; $i++; } } } # read file of keyword shortcuts for URL expansion if ( $i < $max ) { $pat = $args[$i]; if ( $pat eq "-alias" ) { $i++; if ( $i < $max ) { $alias = $args[$i]; if ( $alias ne "" ) { read_aliases (); } $i++; } } } # read URL if ( $i < $max ) { $pat = $args[$i]; if ( $pat eq "-url" ) { $i++; if ( $i < $max ) { $url = $args[$i]; $url = map_macros ($url); $i++; } } elsif ( $pat eq "-ncbi" ) { # shortcut for ncbi base (undocumented) $i++; if ( $i < $max ) { $url = "https://www.ncbi.nlm.nih.gov"; } } elsif ( $pat eq "-eutils" ) { # shortcut for eutils base (undocumented) $i++; if ( $i < $max ) { $url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"; } } elsif ( $pat eq "-hydra" ) { # internal citation match request (undocumented) $i++; if ( $i < $max ) { $url = "https://www.ncbi.nlm.nih.gov/projects/hydra/hydra_search.cgi"; $pat = $args[$i]; $pat = map_macros ($pat); $enc = do_uri_escape ($pat); $arg="search=pubmed_search_citation_top_20.1&query=$enc"; $amp = "&"; $i++; } } elsif ( $pat eq "-revhist" ) { # internal sequence revision history request (undocumented) $i++; if ( $i < $max ) { $url = "https://www.ncbi.nlm.nih.gov/sviewer/girevhist.cgi"; $pat = $args[$i]; $arg="cmd=seqid&txt=on&seqid=asntext&os=PUBSEQ_OS&val=$pat"; $amp = "&"; $i++; } } } if ( $url eq "" ) { return; } # hard-coded URL aliases for common NCBI web sites if ( $url =~ /\(#/ ) { $ky = "ncbi_url"; if ( $url =~ /\(#$ky\)/ ) { $vl = "https://www.ncbi.nlm.nih.gov"; $url =~ s/\((\#$ky)\)/$vl/g; } $ky = "eutils_url"; if ( $url =~ /\(#$ky\)/ ) { $vl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"; $url =~ s/\((\#$ky)\)/$vl/g; } } # arguments before next minus are added to base URL as /value $go_on = true; while ( $i < $max and $go_on ) { $pat = $args[$i]; if ( $pat =~ /^-(.+)/ ) { $go_on = false; } else { $pat = map_macros ($pat); $url .= "/" . $pat; $i++; } } # now expect tag with minus and value[s] without, add as &tag=value[,value] while ( $i < $max ) { $pat = $args[$i]; if ( $pat =~ /^-(.+)/ ) { $pat = $1; $pfx = $amp . "$pat="; $amp = ""; } else { $pat =~ s/^\\-/-/g; $pat = map_macros ($pat); $enc = do_uri_escape ($pat); $arg .= $pfx . $enc; $pfx = ","; $amp = "&"; } $i++; } if ( $debug ) { if ( $arg eq "" ) { print "$url\n"; } else { print "$url?$arg\n"; } return; } $output = do_post ($url, $arg); print "$output"; } # initialize clearflags (); # execute URL request nquire (); # close input and output files close (STDIN); close (STDOUT); close (STDERR);