Difference between revisions of "Esip fedsearch.pl"
From Earth Science Information Partners (ESIP)
| Line 1: | Line 1: | ||
<pre> | <pre> | ||
| − | #!/usr/local/bin/perl | + | #!/usr/local/ActivePerl-5.8/bin/perl |
# N.B.: minimal included modules for portability | # N.B.: minimal included modules for portability | ||
# (Could be more efficient with XML/Atom parsing and XPath.) | # (Could be more efficient with XML/Atom parsing and XPath.) | ||
| + | # Example: | ||
| + | # esip_fedsearch.pl \ | ||
| + | # --osdd=http://mirador.gsfc.nasa.gov/mirador_dataset_opensearch.xml \ | ||
| + | # --bbox=-130,25,-60,50 \ | ||
| + | # --start=1998-01-01T00:00:00Z --end=2002-12-31T23:59:59Z \ | ||
| + | # --keywords=microwave --max_gran=1 --verbose | ||
use Getopt::Long; | use Getopt::Long; | ||
use LWP::Simple; | use LWP::Simple; | ||
| Line 10: | Line 16: | ||
# Parse command line | # Parse command line | ||
my ($osdd_url, $keywords, $bbox, $start, $end, $help); | my ($osdd_url, $keywords, $bbox, $start, $end, $help); | ||
| + | my $max_ds = 1; | ||
| + | my $max_gran = 1; | ||
| + | our $verbose = 0; | ||
my $result = GetOptions("osdd=s" => \$osdd_url, "keywords=s" => \$keywords, | my $result = GetOptions("osdd=s" => \$osdd_url, "keywords=s" => \$keywords, | ||
"bbox=s" => \$bbox, "start=s" => \$start, "end=s" => \$end, | "bbox=s" => \$bbox, "start=s" => \$start, "end=s" => \$end, | ||
| − | "help" => \$help); | + | "max_ds=i" => \$max_ds, "max_gran=i" => \$max_gran, |
| + | "verbose" => \$verbose, "help" => \$help); | ||
usage() if ($help || !$keywords); | usage() if ($help || !$keywords); | ||
$start ||= epoch2ccsds(time()); | $start ||= epoch2ccsds(time()); | ||
$end ||= epoch2ccsds(ccsds2epoch($start)+86400); | $end ||= epoch2ccsds(ccsds2epoch($start)+86400); | ||
| − | warn ("start: $start\nend: $end\n"); | + | warn ("start: $start\nend: $end\n") if $verbose; |
# Get Dataset Open Search Description Document | # Get Dataset Open Search Description Document | ||
| − | my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end); | + | my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end, $max_ds); |
my @osdd = extract_links($datasets, "search", "opensearchdescription"); | my @osdd = extract_links($datasets, "search", "opensearchdescription"); | ||
# Loop through returned dataset OpenSearch Description Documents | # Loop through returned dataset OpenSearch Description Documents | ||
| + | my $n = 0; | ||
foreach my $osdd (@osdd) { | foreach my $osdd (@osdd) { | ||
| − | my $granules=opensearch($osdd, $keywords, $bbox, $start,$end); | + | my $granules=opensearch($osdd, $keywords, $bbox, $start, $end, $max_gran); |
my @links = extract_links($granules, "/data#", ''); | my @links = extract_links($granules, "/data#", ''); | ||
print join("\n", @links, ''); | print join("\n", @links, ''); | ||
| + | $n++; | ||
| + | last if ($n >= $max_ds); # In case count is not supported at dataset level | ||
} | } | ||
| Line 35: | Line 48: | ||
my @links; | my @links; | ||
# Loop through <entry> elements | # Loop through <entry> elements | ||
| − | while ($doc =~ m/<entry>(.*?) | + | while ($doc =~ m/<[\w:]*entry>(.*?)[:\/]entry>/isg) { |
my $entry = $1; | my $entry = $1; | ||
# Loop through <link> elements | # Loop through <link> elements | ||
| − | while ($entry =~ m/<link(.*?)>/sg) { | + | while ($entry =~ m/<[\w:]*link(.*?)>/sg) { |
my $link = $1; | my $link = $1; | ||
my $match = 1; | my $match = 1; | ||
| Line 47: | Line 60: | ||
if ($match) { | if ($match) { | ||
my ($link_href) = ($link =~ m/href="(.*?)"/); | my ($link_href) = ($link =~ m/href="(.*?)"/); | ||
| + | $link_href =~ s/\&amp;/\&/g; | ||
push @links, $link_href; | push @links, $link_href; | ||
last; | last; | ||
| Line 57: | Line 71: | ||
# fetch the OSDD and execute the search | # fetch the OSDD and execute the search | ||
sub opensearch { | sub opensearch { | ||
| − | my ($osdd_url, $keywords, $bbox, $start, $end) = @_; | + | my ($osdd_url, $keywords, $bbox, $start, $end, $count) = @_; |
# Fetch OpenSearch Description Document | # Fetch OpenSearch Description Document | ||
my $osdd = get($osdd_url) or die "Could not get $osdd_url"; | my $osdd = get($osdd_url) or die "Could not get $osdd_url"; | ||
# Extract template for Atom response | # Extract template for Atom response | ||
| − | my ($template) = ($osdd =~ /<[\w:]*Url | + | my ($template) = ($osdd =~ /<[\w:]*Url .*template="(.*?)"/is); |
# Fill template in with values | # Fill template in with values | ||
| − | my $url = fill_template($template, $keywords, $bbox, $start, $end); | + | my $url = fill_template($template, $keywords, $bbox, $start, $end, $count); |
# Fetch results | # Fetch results | ||
my $results = get($url) or warn "No results returned for $url"; | my $results = get($url) or warn "No results returned for $url"; | ||
| Line 70: | Line 84: | ||
# fill_template: fill in an OpenSearch template with values from command line | # fill_template: fill in an OpenSearch template with values from command line | ||
sub fill_template { | sub fill_template { | ||
| − | my ($template, $keywords, $bbox, $start, $end) = @_; | + | my ($template, $keywords, $bbox, $start, $end, $count) = @_; |
| − | warn "Before: $template\n"; | + | warn "Before: $template\n" if ($verbose); |
my $url = $template; | my $url = $template; | ||
| + | $template =~ s/\&amp;/\&/g; # Unescape | ||
$template =~ s/\{time:start\?*\}/$start/ if ($start); | $template =~ s/\{time:start\?*\}/$start/ if ($start); | ||
$template =~ s/\{time:end\?*\}/$end/ if ($end); | $template =~ s/\{time:end\?*\}/$end/ if ($end); | ||
$template =~ s/\{geo:box\?*\}/$bbox/ if ($bbox); | $template =~ s/\{geo:box\?*\}/$bbox/ if ($bbox); | ||
| − | $template =~ s/\{searchTerms\}/$keywords/ if ($keywords); | + | $template =~ s/\{searchTerms\?*\}/$keywords/ if ($keywords); |
| − | $template =~ s/(\& | + | $template =~ s/\{count\?*\}/$count/ if ($count); |
| − | warn "After: $template\n"; | + | $template =~ s/(\&|\?)\w+?=\{[\w:]+\?*\}//g; # rm unfilled placeholders |
| + | warn "After: $template\n" if ($verbose); | ||
return $template; | return $template; | ||
} | } | ||
| Line 97: | Line 113: | ||
--start=yyyy-mm-ddThh:mm:ssZ Start time of search (Default=yesterday)\ | --start=yyyy-mm-ddThh:mm:ssZ Start time of search (Default=yesterday)\ | ||
--end=yyyy-mm-ddThh:mm:ssZ End time of search (Default = start+1day)\ | --end=yyyy-mm-ddThh:mm:ssZ End time of search (Default = start+1day)\ | ||
| + | --max_ds=N Maximum number of datasets (Default = 1)\ | ||
| + | --max_gran=N Maximum number of granules | ||
| + | per dataset (Default = 1)\ | ||
| + | --verbose Print some diagnostic messages | ||
--keywords=word+word+word... Keywords, separated by '+' (Required) | --keywords=word+word+word... Keywords, separated by '+' (Required) | ||
"; | "; | ||
} | } | ||
</pre> | </pre> | ||
Revision as of 09:04, January 26, 2010
#!/usr/local/ActivePerl-5.8/bin/perl
# N.B.: minimal included modules for portability
# (Could be more efficient with XML/Atom parsing and XPath.)
# Example:
# esip_fedsearch.pl \
# --osdd=http://mirador.gsfc.nasa.gov/mirador_dataset_opensearch.xml \
# --bbox=-130,25,-60,50 \
# --start=1998-01-01T00:00:00Z --end=2002-12-31T23:59:59Z \
# --keywords=microwave --max_gran=1 --verbose
use Getopt::Long;
use LWP::Simple;
use Time::Local;
use strict;
# Parse command line
my ($osdd_url, $keywords, $bbox, $start, $end, $help);
my $max_ds = 1;
my $max_gran = 1;
our $verbose = 0;    # package global: fill_template() reads it for diagnostics
my $result = GetOptions(
    "osdd=s"     => \$osdd_url,
    "keywords=s" => \$keywords,
    "bbox=s"     => \$bbox,
    "start=s"    => \$start,
    "end=s"      => \$end,
    "max_ds=i"   => \$max_ds,
    "max_gran=i" => \$max_gran,
    "verbose"    => \$verbose,
    "help"       => \$help,
);

# Show the help text (and exit) when option parsing failed, help was
# requested, or one of the two required options is missing.
usage() if (!$result || $help || !$keywords || !$osdd_url);

# Default search window: starts 24 hours ago (the help text documents the
# default start as "yesterday") and lasts one day from the start time.
$start ||= epoch2ccsds(time() - 86400);
$end   ||= epoch2ccsds(ccsds2epoch($start) + 86400);
warn ("start: $start\nend: $end\n") if $verbose;

# Dataset-level search: the result is an Atom feed whose entries link to
# per-dataset OpenSearch Description Documents.
my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end, $max_ds);
my @osdd = extract_links($datasets, "search", "opensearchdescription");

# Loop through returned dataset OpenSearch Description Documents, running a
# granule-level search against each and printing the data links one per line.
my $n = 0;
foreach my $osdd (@osdd) {
    my $granules = opensearch($osdd, $keywords, $bbox, $start, $end, $max_gran);
    my @links = extract_links($granules, "/data#", '');
    print join("\n", @links, '');
    $n++;
    last if ($n >= $max_ds); # In case count is not supported at dataset level
}
# extract_links: extract links from an Atom document based on rel and type.
#   $doc         - text of the Atom document (undef or empty yields no links)
#   $rel_target  - pattern the link's rel attribute must match ('' = any rel)
#   $type_target - pattern the link's type attribute must match ('' = any type)
# Returns a list holding, for each <entry>, the href of the first <link> that
# satisfies both patterns. Entries with no matching link contribute nothing.
# XML-escaped ampersands (&amp;) in the href are decoded so the value can be
# used directly as a URL.
sub extract_links {
    my ($doc, $rel_target, $type_target) = @_;
    my @links;
    return @links unless defined $doc;

    # Loop through <entry> elements (with or without a namespace prefix)
    while ($doc =~ m/<[\w:]*entry>(.*?)[:\/]entry>/isg) {
        my $entry = $1;
        # Loop through <link> elements; $1 is the element's attribute text
        while ($entry =~ m/<[\w:]*link(.*?)>/sg) {
            my $link = $1;

            # A missing attribute is treated as an empty value, so it can
            # only satisfy an empty (match-anything) target.
            my ($rel) = ($link =~ m/rel="(.*?)"/is);
            $rel = '' unless defined $rel;
            next if ($rel_target && $rel !~ /$rel_target/);

            my ($type) = ($link =~ m/type="(.*?)"/is);
            $type = '' unless defined $type;
            next if ($type_target && $type !~ /$type_target/);

            # A link without an href is useless to the caller: keep looking
            # rather than returning an undefined value.
            my ($link_href) = ($link =~ m/href="(.*?)"/);
            next unless defined $link_href;

            $link_href =~ s/&amp;/&/g;    # undo XML escaping of '&'
            push @links, $link_href;
            last;                          # only the first match per entry
        }
    }
    return @links;
}
# opensearch: given a URL to an OpenSearch Description Document and the search values,
# fetch the OSDD and execute the search
#   $osdd_url - URL of the OpenSearch Description Document
#   $keywords, $bbox, $start, $end, $count - search values handed to
#               fill_template() to populate the URL template
# Returns the body of the search response, or undef (after a warning) when
# the search request returns nothing.
# Dies when the OSDD cannot be fetched or contains no URL template.
sub opensearch {
    my ($osdd_url, $keywords, $bbox, $start, $end, $count) = @_;

    # Fetch OpenSearch Description Document
    my $osdd = get($osdd_url) or die "Could not get $osdd_url";

    # Extract template for Atom response. An OSDD may carry several <Url>
    # elements (one per response format); the results are parsed as Atom, so
    # prefer the one whose type mentions "atom". If none is typed that way,
    # fall back to the last template in the document.
    my ($template, $fallback);
    while ($osdd =~ /<[\w:]*Url\s([^>]*)>/isg) {
        my $attrs = $1;
        my ($candidate) = ($attrs =~ /template="(.*?)"/is);
        next unless defined $candidate;
        $fallback = $candidate;
        my ($type) = ($attrs =~ /type="(.*?)"/is);
        if (defined $type && $type =~ /atom/i) {
            $template = $candidate;
            last;
        }
    }
    $template = $fallback unless defined $template;
    die "No search URL template found in $osdd_url" unless defined $template;

    # Fill template in with values
    my $url = fill_template($template, $keywords, $bbox, $start, $end, $count);

    # Fetch results
    my $results = get($url) or warn "No results returned for $url";
    return $results;
}
# fill_template: fill in an OpenSearch template with values from command line
#   $template - URL template as it appears in the OSDD (still XML-escaped)
#   $keywords - replaces {searchTerms}
#   $bbox     - replaces {geo:box}
#   $start    - replaces {time:start}
#   $end      - replaces {time:end}
#   $count    - replaces {count}
# A value that is undefined, empty or 0 leaves its placeholder unfilled.
# Every "name={placeholder}" parameter still unfilled at the end is removed
# from the query string.
# Returns the resulting URL. Prints the template before and after filling to
# STDERR when the global $verbose is set.
sub fill_template {
    my ($template, $keywords, $bbox, $start, $end, $count) = @_;
    warn "Before: $template\n" if ($verbose);

    $template =~ s/&amp;/&/g;    # Unescape XML-encoded '&' separators

    # Each placeholder may carry the trailing '?' that marks it optional
    $template =~ s/\{time:start\?*\}/$start/ if ($start);
    $template =~ s/\{time:end\?*\}/$end/ if ($end);
    $template =~ s/\{geo:box\?*\}/$bbox/ if ($bbox);
    $template =~ s/\{searchTerms\?*\}/$keywords/ if ($keywords);
    $template =~ s/\{count\?*\}/$count/ if ($count);

    # rm unfilled placeholders. Parameters introduced by '&' can simply be
    # dropped. One introduced by '?' must leave the '?' behind when another
    # parameter follows it, otherwise the rest of the query string would hang
    # off the path with no '?' in front of it.
    $template =~ s/&\w+?=\{[\w:]+\?*\}//g;
    $template =~ s/\?\w+?=\{[\w:]+\?*\}&/?/;
    $template =~ s/\?\w+?=\{[\w:]+\?*\}//g;

    warn "After: $template\n" if ($verbose);
    return $template;
}
# ccsds2epoch: convert a yyyy-mm-ddThh:mm:ss timestamp (UTC) to epoch seconds.
#   $_[0] - timestamp string; text before or after the timestamp (such as a
#           trailing 'Z') is ignored
# Returns the number of seconds since the epoch, computed by timegm().
# Dies with a descriptive message when no timestamp can be found in the input.
sub ccsds2epoch {
    my $stamp = defined $_[0] ? $_[0] : '';
    my ($y, $m, $d, $h, $min, $s) =
        ($stamp =~ m/(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)/)
        or die "Cannot parse time '$stamp' (expected yyyy-mm-ddThh:mm:ss)\n";
    # Pass the four-digit year as-is: timegm() takes years above 999
    # literally, whereas a year reduced by 1900 to two digits is mapped onto
    # a rolling century and can land 100 years off.
    return timegm($s, $min, $h, $d, $m-1, $y);
}
# epoch2ccsds: render epoch seconds as a UTC yyyy-mm-ddThh:mm:ss string.
#   $_[0] - seconds since the epoch
# Returns the formatted timestamp (zero-padded fields, no zone suffix).
sub epoch2ccsds {
    my ($epoch) = @_;
    # gmtime() yields seconds first, with a 0-based month and a year
    # counted from 1900.
    my ($sec, $minute, $hour, $mday, $mon, $year) = gmtime($epoch);
    return sprintf(
        "%04d-%02d-%02dT%02d:%02d:%02d",
        $year + 1900, $mon + 1, $mday,
        $hour, $minute, $sec
    );
}
# usage: abort the script with the command-line help text.
# Takes no arguments and never returns. The text ends with a newline, so
# die() prints it without appending a file/line suffix.
sub usage() {
    # A backslash before a line break inside the double-quoted string below
    # yields just the line break.
    my $help_text = "esip_fedsearch.pl [options]\
--osdd=url URL of dataset-level OpenSearch
Description Document (Required)\
--bbox=lon,lat,lon,lat Bounding box of search area\
--start=yyyy-mm-ddThh:mm:ssZ Start time of search (Default=yesterday)\
--end=yyyy-mm-ddThh:mm:ssZ End time of search (Default = start+1day)\
--max_ds=N Maximum number of datasets (Default = 1)\
--max_gran=N Maximum number of granules
per dataset (Default = 1)\
--verbose Print some diagnostic messages
--keywords=word+word+word... Keywords, separated by '+' (Required)
";
    die $help_text;
}