#!/usr/bin/perl
# eap_search.pl v1.2
#
# modified version of bibsearch.pl (see below) for the Edinburgh
# Anisotropy Project at the British Geological Survey.
#
# last modification: 25 Jun 2002
#
# author: Peter Hanssen
# email : mail@Peter.Hanssen.name
# web   : http://Peter.Hanssen.name
#
# Use of environment variable DOCUMENT_ROOT to determine path to files
#
# Additional HTML-field: bibtex [0/1] - switch BibTeX output on/off
#
# Additional HTML-output of the 'offline' location for:
#  - @inproceedings
#  - @article
#  - @inbook
#  - @phdthesis
#  - @techreport
#
# Bug removed for last field of item not delimited by comma.
#
# Additional hard-wired HTML-formatting.
#
# Using DOCUMENT_ROOT to build directory path.
#
#==============================================================================
#
# See following for basic setup and instructions...
# ****
#
# bibsearch.pl v1.1b
#
# (c) Andy Wood ... 1994
#
# Using ReadParse from cgi-lib.pl - Copyright 1993 Steven E. Brenner
#
# This script will respond to a call from a HTML form with the following
# fields:
#
#  'header' - url filename of the header template file.
#  'footer' - url filename of the footer template file.
#  'term'   - the search term(s).
#  'field'  - the field(s) to search in.
#  'type'   - one of 'exact', 'substr', or 'regexp'.
#  'files'  - list of url filenames to be searched separated by `\0'
#
# It copies the specified header template file to stdout. This file should
# contain valid html and can be used to put a title on the search results.
# Any instances of "$term" will be replaced with the search term text, and
# any instances of "$type" with the search type text.
#
# Then it searches the list of files, using the search term, in the manner
# specified by the search type. The list of files should be a valid BibTeX
# database or one with HTML markup (such as those created with bibmarkup).
# Any entries in the database that match the search will be outputted with
# a HTML title.
#
# When it has scanned all the files it will then copy the specified footer
# template to stdout, again replacing "$term" and "$type".
#
# A url filename is of the form "~user/path/file" or "ftp:/path/file" and
# these only work on the local filesystem - see GetPath for more details.
#
# For an example of this script in use, examine:
#
# http://www.cs.bham.ac.uk/~amw/agents/bibtex/search.html
#
# Andy...
# __
#
# Andy Wood : amw@cs.bham.ac.uk      The University of Birmingham
# tel: +44 (0)21 414 3736            School of Computer Science
# fax: +44 (0)21 414 4281            Edgbaston, Birmingham
# http://www.cs.bham.ac.uk/~amw      B15 2TT England
#
# ****
# ****
#
# Modifications
#
# v1.1  - updated ReadParse to version 1.6
#       - separated out hard coded paths from the main code
#
# v1.1a - fixed bug in ReadParse version 1.6
#
# v1.1b - unfixed bug in ReadParse and fixed corresponding bug in own code ;0)
#
# ****
# ****
#
# Constants
#
# ****
# For use in GetPath
# USERHTMLDIR  - name of directory for user supplied pages
# LOCALFTPPATH - local path for public ftp site
#
# Use of DOCUMENT_ROOT to switch between zweb-server and test-server path
$USERHTMLDIR  = "$ENV{'DOCUMENT_ROOT'}/PUBLICATIONS/SEARCH/";
$LOCALFTPPATH = "$ENV{'DOCUMENT_ROOT'}/PUBLICATIONS/SEARCH/";

# ****
#
# ReadParse
#
# Reads in GET or POST data, converts it to unescaped text, and puts
# one key=value in each member of the list "@in"
# Also creates key/value pairs in %in, using '\0' to separate multiple
# selections
#
# If a variable-glob parameter (e.g., *cgi_input) is passed to ReadParse,
# information is stored there, rather than in $in, @in, and %in.
#
# Only well-formed %XX escapes (two hex digits) are decoded; anything else
# is left in the text as it was sent.  Always returns 1.
#
# From cgi-lib.pl - Copyright 1993/1994 Steven E. Brenner
# http://www.bio.cam.ac.uk/web
#
# ****
sub ReadParse {
    # Must stay "local": it aliases the globals $in/@in/%in to the caller's glob.
    local (*in) = @_ if @_;
    my ( $key, $val );

    # Read in text
    my $method = defined $ENV{'REQUEST_METHOD'} ? $ENV{'REQUEST_METHOD'} : '';
    if ( $method eq "GET" ) {
        $in = $ENV{'QUERY_STRING'};
    }
    elsif ( $method eq "POST" ) {
        read( STDIN, $in, $ENV{'CONTENT_LENGTH'} || 0 );
    }
    $in = '' unless defined $in;

    @in = split( /&/, $in );

    foreach my $i ( 0 .. $#in ) {
        # Convert plus's to spaces
        $in[$i] =~ s/\+/ /g;

        # Split into key and value (splits on the first =).
        ( $key, $val ) = split( /=/, $in[$i], 2 );
        $key = '' unless defined $key;
        $val = '' unless defined $val;    # "key" sent without "=value"

        # Convert %XX from hex numbers to the byte they stand for.  "C" is
        # an unsigned char, so bytes above 0x7F decode without wrapping.
        $key =~ s/%([0-9A-Fa-f]{2})/pack("C", hex($1))/ge;
        $val =~ s/%([0-9A-Fa-f]{2})/pack("C", hex($1))/ge;

        # Associate key and value
        $in{$key} .= "\0" if defined $in{$key};    # \0 is the multiple separator
        $in{$key} .= $val;
    }

    return 1;    # just for fun
}

# ****
#
# GetPath
#
# Converts its argument from a partial path ("~amw/file" or "ftp:/path/file") into
# its full equivalent ("/home/pg/amw/public_html/file" or "/scratch/ftp/path/file")
# ensuring that we don't inadvertently allow external users full access to the file
# system. We also remove any instances of ".." in the path.
#
# Argument: the partial path.  Returns: the expanded path as a string.
# Any other form of argument is taken to be relative to $USERHTMLDIR.
#
# ****
sub GetPath {
    my $filename = $_[0];
    $filename = '' unless defined $filename;

    if ( $filename =~ /^~/ ) {
        # "~user/file": without a slash the whole remainder is the user name.
        my ( $name, $file ) =
          $filename =~ m{^~([^/]*)/(.*)} ? ( $1, $2 ) : ( substr( $filename, 1 ), '' );
        my @entry = getpwnam($name);

        # The home directory is always element 7 of the getpwnam list; counting
        # back from the end breaks where the list carries a trailing $expire.
        my $home = ( @entry && defined $entry[7] ) ? $entry[7] : '';
        $filename = $home . $USERHTMLDIR . $file;
    }
    elsif ( $filename =~ /^ftp:(.*)/ ) {
        $filename = $LOCALFTPPATH . $1;
    }
    else {
        # $filename = "";
        $filename = $USERHTMLDIR . $filename;
    }

    $filename =~ s/\.\.//g;    # Make sure we don't allow any ..'ing
    return $filename;
}

# ****
#
# PrintHeader
#
# Prints the line that tells WWW that we're an HTML document (honest!)
#
# ****
sub PrintHeader {
    print "Content-type: text/html\n\n";
}

# ****
#
# Search
#
# See if entry ($_) matches required search term. First argument is
# the search term (in this case $in{ 'term' }), the second is the
# search type ($in{ 'type' }), the third is the field ($in{ 'field' }).
#
# ****
#
# Search( term, type, field ) - test the entry held in $_ against a search.
#
#   term  - whitespace separated search words, or a pattern for 'regexp'
#   type  - 'exact'  : any word occurs as a whole word
#           'substr' : any word occurs anywhere
#           'regexp' : term is used as a regular expression
#   field - whitespace separated field names whose GetField() text is
#           searched; "" or "all" searches the whole entry
#
# Returns 1 on a match, otherwise 0.  Matching ignores case.  An unknown
# type, an empty term or an invalid pattern never matches.
sub Search {
    my ( $term, $type, $field ) = @_;
    $term  = '' unless defined $term;
    $type  = '' unless defined $type;
    $field = '' unless defined $field;

    my $found    = 0;
    my $searchin = '';

    if ( $field ne "" && $field ne "all" ) {
        # split ' ' skips leading whitespace, so no empty field name is looked up.
        foreach my $name ( split( ' ', $field ) ) {
            $searchin .= GetField( $name, $_ );
        }
    }
    else {
        $searchin = $_;
    }
    $searchin = '' unless defined $searchin;

    if ( $type eq 'regexp' ) {
        # The pattern comes from the form: a malformed one must count as
        # "no match" rather than abort the whole request.
        local $@;
        my $matched = eval { $searchin =~ /(?:$term)/i ? 1 : 0 };
        $found = 1 if $matched;
    }
    elsif ( $type eq 'exact' || $type eq 'substr' ) {
        # split ' ' also drops the empty leading word that a term such as
        # " foo" would otherwise produce - an empty word matches everything.
        foreach my $word ( split( ' ', $term ) ) {
            my $literal = quotemeta($word);    # words are text, not patterns

            if ( $type eq 'exact' ) {
                # Same as \b...\b for ordinary words, but also works for
                # words that start or end with punctuation (e.g. "C++").
                $found = 1 if $searchin =~ /(?<!\w)$literal(?!\w)/i;
            }
            else {
                $found = 1 if $searchin =~ /$literal/i;
            }
        }
    }

    return $found;
}

# ****
#
# MarkupEntry
#
# Create a line of HTML for each entry in the file. This pulls the
# title and the author (or editor) from the BibTeX entry in $_, and
# prints a HTML heading, followed by an availability list of urls if
# there are any, followed by the full entry in
# formatted form.
#
# ****
# MarkupEntry works on the BibTeX entry held in $_ and takes no arguments.
# The statements below isolate the entry text, its citation key, its title
# and the author (or editor) string that the output that follows uses.
sub MarkupEntry
{
# Drop whatever precedes the first '@', then put the '@' back so that
# $bibtex begins at the entry itself.
local( $junk, $bibtex ) = split( '@', $_, 2 );
$bibtex = '@'.$bibtex;
# Everything before the first comma is the "@type{key" (or "@type(key") part.
local( $key, $rest ) = split( ',', $bibtex, 2 );
$key =~ s/^@.*[{(]\s*(.*)\s*$/$1/; # Keep only what follows the opening brace/paren: the key
local( $author, $title );
$title = &GetField( "title", $bibtex );
# Prefer the author field, fall back to the editor field, else leave empty.
# NOTE(review): the presence tests below match against $_ (the entry as
# passed in), while the values themselves are read from $bibtex.
if ( /author\s*=/i )
{
$author = &GetField( "author", $bibtex );
}
elsif ( /editor\s*=/i )
{
$author = &GetField( "editor", $bibtex );
# A standalone "and" in the editor string is taken to mean several editors.
if ( $author =~ /\band\b/ )
{
$author .= " (Eds)";
}
else
{
$author .= " (Ed)";
}
}
else
{
$author = "";
}
print "\n| ", $title, " \n"; print "", $author, " \n"; # Printing reference location for different media local( $bt, $jl, $uv, $yr, $ps, $vl, $pb, $nu ); if ( /\@inproceedings/i ) { $bt = &GetField( "booktitle", $bibtex ); $yr = &GetField( "year", $bibtex ); $ps = &GetField( "pages", $bibtex ); print $bt, ", ", $yr, " | |
| Available as", &MarkupURL( &GetField( "url", $bibtex ) ), ". | |
| BibTeX: | \n"; print "\n", $bibtex, " |
\n" if $found == 0; close( FILE ); } # **** # # PrintTemplate # # Copy a Template file substituting variables where necessary. # # **** sub PrintTemplate { local( $filename ) = &GetPath( $_[0] ); open( TEMPLATE, $filename ) || print "