#!/usr/bin/perl
# eap_search.pl v1.2
#
# modified version of bibsearch.pl (see below) for the Edinburgh
# Anisotropy Project at the British Geological Survey.
#
# last modification: 25 Jun 2002
#
# author: Peter Hanssen
# email : mail@Peter.Hanssen.name
# web : http://Peter.Hanssen.name
#
# Use of environment variable DOCUMENT_ROOT to determine path to files
#
# Additional HTML-field: bibtex [0/1] - switch BibTeX output on/off
#
# Additional HTML-output of the 'offline' location for:
# - @inproceedings
# - @article
# - @inbook
# - @phdthesis
# - @techreport
#
# Bug removed for last field of item not delimited by comma.
#
# Additional hard-wired HTML-formatting.
#
# Using DOCUMENT_ROOT to build directory path.
#
#==============================================================================
#
# See following for basic setup and instructions...
# ****
#
# bibsearch.pl v1.1b
#
# (c) Andy Wood ... 1994
#
# Using ReadParse from cgi-lib.pl - Copyright 1993 Steven E. Brenner
#
# This script will respond to a call from a HTML form with the following
# fields:
#
# 'header' - url filename of the header template file.
# 'footer' - url filename of the footer template file.
# 'term' - the search term(s).
# 'field' - the field(s) to search in.
# 'type' - one of 'exact', 'substr', or 'regexp'.
# 'files' - list of url filenames to be searched separated by `\0'
#
# It copies the specified header template file to stdout. This file should
# contain valid html and can be used to put a title on the search results.
# Any instances of "$term" will be replaced with the search term text, and
# any instances of "$type" with the search type text.
#
# Then it searches the list of files, using the search term, in the manner
# specified by the search type. The list of files should be a valid BibTeX
# database or one with HTML markup (such as those created with bibmarkup).
# Any entries in the database that match the search will be outputted with
# a HTML title.
#
# When it has scanned all the files it will then copy the specified footer
# template to stdout, again replacing "$term" and "$type".
#
# A url filename is of the form "~user/path/file" or "ftp:/path/file" and
# these only work on the local filesystem - see GetPath for more details.
#
# For an example of this script in use, examine:
#
# http://www.cs.bham.ac.uk/~amw/agents/bibtex/search.html
#
# Andy...
# __
#
# Andy Wood : amw@cs.bham.ac.uk      The University of Birmingham
# tel: +44 (0)21 414 3736            School of Computer Science
# fax: +44 (0)21 414 4281            Edgbaston, Birmingham
# http://www.cs.bham.ac.uk/~amw      B15 2TT England
#
# ****

# ****
#
# Modifications
#
# v1.1 - updated ReadParse to version 1.6
# - separated out hard coded paths from the main code
#
# v1.1a - fixed bug in ReadParse version 1.6
#
# v1.1b - unfixed bug in ReadParse and fixed corresponding bug in own code ;0)
#
# ****

# ****
#
# Constants
#
# ****

# For use in GetPath
# USERHTMLDIR - name of directory for user supplied pages
# LOCALFTPPATH - local path for public ftp site
#
# Use of DOCUMENT_ROOT to switch between zweb-server and test-server path

$USERHTMLDIR = "$ENV{'DOCUMENT_ROOT'}/PUBLICATIONS/SEARCH/";
$LOCALFTPPATH = "$ENV{'DOCUMENT_ROOT'}/PUBLICATIONS/SEARCH/";

# ****
#
# ReadParse
#
# Reads in GET or POST data, converts it to unescaped text, and puts
# one key=value in each member of the list "@in"
# Also creates key/value pairs in %in, using '\0' to separate multiple
# selections
#
# If a variable-glob parameter (e.g., *cgi_input) is passed to ReadParse,
# information is stored there, rather than in $in, @in, and %in.
#
# Only well-formed escapes (a '%' followed by two hex digits) are decoded;
# any other '%' is kept literally, so malformed input can no longer
# inject a "\0" byte (the multiple-value separator).
#
# Always returns 1.
#
# From cgi-lib.pl - Copyright 1993/1994 Steven E. Brenner
# http://www.bio.cam.ac.uk/web
#
# ****

sub ReadParse {
  local (*in) = @_ if @_;    # Alias $in/@in/%in to the caller's glob, if given
  my ($key, $val);

  # Read in text
  if ($ENV{'REQUEST_METHOD'} eq "GET") {
    $in = $ENV{'QUERY_STRING'};
  } elsif ($ENV{'REQUEST_METHOD'} eq "POST") {
    read(STDIN,$in,$ENV{'CONTENT_LENGTH'});
  }

  @in = split(/&/,$in);

  foreach my $i (0 .. $#in) {
    # Convert plus's to spaces
    $in[$i] =~ s/\+/ /g;

    # Split into key and value.
    ($key, $val) = split(/=/,$in[$i],2); # splits on the first =.

    # Convert %XX from hex numbers to the byte they encode.  "C" is an
    # unsigned char, so values above 0x7F do not wrap.
    $key =~ s/%([0-9A-Fa-f]{2})/pack("C",hex($1))/ge;
    $val =~ s/%([0-9A-Fa-f]{2})/pack("C",hex($1))/ge;

    # Associate key and value
    $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
    $in{$key} .= $val;
  }

  return 1; # just for fun
}

# ****
#
# GetPath
#
# Converts its argument from a partial path ("~amw/file" or "ftp:/path/file") into
# its full equivalent ("/home/pg/amw/public_html/file" or "/scratch/ftp/path/file")
# ensuring that we don't inadvertently allow external users full access to the file
# system. We also remove any instances of ".." in the path.
#
# Argument: the partial path.  Returns: the expanded path as a string.
#
#   "~user/file"  -> <home directory of user> . $USERHTMLDIR . file
#   "ftp:path"    -> $LOCALFTPPATH . path
#   anything else -> $USERHTMLDIR . argument
#
# An unknown user yields an empty home directory.
#
# ****

sub GetPath
  {
  my( $filename ) = @_;
  $filename = '' unless defined $filename;

  if ( $filename =~ /^~/ )
    {
    my( $name, $file ) = ( $filename, '' );
    if ( $filename =~ /^~([^\/]*)\/(.*)/ )
      {
      ( $name, $file ) = ( $1, $2 );
      }
    my @entry = getpwnam( $name );
    # Element 7 is the home directory.  Counting back from the end of the
    # list picks up the shell on platforms that append an $expire field.
    my $home = defined $entry[7] ? $entry[7] : '';
    $filename = $home.$USERHTMLDIR.$file;
    }
  elsif ( $filename =~ /^ftp:(.*)/ )
    {
    $filename = $LOCALFTPPATH.$1;
    }
  else
    {
#    $filename = "";
    $filename = $USERHTMLDIR.$filename;
    }

  $filename =~ s/\.\.//g;                  # Make sure we don't allow any ..'ing

  # The result is handed to a two-argument open(), where a trailing '|'
  # would turn the name into a command to execute.  Drop any trailing
  # pipes/whitespace so a request can only ever name a file.
  $filename =~ s/[|\s]+$//;

  return $filename;
  }

# ****
#
# PrintHeader
#
# Prints the line that tells WWW that we're an HTML document (honest!)
#
# ****

sub PrintHeader
  {
  print "Content-type: text/html\n\n";
  }

# ****
#
# Search
#
# See if entry ($_) matches required search term. First argument is
# the search term (in this case $in{ 'term' }), the second is the
# search type ($in{ 'type' }), the third is the field ($in{ 'field' }).
#
# ****

sub Search
  {
  # Arguments: search term(s), search type ('exact', 'substr' or 'regexp')
  # and a whitespace separated field list ('' or 'all' searches the whole
  # entry).  The entry itself is taken from $_.
  # Returns 1 if the entry matches, 0 otherwise.  For 'exact' and 'substr'
  # the entry matches as soon as ANY one of the keywords is found.
  my( $term, $type, $fieldlist ) = @_;
  my $entry = $_;                          # Capture $_ before anything can clobber it
  $term = '' unless defined $term;
  $type = '' unless defined $type;

  # Work out the text to search in.
  my $searchin;
  if ( defined $fieldlist && $fieldlist ne "" && $fieldlist ne "all" )
    {
    $searchin = '';
    foreach my $name ( split( ' ', $fieldlist ) )
      {
      $searchin .= &GetField( $name, $entry );
      }
    }
  else
    {
    $searchin = $entry;
    }
  $searchin = '' unless defined $searchin;

  if ( $type eq 'regexp' )
    {
    # NOTE: the pattern comes straight from the request.  A malformed
    # pattern must not kill the script, so compile it inside eval and
    # treat a compile failure as "no match".
    my $pattern = eval { qr/(?:$term)/i };
    return 0 unless defined $pattern;
    return ( $searchin =~ $pattern ) ? 1 : 0;
    }

  return 0 unless $type eq 'exact' || $type eq 'substr';

  # split ' ' ignores leading whitespace, so a term such as " foo" does
  # not produce an empty keyword (which would match every entry).
  foreach my $word ( split( ' ', $term ) )
    {
    # \Q...\E: keywords are literal text, not patterns, so "C++" or "("
    # are searched for as typed instead of breaking the regex.
    if ( $type eq 'exact' )
      {
      return 1 if $searchin =~ /\b\Q$word\E\b/i;
      }
    else
      {
      return 1 if $searchin =~ /\Q$word\E/i;
      }
    }

  return 0;
  }

# ****
#
# MarkupEntry
#
# Create a line of HTML for each entry in the file. This pulls the
# title and the author (or editor) from the BibTeX entry in $_, and
# prints a HTML heading, followed by an availability list of urls if
# there are any, followed by the full entry in
# <PRE> formatted form.
#
# ****

sub MarkupEntry
  {
  local( $junk, $bibtex ) = split( '@', $_, 2 );
  $bibtex = '@'.$bibtex;
  local( $key, $rest ) = split( ',', $bibtex, 2 );
  $key =~ s/^@.*[{(]\s*(.*)\s*$/$1/;                     # Retrieve the key

  local( $author, $title );
  $title = &GetField( "title", $bibtex );

  if ( /author\s*=/i )
    {
    $author = &GetField( "author", $bibtex );
    }
  elsif ( /editor\s*=/i )
    {
    $author = &GetField( "editor", $bibtex );
    if ( $author =~ /\band\b/ )
      {
      $author .= " (Eds)";
      }
    else
      {
      $author .= " (Ed)";
      }
    }
  else
    {
    $author = "";
    }
               
  print "\n\n";
  print "\n";
    }
  elsif ( /\@article/i )
    {
    $jl = &GetField( "journal", $bibtex );
    $yr = &GetField( "year", $bibtex );
    $vl = &GetField( "volume", $bibtex );
    $nu = &GetField( "number", $bibtex );
    $ps = &GetField( "pages", $bibtex );
    print $jl, ", ", $yr, ", ", $vl, ", ", $nu, ", ", $ps, "\n";
    }
  elsif ( /\@inbook/i )
    {
    $bt = &GetField( "booktitle", $bibtex );
    $yr = &GetField( "year", $bibtex );
    $ed = &GetField( "editor", $bibtex );
    $pb = &GetField( "publisher", $bibtex );
    $ps = &GetField( "pages", $bibtex );
    print $bt, ", ", $yr, ", ", $ed, "(Ed.), ", $pb, ", ", $ps, "\n";
    }
  elsif ( /\@book/i )
    {
    $bt = &GetField( "booktitle", $bibtex );
    $yr = &GetField( "year", $bibtex );
    $pb = &GetField( "publisher", $bibtex );
    $ps = &GetField( "pages", $bibtex );
    print $bt, ", ", $yr, ", ", $pb, ", ", $ps, "\n";
    }
  elsif ( /\@phdthesis/i )
    {
    $uv = &GetField( "university", $bibtex );
    $yr = &GetField( "year", $bibtex );
    print $uv, ", ", $yr, "\n";
    }
  elsif ( /\@techreport/i )
    {
    $jl = &GetField( "journal", $bibtex );
    $yr = &GetField( "year", $bibtex );
    $vl = &GetField( "volume", $bibtex );
    $ps = &GetField( "pages", $bibtex );
    print $jl, ", ", $yr, ", ", $vl, ", ", $ps, "\n";
    }
  else
    {
    print "There\'s something wrong with the database format!\n";
    }

  if ( /url\s*=/i )
    {
    print "\n";
    }

  s/
//i;
  s/<\/PRE>//i;
  if ( $in{'bibtex'} > 0 )                # ADDED FOR NON BIBTEX USERS
    {
    print "
\n"; print "\n"; print "
", $title, "
\n"; print "", $author, "
\n"; # Printing reference location for different media local( $bt, $jl, $uv, $yr, $ps, $vl, $pb, $nu ); if ( /\@inproceedings/i ) { $bt = &GetField( "booktitle", $bibtex ); $yr = &GetField( "year", $bibtex ); $ps = &GetField( "pages", $bibtex ); print $bt, ", ", $yr, "
Available as", &MarkupURL( &GetField( "url", $bibtex ) ), ".
BibTeX:
\n", $bibtex, "
\n"; } else { print " \n"; } } # **** # # MarkupURL # # Takes a list of URLs seperated by commas and expands them into a html list # that you can click on. # # **** sub MarkupURL { local( $url, $format, $html ) = ""; local( @urls ) = split( ',', $_[0] ); foreach $url ( 0..$#urls ) { $html .= "\nand" if ( $url == $#urls && $#urls >= 1 ); $html .= "," if ( $url >= 1 && $url < $#urls ); $format = ""; if ( $urls[ $url ] =~ /\.Z/ || $urls[ $url ] =~ /\.gz/ ) { $format = " compressed file"; } if ( $urls[ $url ] =~ /\.ps/i ) { $format .= " postscript file"; } elsif ( $urls[ $url ] =~ /\.pdf/i ) { $format .= " PDF"; } elsif ( $urls[ $url ] =~ /\.txt/i ) { $format .= " text only"; } elsif ( $urls[ $url ] =~ /\.gif/i ) { $format .= " gif image"; } elsif ( $urls[ $url ] =~ /\.html/i ) { $format .= " HTML file"; } else { $format = $urls[ $url ]; $format =~ s/.*\/([^\/]*)$/$1/; } $urls[ $url ] =~ s/^\s+//; if ( $urls[ $url ] =~ /\SPONSORS/i ) { $html .= "".$format." for sponsors only<\/FONT>"; } else { $html .= "".$format.""; } } return $html; } # **** # # GetField # # Gets the field specified in the first argument and strips it of quotes and/or # squiggly brackets, removes excess spaces and returns it. # # **** sub GetField { local( $field, $contents ) = @_; # Arguments: field name, bibtex entry $contents =~ s/\n/ /g; # Remove all \n's if ( $contents =~ /.*\b($field)\s*=\s*"([^"]*)"\s*,/i ) { $contents = $2; } # elsif ( $contents =~ /.*\b($field)\s*=\s*{(.*)}\s*,/i ) elsif ( $contents =~ /.*\b($field)\s*=\s*{(.*)}\s*/i ) # CHANGED FOR LAST ITEM WITHOUT COMMA ENDING { $contents = $2; # Contains remaining fields too $contents =~ s/}\s*,.*//g; # So remove everything after }, } elsif ( $contents =~ /.*\b($field)\s*=\s*(\d*)\s*,/i ) { $contents = $2; } else { $contents = ""; } $contents =~ s/"|{|}//g; # Remove ""`s and {}'s $contents =~ s/\s+/ /g; # Make lots of spaces into 1. 
return $contents; } # **** # # ProcessFile # # Process the file (filename specified by the first argument). This involves opening # it, printing the last part of the filename, stripping off the html header (if it # is not a .bib file), printing all the entries that match the search term (or "none # found" if there aren't any) and closing the file again. # # **** sub ProcessFile { local( $filename ) = &GetPath( $_[0] ); open( FILE, $filename ) || print "

Couldn't Open Input file - ".$filename."

\n"; if ( $filename =~ /_/ ) # Works with filenames like bibsearch_papers.bib otherwise old version { local( $filepart ) = substr( $filename, rindex( $filename, '_' ) + 1); $filepart =~ s/\.bib/$1/; print "

Results for ".$filepart."

\n"; # Prints just e.g. papers } else { local( $filepart ) = substr( $filename, rindex( $filename, '/' ) + 1); print "

Results of ".$filepart."

\n"; # Prints whole filename without path } local( $found ) = 0; if ( $filename =~ /\.bib$/ ) { $stage='body'; $/ = ""; } else { $stage='header'; } while ( ) { if ( $stage eq 'body' ) { if ( /@.*{/ && &Search( $in{'term'}, $in{'type'}, $in{'field'} ) ) # If bibtex entry and matches search. { &MarkupEntry(); $found = 1; } if ( //i ) # If end of entries { $stage='footer'; $/ = "\n"; } } if ( ( $stage eq 'header' ) && //i ) # If end of header { $stage='body'; $/ = ""; } } print "No matches found.

\n" if $found == 0; close( FILE ); } # **** # # PrintTemplate # # Copy a Template file substituting variables where necessary. # # **** sub PrintTemplate { local( $filename ) = &GetPath( $_[0] ); open( TEMPLATE, $filename ) || print "

Incorrect or can't find Template File!\n", $_[0] , "

\n"; while (