# NHSE Repository in a Box (RIB) # # The authors of this software are Paul McMahan and Jeff Horner. # Copyright (c) 1997 by the University of Tennessee. # Permission to use, copy, modify, and distribute this software for any # purpose without fee is hereby granted, provided that this entire notice # is included in all copies of any software which is or includes a copy # or modification of this software and in all copies of the supporting # documentation for such software. # THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED # WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR UNIVERSITY OF TENNESSEE # MAKE ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE # MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. # # $Id: BIDMParser.pm,v 1.1.1.1 1997/12/10 15:59:34 jhorner Exp $ # # $Log: BIDMParser.pm,v $ # Revision 1.1.1.1 1997/12/10 15:59:34 jhorner # RIB pre 1.0 # # Revision 1.1 1997/05/06 19:01:21 jhorner # Initial revision # package RIB::BIDMParser; use RIB::Util; require HTML::Parser; @ISA = qw(HTML::Parser); use HTML::Entities (); use LWP::UserAgent (); use HTTP::Request (); use HTTP::Date (); use Data::Dumper (); use strict; use vars qw( $VERSION $DEBUG $DONE ); $DEBUG = 1; $DONE = 0; $VERSION = 0.9; sub new { my $class = shift; my $self = bless HTML::Parser->new,$class; $self->{'META'} = {}; # Hash of name=>content pair. Each key of the hash # is a BIDM string. Each value is a list # of one or more values for the BIDM string. $self->{'LINK'} = {}; # Hash of rel=>href and and rev=>href pairs. $self->{'_errormsg'} = []; return $self; } sub parse { my $self = shift; eval { $self->SUPER::parse(@_) }; if ($@) { print $@ if $DEBUG; $self->{'_buf'} = ''; # flush rest of buffer push @{$self->{'_errormsg'}}, "HTML::Parser::Parse Failed!"; return ''; } return 1; } sub ErrorMsg { shift @{shift->{'_errormsg'}}; } sub parse_file { my ($self, $file) = @_; unless (open(F, $file)){ push @{$self->{'_errormsg'}}, "Can't open $file: $!"; return 0; } my $chunk = ''; while(read(F, $chunk, 2048)) { $self->parse($chunk); } close(F); $self->parse(undef); #EOF return 1; } sub parse_url { my ($self,$url,$lm) = @_; my $ua = LWP::UserAgent->new("RIBcatalog/1.0"); my $req = HTTP::Request->new('GET',$url); my $res = $ua->request($req); if ($res->is_success){ $self->parse($res->content); # been modified since last modified time $lm # if it has, return HTTP RESPONCE CODE 304 # and update $lm else return 1 if (defined $lm && $res->last_modified > HTTP::Date::str2time($$lm)){ $$lm = HTTP::Date::time2str($res->last_modified); return 304,$res->content; } else { return 1,$res->content; } } else { my $buf = $res->code . " - " . $res->message; push @{$self->{'_errormsg'}}, $buf; return 0; } } sub error_msg { my $self = shift; shift @{$self->{'_errormsg'}}; } sub start { my ($self,$tag,$attr) = @_; if ( $tag eq "meta"){ if (exists $attr->{'name'}){ my $buf = pretty($attr->{'name'}); my $field; if ($buf =~ /^(.*)\.(.*)\.(.*)$/){ unless (exists $self->{'CLASS'}){ $self->{'CLASS'} = $2; } $field = $3; } elsif ($buf =~ /^(.*)\.(.*)$/){ unless (exists $self->{'CLASS'}){ $self->{'CLASS'} = $1; } $field = $2; } unless (exists $self->{'META'}{$field}){ $self->{'META'}{$field} = []; } my $array = $self->{'META'}{$field}; push @$array, pretty($attr->{'content'}); } } elsif ($tag eq "link"){ if (exists $attr->{'rel'}){ my $buf = pretty($attr->{'rel'}); my $field; if ($buf =~ /^(.*)\.(.*)\.(.*)\.(.*)$/){ unless (exists $self->{'CLASS'}){ $self->{'CLASS'} = $2; } $field = $3; } elsif ($buf =~ /^(.*)\.(.*)\.(.*)$/){ unless (exists $self->{'CLASS'}){ $self->{'CLASS'} = $1; } $field = $2; } unless (exists $self->{'LINK'}{$field}){ $self->{'LINK'}{$field} = []; } my $array = $self->{'LINK'}{$field}; push @$array, pretty($attr->{'href'}); } if (exists $attr->{'rev'}){ my $buf = pretty($attr->{'rev'}); my $field; if ($buf =~ /^(.*)\.(.*)\.(.*)\.(.*)$/){ unless (exists $self->{'CLASS'}){ $self->{'CLASS'} = $2; } $field = $3; } elsif ($buf =~ /^(.*)\.(.*)\.(.*)$/){ unless (exists $self->{'CLASS'}){ $self->{'CLASS'} = $1; } $field = $2; } unless (exists $self->{'LINK'}{$field}){ $self->{'LINK'}{$field} = []; } my $array = $self->{'LINK'}{$field}; push @$array, pretty($attr->{'href'}); } } } sub link { my $self = shift; return %{$self->{'LINK'}}; } sub meta { my $self = shift; return %{$self->{'META'}}; } sub pretty { my $buf = shift; HTML::Entities::decode($buf); $buf =~ s/^\s+//; $buf =~ s/\s+$//; $buf =~ s/\s+/ /g; return $buf; } sub valuesof { my ($self,$field) = @_; return @{$self->{'META'}->{$field}} if (exists $self->{'META'}->{$field}); return @{$self->{'LINK'}->{$field}} if (exists $self->{'LINK'}->{$field}); return (); } sub valueof { my ($self,$field) = @_; return () unless $self->valuesof($field); my @array = $self->valuesof($field); return $array[0]; } 1;