http://wiki.crossplatform.ru/index.php?title=Perl/FAQ/%D0%A0%D0%B0%D0%B1%D0%BE%D1%82%D0%B0_%D1%81_Web&feed=atom&action=history
Perl/FAQ/Работа с Web - История изменений
2024-03-28T22:48:29Z
История изменений этой страницы в вики
MediaWiki 1.15.1
http://wiki.crossplatform.ru/index.php?title=Perl/FAQ/%D0%A0%D0%B0%D0%B1%D0%BE%D1%82%D0%B0_%D1%81_Web&diff=3525&oldid=prev
Root: Новая: {{Perl_header}} == Introduction == <source lang="perl"> #----------------------------- http://www.perl.com/CPAN/modules/by-category/15_World_Wide_Web_HTML_HTTP_CGI/ #-------------------...
2008-12-03T13:04:48Z
<p>Новая: {{Perl_header}} == Introduction == <source lang="perl"> #----------------------------- http://www.perl.com/CPAN/modules/by-category/15_World_Wide_Web_HTML_HTTP_CGI/ #-------------------...</p>
<p><b>Новая страница</b></p><div>{{Perl_header}}<br />
<br />
== Introduction ==<br />
<source lang="perl"><br />
#-----------------------------<br />
http://www.perl.com/CPAN/modules/by-category/15_World_Wide_Web_HTML_HTTP_CGI/<br />
#-----------------------------<br />
<br />
</source><br />
== Fetching a URL from a Perl Script ==<br />
<source lang="perl"><br />
#-----------------------------<br />
use LWP::Simple;<br />
<br />
$content = get($URL);<br />
#-----------------------------<br />
use LWP::Simple;<br />
unless (defined ($content = get $URL)) {<br />
<br />
die "could not get $URL\n";<br />
}<br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w <br />
# titlebytes - find the title and size of documents <br />
<br />
use LWP::UserAgent; <br />
use HTTP::Request; <br />
use HTTP::Response; <br />
<br />
use URI::Heuristic;<br />
my $raw_url = shift or die "usage: $0 url\n"; <br />
<br />
my $url = URI::Heuristic::uf_urlstr($raw_url);<br />
$| = 1; # to flush next line <br />
printf "%s =>\n\t", $url;<br />
<br />
my $ua = LWP::UserAgent->new(); <br />
$ua->agent("Schmozilla/v9.14 Platinum"); # give it time, it'll get there<br />
my $req = HTTP::Request->new(GET => $url); <br />
<br />
$req->referer("http://wizard.yellowbrick.oz");<br />
# perplex the log analysers<br />
my $response = $ua->request($req);<br />
<br />
if ($response->is_error()) {<br />
printf " %s\n", $response->status_line;<br />
<br />
} else {<br />
my $count;<br />
my $bytes;<br />
<br />
my $content = $response->content();<br />
$bytes = length $content;<br />
$count = ($content =~ tr/\n/\n/);<br />
<br />
printf "%s (%d lines, %d bytes)\n", $response->title(), $count, $bytes; } <br />
<br />
#-----------------------------<br />
#% titlebytes http://www.tpj.com/<br />
#http://www.tpj.com/ =><br />
# The Perl Journal (109 lines, 4530 bytes)<br />
#-----------------------------<br />
<br />
</source><br />
== Automating Form Submission ==<br />
<source lang="perl"><br />
#-----------------------------<br />
use LWP::Simple;<br />
use URI::URL;<br />
<br />
my $url = url('http://www.perl.com/cgi-bin/cpan_mod');<br />
$url->query_form(module => 'DB_File', readme => 1);<br />
<br />
$content = get($url);<br />
#-----------------------------<br />
use HTTP::Request::Common qw(POST);<br />
use LWP::UserAgent;<br />
<br />
$ua = LWP::UserAgent->new();<br />
my $req = POST 'http://www.perl.com/cgi-bin/cpan_mod',<br />
[ module => 'DB_File', readme => 1 ];<br />
<br />
$content = $ua->request($req)->as_string;<br />
#-----------------------------<br />
field1=value1&field2=value2&field3=value3<br />
#-----------------------------<br />
http://www.site.com/path/to/<br />
script.cgi?field1=value1&field2=value2&field3=value3<br />
<br />
#-----------------------------<br />
http://www.site.com/path/to/<br />
script.cgi?arg=%22this+isn%27t+%3CEASY%3E+%26+%3CFUN%3E%22<br />
#-----------------------------<br />
$ua->proxy(['http', 'ftp'] => 'http://proxy.myorg.com:8081');<br />
<br />
#-----------------------------<br />
<br />
</source><br />
== Extracting URLs ==<br />
<source lang="perl"><br />
#-----------------------------<br />
use HTML::LinkExtor;<br />
<br />
$parser = HTML::LinkExtor->new(undef, $base_url);<br />
$parser->parse_file($filename);<br />
<br />
@links = $parser->links;<br />
foreach $linkarray (@links) {<br />
my @element = @$linkarray;<br />
<br />
my $elt_type = shift @element; # element type<br />
<br />
# possibly test whether this is an element we're interested in<br />
while (@element) {<br />
<br />
# extract the next attribute and its value<br />
my ($attr_name, $attr_value) = splice(@element, 0, 2);<br />
<br />
# ... do something with them ...<br />
}<br />
}<br />
#-----------------------------<br />
<A HREF="http://www.perl.com/">Home page</A><br />
<IMG SRC="images/big.gif" LOWSRC="images/big-lowres.gif"><br />
<br />
#-----------------------------<br />
[<br />
[ a, href => "http://www.perl.com/" ],<br />
[ img, src => "images/big.gif",<br />
lowsrc => "images/big-lowres.gif" ]<br />
<br />
]<br />
#-----------------------------<br />
if ($elt_type eq 'a' && $attr_name eq 'href') {<br />
<br />
print "ANCHOR: $attr_value\n" <br />
if $attr_value->scheme =~ /http|ftp/;<br />
}<br />
if ($elt_type eq 'img' && $attr_name eq 'src') {<br />
<br />
print "IMAGE: $attr_value\n";<br />
}<br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w<br />
# xurl - extract unique, sorted list of links from URL<br />
use HTML::LinkExtor;<br />
<br />
use LWP::Simple;<br />
<br />
$base_url = shift;<br />
$parser = HTML::LinkExtor->new(undef, $base_url);<br />
<br />
$parser->parse(get($base_url))->eof;<br />
@links = $parser->links;<br />
<br />
foreach $linkarray (@links) {<br />
my @element = @$linkarray;<br />
<br />
my $elt_type = shift @element;<br />
while (@element) {<br />
<br />
my ($attr_name , $attr_value) = splice(@element, 0, 2);<br />
<br />
$seen{$attr_value}++;<br />
}<br />
}<br />
for (sort keys %seen) { print $_, "\n" }<br />
<br />
#-----------------------------<br />
#% xurl http://www.perl.com/CPAN<br />
#ftp://ftp@ftp.perl.com/CPAN/CPAN.html<br />
#<br />
#http://language.perl.com/misc/CPAN.cgi<br />
#<br />
#http://language.perl.com/misc/cpan_module<br />
#<br />
#http://language.perl.com/misc/getcpan<br />
#<br />
#http://www.perl.com/index.html<br />
#<br />
#http://www.perl.com/gifs/lcb.xbm<br />
#-----------------------------<br />
<URL:http://www.perl.com><br />
#-----------------------------<br />
@URLs = ($message =~ /<URL:(.*?)>/g);<br />
<br />
#-----------------------------<br />
<br />
</source><br />
== Converting ASCII to HTML ==<br />
<source lang="perl"><br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w -p00<br />
# text2html - trivial html encoding of normal text<br />
# -p means apply this script to each record.<br />
# -00 mean that a record is now a paragraph<br />
<br />
use HTML::Entities;<br />
$_ = encode_entities($_, "\200-\377");<br />
<br />
if (/^\s/) {<br />
# Paragraphs beginning with whitespace are wrapped in <PRE> <br />
s{(.*)$} {<PRE>\n$1</PRE>\n}s; # indented verbatim<br />
<br />
} else {<br />
s{^(>.*)} {$1<BR>}gm; # quoted text<br />
<br />
s{<URL:(.*?)>} {<A HREF="$1">$1</A>}gs # embedded URL (good)<br />
<br />
||<br />
s{(http:\S+)} {<A HREF="$1">$1</A>}gs; # guessed URL (bad)<br />
<br />
s{\*(\S+)\*} {<STRONG>$1</STRONG>}g; # this is *bold* here<br />
<br />
s{\b_(\S+)\_\b} {<EM>$1</EM>}g; # this is _italics_ here<br />
<br />
s{^} {<P>\n}; # add paragraph tag<br />
}<br />
<br />
#-----------------------------<br />
BEGIN {<br />
<br />
print "<TABLE>";<br />
$_ = encode_entities(scalar <>);<br />
s/\n\s+/ /g; # continuation lines<br />
<br />
while ( /^(\S+?:)\s*(.*)$/gm ) { # parse heading<br />
<br />
print "<TR><TH ALIGN='LEFT'>$1</TH><TD>$2</TD></TR>\n";<br />
}<br />
<br />
print "</TABLE><HR>";<br />
}<br />
#-----------------------------<br />
<br />
</source><br />
== Converting HTML to ASCII ==<br />
<source lang="perl"><br />
#-----------------------------<br />
$ascii = `lynx -dump $filename`;<br />
#-----------------------------<br />
<br />
use HTML::FormatText;<br />
use HTML::Parse;<br />
<br />
$html = parse_htmlfile($filename);<br />
$formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 50);<br />
<br />
$ascii = $formatter->format($html);<br />
#-----------------------------<br />
use HTML::TreeBuilder;<br />
use HTML::FormatText;<br />
<br />
$html = HTML::TreeBuilder->new();<br />
$html->parse($document);<br />
<br />
$formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 50);<br />
<br />
$ascii = $formatter->format($html);<br />
#-----------------------------<br />
<br />
</source><br />
== Extracting or Removing HTML Tags ==<br />
<source lang="perl"><br />
#-----------------------------<br />
($plain_text = $html_text) =~ s/<[^>]*>//gs; #WRONG<br />
#-----------------------------<br />
<br />
use HTML::Parse;<br />
use HTML::FormatText;<br />
$plain_text = HTML::FormatText->new->format(parse_html($html_text));<br />
<br />
#-----------------------------<br />
#% perl -pe 's/<[^>]*>//g' file<br />
#-----------------------------<br />
#<IMG SRC = "foo.gif"<br />
# ALT = "Flurp!"><br />
#-----------------------------<br />
#% perl -0777 -pe 's/<[^>]*>//gs' file<br />
#-----------------------------<br />
{<br />
local $/; # temporary whole-file input mode<br />
<br />
$html = <FILE>;<br />
$html =~ s/<[^>]*>//gs;<br />
}<br />
<br />
#-----------------------------<br />
#<IMG SRC = "foo.gif" ALT = "A > B"><br />
#<br />
#<!-- <A comment> --><br />
#<br />
#<script>if (a<b && a>c)</script><br />
<br />
#<br />
#<# Just data #><br />
#<br />
#<![INCLUDE CDATA [ >>>>>>>>>>>> ]]><br />
#-----------------------------<br />
#<!-- This section commented out.<br />
# <B>You can't see me!</B><br />
#--><br />
#-----------------------------<br />
<br />
package MyParser;<br />
use HTML::Parser;<br />
use HTML::Entities qw(decode_entities);<br />
<br />
@ISA = qw(HTML::Parser);<br />
<br />
sub text {<br />
my($self, $text) = @_;<br />
<br />
print decode_entities($text);<br />
}<br />
<br />
package main;<br />
MyParser->new->parse_file(*F);<br />
<br />
#-----------------------------<br />
($title) = ($html =~ m#<TITLE>\s*(.*?)\s*</TITLE>#is);<br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl<br />
# htitle - get html title from URL<br />
<br />
<br />
die "usage: $0 url ...\n" unless @ARGV;<br />
require LWP;<br />
<br />
foreach $url (@ARGV) {<br />
<br />
$ua = LWP::UserAgent->new();<br />
$res = $ua->request(HTTP::Request->new(GET => $url));<br />
<br />
print "$url: " if @ARGV > 1;<br />
if ($res->is_success) {<br />
<br />
print $res->title, "\n";<br />
} else {<br />
print $res->status_line, "\n";<br />
<br />
}<br />
}<br />
<br />
#-----------------------------<br />
#% htitle http://www.ora.com<br />
#www.oreilly.com -- Welcome to O'Reilly & Associates!<br />
#<br />
#% htitle http://www.perl.com/ http://www.perl.com/nullvoid<br />
#http://www.perl.com/: The www.perl.com Home Page<br />
#http://www.perl.com/nullvoid: 404 File Not Found<br />
#-----------------------------<br />
<br />
</source><br />
== Finding Stale Links ==<br />
<source lang="perl"><br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w<br />
# churl - check urls<br />
<br />
use HTML::LinkExtor;<br />
<br />
use LWP::Simple qw(get head);<br />
<br />
$base_url = shift<br />
or die "usage: $0 <start_url>\n";<br />
<br />
$parser = HTML::LinkExtor->new(undef, $base_url);<br />
$parser->parse(get($base_url));<br />
@links = $parser->links;<br />
<br />
print "$base_url: \n";<br />
foreach $linkarray (@links) {<br />
my @element = @$linkarray;<br />
<br />
my $elt_type = shift @element;<br />
while (@element) {<br />
<br />
my ($attr_name , $attr_value) = splice(@element, 0, 2);<br />
<br />
if ($attr_value->scheme =~ /\b(ftp|https?|file)\b/) {<br />
print " $attr_value: ", head($attr_value) ? "OK" : "BAD", "\n";<br />
<br />
}<br />
}<br />
}<br />
<br />
#-----------------------------<br />
#% churl http://www.wizards.com<br />
#http://www.wizards.com:<br />
#<br />
# FrontPage/FP_Color.gif: OK<br />
#<br />
# FrontPage/FP_BW.gif: BAD<br />
#<br />
# #FP_Map: OK<br />
#<br />
# Games_Library/Welcome.html: OK<br />
#-----------------------------<br />
<br />
</source><br />
== Finding Fresh Links ==<br />
<source lang="perl"><br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w<br />
# surl - sort URLs by their last modification date<br />
<br />
use LWP::UserAgent;<br />
<br />
use HTTP::Request;<br />
use URI::URL qw(url);<br />
<br />
my($url, %Date);<br />
<br />
my $ua = LWP::UserAgent->new();<br />
<br />
while ( $url = url(scalar <>) ) {<br />
<br />
my($req, $ans);<br />
next unless $url->scheme =~ /^(file|https?)$/;<br />
<br />
$ans = $ua->request(HTTP::Request->new("HEAD", $url));<br />
if ($ans->is_success) {<br />
<br />
$Date{$url} = $ans->last_modified || 0; # unknown<br />
} else {<br />
<br />
print STDERR "$url: Error [", $ans->code, "] ", $ans->message, "!\n";<br />
}<br />
}<br />
<br />
foreach $url ( sort { $Date{$b} <=> $Date{$a} } keys %Date ) {<br />
<br />
printf "%-25s %s\n", $Date{$url} ? (scalar localtime $Date{$url})<br />
<br />
: "<NONE SPECIFIED>", $url;<br />
}<br />
<br />
#-----------------------------<br />
#% xurl http://www.perl.com/ | surl | head<br />
#Mon Apr 20 06:16:02 1998 http://electriclichen.com/linux/srom.html<br />
#<br />
#Fri Apr 17 13:38:51 1998 http://www.oreilly.com/<br />
#<br />
#Fri Mar 13 12:16:47 1998 http://www2.binevolve.com/<br />
#<br />
#Sun Mar 8 21:01:27 1998 http://www.perl.org/<br />
#<br />
#Tue Nov 18 13:41:32 1997 http://www.perl.com/universal/header.map<br />
#<br />
#Wed Oct 1 12:55:13 1997 http://www.songline.com/<br />
#<br />
#Sun Aug 17 21:43:51 1997 http://www.perl.com/graphics/perlhome_header.jpg<br />
#<br />
#Sun Aug 17 21:43:47 1997 http://www.perl.com/graphics/perl_id_313c.gif<br />
#<br />
#Sun Aug 17 21:43:46 1997 http://www.perl.com/graphics/ora_logo.gif<br />
#<br />
#Sun Aug 17 21:43:44 1997 http://www.perl.com/graphics/header-nav.gif<br />
#-----------------------------<br />
<br />
</source><br />
== Creating HTML Templates ==<br />
<source lang="perl"><br />
#-----------------------------<br />
sub template {<br />
<br />
my ($filename, $fillings) = @_;<br />
my $text;<br />
<br />
local $/; # slurp mode (undef)<br />
local *F; # create local filehandle<br />
open(F, "< $filename\0") || return;<br />
<br />
$text = <F>; # read whole file<br />
close(F); # ignore retval<br />
# replace quoted words with value in %$fillings hash<br />
$text =~ s{ %% ( .*? ) %% }<br />
<br />
{ exists( $fillings->{$1} )<br />
? $fillings->{$1}<br />
<br />
: ""<br />
}gsex;<br />
return $text;<br />
}<br />
#-----------------------------<br />
#<!-- simple.template for internal template() function --><br />
<br />
#<HTML><HEAD><TITLE>Report for %%username%%</TITLE></HEAD><br />
#<BODY><H1>Report for %%username%%</H1><br />
<br />
#%%username%% logged in %%count%% times, for a total of %%total%% minutes.<br />
#-----------------------------<br />
#<!-- fancy.template for Text::Template --><br />
#<HTML><HEAD><TITLE>Report for {$user}</TITLE></HEAD><br />
<br />
#<BODY><H1>Report for {$user}</H1><br />
#{ lcfirst($user) } logged in {$count} times, for a total of <br />
#{ int($total / 60) } minutes.<br />
#-----------------------------<br />
%fields = (<br />
username => $whats_his_name,<br />
count => $login_count,<br />
total => $minute_used,<br />
<br />
);<br />
<br />
print template("/home/httpd/templates/simple.template", \%fields);<br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w<br />
# userrep1 - report duration of user logins using SQL database<br />
<br />
<br />
use DBI;<br />
use CGI qw(:standard);<br />
<br />
# template() defined as in the Solution section above<br />
$user = param("username") or die "No username";<br />
<br />
$dbh = DBI->connect("dbi:mysql:connections:mysql.domain.com:3306",<br />
"connections", "seekritpassword") or die "Couldn't connect\n";<br />
<br />
$sth = $dbh->prepare(<<"END_OF_SELECT") or die "Couldn't prepare SQL";<br />
SELECT COUNT(duration),SUM(duration) <br />
<br />
FROM logins WHERE username='$user'<br />
END_OF_SELECT<br />
<br />
# this time the duration is assumed to be in seconds<br />
if (@row = $sth->fetchrow()) {<br />
<br />
($count, $seconds) = @row;<br />
} else {<br />
($count, $seconds) = (0,0);<br />
<br />
} <br />
<br />
$sth->finish();<br />
$dbh->disconnect;<br />
<br />
print header();<br />
<br />
print template("report.tpl", { <br />
'username' => $user,<br />
'count' => $count,<br />
'total' => $total <br />
<br />
});<br />
<br />
#-----------------------------<br />
You owe: {$total}<br />
#-----------------------------<br />
The average was {$count ? ($total/$count) : 0}.<br />
<br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w<br />
# userrep2 - report duration of user logins using SQL database<br />
<br />
use Text::Template;<br />
use DBI;<br />
use CGI qw(:standard);<br />
<br />
$tmpl = "/home/httpd/templates/fancy.template";<br />
$template = Text::Template->new(-type => "file", -source => $tmpl);<br />
<br />
$user = param("username") or die "No username";<br />
<br />
$dbh = DBI->connect("dbi:mysql:connections:mysql.domain.com:3306",<br />
"connections", "secret passwd") or die "Couldn't db connect\n";<br />
<br />
$sth = $dbh->prepare(<<"END_OF_SELECT") or die "Couldn't prepare SQL";<br />
SELECT COUNT(duration),SUM(duration) <br />
<br />
FROM logins WHERE username='$user'<br />
END_OF_SELECT<br />
<br />
$sth->execute() or die "Couldn't execute SQL";<br />
<br />
if (@row = $sth->fetchrow()) {<br />
($count, $total) = @row;<br />
<br />
} else {<br />
$count = $total = 0;<br />
}<br />
<br />
$sth->finish();<br />
<br />
$dbh->disconnect;<br />
<br />
print header();<br />
print $template->fill_in();<br />
<br />
#-----------------------------<br />
<br />
</source><br />
== Mirroring Web Pages ==<br />
<source lang="perl"><br />
#-----------------------------<br />
use LWP::Simple;<br />
mirror($URL, $local_filename);<br />
#-----------------------------<br />
<br />
</source><br />
== Creating a Robot ==<br />
<source lang="perl"><br />
#-----------------------------<br />
use LWP::RobotUA;<br />
<br />
$ua = LWP::RobotUA->new('websnuffler/0.1', 'me@wherever.com');<br />
#-----------------------------<br />
403 (Forbidden) Forbidden by robots.txt<br />
<br />
#-----------------------------<br />
#% GET http://www.webtechniques.com/robots.txt <br />
#User-agent: *<br />
#<br />
# Disallow: /stats<br />
#<br />
# Disallow: /db<br />
#<br />
# Disallow: /logs<br />
#<br />
# Disallow: /store<br />
#<br />
# Disallow: /forms<br />
#<br />
# Disallow: /gifs<br />
#<br />
# Disallow: /wais-src<br />
#<br />
# Disallow: /scripts<br />
#<br />
# Disallow: /config<br />
#-----------------------------<br />
#% GET http://www.cnn.com/robots.txt | head<br />
## robots, scram<br />
#<br />
## $I d : robots.txt,v 1.2 1998/03/10 18:27:01 mreed Exp $<br />
#<br />
#User-agent: *<br />
#<br />
#Disallow: /<br />
#<br />
#User-agent: Mozilla/3.01 (hotwired-test/0.1)<br />
#<br />
#Disallow: /cgi-bin<br />
#<br />
#Disallow: /TRANSCRIPTS<br />
#<br />
#Disallow: /development<br />
#-----------------------------<br />
<br />
</source><br />
== Parsing a Web Server Log File ==<br />
<source lang="perl"><br />
#-----------------------------<br />
while (<LOGFILE>) {<br />
my ($client, $identuser, $authuser, $date, $time, $tz, $method,<br />
$url, $protocol, $status, $bytes) =<br />
/^(\S+) (\S+) (\S+) \[([^:]+):(\d+:\d+:\d+) ([^\]]+) "(\S+) (.*?) (\S+)" (\S+) (\S+)$/ or next;<br />
<br />
# ...<br />
}<br />
#-----------------------------<br />
<br />
</source><br />
== Processing Server Logs ==<br />
<source lang="perl"><br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w<br />
# sumwww - summarize web server log activity<br />
<br />
$lastdate = "";<br />
daily_logs();<br />
<br />
summary();<br />
exit;<br />
<br />
# read CLF files and tally hits from the host and to the URL<br />
sub daily_logs {<br />
while (<>) {<br />
<br />
($type, $what) = /"(GET|POST)\s+(\S+?) \S+"/ or next;<br />
($host, undef, undef, $datetime) = split;<br />
<br />
($bytes) = /\s(\d+)\s*$/ or next;<br />
($date) = ($datetime =~ /\[([^:]*)/);<br />
<br />
$posts += ($type eq POST);<br />
$home++ if m, / ,;<br />
<br />
if ($date ne $lastdate) {<br />
if ($lastdate) { write_report() }<br />
<br />
else { $lastdate = $date }<br />
}<br />
$count++;<br />
$hosts{$host}++;<br />
<br />
$what{$what}++;<br />
$bytesum += $bytes;<br />
}<br />
write_report() if $count;<br />
<br />
}<br />
<br />
# use *typeglob aliasing of global variables for cheap copy<br />
sub summary {<br />
$lastdate = "Grand Total";<br />
*count = *sumcount;<br />
<br />
*bytesum = *bytesumsum;<br />
*hosts = *allhosts;<br />
*posts = *allposts;<br />
*what = *allwhat;<br />
*home = *allhome;<br />
<br />
write;<br />
}<br />
<br />
# display the tallies of hosts and URLs, using formats<br />
sub write_report {<br />
write;<br />
<br />
# add to summary data<br />
$lastdate = $date;<br />
$sumcount += $count;<br />
$bytesumsum += $bytesum;<br />
$allposts += $posts;<br />
<br />
$allhome += $home;<br />
<br />
# reset daily data<br />
$posts = $count = $bytesum = $home = 0;<br />
@allwhat{keys %what} = keys %what;<br />
<br />
@allhosts{keys %hosts} = keys %hosts;<br />
%hosts = %what = ();<br />
<br />
}<br />
<br />
format STDOUT_TOP =<br />
@|||||||||| @|||||| @||||||| @||||||| @|||||| @|||||| @|||||||||||||<br />
"Date", "Hosts", "Accesses", "Unidocs", "POST", "Home", "Bytes"<br />
<br />
----------- ------- -------- -------- ------- ------- --------------<br />
.<br />
<br />
format STDOUT =<br />
@>>>>>>>>>> @>>>>>> @>>>>>>> @>>>>>>> @>>>>>> @>>>>>> @>>>>>>>>>>>>><br />
<br />
$lastdate, scalar(keys %hosts), <br />
$count, scalar(keys %what), <br />
<br />
$posts, $home, $bytesum<br />
.<br />
<br />
#-----------------------------<br />
# Date Hosts Accesses Unidocs POST Home Bytes<br />
# <br />
# ----------- ------- -------- -------- ------- ------- --------------<br />
# <br />
# 19/May/1998 353 6447 3074 352 51 16058246<br />
# <br />
# 20/May/1998 1938 23868 4288 972 350 61879643<br />
# <br />
# 21/May/1998 1775 27872 6596 1064 376 64613798<br />
# <br />
<br />
# 22/May/1998 1680 21402 4467 735 285 52437374<br />
# <br />
# 23/May/1998 1128 21260 4944 592 186 55623059<br />
# <br />
# Grand Total 6050 100849 10090 3715 1248 250612120<br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w<br />
# aprept - report on Apache logs<br />
<br />
use Logfile::Apache;<br />
<br />
$l = Logfile::Apache->new(<br />
File => "-", # STDIN<br />
Group => [ Domain, File ]);<br />
<br />
$l->report(Group => Domain, Sort => Records);<br />
$l->report(Group => File, List => [Bytes,Records]);<br />
<br />
#-----------------------------<br />
# Domain Records <br />
# <br />
# ==<br />
====<br />
====<br />
====<br />
====<br />
====<br />
====<br />
=====<br />
# <br />
# US Commercial 222 38.47% <br />
# <br />
# US Educational 115 19.93% <br />
<br />
# <br />
# Network 93 16.12% <br />
# <br />
# Unresolved 54 9.36% <br />
# <br />
# Australia 48 8.32% <br />
<br />
# <br />
# Canada 20 3.47% <br />
# <br />
# Mexico 8 1.39% <br />
# <br />
# United Kingdom 6 1.04% <br />
<br />
# <br />
# <br />
# File Bytes Records <br />
# <br />
# ==<br />
====<br />
====<br />
====<br />
====<br />
====<br />
====<br />
====<br />
====<br />
====<br />
====<br />
====<br />
====<br />
====<br />
===<br />
# <br />
# / 13008 0.89% 6 1.04% <br />
<br />
# <br />
# /cgi-bin/MxScreen 11870 0.81% 2 0.35% <br />
# <br />
# /cgi-bin/pickcards 39431 2.70% 48 8.32% <br />
# <br />
# /deckmaster 143793 9.83% 21 3.64% <br />
<br />
# <br />
# /deckmaster/admin 54447 3.72% 3 0.52% <br />
#-----------------------------<br />
<br />
</source><br />
== Program: htmlsub ==<br />
<source lang="perl"><br />
#-----------------------------<br />
#<HTML><HEAD><TITLE>Hi!</TITLE></HEAD><BODY><br />
<br />
#<H1>Welcome to Scooby World!</H1><br />
#I have <A HREF="pictures.html">pictures</A> of the crazy dog<br />
#himself. Here's one!<P><br />
#<IMG SRC="scooby.jpg" ALT="Good doggy!"><P><br />
<br />
#<BLINK>He's my hero!</BLINK> I would like to meet him some day,<br />
#and get my picture taken with him.<P><br />
#P.S. I am deathly ill. <A HREF="shergold.html">Please send<br />
#cards</A>.<br />
#</BODY></HTML><br />
<br />
#-----------------------------<br />
#% htmlsub picture photo scooby.html<br />
#<HTML><HEAD><TITLE>Hi!</TITLE></HEAD><BODY><br />
#<br />
#<H1>Welcome to Scooby World!</H1><br />
#<br />
#I have <A HREF="pictures.html">photos</A> of the crazy dog<br />
#<br />
#himself. Here's one!<P><br />
<br />
#<br />
#<IMG SRC="scooby.jpg" ALT="Good doggy!"><P><br />
#<br />
#<BLINK>He's my hero!</BLINK> I would like to meet him some day,<br />
#<br />
#and get my photo taken with him.<P><br />
#<br />
#P.S. I am deathly ill. <A HREF="shergold.html">Please send<br />
#<br />
#cards</A>.<br />
#<br />
#</BODY></HTML><br />
<br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w<br />
# htmlsub - make substitutions in normal text of HTML files<br />
# from Gisle Aas <gisle@aas.no><br />
<br />
sub usage { die "Usage: $0 <from> <to> <file>...\n" }<br />
<br />
my $from = shift or usage;<br />
my $to = shift or usage;<br />
<br />
usage unless @ARGV;<br />
<br />
# Build the HTML::Filter subclass to do the substituting.<br />
<br />
package MyFilter;<br />
require HTML::Filter;<br />
<br />
@ISA=qw(HTML::Filter);<br />
use HTML::Entities qw(decode_entities encode_entities);<br />
<br />
sub text<br />
{<br />
my $self = shift;<br />
my $text = decode_entities($_[0]);<br />
<br />
$text =~ s/\Q$from/$to/go; # most important line<br />
$self->SUPER::text(encode_entities($text));<br />
}<br />
<br />
# Now use the class.<br />
<br />
package main;<br />
foreach (@ARGV) {<br />
MyFilter->new->parse_file($_);<br />
<br />
}<br />
<br />
#-----------------------------<br />
<br />
</source><br />
== Program: hrefsub ==<br />
<source lang="perl"><br />
#-----------------------------<br />
#% hrefsub shergold.html cards.html scooby.html<br />
#<HTML><HEAD><TITLE>Hi!</TITLE></HEAD><BODY><br />
#<br />
#<H1>Welcome to Scooby World!</H1><br />
<br />
#<br />
#I have <A HREF="pictures.html">pictures</A> of the crazy dog<br />
#<br />
#himself. Here's one!<P><br />
#<br />
#<IMG SRC="scooby.jpg" ALT="Good doggy!"><P><br />
<br />
#<br />
#<BLINK>He's my hero!</BLINK> I would like to meet him some day,<br />
#<br />
#and get my picture taken with him.<P><br />
#<br />
#P.S. I am deathly ill. <a href="cards.html">Please send<br />
#<br />
#cards</A>.<br />
#<br />
#</BODY></HTML><br />
<br />
#-----------------------------<br />
# download the following standalone program<br />
#!/usr/bin/perl -w<br />
# hrefsub - make substitutions in <A HREF="..."> fields of HTML files<br />
# from Gisle Aas <gisle@aas.no><br />
<br />
sub usage { die "Usage: $0 <from> <to> <file>...\n" }<br />
<br />
my $from = shift or usage;<br />
my $to = shift or usage;<br />
<br />
usage unless @ARGV;<br />
<br />
# The HTML::Filter subclass to do the substitution.<br />
<br />
package MyFilter;<br />
require HTML::Filter;<br />
<br />
@ISA=qw(HTML::Filter);<br />
use HTML::Entities qw(encode_entities);<br />
<br />
sub start {<br />
my($self, $tag, $attr, $attrseq, $orig) = @_;<br />
<br />
if ($tag eq 'a' && exists $attr->{href}) {<br />
<br />
if ($attr->{href} =~ s/\Q$from/$to/g) {<br />
# must reconstruct the start tag based on $tag and $attr.<br />
<br />
# wish we instead were told the extent of the 'href' value<br />
# in $orig.<br />
my $tmp = "<$tag";<br />
for (@$attrseq) {<br />
<br />
my $encoded = encode_entities($attr->{$_});<br />
$tmp .= qq( $_="$encoded ");<br />
<br />
}<br />
$tmp .= ">";<br />
$self->output($tmp);<br />
return;<br />
<br />
}<br />
}<br />
$self->output($orig);<br />
}<br />
<br />
# Now use the class.<br />
<br />
<br />
package main;<br />
foreach (@ARGV) {<br />
MyFilter->new->parse_file($_);<br />
<br />
}<br />
<br />
#-----------------------------<br />
<br />
</source><br />
<br />
{{Perl_Footer}}</div>
Root