|
Perl/FAQ/Обработка файлов
Материал из Wiki.crossplatform.ru
[править] Introduction
#-----------------------------
while (defined ($line = <DATAFILE>)) {
chomp $line;
$size = length $line;
print "$size\n"; # output size of line
}
#-----------------------------
while (<DATAFILE>) {
chomp;
print length, "\n"; # output size of line
}
#-----------------------------
@lines = <DATAFILE>;
#-----------------------------
undef $/;
$whole_file = <FILE>; # 'slurp' mode
#-----------------------------
#% perl -040 -e '$word = <>; print "First word is $word\n";'
#-----------------------------
#% perl -ne 'BEGIN { $/="%%\n" } chomp; print if /Unix/i' fortune.dat
#-----------------------------
print HANDLE "One", "two", "three"; # "Onetwothree"
print "Baa baa black sheep.\n"; # Sent to default output handle
#-----------------------------
$rv = read(HANDLE, $buffer, 4096)
or die "Couldn't read from HANDLE : $!\n";
# $rv is the number of bytes read,
# $buffer holds the data read
#-----------------------------
truncate(HANDLE, $length)
or die "Couldn't truncate: $!\n";
truncate("/tmp/$$.pid", $length)
or die "Couldn't truncate: $!\n";
#-----------------------------
$pos = tell(DATAFILE);
print "I'm $pos bytes from the start of DATAFILE.\n";
#-----------------------------
seek(LOGFILE, 0, 2) or die "Couldn't seek to the end: $!\n";
seek(DATAFILE, $pos, 0) or die "Couldn't seek to $pos: $!\n";
seek(OUT, -20, 1) or die "Couldn't seek back 20 bytes: $!\n";
#-----------------------------
$written = syswrite(DATAFILE, $mystring, length($mystring));
die "syswrite failed: $!\n" unless $written == length($mystring);
$read = sysread(INFILE, $block, 256, 5);
warn "only read $read bytes, not 256" if 256 != $read;
#-----------------------------
$pos = sysseek(HANDLE, 0, 1); # don't change position
die "Couldn't sysseek: $!\n" unless defined $pos;
#-----------------------------
[править] Reading Lines with Continuation Characters
#-----------------------------
while (defined($line = <FH>) ) {
chomp $line;
if ($line =~ s/\\$//) {
$line .= <FH>;
redo unless eof(FH);
}
# process full record in $line here
}
#-----------------------------
# DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
# $(TEXINFOS) $(INFOS) $(MANS) $(DATA)
# DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
# $(TEXINFOS) $(INFO_DEPS) $(MANS) $(DATA) \
# $(EXTRA_DIST)
#-----------------------------
if ($line =~ s/\\\s*$//) {
# as before
}
#-----------------------------
[править] Counting Lines (or Paragraphs or Records) in a File
#-----------------------------
$count = `wc -l < $file`;
die "wc failed: $?" if $?;
chomp($count);
#-----------------------------
open(FILE, "< $file") or die "can't open $file: $!";
$count++ while <FILE>;
# $count now holds the number of lines read
#-----------------------------
$count += tr/\n/\n/ while sysread(FILE, $_, 2 ** 16);
#-----------------------------
open(FILE, "< $file") or die "can't open $file: $!";
$count++ while <FILE>;
# $count now holds the number of lines read
#-----------------------------
open(FILE, "< $file") or die "can't open $file: $!";
for ($count=0; <FILE>; $count++) { }
#-----------------------------
1 while <FILE>;
$count = $.;
#-----------------------------
$/ = ''; # enable paragraph mode for all reads
open(FILE, $file) or die "can't open $file: $!";
1 while <FILE>;
$para_count = $.;
#-----------------------------
[править] Processing Every Word in a File
#-----------------------------
while (<>) {
for $chunk (split) {
# do something with $chunk
}
}
#-----------------------------
while (<>) {
while ( /(\w[\w'-]*)/g ) { #'
# do something with $1
}
}
#-----------------------------
# Make a word frequency count
%seen = ();
while (<>) {
while ( /(\w['\w-]*)/g ) { #'
$seen{lc $1}++;
}
}
# output hash in a descending numeric sort of its values
foreach $word ( sort { $seen{$b} <=> $seen{$a} } keys %seen) {
printf "%5d %s\n", $seen{$word}, $word;
}
#-----------------------------
# Line frequency count
%seen = ();
while (<>) {
$seen{lc $_}++;
}
foreach $line ( sort { $seen{$b} <=> $seen{$a} } keys %seen ) {
printf "%5d %s", $seen{$line}, $line;
}
#-----------------------------
[править] Reading a File Backwards by Line or Paragraph
#-----------------------------
@lines = <FILE>;
while ($line = pop @lines) {
# do something with $line
}
#-----------------------------
@lines = reverse <FILE>;
foreach $line (@lines) {
# do something with $line
}
#-----------------------------
for ($i = $#lines; $i != -1; $i--) {
$line = $lines[$i];
}
#-----------------------------
# this enclosing block keeps local $/ temporary
{
local $/ = '';
@paragraphs = reverse <FILE>;
}
foreach $paragraph (@paragraphs) {
# do something
}
#-----------------------------
[править] Trailing a Growing File
#-----------------------------
for (;;) {
while (<FH>) { .... }
sleep $SOMETIME;
seek(FH, 0, 1);
}
#-----------------------------
use IO::Seekable;
for (;;) {
while (<FH>) { .... }
sleep $SOMETIME;
FH->clearerr();
}
#-----------------------------
$naptime = 1;
use IO::Handle;
open (LOGFILE, "/tmp/logfile") or die "can't open /tmp/logfile: $!";
for (;;) {
while (<LOGFILE>) { print } # or appropriate processing
sleep $naptime;
LOGFILE->clearerr(); # clear stdio error flag
}
#-----------------------------
for (;;) {
for ($curpos = tell(LOGFILE); <LOGFILE>; $curpos = tell(LOGFILE)) {
# process $_ here
}
sleep $naptime;
seek(LOGFILE, $curpos, 0); # seek to where we had been
}
#-----------------------------
exit if (stat(LOGFILE))[3] == 0
#-----------------------------
use File::stat;
exit if stat(*LOGFILE)->nlink == 0;
#-----------------------------
[править] Picking a Random Line from a File
#-----------------------------
srand;
rand($.) < 1 && ($line = $_) while <>;
# $line is the random line
#-----------------------------
$/ = "%%\n";
@ARGV = qw( /usr/share/games/fortunes );
srand;
rand($.) < 1 && ($adage = $_) while <>;
print $adage;
#-----------------------------
[править] Randomizing All Lines
#-----------------------------
# assumes the &shuffle sub from Chapter 4
while (<INPUT>) {
push(@lines, $_);
}
@reordered = shuffle(@lines);
foreach (@reordered) {
print OUTPUT $_;
}
#-----------------------------
[править] Reading a Particular Line in a File
#-----------------------------
# looking for line number $DESIRED_LINE_NUMBER
$. = 0;
do { $LINE = <HANDLE> } until $. == $DESIRED_LINE_NUMBER || eof;
#-----------------------------
@lines = <HANDLE>;
$LINE = $lines[$DESIRED_LINE_NUMBER];
#-----------------------------
# usage: build_index(*DATA_HANDLE, *INDEX_HANDLE)
sub build_index {
my $data_file = shift;
my $index_file = shift;
my $offset = 0;
while (<$data_file>) {
print $index_file pack("N", $offset);
$offset = tell($data_file);
}
}
# usage: line_with_index(*DATA_HANDLE, *INDEX_HANDLE, $LINE_NUMBER)
# returns line or undef if LINE_NUMBER was out of range
sub line_with_index {
my $data_file = shift;
my $index_file = shift;
my $line_number = shift;
my $size; # size of an index entry
my $i_offset; # offset into the index of the entry
my $entry; # index entry
my $d_offset; # offset into the data file
$size = length(pack("N", 0));
$i_offset = $size * ($line_number-1);
seek($index_file, $i_offset, 0) or return;
read($index_file, $entry, $size);
$d_offset = unpack("N", $entry);
seek($data_file, $d_offset, 0);
return scalar(<$data_file>);
}
# usage:
open(FILE, "< $file") or die "Can't open $file for reading: $!\n";
open(INDEX, "+>$file.idx")
or die "Can't open $file.idx for read/write: $!\n";
build_index(*FILE, *INDEX);
$line = line_with_index(*FILE, *INDEX, $seeking);
#-----------------------------
use DB_File;
use Fcntl;
$tie = tie(@lines, $FILE, "DB_File", O_RDWR, 0666, $DB_RECNO) or die
"Cannot open file $FILE: $!\n";
# extract it
$line = $lines[$sought - 1];
#-----------------------------
# download the following standalone program
#!/usr/bin/perl -w
# print_line-v1 - linear style
@ARGV == 2 or die "usage: print_line FILENAME LINE_NUMBER\n";
($filename, $line_number) = @ARGV;
open(INFILE, "< $filename") or die "Can't open $filename for reading: $!\n";
while (<INFILE>) {
$line = $_;
last if $. == $line_number;
}
if ($. != $line_number) {
die "Didn't find line $line_number in $filename\n";
}
print;
#-----------------------------
# download the following standalone program
#!/usr/bin/perl -w
# print_line-v2 - index style
# build_index and line_with_index from above
@ARGV == 2 or
die "usage: print_line FILENAME LINE_NUMBER";
($filename, $line_number) = @ARGV;
open(ORIG, "< $filename")
or die "Can't open $filename for reading: $!";
# open the index and build it if necessary
# there's a race condition here: two copies of this
# program can notice there's no index for the file and
# try to build one. This would be easily solved with
# locking
$indexname = "$filename.index";
sysopen(IDX, $indexname, O_CREAT|O_RDWR)
or die "Can't open $indexname for read/write: $!";
build_index(*ORIG, *IDX) if -z $indexname; # XXX: race unless lock
$line = line_with_index(*ORIG, *IDX, $line_number);
die "Didn't find line $line_number in $filename" unless defined $line;
print $line;
#-----------------------------
# download the following standalone program
#!/usr/bin/perl -w
# print_line-v3 - DB_File style
use DB_File;
use Fcntl;
@ARGV == 2 or
die "usage: print_line FILENAME LINE_NUMBER\n";
($filename, $line_number) = @ARGV;
$tie = tie(@lines, "DB_File", $filename, O_RDWR, 0666, $DB_RECNO)
or die "Cannot open file $filename: $!\n";
unless ($line_number < $tie->length) {
die "Didn't find line $line_number in $filename\n"
}
print $lines[$line_number-1]; # easy, eh?
#-----------------------------
[править] Processing Variable-Length Text Fields
#-----------------------------
# given $RECORD with field separated by PATTERN,
# extract @FIELDS.
@FIELDS = split(/PATTERN/, $RECORD);
#-----------------------------
split(/([+-])/, "3+5-2");
#-----------------------------
(3, '+', 5, '-', 2)
#-----------------------------
@fields = split(/:/, $RECORD);
#-----------------------------
@fields = split(/\s+/, $RECORD);
#-----------------------------
@fields = split(" ", $RECORD);
#-----------------------------
[править] Removing the Last Line of a File
#-----------------------------
open (FH, "+< $file") or die "can't update $file: $!";
while ( <FH> ) {
$addr = tell(FH) unless eof(FH);
}
truncate(FH, $addr) or die "can't truncate $file: $!";
#-----------------------------
[править] Processing Binary Files
#-----------------------------
binmode(HANDLE);
#-----------------------------
$gifname = "picture.gif";
open(GIF, $gifname) or die "can't open $gifname: $!";
binmode(GIF); # now DOS won't mangle binary input from GIF
binmode(STDOUT); # now DOS won't mangle binary output to STDOUT
while (read(GIF, $buff, 8 * 2**10)) {
print STDOUT $buff;
}
#-----------------------------
[править] Using Random-Access I/O
#-----------------------------
$ADDRESS = $RECSIZE * $RECNO;
seek(FH, $ADDRESS, 0) or die "seek:$!";
read(FH, $BUFFER, $RECSIZE);
#-----------------------------
$ADDRESS = $RECSIZE * ($RECNO-1);
#-----------------------------
[править] Updating a Random-Access File
#-----------------------------
use Fcntl; # for SEEK_SET and SEEK_CUR
$ADDRESS = $RECSIZE * $RECNO;
seek(FH, $ADDRESS, SEEK_SET) or die "Seeking: $!";
read(FH, $BUFFER, $RECSIZE) == $RECSIZE
or die "Reading: $!";
@FIELDS = unpack($FORMAT, $BUFFER);
# update fields, then
$BUFFER = pack($FORMAT, @FIELDS);
seek(FH, -$RECSIZE, SEEK_CUR) or die "Seeking: $!";
print FH $BUFFER;
close FH or die "Closing: $!";
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# weekearly -- set someone's login date back a week
use User::pwent;
use IO::Seekable;
$typedef = 'L A12 A16'; # linux fmt; sunos is "L A8 A16"
$sizeof = length(pack($typedef, ()));
$user = shift(@ARGV) || $ENV{USER} || $ENV{LOGNAME};
$address = getpwnam($user)->uid * $sizeof;
open (LASTLOG, "+</var/log/lastlog")
or die "can't update /usr/adm/lastlog: $!";
seek(LASTLOG, $address, SEEK_SET)
or die "seek failed: $!";
read(LASTLOG, $buffer, $sizeof) == $sizeof
or die "read failed: $!";
($time, $line, $host) = unpack($typedef, $buffer);
$time -= 24 * 7 * 60 * 60; # back-date a week
$buffer = pack($typedef, $time, $line, $time);
seek(LASTLOG, -$sizeof, SEEK_CUR) # backup one record
or die "seek failed: $!";
print LASTLOG $record;
close(LASTLOG)
or die "close failed: $!";
#-----------------------------
[править] Reading a String from a Binary File
#-----------------------------
$old_rs = $/; # save old $/
$/ = "\0"; # NULL
seek(FH, $addr, SEEK_SET) or die "Seek error: $!\n";
$string = <FH>; # read string
chomp $string; # remove NULL
$/ = $old_rs; # restore old $/
#-----------------------------
{
local $/ = "\0";
# ...
} # $/ is automatically restored
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# bgets - get a string from an address in a binary file
use IO::Seekable;
($file, @addrs) = @ARGV or die "usage: $0 addr ...";
open(FH, $file) or die "cannot open $file: $!";
$/ = "\000";
foreach $addr (@addrs) {
$addr = oct $addr if $addr =~ /^0/;
seek(FH, $addr, SEEK_SET)
or die "can't seek to $addr in $file: $!";
printf qq{%#x %#o %d "%s"\n}, $addr, $addr, $addr, scalar <>;
}
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# strings - pull strings out of a binary file
$/ = "\0";
while (<>) {
while (/([\040-\176\s]{4,})/g) {
print $1, "\n";
}
}
#-----------------------------
[править] Reading Fixed-Length Records
#-----------------------------
# $RECORDSIZE is the length of a record, in bytes.
# $TEMPLATE is the unpack template for the record
# FILE is the file to read from
# @FIELDS is an array, one element per field
until ( eof(FILE) ) {
read(FILE, $record, $RECORDSIZE) == $RECORDSIZE
or die "short read\n";
@FIELDS = unpack($TEMPLATE, $record);
}
#-----------------------------
#define UT_LINESIZE 12
#define UT_NAMESIZE 8
#define UT_HOSTSIZE 16
struct utmp { /* here are the pack template codes */
short ut_type; /* s for short, must be padded */
pid_t ut_pid; /* i for integer */
char ut_line[UT_LINESIZE]; /* A12 for 12-char string */
char ut_id[2]; /* A2, but need x2 for alignment */
time_t ut_time; /* l for long */
char ut_user[UT_NAMESIZE]; /* A8 for 8-char string */
char ut_host[UT_HOSTSIZE]; /* A16 for 16-char string */
long ut_addr; /* l for long */
};
#-----------------------------
[править] Reading Configuration Files
#-----------------------------
while (<CONFIG>) {
chomp; # no newline
s/#.*//; # no comments
s/^\s+//; # no leading white
s/\s+$//; # no trailing white
next unless length; # anything left?
my ($var, $value) = split(/\s*=\s*/, $_, 2);
$User_Preferences{$var} = $value;
}
#-----------------------------
do "$ENV{HOME}/.progrc";
#-----------------------------
# set class C net
NETMASK = 255.255.255.0
MTU = 296
DEVICE = cua1
RATE = 115200
MODE = adaptive
#-----------------------------
no strict 'refs';
$$var = $value;
#-----------------------------
# set class C net
$NETMASK = '255.255.255.0';
$MTU = 0x128;
# Brent, please turn on the modem
$DEVICE = 'cua1';
$RATE = 115_200;
$MODE = 'adaptive';
#-----------------------------
if ($DEVICE =~ /1$/) {
$RATE = 28_800;
} else {
$RATE = 115_200;
}
#-----------------------------
$APPDFLT = "/usr/local/share/myprog";
do "$APPDFLT/sysconfig.pl";
do "$ENV{HOME}/.myprogrc";
#-----------------------------
do "$ENV{HOME}/.myprogrc";
or
do "$APPDFLT/sysconfig.pl"
#-----------------------------
{ package Settings; do "$ENV{HOME}/.myprogrc" }
#-----------------------------
eval `cat $ENV{HOME}/.myprogrc`;
#-----------------------------
$file = "someprog.pl";
unless ($return = do $file) {
warn "couldn't parse $file: $@" if $@;
warn "couldn't do $file: $!" unless defined $return;
warn "couldn't run $file" unless $return;
}
#-----------------------------
[править] Testing a File for Trustworthiness
#-----------------------------
( $dev, $ino, $mode, $nlink,
$uid, $gid, $rdev, $size,
$atime, $mtime, $ctime,
$blksize, $blocks ) = stat($filename)
or die "no $filename: $!";
$mode &= 07777; # discard file type info
#-----------------------------
$info = stat($filename) or die "no $filename: $!";
if ($info->uid == 0) {
print "Superuser owns $filename\n";
}
if ($info->atime > $info->mtime) {
print "$filename has been read since it was written.\n";
}
#-----------------------------
use File::stat;
sub is_safe {
my $path = shift;
my $info = stat($path);
return unless $info;
# owner neither superuser nor me
# the real uid is in stored in the $< variable
if (($info->uid != 0) && ($info->uid != $<)) {
return 0;
}
# check whether group or other can write file.
# use 066 to detect either reading or writing
if ($info->mode & 022) { # someone else can write this
return 0 unless -d _; # non-directories aren't safe
# but directories with the sticky bit (01000) are
return 0 unless $info->mode & 01000;
}
return 1;
}
#-----------------------------
use Cwd;
use POSIX qw(sysconf _PC_CHOWN_RESTRICTED);
sub is_verysafe {
my $path = shift;
return is_safe($path) if sysconf(_PC_CHOWN_RESTRICTED);
$path = getcwd() . '/' . $path if $path !~ m{^/};
do {
return unless is_safe($path);
$path =~ s#([^/]+|/)$##; # dirname
$path =~ s#/$## if length($path) > 1; # last slash
} while length $path;
return 1;
}
#-----------------------------
$file = "$ENV{HOME}/.myprogrc";
readconfig($file) if is_safe($file);
#-----------------------------
$file = "$ENV{HOME}/.myprogrc";
if (open(FILE, "< $file")) {
readconfig(*FILE) if is_safe(*FILE);
}
#-----------------------------
[править] Program: tailwtmp
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# tailwtmp - watch for logins and logouts;
# uses linux utmp structure, from utmp(5)
$typedef = 's x2 i A12 A4 l A8 A16 l';
$sizeof = length pack($typedef, () );
use IO::File;
open(WTMP, '/var/log/wtmp') or die "can't open /var/log/wtmp: $!";
seek(WTMP, 0, SEEK_END);
for (;;) {
while (read(WTMP, $buffer, $sizeof) == $sizeof) {
($type, $pid, $line, $id, $time, $user, $host, $addr)
= unpack($typedef, $buffer);
next unless $user && ord($user) && $time;
printf "%1d %-8s %-12s %2s %-24s %-16s %5d %08x\n",
$type,$user,$line,$id,scalar(localtime($time)),
$host,$pid,$addr;
}
for ($size = -s WTMP; $size == -s WTMP; sleep 1) {}
WTMP->clearerr();
}
#-----------------------------
[править] Program: tctee
#-----------------------------
#% someprog | tee /tmp/output | Mail -s 'check this' user@host.org
#-----------------------------
#% someprog | tctee f1 "|cat -n" f2 ">>f3"
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# tctee - clone that groks process tees
# perl3 compatible, or better.
while ($ARGV[0] =~ /^-(.+)/ && (shift, ($_ = $1), 1)) {
next if /^$/;
s/i// && (++$ignore_ints, redo);
s/a// && (++$append, redo);
s/u// && (++$unbuffer, redo);
s/n// && (++$nostdout, redo);
die "usage tee [-aiun] [filenames] ...\n";
}
if ($ignore_ints) {
for $sig ('INT', 'TERM', 'HUP', 'QUIT') { $SIG{$sig} = 'IGNORE'; }
}
$SIG{'PIPE'} = 'PLUMBER';
$mode = $append ? '>>' : '>';
$fh = 'FH000';
unless ($nostdout) {
%fh = ('STDOUT', 'standard output'); # always go to stdout
}
$| = 1 if $unbuffer;
for (@ARGV) {
if (!open($fh, (/^[^>|]/ && $mode) . $_)) {
warn "$0: cannot open $_: $!\n"; # like sun's; i prefer die
$status++;
next;
}
select((select($fh), $| = 1)[0]) if $unbuffer;
$fh{$fh++} = $_;
}
while (<STDIN>) {
for $fh (keys %fh) {
print $fh $_;
}
}
for $fh (keys %fh) {
next if close($fh) || !defined $fh{$fh};
warn "$0: couldnt close $fh{$fh}: $!\n";
$status++;
}
exit $status;
sub PLUMBER {
warn "$0: pipe to \"$fh{$fh}\" broke!\n";
$status++;
delete $fh{$fh};
}
#-----------------------------
[править] Program: laston
#-----------------------------
#% laston gnat
#gnat UID 314 at Mon May 25 08:32:52 1998 on ttyp0 from below.perl.com
#-----------------------------
# download the following standalone program
#!/usr/bin/perl
# laston - find out when given user last logged on
use User::pwent;
use IO::Seekable qw(SEEK_SET);
open (LASTLOG, "/var/log/lastlog") or die "can't open /usr/adm/lastlog: $!";
$typedef = 'L A12 A16'; # linux fmt; sunos is "L A8 A16"
$sizeof = length(pack($typedef, ()));
for $user (@ARGV) {
$U = ($user =~ /^\d+$/) ? getpwuid($user) : getpwnam($user);
unless ($U) { warn "no such uid $user\n"; next; }
seek(LASTLOG, $U->uid * $sizeof, SEEK_SET) or die "seek failed: $!";
read(LASTLOG, $buffer, $sizeof) == $sizeof or next;
($time, $line, $host) = unpack($typedef, $buffer);
printf "%-8s UID %5d %s%s%s\n", $U->name, $U->uid,
$time ? ("at " . localtime($time)) : "never logged in",
$line && " on $line",
$host && " from $host";
}
#-----------------------------
|