|
Groovy/FAQ/Обработка файлов
Материал из Wiki.crossplatform.ru
[править] Introduction
//----------------------------------------------------------------------------------
// Overview of the basic ways to read, write and reposition within a file.
datafile = new File('Pleac/data/pleac8_0.txt') // change on your system
// three ways to get at the text: line by line, as a list of lines, as one string
datafile.eachLine{ line -> print line.size() }
lines = datafile.readLines()
wholeTextFile = datafile.text
// on command line Groovy use -a auto split pattern instead of record separator
// default pattern is /\s/
// groovy -a -e 'println "First word is ${split[0][1]}"'
// (additional examples to original cookbook to illustrate -a)
// Print processes owned by root:
// ps aux|groovy -ane "if(split[0][1] =~ 'root')println split[0][10..-1]"
// Print all logins from /etc/passwd that are not commented:
// groovy -a':' -ne "if(!(split[0][1] =~ /^#/))println split[0][1]" /etc/passwd
// Add the first and the penultimate column of a file:
// groovy -ape "split[0][1].toInteger()+split[0][-2].toInteger()" accounts.txt
// no BEGIN and END in Groovy (has been proposed, may be added soon)

// NOTE: withOutputStream opens the file for writing, replacing its contents.
datafile.withOutputStream{ stream ->
    // An OutputStream has no print() of its own; Groovy's generic Object.print()
    // would send the text to stdout. The << operator writes it to the stream.
    stream << "one" + "two" + "three" // "onetwothree" -> file
    println "Baa baa black sheep." // sent to $stdout
}

// use streams or channels for advanced file handling
int size = datafile.size()
buffer = ByteBuffer.allocate(size) // for large files, use some block size, e.g. 4096
channel = new FileInputStream(datafile).channel
try {
    println "Number of bytes read was: ${channel.read(buffer)}" // -1 = EOF
} finally {
    channel.close() // also closes the underlying FileInputStream
}

channel = new FileOutputStream(File.createTempFile("pleac8", ".junk")).channel
try {
    size = channel.size()
    channel.truncate(size) // shrinks file (in our case to same size)
    pos = channel.position()
    println "I'm $pos bytes from the start of datafile"
    channel.position(pos) // move to pos (in our case unchanged)
    channel.position(0) // move to start of file
    channel.position(size) // move to end of file
} finally {
    channel.close()
}
// no sysread and syswrite are available but dataInput/output streams
// can be used to achieve similar functionality, see 8.15.
//----------------------------------------------------------------------------------
[править] Reading Lines with Continuation Characters
//----------------------------------------------------------------------------------
// Joins physical lines that end in a backslash into single logical lines.
testfile = new File('Pleac/data/pleac8_1.txt') // change on your system
// sample input held in testfile:
// DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
// $(TEXINFOS) $(INFOS) $(MANS) $(DATA)
// DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
// $(TEXINFOS) $(INFO_DEPS) $(MANS) $(DATA) \
// $(EXTRA_DIST)
lines = []
continuing = false
regex = /\\$/
testfile.eachLine{ rawLine ->
    // drop the trailing continuation marker, if any
    stripped = rawLine.replaceAll(regex,'')
    if (!continuing) {
        // start of a new logical line
        lines += stripped
    } else {
        // previous physical line ended in a backslash: glue this one on
        lines[-1] += stripped
    }
    // remember whether THIS line asks for a continuation
    continuing = (rawLine =~ regex)
}
println lines.join('\n')
// prints:
// DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(INFOS) $(MANS) $(DATA)
// DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(INFO_DEPS) $(MANS) $(DATA) $(EXTRA_DIST)
// to remove hidden spaces after the backslash (but keep the backslash):
def trimtail(line) {
    // Remove whitespace at the end of the text, but only when it directly
    // follows a backslash (look-behind), so the backslash itself is kept.
    // Text without a trailing backslash is returned unchanged.
    def trailingBlanksAfterBackslash = /(?<=\\)\s*$/
    return line.replaceAll(trailingBlanksAfterBackslash, '')
}
b = /\\/ // backslash
// each pair is [expected result, input handed to trimtail]
[
    ["abc $b", "abc $b"],   // nothing after the backslash: unchanged
    ["abc ",   "abc "],     // no backslash: trailing space is kept
    ["abc $b", "abc $b "]   // space after the backslash: removed
].each { pair ->
    assert pair[0] == trimtail(pair[1])
}
//----------------------------------------------------------------------------------
[править] Counting Lines (or Paragraphs or Records) in a File
//----------------------------------------------------------------------------------
// unixScript:
// String.execute() does not go through a shell, so the '<' redirection has to be
// run by an explicit 'sh -c'. The path is passed as $1 rather than spliced into
// the command text, so unusual characters in the file name are harmless.
println (['sh', '-c', 'wc -l < "$1"', 'sh', testfile.path].execute().text)
// for small files which fit in memory
println testfile.readLines().size()
// streaming approach (lines and paras)
lines = 0; paras = 1
testfile.eachLine{ lines++; if (it =~ /^$/) paras++ }
println "Found $lines lines and $paras paras."
// note: counts blank line at end as start of next empty para
// with a StreamTokenizer; withReader closes the reader when the closure finishes
testfile.withReader{ reader ->
    st = new StreamTokenizer(reader)
    // consume every token; only the tokenizer's line counter is of interest
    while (st.nextToken() != StreamTokenizer.TT_EOF) {}
    println st.lineno()
}
//----------------------------------------------------------------------------------
[править] Processing Every Word in a File
//----------------------------------------------------------------------------------
// general pattern
// Calls processWord once for every non-empty word in the given file.
//   file        - the java.io.File to read
//   processWord - closure invoked with each word (a String)
// Lines are split on runs of non-word characters (/\W+/); empty fragments,
// e.g. from a line that starts with punctuation, are skipped.
def processWordsInFile(file, processWord) {
    // read from the file that was passed in, not from a script-level variable
    file.splitEachLine(/\W+/) { matched ->
        matched.each{ w -> if (w) processWord(w) }
    }
}
testfile = new File('Pleac/src/pleac8.groovy') // change path on your system
// total number of words
count = 0
processWordsInFile(testfile){ word -> count++ }
println count
// (variation to Perl example)
// StreamTokenizer version: tallies words and numbers separately in the
// Pleac chapter 8 source file
words = 0; numbers = 0
st = new StreamTokenizer(testfile.newReader())
st.slashSlashComments(true) // skip anything inside // comments
while (st.nextToken() != StreamTokenizer.TT_EOF) {
    switch (st.ttype) {
        case StreamTokenizer.TT_WORD:   words++;   break
        case StreamTokenizer.TT_NUMBER: numbers++; break
    }
}
println "Found $words words and $numbers numbers."
// word frequency count, case-insensitive
seen = [:]
processWordsInFile(testfile) {
    w = it.toLowerCase()
    if (!seen.containsKey(w)) {
        seen[w] = 1
    } else {
        seen[w] += 1
    }
}
// list the entries with the most frequent word first
def byCountDescending = seen.entrySet().sort { left, right -> right.value <=> left.value }
byCountDescending.each{ entry ->
    printf("%5d %s\n", [entry.value, entry.key] )
}
// =>
// 25 pleac
// 22 line
// 20 file
// 19 println
// 19 lines
// 13 testfile
// ...
//----------------------------------------------------------------------------------
[править] Reading a File Backwards by Line or Paragraph
//----------------------------------------------------------------------------------
// Print the file's lines last-to-first.
testfile.readLines().reverseEach{
    println it
}
lines = testfile.readLines()
// normally one would use the reverseEach, but you can use
// a numerical index if you want.
// A counting loop is used rather than the range (size-1)..0: for an empty file
// that range would be -1..0 and index into an empty list.
for (int idx = lines.size() - 1; idx >= 0; idx--) {
    println lines[idx]
}
// Paragraph-based processing could be done as in 8.2.
// A streaming-based solution could use random file access
// and have a sliding buffer working from the back of the
// file to the front.
//----------------------------------------------------------------------------------
[править] Trailing a Growing File
//----------------------------------------------------------------------------------
logfile = new File('Pleac/data/sampleLog.txt')
// logTailingScript:
// Polls logfile forever, printing each new line prefixed with '##'.
sampleInterval = 2000 // 2000 millis = 2 secs
file = new RandomAccessFile( logfile, "r" )
filePointer = 0 // set to logfile.size() to begin tailing from the end of the file
while( true ) {
    // Compare the length of the file to the file pointer
    long fileLength = logfile.size()
    if( fileLength < filePointer ) {
        // Log file must have been rotated or deleted;
        System.err.println "${new Date()}: Reopening $logfile"
        // release the handle on the old file before opening the new one
        file.close()
        file = new RandomAccessFile( logfile, "r" )
        filePointer = 0
    }
    if( fileLength > filePointer ) {
        // There is data to read
        file.seek( filePointer )
        while( (line = file.readLine()) != null ) {
            println '##' + line
        }
        // remember where we stopped so the next poll resumes from here
        filePointer = file.filePointer
    }
    // Sleep for the specified interval
    Thread.sleep( sampleInterval )
}
//----------------------------------------------------------------------------------
[править] Picking a Random Line from a File
//----------------------------------------------------------------------------------
//testfile = new File('/usr/share/fortune/humorists')
// small files: load everything and index at random
random = new Random()
lines = testfile.readLines()
def pickedIndex = random.nextInt(lines.size())
println lines[pickedIndex]
// streamed alternative: the n-th line replaces the current pick with
// probability 1/n, so only one line is held at a time
count = 0
def adage
testfile.eachLine{ candidate ->
    count++
    if (random.nextInt(count) == 0) {
        adage = candidate
    }
}
println adage
//----------------------------------------------------------------------------------
[править] Randomizing All Lines
//----------------------------------------------------------------------------------
// whole-file approach (as in the Perl and Ruby versions): load, shuffle in place, print
lines = testfile.readLines()
Collections.shuffle(lines)
def shuffledText = lines.join('\n')
println shuffledText
//----------------------------------------------------------------------------------
[править] Reading a Particular Line in a File
//----------------------------------------------------------------------------------
desiredLine = 235
// for small files: read everything and index (line numbers are 1-based)
lines = testfile.readLines()
println "Line $desiredLine: ${lines[desiredLine-1]}"
// streaming solution: stop reading as soon as the wanted line is reached.
// withReader closes the reader even when we leave the loop early.
def line = testfile.withReader{ reader ->
    def current
    count = 0
    while ((current = reader.readLine()) != null) {
        if (++count == desiredLine) break
    }
    current // null when the file has fewer than desiredLine lines
}
println "Line $desiredLine: $line"
//----------------------------------------------------------------------------------
[править] Processing Variable-Length Text Fields
//----------------------------------------------------------------------------------
// Count the pieces obtained by splitting the source text on its section markers.
// Groovy has no Perl-style /.../i suffix; case-insensitive matching is requested
// with the embedded (?i) flag inside the pattern instead.
println testfile.text.split(/(?i)@@pleac@@_8./).size()
// => 23 (21 sections .0 .. .20 plus before .0 plus line above)
//----------------------------------------------------------------------------------
[править] Removing the Last Line of a File
//----------------------------------------------------------------------------------
// Truncate logfile so that its last line is removed.
file = new RandomAccessFile( logfile, "rw" )
try {
    long previous = 0   // offset at which the most recently read line starts
    long lastpos = 0    // offset just past the most recently read line
    boolean sawLine = false
    while( file.readLine() != null ) {
        sawLine = true
        previous = lastpos
        lastpos = file.filePointer
    }
    // Test "was anything read" rather than "previous != 0": for a one-line file
    // the last line starts at offset 0 and must still be removed.
    if (sawLine) file.setLength(previous)
} finally {
    file.close()
}
//----------------------------------------------------------------------------------
[править] Processing Binary Files
//----------------------------------------------------------------------------------
// Java's streams are binary at the lowest level if not processed with
// higher level stream mechanisms or readers/writers. Some additions
// to the Perl cookbook which illustrate the basics.
// Print first ten bytes of a binary file:
// Prints the first ten bytes of the named file as unsigned decimal values,
// separated by spaces and followed by a newline.
//   filename - path of the file to inspect
// If the file holds fewer than ten bytes, InputStream.read() returns -1 for the
// missing ones and -1 is what gets printed.
def dumpStart(filename) {
    // withInputStream closes the stream once the closure returns
    new File(filename).withInputStream{ input ->
        10.times{
            print input.read() + ' '
        }
    }
    println()
}
def runtimeJar = System.getProperty('java.home')+'/lib/rt.jar'
dumpStart(runtimeJar)
// => 80 75 3 4 10 0 0 0 0 0 (the leading 80 75 is "PK", the usual
// signature at the start of a zip file)
dumpStart('Pleac/classes/pleac8.class') // after running groovyc compiler in src directory
// => 202 254 186 190 0 0 0 47 2 20 (first four bytes in HEX: CAFEBABE)
binfile = new File('Pleac/data/temp.bin')
// fill the file with the byte values 0 through 19
binfile.withOutputStream{ out ->
    for (value in 0..<20) {
        out.write(value)
    }
}
// echo every byte back as a number
binfile.eachByte{ octet -> print octet + ' ' }
println()
// => 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
//----------------------------------------------------------------------------------
[править] Using Random-Access I/O
//----------------------------------------------------------------------------------
// view binfile as 5 fixed-size records of 4 bytes each and print the 3rd one
recsize = 4
recno = 2 // zero-based record index
address = recsize * recno
randomaccess = new RandomAccessFile(binfile, 'r')
randomaccess.seek(address) // jump straight to the start of the record
for (i in 0..<recsize) {
    print randomaccess.read() + ' '
}
println() // => 8 9 10 11
randomaccess.close()
//----------------------------------------------------------------------------------
[править] Updating a Random-Access File
//----------------------------------------------------------------------------------
// Same record as in 8.12, but this time each byte of the 3rd record is
// overwritten with (90 - its original value).
// This is a cross-platform alternative to the Perl cookbook example;
// see chapter 1 regarding un/pack, which could be combined with the code
// below to achieve the full functionality of the original 8.13.
recsize = 4
recno = 2 // zero-based record index
address = recsize * recno
randomaccess = new RandomAccessFile(binfile, 'rw')
// first pass: read the record's current bytes
randomaccess.seek(address)
bytes = []
for (i in 0..<recsize) {
    bytes += randomaccess.read()
}
// second pass: go back to the record start and write the replacements
randomaccess.seek(address)
for (original in bytes) {
    randomaccess.write(90 - original)
}
randomaccess.close()
binfile.eachByte{ octet -> print octet + ' ' }
println()
// => 0 1 2 3 4 5 6 7 82 81 80 79 12 13 14 15 16 17 18 19
//----------------------------------------------------------------------------------
[править] Reading a String from a Binary File
//----------------------------------------------------------------------------------
// Reading a String means looping over the bytes and collecting them.
// A simple bgets would resemble the revised 8.13 but stop at the terminating 0.
// Below is a simplistic "strings": print only bytes in the printable ASCII range.
binfile.eachByte{ b ->
    def code = (int) b
    if (code in 32..126) {
        print ((char)b)
    }
}
println() // => RQPO
//----------------------------------------------------------------------------------
[править] Reading Fixed-Length Records
//----------------------------------------------------------------------------------
// You could combine the byte-level reading/writing mechanisms shown
// in 8.11 - 8.12 and combine that with the un/pack functionality from
// Chapter 1 to achieve the desired functionality. A more Java and Groovy
// friendly way to do this would be to use the Scattering and Gathering
// stream operations of channels for byte-oriented record fields or
// data-oriented records. Alternatively, the dataInput/output stream
// capabilities for data-oriented records. Finally, the
// objectInput/output stream capabilities could be used for object types.
// Note, these examples mix reading and writing even though the original
// Perl example was just about reading.
// fixed-length byte-oriented records using channels
// typical approach used with low-level protocols or file formats
import java.nio.*
// begin with an empty file
binfile.delete()
binfile.createNewFile()
// three "fields" of different widths, each in its own buffer
buf1 = ByteBuffer.wrap([10,11,12,13] as byte[]) // 4-byte field
buf2 = ByteBuffer.wrap([44,45] as byte[]) // 2-byte field
buf3 = ByteBuffer.wrap('Hello'.bytes) // text field
records = [buf1, buf2, buf3] as ByteBuffer[]
channel = new FileOutputStream(binfile).channel
channel.write(records) // gathering write: all buffers go out in one call, in order
channel.close()
binfile.eachByte{ octet -> print octet + ' ' }
println()
// => 10 11 12 13 44 45 72 101 108 108 111
// A scattering read on a channel would split this back into separate buffers.
// typed data (double, long) written and read back through a ByteBuffer and channels
binfile.delete()
binfile.createNewFile() // begin with an empty file
// layout: 3 label bytes + 8-byte double + 5 label bytes + 8-byte long = 24 bytes
buf = ByteBuffer.allocate(24)
now = System.currentTimeMillis()
buf.put('PI='.bytes)
buf.putDouble(Math.PI)
buf.put('Date='.bytes)
buf.putLong(now)
buf.flip() // limit = bytes written so far, position = 0, ready to be drained
channel = new FileOutputStream(binfile).channel
channel.write(buf)
channel.close()
// read the same 24 bytes back and decode them in the order they were written
channel = new FileInputStream(binfile).channel
buf = ByteBuffer.allocate(24)
channel.read(buf)
buf.flip()
for (i in 0..<3) { print ((char)buf.get()) }
println (buf.getDouble())
for (i in 0..<5) { print ((char)buf.get()) }
println (new Date(buf.getLong()))
channel.close()
// prints:
// PI=3.141592653589793
// Date=Sat Jan 13 00:14:50 EST 2007
// whole objects written and read back with object streams
binfile.delete()
binfile.createNewFile() // begin with an empty file
// must be Serializable to be written with writeObject
class Person implements Serializable { def name, age }
binfile.withObjectOutputStream{ out ->
    def bernie = new Person(name:'Bernie',age:16)
    out.writeObject(bernie)
    def letters = [1:'a', 2:'b']
    out.writeObject(letters)
    out.writeObject(new Date())
}
// read the three objects back in the order they were written
binfile.withObjectInputStream{ input ->
    person = input.readObject()
    println "$person.name is $person.age"
    2.times{
        println input.readObject()
    }
}
// prints:
// Bernie is 16
// [1:"a", 2:"b"]
// Sat Jan 13 00:22:13 EST 2007
//----------------------------------------------------------------------------------
[править] Reading Configuration Files
//----------------------------------------------------------------------------------
// use built-in Java property class
// suppose you have the following file:
// # set your database settings here
// server=localhost
// url=jdbc:derby:derbyDB;create=true
// user.name=me
// user.password=secret
props = new Properties()
propsfile=new File('Pleac/data/plain.properties')
// withInputStream closes the stream after loading; Properties.load() itself
// leaves the stream open
propsfile.withInputStream{ input -> props.load(input) }
props.list(System.out)
// =>
// -- listing properties --
// user.name=me
// user.password=secret
// url=jdbc:derby:derbyDB;create=true
// server=localhost
// There are also provisions for writing properties file.
// (additional example to Perl)
// You can also read and write xml properties files.
new File('Pleac/data/props.xml').withOutputStream{ os ->
    props.storeToXML(os, "Database Settings")
}
// =>
// <?xml version="1.0" encoding="UTF-8"?>
// <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
// <properties>
// <comment>Database Settings</comment>
// <entry key="user.password">secret</entry>
// <entry key="user.name">me</entry>
// <entry key="url">jdbc:derby:derbyDB;create=true</entry>
// <entry key="server">localhost</entry>
// </properties>
//----------------------------------------------------------------------------------
[править] Testing a File for Trustworthiness
//----------------------------------------------------------------------------------
// The File class provides canRead(), canWrite() and canExecute() (JDK6) methods
// for finding out about security information specific to the user. JSR 203
// (expected in Java 7) provides access to additional security related attributes.
// Another useful package to use when wondering about the trustworthiness of a
// file is the java.security package. It contains many classes. Just one is
// MessageDigest. This would allow you to create a strong checksum of a file.
// Your program could refuse to operate if a file it was accessing didn't have the
// checksum it was expecting - an indication that it may have been tampered with.
// (additional info)
// While getting file-based security permissions correct is important, it isn't the
// only mechanism to use for security when using Java based systems. Java provides
// policy files and an authorization and authentication API which lets you secure
// any resources (not just files) at various levels of granularity with various
// security mechanisms.
// Security policies may be universal, apply to a particular codebase, or
// using JAAS apply to individuals. Some indicative policy statements:
// grant {
// permission java.net.SocketPermission "*", "connect";
// permission java.io.FilePermission "C:\\users\\cathy\\foo.bat", "read";
// };
// grant codebase "file:./*", Principal ExamplePrincipal "Secret" {
// permission java.io.FilePermission "dummy.txt", "read";
// };
//----------------------------------------------------------------------------------
[править] Program: tailwtmp
//----------------------------------------------------------------------------------
// general purpose utility methods
// Reads the next `size` bytes from buf and returns them as a trimmed String.
//   buf  - a java.nio.ByteBuffer positioned at the start of the field
//   size - number of bytes the field occupies
// The bytes are decoded with the platform default charset. trim() strips leading
// and trailing characters <= ' ', which includes NUL padding.
def getString(buf,size){
    // one bulk get into a local array, instead of appending byte by byte to a list
    byte[] raw = new byte[size]
    buf.get(raw)
    new String(raw).trim()
}
// Reads `size` bytes from buf as an unsigned integer whose first byte is the
// least significant, and returns it as a long.
//   buf  - a java.nio.ByteBuffer positioned at the start of the field
//   size - number of bytes to consume (1..8)
def getInt(buf,size) {
    // normally in Java we would just use methods like getLong()
    // to read a long but wish to ignore platform issues here
    long val = 0
    for (n in 0..<size) {
        // Mask with a long literal so the shift is done in 64 bits. A 32-bit shift
        // goes negative for a high byte at n == 3 and wraps around for n >= 4.
        val |= (buf.get() & 0xFFL) << (n * 8)
    }
    return val
}
// Reads a 4-byte timestamp in seconds from buf and returns it as a java.util.Date.
def getDate(buf) {
    def epochSeconds = getInt(buf,4)
    def epochMillis = epochSeconds * 1000 // Date expects milliseconds
    new Date(epochMillis)
}
// specific utility method (wtmp file from ubuntu 6.10)
// Prints every complete wtmp record found in `file` from byte offset `origpos`
// onward, and returns the offset just past the last complete record printed.
// Callers pass that value back in to resume where this call stopped.
//   file    - the wtmp file (java.io.File)
//   origpos - byte offset at which to start reading
def processWtmpRecords(file, origpos) {
    // field widths of one record, in file order
    def recsize = 4 + 4 + 32 + 4 + 32 + 256 + 8 + 4 + 40
    def raf = new RandomAccessFile(file, 'r')
    try {
        def channel = raf.channel
        channel.position(origpos)
        def newpos = origpos
        def buf = ByteBuffer.allocate(recsize)
        // stop at EOF or at a short read (an incomplete trailing record)
        while (channel.read(buf) == recsize) {
            buf.flip()
            print getInt(buf,4) + ' ' // type
            print getInt(buf,4) + ' ' // pid
            print getString(buf,32) + ' ' // line
            print getString(buf,4) + ' ' // inittab
            print getString(buf,32) + ' ' // user
            print getString(buf,256) + ' ' // hostname
            buf.position(buf.position() + 8) // skip
            println "${getDate(buf)} " // time
            buf.clear()
            newpos = channel.position()
        }
        return newpos
    } finally {
        // closing the file also closes its channel, so repeated polling
        // does not accumulate open file handles
        raf.close()
    }
}
wtmp = new File('Pleac/data/wtmp')
// wtmpTailingScript: poll the wtmp file forever and print records as they are appended
sampleInterval = 2000 // polling period in milliseconds (2 secs)
filePointer = wtmp.size() // start at the current end, so only new records are shown
for (;;) {
    // has the file grown past the point we have already processed?
    long fileLength = wtmp.size()
    if( filePointer < fileLength ) {
        // yes: print the new records and remember how far we got
        filePointer = processWtmpRecords(wtmp, filePointer)
    }
    // wait before polling again
    Thread.sleep( sampleInterval )
}
//----------------------------------------------------------------------------------
[править] Program: tctee
//----------------------------------------------------------------------------------
// contains most of the functionality of the original (not guaranteed to be perfect)
// -i ignores errors, e.g. if one target is write protected, the others will work
// -u writes files in unbuffered mode (ignore for '|')
// -n not to stdout
// -a all files are in append mode
// '>>file1' turn on append for individual file
// '|wc' or '|grep x' etc sends output to forked process (only one at any time)
// Fans each line of output out to several targets.
// A target is any object with write(byte[]) and close(); null entries are skipped.
class MultiStream {
    private targets
    private ignoreErrors

    // targets - list of output targets
    // ignore  - when true, a target whose write fails is dropped and the others
    //           carry on; when false, the failure is rethrown
    MultiStream(List targets, ignore) {
        this.targets = targets
        ignoreErrors = ignore
    }

    // Writes content followed by the platform line separator to every target.
    // Callers pass lines whose terminator was already stripped by readLine, so
    // the separator is added back here to keep output lines apart.
    def println(String content) {
        def data = (content + System.getProperty('line.separator')).bytes
        def failed = []
        targets.each{ target ->
            try {
                target?.write(data)
            } catch (Exception ex) {
                if (!ignoreErrors) throw ex
                failed << target
            }
        }
        if (failed) {
            // drop broken targets so they are not retried on the next line
            targets = targets - failed
            failed.each{ target ->
                // best effort: a failing close on an already broken target
                // must not abort the remaining ones
                try { target?.close() } catch (Exception ignored) { }
            }
        }
    }

    // Closes every remaining target.
    def close() { targets.each{ it?.close() } }
}
// One destination of the tee: a file (truncate or append), an already open
// stream, or the standard input of a forked process.
class TeeTarget {
    private filename
    private stream
    private p

    // name     - 'file' (plain), '>>file' (always append) or '|command' (pipe to process)
    // append   - open plain file names in append mode
    // buffered - wrap file output in a BufferedOutputStream
    // ignore   - when true, a file that cannot be opened leaves this target
    //            inactive (writes become no-ops) instead of raising an error
    TeeTarget(String name, append, buffered, ignore) {
        if (name.startsWith('>>')) {
            createFileStream(name[2..-1],true,buffered,ignore)
        } else if (name.startsWith('|')) {
            createProcessReader(name[1..-1])
        } else {
            createFileStream(name,append,buffered,ignore)
        }
    }

    // Wraps an existing stream, e.g. System.out.
    TeeTarget(OutputStream stream) { this.stream = stream }

    def write(bytes) { stream?.write(bytes) }
    def close() { stream?.close() }

    private createFileStream(name, append, buffered, ignore) {
        filename = name
        def fos
        try {
            fos = new FileOutputStream(name, append)
        } catch (Exception ex) {
            if (ignore) return
            // without -i the failure must reach the caller; swallowing it here
            // would leave a target wrapping a null stream that fails on a later write
            throw ex
        }
        stream = buffered ? new BufferedOutputStream(fos) : fos
    }

    private createWriter(os) {new PrintWriter(new BufferedOutputStream(os))}
    private createReader(is) {new BufferedReader(new InputStreamReader(is))}

    // Starts a thread copying lines from br to pw until br reaches end of stream,
    // then flushes and closes pw.
    private createPiperThread(br, pw) {
        Thread.start{
            def next
            while((next = br.readLine())!=null) {
                pw.println(next)
            }
            pw.flush(); pw.close()
        }
    }

    // Launches `name` as a process. Bytes written to this target travel through
    // a pipe to the process's stdin; the process's stdout is copied to System.out.
    private createProcessReader(name) {
        def readFromStream = new PipedInputStream()
        def r1 = createReader(readFromStream)
        stream = new BufferedOutputStream(new PipedOutputStream(readFromStream))
        p = Runtime.runtime.exec(name)
        def w1 = createWriter(p.outputStream)
        createPiperThread(r1, w1)
        def w2 = createWriter(System.out)
        def r2 = createReader(p.inputStream)
        createPiperThread(r2, w2)
    }
}
// Command line handling: options start with '-', anything else names a target.
targets = []
append = false; ignore = false; includeStdout = true; buffer = true
args.each{ arg ->
    if (arg.startsWith('-')) {
        switch (arg) {
            case '-a': append = true; break
            case '-i': ignore = true; break
            case '-n': includeStdout = false; break
            case '-u': buffer = false; break
            default:
                println "usage: tee [-ainu] [filenames] ..."
                System.exit(1)
        }
    } else targets += arg
}
// turn the target names into open targets, then optionally add stdout
targets = targets.collect{ new TeeTarget(it, append, buffer, ignore) }
if (includeStdout) targets += new TeeTarget(System.out)
def tee = new MultiStream(targets, ignore)
// Copy every input line until end of stream. Iterating with eachLine (rather
// than testing the line's truthiness) keeps an empty input line from ending
// the copy early.
System.in.eachLine{ text ->
    tee.println(text)
}
tee.close()
//----------------------------------------------------------------------------------
[править] Program: laston
//----------------------------------------------------------------------------------
// covers most of the original's functionality; the uid is hard-coded
// (ran on ubuntu 6.10 on intel)
lastlog = new File('Pleac/data/lastlog')
channel = new RandomAccessFile(lastlog, 'r').channel
uid = 1000
// one record = 4-byte time + 32-byte line + 256-byte host
recsize = 4 + 32 + 256
// records are stored back to back, indexed by uid
def recordOffset = uid * recsize
channel.position(recordOffset)
buf = ByteBuffer.allocate(recsize)
channel.read(buf)
buf.flip()
// decode the fields in the order they appear in the record
date = getDate(buf)
line = getString(buf,32)
host = getString(buf,256)
println "User with uid $uid last logged on $date from ${host ?: 'unknown'} on $line"
// => User with uid 1000 last logged on Sat Jan 13 09:09:35 EST 2007 from unknown on :0
//----------------------------------------------------------------------------------
|