Groovy/FAQ/Строки
Материал из Wiki.crossplatform.ru
Версия от 08:59, 8 декабря 2008; Root (Обсуждение | вклад)
1. Strings
Introduction
//----------------------------------------------------------------------------------
// String literal forms. Each assignment below simply demonstrates one way
// of quoting; only the first one is checked with an assert.
string = '\\n'                    // two characters, \ and an n
assert string.size() == 2
string = "\n"                     // a "newline" character
string = '\n'                     // a "newline" character

string = "Jon 'Maddog' Orwant"    // literal single quote inside double quotes
string = 'Jon \'Maddog\' Orwant'  // escaped single quotes

string = 'Jon "Maddog" Orwant'    // literal double quotes inside single quotes
string = "Jon \"Maddog\" Orwant"  // escaped double quotes

// Triple quotes let a literal span several source lines; the line breaks
// become part of the value.
string = '''
This is a multiline string declaration
using single quotes (you can use double quotes)
'''
//----------------------------------------------------------------------------------
Accessing Substrings
//----------------------------------------------------------------------------------
// accessing substrings
string = 'hippopotamus'
start = 5; end = 7; endplus1 = 8
assert string.substring(start, endplus1) == 'pot'
assert string[start..end] == 'pot'

assert string.substring(start) == 'potamus'
assert string[start..-1] == 'potamus'

// String is immutable but new strings can be created in various ways
assert string - 'hippo' - 'mus' + 'to' == 'potato'
assert string.replace('ppopotam','bisc') == 'hibiscus'
assert string.substring(0, 2) + 'bisc' + string[-2..-1] == 'hibiscus'
// StringBuffer is mutable
sb = new StringBuffer(string)
sb[2..-3] = 'bisc'
assert sb.toString() == 'hibiscus'

// No exact pack/unpack equivalents exist in Groovy. Examples here use a custom
// implementation to split an original string into chunks of specified length
// the method is a modified version of the Java PLEAC version

// get a 5-character string, skip 8, then grab 2 5-character strings
// skipping the trailing spaces, then grab the rest
data = 'hippopotamus means river horse'
def fields = unpack('A5 x8 A5 x1 A5 x1 A*', data)
assert fields == ['hippo', 'means', 'river', 'horse']

// On a Java 5 or 6 JVM, Groovy can also make use of Scanners:
s = new Scanner(data)
s.findInLine(/(.{5}).{8}(.{5}) (.{5}) (.*)/)
m = s.match()
fields = []
(1..m.groupCount()).each{ fields << m.group(it) }
assert fields == ['hippo', 'means', 'river', 'horse']

// another scanner example similar to the javadoc example
input = '1 fish 2 fish red fish blue fish'
s = new Scanner(input).useDelimiter(/\s*fish\s*/)
fields = []
2.times{ fields << s.nextInt() }
2.times{ fields << s.next() }
assert fields == [1, 2, 'red', 'blue']

// split at five characters boundaries
String[] fivers = unpack('A5 ' * data.length().intdiv(5), data)
assert fivers == ["hippo", "potam", "us me", "ans r", "iver ", "horse"]

// chop string into individual characters
assert 'abcd' as String[] == ['a', 'b', 'c', 'd']

string = "This is what you have"
// Indexing forwards  (left to right)
// tens   000000000011111111112
// units +012345678901234567890
// Indexing backwards (right to left)
// tens   221111111111000000000
// units  109876543210987654321-

assert string[0] == 'T'
assert string[5..6] == 'is'
assert string[13..-1] == 'you have'
assert string[-1] == 'e'
assert string[-4..-1] == 'have'
assert string[-8, -7, -6] == 'you'

data = new StringBuffer(string)
data[5..6] = "wasn't"       ; assert data.toString() == "This wasn't what you have"
data[-12..-1] = "ondrous"   ; assert data.toString() == "This wasn't wondrous"
data[0..0] = ""             ; assert data.toString() == "his wasn't wondrous"
data[-10..-1]  = ""         ; assert data.toString() == "his wasn'"

string = "This wasn't wondrous"
// check last ten characters match some pattern
assert string[-10..-1] =~ /^t\sw.*s$/

string = 'This is a test'
assert string[0..4].replaceAll('is', 'at') + string[5..-1] == 'That is a test'

// exchange the first and last letters in a string
string = 'make a hat'
string = string[-1] + string[1..-2] + string[0]
assert string == 'take a ham'

// extract column with unpack
string = 'To be or not to be'

// skip 6, grab 6
assert unpack("x6 A6", string) == ['or not']

// forward 6, grab 2, backward 5, grab 2
assert unpack("x6 A2 X5 A2", string) == ['or', 'be']

assert cut2fmt([8, 14, 20, 26, 30]) == 'A7 A6 A6 A6 A4 A*'

/**
 * Splits data into fields according to a small subset of Perl's unpack
 * template language (derived from the Java PLEAC version).
 *
 * The format is a whitespace-separated list of tokens, each an action
 * letter followed by an optional count (default 1) or '*':
 *   A<n> / A*  take n characters (or the rest) as a field
 *   x<n> / x*  skip forward n characters (or to the end)
 *   X<n> / X*  step back n characters (or to the start)
 *
 * @param format the template described above
 * @param data   the string to cut up
 * @return the extracted fields as a String[]
 * @throws IllegalArgumentException if a token starts with an unknown action
 */
def unpack(String format, String data) {
    def result = []
    int formatOffset = 0, dataOffset = 0
    int minDataOffset = 0, maxDataOffset = data.size()

    new StringTokenizer(format).each{ token ->
        int tokenLen = token.length()

        // count determination
        int count = 0
        if (tokenLen == 1) count = 1
        else if (token.charAt(1) == '*') count = -1
        else count = token[1..-1].toInteger()

        // action determination
        char action = token.charAt(0)
        switch (action) {
            case 'A':
                // Locals on purpose: the script itself has 'start' and 'end'
                // variables that must not be overwritten from in here.
                int from = Math.min(dataOffset, maxDataOffset)
                if (count == -1) {
                    // substring() yields '' when already at the end of data
                    result.add(data.substring(from))
                    dataOffset = maxDataOffset
                } else {
                    int to = Math.min(dataOffset + count, maxDataOffset)
                    result.add(data.substring(from, to))
                    dataOffset += count
                }
                break
            case 'x':
                if (count == -1) dataOffset = maxDataOffset
                else dataOffset += count
                break
            case 'X':
                if (count == -1) dataOffset = minDataOffset
                // never step back past the start of the data
                else dataOffset = Math.max(minDataOffset, dataOffset - count)
                break
            default:
                throw new IllegalArgumentException(
                    "Unknown action token '" + token + "' at format offset " + formatOffset)
        }
        formatOffset += tokenLen + 1
    }
    return result as String[]
}

/**
 * Builds an unpack template from a list of 1-based column start positions:
 * one 'A<width>' per gap between consecutive positions, then a final 'A*'.
 *
 * @param positions ascending column positions
 * @return the template string, e.g. [8, 14] gives 'A7 A6 A*'
 */
def cut2fmt(positions) {
    def template = new StringBuilder()
    int lastpos = 1
    for (pos in positions) {
        template.append('A').append(pos - lastpos).append(' ')
        lastpos = pos
    }
    return template.append('A*').toString()
}
//----------------------------------------------------------------------------------
Establishing a Default Value
//----------------------------------------------------------------------------------
// Pick b when it is truthy, otherwise fall back to c.
b = false; c = 'cat'
assert (b ? b : c) == 'cat'
b = true
assert (b ? b : c)
// when c is itself a boolean this is just 'b || c'

// b does not have to be a boolean:
// an empty list is coerced to boolean false
b = []
assert (b ? b : c) == 'cat'

// Keep x when it is already truthy, otherwise take y.
x = false; y = 'dog'
x = x ? x : y
assert x == 'dog'

// x does not have to be a boolean either:
// a non-empty string is coerced to boolean true
x = 'cat'
x = x ? x : y
assert x == 'cat'

// The JVM supplies the user name as a system property
// (exec or Ant's environment support would be alternatives).
assert System.getProperty('user.name')

/**
 * Returns startingPoint unless it is null or empty, in which case
 * 'Greenwich' is returned.
 */
def setDefaultIfNullOrEmpty(startingPoint) {
    if (startingPoint && startingPoint.length() != 0) {
        return startingPoint
    }
    return 'Greenwich'
}
assert setDefaultIfNullOrEmpty(null) == 'Greenwich'
assert setDefaultIfNullOrEmpty('') == 'Greenwich'
assert setDefaultIfNullOrEmpty('Something else') == 'Something else'
//----------------------------------------------------------------------------------
Exchanging Values Without Using Temporary Variables
//----------------------------------------------------------------------------------
v1 = 'alpha'; v2 = 'omega'
// Swapping can be done inline with a temp variable; wrapping the three
// assignments in a closure is a slightly more interesting variant.
swap = {
    temp = v1
    v1 = v2
    v2 = temp
}
swap()
assert v1 == 'omega'
assert v2 == 'alpha'
// a more generic swap() is also possible using Groovy's metaclass mechanisms
// but is not idiomatic of Groovy usage
//----------------------------------------------------------------------------------
Converting Between ASCII Characters and Values
//----------------------------------------------------------------------------------
// char and int are interchangeable apart from their width: a char uses
// 16 bits and an int 32, so going from int to char is written as a cast.
char ch = 'e'
int num = ch         // no problem
ch = (char) num      // needs an explicit cast

s1 = "Number " + num + " is character " + (char) num
assert s1 == 'Number 101 is character e'

s2 = "Character " + ch + " is number " + (int) ch
assert s2 == 'Character e is number 101'

// easy conversion between char arrays, char lists and Strings
char[] ascii = "sample".toCharArray() // {115, 97, 109, 112, 108, 101}
assert new String(ascii) == "sample"
assert new String([115, 97, 109, 112, 108, 101] as char[]) == "sample"

// convert 'HAL' to 'IBM' (in increasing order of Grooviness)
assert "HAL".toCharArray().collect{new String(it+1 as char[])}.join() == 'IBM'
assert ("HAL" as String[]).collect{it.next()}.join() == 'IBM'
assert "HAL".replaceAll('.', {it.next()}) == 'IBM'
//----------------------------------------------------------------------------------
Processing a String One Character at a Time
//----------------------------------------------------------------------------------
string = "an apple a day"
// toList() gives one single-character String per element. The earlier
// split('') form depended on a leading empty string in the result, which
// Java 8 and later no longer produce, so its [1..5] slice ran off the end.
assert string[3..7].toList() == ['a', 'p', 'p', 'l', 'e']
assert string.toList().unique().sort().join() == ' adelnpy'
//----------------------------------------------------------------------------------
// CheckSum.groovy: Compute 16-bit checksum of input file
// Usage: groovy CheckSum <file>
// script:
checksum = 0
// JVM bytes are signed; mask to 0..255 so bytes >= 0x80 add rather than subtract
new File(args[0]).eachByte{ checksum += (it & 0xFF) }
checksum %= (int) Math.pow(2, 16) - 1
println checksum
//----------------------------------------------------------------------------------
// to run on its own source code:
//=> % groovy CheckSum CheckSum.groovy
//=> 9349   (value reported for the original listing; it depends on the file's bytes)
//----------------------------------------------------------------------------------
// Slowcat.groovy: Emulate a   s l o w   line printer
// Usage: groovy Slowcat <file> <delay_millis_between_each_char>
// script:
delay = args[1].toInteger()
new File(args[0]).eachByte{ print ((char) it); Thread.sleep(delay) }
//----------------------------------------------------------------------------------
Reversing a String by Word or Character
//----------------------------------------------------------------------------------
// reverse character by character
assert 'string'.reverse() == 'gnirts'

// reverse word by word: split on single spaces, flip the list, glue back
string = 'Yoda said, "can you see this?"'
def tokens = string.split(' ').toList()
revwords = tokens.reverse().join(' ')
assert revwords == 'this?" see you "can said, Yoda'

// keep only the palindromes longer than five characters
words = ['bob', 'alpha', 'rotator', 'omega', 'reviver']
long_palindromes = words.findAll{ candidate ->
    candidate.size() > 5 && candidate == candidate.reverse()
}
assert long_palindromes == ['rotator', 'reviver']
//----------------------------------------------------------------------------------
Expanding and Compressing Tabs
//----------------------------------------------------------------------------------
s1 = 'abc\t def\tghi \n\tx'
// s1 with every tab replaced by spaces up to the next multiple of 8
s2 = 'abc      def    ghi \n        x'

/**
 * Replaces each tab with the number of spaces needed to reach the next
 * tab stop, line by line.
 *
 * @param s        text, lines separated by '\n'
 * @param tabWidth distance between tab stops (default 8)
 * @return the text with tabs expanded
 */
def expand(s, int tabWidth = 8) {
    s.split('\n').toList().collect{ original ->
        def line = original
        while (line.contains('\t')) {
            // the regex grabs everything before the first tab as 'pre'
            line = line.replaceAll(/([^\t]*)(\t)(.*)/){
                all,pre,tab,suf -> pre + ' ' * (tabWidth - pre.size() % tabWidth) + suf
            }
        }
        return line
    }.join('\n')
}

/**
 * Inverse of expand: wherever a run of spaces ends exactly on a tab stop,
 * the run is replaced by a single tab.
 *
 * @param s        text, lines separated by '\n'
 * @param tabWidth distance between tab stops (default 8)
 * @return the text with space runs compressed to tabs
 */
def unexpand(s, int tabWidth = 8) {
    s.split('\n').toList().collect{ original ->
        def line = original
        // walk right to left so earlier replacements do not shift the
        // tab-stop positions still to be visited
        for (i in line.size()-1..1) {
            if (i % tabWidth == 0) {
                def prefix = line[0..<i]
                // strip trailing whitespace only; trim() would also eat
                // leading indentation in front of the first word
                def stripped = prefix.replaceAll(/\s+$/, '')
                if (stripped.size() != prefix.size()) {
                    line = stripped + '\t' + line[i..-1]
                }
            }
        }
        return line
    }.join('\n')
}

assert expand(s1) == s2
assert unexpand(s2) == s1
//----------------------------------------------------------------------------------
Expanding Variables in User Input
//----------------------------------------------------------------------------------
// double-quoted strings substitute $name with the variable's value
debt = 150
assert "You owe $debt to me" == 'You owe 150 to me'

rows = 24; cols = 80
assert "I am $rows high and $cols wide" == 'I am 24 high and 80 wide'

// a closure computes the replacement for every run of digits
def doubled = 'I am 17 years old'.replaceAll(/\d+/) { digits ->
    2 * digits.toInteger()
}
assert doubled == 'I am 34 years old'
//----------------------------------------------------------------------------------
Controlling Case
//----------------------------------------------------------------------------------
assert "bo peep".toUpperCase() == 'BO PEEP'
assert 'JOHN'.toLowerCase() == 'john'

/**
 * Upper-cases the first character of s and lower-cases the rest.
 * A null or empty s is returned unchanged.
 */
def capitalize(s) {
    if (!s) return s   // nothing to index into
    s[0].toUpperCase() + (s.size()<2 ? '' : s[1..-1].toLowerCase())
}
assert capitalize('joHn') == 'John'

s = "thIS is a loNG liNE".replaceAll(/\w+/){capitalize(it)}
assert s == 'This Is A Long Line'

s1 = 'JOhn'; s2 = 'joHN'
assert s1.equalsIgnoreCase(s2)

// Shared random source for randomCase. It is a script (binding) variable so
// that the method below can see it, and it is actually initialised.
rand = new Random()

/**
 * Returns ch lower-cased when a random draw from 0..99 is below 20,
 * otherwise returns ch unchanged.
 */
def randomCase(char ch) {
    (rand.nextInt(100) < 20) ? Character.toLowerCase(ch) : ch
}
//----------------------------------------------------------------------------------
Interpolating Functions and Expressions Within Strings
//----------------------------------------------------------------------------------
n = 10
assert "I have ${n+1} guanacos." == 'I have 11 guanacos.'
assert "I have " + (n+1) + " guanacos." == 'I have 11 guanacos.'

// sending templated email is solved in two parts: templating and sending
// Part 1: creating an email template
naughty = 'Mr Bad Credit'

/** Stand-in lookup: returns the same manager for every customer. */
def get_manager_list(s) { 'The Big Boss' }

// The template must keep its line breaks: the sanitising regex below
// anchors on the start and end of the Date line.
msg = """
To: $naughty
From: Your Bank
Cc: ${ get_manager_list(naughty) }
Date: ${ new Date() }

Dear $naughty,

Today, you bounced check number ${ 500 + new Random().nextInt(100) } to us.
Your account is now closed.

Sincerely,
the management
"""
expected = '''
To: Mr Bad Credit
From: Your Bank
Cc: The Big Boss
Date: XXX

Dear Mr Bad Credit,

Today, you bounced check number XXX to us.
Your account is now closed.

Sincerely,
the management
'''
// mask the two values that differ on every run before comparing
sanitized = msg.replaceAll('(?m)^Date: (.*)$','Date: XXX')
sanitized = sanitized.replaceAll(/(?m)check number (\d+) to/,'check number XXX to')
assert sanitized == expected
// note: Groovy also has several additional built-in templating facilities

// Part 2: sending email
// SendMail.groovy: Send email
// Usage: groovy SendMail
// script:
ant = new AntBuilder()
ant.mail(from:'manager@grumpybank.com', tolist:'innocent@poorhouse.com',
    encoding:'plain', mailhost:'mail.someserver.com',
    subject:'Friendly Letter', message:'this is a test message')
// Ant has many options for setting encoding, security, attachments, etc., see:
// http://ant.apache.org/manual/CoreTasks/mail.html
// Groovy could also use the Java Mail Api directly if required
//----------------------------------------------------------------------------------
Indenting Here Documents
//----------------------------------------------------------------------------------
// A here-document written with indentation to fit the surrounding code...
def raw = '''
    your text
    goes here
'''

// ...and the text that is actually wanted, flush left.
def expected = '''
your text
goes here
'''

// Strip leading whitespace from every line. split() drops the trailing
// empty element, so the final newline is added back by hand.
assert raw.split('\n').toList().collect{
    it.replaceAll(/^\s+/,'')
}.join('\n') + '\n' == expected
//----------------------------------------------------------------------------------
Reformatting Paragraphs
//----------------------------------------------------------------------------------
input = '''Folding and splicing is the work of an editor,
not a mere collection of silicon and mobile electrons!'''

expected = '''Folding and splicing
is the work of an
editor, not a mere
collection of
silicon and mobile
electrons!'''

/**
 * Re-flows text so that no output line is longer than maxSize, breaking
 * only between whitespace-separated words. A single word longer than
 * maxSize is placed on a line of its own.
 *
 * @param text    text to re-flow; any run of whitespace separates words
 * @param maxSize maximum line length in characters
 * @return the wrapped lines joined with '\n'
 */
def wrap(text, maxSize) {
    def all = []
    def line = ''
    // split on whitespace runs; findAll drops the empty leading element
    text.split(/\s+/).findAll{ it }.each{ word ->
        // only start a new line when there is something to flush, so an
        // over-long first word does not produce a leading blank line
        if (line && line.size() + 1 + word.size() > maxSize) {
            all += line
            line = word
        } else {
            line += (line == '' ? word : ' ' + word)
        }
    }
    all += line
    return all.join('\n')
}
assert wrap(input, 20) == expected
//----------------------------------------------------------------------------------
Escaping Characters
//----------------------------------------------------------------------------------
string = /Mom said, "Don't do that."/
// (the trailing //' comments below only rebalance quotes for syntax highlighters)

// put a backslash in front of each quote character
assert string.replaceAll(/['"]/){ hit -> /\\/+hit[0] } == /Mom said, \"Don\'t do that.\"/   //'

// double each quote character
assert string.replaceAll(/['"]/){ hit -> hit[0]+hit[0] } == /Mom said, ""Don''t do that.""/  //'

// backslash-quote everything that is not a capital letter
assert "DIR /?".replaceAll(/[^A-Z]/){ hit -> /\\/+hit[0] } == /DIR\ \/\?/
//----------------------------------------------------------------------------------
Trimming Blanks from the Ends of a String
//----------------------------------------------------------------------------------
assert ' x '.trim() == 'x'

// Echo every line read from standard input, trimmed and wrapped in >< symbols
// script:
def stdin = new BufferedReader(new InputStreamReader(System.in))
stdin.eachLine{ typed ->
    println(">" + typed.trim() + "<")
}
//----------------------------------------------------------------------------------
Parsing Comma-Separated Data
//----------------------------------------------------------------------------------
// One field per match: group 1 is the content of a quoted field,
// group 2 an unquoted field; the last alternative is a bare comma.
pattern = /"([^\"\\]*(?:\\.[^\"\\]*)*)",?|([^,]+),?|,/
line = /XYZZY,"","O'Reilly, Inc","Wall, Larry","a \"glug\" bit,",5,"Error, Core Dumped"/
m = line =~ pattern
expected = [/XYZZY/, '', /O'Reilly, Inc/, /Wall, Larry/,     //'
            /a \"glug\" bit,/, /5/, /Error, Core Dumped/]
(0..<m.size().toInteger()).each{ i ->
    def groups = m[i]
    // prefer the unquoted group when it matched, else the quoted one
    def field = groups[2] ? groups[2] : groups[1]
    assert expected[i] == field
}
//----------------------------------------------------------------------------------
Soundex Matching
//----------------------------------------------------------------------------------
// Several Java Soundex implementations exist; this example uses the one
// from commons codec. The jar has to be on the classpath.
// Further details: http://jakarta.apache.org/commons/codec
// require(groupId:'commons-codec', artifactId:'commons-codec', version:'1.3')
soundex = new org.apache.commons.codec.language.Soundex()
def smithCode = soundex.soundex('Smith')
def smythCode = soundex.soundex('Smyth')
assert smithCode == smythCode
//----------------------------------------------------------------------------------
Program: fixstyle
//----------------------------------------------------------------------------------
input = '''I have analysed the new part. As long as you
aren't worried about the colour, it is a dropin replacement.'''     //'

expected = '''I have analyzed the new part. As long as you
aren't worried about the color, it is a drop-in replacement.'''    //'

// spelling to replace : preferred spelling
translations = [colour:'color', analysed:'analyzed', dropin:'drop-in']

/**
 * Replaces every whole-word occurrence of a key of the script-level
 * 'translations' map with its value, line by line.
 *
 * @param s text, lines separated by '\n'
 * @return the text with the replacements applied
 */
def fixstyle(s) {
    s.split('\n').toList().collect{ original ->
        def line = original
        translations.each{ key, value ->
            // \b also matches at the very start and end of a line, where a
            // lookaround demanding a neighbouring non-word character fails
            line = line.replaceAll(/\b/ + key + /\b/, value)
        }
        return line
    }.join('\n')
}
assert fixstyle(input) == expected
//----------------------------------------------------------------------------------
Program: psgrep
//----------------------------------------------------------------------------------
// Solved in two parts: 'screenscrape' text stream and return stream from process
// Part 1: text scraping
// The table is column-aligned; the positions given to cut2fmt below rely
// on that alignment.
input = '''
      PID    PPID    PGID     WINPID  TTY  UID    STIME COMMAND
     4636       1    4636       4636  con 1005 08:24:50 /usr/bin/bash
      676    4636     676        788  con 1005 13:53:32 /usr/bin/ps
'''
select1 = '''
      PID    PPID    PGID     WINPID  TTY  UID    STIME COMMAND
      676    4636     676        788  con 1005 13:53:32 /usr/bin/ps
'''
select2 = '''
      PID    PPID    PGID     WINPID  TTY  UID    STIME COMMAND
     4636       1    4636       4636  con 1005 08:24:50 /usr/bin/bash
'''

// line below must be configured for your unix - this one's cygwin
format = cut2fmt([10, 18, 26, 37, 42, 47, 56])

/**
 * Filters the process table held in the script variable 'input'.
 *
 * The first non-empty line is taken as the header; its lower-cased column
 * names become variables. For each following line the column values are
 * bound to those variables and the expression s is evaluated; lines for
 * which it is true are kept.
 *
 * NOTE(review): s is executed as Groovy code via GroovyShell, so it must
 * come from a trusted source.
 *
 * @param s a Groovy boolean expression over the column names,
 *          e.g. 'winpid < 800'
 * @return the header plus the selected lines, wrapped in leading and
 *         trailing newlines
 */
def psgrep(s) {
    def out = []
    def lines = input.split('\n').findAll{ it.size() }
    if (!lines) return '\n' + out.join('\n') + '\n'   // no header, nothing to select

    def vars = unpack(format, lines[0]).toList().collect{
        it.toLowerCase().trim()
    }
    out += lines[0]
    def rows = lines.size() > 1 ? lines[1..-1] : []
    rows.each{ row ->
        // numeric columns become Integers, everything else a trimmed String
        def values = unpack(format, row).toList().collect{ field ->
            try {
                return field.toInteger()
            } catch(NumberFormatException e) {
                return field.trim()
            }
        }
        vars.eachWithIndex{ name, i ->
            binding.setVariable(name, values[i])
        }
        if (new GroovyShell(binding).evaluate(s)) out += row
    }
    return '\n' + out.join('\n') + '\n'
}

assert psgrep('winpid < 800') == select1
assert psgrep('uid % 5 == 0 && command =~ /sh$/') == select2

// Part 2: obtaining text stream from process
// (the three assignments below are alternatives, one per platform)
// unixScript:
input = 'ps'.execute().text

// cygwinScript:
input = 'path_to_cygwin/bin/ps.exe'.execute().text

// windowsScript:
// can use something like sysinternal.com's pslist (with minor script tweaks)
input = 'pslist.exe'.execute().text
//----------------------------------------------------------------------------------