|
Python/FAQ/Строки
Материал из Wiki.crossplatform.ru
#-----------------------------
# String literal forms: escapes vs. raw strings, quote nesting, and
# triple-quoted multiline literals.
mystr = "\n" # a newline character
mystr = r"\n" # two characters, \ and n
#-----------------------------
mystr = "Jon 'Maddog' Orwant" # literal single quote inside double quotes
mystr = 'Jon "Maddog" Orwant' # literal double quote inside single quotes
#-----------------------------
mystr = 'Jon \'Maddog\' Orwant' # escaped single quote
mystr = "Jon \"Maddog\" Orwant" # escaped double quote
#-----------------------------
# Triple-quoted literals keep their embedded newlines, including the one
# right after the opening quotes.
mystr = """
This is a multiline string literal
enclosed in triple double quotes.
"""
mystr = '''
And this is a multiline string literal
enclosed in triple single quotes.
'''
#-----------------------------
[править] Доступ к подстрокам
#-----------------------------
# get a 5-char string, skip 3, then grab 2 8-char strings, then the rest
# Note that struct.unpack cannot use * for an unknown length.
# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65224
import struct
(lead, s1, s2), tail = struct.unpack("5s 3x 8s 8s", data[:24]), data[24:]
# split at five-char boundaries; struct.unpack needs the buffer length to
# match the format exactly, so a trailing partial chunk is trimmed first
nfivers = len(data) // 5
fivers = struct.unpack("5s" * nfivers, data[:nfivers * 5])
# the same split done with plain slicing (gives a list instead of a tuple)
fivers = [data[i*5:i*5+5] for i in range(nfivers)]
# chop string into individual characters
chars = list(data)
#-----------------------------
mystr = "This is what you have"
#       +012345678901234567890 Indexing forwards (left to right)
#        109876543210987654321- Indexing backwards (right to left)
# note that 0 means 10 or 20, etc. above
# (each ruler digit sits under the character it indexes; only the last
# digit of a two-digit index is shown)
first = mystr[0] # "T"
start = mystr[5:7] # "is"
rest = mystr[13:] # "you have"
last = mystr[-1] # "e"
end = mystr[-4:] # "have"
piece = mystr[-8:-5] # "you"
#-----------------------------
# Python strings are immutable.
# In general, you should just do piecemeal reallocation:
mystr = "This is what you have"
mystr = mystr[:5] + "wasn't" + mystr[7:]
# Or replace and reallocate
mystr = "This is what you have"
mystr = mystr.replace(" is ", " wasn't ")
# DON'T DO THIS: In-place modification could be done using character arrays
# (the "c" typecode is only available in Python 2's array module)
import array
mystr = array.array("c", "This is what you have")
mystr[5:7] = array.array("c", "wasn't")
# mystr is now array('c', "This wasn't what you have")
# DON'T DO THIS: It could also be done using MutableString
# (the UserString module and MutableString are likewise Python 2 only)
from UserString import MutableString
mystr = MutableString("This is what you have")
mystr[-12:] = "ondrous"
# mystr is now "This is wondrous"
#-----------------------------
# you can test simple substrings with "in" (for regex matching see ch.6):
if txt in mystr[-10:]:
print "'%s' found in last 10 characters"%txt
# Or use the startswith() and endswith() string methods:
if mystr.startswith(txt):
print "%s starts with %s."%(mystr, txt)
if mystr.endswith(txt):
print "%s ends with %s."%(mystr, txt)
#-----------------------------
[править] Установка значения по умолчанию
#-----------------------------
# Introductory Note: quite a bit of this section is not terribly Pythonic
# as names must be set before being used. For instance, unless myvar has
# been previously defined, these next lines will all raise NameError:
myvar = myvar or some_default
myvar2 = myvar or some_default
myvar |= some_default # bitwise-or, not logical-or - for demo
# The standard way of setting a default is often:
myvar = default_value
if some_condition:
    pass # code which may set myvar to something else
# if myvar is returned from a function and may be empty/None, then use:
myvar = somefunc()
if not myvar:
    myvar = default_value
# If you want a default value that can be overridden by the person calling
# your code, you can often wrap it in a function with a named parameter:
def myfunc(myvar="a"):
    """Return myvar with "b" appended; myvar defaults to "a"."""
    return myvar + "b"
print myfunc(), myfunc("c")
#=> ab cb
# Note, though, that this won't work for mutable objects such as lists or
# dicts that are mutated in the function as the object is only created once
# and repeated calls to the same function will return the same object. This
# can be desired behaviour however - see section 10.3, for instance.
def myfunc(myvar=[]):
    """Append "x" to myvar and return it.

    The mutable default is kept on purpose: this definition exists to
    demonstrate that the one default list is shared between calls.
    """
    myvar.append("x")
    return myvar
print myfunc(), myfunc()
#=> ['x'] ['x', 'x']
# You need to do:
def myfunc(myvar=None):
    """Append "x" to myvar and return it, using a fresh list when none is given."""
    # None is the sentinel, so a new list is built on every defaulted call
    if myvar is None:
        myvar = []
    myvar.append("x")
    return myvar
print myfunc(), myfunc()
#=> ['x'] ['x']
#=== Perl Equivalencies start here
# use b if b is true, otherwise use c
a = b or c
# as that is a little tricksy, the following may be preferred:
if b:
    a = b
else:
    a = c
# set x to y unless x is already true
if not x:
    x = y
#-----------------------------
# use b if b is defined, else c
try:
    a = b
except NameError:
    a = c
#-----------------------------
foo = bar or "DEFAULT VALUE"
#-----------------------------
# To get a user (for both UNIX and Windows), use:
import getpass
user = getpass.getuser()
# DON'T DO THIS: find the user name on Unix systems
import os
user = os.environ.get("USER")
if user is None:
    user = os.environ.get("LOGNAME")
#-----------------------------
if not starting_point:
    starting_point = "Greenwich"
#-----------------------------
if not a: # copy only if empty
    a = b
if b: # assign b if nonempty, else c
    a = b
else:
    a = c
#-----------------------------
[править] Обмен значений не используя временные переменные
#-----------------------------
# The right-hand side is packed into a tuple before any name on the left is
# rebound, which is why no temporary variable is needed.
v1, v2 = v2, v1
#-----------------------------
# DON'T DO THIS:
temp = a
a = b
b = temp
#-----------------------------
a = "alpha"
b = "omega"
a, b = b, a # the first shall be last -- and versa vice
#-----------------------------
alpha, beta, production = "January March August".split()
# rotate three values in one statement
alpha, beta, production = beta, production, alpha
#-----------------------------
[править] Converting Between ASCII Characters and Values
#-----------------------------
# ord() turns a one-character string into its integer code; chr() goes back.
num = ord(char)
char = chr(num)
#-----------------------------
# the %c conversion accepts the integer code as well
char = "%c" % num
print "Number %d is character %c" % (num, num)
print "Number %(n)d is character %(n)c" % {"n": num}
print "Number %(num)d is character %(num)c" % locals()
#=> Number 101 is character e
# (the sample output above assumes num == 101)
#-----------------------------
ascii_character_numbers = [ord(c) for c in "sample"]
print ascii_character_numbers
#=> [115, 97, 109, 112, 108, 101]
word = "".join([chr(n) for n in ascii_character_numbers])
word = "".join([chr(n) for n in [115, 97, 109, 112, 108, 101]])
print word
#=> sample
#-----------------------------
hal = "HAL"
ibm = "".join([chr(ord(c)+1) for c in hal]) # add one to each ASCII value
print ibm
#=> IBM
#-----------------------------
[править] Processing a String One Character at a Time
#-----------------------------
mylist = list(mystr)
#-----------------------------
for char in mystr:
pass # do something with char
#-----------------------------
mystr = "an apple a day"
uniq = sorted(set(mystr))
print "unique chars are: '%s'" % "".join(uniq)
#=> unique chars are: ' adelnpy'
#-----------------------------
ascvals = [ord(c) for c in mystr]
print "total is %s for '%s'."%(sum(ascvals), mystr)
#=> total is 1248 for 'an apple a day'.
#-----------------------------
# sysv checksum
def checksum(myfile):
    """Return the sum of all character codes in myfile, modulo 2**16 - 1.

    myfile is any iterable of strings: an open text file, a list of lines,
    or a single string (which is iterated character by character).
    """
    total = sum(ord(c) for line in myfile for c in line)
    # the parentheses matter: "%" binds tighter than "-", so without them
    # the result is (total % 65536) - 1, which is -1 for empty input
    return total % (2**16 - 1)
import fileinput
print checksum(fileinput.input()) # data from sys.stdin
# Using a function means any iterable can be checksummed:
print checksum(open("C:/test.txt") # data from file
print checksum("sometext") # data from string
#-----------------------------
#!/usr/bin/python
# slowcat - emulate a s l o w line printer
# usage: slowcat [- DELAY] [files ...]
import sys, select
import re
import fileinput
DELAY = 1
# An optional leading "-N" argument sets the delay multiplier. The length
# check keeps a run with no arguments from raising IndexError.
if len(sys.argv) > 1 and re.match(r"^-\d+$", sys.argv[1]):
    DELAY = -int(sys.argv[1])
    del sys.argv[1] # so fileinput does not treat the option as a file name
for ln in fileinput.input():
    for c in ln:
        sys.stdout.write(c)
        sys.stdout.flush()
        # select() with no descriptors serves as a sub-second sleep
        select.select([],[],[], 0.005 * DELAY)
#-----------------------------
[править] Reversing a String by Word or Character
#-----------------------------
# 2.3+ only
revchars = mystr[::-1] # extended slice - step is -1
revwords = " ".join(mystr.split(" ")[::-1])
# pre 2.3 version:
mylist = list(mystr)
mylist.reverse()
revbytes = "".join(mylist)
mylist = mystr.split()
mylist.reverse()
revwords = ' '.join(mylist)
# Alternative version using reversed():
revchars = "".join(reversed(mystr))
revwords = " ".join(reversed(mystr.split(" ")))
# reversed() makes an iterator, which means that the reversal
# happens as it is consumed. This means that "print reversed(mystr)" is not
# the same as mystr[::-1]. Standard usage is:
for char in reversed(mystr):
    pass # ... do something
#-----------------------------
# 2.3+ only
word = "reviver"
is_palindrome = (word == word[::-1])
#-----------------------------
# Generator version
def get_palindromes(fname):
    """Yield each line of the file fname that is a palindrome over 5 characters.

    Trailing whitespace (including the newline) is stripped from each line
    before it is tested.
    """
    # the with block closes the file even if the generator is abandoned early
    with open(fname) as wordfile:
        for line in wordfile:
            word = line.rstrip()
            if len(word) > 5 and word == word[::-1]:
                yield word
long_palindromes = list(get_palindromes("/usr/share/dict/words"))
# Simpler old-style version using 2.2 string reversal
def rev_string(mystr):
    """Return mystr with its characters in reverse order."""
    mylist = list(mystr)
    mylist.reverse() # reverses in place and returns None
    return "".join(mylist)
long_palindromes=[]
for line in open("/usr/share/dict/words"):
word = line.rstrip()
if len(word) > 5 and word == rev_string(word):
long_palindromes.append(word)
print long_palindromes
#-----------------------------
[править] Expanding and Compressing Tabs
#-----------------------------
# expandtabs() returns a new string; these bare calls discard the result,
# so assign it (e.g. mystr = mystr.expandtabs()) to keep the expansion.
# With no argument the tab stops fall every 8 columns.
mystr.expandtabs()
mystr.expandtabs(4)
#-----------------------------
[править] Expanding Variables in User Input
#-----------------------------
text = "I am %(rows)s high and %(cols)s long"%{"rows":24, "cols":80)
print text
#=> I am 24 high and 80 long
rows, cols = 24, 80
text = "I am %(rows)s high and %(cols)s long"%locals()
print text
#=> I am 24 high and 80 long
#-----------------------------
import re
print re.sub("\d+", lambda i: str(2 * int(i.group(0))), "I am 17 years old")
#=> I am 34 years old
#-----------------------------
# expand variables in text, but put an error message in
# if the variable isn't defined
class SafeDict(dict):
    """dict whose item lookup yields a placeholder message for missing keys."""
    def __getitem__(self, key):
        # %-formatting looks names up through __getitem__, so a missing
        # name is rendered as the message instead of raising KeyError
        return self.get(key, "[No Variable: %s]"%key)
hi = "Hello"
# locals() supplies every local name; SafeDict covers the undefined ones
text = "%(hi)s and %(bye)s!"%SafeDict(locals())
print text
#=> Hello and [No Variable: bye]!
#If you don't need a particular error message, just use the Template class:
from string import Template
x = Template("$hi and $bye!")
hi = "Hello"
# safe_substitute() leaves unknown placeholders in the text untouched
print x.safe_substitute(locals())
#=> Hello and $bye!
print x.substitute(locals()) # will throw a KeyError
#-----------------------------
[править] Controlling Case
#-----------------------------
mystr = "bo peep".upper() # BO PEEP
mystr = mystr.lower() # bo peep
mystr = mystr.capitalize() # Bo peep
#-----------------------------
beast = "python"
caprest = beast.capitalize().swapcase() # pYTHON
#-----------------------------
print "thIS is a loNG liNE".title()
#=> This Is A Long Line
#-----------------------------
if a.upper() == b.upper():
print "a and b are the same"
#-----------------------------
import random
def randcase_one(letter):
    """Return letter upper-cased about one time in five, otherwise lower-cased."""
    # randint() includes both end points, so (0, 4) is true on 1, 2, 3, 4
    if random.randint(0, 4):
        return letter.lower()
    else:
        return letter.upper()
def randcase(myfile):
    """Yield each line of myfile with randomised letter case, minus its newline."""
    for line in myfile:
        # strip only a real line terminator; slicing off the last character
        # would eat data when the final line has no trailing newline
        yield "".join(randcase_one(letter) for letter in line.rstrip("\n"))
for line in randcase(myfile):
print line
#-----------------------------
[править] Interpolating Functions and Expressions Within Strings
#-----------------------------
# the bare expression below builds the string and then discards it
"I have %d guanacos." % (n + 1)
print "I have", n+1, "guanacos."
#-----------------------------
#Python templates disallow in-string calculations (see PEP 292)
from string import Template
email_template = Template("""\
To: $address
From: Your Bank
CC: $cc_number
Date: $date
Dear $name,
Today you bounced check number $checknum to us.
Your account is now closed.
Sincerely,
the management
""")
import random
import datetime
# any calculation happens here, before substitution
person = {"address":"Joe@somewhere.com",
"name": "Joe",
"cc_number" : 1234567890,
"checknum" : 500+random.randint(0,99)}
# values may come from the mapping argument and from keyword arguments
print email_template.substitute(person, date=datetime.date.today())
#-----------------------------
[править] Indenting Here Documents
#-----------------------------
# indenting here documents
#
# in python multiline strings can be used as here documents
# (the text is indented to match the surrounding code; both methods below
# strip that indentation again)
var = """
    your text
    goes here
"""
# using regular expressions
import re
re_leading_blanks = re.compile(r"^\s+",re.MULTILINE)
# \s also matches the newline after the opening quotes; [:-1] drops the
# newline before the closing quotes
var1 = re_leading_blanks.sub("",var)[:-1]
# using string methods
# split into lines, use every line except first and last, left strip and rejoin.
var2 = "\n".join([line.lstrip() for line in var.split("\n")[1:-1]])
poem = """
Here's your poem:
Now far ahead the Road has gone,
And I must follow, if I can,
Pursuing it with eager feet,
Until it joins some larger way
Where many paths and errand meet.
And whither then? I cannot say.
--Bilbo in /usr/src/perl/pp_ctl.c
"""
import textwrap
print textwrap.dedent(poem)[1:-1]
#-----------------------------
[править] Reformatting Paragraphs
#-----------------------------
from textwrap import wrap
# wrap() returns a list of lines, without trailing newlines
output = wrap(para,
              initial_indent=leadtab,
              subsequent_indent=nexttab)
#-----------------------------
#!/usr/bin/env python
# wrapdemo - show how textwrap works
txt = """\
Folding and splicing is the work of an editor,
not a mere collection of silicon
and
mobile electrons!
"""
from textwrap import TextWrapper
wrapper = TextWrapper(width=20,
initial_indent=" "*4,
subsequent_indent=" "*2)
print "0123456789" * 2
print wrapper.fill(txt)
#-----------------------------
"""Expected result:
01234567890123456789
Folding and
splicing is the
work of an editor,
not a mere
collection of
silicon and mobile
electrons!
"""
#-----------------------------
# merge multiple lines into one, then wrap one long line
from textwrap import fill
import fileinput
# fileinput.input() with no arguments reads the files named on the command
# line, or standard input when there are none
print fill("".join(fileinput.input()))
#-----------------------------
# Term::ReadKey::GetTerminalSize() isn't in the Perl standard library.
# It isn't in the Python standard library either. Michael Hudson's
# recipe from python-list #530228 is shown here.
# (http://aspn.activestate.com/ASPN/Mail/Message/python-list/530228)
# Be aware that this will work on Unix but not on Windows.
from termwrap import wrap
import struct, fcntl
def getheightwidth():
height, width = struct.unpack(
"hhhh", fcntl.ioctl(0, TERMIOS.TIOCGWINSZ ,"\000"*8))[0:2]
return height, width
# PERL <>, $/, $\ emulation
import fileinput
import re
_, width = getheightwidth()
for para in re.split(r"\n{2,}", "".join(fileinput.input())):
print fill(para, width)
[править] Escaping Characters
#-----------------------------
mystr = '''Mom said, "Don't do that."''' #"
# each call returns a new string with a backslash before every match
re.sub("['\"]", lambda i: "\\" + i.group(0), mystr)
re.sub("[A-Z]", lambda i: "\\" + i.group(0), mystr)
re.sub(r"\W", lambda i: "\\" + i.group(0), "is a test!")
# re.escape() is the closest standard-library equivalent of Perl's quotemeta
re.escape("is a test!")
[править] Trimming Blanks from the Ends of a String
#-----------------------------
# With no argument these strip whitespace; each returns a new string.
mystr = mystr.lstrip() # left
mystr = mystr.rstrip() # right
mystr = mystr.strip() # both ends
[править] Parsing Comma-Separated Data
#-----------------------------
import csv
def parse_csv(line):
    """Split one line of comma-separated text into a list of field strings.

    Quoted fields may contain commas; a backslash escapes the next
    character inside a field.
    """
    reader = csv.reader([line], escapechar='\\')
    # the next() builtin works on Python 2.6+ and 3; reader.next() is 2-only
    return next(reader)
line = '''XYZZY,"","O'Reilly, Inc","Wall, Larry","a \\"glug\\" bit,",5,"Error, Core Dumped,",''' #"
fields = parse_csv(line)
for i, field in enumerate(fields):
print "%d : %s" % (i, field)
# pre-2.3 version of parse_csv
import re
def parse_csv(text):
    """Split comma-separated text into fields using a regular expression.

    Double-quoted fields may contain commas and backslash-escaped
    characters; the backslashes are left in the returned field. A trailing
    comma adds one empty final field.
    """
    # alternatives: a quoted field | an unquoted non-empty field | an empty field
    pattern = re.compile(r'''"([^"\\]*(?:\\.[^"\\]*)*)",?|([^,]+),?|,''')
    # at most one group matches per field, so joining the groups yields it
    mylist = ["".join(elem)
              for elem in pattern.findall(text)]
    # endswith() is also safe on empty text, where text[-1] would raise
    if text.endswith(","):
        mylist += ['']
    return mylist
# cvs.reader is meant to work for many lines, something like:
# (NB: in Python default, quotechar is *not* escaped by backslash,
# but doubled instead. That's what Excel does.)
for fields in cvs.reader(lines, dialect="some"):
for num, field in enumerate(fields):
print num, ":", field
#-----------------------------
[править] Soundex Matching
#-----------------------------
def soundex(name, len=4):
    """ soundex module conforming to Knuth's algorithm
    implementation 2000-12-24 by Gregory Jorgensen
    public domain

    Return the soundex code of name, zero-padded or cut to len characters.
    Characters outside A-Z (after upper-casing) are ignored. The parameter
    name len shadows the builtin inside this function only; it is kept for
    callers that pass it by keyword.
    """
    # digits holds the soundex values for the alphabet
    digits = '01230120022455012623010202'
    sndx = ''
    fc = ''
    # translate alpha chars in name to soundex digits
    for c in name.upper():
        # only A-Z index into digits; isalpha() would also accept letters
        # such as accented ones and run off the end of the table
        if 'A' <= c <= 'Z':
            if not fc:
                fc = c # remember first letter
            d = digits[ord(c)-ord('A')]
            # duplicate consecutive soundex digits are skipped
            if not sndx or (d != sndx[-1]):
                sndx += d
    # replace first digit with first alpha character
    sndx = fc + sndx[1:]
    # remove all 0s from the soundex code
    sndx = sndx.replace('0','')
    # return soundex code padded to len characters
    return (sndx + (len * '0'))[:len]
user = raw_input("Lookup user: ")
if user == "":
raise SystemExit
name_code = soundex(user)
for line in open("/etc/passwd"):
line = line.split(":")
for piece in line[4].split():
if name_code == soundex(piece):
print "%s: %s\n" % line[0], line[4])
#-----------------------------
[править] Program: fixstyle
#-----------------------------
import sys, fileinput, re
data = """\
analysed => analyzed
built-in => builtin
chastized => chastised
commandline => command-line
de-allocate => deallocate
dropin => drop-in
hardcode => hard-code
meta-data => metadata
multicharacter => multi-character
multiway => multi-way
non-empty => nonempty
non-profit => nonprofit
non-trappable => nontrappable
pre-define => predefine
preextend => pre-extend
re-compiling => recompiling
reenter => re-enter
turnkey => turn-key
"""
# build the replacement table from the "old => new" lines above
mydict = {}
for line in data.split("\n"):
    if not line.strip():
        continue
    k, v = [word.strip() for word in line.split("=>")]
    mydict[k] = v
# one alternation that matches any of the old spellings
pattern_text = "(" + "|".join([re.escape(word) for word in mydict.keys()]) + ")"
pattern = re.compile(pattern_text)
def fixstyle(line):
    """Return line with every listed old spelling replaced by its new one."""
    return pattern.sub(lambda match: mydict[match.group(1)], line)
if __name__ == "__main__":
    args = sys.argv[1:]
    verbose = 0
    if args and args[0] == "-v":
        verbose = 1
        args = args[1:]
    if not args:
        sys.stderr.write("%s: Reading from stdin\n" % sys.argv[0])
    # inplace=1 redirects stdout into the file being read; the original is
    # kept under the ".orig" suffix
    for line in fileinput.input(args, inplace=1, backup=".orig"):
        sys.stdout.write(fixstyle(line))
#-----------------------------
[править] Program: psgrep
#-----------------------------
#!/usr/bin/python
# psgrep - print selected lines of ps output by
# compiling user queries into code.
#
# examples :
# psgrep "uid<10"
import sys, os, re
class PsLineMatch:
    """One line of "ps wwaxl" output plus a compiled query to test it against."""
    # each field from the PS header
    fieldnames = ("flags","uid","pid","ppid","pri","nice","size", \
                  "rss","wchan","stat","tty","time","command")
    numeric_fields = ("flags","uid","pid","ppid","pri","nice","size","rss")
    def __init__(self):
        self._fields = {}
    def new_line(self, ln):
        """Store one line of ps output and split it into named fields."""
        self._ln = ln.rstrip()
        # ps header for option "wwaxl" (different than in the perl code)
        #   F UID PID PPID PRI NI VSZ RSS WCHAN STAT TTY TIME COMMAND
        #   004 0 1 0 15 0 448 236 schedu S ? 0:07 init
        #   . . . . . . . . . . . . .
        # because only the last entry might contain blanks, splitting
        # is safe
        data = self._ln.split(None,12)
        for fn, elem in zip(self.fieldnames, data):
            if fn in self.numeric_fields: # make numbers integer
                self._fields[fn] = int(elem)
            else:
                self._fields[fn] = elem
    def set_query(self, args):
        """Compile the criteria in args into one expression, joined by "and".

        Each criterion is FIELD OP VALUE, with OP built from the characters
        "=", "<" and ">". An unparsable criterion prints a message and
        raises SystemExit.
        """
        # assume args such as: "uid==500", "command==init"
        conds=[]
        m = re.compile(r"(\w+)([=<>]+)(.+)")
        for a in args:
            parsed = m.match(a)
            if parsed is None:
                print("can't understand query \"%s\"" % (a))
                raise SystemExit
            (field,op,val) = parsed.groups()
            if field in self.numeric_fields:
                conds.append(a)
            else:
                # non-numeric values are quoted so they compare as strings
                conds.append("%s%s'%s'" % (field,op,val))
        # NOTE(review): the criteria become Python source that is later
        # eval()-ed, so they must only come from a trusted user
        self._desirable = compile("(("+")and(".join(conds)+"))", "<string>","eval")
    def is_desirable(self):
        """Return the result of the compiled query for the current line."""
        # the parsed fields are the only names visible to the expression
        return eval(self._desirable, {}, self._fields)
    def __str__(self):
        # to allow "print".
        return self._ln
if len(sys.argv)<=1:
print """usage: %s criterion ...
Each criterion is a Perl expression involving:
%s
All criteria must be met for a line to be printed.""" \
% (sys.argv[0], " ".join(PsLineMatch().fieldnames))
raise SystemExit
psln = PsLineMatch()
psln.set_query(sys.argv[1:])
p = os.popen("ps wwaxl")
print p.readline()[:-1] # emit header line
for ln in p.readlines():
psln.new_line(ln)
if psln.is_desirable():
print psln
p.close()
# alternatively one could consider every argument being a string and
# support wildcards: "uid==500" "command~^wm" by means of re, but this
# does not show dynamic python code generation, although re.compile
# also precompiles.
#-----------------------------
|