Введение

mystr = "\n"   # a newline character
mystr = r"\n"  # two characters, \ and n
mystr = "Jon 'Maddog' Orwant"  # literal single quote inside double quotes
mystr = 'Jon "Maddog" Orwant'  # literal double quote inside single quotes
mystr = 'Jon \'Maddog\' Orwant'  # escaped single quote
mystr = "Jon \"Maddog\" Orwant"  # escaped double quote
# Triple-quoted literals may span several lines; each one has to be closed
# with the same delimiter that opened it.
mystr = """
This is a multiline string literal
enclosed in triple double quotes.
"""
mystr = '''
And this is a multiline string literal
enclosed in triple single quotes.
'''

Доступ к подстрокам

# get a 5-char string, skip 3, then grab 2 8-char strings, then the rest
# Note that struct.unpack cannot use * for an unknown length.
import struct
(lead, s1, s2), tail = struct.unpack("5s 3x 8s 8s", data[:24]), data[24:]
# split at five-char boundaries
fivers = struct.unpack("5s" * (len(data)//5), data)
# the same split done with plain slicing; this yields a list, not a tuple
fivers = [data[i*5:i*5+5] for i in range(len(data)//5)]
# chop string into individual characters
chars = list(data)
mystr = "This is what you have"
#       +012345678901234567890  Indexing forwards  (left to right)
#        109876543210987654321- Indexing backwards (right to left)
#         note that 0 means 10 or 20, etc. above
first = mystr[0]                            # "T"
start = mystr[5:7]                          # "is"
rest = mystr[13:]                           # "you have"
last = mystr[-1]                            # "e"
end = mystr[-4:]                            # "have"
piece = mystr[-8:-5]                        # "you"
# Python strings are immutable.
# In general, you should just do piecemeal reallocation:
mystr = "This is what you have"
mystr = mystr[:5] + "wasn't" + mystr[7:]
# Or replace and reallocate
mystr = "This is what you have"
mystr = mystr.replace(" is ", " wasn't ")
# DON'T DO THIS: In-place modification could be done using character arrays
import array
mystr = array.array("c", "This is what you have")
mystr[5:7] = array.array("c", "wasn't")
# mystr is now array('c', "This wasn't what you have")
# DON'T DO THIS: It could also be done using MutableString 
from UserString import MutableString
mystr = MutableString("This is what you have")
mystr[-12:] = "ondrous"
# mystr is now "This is wondrous"
# you can test simple substrings with "in" (for regex matching see ch.6):
if txt in mystr[-10:]:
    print "'%s' found in last 10 characters"%txt
# Or use the startswith() and endswith() string methods:
if mystr.startswith(txt):
    print "%s starts with %s."%(mystr, txt)
if mystr.endswith(txt):
    print "%s ends with %s."%(mystr, txt)

Установка значения по умолчанию

# Introductory Note: quite a bit of this section is not terribly Pythonic
# as names must be set before being used. For instance, unless myvar has 
# been previously defined, these next lines will all raise NameError:
myvar = myvar or some_default
myvar2 = myvar or some_default
myvar |= some_default          # bitwise-or, not logical-or - for demo
# The standard way of setting a default is often:
myvar = default_value
if some_condition:
    pass                     # code which may set myvar to something else
# if myvar is returned from a function and may be empty/None, then use:
myvar = somefunc()
if not myvar:
    myvar = default_value
# If you want a default value that can be overridden by the person calling 
# your code, you can often wrap it in a function with a named parameter:
def myfunc(myvar="a"):
    """Return myvar with "b" appended; myvar defaults to "a"."""
    suffix = "b"
    return myvar + suffix
print myfunc(), myfunc("c")
#=> ab cb
# Note, though, that this won't work for mutable objects such as lists or
# dicts that are mutated in the function as the object is only created once 
# and repeated calls to the same function will return the same object.  This
# can be desired behaviour however - see section 10.3, for instance.
def myfunc(myvar=[]):
    """Append "x" to myvar and return it.

    Deliberately demonstrates the mutable-default pitfall: the default
    list is created once, when the def statement runs, so every call that
    omits myvar appends to that same list.
    """
    myvar.append("x")
    return myvar
print myfunc(), myfunc()
#=> ['x'] ['x', 'x']
# You need to do:
def myfunc(myvar=None):
    """Append "x" to myvar and return it, using a fresh list by default."""
    if myvar is None:
        # build the default inside the call so it is never shared
        myvar = []
    myvar.append("x")
    return myvar
print myfunc(), myfunc()
#=> ['x'] ['x']
#=== Perl Equivalencies start here
# use b if b is true, otherwise use c
a = b or c
# as that is a little tricksy, the following may be preferred:
if b:
    a = b
else:
    a = c
# set x to y unless x is already true
if not x:
    x = y
# use b if b is defined, else c
# (reading an unbound name raises NameError, which is what "defined" means here)
try:
    a = b
except NameError:
    a = c
foo = bar or "DEFAULT VALUE"
# To get a user (for both UNIX and Windows), use:
import getpass
user = getpass.getuser()
# DON'T DO THIS: find the user name on Unix systems 
import os
user = os.environ.get("USER")
if user is None:
    user = os.environ.get("LOGNAME")
if not starting_point:
    starting_point = "Greenwich"
if not a:         # copy only if empty
    a = b
if b:             # assign b if nonempty, else c
    a = b
else:
    a = c

Обмен значений не используя временные переменные

v1, v2 = v2, v1
temp = a
a = b
b = temp
a = "alpha"
b = "omega"
a, b = b, a   # the first shall be last -- and versa vice 
alpha, beta, production = "January March August".split()
alpha, beta, production = beta, production, alpha

Converting Between ASCII Characters and Values

num = ord(char)
char = chr(num)
char = "%c" % num
print "Number %d is character %c" % (num, num)
print "Number %(n)d is character %(n)c" % {"n": num}
print "Number %(num)d is character %(num)c" % locals()
#=> Number 101 is character e
ascii_character_numbers = [ord(c) for c in "sample"]
print ascii_character_numbers
#=> [115, 97, 109, 112, 108, 101]
word = "".join([chr(n) for n in ascii_character_numbers])
word = "".join([chr(n) for n in [115, 97, 109, 112, 108, 101]])
print word
#=> sample
hal = "HAL"
ibm = "".join([chr(ord(c)+1) for c in hal]) # add one to each ASCII value
print ibm   
#=> IBM

Processing a String One Character at a Time

mylist = list(mystr)
for char in mystr:
    pass # do something with char
mystr = "an apple a day"
uniq = sorted(set(mystr))
print "unique chars are: '%s'" % "".join(uniq)
#=> unique chars are: ' adelnpy'
ascvals = [ord(c) for c in mystr]
print "total is %s for '%s'."%(sum(ascvals), mystr)
#=> total is 1248 for 'an apple a day'.
# sysv checksum
def checksum(myfile):
    """Return a 16-bit additive checksum of every character in myfile.

    myfile is any iterable of strings: an open file, fileinput.input(),
    a list of lines, or a single string.  The character codes are summed
    and the total is reduced modulo 2**16 - 1.
    """
    total = sum(ord(c) for line in myfile for c in line)
    # The parentheses matter: "%" binds tighter than "-", so without them
    # the result would be (total % 65536) - 1, i.e. -1 for empty input.
    return total % (2**16 - 1)
import fileinput
print checksum(fileinput.input())   # data from sys.stdin
# Using a function means any iterable can be checksummed:
print(checksum(open("C:/test.txt")))  # data from file
print(checksum("sometext"))           # data from string
# slowcat - emulate a   s l o w  line printer
# usage: slowcat [- DELAY] [files ...]
import sys, select
import re
DELAY = 1
# An optional leading "-N" argument scales the per-character delay; it is
# removed from sys.argv so fileinput does not treat it as a file name.
if len(sys.argv) > 1 and re.match(r"^-\d+$", sys.argv[1]):
    DELAY = -int(sys.argv[1])
    del sys.argv[1]
for ln in fileinput.input():
    for c in ln:
        sys.stdout.write(c)
        sys.stdout.flush()
        # select() with no descriptors is used purely as a sub-second sleep
        select.select([], [], [], 0.005 * DELAY)

Reversing a String by Word or Character

# 2.3+ only
revchars = mystr[::-1]  # extended slice - step is -1
revwords = " ".join(mystr.split(" ")[::-1])
# pre 2.3 version:
mylist = list(mystr)
mylist.reverse()            # reverses in place and returns None
revbytes = "".join(mylist)
mylist = mystr.split()
mylist.reverse()
revwords = ' '.join(mylist)
# Alternative version using reversed():
revchars = "".join(reversed(mystr))
revwords = " ".join(reversed(mystr.split(" ")))
# reversed() makes an iterator, which means that the reversal
# happens as it is consumed.  This means that "print reversed(mystr)" is not
# the same as mystr[::-1].  Standard usage is:
for char in reversed(mystr):
   pass  # ... do something
# 2.3+ only
word = "reviver"
is_palindrome = (word == word[::-1])
# Generator version
def get_palindromes(fname):
    """Yield each word in file fname that is a palindrome longer than 5 chars.

    The file is read one word per line; trailing whitespace is stripped
    before the comparison.
    """
    # "with" closes the file when the generator finishes or is discarded.
    with open(fname) as wordfile:
        for line in wordfile:
            word = line.rstrip()
            if len(word) > 5 and word == word[::-1]:
                yield word
long_palindromes = list(get_palindromes("/usr/share/dict/words"))
# Simpler old-style version using 2.2 string reversal
def rev_string(mystr):
    """Return mystr with its characters in reverse order."""
    mylist = list(mystr)
    mylist.reverse()          # in place; without this the string is unchanged
    return "".join(mylist)
# collect the matches in a fresh list so the loop stands on its own
long_palindromes = []
for line in open("/usr/share/dict/words"):
    word = line.rstrip()
    if len(word) > 5 and word == rev_string(word):
        long_palindromes.append(word)
print(long_palindromes)

Expanding and Compressing Tabs


Expanding Variables in User Input

# each %(name)s placeholder is filled from the mapping on the right
text = "I am %(rows)s high and %(cols)s long"%{"rows":24, "cols":80}
print text
#=> I am 24 high and 80 long
rows, cols = 24, 80
text = "I am %(rows)s high and %(cols)s long"%locals()
print text
#=> I am 24 high and 80 long
import re
# the lambda receives a match object; group(0) is the matched digits
print(re.sub(r"\d+", lambda i: str(2 * int(i.group(0))), "I am 17 years old"))
#=> I am 34 years old
# expand variables in text, but put an error message in
# if the variable isn't defined
class SafeDict(dict):
    """dict whose item lookup returns a placeholder text for a missing key."""
    def __getitem__(self, key):
        # the placeholder names the key that could not be found
        placeholder = "[No Variable: %s]"%key
        return self.get(key, placeholder)
hi = "Hello"
text = "%(hi)s and %(bye)s!"%SafeDict(locals())
print text
#=> Hello and [No Variable: bye]!
#If you don't need a particular error message, just use the Template class:
from string import Template
x = Template("$hi and $bye!")
hi = "Hello"
print x.safe_substitute(locals())
#=> Hello and $bye!
print x.substitute(locals()) # will throw a KeyError

Controlling Case

mystr = "bo peep".upper()  # BO PEEP
mystr = mystr.lower()      # bo peep
mystr = mystr.capitalize() # Bo peep
beast = "python"
caprest = beast.capitalize().swapcase() # pYTHON
print "thIS is a loNG liNE".title()
#=> This Is A Long Line
if a.upper() == b.upper():
    print "a and b are the same"
import random
def randcase_one(letter):
    """Return letter lower-cased four times out of five, else upper-cased."""
    # randint() includes both end points: 0..4 gives five equally likely
    # values, of which only 0 is false.
    if random.randint(0, 4):   # True on 1, 2, 3, 4
        return letter.lower()
    else:
        return letter.upper()
def randcase(myfile):
    """Yield each line of myfile with its letters randomly re-cased.

    The line's trailing newline, if any, is not part of the yielded text.
    """
    for line in myfile:
        # Only drop a real newline; a final line without one keeps its
        # last character.
        if line.endswith("\n"):
            line = line[:-1]
        yield "".join(randcase_one(letter) for letter in line)
for line in randcase(myfile):
    print line

Interpolating Functions and Expressions Within Strings

"I have %d guanacos." % (n + 1)
print "I have", n+1, "guanacos."
#Python templates disallow in-string calculations (see PEP 292)
from string import Template
email_template = Template("""\
To: $address
From: Your Bank
CC: $cc_number
Date: $date
Dear $name,
Today you bounced check number $checknum to us.
Your account is now closed.
the management
""")
import random
import datetime
person = {"address":"",
          "name": "Joe",
          "cc_number" : 1234567890,
          "checknum" : 500+random.randint(0,99)}
# $date is not a key of person, so it is supplied as a keyword argument;
# substitute() raises KeyError for any placeholder left without a value.
print(email_template.substitute(person, date=datetime.date.today()))

Indenting Here Documents

# indenting here documents
# in python multiline strings can be used as here documents
var = """
      your text
      goes here
      """
# using regular expressions
import re
# with re.MULTILINE, "^" matches at the start of every line
re_leading_blanks = re.compile(r"^\s+",re.MULTILINE)
# the last line of var is all blanks, so after the substitution the text
# ends with a newline, which [:-1] removes
var1 = re_leading_blanks.sub("",var)[:-1]
# using string methods 
# split into lines, use every line except first and last, left strip and rejoin.
var2 = "\n".join([line.lstrip() for line in var.split("\n")[1:-1]])
poem = """
       Here's your poem:
       Now far ahead the Road has gone,
          And I must follow, if I can,
       Pursuing it with eager feet,
          Until it joins some larger way
       Where many paths and errand meet.
          And whither then? I cannot say.
               --Bilbo in /usr/src/perl/pp_ctl.c  
       """
import textwrap
# dedent() removes the margin common to all lines; [1:-1] then drops the
# newline right after the opening quotes and the one ending the last line.
print(textwrap.dedent(poem)[1:-1])

Reformatting Paragraphs

from textwrap import wrap 
output = wrap(para,
#!/usr/bin/env python
# wrapdemo - show how textwrap works
txt = """\
Folding and splicing is the work of an editor,
not a mere collection of silicon
and
mobile electrons!
"""
from textwrap import TextWrapper
# lines are at most 20 columns wide; the first is indented by four
# spaces and every following one by two
wrapper = TextWrapper(width=20,
                      initial_indent=" "*4,
                      subsequent_indent=" "*2)
print("0123456789" * 2)       # column ruler for the wrapped text below
print(wrapper.fill(txt))
"""Expected result:
    Folding and
  splicing is the
  work of an editor,
  not a mere
  collection of
  silicon and mobile
  electrons!
"""
# merge multiple lines into one, then wrap one long line
from textwrap import fill
import fileinput
print fill("".join(fileinput.input()))
# Term::ReadKey::GetTerminalSize() isn't in the Perl standard library. 
# It isn't in the Python standard library either. Michael Hudson's 
# recipe from python-list #530228 is shown here.
# Be aware that this will work on Unix but not on Windows.
from termwrap import wrap
import struct, fcntl
def getheightwidth():
    """Return (height, width) as reported by the TIOCGWINSZ ioctl on fd 0.

    The ioctl reply is unpacked as four shorts and the first two are
    returned.  Unix only; the ioctl call raises an error when descriptor 0
    does not support the request (for example when it is not a terminal).
    """
    # The name TERMIOS is not imported anywhere in view; the termios
    # module provides the TIOCGWINSZ request code.
    import termios
    # a buffer of four zeroed shorts for the ioctl to fill in
    request = struct.pack("hhhh", 0, 0, 0, 0)
    reply = fcntl.ioctl(0, termios.TIOCGWINSZ, request)
    height, width = struct.unpack("hhhh", reply)[0:2]
    return height, width
# PERL <>, $/, $\ emulation
import fileinput
import re
_, width = getheightwidth()
for para in re.split(r"\n{2,}", "".join(fileinput.input())):
    print fill(para, width)

Escaping Characters

mystr = '''Mom said, "Don't do that."'''  #"
# each lambda receives the match object; group(0) is the matched text,
# which is emitted again with a backslash in front of it
re.sub("['\"]", lambda i: "\\" + i.group(0), mystr)
re.sub("[A-Z]", lambda i: "\\" + i.group(0), mystr)
# re.escape() is the closest standard-library counterpart of quotemeta
re.sub(r"\W", lambda i: "\\" + i.group(0), "is a test!")

Trimming Blanks from the Ends of a String

mystr = mystr.lstrip() # left
mystr = mystr.rstrip() # right
mystr = mystr.strip()  # both ends

Parsing Comma-Separated Data

import csv
def parse_csv(line):
    """Split one line of comma-separated text into a list of field strings.

    Quoted fields may contain commas, and a backslash escapes the
    character that follows it.
    """
    reader = csv.reader([line], escapechar='\\')
    # The reader was given a single line, so hand back the first row it
    # produces; an empty list covers the case where it produces none.
    for fields in reader:
        return fields
    return []
line = '''XYZZY,"","O'Reilly, Inc","Wall, Larry","a \\"glug\\" bit,",5,"Error, Core Dumped,",''' #"
fields = parse_csv(line)
for i, field in enumerate(fields):
    print "%d : %s" % (i, field)
# pre-2.3 version of parse_csv
import re
def parse_csv(text):
    """Split one line of CSV text into fields using a regular expression.

    Double-quoted fields may contain commas and backslash-escaped
    characters; the escapes are left as written in the returned field.
    A trailing comma produces a final empty field.
    """
    # A raw string, so the regex engine sees exactly these alternatives:
    #   "([^"\\]*(?:\\.[^"\\]*)*)",?   a quoted field
    #   ([^,]+),?                      an unquoted field
    #   ,                              an empty field
    pattern = re.compile(r'''"([^"\\]*(?:\\.[^"\\]*)*)",?|([^,]+),?|,''')
    # findall() returns one (quoted, unquoted) pair per match and at most
    # one of the two is non-empty, so joining the pair gives the field.
    mylist = ["".join(elem)
              for elem in re.findall(pattern, text)]
    if text.endswith(","):   # unlike text[-1], safe on an empty string
        mylist += ['']
    return mylist
# csv.reader is meant to work for many lines, something like:
# (NB: in Python default, quotechar is *not* escaped by backslash,
#      but doubled instead. That's what Excel does.)
for fields in csv.reader(lines, dialect="some"):
    for num, field in enumerate(fields):
        print("%s : %s" % (num, field))

Soundex Matching

def soundex(name, len=4):
    """ soundex module conforming to Knuth's algorithm
        implementation 2000-12-24 by Gregory Jorgensen
        public domain

        Return the soundex code of name, padded with '0' or truncated to
        len characters.  Only the letters A-Z (case-insensitive) take
        part; every other character is ignored.
    """
    # NOTE: the parameter is called "len" (kept for existing callers), so
    # the builtin len() is not available inside this function.
    # digits holds the soundex values for the alphabet
    digits = '01230120022455012623010202'
    sndx = ''
    fc = ''
    # translate alpha chars in name to soundex digits
    for c in name.upper():
        # an explicit range check keeps the table index inside A..Z even
        # for letters outside ASCII
        if 'A' <= c <= 'Z':
            if not fc:
                fc = c   # remember first letter
            d = digits[ord(c)-ord('A')]
            # duplicate consecutive soundex digits are skipped
            if not sndx or (d != sndx[-1]):
                sndx += d
    # replace first digit with first alpha character
    sndx = fc + sndx[1:]
    # remove all 0s from the soundex code
    sndx = sndx.replace('0','')
    # return soundex code padded to len characters
    return (sndx + (len * '0'))[:len]
user = raw_input("Lookup user: ")
if user == "":
    raise SystemExit
name_code = soundex(user)
for line in open("/etc/passwd"):
    line = line.split(":")
    # field 4 is split into words and each word is compared by soundex code
    for piece in line[4].split():
        if name_code == soundex(piece):
            print("%s: %s\n" % (line[0], line[4]))

Program: fixstyle

import sys, fileinput, re
data = """\
analysed        => analyzed
built-in        => builtin
chastized       => chastised
commandline     => command-line
de-allocate     => deallocate
dropin          => drop-in
hardcode        => hard-code
meta-data       => metadata
multicharacter  => multi-character
multiway        => multi-way
non-empty       => nonempty
non-profit      => nonprofit
non-trappable   => nontrappable
pre-define      => predefine
preextend       => pre-extend
re-compiling    => recompiling
reenter         => re-enter
turnkey         => turn-key
"""
# mydict maps each spelling on the left of "=>" to the one on the right.
mydict = {}
for line in data.split("\n"):
    if not line.strip():
        continue                  # skip blank lines in the table
    k, v = [word.strip() for word in line.split("=>")]
    mydict[k] = v
# One alternation of all (regex-escaped) keys finds any of them in one pass.
pattern_text = "(" + "|".join([re.escape(word) for word in mydict.keys()]) + ")"
pattern = re.compile(pattern_text)


def fix_style_line(line, verbose=0):
    """Return line with every key of mydict replaced by its value.

    When verbose is true, each replacement is also reported on stderr.
    """
    def replace(match):
        found = match.group(0)
        if verbose:
            # NOTE(review): the report format is a reconstruction; the
            # -v flag is parsed below but its original use is not visible.
            sys.stderr.write("%s => %s\n" % (found, mydict[found]))
        return mydict[found]
    return pattern.sub(replace, line)


# The guard keeps an import of this code from rewriting the files named
# in sys.argv; run as a script, the behaviour is unchanged.
if __name__ == "__main__":
    args = sys.argv[1:]
    verbose = 0
    if args and args[0] == "-v":
        verbose = 1
        args = args[1:]
    if not args:
        sys.stderr.write("%s: Reading from stdin\n" % sys.argv[0])
    # With inplace=1, fileinput redirects stdout into the file being read
    # and keeps the original under the ".orig" suffix.
    for line in fileinput.input(args, inplace=1, backup=".orig"):
        sys.stdout.write(fix_style_line(line, verbose))

Program: psgrep

# psgrep - print selected lines of ps output by
#          compiling user queries into code.
# examples :
# psgrep "uid<10"
import sys, os, re
class PsLineMatch:
    """One line of "ps wwaxl" output plus a compiled query to test it with.

    Call set_query() once with the criteria, then for every ps output
    line call new_line() followed by is_desirable().
    """

    # each field from the PS header
    # NOTE(review): the names after "size" are reconstructed from the 13
    # columns of the sample line in new_line() -- confirm against ps.
    fieldnames = ("flags","uid","pid","ppid","pri","nice","size",
                  "rss","wchan","stat","tty","time","command")
    # fields that new_line() converts to int and set_query() leaves unquoted
    numeric_fields = ("flags","uid","pid","ppid","pri","nice","size","rss")

    def __init__(self):
        self._fields = {}

    def new_line(self, ln):
        """Store the ps output line ln and split it into named fields."""
        self._ln = ln.rstrip()
        # sample line for option "wwaxl" (different than in the perl code):
        # 004     0     1     0  15   0   448  236 schedu S    ?          0:07 init
        # because only the last entry might contain blanks, splitting
        # is safe
        data = self._ln.split(None,12)
        for fn, elem in zip(self.fieldnames, data):
            if fn in self.numeric_fields:  # make numbers integer
                self._fields[fn] = int(elem)
            else:
                self._fields[fn] = elem

    def set_query(self, args):
        """Compile the criteria in args into one expression; all must hold.

        Prints a message and raises SystemExit if a criterion cannot be
        parsed.
        """
        # assume args: "uid==500", "command ~ ^wm"
        conds = []
        m = re.compile(r"(\w+)([=<>]+)(.+)")
        for a in args:
            parsed = m.match(a)
            if parsed is None:
                print("can't understand query \"%s\"" % (a))
                raise SystemExit
            (field,op,val) = parsed.groups()
            if field in self.numeric_fields:
                conds.append(a)       # numeric: use the criterion verbatim
            else:
                # anything else is compared against val as a quoted string
                conds.append("%s%s'%s'" % (field,op,val))
        # SECURITY: the criteria are compiled as Python code and later run
        # by eval(); only pass queries that come from a trusted user.
        self._desirable = compile("(("+")and(".join(conds)+"))", "<string>","eval")

    def is_desirable(self):
        """Return the result of the compiled query on the current fields."""
        return eval(self._desirable, {}, self._fields)

    def __str__(self):
        # to allow "print".
        return self._ln
if len(sys.argv)<=1:
    # two values are interpolated: the program name and the field list
    print("""usage: %s criterion ...
    Each criterion is a Perl expression involving: 
    %s
    All criteria must be met for a line to be printed.""" \
    % (sys.argv[0], " ".join(PsLineMatch().fieldnames)))
    raise SystemExit
psln = PsLineMatch()
psln.set_query(sys.argv[1:])   # compile the command-line criteria once
p = os.popen("ps wwaxl")
print(p.readline()[:-1])       # emit header line
for ln in p.readlines():
    psln.new_line(ln)          # load this line's fields before testing it
    if psln.is_desirable():
        print(psln)
p.close()
# alternatively one could consider every argument being a string and
# support wildcards: "uid==500" "command~^wm" by means of re, but this
# does not show dynamic python code generation, although re.compile
# also precompiles.