|
Python/FAQ/Хэши
Материал из Wiki.crossplatform.ru
Introduction
#-----------------------------
# dictionaries
age = {"Nat": 24,
"Jules": 24,
"Josh": 17}
#-----------------------------
age = {}
age["Nat"] = 24
age["Jules"] = 25
age["Josh"] = 17
#-----------------------------
food_color = {"Apple": "red",
"Banana": "yellow",
"Lemon": "yellow",
"Carrot": "orange"
}
#-----------------------------
# NOTE: keys must be quoted in Python
Adding an Element to a Hash
mydict[key] = value
#-----------------------------
# food_color defined per the introduction
food_color["Raspberry"] = "pink"
print "Known foods:"
for food in food_color:
print food
#=> Known foods:
#=> Raspberry
#=> Carrot
#=> Lemon
#=> Apple
#=> Banana
#-----------------------------
Testing for the Presence of a Key in a Hash
# does mydict have a value for key?
if key in mydict:
pass # it exists
else:
pass # it doesn't
#-----------------------------
# food_color per the introduction
for name in ("Banana", "Martini"):
if name in food_color:
print name, "is a food."
else:
print name, "is a drink."
#=> Banana is a food.
#=> Martini is a drink.
#-----------------------------
age = {}
age["Toddler"] = 3
age["Unborn"] = 0
age["Phantasm"] = None
for thing in ("Toddler", "Unborn", "Phantasm", "Relic"):
print ("%s:"%thing),
if thing in age:
print "Exists",
if age[thing] is not None:
print "Defined",
if age[thing]:
print "True",
print
#=> Toddler: Exists Defined True
#=> Unborn: Exists Defined
#=> Phantasm: Exists
#=> Relic:
#-----------------------------
# Get file sizes for the requested filenames
import fileinput, os
size = {}
for line in fileinput.input():
filename = line.rstrip()
if filename in size:
continue
size[filename] = os.path.getsize(filename)
Deleting from a Hash
# remove key and its value from mydict
del mydict[key]
#-----------------------------
# food_color as per Introduction
def print_foods():
foods = food_color.keys()
print "Keys:", " ".join(foods)
print "Values:",
for food in foods:
color = food_color[food]
if color is not None:
print color,
else:
print "(undef)",
print
print "Initially:"
print_foods()
print "\nWith Banana set to None"
food_color["Banana"] = None
print_foods()
print "\nWith Banana deleted"
del food_color["Banana"]
print_foods()
#=> Initially:
#=> Keys: Carrot Lemon Apple Banana
#=> Values: orange yellow red yellow
#=>
#=> With Banana set to None
#=> Keys: Carrot Lemon Apple Banana
#=> Values: orange yellow red (undef)
#=>
#=> With Banana deleted
#=> Keys: Carrot Lemon Apple
#=> Values: orange yellow red
#-----------------------------
for key in ["Banana", "Apple", "Cabbage"]:
del food_color[key]
#-----------------------------
Traversing a Hash
#-----------------------------
for key, value in mydict.items():
pass # do something with key and value
# If mydict is large, use iteritems() instead
for key, value in mydict.iteritems():
pass # do something with key and value
#-----------------------------
# DON'T DO THIS:
for key in mydict.keys():
value = mydict[key]
# do something with key and value
#-----------------------------
# food_color per the introduction
for food, color in food_color.items():
print "%s is %s." % (food, color)
# DON'T DO THIS:
for food in food_color:
color = food_color[food]
print "%s is %s." % (food, color)
#-----------------------------
print """%(food)s
is
%(color)s.
""" % vars()
#-----------------------------
for food, color in sorted(food_color.items()):
print "%s is %s." % (food, color)
#-----------------------------
#!/usr/bin/env python
# countfrom - count number of messages from each sender
import sys
if len(sys.argv) > 1:
infile = open(sys.argv[1])
else:
infile = sys.stdin
counts = {}
for line in infile:
if line.startswith("From: "):
name = line[6:-1]
counts[name] = counts.get(name, 0) + 1
for (name, count) in sorted(counts.items()):
print "%s: %s" % (name, count)
#-----------------------------
Printing a Hash
for key, val in mydict.items():
print key, "=>", val
#-----------------------------
print "\n".join([("%s => %s" % item) for item in mydict.items()])
#-----------------------------
print mydict
#=> {'firstname': 'Andrew', 'login': 'dalke', 'state': 'New Mexico', 'lastname': 'Dalke'}
#-----------------------------
import pprint
pprint.pprint(dict)
#=> {'firstname': 'Andrew',
#=> 'lastname': 'Dalke',
#=> 'login': 'dalke',
#=> 'state': 'New Mexico'}
#-----------------------------
Retrieving from a Hash in Insertion Order
#-----------------------------
class SequenceDict(dict):
"""
Dictionary that remembers the insertion order.
The lists returned by keys(), values() and items() are
in the insertion order.
"""
def __init__(self, *args):
self._keys={} # key --> id
self._ids={} # id --> key
self._next_id=0
def __setitem__(self, key, value):
self._keys[key]=self._next_id
self._ids[self._next_id]=key
self._next_id+=1
return dict.__setitem__(self, key, value)
def __delitem__(self, key):
id=self._keys[key]
del(self._keys[key])
del(self._ids[id])
return dict.__delitem__(self, key)
def values(self):
values=[]
ids=list(self._ids.items())
ids.sort()
for id, key in ids:
values.append(self[key])
return values
def items(self):
items=[]
ids=list(self._ids.items())
ids.sort()
for id, key in ids:
items.append((key, self[key]))
return items
def keys(self):
ids=list(self._ids.items())
ids.sort()
keys=[]
for id, key in ids:
keys.append(key)
return keys
def update(self, d):
for key, value in d.items():
self[key]=value
def clear(self):
dict.clear(self)
self._keys={}
self._ids={}
self._next_id=0
def testSequenceDict():
sd=SequenceDict()
# First Test
sd[3]="first"
sd[2]="second"
sd[1]="third"
print sd.keys()
print sd.items()
print sd.values()
del(sd[1])
del(sd[2])
del(sd[3])
print sd.keys(), sd.items(), sd.values()
print sd._ids, sd._keys
print "---------------"
# Second Test
sd["b"]="first"
sd["a"]="second"
sd.update({"c": "third"})
print sd.keys()
print sd.items()
print sd.values()
del(sd["b"])
del(sd["a"])
del(sd["c"])
print sd.keys(), sd.items(), sd.values()
print sd._ids, sd._keys
def likePerlCookbook():
food_color=SequenceDict()
food_color["Banana"]="Yellow";
food_color["Apple"]="Green";
food_color["Lemon"]="Yellow"
print "In insertion order, the foods' color are:"
for food, color in food_color.items():
print "%s is colored %s" % (food, color)
if __name__=="__main__":
#testSequenceDict()
likePerlCookbook()
Hashes with Multiple Values Per Key
import os
ttys = {}
who = os.popen("who")
for line in who:
user, tty = line.split()[:2]
ttys.setdefault(user, []).append(tty)
for (user, tty_list) in sorted(ttys.items()):
print user + ": " + " ".join(tty_list)
#-----------------------------
import pwd
for (user, tty_list) in ttys.items():
print user + ":", len(tty_list), "ttys."
for tty in sorted(tty_list):
try:
uid = os.stat("/dev/" + tty).st_uid
user = pwd.getpwuid(uid)[0]
except os.error:
user = "(not available)"
print "\t%s (owned by %s)" % (tty, user)
Inverting a Hash
# lookup_dict maps keys to values
reverse = dict([(val, key) for (key, val) in lookup_dict.items()])
#-----------------------------
surname = {"Mickey": "Mantle", "Babe": "Ruth"}
first_name = dict([(last, first) for (first, last) in surname.items()])
print first_name["Mantle"]
#=> Mickey
#-----------------------------
#!/usr/bin/perl -w
# foodfind - find match for food or color
import sys
if not sys.argv[1:]:
raise SystemExit("usage: foodfind food_or_color")
given = sys.argv[1]
color_dict = {"Apple": "red",
"Banana": "yellow",
"Lemon": "yellow",
"Carrot": "orange",
}
food_dict = dict([(color, food) for (food, color) in color_dict.items()])
if given in color_dict:
print given, "is a food with color", color_dict[given]
elif given in food_dict:
print food_dict[given], "is a food with color", given
#-----------------------------
# food_color as per the introduction
foods_with_color = {}
for food, color in food_color.items():
foods_with_color.setdefault(color, []).append(food)
print " ".join(foods_with_color["yellow"]), "were yellow foods."
#-----------------------------
Sorting a Hash
#-----------------------------
# mydict is the hash to sort
for key, value in sorted(mydict.items()):
# do something with key, value
#-----------------------------
# food_color as per section 5.8
for food, color in sorted(food_color.items()):
print "%s is %s." % (food, color)
#-----------------------------
# NOTE: alternative version
for item in sorted(food_color.items()):
print "%s is %s." % item
#-----------------------------
# NOTE: alternative version showing a user-defined function
def food_cmp(x, y):
return cmp(x, y)
for food, color in sorted(food_color, cmp=food_cmp):
print "%s is %s." % (food, color)
#-----------------------------
def food_len_cmp(x, y):
return cmp(len(x), len(y))
for food in sorted(food_color, cmp=food_len_cmp):
print "%s is %s." % (food, food_color[food])
# In this instance, however, the following is both simpler and faster:
for food in sorted(food_color, key=len):
print "%s is %s." % (food, food_color[food])
#-----------------------------
Merging Hashes
#-----------------------------
merged = {}
merged.update(a_dict)
merged.update(b_dict)
#-----------------------------
# NOTE: alternative version
merged = a_dict.copy()
merged.update(b_dict)
#-----------------------------
# DON'T DO THIS:
merged = {}
for k, v in a_dict.items():
merged[k] = v
for k, v in b_dict.items():
merged[k] = v
#-----------------------------
# food_color as per section 5.8
drink_color = {"Galliano": "yellow",
"Mai Tai": "blue"}
ingested_color = drink_color.copy()
ingested_color.update(food_color)
#-----------------------------
# DON'T DO THIS:
drink_color = {"Galliano": "yellow",
"Mai Tai": "blue"}
substance_color = {}
for k, v in food_color.items():
substance_color[k] = v
for k, v in drink_color.items():
substance_color[k] = v
#-----------------------------
# DON'T DO THIS:
substance_color = {}
for mydict in (food_color, drink_color):
for k, v in mydict:
substance_color[k] = v
#-----------------------------
# DON'T DO THIS:
substance_color = {}
for item in food_color.items() + drink_color.items():
for k, v in mydict:
substance_color[k] = v
#-----------------------------
substance_color = {}
for mydict in (food_color, drink_color):
for k, v in mydict.items():
if substance_color.has_key(k):
print "Warning:", k, "seen twice. Using the first definition."
continue
substance_color[k] = v
# I think it's a copy, in which case
all_colors = new_colors.copy()
Finding Common or Different Keys in Two Hashes
common = [k for k in dict1 if k in dict2]
#-----------------------------
this_not_that = [k for k in dict1 if k not in dict2]
#-----------------------------
# citrus_color is a dict mapping citrus food name to its color.
citrus_color = {"Lemon": "yellow",
"Orange": "orange",
"Lime": "green"}
# build up a list of non-citrus foods
non_citrus = [k for k in food_color if k not in citruscolor]
#-----------------------------
Hashing References
#-----------------------------
# references as keys of dictionaries is no pb in python
name = {}
for filename in ("/etc/termcap", "/vmunix", "/bin/cat"):
try:
myfile = open(filename)
except IOError:
pass
else:
names[myfile] = filename
print "open files:", ", ".join(name.values())
for f, fname in name.items():
f.seek(0, 2) # seek to the end
print "%s is %d bytes long." % (fname, f.tell())
#-----------------------------
Presizing a Hash
# Python doesn't allow presizing of dicts, but hashing is efficient -
# it only re-sizes at intervals, not every time an item is added.
Finding the Most Common Anything
count = {}
for element in mylist:
count[element] = count.get(element, 0) + 1
Representing Relationships Between Data
#-----------------------------
import fileinput
father = {'Cain': 'Adam',
'Abel': 'Adam',
'Seth': 'Adam',
'Enoch': 'Cain',
'Irad': 'Enoch',
'Mehujael': 'Irad',
'Methusael': 'Mehujael',
'Lamech': 'Methusael',
'Jabal': 'Lamech',
'Tubalcain': 'Lamech',
'Enos': 'Seth',
}
for line in fileinput.input():
person = line.rstrip()
while person: # as long as we have people,
print person, # print the current name
person = father.get(person) # set the person to the person's father
print
#-----------------------------
import fileinput
children = {}
for k, v in father.items():
children.setdefault(v, []).append(k)
for line in fileinput.input():
person = line.rstrip()
kids = children.get(person, ["nobody"])
print person, "begat", ", ".join(kids)
#-----------------------------
import sys, re
pattern = re.compile(r'^\s*#\s*include\s*<([^>]+)')
includes = {}
for filename in filenames:
try:
infile = open(filename)
except IOError, err:
print>>sys.stderr, err
continue
for line in infile:
match = pattern.match(line)
if match:
includes.setdefault(match.group(1), []).append(filename)
#-----------------------------
# list of files that don't include others
mydict = {}
for e in reduce(lambda a,b: a + b, includes.values()):
if not includes.has_key(e):
mydict[e] = 1
include_free = mydict.keys()
include_free.sort()
Program: dutree
#-----------------------------
#!/usr/bin/env python -w
# dutree - print sorted indented rendition of du output
import os, sys
def get_input(args):
# NOTE: This is insecure - use only from trusted code!
cmd = "du " + " ".join(args)
infile = os.popen(cmd)
dirsize = {}
kids = {}
for line in infile:
size, name = line[:-1].split("\t", 1)
dirsize[name] = int(size)
parent = os.path.dirname(name)
kids.setdefault(parent, []).append(name)
# Remove the last field added, which is the root
kids[parent].pop()
if not kids[parent]:
del kids[parent]
return name, dirsize, kids
def getdots(root, dirsize, kids):
size = cursize = dirsize[root]
if kids.has_key(root):
for kid in kids[root]:
cursize -= dirsize[kid]
getdots(kid, dirsize, kids)
if size != cursize:
dot = root + "/."
dirsize[dot] = cursize
kids[root].append(dot)
def output(root, dirsize, kids, prefix = "", width = 0):
path = os.path.basename(root)
size = dirsize[root]
fmt = "%" + str(width) + "d %s"
line = fmt % (size, path)
print prefix + line
prefix += (" " * (width-1)) + "| " + (" " * len(path))
if kids.has_key(root):
kid_list = kids[root]
kid_list.sort(lambda x, y, dirsize=dirsize:
cmp(dirsize[x], dirsize[y]))
width = len(str(dirsize[kid_list[-1]]))
for kid in kid_list:
output(kid, dirsize, kids, prefix, width)
def main():
root, dirsize, kids = get_input(sys.argv[1:])
getdots(root, dirsize, kids)
output(root, dirsize, kids)
if __name__ == "__main__":
main()
|