forked from SigmaQuan/Better-Python-59-Ways
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathitem_22_prefer_helper_classes.py
314 lines (225 loc) · 10.5 KB
/
item_22_prefer_helper_classes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
# Chapter 3: Classes and Inheritance
import collections
# As an object-oriented programming language. Python supports a full range of
# features, such as inheritance, polymorphism, and encapsulation. Getting
# things done in Python often requires writing new classes and defining how
# they interact through their interfaces and hierarchies.
# Python's classes and inheritance make it easy to express your program's
# intended behaviors with objects. They allow you to improve and expand
# functionality over time. They provide flexibility in an environment of
# changing requirements. Knowing how to use them well enables you to write
# maintainable code.
# Item 22: Prefer helper classes over bookkeeping with dictionaries and tuples
# Python's built-in dictionary type is wonderful for maintaining dynamic
# internal state over the lifetime of an object. By dynamic, I mean situations
# in which you need to do bookkeeping for an unexpected set of identifiers.
# For example, say you want to record the grades of a set of students whose
# names aren't known in advance. You can define a class to store the names in
# a dictionary instead of using a predefined attribute for each student.
class SimpleGradebook(object):
def __init__(self):
self._grades = {}
def add_student(self, name):
self._grades[name] = []
def report_grade(self, name, score):
self._grades[name].append(score)
def average_grade(self, name):
grades = self._grades[name]
return sum(grades) / len(grades)
# Using the class in simple.
book = SimpleGradebook()
book.add_student('Isaac Newton')
book.report_grade('Isaac Newton', 90)
# ...
print(book.average_grade('Isaac Newton'))
# 90.0
# Dictionaries are so easy to use that there's a danger of overextending them
# to write brittle code. For example, say you want to extend the
# SimpleGradeboook class to keep a list of grades by subject, not just
# overall. You can do this by changing the _grades dictionary to map student
# names (the keys) to yet another dictionary (the values). The innermost
# dictionary will map subjects (the keys) to grades (the values).
class BySubjectGradebook(object):
def __init__(self):
self._grades = {}
def add_student(self, name):
self._grades[name] = {}
# This seems straightforward enough. The report_grade and average_grade
# methods will gain a bit of complexity to deal with the multilevel
# dictionary, but it's manageable.
def report_grade(self, name, subject, grade):
by_subject = self._grades[name]
grade_list = by_subject.setdefault(subject, [])
grade_list.append(grade)
def average_grade(self, name):
by_subject = self._grades[name]
total, count = 0, 0
for grades in by_subject.values():
total += sum(grades)
count += len(grades)
return total / count
# Using the class remains simple.
book = BySubjectGradebook()
book.add_student('Albert Einstein')
book.report_grade('Albert Einstein', 'Math', 75)
book.report_grade('Albert Einstein', 'Math', 65)
book.report_grade('Albert Einstein', 'Gym', 90)
book.report_grade('Albert Einstein', 'Gym', 95)
print(book.average_grade('Albert Einstein'))
# 81.25
# Now, imagine your requirements change again. You also want to track the
# weight of each score toward the overall grade in the class so midterms and
# finals are more important than pop quizzes. One way to implement this
# feature is to change the innermost dictionary; instead of mapping subjects
# (the keys) to grades (the values), I can use the tuple (score, weight) as
# values.
class WeightGradebook(object):
def __init__(self):
self._grades = {}
def add_student(self, name):
self._grades[name] = {}
def report_grade(self, name, subject, score, weight):
by_subject = self._grades[name]
grade_list = by_subject.setdefault(subject, [])
grade_list.append((score, weight))
# Although the change to report_grade seem simple--just make the value a
# tuple--the average_grade method now has a loop within a loop and is
# difficult to read.
# def average_grade(self, name):
# by_subject = self._grades[name]
# score_sum, score_count = 0, 0
# for subject, scores in by_subject.items():
# subject_avg, total_weight = 0, 0
# for score, weight in scores:
# # ...
# return score_sum / score_count
# Using the class has also gotten more difficult. It's unclear what all of the
# numbers in the positional arguments mean.
# book.report_grade('Albert Einstein', 'Math', 80, 0.10)
# When you see complexity like this happen, it's time to make the leap from
# dictionaries and tuples to a hierarchy of classes.
# At first, you didn't know you'd need to support weighted grades, so the
# complexity of additional helper classes seemed unwarranted. Python's
# built-in dictionary and tuple types made it easy to keep going, adding layer
# after layer to the internal bookkeeping. But you should avoid doing this for
# more than one level of nesting (i.e., avoid dictionaries that contain
# dictionaries). It makes your code hard to read by other programmers and sets
# you up for a maintenance nightmare.
# As soon as you realize the bookkeeping is getting complicated, break it all
# out into classes. This lets you provide well-defined interfaces that better
# encapsulate your data. This also enables you to create a layer of
# abstraction between your interfaces and your concrete implementations.
# Refactoring to Classes
# You can start moving to classes at the bottom of the dependency tree: a
# single grade. A class seems too heavyweight for such simple information.
# A tuple, though, seems to appropriate because grades are immutable. Here, I
# use the tuple (score, weight) to track grades in a list:
grades = []
grades.append((95, 0.45))
grades.append((90, 0.45))
grades.append((85, 0.10))
# ...
total = sum(score * weight for score, weight in grades)
total_weight = sum(weight for _, weight in grades)
average_grade = total / total_weight
print(average_grade)
# 91.75
# The problem is that plain tuples are positional. When you want to associate
# more information with a grade, like a set of notes from the teacher. You'll
# need to rewrite every usage of the two-tuple to be aware that there are now
# three items present instead of two. Here, I use _ (the underscore variable
# name, a Python convention for unused variables) to capture the third entry
# in the tuple and just ignore it:
grades = []
grades.append((95, 0.45, 'Great job'))
grades.append((90, 0.45, 'Great job'))
grades.append((85, 0.10, 'Come on'))
# ...
total = sum(score * weight for score, weight, _ in grades)
total_weight = sum(weight for _, weight, _ in grades)
average_grade = total / total_weight
print(average_grade)
# 91.75
# This pattern of extending tuples longer and longer is similar to deepening
# layers of dictionaries. As soon as you find yourself going longer thant a
# two-tuple, it's time to consider another approach.
# The namedtuple type in the collections module does exactly what you need. It
# lets you easily define tiny, immutable data classes.
Grade = collections.namedtuple('Grade', ('score', 'weight'))
# These classes can be constructed with positional or keyword arguments. The
# fields are accessible with named attributes. Having named attributes makes
# it easy to move from a nametuple to your own class later if your
# requirements change again and you need to add behaviors to the simple data
# containers.
# Limitations of namedtuple
# Although useful in many circumstances, it's important to understand when
# namedtuple can cause more harm than good.
# 1. You can't specify default argument values for namedtuple classes. This
# makes them unwidely when your data may have many optional properties. If
# you find yourself using more than a handful of attributes, defining your
# your own class may be a better choice.
# 2. The attribute value of namedtuple instances are still accessible using
# numerical indexes and iteration. Especially in externalized APIs, this
# can lead to unintentional usage that makes it harder to move to a real
# class later. If you're not in control of all the usage of your namedtuple
# instances, it's better to define your own class.
# Next, you can write a class to represent a single subject that contains a
# set of grades.
class Subject(object):
def __init__(self):
self._grades = []
def report_grade(self, score, weight):
self._grades.append(Grade(score, weight))
def average_grade(self):
total, total_weight = 0, 0
for grade in self._grades:
total += grade.score * grade.weight
total_weight += grade.weight
return total / total_weight
# Then you would write a class to represent a set of subjects that are being
# studies by a single student.
class Student(object):
def __init__(self):
self._subjects = {}
def subject(self, name):
if name not in self._subjects:
self._subjects[name] = Subject()
return self._subjects[name]
def average_grade(self):
total, count = 0, 0
for subject in self._subjects.values():
total += subject.average_grade()
count += 1
return total / count
# Finally, you'd write a container for all the students keyed dynamically by
# their names.
class Gradebook(object):
def __init__(self):
self._students = {}
def student(self, name):
if name not in self._students:
self._students[name] = Student()
return self._students[name]
# The line count of these classes is almost double the previous
# implementation's size. But this code is much easier to read. The example
# driving the classes is also more clear and extensible.
book = Gradebook()
albert = book.student('Albert Einstein')
math = albert.subject('Math')
math.report_grade(80, 0.10)
math.report_grade(75, 0.90)
Gym = albert.subject('Gym')
Gym.report_grade(90, 0.10)
Gym.report_grade(95, 0.90)
# ...
print(albert.average_grade())
# 85.0
# If necessary, you can write backwards-compatible methods to help migrate
# usage of the old API style to the new hierarchy of objects.
# Things to remember
# 1. Avoid making dictionaries with values that are other dictionaries or
# long tuples.
# 2. Use namedtuple for lightweight, immutable data containers before you need
# the flexibility of a full class.
# 3. Move your bookkeeping code to use multiple helper classes when your
# internal state dictionaries get complicated.