%%HTML
<iframe src='https://gfycat.com/ifr/YearlyWelcomeBlowfish' frameborder='0' scrolling='no' allowfullscreen width='640' height='1185'></iframe>
# A list is iterable, so a for statement can walk it item by item.
for item in [1, 2, 3]:
    print(item)
# A bare class implements neither __iter__ nor __getitem__, so iterating
# over an instance raises TypeError — that failure is the point of this cell.
class Counter:
pass
for value in Counter():
print(value)
# What a for loop does under the hood: obtain an iterator with iter(),
# call next() repeatedly, and stop when StopIteration is raised.
iterator = iter([1, 2, 3])
try:
    while True:
        print(next(iterator))
except StopIteration:
    pass
How to create an iterable object? Implement `__iter__` so that it returns an object implementing `__next__`.
class Counter:
    """Iterable of consecutive integers from low to high, inclusive."""

    def __init__(self, low, high):
        self.current = low
        self.high = high

    def __iter__(self):
        # The instance is its own iterator.
        return self

    def __next__(self):
        if self.current > self.high:
            raise StopIteration
        result = self.current
        self.current += 1
        return result

for i in Counter(3, 8):
    print(i)
You don't have to return self in __iter__
class Counter:
    """__iter__ may hand back any existing iterator instead of self."""

    def __iter__(self):
        return iter([1, 2, 3])

for value in Counter():
    print(value)
Why is this information useful? Infinite or very large sequences.
Problem: find the first Fibonacci number which has a sum of digits greater than 100
from itertools import islice

class FibonacciIterator:
    """Infinite iterator over the Fibonacci numbers 0, 1, 1, 2, 3, ...

    Fix: state is now initialized in __init__ — the original set it only
    in __iter__, so calling next() on a fresh instance raised
    AttributeError. __iter__ still resets the sequence, preserving the
    original restart-on-reiteration behavior.
    """

    def __init__(self):
        self.a = 0
        self.b = 1

    def __iter__(self):
        # Reset so re-iterating the same object starts over (as before).
        self.a, self.b = 0, 1
        return self

    def __next__(self):
        value = self.a
        self.a, self.b = self.b, self.a + self.b
        return value

list(islice(FibonacciIterator(), 10))
from typing import Any, Callable, Iterable, Optional


def sum_digits(n: int) -> int:
    """Return the sum of the decimal digits of a non-negative int."""
    # def instead of an assigned lambda (PEP 8), same callable name.
    return sum(map(int, str(n)))


def find_first(iterable: Iterable[Any], predicate: Callable[[Any], bool]) -> Optional[Any]:
    """Return the first item of iterable for which predicate is true.

    Returns None when no item matches (same as the original's implicit
    fall-through); never returns on an infinite iterable with no match.
    """
    return next((x for x in iterable if predicate(x)), None)
# Find the first Fibonacci number whose digit sum exceeds 100.
find_first(FibonacciIterator(), lambda x: sum_digits(x) > 100)
# Equivalent one-liner using next() on a generator expression:
# next(x for x in FibonacciIterator() if sum_digits(x) > 100)
A generator is just like a container, but the values are generated on the fly as you iterate.
# range() is lazy: it stores only start/stop/step, not ten million ints.
# NOTE(review): strictly, range returns a lazy sequence, not a generator object.
generator = range(10000000)
big_list = list(generator)
from sys import getsizeof
# The lazy object is tiny; the materialized list is orders of magnitude larger.
print(getsizeof(generator))
print(getsizeof(big_list))
Generator comprehensions:
%%timeit
# Builds the full million-element list: every i**2 is computed now.
power_2 = [i**2 for i in range(10**6)]
%%timeit
# Builds only a generator object: no squares are computed yet.
power_2_gen = (i**2 for i in range(10**6))
It's way faster because no `i**2` was actually computed :P — we only created a recipe for the sequence.
# Install and load memory_profiler to compare peak memory usage.
! pip3.8 install memory_profiler --user
%load_ext memory_profiler
# The list comprehension materializes all million squares before summing...
%memit sum([i**2 for i in range(10**6)])
# ...while the generator feeds sum() one value at a time.
%memit sum(i**2 for i in range(10**6))
That's because with list you create the whole list and then start adding. With generator comprehension you ask the generator to generate next value and add it to the current sum. There's no need for a container.
It's very useful when you might break at some point:
%%timeit
# range() produces values lazily, so breaking at i == 2 does almost no work.
for i in range(10**6):
if i == 2:
break
%%timeit
# list(range(...)) pays for building the whole million-element list first.
for i in list(range(10**6)):
if i == 2:
break
# Each next() runs the body up to the following yield; the prints show
# exactly where execution pauses and resumes.
def some_generator():
print("Starting")
yield 1
print("Let's come back to where we left off")
yield 2
print("Nope. No more yields")
gen = some_generator()
next(gen)
next(gen)
# There is no third yield, so this next() raises StopIteration.
next(gen)
# list() drives a fresh generator to exhaustion and collects the yields.
list(some_generator())
gen = some_generator()
import inspect
# GEN_CREATED: the body has not started executing yet.
inspect.getgeneratorstate(gen)
next(gen)
# GEN_SUSPENDED: paused at the first yield.
inspect.getgeneratorstate(gen)
list(gen)
# GEN_CLOSED: exhausted after list() consumed the rest.
inspect.getgeneratorstate(gen)
def primitive_range(start: int, stop: int, step: int = 1):
    """Yield start, start+step, ... for as long as the value stays below stop."""
    value = start
    while value < stop:
        yield value
        value += step

for i in primitive_range(0, 4):
    print(i)
If you come from C or C++ you might find it weird that the stack (local variables) is not destroyed when returning from a function. In CPython each function call creates a new frame object on the heap :D. So Python can manage the lifetime of function local variables dynamically.
import inspect
g = primitive_range(0,10)
# Locals don't exist yet — the generator hasn't started running.
inspect.getgeneratorlocals(g)
next(g)
# The suspended frame now holds start/stop/step and the loop variable.
inspect.getgeneratorlocals(g)
next(g)
inspect.getgeneratorlocals(g)
How does Python know where it stopped in a generator? The instruction pointer.
# Tiny two-step generator; the cells below inspect its frame and bytecode,
# so the local names x and y matter to that output.
def simple_gen():
x = 10
yield x
y = "abc"
yield y
g = simple_gen()
# f_lasti: offset of the last bytecode instruction executed
# (typically -1 before the generator has started, in CPython).
g.gi_frame.f_lasti
next(g)
# After one next() the frame is parked on a yield instruction.
g.gi_frame.f_lasti
import dis
# disco marks the instruction at lasti with "-->" in its listing.
dis.disco(g.gi_code, lasti=g.gi_frame.f_lasti)
This was python bytecode. And a topic for a separate CoP.
import dis
# code_info shows the function's flags (including GENERATOR), names, and constants.
print(dis.code_info(simple_gen))
def infinite_power_2_gen():
    """Endlessly yield powers of two: 2, 4, 8, 16, ..."""
    power = 2
    while True:
        yield power
        power *= 2
powers_of_2 = infinite_power_2_gen()
# Generators do not support indexing/slicing — this raises TypeError.
first_4 = powers_of_2[:4]
There are a couple of ways to slice a generator
# Way 1: call next() a fixed number of times.
first_5_elements = []
for i in range(5):
first_5_elements.append(next(powers_of_2))
first_5_elements
# Way 2: zip against a finite range to bound consumption.
first_5_elements = [pair[0] for pair in zip(powers_of_2, range(5))]
first_5_elements
from itertools import islice
# islice lazily caps the infinite generator at 4 items.
first_4_powers_gen = islice(infinite_power_2_gen(), 4)
# Still lazy: this only displays the islice object; nothing is consumed yet.
first_4_powers_gen
But obviously the islice is the best one.
Generators are one pass. There's no way to reuse a generator object that is already exhausted (that raised StopIteration)
# Consumes the islice; running this cell again would give [] (one pass only).
list(first_4_powers_gen)
With the yield statement, the FibonacciIterator implemented with the iterator protocol becomes much simpler:
def fib_generator():
    """Yield the Fibonacci numbers 0, 1, 1, 2, 3, ... forever."""
    previous, current = 0, 1
    while True:
        yield previous
        previous, current = current, previous + current

list(islice(fib_generator(), 10))
from pathlib import Path
from typing import Generator
import xml.etree.ElementTree as ET
def iter_tags_from_xml_file(path: Path) -> Generator[ET.Element, None, None]:
"""
Parses xml file incrementally to not bloat the ram on big xml files
:param path: path to xml_file
:return: generator iterating over xml tags
"""
# iterparse streams events while reading; requesting "start" as well
# guarantees the very first event hands us the root element.
xml_iterator = iter(ET.iterparse(str(path), events=("start", "end")))
# First event: the "start" of the root tag.
_, root = next(xml_iterator)
for event, element in xml_iterator:
if event == "end":
# The closing tag has been seen, so the element is fully parsed.
yield element
# without clearing the root element the whole tree is still stored in ram, but created incrementally
root.clear()
# Show the size of the test file on disk.
! du -hs ~/big.xml
tags = iter_tags_from_xml_file(Path("/home/rs/big.xml"))
# Lazily filter for <Keybox> tags; nothing has been read from disk yet.
keybox_tags = (entry for entry in tags if entry.tag == "Keybox")
It's great that up to this point the file hasn't been opened yet :D
# Peak memory stays tiny: only one element is alive at a time while counting.
%memit keybox_elements = sum(1 for x in keybox_tags)
We counted the Keybox tags in a 17 GB XML file using just a couple of KB of memory.
We need some test data. Let's generate it using python
# The `names` package generates random human names for the fixture file.
! pip3.8 install names --user
import names
import random
# Write 25 lines of the form "<FirstName> g1,g2,...,g15" with grades 2-5.
with open("student_grades.txt", "w") as file:
for _ in range(25):
print(f"{names.get_first_name()} {','.join(map(str,random.choices(range(2,6),k=15)))}", file=file)
!cat student_grades.txt
Find the first student with a grade mean less than 3.1 (the threshold used in the code below). There's no need to find the one with the worst grades — just find one.
Using lists won't scale. In the case of a really big file you would run out of RAM:
from pprint import pprint
from statistics import mean
def get_lines(filename: str) -> list:
    """Eagerly read the whole file and return its lines (newlines kept).

    :param filename: path of the text file to read
    :return: list of raw lines

    Idiom fix: a file object iterates its lines, so list(file) replaces
    the manual append loop with identical results.
    """
    with open(filename) as file:
        return list(file)
def parse_lines(lines: list):
    """Turn raw "Name g1,g2,..." lines into (name, [int grades]) tuples."""
    parsed = []
    for raw in lines:
        name, grades_csv = raw.split()
        parsed.append((name, [int(g) for g in grades_csv.split(",")]))
    return parsed
def get_students_with_means(students: list):
    """Map (name, grades) pairs to (name, mean-of-grades) pairs.

    Idiom fix: a list comprehension replaces the manual append loop
    (same output, clearer intent).
    """
    return [(student, mean(grades)) for student, grades in students]
# Every stage fully materializes its list before the next stage starts.
lines = get_lines("student_grades.txt")
students_with_grades = parse_lines(lines)
student_with_means = get_students_with_means(students_with_grades)
student_with_means
# NOTE(review): the tuple unpacking shadows statistics.mean inside this expression.
next(((student, mean) for student, mean in student_with_means if mean < 3.1))
But this will scale. I've put the corresponding list/generator functions next to each other, so you can see how simple the transition from lists to generators is:
from pprint import pprint
from statistics import mean
def get_lines(filename: str):
    """Eagerly collect every line of the file into a list (list version)."""
    collected = []
    with open(filename) as handle:
        for line in handle:
            collected.append(line)
    return collected
def get_lines_gen(filename: str):
    """Lazily yield the file's lines one at a time.

    The file stays open only while the generator is being consumed.
    Idiom fix: `yield from file` replaces the manual `for line: yield line`
    loop with identical behavior.
    """
    with open(filename) as file:
        yield from file
def parse_lines_gen(lines):
    """Lazily turn raw "Name g1,g2,..." lines into (name, [int grades]) pairs."""
    for raw in lines:
        name, grades_csv = raw.split()
        yield (name, [int(g) for g in grades_csv.split(",")])
def parse_lines(lines):
    """Parse raw grade lines into (student, [grades]) tuples (list version)."""
    def _parse(line):
        # "Name 2,3,4" -> ("Name", [2, 3, 4])
        student, grades_str = line.split()
        return (student, [int(g) for g in grades_str.split(",")])
    return [_parse(line) for line in lines]
def get_students_with_means(students):
    """Replace each grade list with its arithmetic mean (list version)."""
    return [(name, mean(grades)) for name, grades in students]
def get_students_with_means_gen(students):
    """Lazily replace each grade list with its arithmetic mean."""
    for name, grades in students:
        yield name, mean(grades)
# Chain the three generators; no line has been read from the file yet.
lines = get_lines_gen("student_grades.txt")
students_with_grades = parse_lines_gen(lines)
student_with_means = get_students_with_means_gen(students_with_grades)
It's great that up to this point no line has been read from file.
# Consuming the final generator pulls each line through all three stages.
list(student_with_means)
# Rebuild the pipeline — the previous one is already exhausted.
lines = get_lines_gen("student_grades.txt")
students_with_grades = parse_lines_gen(lines)
student_with_means = get_students_with_means_gen(students_with_grades)
# Stops at the first match; later lines of the file are never read.
# NOTE(review): the tuple unpacking shadows statistics.mean here.
next(((student, mean) for student, mean in student_with_means if mean < 3.1))
With this transition we came from:
to:
The second version is better because:
It can also be done with pure generator comprehensions in just a couple of lines. I love python one-liners:
# NOTE(review): this open() has no with/close — the handle is only
# reclaimed after the generator is garbage-collected.
lines = (line for line in open("student_grades.txt"))
splitted_lines = (line.split() for line in lines)
students_with_grades = ((student, [int(grade) for grade in grades_str.split(",")]) for student, grades_str in splitted_lines)
students_with_means = ((student, mean(grades)) for student, grades in students_with_grades)
next(((student, mean) for student, mean in students_with_means if mean < 3.1))
def adjustable_counter():
    """Count upward from 0; a value passed in via send() resets the counter."""
    value = 0
    while True:
        # yield hands out the current value; send() delivers one back here.
        received = yield value
        if received is not None:
            value = received
        value += 1
c = adjustable_counter()
# The first next() "primes" the generator: runs the body up to the first yield.
next(c)
next(c)
next(c)
With send() you can send a value into the running generator. next() is equivalent to send(None).
c.send(-100) # Spoiler alert - that's how coroutines communicate
next(c)
throw() raises the exception inside the generator:
# The exception is raised at the paused yield; unhandled inside the
# generator, it propagates back out here and terminates the generator.
c.throw(RuntimeError("Sorry"))
close() raises GeneratorExit inside the generator. This cleans up the generator state.
# close() on an already-terminated generator is a harmless no-op.
c.close()
import inspect
# GEN_CLOSED — the generator cannot produce any more values.
inspect.getgeneratorstate(c)
# The context manager guarantees the file is closed even if write() raises.
with open("irrelevant.txt","w") as file:
file.write("raii")
Is better than:
# The manual equivalent: easy to forget and noisier to read.
file = open("irrelevant.txt","w")
try:
file.write("raii")
finally:
file.close()
from threading import Lock
lock = Lock()
x = 10
# Manual acquire/release: an exception in between would leave the lock held.
lock.acquire()
x += 1
lock.release()
# Lock is also a context manager — release is guaranteed.
with lock:
x += 1
with lock:
x += 1
%%HTML
<blockquote class="reddit-card" data-card-created="1570178622"><a href="https://www.reddit.com/r/ProgrammerHumor/comments/bfr1xc/i_love_python_but/">I love Python, but...</a> from <a href="http://www.reddit.com/r/ProgrammerHumor">r/ProgrammerHumor</a></blockquote>
<script async src="//embed.redditmedia.com/widgets/platform.js" charset="UTF-8"></script>
class File:
    """Minimal re-implementation of open() as a class-based context manager."""

    def __init__(self, name: str, mode: str = "r"):
        self.name = name
        self.mode = mode
        self.file_handle = None

    def __enter__(self):
        # The value returned here is what `with ... as f` binds.
        handle = open(self.name, self.mode)
        self.file_handle = handle
        return handle

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Runs on both normal exit and exceptions; returning None (falsy)
        # lets any exception propagate.
        print("__exit__ called")
        handle = self.file_handle
        if handle:
            handle.close()
with File("irrelevant.txt", "r") as f:
# ZeroDivisionError is raised here; __exit__ still runs (closing the
# file) and the exception then propagates.
10 / 0
@contextmanager — a shortcut for creating context managers.
The code up to the first yield statement is executed in __enter__ and the rest is executed in __exit__
from contextlib import contextmanager

@contextmanager
def File(name: str, mode: str = "r"):
    """Generator-based context manager equivalent of the File class."""
    handle = None
    try:
        handle = open(name, mode)
        # Execution pauses here while the with-block body runs.
        yield handle
    finally:
        # Runs on normal exit, on exceptions in the body, and even if
        # open() itself failed (in which case handle is still None).
        if handle:
            print("closing")
            handle.close()
# Normal exit: the finally block prints "closing".
with File("irrelevant.txt", "w") as f:
pass
# The ZeroDivisionError still triggers the finally block, then propagates.
with File("irrelevant.txt", "w") as f:
10 / 0
# open() fails with FileNotFoundError; the handle stays None, so no close.
with File("3.txt", "r") as f:
10 / 0
import sys
import datetime
from typing import Generator, TextIO
from contextlib import contextmanager

@contextmanager
def execution_time_printed(file: TextIO = sys.stdout) -> Generator[None, None, None]:
    """Context manager printing the wall-clock duration of its body.

    :param file: stream the report is written to (defaults to stdout)

    Fix: `from typing.io import TextIO` — the typing.io namespace was
    deprecated since Python 3.8 and removed in 3.12; `typing.TextIO`
    is the supported spelling.
    NOTE(review): no try/finally, so the report is skipped if the body
    raises — behavior preserved from the original.
    """
    start = datetime.datetime.now()
    yield
    print("Execution time:", datetime.datetime.now() - start, file=file)
# The timing report is printed when the with-block exits.
with execution_time_printed():
print("inside")
import time
time.sleep(0.5)
print("outside")
A file object cannot be re-entered after it has been closed:
file = open("irrelevant.txt","w")
with file:
file.write("a")
# Re-entering the same (now closed) file object raises ValueError.
with file:
file.write("a")
An SQL transaction is implemented as a context manager:
import sqlite3
db = sqlite3.connect(":memory:")
db.execute("""
CREATE TABLE numbers (
number INTEGER
);
""")
# Using the connection as a context manager wraps the block in a
# transaction: commit on success, rollback on exception.
with db:
db.execute("INSERT INTO numbers values (1);")
db.execute("INSERT INTO numbers values (2);")
list(db.execute("SELECT * from numbers"))
with db:
db.execute("INSERT INTO numbers values (3);")
# Two values for a one-column table -> error -> the whole transaction,
# including the insert of 3, is rolled back.
db.execute("INSERT INTO numbers values ('should fail', 2);")
list(db.execute("SELECT * from numbers"))
It works — adding number 3 was rolled back.
from concurrent.futures import ThreadPoolExecutor
# Executors are context managers too: __exit__ calls shutdown(wait=True).
with ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(pow, 323, 1235)
print(future.result())
from concurrent.futures import Executor, ProcessPoolExecutor
with ProcessPoolExecutor(max_workers=8) as executor:
# Variable annotation only (helps IDE completion); no runtime effect.
executor: Executor
powers = list(executor.map(pow, range(10 ** 4), range(10 ** 4)))
powers[:100]
from contextlib import closing
from urllib.request import urlopen
# Manual version: the response must be closed with try/finally.
page = urlopen('http://www.python.org')
try:
print(next(page))
finally:
page.close()
page.isclosed()
from contextlib import closing
from urllib.request import urlopen
# closing() turns any object with a .close() method into a context manager.
with closing(urlopen('http://www.python.org')) as page:
print(next(page))
page.isclosed()
# Manually ignoring one specific exception type with try/except.
try:
raise ValueError()
except ValueError:
pass
from contextlib import suppress
# suppress() swallows the listed exception types raised inside the block.
with suppress(ValueError):
raise ValueError()
from contextlib import redirect_stdout
import io
f = io.StringIO()
# Everything printed inside the block is captured into f instead of stdout.
with redirect_stdout(f):
help(pow)
f.getvalue()
from contextlib import ContextDecorator

class mycontext(ContextDecorator):
    """Context manager usable both via `with` and as a function decorator."""

    def __enter__(self):
        print('Starting')
        return self

    def __exit__(self, *exc):
        print('Finishing')
        # False: never suppress exceptions raised in the body.
        return False
# As a decorator: enter/exit wrap every call of the decorated function.
@mycontext()
def function():
print('The bit in the middle')
function()
# Identical effect with an explicit with-block.
with mycontext():
print('The bit in the middle')