Table Of Contents
Problem
CPython's Global Interpreter Lock (GIL) prevents threads from running Python code in parallel, so CPU-bound tasks gain nothing from threading and cannot take advantage of multi-core systems.
Solution
# Use multiprocessing for CPU-bound tasks
import multiprocessing
import time
def cpu_bound_task(n):
    """Return the sum of ``i * i`` for every ``i`` in ``range(n * 1000000)``.

    This is pure-Python arithmetic with no I/O, used as the CPU-bound
    workload in the benchmarks. A zero or negative ``n`` yields an empty
    range and therefore ``0``.
    """
    upper = n * 1000000
    return sum(value * value for value in range(upper))
if __name__ == '__main__':
    # The guard matters for multiprocessing: start methods that re-import
    # this module in each worker must not re-run the benchmark itself.

    # Sequential execution: four identical tasks, one after another.
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be skewed by system clock adjustments as with time.time().
    start = time.perf_counter()
    results = [cpu_bound_task(100) for _ in range(4)]
    print(f"Sequential: {time.perf_counter() - start:.2f}s")

    # Parallel with multiprocessing: the same four tasks are spread over a
    # pool of worker processes; map() blocks until all results are back.
    start = time.perf_counter()
    with multiprocessing.Pool() as pool:
        results = pool.map(cpu_bound_task, [100] * 4)
    print(f"Multiprocessing: {time.perf_counter() - start:.2f}s")
# Use threading for I/O-bound tasks (GIL released during I/O)
import threading
import requests
def fetch_url(url, timeout=10):
    """Download *url* with an HTTP GET and return the body size in bytes.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so without one a stalled server
            would block the calling thread indefinitely.

    Returns:
        The length of ``response.content``.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    return len(response.content)
urls = ['http://httpbin.org/delay/1'] * 5
# Sequential I/O: each request starts only after the previous one returned.
# perf_counter() is monotonic, so the elapsed time is not affected by
# system clock adjustments the way time.time() differences can be.
start = time.perf_counter()
results = [fetch_url(url) for url in urls]
print(f"Sequential I/O: {time.perf_counter() - start:.2f}s")
# Threaded I/O (faster because the GIL is released during network calls)
# perf_counter() is monotonic, which makes it the right clock for intervals.
start = time.perf_counter()
threads = []
results = []


def worker(url):
    """Fetch *url* and append the body size to the shared ``results`` list."""
    # Sizes are appended as each thread finishes, so ``results`` is in
    # completion order rather than in the order of ``urls``.
    result = fetch_url(url)
    results.append(result)


# One thread per URL; each is started as soon as it is created.
for url in urls:
    thread = threading.Thread(target=worker, args=(url,))
    threads.append(thread)
    thread.start()
# Wait for every thread so the timing covers all downloads.
for thread in threads:
    thread.join()
print(f"Threaded I/O: {time.perf_counter() - start:.2f}s")
# Use asyncio for concurrent I/O
import asyncio
import aiohttp
async def fetch_async(session, url):
    """Request *url* through *session* and return the body size in bytes.

    ``session`` must provide ``get(url)`` usable as an async context manager
    whose response exposes an awaitable ``read()`` (the callers in this file
    pass an ``aiohttp.ClientSession``).
    """
    async with session.get(url) as reply:
        payload = await reply.read()
        return len(payload)
async def main():
    """Fetch every entry of the module-level ``urls`` concurrently.

    A single ``aiohttp.ClientSession`` is shared by all requests. Returns the
    list of body sizes produced by ``asyncio.gather``.
    """
    async with aiohttp.ClientSession() as http:
        pending = (fetch_async(http, target) for target in urls)
        return await asyncio.gather(*pending)
# Asyncio approach: run the coroutine to completion on a fresh event loop.
# perf_counter() is monotonic, so the interval is immune to clock changes.
start = time.perf_counter()
results = asyncio.run(main())
print(f"Asyncio: {time.perf_counter() - start:.2f}s")
# Check if code is CPU or I/O bound
import concurrent.futures
def identify_bottleneck():
    """Run one CPU-bound and one I/O-bound workload on the matching executor.

    Two ``cpu_bound_task(50)`` calls go to a process pool, and the first two
    entries of the module-level ``urls`` are fetched on a thread pool.

    Returns:
        A ``(cpu_results, io_results)`` tuple of lists, so the computed
        values are available to the caller instead of being discarded.
    """
    # CPU-bound: use ProcessPoolExecutor
    with concurrent.futures.ProcessPoolExecutor() as executor:
        cpu_results = list(executor.map(cpu_bound_task, [50] * 2))
    # I/O-bound: use ThreadPoolExecutor
    with concurrent.futures.ThreadPoolExecutor() as executor:
        io_results = list(executor.map(fetch_url, urls[:2]))
    return cpu_results, io_results
Explanation
The GIL allows only one thread to execute Python bytecode at a time. Use multiprocessing for CPU-bound tasks: each process has its own interpreter and its own GIL, so the work runs in parallel across cores. Use threading or asyncio for I/O-bound tasks, since the GIL is released while a thread waits on I/O.
Choose multiprocessing for CPU work, threading for I/O with simple coordination, and asyncio for complex I/O with many concurrent operations.
Share this article
Add Comment
No comments yet. Be the first to comment!