Spaces:

marimo-team
/

marimo-learn

Runtime error

marimo-learn / queueing /06_pooled_vs_separate.py

Greg Wilson

feat: hiding a few cells

8eccedb 10 days ago

9.4 kB

	# /// script
	# requires-python = ">=3.13"
	# dependencies = [
	# "altair",
	# "asimpy",
	# "marimo",
	# "polars==1.24.0",
	# ]
	# ///

	import marimo

	__generated_with = "0.20.4"
	app = marimo.App(width="medium")


	@app.cell(hide_code=True)
	def _():
	import marimo as mo
	import random
	import statistics

	import altair as alt
	import polars as pl

	from asimpy import Environment, Process, Resource

	return Environment, Process, Resource, alt, mo, pl, random, statistics


	@app.cell(hide_code=True)
	def _(mo):
	mo.md(r"""
	# Pooled vs. Separate Queues

	## Why Airports Switched to Single Lines

	A facility has two identical servers. Customers arrive as a Poisson process and each needs one server for an exponentially distributed service time. Which queueing discipline should the facility use?

	- Separate queues: each server has its own dedicated line; customers randomly pick a line on arrival and cannot switch.
	- Pooled queue: a single shared line feeds whichever server becomes free first.

	It turns out that pooling the queues is always better, even though both systems have identical total arrival rate, identical per-server service rate, and identical utilization $\rho$. The pooled system consistently produces shorter mean wait times, often by a factor of two or more at moderate utilization. The reason is that separate queues waste servers' idle time. In separate queues, one server may be idle while customers wait in the other line. Pooling eliminates this mismatch: a free server always serves the next waiting customer.

	### Why Separate Queues Persist

	Despite being provably worse, separate queues feel fairer because customers can see their progress. Single lines eliminate the anxiety of watching the other queue move faster, but historically customers resisted them until airlines and banks demonstrated the improvement empirically in the 1960s–70s.
	""")
	return


	@app.cell(hide_code=True)
	def _(mo):
	mo.md(r"""
	## Implementation

	This tutorial explores this finding using two simulations run with identical random seeds:

	1. Pooled: `Resource(capacity=2)` with one arrival stream. The resource grants access to whichever capacity slot is free.
	2. Separate: two `Resource(capacity=1)` instances. Arrivals call `random.choice` to pick a server and cannot switch even if it is slower.

	The mean sojourn time is collected across a sweep of utilization levels $\rho$.
	""")
	return


	@app.cell(hide_code=True)
	def _(mo):
	sim_time_slider = mo.ui.slider(
	start=0,
	stop=100_000,
	step=1_000,
	value=20_000,
	label="Simulation time",
	)

	arrival_rate_slider = mo.ui.slider(
	start=0.5,
	stop=1.9,
	step=0.05,
	value=1.8,
	label="Arrival rate",
	)

	seed_input = mo.ui.number(
	value=192,
	step=1,
	label="Random seed",
	)

	run_button = mo.ui.run_button(label="Run simulation")

	mo.vstack([
	sim_time_slider,
	arrival_rate_slider,
	seed_input,
	run_button,
	])
	return arrival_rate_slider, seed_input, sim_time_slider


	@app.cell
	def _(arrival_rate_slider, seed_input, sim_time_slider):
	SIM_TIME = int(sim_time_slider.value)
	ARRIVAL_RATE = float(arrival_rate_slider.value)
	SEED = int(seed_input.value)
	SERVICE_RATE = 1.0
	N_SERVERS = 2
	RHO = ARRIVAL_RATE / (N_SERVERS * SERVICE_RATE)
	return ARRIVAL_RATE, N_SERVERS, RHO, SEED, SERVICE_RATE, SIM_TIME


	@app.cell
	def _(Process, SERVICE_RATE, random):
	class Customer(Process):
	def init(self, server, sojourn_times):
	self.server = server
	self.sojourn_times = sojourn_times

	async def run(self):
	arrival = self.now
	async with self.server:
	await self.timeout(random.expovariate(SERVICE_RATE))
	self.sojourn_times.append(self.now - arrival)

	return (Customer,)


	@app.cell
	def _(Customer, Process, random):
	class PooledArrivals(Process):
	def init(self, arrival_rate, server, sojourn_times):
	self.arrival_rate = arrival_rate
	self.server = server
	self.sojourn_times = sojourn_times

	async def run(self):
	while True:
	await self.timeout(random.expovariate(self.arrival_rate))
	Customer(self._env, self.server, self.sojourn_times)

	return (PooledArrivals,)


	@app.cell
	def _(Customer, Process, random):
	class SeparateArrivals(Process):
	def init(self, arrival_rate, servers, sojourn_times):
	self.arrival_rate = arrival_rate
	self.servers = servers
	self.sojourn_times = sojourn_times

	async def run(self):
	while True:
	await self.timeout(random.expovariate(self.arrival_rate))
	server = random.choice(self.servers)
	Customer(self._env, server, self.sojourn_times)

	return (SeparateArrivals,)


	@app.cell
	def _(
	ARRIVAL_RATE,
	Environment,
	N_SERVERS,
	PooledArrivals,
	Resource,
	SEED,
	SIM_TIME,
	random,
	statistics,
	):
	def run_pooled(arrival_rate=ARRIVAL_RATE):
	random.seed(SEED)
	sojourn_times = []
	env = Environment()
	shared_server = Resource(env, capacity=N_SERVERS)
	PooledArrivals(env, arrival_rate, shared_server, sojourn_times)
	env.run(until=SIM_TIME)
	return statistics.mean(sojourn_times)

	return (run_pooled,)


	@app.cell
	def _(
	ARRIVAL_RATE,
	Environment,
	N_SERVERS,
	Resource,
	SEED,
	SIM_TIME,
	SeparateArrivals,
	random,
	statistics,
	):
	def run_separate(arrival_rate=ARRIVAL_RATE):
	random.seed(SEED)
	sojourn_times = []
	env = Environment()
	servers = [Resource(env, capacity=1) for _ in range(N_SERVERS)]
	SeparateArrivals(env, arrival_rate, servers, sojourn_times)
	env.run(until=SIM_TIME)
	return statistics.mean(sojourn_times)

	return (run_separate,)


	@app.cell
	def _(ARRIVAL_RATE, N_SERVERS, SERVICE_RATE, pl, run_pooled, run_separate):
	def sweep():
	sweep_rows = []
	for rho in [0.5, 0.6, 0.7, 0.8, 0.9]:
	rate = rho * N_SERVERS * SERVICE_RATE
	pw = run_pooled(arrival_rate=rate)
	sw = run_separate(arrival_rate=rate)
	sweep_rows.append({"rho": rho, "pooled_W": pw, "separate_W": sw, "ratio": sw / pw})
	return pl.DataFrame(sweep_rows)

	df_sweep = sweep()
	pooled_W = run_pooled(arrival_rate=ARRIVAL_RATE)
	separate_W = run_separate(arrival_rate=ARRIVAL_RATE)
	return df_sweep, pooled_W, separate_W


	@app.cell(hide_code=True)
	def _(N_SERVERS, RHO, SERVICE_RATE, mo, pooled_W, separate_W):
	mo.md(f"""
	## Results

	{N_SERVERS} servers, service rate {SERVICE_RATE}, utilisation ρ = {RHO:.2f}

	At ρ = {RHO:.2f}: pooled W = {pooled_W:.3f}, separate W = {separate_W:.3f}
	— separate queues are {separate_W / pooled_W:.2f}× slower
	""")
	return


	@app.cell
	def _(df_sweep):
	df_sweep
	return


	@app.cell
	def _(alt, df_sweep):
	df_plot = df_sweep.unpivot(
	on=["pooled_W", "separate_W"], index="rho", variable_name="system", value_name="W"
	)
	chart = (
	alt.Chart(df_plot)
	.mark_line(point=True)
	.encode(
	x=alt.X("rho:Q", title="Utilization per server (ρ)"),
	y=alt.Y("W:Q", title="Mean sojourn time (W)"),
	color=alt.Color("system:N", title="Queue type"),
	tooltip=["rho:Q", "system:N", "W:Q"],
	)
	.properties(title="Pooled vs. Separate Queues: Mean Sojourn Time")
	)
	chart
	return


	@app.cell(hide_code=True)
	def _(mo):
	mo.md(r"""
	## Understanding the Math

	### Why pooling always wins

	Two separate M/M/1 queues each running at utilization $\rho$ have mean sojourn time $W_{\text{sep}} = 1/(\mu(1-\rho))$. A pooled M/M/2 queue with the same total arrival rate has strictly lower mean sojourn time for every value of $0 < \rho < 1$. The proof uses the [Erlang-C formula](https://en.wikipedia.org/wiki/Erlang_(unit)#Erlang_C_formula), but the intuition is simpler: pooling converts two independent random processes into one, and the combined queue can exploit any idle capacity instantly. At $\rho = 0.8$, separate queues give roughly twice the mean wait of a pooled queue.

	### Connection to variance reduction

	Think of the service delivered in a time window by two separate servers as two independent random variables $X_1$ and $X_2$. Their average $(X_1 + X_2)/2$ has variance $\sigma^2/2$, which is half the variance of either component alone. Pooling achieves something similar: by combining demand into one stream served by both servers, the system smooths out random fluctuations. The pooled queue is, in effect, averaging over both servers' idle periods instead of locking each idle period to a single lane.

	### Rule of thumb

	At $\rho = 0.8$, separate queues produce roughly double the mean wait of a pooled queue. This factor grows as $\rho$ increases, because the $(1-\rho)$ term in the denominator amplifies any wasted capacity. The lesson: whenever you can route demand flexibly to a shared resource, do it.
	""")
	return


	if __name__ == "__main__":
	app.run()