# /// script # requires-python = ">=3.13" # dependencies = [ # "altair", # "asimpy", # "marimo", # "polars==1.24.0", # ] # /// import marimo __generated_with = "0.20.4" app = marimo.App(width="medium") @app.cell(hide_code=True) def _(): import marimo as mo import random import statistics import altair as alt import polars as pl from asimpy import Environment, Process, Queue, PriorityQueue return Environment, Process, Queue, PriorityQueue, alt, mo, pl, random, statistics @app.cell(hide_code=True) def _(mo): mo.md(r""" # The Convoy Effect ## *One Slow Job Ruins Everyone's Day* A single server processes jobs that arrive randomly (Poisson process). Most jobs are quick (exponential service with small mean), but a rare few are very slow (exponential service with large mean). This *hyperexponential* service distribution has high variance. This tutorial compares the performance of two scheduling disciplines in this situation: - FIFO (First In, First Out): jobs are served in the order they arrive. - SJF (Shortest Job First): the server always picks the shortest queued job next. The surprising result is that SJF dramatically outperforms FIFO: not just for the small jobs that directly benefit from skipping ahead, but also for mean sojourn time across *all* jobs. The improvement is most visible at the tail (95th and 99th percentiles) because FIFO creates a *convoy effect*: one long job blocks many short jobs behind it, inflating everyone's wait. ### The Convoy Metaphor Picture a one-lane road with one slow truck and many fast cars. Every car behind the truck must drive at truck speed; no overtaking allowed. The truck is the long job; the cars are the short jobs stuck behind it in FIFO order. SJF is like a passing lane: fast cars jump ahead of the truck and reach their destination much sooner. The truck itself arrives at the same time either way, but the total delay experienced by all vehicles plummets. ### Why FIFO Hurts with High Variance In FIFO, the server's current job is chosen at arrival time, not at decision time. When a slow job begins service, every subsequent arrival must join the queue and wait. The expected excess work in service (the remaining time of the current job, seen by an arriving customer) under FIFO is: $$W_{\text{FIFO}} = \frac{\lambda \overline{s^2}}{2(1-\rho)} + \frac{1}{\mu}$$ where $\overline{s^2}$ is the second moment of service time. High variance inflates $\overline{s^2}$ without changing $\rho$, directly worsening wait time. ### SJF Minimises Mean Sojourn Time For a single server with non-preemptive SJF and any service-time distribution, the mean sojourn time is given by the formula below (which is discussed in "Understanding the Math" at the end of this lesson): $$W_{\text{SJF}} = \frac{1}{\mu} + \frac{\lambda \overline{s^2}}{2(1-\rho)}$$ SJF achieves this minimum because short jobs that would otherwise be blocked by a long job are promoted ahead, reducing the total waiting work in the system. ## Practical Relevance Operating system CPU schedulers use time-quanta and priority aging to approximate SJF without knowing job sizes in advance. Database query planners estimate query cost and reorder execution to minimize blocking. The phenomenon reappears as *head-of-line blocking* in HTTP/1.1 (one slow response stalls a connection), motivating HTTP/2 multiplexing and HTTP/3's QUIC stream independence. """) return @app.cell(hide_code=True) def _(mo): mo.md(r""" ## Implementation Jobs are placed in a `PriorityQueue` for SJF (tupled as `(service_time, job_id)` so shorter jobs sort earlier) or a plain `Queue()` for FIFO (tupled as `(job_id, service_time)` to preserve arrival order). The same hyperexponential service-time generator (90% short, 10% long) is used in both runs. """) return @app.cell(hide_code=True) def _(mo): sim_time_slider = mo.ui.slider( start=0, stop=100_000, step=1_000, value=20_000, label="Simulation time", ) arrival_rate_slider = mo.ui.slider( start=0.1, stop=1.5, step=0.05, value=0.7, label="Arrival rate", ) seed_input = mo.ui.number( value=192, step=1, label="Random seed", ) run_button = mo.ui.run_button(label="Run simulation") mo.vstack([ sim_time_slider, arrival_rate_slider, seed_input, run_button, ]) return arrival_rate_slider, seed_input, sim_time_slider @app.cell def _(arrival_rate_slider, seed_input, sim_time_slider): SIM_TIME = int(sim_time_slider.value) ARRIVAL_RATE = float(arrival_rate_slider.value) SEED = int(seed_input.value) SHORT_RATE = 4.0 LONG_RATE = 0.2 LONG_PROB = 0.10 return ARRIVAL_RATE, LONG_PROB, LONG_RATE, SEED, SHORT_RATE, SIM_TIME @app.cell def _(LONG_PROB, LONG_RATE, SHORT_RATE, random): def service_time(): if random.random() < LONG_PROB: return random.expovariate(LONG_RATE) return random.expovariate(SHORT_RATE) return (service_time,) @app.cell def _(ARRIVAL_RATE, Process, random, service_time): class JobSource(Process): def init(self, job_queue, arrivals, sjf): self.job_queue = job_queue self.arrivals = arrivals self.sjf = sjf self._jid = 0 async def run(self): while True: await self.timeout(random.expovariate(ARRIVAL_RATE)) jid = self._jid self._jid += 1 svc = service_time() self.arrivals[jid] = (self.now, svc) if self.sjf: await self.job_queue.put((svc, jid)) else: await self.job_queue.put((jid, svc)) return (JobSource,) @app.cell def _(Process): class Server(Process): def init(self, job_queue, arrivals, sojourn_times, sjf): self.job_queue = job_queue self.arrivals = arrivals self.sojourn_times = sojourn_times self.sjf = sjf async def run(self): while True: item = await self.job_queue.get() if self.sjf: svc, jid = item else: jid, svc = item await self.timeout(svc) arrival_time, _ = self.arrivals[jid] self.sojourn_times.append(self.now - arrival_time) return (Server,) @app.cell def _(Environment, JobSource, Queue, PriorityQueue, SIM_TIME, Server, statistics): def simulate(sjf): arrivals = {} sojourn_times = [] env = Environment() q = PriorityQueue(env) if sjf else Queue(env) JobSource(env, q, arrivals, sjf) Server(env, q, arrivals, sojourn_times, sjf) env.run(until=SIM_TIME) return { "mean": statistics.mean(sojourn_times), "median": statistics.median(sojourn_times), "p95": sorted(sojourn_times)[int(0.95 * len(sojourn_times))], "p99": sorted(sojourn_times)[int(0.99 * len(sojourn_times))], "n": len(sojourn_times), } return (simulate,) @app.cell def _(LONG_PROB, LONG_RATE, SEED, SHORT_RATE, pl, random, simulate): def run_scenarios(): fifo = simulate(sjf=False) sjf_res = simulate(sjf=True) rows = [ { "metric": m, "fifo": fifo[m], "sjf": sjf_res[m], "improvement": fifo[m] / sjf_res[m], } for m in ("mean", "median", "p95", "p99") ] return pl.DataFrame(rows) random.seed(SEED) df = run_scenarios() mean_svc = (1 - LONG_PROB) / SHORT_RATE + LONG_PROB / LONG_RATE return df, mean_svc @app.cell(hide_code=True) def _(ARRIVAL_RATE, LONG_PROB, LONG_RATE, SHORT_RATE, mean_svc, mo): mo.md(f""" ## Summary Statistics Arrival rate: {ARRIVAL_RATE}, estimated mean service: {mean_svc:.3f} Short jobs: {100 * (1 - LONG_PROB):.0f}% (mean {1 / SHORT_RATE:.2f}), Long jobs: {100 * LONG_PROB:.0f}% (mean {1 / LONG_RATE:.1f}) > **Note:** SJF is optimal for mean sojourn time but requires knowing job sizes in advance. """) return @app.cell def _(df): df return @app.cell def _(alt, df, pl): df_plot = df.filter(pl.col("metric") != "n").unpivot( on=["fifo", "sjf"], index="metric", variable_name="policy", value_name="sojourn_time", ) chart = ( alt.Chart(df_plot) .mark_bar() .encode( x=alt.X("metric:N", title="Metric"), y=alt.Y("sojourn_time:Q", title="Sojourn time"), color=alt.Color("policy:N", title="Policy"), xOffset="policy:N", tooltip=["metric:N", "policy:N", "sojourn_time:Q"], ) .properties(title="Convoy Effect: FIFO vs. Shortest Job First") ) chart return @app.cell(hide_code=True) def _(mo): mo.md(r""" ## Understanding the Math ### The second moment For a random variable $S$ representing service time, the second moment is $E[S^2]$. Recall from your statistics course that variance is $\text{Var}(S) = E[S^2] - (E[S])^2$, which rearranges to: $$E[S^2] = \text{Var}(S) + (E[S])^2$$ This means high variance inflates $E[S^2]$ even if the mean $E[S]$ stays fixed. Doubling the spread of service times can quadruple $E[S^2]$, even with the same average service time. ### Why variance of service time hurts Imagine a FIFO server handling jobs that are either 0.1 minutes or 10 minutes long, with 90% being short and 10% being long. The mean service time is $0.9 \times 0.1 + 0.1 \times 10 = 1.09$ minutes, so utilization $\rho = \lambda / \mu$ might be modest. But when a 10-minute job starts, every job arriving during those 10 minutes must join the queue and wait. The longer $E[S^2]$, the more average work sits ahead of each arriving job. ### The Pollaczek–Khinchine formula The mean time a job spends waiting (not counting its own service time) in a FIFO single-server queue is: $$W_q = \frac{\lambda \cdot E[S^2]}{2(1 - \rho)}$$ Here $\lambda$ is the arrival rate, $E[S^2]$ is the second moment of service time, and $\rho = \lambda \cdot E[S]$ is the server utilization. Both $\lambda$ and $E[S^2]$ appear in the numerator, so more variance means more waiting even at the same $\rho$. The $(1-\rho)$ denominator is the familiar blow-up term from M/M/1. """) return if __name__ == "__main__": app.run()