diff --git a/.server-changes/queue-metrics-dashboard.md b/.server-changes/queue-metrics-dashboard.md
new file mode 100644
index 00000000000..37baffc7aaa
--- /dev/null
+++ b/.server-changes/queue-metrics-dashboard.md
@@ -0,0 +1,6 @@
+---
+area: webapp
+type: feature
+---
+
+Queue metrics and health on the Queues page: per-queue depth, throughput, concurrency, throttling, and scheduling-delay charts, plus a per-queue detail view. Off by default; enabled per organization.
diff --git a/apps/webapp/app/components/primitives/UsageSparkline.tsx b/apps/webapp/app/components/primitives/UsageSparkline.tsx
index 2ffc1936a1d..7c4bbd5d262 100644
--- a/apps/webapp/app/components/primitives/UsageSparkline.tsx
+++ b/apps/webapp/app/components/primitives/UsageSparkline.tsx
@@ -27,6 +27,8 @@ export type UsageSparklineProps = {
   color?: string;
   /** Unit shown in the tooltip (e.g. calls, tokens). */
   unitLabel?: UnitLabel;
+  /** Trailing scalar shown after the chart. Defaults to the sum of buckets (override for gauges, e.g. peak). */
+  total?: number;
   /** Format the trailing total. Defaults to `toLocaleString`. */
   formatTotal?: (total: number) => string;
   /** Class for the trailing total label. */
@@ -44,14 +46,16 @@ export function UsageSparkline({
   bucketIntervalMs,
   color = "#3B82F6",
   unitLabel = { singular: "call", plural: "calls" },
+  total: totalOverride,
   formatTotal,
   totalClassName = "text-blue-400",
 }: UsageSparklineProps) {
-  if (!data || data.every((v) => v === 0)) {
+  const hasTotalOverride = totalOverride !== undefined;
+  if (!data || data.length === 0 || (data.every((v) => v === 0) && !hasTotalOverride)) {
     return <span className="text-text-dimmed">–</span>;
   }
 
-  const total = data.reduce((a, b) => a + b, 0);
+  const total = totalOverride ?? data.reduce((a, b) => a + b, 0);
   const max = Math.max(...data);
 
   // Map each bucket to a dated point so the tooltip can show the window it
diff --git a/apps/webapp/app/components/primitives/charts/Chart.tsx b/apps/webapp/app/components/primitives/charts/Chart.tsx
index 57a2692e677..8894c2da34d 100644
--- a/apps/webapp/app/components/primitives/charts/Chart.tsx
+++ b/apps/webapp/app/components/primitives/charts/Chart.tsx
@@ -216,7 +216,7 @@ const ChartTooltipContent = React.forwardRef<
                       )}
                       <div
                         className={cn(
-                          "flex flex-1 justify-between leading-none",
+                          "flex flex-1 justify-between gap-3 leading-none",
                           nestLabel ? "items-end" : "items-center"
                         )}
                       >
diff --git a/apps/webapp/app/components/primitives/charts/ChartLine.tsx b/apps/webapp/app/components/primitives/charts/ChartLine.tsx
index 1edd5a2357e..5d5fb95ecce 100644
--- a/apps/webapp/app/components/primitives/charts/ChartLine.tsx
+++ b/apps/webapp/app/components/primitives/charts/ChartLine.tsx
@@ -4,6 +4,7 @@ import {
   CartesianGrid,
   Line,
   LineChart,
+  ReferenceLine,
   XAxis,
   YAxis,
   type XAxisProps,
@@ -48,12 +49,38 @@ export type ChartLineRendererProps = {
   tooltipLabelFormatter?: (label: string, payload: any[]) => string;
   /** Optional formatter for numeric tooltip values (e.g. bytes, duration) */
   tooltipValueFormatter?: (value: number) => string;
+  /** Draw a dot at each data point. Defaults to true; turn off for dense/compact charts. */
+  showDots?: boolean;
+  /** Horizontal reference lines (e.g. limits); the y-domain extends to include them. */
+  referenceLines?: Array<{ y: number; label?: string; color?: string }>;
   /** Width injected by ResponsiveContainer */
   width?: number;
   /** Height injected by ResponsiveContainer */
   height?: number;
 };
 
+/**
+ * Right-aligned caption drawn just below a horizontal reference line.
+ * Renders nothing when `viewBox` is absent (recharts injects it).
+ */
+function ReferenceLineLabel(props: {
+  viewBox?: { x: number; y: number; width: number };
+  value: string;
+}) {
+  const { viewBox: bounds, value: caption } = props;
+  if (!bounds) return null;
+
+  // Inset 4 from the right edge; the text baseline sits 12 below the line's y.
+  const anchorX = bounds.x + bounds.width - 4;
+  const baselineY = bounds.y + 12;
+
+  return (
+    <text x={anchorX} y={baselineY} textAnchor="end" fill="#878C99" fontSize={10}>
+      {caption}
+    </text>
+  );
+}
+
 /**
  * Line chart renderer for the compound component system.
  * Must be used within a Chart.Root.
@@ -73,6 +100,8 @@ export function ChartLineRenderer({
   stacked = false,
   tooltipLabelFormatter,
   tooltipValueFormatter,
+  showDots = true,
+  referenceLines,
   width,
   height,
 }: ChartLineRendererProps) {
@@ -176,6 +205,17 @@ export function ChartLineRenderer({
           labelFormatter={tooltipLabelFormatter}
         />
         {/* Note: Legend is now rendered by ChartRoot outside the chart container */}
+        {referenceLines?.map((line) => (
+          <ReferenceLine
+            key={`ref-${line.y}-${line.label ?? ""}`}
+            y={line.y}
+            stroke={line.color ?? "#4D525B"}
+            strokeDasharray="4 4"
+            strokeWidth={1}
+            ifOverflow="extendDomain"
+            label={line.label ? <ReferenceLineLabel value={line.label} /> : undefined}
+          />
+        ))}
         {visibleSeries.map((key) => (
           <Area
             key={key}
@@ -222,6 +262,17 @@ export function ChartLineRenderer({
         labelFormatter={tooltipLabelFormatter}
       />
       {/* Note: Legend is now rendered by ChartRoot outside the chart container */}
+      {referenceLines?.map((line) => (
+        <ReferenceLine
+          key={`ref-${line.y}-${line.label ?? ""}`}
+          y={line.y}
+          stroke={line.color ?? "#4D525B"}
+          strokeDasharray="4 4"
+          strokeWidth={1}
+          ifOverflow="extendDomain"
+          label={line.label ? <ReferenceLineLabel value={line.label} /> : undefined}
+        />
+      ))}
       {visibleSeries.map((key) => (
         <Line
           key={key}
@@ -229,7 +280,7 @@ export function ChartLineRenderer({
           type={lineType}
           stroke={config[key]?.color}
           strokeWidth={1}
-          dot={{ r: 1.5, fill: config[key]?.color, strokeWidth: 0 }}
+          dot={showDots ? { r: 1.5, fill: config[key]?.color, strokeWidth: 0 } : false}
           activeDot={{ r: 4 }}
           isAnimationActive={false}
         />
diff --git a/apps/webapp/app/components/query/QueryEditor.tsx b/apps/webapp/app/components/query/QueryEditor.tsx
index 8520d2a7a0b..eb9fd08ffb1 100644
--- a/apps/webapp/app/components/query/QueryEditor.tsx
+++ b/apps/webapp/app/components/query/QueryEditor.tsx
@@ -72,7 +72,7 @@ import type { action as titleAction } from "~/routes/resources.orgs.$organizatio
 import type { QueryScope } from "~/services/queryService.server";
 import { downloadFile, rowsToCSV, rowsToJSON } from "~/utils/dataExport";
 import { organizationBillingPath } from "~/utils/pathBuilder";
-import { querySchemas } from "~/v3/querySchemas";
+import { visibleQuerySchemas } from "~/v3/querySchemas";
 
 /** Convert a Date or ISO string to ISO string format */
 function toISOString(value: Date | string): string {
@@ -245,7 +245,7 @@ const QueryEditorForm = forwardRef<
       <TSQLEditor
         defaultValue={query}
         onChange={setQuery}
-        schema={querySchemas}
+        schema={visibleQuerySchemas}
         linterEnabled={true}
         showCopyButton={true}
         showClearButton={true}
diff --git a/apps/webapp/app/entry.server.tsx b/apps/webapp/app/entry.server.tsx
index 091f2f28ccf..1cc842eb916 100644
--- a/apps/webapp/app/entry.server.tsx
+++ b/apps/webapp/app/entry.server.tsx
@@ -11,6 +11,7 @@ import * as Worker from "~/services/worker.server";
 import { initMollifierDrainerWorker } from "~/v3/mollifierDrainerWorker.server";
 import { initMollifierStaleSweepWorker } from "~/v3/mollifierStaleSweepWorker.server";
 import { initBillingLimitWorker } from "~/v3/billingLimitWorker.server";
+import { initQueueMetricsConsumer, initQueueMetricsEmitter } from "~/v3/queueMetrics.server";
 import { bootstrap } from "./bootstrap";
 import { LocaleContextProvider } from "./components/primitives/LocaleProvider";
 import type { OperatingSystemPlatform } from "./components/primitives/OperatingSystemProvider";
@@ -234,6 +235,8 @@ Worker.init().catch((error) => {
 initMollifierDrainerWorker();
 initMollifierStaleSweepWorker();
 initBillingLimitWorker();
+initQueueMetricsEmitter();
+initQueueMetricsConsumer();
 
 bootstrap().catch((error) => {
   logError(error);
diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts
index f5e0f0a671c..26e178d18a6 100644
--- a/apps/webapp/app/env.server.ts
+++ b/apps/webapp/app/env.server.ts
@@ -883,6 +883,31 @@ const EnvironmentSchema = z
     RUN_ENGINE_REUSE_SNAPSHOT_COUNT: z.coerce.number().int().default(0),
     RUN_ENGINE_MAXIMUM_ENV_COUNT: z.coerce.number().int().optional(),
     RUN_ENGINE_RUN_QUEUE_SHARD_COUNT: z.coerce.number().int().default(4),
+    // Queue metrics ingestion (Redis Stream -> ClickHouse). The runtime on/off is the
+    // `queue_metrics:enabled` Redis key; these gate emitter construction + consumer boot.
+    QUEUE_METRICS_EMIT_ENABLED: z.string().default("0"),
+    QUEUE_METRICS_CONSUMER_ENABLED: z.string().default("0"),
+    QUEUE_METRICS_STREAM_SHARD_COUNT: z.coerce.number().int().default(4),
+    QUEUE_METRICS_CONSUMER_BATCH_SIZE: z.coerce.number().int().default(1000),
+    // Counter stream (exact counts, loss-intolerant). Unset host => the run-queue Redis;
+    // set it to a dedicated instance so counter backlog never competes with the run queue.
+    QUEUE_METRICS_REDIS_HOST: z.string().optional(),
+    QUEUE_METRICS_REDIS_PORT: z.coerce.number().optional(),
+    QUEUE_METRICS_REDIS_USERNAME: z.string().optional(),
+    QUEUE_METRICS_REDIS_PASSWORD: z.string().optional(),
+    QUEUE_METRICS_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"),
+    // Default depends on where the stream lives: see metricsDefinition() in
+    // queueMetrics.server.ts (2M on the shared run-queue Redis, 8M on a dedicated one).
+    QUEUE_METRICS_COUNTER_STREAM_MAXLEN: z.coerce.number().int().optional(),
+    // TTL (seconds) on the per-(queue,op) cumulative odometer key, refreshed on every write.
+    // Idle-past-TTL queues purge and self-heal (restart from 1) on return; default 7 days.
+    QUEUE_METRICS_COUNTER_ODOMETER_TTL_SECONDS: z.coerce.number().int().default(604_800),
+    // Per-env distinct queue_name cap (0 = unlimited); overflow maps to "__overflow__".
+    QUEUE_METRICS_MAX_QUEUE_NAMES_PER_ENV: z.coerce.number().int().default(1000),
+    QUEUE_METRICS_MAX_CONCURRENCY_KEYS_PER_QUEUE: z.coerce.number().int().default(10_000),
+    // Fraction (0..1) of ops that emit a gauge; counters are never sampled. Dial below 1
+    // only if EngineCPU is too high in slow-path-heavy regions (hurts low-traffic queues).
+    QUEUE_METRICS_GAUGE_SAMPLE_RATE: z.coerce.number().min(0).max(1).default(1),
     RUN_ENGINE_WORKER_SHUTDOWN_TIMEOUT_MS: z.coerce.number().int().default(60_000),
     RUN_ENGINE_RETRY_WARM_START_THRESHOLD_MS: z.coerce.number().int().default(30_000),
     RUN_ENGINE_PROCESS_WORKER_QUEUE_DEBOUNCE_MS: z.coerce.number().int().default(200),
diff --git a/apps/webapp/app/hooks/useMetricResourceQuery.ts b/apps/webapp/app/hooks/useMetricResourceQuery.ts
new file mode 100644
index 00000000000..8cb8faec507
--- /dev/null
+++ b/apps/webapp/app/hooks/useMetricResourceQuery.ts
@@ -0,0 +1,109 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+import { useInterval } from "./useInterval";
+
+export type MetricResourceRow = Record<string, number | string | null>;
+
+type MetricResourceResponse =
+  | { success: true; data: { rows: MetricResourceRow[] } }
+  | { success: false; error: string };
+
+export type MetricResourceTimeRange = {
+  period: string | null;
+  from: string | null;
+  to: string | null;
+};
+
+export type MetricResourceQueryOptions = {
+  organizationId: string;
+  projectId: string;
+  environmentId: string;
+  timeRange: MetricResourceTimeRange;
+  defaultPeriod: string;
+  queues?: string[];
+  fillGaps?: boolean;
+  refreshIntervalMs?: number;
+};
+
+/**
+ * Client-fetch a TRQL query from the metric resource route: own loading state, interval + focus
+ * refresh, abort on change/unmount. `queues` is keyed as JSON so names with commas survive.
+ */
+export function useMetricResourceQuery(query: string, opts: MetricResourceQueryOptions) {
+  const [rows, setRows] = useState<MetricResourceRow[] | null>(null);
+  const [isLoading, setIsLoading] = useState(true);
+  const [failed, setFailed] = useState(false);
+  const abortRef = useRef<AbortController | null>(null);
+
+  const {
+    organizationId,
+    projectId,
+    environmentId,
+    defaultPeriod,
+    fillGaps,
+    refreshIntervalMs = 60_000,
+  } = opts;
+  const { period, from, to } = opts.timeRange;
+  const queuesKey = opts.queues?.length ? JSON.stringify(opts.queues) : undefined; // comma-safe
+
+  const load = useCallback(() => {
+    abortRef.current?.abort();
+    const controller = new AbortController();
+    abortRef.current = controller;
+    setIsLoading(true);
+    fetch("/resources/metric", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        query,
+        scope: "environment",
+        period: period ?? (from || to ? null : defaultPeriod),
+        from,
+        to,
+        fillGaps: !!fillGaps,
+        organizationId,
+        projectId,
+        environmentId,
+        ...(queuesKey !== undefined ? { queues: JSON.parse(queuesKey) as string[] } : {}),
+      }),
+      signal: controller.signal,
+    })
+      .then((res) => res.json() as Promise<MetricResourceResponse>)
+      .then((data) => {
+        if (controller.signal.aborted) return;
+        if (data.success) {
+          setRows(data.data.rows);
+          setFailed(false);
+        } else {
+          setFailed(true);
+        }
+        setIsLoading(false);
+      })
+      .catch((error) => {
+        if (error instanceof DOMException && error.name === "AbortError") return;
+        if (!controller.signal.aborted) {
+          setFailed(true);
+          setIsLoading(false);
+        }
+      });
+  }, [
+    query,
+    period,
+    from,
+    to,
+    defaultPeriod,
+    fillGaps,
+    organizationId,
+    projectId,
+    environmentId,
+    queuesKey,
+  ]);
+
+  useEffect(() => {
+    load();
+    return () => abortRef.current?.abort();
+  }, [load]);
+
+  useInterval({ interval: refreshIntervalMs, onLoad: false, onFocus: true, callback: load });
+
+  return { rows: rows ?? [], isLoading, showLoading: isLoading && !rows, failed };
+}
diff --git a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
index 971fc9a3033..d831568248d 100644
--- a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
+++ b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts
@@ -550,7 +550,186 @@ const llmDashboard: BuiltInDashboard = {
   },
 };
 
-const builtInDashboards: BuiltInDashboard[] = [overviewDashboard, llmDashboard];
+const queuesDashboard: BuiltInDashboard = {
+  key: "queues",
+  title: "Queues",
+  filters: ["queues"],
+  layout: {
+    version: "1",
+    layout: [
+      { i: "env-used", x: 0, y: 0, w: 3, h: 4 },
+      { i: "env-limit", x: 3, y: 0, w: 3, h: 4 },
+      { i: "env-avail", x: 6, y: 0, w: 3, h: 4 },
+      { i: "env-sat", x: 9, y: 0, w: 3, h: 4 },
+      { i: "sat-time", x: 0, y: 4, w: 6, h: 9 },
+      { i: "used-limit", x: 6, y: 4, w: 6, h: 9 },
+      { i: "t-pressure", x: 0, y: 13, w: 12, h: 2, minH: 2, maxH: 2 },
+      { i: "pressure", x: 0, y: 15, w: 12, h: 11 },
+      { i: "t-trends", x: 0, y: 26, w: 12, h: 2, minH: 2, maxH: 2 },
+      { i: "running-q", x: 0, y: 28, w: 6, h: 9 },
+      { i: "queued-q", x: 6, y: 28, w: 6, h: 9 },
+      { i: "throttled-q", x: 0, y: 37, w: 6, h: 9 },
+      { i: "throughput", x: 6, y: 37, w: 6, h: 9 },
+      { i: "wait-pct", x: 0, y: 46, w: 12, h: 9 },
+    ],
+    widgets: {
+      "env-used": {
+        title: "Concurrency in use",
+        query: `SELECT argMax(max_env_running, bucket_start) AS in_use\nFROM env_metrics`,
+        display: { type: "bignumber", column: "in_use", aggregation: "max", abbreviate: false },
+      },
+      "env-limit": {
+        title: "Environment limit",
+        query: `SELECT argMax(max_env_limit, bucket_start) AS env_limit\nFROM env_metrics`,
+        display: { type: "bignumber", column: "env_limit", aggregation: "max", abbreviate: false },
+      },
+      "env-avail": {
+        title: "Available slots",
+        query: `SELECT argMax(max_env_limit, bucket_start) - argMax(max_env_running, bucket_start) AS available\nFROM env_metrics`,
+        display: { type: "bignumber", column: "available", aggregation: "max", abbreviate: false },
+      },
+      "env-sat": {
+        title: "Env saturation",
+        query: `SELECT round(argMax(max_env_running, bucket_start) * 100.0 / nullIf(argMax(max_env_limit, bucket_start), 0), 1) AS saturation\nFROM env_metrics`,
+        display: {
+          type: "bignumber",
+          column: "saturation",
+          aggregation: "max",
+          abbreviate: false,
+          suffix: "%",
+        },
+      },
+      "sat-time": {
+        title: "Environment saturation over time",
+        query: `SELECT timeBucket() AS t,\n  round(max(max_env_running) * 100.0 / nullIf(max(max_env_limit), 0), 1) AS saturation\nFROM env_metrics\nGROUP BY t\nORDER BY t`,
+        display: {
+          type: "chart",
+          chartType: "line",
+          xAxisColumn: "t",
+          yAxisColumns: ["saturation"],
+          groupByColumn: null,
+          stacked: false,
+          sortByColumn: null,
+          sortDirection: "asc",
+          aggregation: "max",
+        },
+      },
+      "used-limit": {
+        title: "Concurrency used vs limit",
+        query: `SELECT timeBucket() AS t,\n  max(max_env_running) AS used,\n  max(max_env_limit) AS limit\nFROM env_metrics\nGROUP BY t\nORDER BY t`,
+        // Ungrouped gauges: carry the last known used/limit across idle buckets instead of dropping to 0.
+        fillGaps: true,
+        display: {
+          type: "chart",
+          chartType: "line",
+          xAxisColumn: "t",
+          yAxisColumns: ["used", "limit"],
+          groupByColumn: null,
+          stacked: false,
+          sortByColumn: null,
+          sortDirection: "asc",
+          aggregation: "max",
+        },
+      },
+      "t-pressure": { title: "Queue pressure", query: "", display: { type: "title" } },
+      pressure: {
+        title: "Queue pressure",
+        query: `SELECT queue,\n  argMax(max_running, bucket_start) AS running,\n  argMax(max_queued, bucket_start) AS queued,\n  argMax(max_limit, bucket_start) AS limit,\n  running + queued AS demand,\n  max(max_queued) AS peak_queued,\n  sum(throttled_count) AS throttled,\n  multiIf(running >= limit AND queued > 0, 'queue-limited', queued > 0, 'backlogged', 'healthy') AS status\nFROM queue_metrics\nGROUP BY queue\nORDER BY peak_queued DESC`,
+        display: {
+          type: "table",
+          prettyFormatting: true,
+          sorting: [{ id: "peak_queued", desc: true }],
+        },
+      },
+      "t-trends": { title: "Per-queue trends", query: "", display: { type: "title" } },
+      "running-q": {
+        title: "Running by queue",
+        query: `SELECT timeBucket() AS t, queue, max(max_running) AS running\nFROM queue_metrics\nGROUP BY t, queue\nORDER BY t`,
+        // Grouped gauge: carry each queue's running across idle buckets (per-group LOCF).
+        fillGaps: true,
+        display: {
+          type: "chart",
+          chartType: "line",
+          xAxisColumn: "t",
+          yAxisColumns: ["running"],
+          groupByColumn: "queue",
+          stacked: false,
+          sortByColumn: null,
+          sortDirection: "asc",
+          aggregation: "max",
+        },
+      },
+      "queued-q": {
+        title: "Queue depth (backlog) by queue",
+        query: `SELECT timeBucket() AS t, queue, max(max_queued) AS queued\nFROM queue_metrics\nGROUP BY t, queue\nORDER BY t`,
+        // Grouped gauge: carry each queue's backlog across idle buckets (per-group LOCF).
+        fillGaps: true,
+        display: {
+          type: "chart",
+          chartType: "line",
+          xAxisColumn: "t",
+          yAxisColumns: ["queued"],
+          groupByColumn: "queue",
+          stacked: false,
+          sortByColumn: null,
+          sortDirection: "asc",
+          aggregation: "max",
+        },
+      },
+      "throttled-q": {
+        title: "Throttled buckets by queue",
+        query: `SELECT timeBucket() AS t, queue, sum(throttled_count) AS throttled\nFROM queue_metrics\nGROUP BY t, queue\nORDER BY t`,
+        // Grouped counter: per-group zero-fill so idle buckets read 0, not a gap.
+        fillGaps: true,
+        display: {
+          type: "chart",
+          chartType: "bar",
+          xAxisColumn: "t",
+          yAxisColumns: ["throttled"],
+          groupByColumn: "queue",
+          stacked: true,
+          sortByColumn: null,
+          sortDirection: "asc",
+          aggregation: "sum",
+        },
+      },
+      throughput: {
+        title: "Enqueued vs started",
+        // Counter states merge per queue, then sum outside: a single merge across queues
+        // mixes unrelated odometers and returns wrong totals.
+        query: `SELECT t, sum(enq) AS enqueued, sum(st) AS started\nFROM (\n  SELECT timeBucket() AS t, queue,\n    deltaSumTimestampMerge(enqueue_delta) AS enq,\n    deltaSumTimestampMerge(started_delta) AS st\n  FROM queue_metrics\n  GROUP BY t, queue\n)\nGROUP BY t\nORDER BY t`,
+        display: {
+          type: "chart",
+          chartType: "line",
+          xAxisColumn: "t",
+          yAxisColumns: ["enqueued", "started"],
+          groupByColumn: null,
+          stacked: false,
+          sortByColumn: null,
+          sortDirection: "asc",
+          aggregation: "sum",
+        },
+      },
+      "wait-pct": {
+        title: "Scheduling delay p50/p95/p99 (ms)",
+        query: `SELECT timeBucket() AS t,\n  round(quantilesTDigestMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[1]) AS p50,\n  round(quantilesTDigestMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[3]) AS p95,\n  round(quantilesTDigestMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[4]) AS p99\nFROM env_metrics\nGROUP BY t\nORDER BY t`,
+        display: {
+          type: "chart",
+          chartType: "line",
+          xAxisColumn: "t",
+          yAxisColumns: ["p50", "p95", "p99"],
+          groupByColumn: null,
+          stacked: false,
+          sortByColumn: null,
+          sortDirection: "asc",
+          aggregation: "max",
+        },
+      },
+    },
+  },
+};
+
+const builtInDashboards: BuiltInDashboard[] = [overviewDashboard, llmDashboard, queuesDashboard];
 
 export function builtInDashboardList(): BuiltInDashboard[] {
   return builtInDashboards;
diff --git a/apps/webapp/app/presenters/v3/MetricDashboardPresenter.server.ts b/apps/webapp/app/presenters/v3/MetricDashboardPresenter.server.ts
index df43864b53a..0b84e971b2f 100644
--- a/apps/webapp/app/presenters/v3/MetricDashboardPresenter.server.ts
+++ b/apps/webapp/app/presenters/v3/MetricDashboardPresenter.server.ts
@@ -37,6 +37,9 @@ export const Widget = z.object({
   title: z.string(),
   query: z.string().default(""),
   display: QueryWidgetConfig,
+  // Opt into server-side gap fill (carry-forward for gauges, zero-fill for counters).
+  // Top-level rather than in `display` because display config is client-only and never reaches the query POST.
+  fillGaps: z.boolean().optional(),
 });
 
 export type Widget = z.infer<typeof Widget>;
diff --git a/apps/webapp/app/presenters/v3/QueueAllocationPresenter.server.ts b/apps/webapp/app/presenters/v3/QueueAllocationPresenter.server.ts
new file mode 100644
index 00000000000..c7a8166b6a3
--- /dev/null
+++ b/apps/webapp/app/presenters/v3/QueueAllocationPresenter.server.ts
@@ -0,0 +1,94 @@
+import { TaskQueueType, type Prisma } from "@trigger.dev/database";
+import { type AuthenticatedEnvironment } from "~/services/apiAuth.server";
+import { engine } from "~/v3/runEngine.server";
+import { BasePresenter } from "./basePresenter.server";
+
+const MAX_ALLOCATION_QUEUES = 500;
+
+export type QueueAllocationItem = {
+  id: string;
+  name: string;
+  type: "task" | "custom";
+  running: number;
+  queued: number;
+  paused: boolean;
+  /** Explicit per-queue limit; null means the queue floats up to the env limit. */
+  limit: number | null;
+  overridden: boolean;
+};
+
+export type QueueAllocation = {
+  queues: QueueAllocationItem[];
+  totalQueues: number;
+  truncated: boolean;
+  /** Sum of explicit limits, each clamped to the env limit. */
+  allocated: number;
+  unlimitedCount: number;
+};
+
+/** Allocation view data: each queue in the environment (up to the cap) plus its live counts. */
+export class QueueAllocationPresenter extends BasePresenter {
+  public async call({
+    environment,
+  }: {
+    environment: AuthenticatedEnvironment;
+  }): Promise<QueueAllocation> {
+    const where: Prisma.TaskQueueWhereInput = {
+      runtimeEnvironmentId: environment.id,
+      version: "V2",
+    };
+
+    const countPromise = this._replica.taskQueue.count({ where });
+    const rowsPromise = this._replica.taskQueue.findMany({
+      where,
+      select: {
+        friendlyId: true,
+        name: true,
+        type: true,
+        paused: true,
+        concurrencyLimit: true,
+        concurrencyLimitOverriddenAt: true,
+      },
+      orderBy: { orderableName: "asc" },
+      take: MAX_ALLOCATION_QUEUES,
+    });
+    const [totalQueues, rows] = await Promise.all([countPromise, rowsPromise]);
+
+    const queueNames = rows.map((row) => row.name);
+    const [queuedByQueue, runningByQueue] = await Promise.all([
+      engine.lengthOfQueues(environment, queueNames),
+      engine.currentConcurrencyOfQueues(environment, queueNames),
+    ]);
+
+    const envLimit = environment.maximumConcurrencyLimit;
+    const items = rows.map((row): QueueAllocationItem => ({
+      id: row.friendlyId,
+      name: row.name.replace(/^task\//, ""),
+      type: row.type === TaskQueueType.VIRTUAL ? "task" : "custom",
+      running: runningByQueue[row.name] ?? 0,
+      queued: queuedByQueue[row.name] ?? 0,
+      paused: row.paused,
+      limit: row.concurrencyLimit,
+      overridden: row.concurrencyLimitOverriddenAt !== null,
+    }));
+
+    // Explicit limits add to `allocated` (clamped to the env limit); null limits are counted.
+    let allocated = 0;
+    let unlimitedCount = 0;
+    for (const { concurrencyLimit } of rows) {
+      if (concurrencyLimit === null) {
+        unlimitedCount++;
+      } else {
+        allocated += Math.min(concurrencyLimit, envLimit);
+      }
+    }
+
+    return {
+      queues: items,
+      totalQueues,
+      truncated: totalQueues > rows.length,
+      allocated,
+      unlimitedCount,
+    };
+  }
+}
diff --git a/apps/webapp/app/presenters/v3/QueueListPresenter.server.ts b/apps/webapp/app/presenters/v3/QueueListPresenter.server.ts
index 024a1342b0a..751d4b0a602 100644
--- a/apps/webapp/app/presenters/v3/QueueListPresenter.server.ts
+++ b/apps/webapp/app/presenters/v3/QueueListPresenter.server.ts
@@ -3,6 +3,8 @@ import type { Prisma } from "@trigger.dev/database";
 import { TaskQueueType } from "@trigger.dev/database";
 import { type PrismaClientOrTransaction } from "~/db.server";
 import { type AuthenticatedEnvironment } from "~/services/apiAuth.server";
+import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server";
+import { logger } from "~/services/logger.server";
 import { determineEngineVersion } from "~/v3/engineVersion.server";
 import { engine } from "~/v3/runEngine.server";
 import { BasePresenter } from "./basePresenter.server";
@@ -13,6 +15,12 @@ type QueueListEngine = Pick<RunEngine, "lengthOfQueues" | "currentConcurrencyOfQ
 export const QUEUE_LIST_DEFAULT_ITEMS_PER_PAGE = 25;
 const MAX_ITEMS_PER_PAGE = 100;
 
+export type QueueListSort = "busiest" | "queued" | "name";
+
+/** Ranking reads recent aggregated gauges, so ordering is a stable snapshot, not a live sort. */
+export const QUEUE_RANKING_WINDOW_MINUTES = 15;
+const MAX_RANKED_QUEUES = 5000;
+
 const typeToDBQueueType: Record<"task" | "custom", TaskQueueType> = {
   task: TaskQueueType.VIRTUAL,
   custom: TaskQueueType.NAMED,
@@ -30,6 +38,38 @@ const queueListSelect = {
   paused: true,
 } satisfies Prisma.TaskQueueSelect;
 
+type QueueListRow = Prisma.TaskQueueGetPayload<{ select: typeof queueListSelect }>;
+
+type QueueListItem = ReturnType<typeof toQueueItem>;
+
+type QueueListPagination =
+  | { mode: "filtered"; currentPage: number; hasMore: boolean }
+  | { mode: "unfiltered"; currentPage: number; totalPages: number; count: number };
+
+// The `?: undefined` markers keep every key reachable across the union, so consumers
+// can destructure before narrowing on `success`.
+export type QueueListResult =
+  | {
+      success: false;
+      code: string;
+      totalQueues: number;
+      hasFilters: boolean;
+      queues?: undefined;
+      pagination?: undefined;
+    }
+  | {
+      success: true;
+      queues: QueueListItem[];
+      pagination: QueueListPagination;
+      totalQueues?: number;
+      hasFilters: boolean;
+      code?: undefined;
+    };
+
+function formatClickhouseDateTime(date: Date): string {
+  return date.toISOString().substring(0, 19).split("T").join(" ");
+}
+
 function buildQueueListWhere(
   environmentId: string,
   query: string | undefined,
@@ -70,13 +110,15 @@ export class QueueListPresenter extends BasePresenter {
     query,
     page,
     type,
+    sort = "name",
   }: {
     environment: AuthenticatedEnvironment;
     query?: string;
     page: number;
     perPage?: number;
     type?: "task" | "custom";
-  }) {
+    sort?: QueueListSort;
+  }): Promise<QueueListResult> {
     const hasFilters = Boolean(query?.trim()) || type !== undefined;
 
     const engineVersion = await determineEngineVersion({ environment });
@@ -110,6 +152,18 @@ export class QueueListPresenter extends BasePresenter {
       };
     }
 
+    if (sort !== "name") {
+      // Ranking is additive: any failure or unsupported input falls back to name order.
+      try {
+        const ranked = await this.getRankedQueues(environment, query, page, type, sort);
+        if (ranked) {
+          return ranked;
+        }
+      } catch (error) {
+        logger.warn("Queue ranking unavailable, falling back to name order", { error });
+      }
+    }
+
     if (hasFilters) {
       const { queues, hasMore } = await this.getFilteredQueues(environment, query, page, type);
 
@@ -143,6 +197,132 @@ export class QueueListPresenter extends BasePresenter {
     };
   }
 
+  /**
+   * Builds one page for the activity sorts: queues ranked by ClickHouse come first, then the
+   * unranked remainder in name order. Null when `type` is set or ranked exceeds the cap.
+   */
+  private async getRankedQueues(
+    environment: AuthenticatedEnvironment,
+    query: string | undefined,
+    page: number,
+    type: "task" | "custom" | undefined,
+    sort: Exclude<QueueListSort, "name">
+  ) {
+    if (type !== undefined) {
+      return null;
+    }
+
+    const clickhouse = await clickhouseFactory.getClickhouseForOrganization(
+      environment.organizationId,
+      "query"
+    );
+
+    // The window start is floored to the minute so repeated page loads within that minute
+    // produce identical query text and can share ClickHouse query-cache entries.
+    const windowStartMs =
+      Math.floor((Date.now() - QUEUE_RANKING_WINDOW_MINUTES * 60 * 1000) / 60_000) * 60_000;
+    const rankingArgs = {
+      organizationId: environment.organizationId,
+      projectId: environment.projectId,
+      environmentId: environment.id,
+      startTime: formatClickhouseDateTime(new Date(windowStartMs)),
+      nameContains: query?.trim() ?? "", // empty string when there is no search text
+    };
+
+    const offset = (page - 1) * this.perPage; // zero for page 1
+
+    // One scan returns the page and the total ranked count (window function).
+    const [pageError, pageRows] = await clickhouse.queueMetrics.ranking({
+      ...rankingArgs,
+      byQueuedOnly: sort === "queued" ? 1 : 0,
+      limit: this.perPage,
+      offset,
+    });
+    if (pageError) {
+      throw pageError;
+    }
+
+    let ranked = pageRows?.[0]?.ranked_total ?? 0; // read off the first row; 0 if no rows
+    if (ranked === 0 && offset > 0) {
+      // No rows on a later page: the total is unknown, so fetch it alone for the slot math.
+      const [countError, countRows] = await clickhouse.queueMetrics.rankingCount(rankingArgs);
+      if (countError) {
+        throw countError;
+      }
+      ranked = countRows?.[0]?.ranked ?? 0;
+    }
+    if (ranked > MAX_RANKED_QUEUES) {
+      return null;
+    }
+
+    const where = buildQueueListWhere(environment.id, query, type);
+    const totalQueues = await this._replica.taskQueue.count({ where });
+
+    let rankedPageQueues: QueueListRow[] = [];
+    if ((pageRows?.length ?? 0) > 0) {
+      const rankedNames = (pageRows ?? []).map((row) => row.queue_name);
+      rankedPageQueues = await this.findQueuesByNames(where, rankedNames);
+    }
+
+    // Tail of the page: name-ordered queues that have no recent metrics. Slot math uses the
+    // ClickHouse counts so pages never overlap, even if some ranked names no longer exist.
+    const rankedSlots = Math.min(Math.max(ranked - offset, 0), this.perPage);
+    const tailNeeded = this.perPage - rankedSlots;
+    let tailQueues: QueueListRow[] = [];
+    if (tailNeeded > 0) {
+      let excludedNames: string[] = [];
+      if (ranked > 0) {
+        const [allError, allRows] = await clickhouse.queueMetrics.rankingNames({
+          ...rankingArgs,
+          limit: MAX_RANKED_QUEUES,
+        });
+        if (allError) {
+          throw allError;
+        }
+        excludedNames = (allRows ?? []).map((row) => row.queue_name);
+      }
+      // AND keeps the search's name filter intact alongside the exclusion (a spread
+      // would overwrite one name condition with the other).
+      tailQueues = await this._replica.taskQueue.findMany({
+        where: { AND: [where, { name: { notIn: excludedNames } }] },
+        select: queueListSelect,
+        orderBy: {
+          orderableName: "asc",
+        },
+        skip: Math.max(0, offset - ranked), // tail rows already consumed by earlier pages
+        take: tailNeeded,
+      });
+    }
+
+    return {
+      success: true as const,
+      queues: await this.enrichQueues(environment, [...rankedPageQueues, ...tailQueues]),
+      pagination: {
+        mode: "unfiltered" as const,
+        currentPage: page,
+        totalPages: Math.max(1, Math.ceil(totalQueues / this.perPage)), // at least one page
+        count: totalQueues,
+      },
+      totalQueues,
+      hasFilters: Boolean(query?.trim()) || type !== undefined, // `type` is undefined here
+    };
+  }
+
+  /** Fetches queues matching `where` by name, returned in `names` order; misses are skipped. */
+  private async findQueuesByNames(
+    where: Prisma.TaskQueueWhereInput,
+    names: string[]
+  ): Promise<QueueListRow[]> {
+    if (names.length === 0) return [];
+    const rows = await this._replica.taskQueue.findMany({
+      where: { AND: [where, { name: { in: names } }] },
+      select: queueListSelect,
+    });
+    const rowByName = new Map<string, (typeof rows)[number]>();
+    for (const row of rows) rowByName.set(row.name, row);
+    return names.flatMap((name) => rowByName.get(name) ?? []);
+  }
+
   private async getFilteredQueues(
     environment: AuthenticatedEnvironment,
     query: string | undefined,
diff --git a/apps/webapp/app/presenters/v3/QueueMetricsPresenter.server.ts b/apps/webapp/app/presenters/v3/QueueMetricsPresenter.server.ts
new file mode 100644
index 00000000000..a36c402dda7
--- /dev/null
+++ b/apps/webapp/app/presenters/v3/QueueMetricsPresenter.server.ts
@@ -0,0 +1,139 @@
+import { type AuthenticatedEnvironment } from "~/services/apiAuth.server";
+import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server";
+import { logger } from "~/services/logger.server";
+
+export type QueueListMetric = {
+  p50WaitMs: number | null; // null when the summary row's value is not finite
+  p95WaitMs: number | null; // null when the summary row's value is not finite
+  peakQueued: number; // copied from the summary row's peak_queued
+  /** Equal-width buckets, oldest first; gaps repeat the previous value (0 before the first). */
+  depthSparkline: number[];
+};
+
+export type QueueListMetrics = {
+  bucketStartMs: number; // epoch ms of the first sparkline bucket (aligned to the bucket grid)
+  bucketIntervalMs: number; // width of every sparkline bucket, in ms
+  byQueue: Map<string, QueueListMetric>; // keyed by queue name; only queues with a summary row
+};
+
+const SPARKLINE_POINTS = 48; // target bucket count: bucket width is range / 48, at least 60s
+
+function formatClickhouseDateTime(date: Date): string {
+  return date.toISOString().substring(0, 19).split("T").join(" "); // UTC "YYYY-MM-DD HH:MM:SS"
+}
+
+function finiteOrNull(value: number): number | null {
+  return !Number.isFinite(value) ? null : value; // NaN and ±Infinity map to null
+}
+
+export class QueueMetricsPresenter {
+  /**
+   * Wait-time percentiles, peak depth and a depth sparkline for the given queue names. The
+   * window is snapped outward to the bucket grid and the sparkline spans all of it. Returns
+   * an empty map instead of throwing when `queueNames` is empty or ClickHouse fails.
+   */
+  public async getQueueListMetrics({
+    environment,
+    queueNames,
+    from,
+    to,
+  }: {
+    environment: AuthenticatedEnvironment;
+    queueNames: string[];
+    from: Date;
+    to: Date;
+  }): Promise<QueueListMetrics> {
+    const rangeSeconds = Math.max(60, Math.round((to.getTime() - from.getTime()) / 1000));
+    const bucketSeconds = Math.max(60, Math.round(rangeSeconds / SPARKLINE_POINTS));
+    const bucketIntervalMs = bucketSeconds * 1000;
+    // Snap the window outward onto the bucket grid: the start floors, the end ceils.
+    const bucketStartMs = Math.floor(from.getTime() / bucketIntervalMs) * bucketIntervalMs;
+    const endMs = Math.ceil(to.getTime() / bucketIntervalMs) * bucketIntervalMs;
+    const numBuckets = Math.max(1, Math.round((endMs - bucketStartMs) / bucketIntervalMs));
+
+    const empty: QueueListMetrics = {
+      bucketStartMs,
+      bucketIntervalMs,
+      byQueue: new Map(),
+    };
+
+    if (queueNames.length === 0) {
+      return empty;
+    }
+
+    try {
+      const clickhouse = await clickhouseFactory.getClickhouseForOrganization(
+        environment.organizationId,
+        "query"
+      );
+
+      // Querying the snapped bounds (not the raw from/to) means repeated loads within a
+      // bucket produce identical params and share ClickHouse query-cache entries. The grid
+      // above spans the same window, so the newest bucket is kept instead of being dropped.
+      const ids = {
+        organizationId: environment.organizationId,
+        projectId: environment.projectId,
+        environmentId: environment.id,
+        queueNames,
+        startTime: formatClickhouseDateTime(new Date(bucketStartMs)),
+        endTime: formatClickhouseDateTime(new Date(endMs)),
+      };
+
+      const [summaryResult, sparklineResult] = await Promise.all([
+        clickhouse.queueMetrics.listSummary(ids),
+        clickhouse.queueMetrics.depthSparklines({ ...ids, bucketSeconds }),
+      ]);
+
+      const [summaryError, summaryRows] = summaryResult;
+      const [sparklineError, sparklineRows] = sparklineResult;
+
+      if (summaryError || sparklineError) {
+        logger.warn("QueueMetricsPresenter: clickhouse query failed", {
+          summaryError: summaryError?.message,
+          sparklineError: sparklineError?.message,
+        });
+        return empty;
+      }
+
+      // Bucket -> depth per queue, mapped onto the aligned grid and forward-filled.
+      const depthsByQueue = new Map<string, Map<number, number>>();
+      for (const row of sparklineRows ?? []) {
+        const bucketMs = Date.parse(row.bucket.replace(" ", "T") + "Z");
+        if (Number.isNaN(bucketMs)) continue;
+        const index = Math.round((bucketMs - bucketStartMs) / bucketIntervalMs);
+        if (index < 0 || index >= numBuckets) continue; // row falls outside the grid
+        let byIndex = depthsByQueue.get(row.queue_name);
+        if (!byIndex) {
+          byIndex = new Map();
+          depthsByQueue.set(row.queue_name, byIndex);
+        }
+        byIndex.set(index, row.depth);
+      }
+
+      const byQueue = new Map<string, QueueListMetric>();
+      for (const row of summaryRows ?? []) {
+        const byIndex = depthsByQueue.get(row.queue_name);
+        const sparkline: number[] = new Array(numBuckets);
+        let last = 0; // depth is taken as 0 until the first observed bucket
+        for (let i = 0; i < numBuckets; i++) {
+          const value = byIndex?.get(i);
+          if (value !== undefined) last = value;
+          sparkline[i] = last;
+        }
+        byQueue.set(row.queue_name, {
+          p50WaitMs: finiteOrNull(row.p50_wait_ms),
+          p95WaitMs: finiteOrNull(row.p95_wait_ms),
+          peakQueued: row.peak_queued,
+          depthSparkline: sparkline,
+        });
+      }
+
+      return { bucketStartMs, bucketIntervalMs, byQueue };
+    } catch (error) {
+      logger.warn("QueueMetricsPresenter: failed to load queue metrics", {
+        error: error instanceof Error ? error.message : String(error),
+      });
+      return empty;
+    }
+  }
+}
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx
index 5fa237cee6e..d529fdf0d22 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx
@@ -38,6 +38,7 @@ import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstan
 import { requireUser } from "~/services/session.server";
 import { cn } from "~/utils/cn";
 import { EnvironmentParamSchema } from "~/utils/pathBuilder";
+import { canAccessQueueMetricsUi } from "~/v3/canAccessQueueMetricsUi.server";
 import { QueryScopeSchema } from "~/v3/querySchemas";
 import { useCurrentPlan } from "../_app.orgs.$organizationSlug/route";
 import { MetricWidget } from "../resources.metric";
@@ -50,6 +51,15 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
   const user = await requireUser(request);
   const { projectParam, organizationSlug, envParam, dashboardKey } = ParamSchema.parse(params);
 
+  // The built-in "queues" dashboard is part of the metrics UI (unlinked, but reachable by
+  // URL), so gate it per-org like the rest of the Queue Metrics view.
+  if (
+    dashboardKey === "queues" &&
+    !(await canAccessQueueMetricsUi({ userId: user.id, organizationSlug }))
+  ) {
+    throw new Response(undefined, { status: 404, statusText: "Not found" });
+  }
+
   const project = await findProjectBySlug(organizationSlug, projectParam, user.id);
   if (!project) {
     throw new Response(undefined, {
@@ -376,6 +386,7 @@ export function MetricDashboard({
                     promptSlugs={prompts.length > 0 ? prompts : undefined}
                     operations={operations.length > 0 ? operations : undefined}
                     providers={providers.length > 0 ? providers : undefined}
+                    fillGaps={widget.fillGaps}
                     config={widget.display}
                     organizationId={organization.id}
                     projectId={project.id}
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/ExamplesContent.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/ExamplesContent.tsx
index 05b4f4d9b62..3188b5409a6 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/ExamplesContent.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/ExamplesContent.tsx
@@ -3,7 +3,7 @@ import { Header3 } from "~/components/primitives/Headers";
 import { Paragraph } from "~/components/primitives/Paragraph";
 import SegmentedControl from "~/components/primitives/SegmentedControl";
 import type { QueryScope } from "~/services/queryService.server";
-import { querySchemas } from "~/v3/querySchemas";
+import { visibleQuerySchemas } from "~/v3/querySchemas";
 import { TryableCodeBlock } from "./TRQLGuideContent";
 
 // Example queries for the Examples tab
@@ -211,14 +211,14 @@ LIMIT 20`,
   },
 ];
 
-const tableOptions = querySchemas.map((s) => ({ label: s.name, value: s.name }));
+const tableOptions = visibleQuerySchemas.map((s) => ({ label: s.name, value: s.name }));
 
 export function ExamplesContent({
   onTryExample,
 }: {
   onTryExample: (query: string, scope: QueryScope) => void;
 }) {
-  const [selectedTable, setSelectedTable] = useState(querySchemas[0].name);
+  const [selectedTable, setSelectedTable] = useState(visibleQuerySchemas[0].name);
   const filtered = exampleQueries.filter((e) => e.table === selectedTable);
 
   return (
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/TableSchemaContent.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/TableSchemaContent.tsx
index 285a1f68731..9fc6ec32923 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/TableSchemaContent.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/TableSchemaContent.tsx
@@ -4,7 +4,7 @@ import { Badge } from "~/components/primitives/Badge";
 import { CopyableText } from "~/components/primitives/CopyableText";
 import { Paragraph } from "~/components/primitives/Paragraph";
 import SegmentedControl from "~/components/primitives/SegmentedControl";
-import { querySchemas } from "~/v3/querySchemas";
+import { visibleQuerySchemas } from "~/v3/querySchemas";
 
 function ColumnHelpItem({ col }: { col: ColumnSchema }) {
   return (
@@ -43,11 +43,11 @@ function ColumnHelpItem({ col }: { col: ColumnSchema }) {
   );
 }
 
-const tableOptions = querySchemas.map((s) => ({ label: s.name, value: s.name }));
+const tableOptions = visibleQuerySchemas.map((s) => ({ label: s.name, value: s.name }));
 
 export function TableSchemaContent() {
-  const [selectedTable, setSelectedTable] = useState(querySchemas[0].name);
-  const table = querySchemas.find((s) => s.name === selectedTable) ?? querySchemas[0];
+  const [selectedTable, setSelectedTable] = useState(visibleQuerySchemas[0].name);
+  const table = visibleQuerySchemas.find((s) => s.name === selectedTable) ?? visibleQuerySchemas[0];
 
   return (
     <div>
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/AllocationView.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/AllocationView.tsx
new file mode 100644
index 00000000000..8b13afbcb87
--- /dev/null
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/AllocationView.tsx
@@ -0,0 +1,490 @@
+import { Form, useNavigation } from "@remix-run/react";
+import { type ReactNode, useEffect, useMemo, useState } from "react";
+import { BigNumber } from "~/components/metrics/BigNumber";
+import { Badge } from "~/components/primitives/Badge";
+import { Button } from "~/components/primitives/Buttons";
+import { Callout } from "~/components/primitives/Callout";
+import { Dialog, DialogContent, DialogHeader, DialogTrigger } from "~/components/primitives/Dialog";
+import { Input } from "~/components/primitives/Input";
+import { Paragraph } from "~/components/primitives/Paragraph";
+import {
+  Table,
+  TableBody,
+  TableCell,
+  TableHeader,
+  TableHeaderCell,
+  TableRow,
+} from "~/components/primitives/Table";
+import { SimpleTooltip } from "~/components/primitives/Tooltip";
+import { getSeriesColor } from "~/components/code/chartColors";
+import { QueueName } from "~/components/runs/v3/QueueName";
+import { type Environment } from "~/presenters/v3/EnvironmentQueuePresenter.server";
+import {
+  type QueueAllocation,
+  type QueueAllocationItem,
+} from "~/presenters/v3/QueueAllocationPresenter.server";
+import { cn } from "~/utils/cn";
+
+type Drafts = Record<string, number>; // unsaved limit edits, keyed by queue.id
+
+/** Editable per-queue concurrency limits: drafts stay local until reviewed and applied. */
+export function AllocationView({
+  allocation,
+  environment,
+}: {
+  allocation: QueueAllocation;
+  environment: Environment;
+}) {
+  const [drafts, setDrafts] = useState<Drafts>({});
+  const [reviewOpen, setReviewOpen] = useState(false);
+  const navigation = useNavigation();
+  const isSubmitting = navigation.state !== "idle";
+
+  const envLimit = environment.concurrencyLimit;
+  const burstLimit = Math.round(envLimit * environment.burstFactor);
+
+  // Close the review dialog whenever navigation is loading or idle (i.e. not mid-submit).
+  useEffect(() => {
+    if (navigation.state === "loading" || navigation.state === "idle") setReviewOpen(false);
+  }, [navigation.state]);
+
+  // After an apply revalidates the loader, drop drafts that now match the saved limits.
+  useEffect(() => {
+    setDrafts((prev) => {
+      const settled = allocation.queues.filter(
+        (queue) => prev[queue.id] !== undefined && prev[queue.id] === queue.limit
+      );
+      if (settled.length === 0) return prev; // same reference: React skips the re-render
+      const next = { ...prev };
+      for (const queue of settled) delete next[queue.id];
+      return next;
+    });
+  }, [allocation]);
+
+  const draftLimit = (queue: QueueAllocationItem): number | null => drafts[queue.id] ?? queue.limit;
+
+  const draftAllocated = allocation.queues.reduce((sum, queue) => {
+    const limit = draftLimit(queue);
+    return limit === null ? sum : sum + Math.min(limit, envLimit);
+  }, 0);
+
+  const changes = useMemo(
+    () => allocation.queues.filter((q) => drafts[q.id] !== undefined && drafts[q.id] !== q.limit),
+    [allocation.queues, drafts] // stable identity keeps the changesPayload memo effective
+  );
+
+  const unlimitedCount = allocation.queues.filter((queue) => draftLimit(queue) === null).length;
+  const allocationPct = envLimit > 0 ? Math.round((draftAllocated / envLimit) * 100) : 0;
+  const overAllocated = draftAllocated > envLimit;
+
+  const setDraft = (queue: QueueAllocationItem, value: string) => {
+    setDrafts((prev) => {
+      const next = { ...prev };
+      if (value.trim() === "") {
+        delete next[queue.id];
+        return next;
+      }
+      const parsed = parseInt(value, 10);
+      if (!Number.isFinite(parsed) || parsed < 0) return prev;
+      if (parsed === queue.limit) {
+        delete next[queue.id];
+      } else {
+        next[queue.id] = parsed;
+      }
+      return next;
+    });
+  };
+
+  const changesPayload = useMemo(
+    () => JSON.stringify(changes.map((q) => ({ friendlyId: q.id, limit: drafts[q.id] }))),
+    [changes, drafts]
+  );
+
+  const colorByQueue = useMemo(() => {
+    const map = new Map<string, string>();
+    allocation.queues.forEach((queue, i) => map.set(queue.id, getSeriesColor(i)));
+    return map;
+  }, [allocation.queues]);
+  const colorFor = (id: string) => colorByQueue.get(id) ?? "#878C99";
+
+  // Busiest first: the queues you'd rebalance are the ones under load. Colors stay
+  // keyed to the loader order so they don't shift as counts change.
+  const tableQueues = useMemo(
+    () => [...allocation.queues].sort((a, b) => b.running + b.queued - (a.running + a.queued)),
+    [allocation.queues]
+  );
+
+  return (
+    <div className="flex flex-col gap-4 overflow-y-auto p-3">
+      <div className="grid grid-cols-1 gap-3 md:grid-cols-3">
+        <BigNumber
+          title="Environment limit"
+          value={envLimit}
+          suffix={environment.burstFactor > 1 ? `bursts up to ${burstLimit}` : undefined}
+          suffixClassName="text-text-dimmed"
+        />
+        <BigNumber
+          title="Allocated"
+          value={draftAllocated}
+          valueClassName={cn(overAllocated && "text-warning")}
+          suffix={`${allocationPct}% of the environment limit`}
+          suffixClassName={overAllocated ? "text-warning" : "text-text-dimmed"}
+        />
+        <BigNumber
+          title="Queues"
+          value={allocation.totalQueues}
+          suffix={
+            unlimitedCount > 0
+              ? `${unlimitedCount} without a limit (can use up to ${envLimit})`
+              : "all have limits"
+          }
+          suffixClassName="text-text-dimmed"
+        />
+      </div>
+
+      <AllocationBar
+        queues={allocation.queues}
+        draftLimit={draftLimit}
+        envLimit={envLimit}
+        burstLimit={burstLimit}
+        draftAllocated={draftAllocated}
+        colorFor={colorFor}
+      />
+
+      {overAllocated && (
+        <Callout variant="warning">
+          The queue limits add up to more than the environment limit, so queues will compete for
+          concurrency when the environment saturates. Reduce limits to guarantee each queue its
+          allocation.
+        </Callout>
+      )}
+
+      {allocation.truncated && (
+        <Callout variant="info">
+          Showing the first {allocation.queues.length} of {allocation.totalQueues} queues.
+          Allocation totals only include the queues shown.
+        </Callout>
+      )}
+
+      <div className="flex items-center gap-2">
+        <Button
+          type="button"
+          variant="minimal/small"
+          onClick={() => setDrafts({})}
+          disabled={changes.length === 0 || isSubmitting}
+        >
+          Reset changes
+        </Button>
+        <div className="grow" />
+        <Dialog open={reviewOpen} onOpenChange={setReviewOpen}>
+          <DialogTrigger asChild>
+            <Button
+              type="button"
+              variant="primary/small"
+              disabled={changes.length === 0 || isSubmitting}
+            >
+              Review {changes.length} change{changes.length === 1 ? "" : "s"}…
+            </Button>
+          </DialogTrigger>
+          <DialogContent>
+            <DialogHeader>Apply queue limits</DialogHeader>
+            <div className="max-h-64 overflow-y-auto">
+              <Table>
+                <TableHeader>
+                  <TableRow>
+                    <TableHeaderCell>Queue</TableHeaderCell>
+                    <TableHeaderCell alignment="right">Current</TableHeaderCell>
+                    <TableHeaderCell alignment="right">New</TableHeaderCell>
+                  </TableRow>
+                </TableHeader>
+                <TableBody>
+                  {changes.map((queue) => (
+                    <TableRow key={queue.id}>
+                      <TableCell>
+                        <QueueName name={queue.name} type={queue.type} />
+                      </TableCell>
+                      <TableCell alignment="right">{queue.limit ?? "–"}</TableCell>
+                      <TableCell alignment="right">{drafts[queue.id]}</TableCell>
+                    </TableRow>
+                  ))}
+                </TableBody>
+              </Table>
+            </div>
+            <Paragraph variant="small" className="mt-2">
+              Limits apply immediately and are set as overrides, so they survive deploys until
+              removed.
+            </Paragraph>
+            <Form method="post" className="mt-3 flex justify-end">
+              <input type="hidden" name="action" value="allocation-apply" />
+              <input type="hidden" name="changes" value={changesPayload} />
+              <Button type="submit" variant="primary/medium" disabled={isSubmitting}>
+                {isSubmitting
+                  ? "Applying…"
+                  : `Apply ${changes.length} change${changes.length === 1 ? "" : "s"}`}
+              </Button>
+            </Form>
+          </DialogContent>
+        </Dialog>
+      </div>
+
+      <Table containerClassName="border-t">
+        <TableHeader>
+          <TableRow>
+            <TableHeaderCell>Name</TableHeaderCell>
+            <TableHeaderCell alignment="right">Running</TableHeaderCell>
+            <TableHeaderCell alignment="right">Queued</TableHeaderCell>
+            <TableHeaderCell
+              alignment="right"
+              tooltip="The queue's concurrency limit. Queues without a limit can use up to the environment limit."
+            >
+              Limit
+            </TableHeaderCell>
+          </TableRow>
+        </TableHeader>
+        <TableBody>
+          {tableQueues.map((queue) => {
+            const changed = drafts[queue.id] !== undefined && drafts[queue.id] !== queue.limit;
+            return (
+              <TableRow key={queue.id}>
+                <TableCell>
+                  <span className="flex items-center gap-2">
+                    <span
+                      className="size-2 shrink-0 rounded-[2px]"
+                      style={{ backgroundColor: colorFor(queue.id) }}
+                    />
+                    <QueueName name={queue.name} type={queue.type} paused={queue.paused} />
+                    {queue.paused && (
+                      <Badge variant="extra-small" className="text-warning">
+                        Paused
+                      </Badge>
+                    )}
+                    {queue.overridden && (
+                      <Badge variant="extra-small" className="text-text-bright">
+                        Override
+                      </Badge>
+                    )}
+                  </span>
+                </TableCell>
+                <TableCell alignment="right">{queue.running}</TableCell>
+                <TableCell alignment="right">{queue.queued}</TableCell>
+                <TableCell alignment="right">
+                  <span className="flex items-center justify-end gap-2">
+                    {changed && (
+                      <Badge variant="extra-small" className="text-success">
+                        {queue.limit ?? "–"} → {drafts[queue.id]}
+                      </Badge>
+                    )}
+                    <Input
+                      type="number"
+                      min={0}
+                      value={drafts[queue.id] ?? queue.limit ?? ""}
+                      placeholder={String(envLimit)}
+                      onChange={(e) => setDraft(queue, e.target.value)}
+                      disabled={isSubmitting}
+                      className="w-24"
+                      variant="small"
+                    />
+                  </span>
+                </TableCell>
+              </TableRow>
+            );
+          })}
+        </TableBody>
+      </Table>
+    </div>
+  );
+}
+
+const MAX_BAR_SEGMENTS = 24; // largest limits drawn individually; the rest share one segment
+
+/** Capacity bar: one segment per limited queue, an env-limit marker and the burst zone. */
+function AllocationBar({
+  queues,
+  draftLimit,
+  envLimit,
+  burstLimit,
+  draftAllocated,
+  colorFor,
+}: {
+  queues: QueueAllocationItem[];
+  draftLimit: (queue: QueueAllocationItem) => number | null;
+  envLimit: number;
+  burstLimit: number;
+  draftAllocated: number;
+  colorFor: (id: string) => string;
+}) {
+  const limited = queues
+    .map((queue) => ({ queue, limit: draftLimit(queue) }))
+    .filter(
+      (entry): entry is { queue: QueueAllocationItem; limit: number } =>
+        typeof entry.limit === "number" && entry.limit > 0
+    )
+    .sort((a, b) => b.limit - a.limit);
+
+  const top = limited.slice(0, MAX_BAR_SEGMENTS);
+  const rest = limited.slice(MAX_BAR_SEGMENTS);
+  const restTotal = rest.reduce((sum, entry) => sum + entry.limit, 0);
+  const restRunning = rest.reduce((sum, e) => sum + Math.min(e.queue.running, e.limit), 0);
+  // Widths cap each queue at the env limit, as `draftAllocated` does, so they sum to the label.
+  const restAllocated = rest.reduce((sum, e) => sum + Math.min(e.limit, envLimit), 0);
+
+  const hasBurst = burstLimit > envLimit;
+  // The axis runs to the burst ceiling: allocations are guaranteed up to the env
+  // limit, and everything between the limit and burst is shared overflow headroom.
+  const scale = Math.max(draftAllocated, envLimit, burstLimit);
+  if (scale === 0) return null;
+
+  const free = Math.max(0, envLimit - draftAllocated);
+  const limitMarkerPct = (envLimit / scale) * 100;
+  const burstZoneWidthPct = ((Math.min(burstLimit, scale) - envLimit) / scale) * 100;
+
+  return (
+    <div className="flex flex-col gap-1">
+      <div className="relative">
+        <div className="relative h-3 w-full overflow-hidden rounded-sm bg-charcoal-750">
+          {hasBurst && (
+            <SimpleTooltip
+              asChild
+              button={
+                <div
+                  className="absolute inset-y-0"
+                  style={{
+                    left: `${limitMarkerPct}%`,
+                    width: `${burstZoneWidthPct}%`,
+                    backgroundImage:
+                      "repeating-linear-gradient(45deg, rgba(255,255,255,0.06) 0 2px, transparent 2px 6px)",
+                  }}
+                />
+              }
+              content={`Shared burst headroom: beyond the environment limit, queues can burst up to ${burstLimit} combined`}
+              disableHoverableContent
+            />
+          )}
+          <div className="pointer-events-none absolute inset-0 flex gap-px">
+            {top.map((entry) => (
+              <BarSegment
+                key={entry.queue.id}
+                color={colorFor(entry.queue.id)}
+                widthPct={(Math.min(entry.limit, envLimit) / scale) * 100}
+                usagePct={Math.min(entry.queue.running / entry.limit, 1) * 100}
+                tooltip={
+                  <QueueSegmentTooltip
+                    queue={entry.queue}
+                    limit={entry.limit}
+                    envLimit={envLimit}
+                    color={colorFor(entry.queue.id)}
+                  />
+                }
+              />
+            ))}
+            {restTotal > 0 && (
+              <BarSegment
+                color="#878C99"
+                widthPct={(restAllocated / scale) * 100}
+                usagePct={(restRunning / restTotal) * 100}
+                tooltip={`${rest.length} more queues: ${restRunning} of ${restTotal} running`}
+              />
+            )}
+          </div>
+        </div>
+        <div
+          className="absolute inset-y-[-3px] w-px bg-text-bright"
+          style={{ left: `${limitMarkerPct}%` }}
+        />
+      </div>
+      <div className="relative h-4 text-xs text-text-dimmed">
+        <span className="absolute left-0 top-0">
+          {draftAllocated} allocated
+          {free > 0 ? ` · ${free} unallocated` : ""}
+        </span>
+        {hasBurst ? (
+          <>
+            <span
+              className="absolute top-0 -translate-x-1/2 whitespace-nowrap"
+              style={{ left: `${limitMarkerPct}%` }}
+            >
+              Environment limit {envLimit}
+            </span>
+            <span className="absolute right-0 top-0">Burst {burstLimit}</span>
+          </>
+        ) : (
+          <span className="absolute right-0 top-0">Environment limit {envLimit}</span>
+        )}
+      </div>
+    </div>
+  );
+}
+
+/** Tooltip body for one queue's bar segment: name, running vs limit, queued, env share. */
+function QueueSegmentTooltip(props: {
+  queue: QueueAllocationItem;
+  limit: number;
+  envLimit: number;
+  color: string;
+}) {
+  const { queue, limit, envLimit, color } = props;
+  // Whole-number percentage; 0 when the denominator is not positive.
+  const toPct = (part: number, whole: number) =>
+    whole > 0 ? Math.round((part / whole) * 100) : 0;
+  const usedPct = toPct(queue.running, limit);
+  const envSharePct = toPct(limit, envLimit);
+  return (
+    <div className="flex flex-col gap-1.5 p-1 text-left">
+      <span className="flex items-center gap-1.5">
+        <span className="size-2 shrink-0 rounded-[2px]" style={{ backgroundColor: color }} />
+        <QueueName name={queue.name} type={queue.type} paused={queue.paused} />
+        {queue.paused && (
+          <Badge variant="extra-small" className="text-warning">
+            Paused
+          </Badge>
+        )}
+      </span>
+      <div className="grid grid-cols-[auto_1fr] gap-x-3 gap-y-0.5 text-xs">
+        <span className="text-text-dimmed">Running</span>
+        <span className="text-right tabular-nums text-text-bright">
+          {queue.running} of {limit} ({usedPct}%)
+        </span>
+        <span className="text-text-dimmed">Queued</span>
+        <span className="text-right tabular-nums text-text-bright">{queue.queued}</span>
+        <span className="text-text-dimmed">Allocation</span>
+        <span className="text-right tabular-nums text-text-bright">
+          {envSharePct}% of the environment limit
+        </span>
+      </div>
+    </div>
+  );
+}
+
+/**
+ * One queue's slice of the capacity bar. The dim track (width `widthPct`) is the allocation;
+ * the solid overlay (width `usagePct` of the track) is current usage, drawn only when > 0.
+ */
+function BarSegment(props: {
+  color: string;
+  widthPct: number;
+  usagePct: number;
+  tooltip: ReactNode;
+}) {
+  const { color, widthPct, usagePct, tooltip } = props;
+
+  const usageFill = usagePct > 0 && (
+    <div
+      className="absolute inset-y-0 left-0"
+      style={{ width: `${usagePct}%`, backgroundColor: color }}
+    />
+  );
+
+  // The track tints `color` by appending "33" to it (an alpha suffix for #RRGGBB colors).
+  const track = (
+    <div
+      className="pointer-events-auto relative h-full min-w-px overflow-hidden"
+      style={{ width: `${widthPct}%`, backgroundColor: `${color}33` }}
+    >
+      {usageFill}
+    </div>
+  );
+
+  return (
+    <SimpleTooltip asChild button={track} content={tooltip} disableHoverableContent />
+  );
+}
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/route.tsx
index 877b1235a97..24fe2212953 100644
--- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/route.tsx
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/route.tsx
@@ -7,11 +7,11 @@ import {
   RectangleStackIcon,
 } from "@heroicons/react/20/solid";
 import { DialogClose } from "@radix-ui/react-dialog";
-import { Form, useNavigation, type MetaFunction } from "@remix-run/react";
+import { Form, Link, useNavigation, type MetaFunction } from "@remix-run/react";
 import { type ActionFunctionArgs, type LoaderFunctionArgs } from "@remix-run/server-runtime";
 import type { QueueItem } from "@trigger.dev/core/v3/schemas";
 import type { RuntimeEnvironmentType } from "@trigger.dev/database";
-import { useEffect, useState } from "react";
+import { type ReactNode, useEffect, useMemo, useState } from "react";
 import { typedjson, useTypedLoaderData } from "remix-typedjson";
 import { z } from "zod";
 import { ConcurrencyIcon } from "~/assets/icons/ConcurrencyIcon";
@@ -21,7 +21,6 @@ import { AdminDebugTooltip } from "~/components/admin/debugTooltip";
 import { QueuesHasNoTasks } from "~/components/BlankStatePanels";
 import { environmentFullTitle } from "~/components/environments/EnvironmentLabel";
 import { PageBody, PageContainer } from "~/components/layout/AppLayout";
-import { BigNumber } from "~/components/metrics/BigNumber";
 import { Badge } from "~/components/primitives/Badge";
 import { Button, LinkButton, type ButtonVariant } from "~/components/primitives/Buttons";
 import { Callout } from "~/components/primitives/Callout";
@@ -55,6 +54,7 @@ import {
 import { QueueName } from "~/components/runs/v3/QueueName";
 import { env } from "~/env.server";
 import { useAutoRevalidate } from "~/hooks/useAutoRevalidate";
+import { LoadingBarDivider } from "~/components/primitives/LoadingBarDivider";
 import { useEnvironment } from "~/hooks/useEnvironment";
 import { useOrganization } from "~/hooks/useOrganizations";
 import { useProject } from "~/hooks/useProject";
@@ -64,6 +64,24 @@ import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server";
 import { getUserById } from "~/models/user.server";
 import { EnvironmentQueuePresenter } from "~/presenters/v3/EnvironmentQueuePresenter.server";
 import { QueueListPresenter } from "~/presenters/v3/QueueListPresenter.server";
+import {
+  QueueMetricsPresenter,
+  type QueueListMetric,
+} from "~/presenters/v3/QueueMetricsPresenter.server";
+import * as Ariakit from "@ariakit/react";
+import { AppliedFilter } from "~/components/primitives/AppliedFilter";
+import { SelectItem, SelectPopover, SelectProvider } from "~/components/primitives/Select";
+import { TimeFilter, timeFilterFromTo } from "~/components/runs/v3/SharedFilters";
+import { useSearchParams } from "~/hooks/useSearchParam";
+import { parseFiniteInt } from "~/utils/searchParams";
+import { UsageSparkline } from "~/components/primitives/UsageSparkline";
+import { buildActivityTimeAxis } from "~/components/primitives/charts/activityTimeAxis";
+import { Chart, type ChartConfig } from "~/components/primitives/charts/ChartCompound";
+import {
+  useMetricResourceQuery,
+  type MetricResourceTimeRange,
+} from "~/hooks/useMetricResourceQuery";
+import { logger } from "~/services/logger.server";
 import { requireUserId } from "~/services/session.server";
 import { cn } from "~/utils/cn";
 import { ENVIRONMENT_PAUSE_SOURCE_BILLING_LIMIT } from "~/utils/environmentPauseSource";
@@ -72,18 +90,36 @@ import {
   docsPath,
   EnvironmentParamSchema,
   v3BillingPath,
+  v3QueuePath,
   v3RunsPath,
 } from "~/utils/pathBuilder";
 import { concurrencySystem } from "~/v3/services/concurrencySystemInstance.server";
 import { PauseEnvironmentService } from "~/v3/services/pauseEnvironment.server";
 import { PauseQueueService } from "~/v3/services/pauseQueue.server";
 import { useCurrentPlan } from "../_app.orgs.$organizationSlug/route";
+import { BigNumber } from "~/components/metrics/BigNumber";
+import { canAccessQueueMetricsUi } from "~/v3/canAccessQueueMetricsUi.server";
+import { QueueAllocationPresenter } from "~/presenters/v3/QueueAllocationPresenter.server";
+import { TabButton, TabContainer } from "~/components/primitives/Tabs";
+import { AllocationView } from "./AllocationView";
 
 const SearchParamsSchema = z.object({
   query: z.string().optional(),
   page: z.coerce.number().min(1).default(1),
+  // Time-range params written by the TimeFilter; kept as raw strings and interpreted later.
+  period: z.string().optional(),
+  from: z.string().optional(),
+  to: z.string().optional(),
+  view: z.string().optional(),
+  // An unrecognised sort (e.g. a hand-edited URL) falls back to undefined so the loader applies
+  // its default ordering instead of `.parse()` throwing.
+  sort: z.enum(["busiest", "queued", "name"]).optional().catch(undefined),
 });
 
+// A single requested change: the queue's friendly id and the non-negative integer limit to apply.
+const AllocationChangeSchema = z.object({
+  friendlyId: z.string(),
+  limit: z.number().int().min(0),
+});
+
+// Payload of the "allocation-apply" action: between 1 and 200 changes per submission.
+const AllocationChangesSchema = z.array(AllocationChangeSchema).min(1).max(200);
+
+// Passed as `defaultPeriod` to the time-range helpers, the TimeFilter and the tile queries.
+const QUEUE_METRICS_DEFAULT_PERIOD = "1d";
+
 export const meta: MetaFunction = () => {
   return [
     {
@@ -97,7 +133,9 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
   const { organizationSlug, projectParam, envParam } = EnvironmentParamSchema.parse(params);
 
   const url = new URL(request.url);
-  const { page, query } = SearchParamsSchema.parse(Object.fromEntries(url.searchParams));
+  const { page, query, period, from, to, view, sort } = SearchParamsSchema.parse(
+    Object.fromEntries(url.searchParams)
+  );
 
   const project = await findProjectBySlug(organizationSlug, projectParam, userId);
   if (!project) {
@@ -115,22 +153,82 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => {
     });
   }
 
+  // Per-org gate for the metrics UI. When off, this org gets the classic Queues page and
+  // no metrics query fires.
+  const queueMetricsUiEnabled = await canAccessQueueMetricsUi({ userId, organizationSlug });
+
   try {
     const queueListPresenter = new QueueListPresenter();
     const queues = await queueListPresenter.call({
       environment,
       query,
       page,
+      // Relevance ordering rides the metrics pipeline, so it is part of the gated UI.
+      sort: queueMetricsUiEnabled ? (sort ?? "busiest") : "name",
     });
 
     const environmentQueuePresenter = new EnvironmentQueuePresenter();
 
     const autoReloadPollIntervalMs = env.QUEUES_AUTORELOAD_POLL_INTERVAL_MS;
 
+    // Per-queue list metrics (Delay p95 + backlog sparkline columns) are SSR'd with the table.
+    // The environment header tiles are fetched client-side per card (see QueueEnvMetricTile) so a
+    // slow ClickHouse query never blocks the queues list from rendering.
+    let metrics: {
+      bucketStartMs: number;
+      bucketIntervalMs: number;
+      byQueue: Record<string, QueueListMetric>;
+    } | null = null;
+
+    const allocationView = queueMetricsUiEnabled && view === "allocation";
+
+    if (queueMetricsUiEnabled && queues.success && !allocationView) {
+      // Metrics are additive observability; a ClickHouse hiccup must not take down queue
+      // management. Fail open to metrics: null instead of bubbling to the page-level 400.
+      try {
+        const presenter = new QueueMetricsPresenter();
+        const queueNames = queues.queues.map((q) =>
+          q.type === "task" ? `task/${q.name}` : q.name
+        );
+        const timeRange = timeFilterFromTo({
+          period,
+          from: parseFiniteInt(from),
+          to: parseFiniteInt(to),
+          defaultPeriod: QUEUE_METRICS_DEFAULT_PERIOD,
+        });
+        const queueMetrics =
+          queueNames.length > 0
+            ? await presenter.getQueueListMetrics({
+                environment,
+                queueNames,
+                from: timeRange.from,
+                to: timeRange.to,
+              })
+            : null;
+        if (queueMetrics) {
+          metrics = {
+            bucketStartMs: queueMetrics.bucketStartMs,
+            bucketIntervalMs: queueMetrics.bucketIntervalMs,
+            byQueue: Object.fromEntries(queueMetrics.byQueue),
+          };
+        }
+      } catch (error) {
+        logger.warn("Queue list metrics unavailable, rendering without them", { error });
+      }
+    }
+
+    const allocation =
+      allocationView && queues.success
+        ? await new QueueAllocationPresenter().call({ environment })
+        : null;
+
     return typedjson({
       ...queues,
       environment: await environmentQueuePresenter.call(environment),
       autoReloadPollIntervalMs,
+      metrics,
+      allocation,
+      queueMetricsUiEnabled,
     });
   } catch (error) {
     console.error(error);
@@ -293,12 +391,61 @@ export const action = async ({ request, params }: ActionFunctionArgs) => {
 
       return redirectWithSuccessMessage(redirectPath, request, "Queue concurrency limit reset");
     }
+    case "allocation-apply": {
+      if (!(await canAccessQueueMetricsUi({ userId, organizationSlug }))) {
+        return redirectWithErrorMessage(redirectPath, request, "Not available");
+      }
+
+      let changes;
+      try {
+        changes = AllocationChangesSchema.parse(JSON.parse(String(formData.get("changes"))));
+      } catch {
+        return redirectWithErrorMessage(redirectPath, request, "Invalid changes");
+      }
+
+      const user = await getUserById(userId);
+      if (!user) {
+        return redirectWithErrorMessage(redirectPath, request, "User not found");
+      }
+
+      let failed = 0;
+      for (const change of changes) {
+        const result = await concurrencySystem.queues.overrideQueueConcurrencyLimit(
+          environment,
+          change.friendlyId,
+          change.limit,
+          user
+        );
+        if (!result.isOk()) failed++;
+      }
+
+      if (failed > 0) {
+        return redirectWithErrorMessage(
+          redirectPath,
+          request,
+          `Failed to update ${failed} of ${changes.length} queue limits`
+        );
+      }
+
+      return redirectWithSuccessMessage(
+        redirectPath,
+        request,
+        `Updated ${changes.length} queue limit${changes.length === 1 ? "" : "s"}`
+      );
+    }
     default:
       return redirectWithErrorMessage(redirectPath, request, "Something went wrong");
   }
 };
 
 export default function Page() {
+  // The per-organization flag picks which whole page renders. When it is off, the classic
+  // Queues page is shown, byte-for-byte the pre-metrics UI. Each branch is a separate
+  // component, so each one owns its own hooks.
+  const loaderData = useTypedLoaderData<typeof loader>();
+  if (loaderData.queueMetricsUiEnabled) {
+    return <QueuesWithMetricsView />;
+  }
+  return <ClassicQueuesView />;
+}
+
+function QueuesWithMetricsView() {
   const {
     environment,
     queues,
@@ -308,24 +455,28 @@ export default function Page() {
     totalQueues,
     hasFilters,
     autoReloadPollIntervalMs,
+    metrics,
+    allocation,
   } = useTypedLoaderData<typeof loader>();
 
+  const metricsByQueue = metrics?.byQueue ?? {};
+
   const organization = useOrganization();
   const project = useProject();
   const env = useEnvironment();
   const plan = useCurrentPlan();
+  const maxPeriodDays = plan?.v3Subscription?.plan?.limits?.queryPeriodDays?.number;
 
-  useAutoRevalidate({ interval: autoReloadPollIntervalMs, onFocus: true });
-
-  const limitStatus =
-    environment.running === environment.concurrencyLimit * environment.burstFactor
-      ? "limit"
-      : environment.running > environment.concurrencyLimit
-        ? "burst"
-        : "within";
+  // The header tiles fetch client-side with the same period/from/to the TimeFilter writes.
+  const { value, replace } = useSearchParams();
+  const timeRange = {
+    period: value("period") ?? null,
+    from: value("from") ?? null,
+    to: value("to") ?? null,
+  };
+  const view = value("view") === "allocation" ? ("allocation" as const) : ("queues" as const);
 
-  const limitClassName =
-    limitStatus === "burst" ? "text-warning" : limitStatus === "limit" ? "text-error" : undefined;
+  useAutoRevalidate({ interval: autoReloadPollIntervalMs, onFocus: true });
 
   return (
     <PageContainer>
@@ -333,6 +484,30 @@ export default function Page() {
         <PageTitle title="Queues" />
         <PageAccessories>
           <AdminDebugTooltip />
+          {plan ? (
+            plan?.v3Subscription?.plan?.limits.concurrentRuns.canExceed ? (
+              <LinkButton
+                to={concurrencyPath(organization, project, env)}
+                variant="tertiary/small"
+                LeadingIcon={ConcurrencyIcon}
+                leadingIconClassName="text-amber-500"
+              >
+                Increase limit
+              </LinkButton>
+            ) : (
+              <LinkButton
+                to={v3BillingPath(organization, "Upgrade your plan for more concurrency")}
+                variant="tertiary/small"
+                LeadingIcon={ArrowUpCircleIcon}
+                leadingIconClassName="text-indigo-500"
+              >
+                Increase limit
+              </LinkButton>
+            )
+          ) : null}
+          {environment.runsEnabled && env.pauseSource !== ENVIRONMENT_PAUSE_SOURCE_BILLING_LIMIT ? (
+            <EnvironmentPauseResumeButton env={env} />
+          ) : null}
           <LinkButton
             variant={"docs/small"}
             LeadingIcon={BookOpenIcon}
@@ -343,110 +518,78 @@ export default function Page() {
         </PageAccessories>
       </NavBar>
       <PageBody scrollable={false}>
-        <div className="grid max-h-full grid-rows-[auto_1fr] overflow-hidden">
-          <div className="grid grid-cols-3 gap-3 p-3">
-            <BigNumber
-              title="Queued"
-              value={environment.queued}
-              suffix={env.paused ? <span className="text-warning">paused</span> : undefined}
-              animate
-              accessory={
-                <div className="flex items-start gap-1">
-                  {environment.runsEnabled &&
-                  env.pauseSource !== ENVIRONMENT_PAUSE_SOURCE_BILLING_LIMIT ? (
-                    <EnvironmentPauseResumeButton env={env} />
-                  ) : null}
-                  <LinkButton
-                    variant="secondary/small"
-                    LeadingIcon={RunsIcon}
-                    leadingIconClassName="text-runs"
-                    className="px-2"
-                    to={v3RunsPath(organization, project, env, {
-                      statuses: ["PENDING"],
-                      period: "30d",
-                      rootOnly: false,
-                    })}
-                    tooltip="View queued runs"
-                  />
-                </div>
-              }
-              valueClassName={env.paused ? "text-warning tabular-nums" : "tabular-nums"}
-              compactThreshold={1000000}
-            />
-            <BigNumber
-              title="Running"
-              value={environment.running}
-              animate
-              valueClassName={cn(limitClassName, "tabular-nums")}
-              suffix={
-                limitStatus === "burst" ? (
-                  <span className={cn(limitClassName, "flex items-center gap-1")}>
-                    Including {environment.running - environment.concurrencyLimit} burst runs{" "}
-                    <BurstFactorTooltip environment={environment} />
-                  </span>
-                ) : limitStatus === "limit" ? (
-                  "At concurrency limit"
-                ) : undefined
-              }
-              accessory={
-                <LinkButton
-                  variant="secondary/small"
-                  LeadingIcon={RunsIcon}
-                  leadingIconClassName="text-runs"
-                  className="px-2"
-                  to={v3RunsPath(organization, project, env, {
-                    statuses: ["DEQUEUED", "EXECUTING"],
-                    period: "30d",
-                    rootOnly: false,
-                  })}
-                  tooltip="View running runs"
-                />
-              }
-              compactThreshold={1000000}
-            />
-            <BigNumber
-              title="Concurrency limit"
-              value={environment.concurrencyLimit}
-              animate
-              valueClassName={limitClassName}
-              suffix={
-                environment.burstFactor > 1 ? (
-                  <span className={cn(limitClassName, "flex items-center gap-1")}>
-                    Burst limit {environment.burstFactor * environment.concurrencyLimit}{" "}
-                    <BurstFactorTooltip environment={environment} />
-                  </span>
-                ) : undefined
-              }
-              accessory={
-                plan ? (
-                  plan?.v3Subscription?.plan?.limits.concurrentRuns.canExceed ? (
-                    <LinkButton
-                      to={concurrencyPath(organization, project, env)}
-                      variant="tertiary/small"
-                      LeadingIcon={ConcurrencyIcon}
-                      leadingIconClassName="text-amber-500"
-                    >
-                      Increase limit
-                    </LinkButton>
-                  ) : (
-                    <LinkButton
-                      to={v3BillingPath(organization, "Upgrade your plan for more concurrency")}
-                      variant="secondary/small"
-                      LeadingIcon={ArrowUpCircleIcon}
-                      leadingIconClassName="text-indigo-500"
-                    >
-                      Increase limit
-                    </LinkButton>
-                  )
-                ) : null
-              }
-            />
+        <div className="grid max-h-full grid-rows-[auto_auto_1fr] overflow-hidden">
+          <div className="grid grid-cols-2 gap-3 p-3 lg:grid-cols-4">
+            {QUEUE_HEADER_TILES.map((tile) => (
+              <QueueEnvMetricTile
+                key={tile.id}
+                tile={tile}
+                timeRange={timeRange}
+                referenceLines={
+                  tile.id === "saturation"
+                    ? [
+                        { y: 100, label: `Limit ${environment.concurrencyLimit}` },
+                        ...(environment.burstFactor > 1
+                          ? [
+                              {
+                                y: Math.round(environment.burstFactor * 100),
+                                label: `Burst ${Math.round(
+                                  environment.concurrencyLimit * environment.burstFactor
+                                )}`,
+                              },
+                            ]
+                          : []),
+                      ]
+                    : undefined
+                }
+              />
+            ))}
           </div>
 
           {success ? (
+            <TabContainer className="px-3">
+              <TabButton
+                isActive={view === "queues"}
+                layoutId="queues-view"
+                onClick={() => replace({ view: undefined })}
+              >
+                Queues
+              </TabButton>
+              <TabButton
+                isActive={view === "allocation"}
+                layoutId="queues-view"
+                onClick={() => replace({ view: "allocation", page: undefined })}
+              >
+                Allocation
+              </TabButton>
+            </TabContainer>
+          ) : (
+            <div />
+          )}
+
+          {success && view === "allocation" ? (
+            allocation ? (
+              <AllocationView allocation={allocation} environment={environment} />
+            ) : (
+              <div className="grid place-items-center py-16">
+                <Spinner className="size-6" />
+              </div>
+            )
+          ) : success ? (
             <div className="grid max-h-full min-h-full grid-rows-[auto_1fr] overflow-x-auto">
               <div className="flex items-center justify-between gap-2 border-t border-grid-dimmed px-1.5 py-1.5">
-                <QueueFilters />
+                <div className="flex items-center gap-2">
+                  <QueueFilters />
+                  <QueueSortFilter />
+                  <TimeFilter
+                    defaultPeriod={QUEUE_METRICS_DEFAULT_PERIOD}
+                    labelName="Period"
+                    hideLabel
+                    maxPeriodDays={maxPeriodDays}
+                    valueClassName="text-text-bright"
+                    shortcut={{ key: "d" }}
+                  />
+                </div>
                 <PaginationControls
                   currentPage={pagination.currentPage}
                   totalPages={pagination.mode === "unfiltered" ? pagination.totalPages : 1}
@@ -502,6 +645,14 @@ export default function Page() {
                     >
                       Limited by
                     </TableHeaderCell>
+                    <TableHeaderCell>Health</TableHeaderCell>
+                    <TableHeaderCell
+                      alignment="right"
+                      tooltip="The 95th-percentile scheduling delay (time from when a run became eligible to when it was dequeued) over the selected window."
+                    >
+                      Delay p95
+                    </TableHeaderCell>
+                    <TableHeaderCell>Backlog</TableHeaderCell>
                     <TableHeaderCell className="w-[1%] pl-32">
                       <span className="sr-only">Pause/resume</span>
                     </TableHeaderCell>
@@ -518,11 +669,19 @@ export default function Page() {
                       const queueFilterableName = `${queue.type === "task" ? "task/" : ""}${
                         queue.name
                       }`;
+                      const queueMetric = metricsByQueue[queueFilterableName];
                       return (
                         <TableRow key={queue.name}>
                           <TableCell>
                             <span className="flex items-center gap-2">
-                              <QueueName {...queue} />
+                              <Link
+                                to={v3QueuePath(organization, project, env, {
+                                  friendlyId: queue.id,
+                                })}
+                                className="rounded-sm hover:underline focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-charcoal-500"
+                              >
+                                <QueueName {...queue} />
+                              </Link>
                               {queue.concurrency?.overriddenAt ? (
                                 <SimpleTooltip
                                   button={
@@ -600,6 +759,41 @@ export default function Page() {
                               "Environment"
                             )}
                           </TableCell>
+                          <TableCell className={cn(queue.paused ? "opacity-50" : undefined)}>
+                            <QueueHealthBadge
+                              paused={queue.paused}
+                              running={queue.running}
+                              queued={queue.queued}
+                              limit={limit}
+                            />
+                          </TableCell>
+                          <TableCell alignment="right" className="tabular-nums">
+                            {queueMetric && queueMetric.p95WaitMs !== null ? (
+                              <span
+                                className={cn(
+                                  queueMetric.p95WaitMs >= 60_000
+                                    ? "text-warning"
+                                    : "text-text-bright"
+                                )}
+                              >
+                                {formatWaitMs(queueMetric.p95WaitMs)}
+                              </span>
+                            ) : (
+                              <span className="text-text-dimmed">–</span>
+                            )}
+                          </TableCell>
+                          <TableCell>
+                            <UsageSparkline
+                              data={queueMetric?.depthSparkline}
+                              total={queueMetric?.peakQueued}
+                              bucketStartMs={metrics?.bucketStartMs}
+                              bucketIntervalMs={metrics?.bucketIntervalMs}
+                              color="#A78BFA"
+                              totalClassName="text-text-dimmed"
+                              unitLabel={{ singular: "queued", plural: "queued" }}
+                              formatTotal={(v) => v.toLocaleString()}
+                            />
+                          </TableCell>
                           <TableCellMenu
                             isSticky
                             visibleButtons={
@@ -670,7 +864,7 @@ export default function Page() {
                     })
                   ) : (
                     <TableRow>
-                      <TableCell colSpan={7}>
+                      <TableCell colSpan={9}>
                         <div className="grid place-items-center py-6 text-text-dimmed">
                           <Paragraph>
                             {hasFilters
@@ -1059,6 +1253,709 @@ export function QueueFilters() {
   return <SearchInput placeholder="Search queues…" paramName="query" resetParams={["page"]} />;
 }
 
+/** Sort choices offered on the Queues list, in display order. The first entry is the default. */
+const QUEUE_SORT_OPTIONS = [
+  { value: "busiest", label: "Busiest" },
+  { value: "queued", label: "Backlog" },
+  { value: "name", label: "Name" },
+] as const;
+
+type QueueSortValue = (typeof QUEUE_SORT_OPTIONS)[number]["value"];
+
+/** The option used when the URL has no `sort` param, or one that is not a known option. */
+const DEFAULT_QUEUE_SORT_OPTION = QUEUE_SORT_OPTIONS[0];
+
+/**
+ * Resolves an arbitrary value (a raw search param, or whatever the select emits) to one of the
+ * known sort options. Anything that does not match an option's `value` yields the default, so
+ * the select and its label can never disagree.
+ */
+function resolveQueueSortOption(raw: unknown) {
+  return QUEUE_SORT_OPTIONS.find((option) => option.value === raw) ?? DEFAULT_QUEUE_SORT_OPTION;
+}
+
+/**
+ * Sort selector for the Queues list, backed by the `sort` search param.
+ * Choosing the default option removes the param rather than writing it; every change also
+ * clears `page`.
+ */
+function QueueSortFilter() {
+  const { value, replace } = useSearchParams();
+  // Validate instead of casting: the param is user-editable text.
+  const selected = resolveQueueSortOption(value("sort"));
+
+  return (
+    <SelectProvider
+      value={selected.value}
+      setValue={(next) => {
+        const nextSort: QueueSortValue = resolveQueueSortOption(next).value;
+        replace({
+          sort: nextSort === DEFAULT_QUEUE_SORT_OPTION.value ? undefined : nextSort,
+          page: undefined,
+        });
+      }}
+    >
+      <Ariakit.Select render={<div className="group cursor-pointer focus-custom" />}>
+        <AppliedFilter
+          label="Sort"
+          value={selected.label}
+          removable={false}
+          variant="secondary/small"
+          valueClassName="text-text-bright"
+        />
+      </Ariakit.Select>
+      <SelectPopover className="min-w-0 max-w-[min(240px,var(--popover-available-width))]">
+        {QUEUE_SORT_OPTIONS.map((option) => (
+          <SelectItem key={option.value} value={option.value} className="gap-x-2 text-text-bright">
+            {option.label}
+          </SelectItem>
+        ))}
+      </SelectPopover>
+    </SelectProvider>
+  );
+}
+
+/** A single result row of a tile query, keyed by the column aliases used in that query. */
+type MetricTileRow = Record<string, number | string | null>;
+
+/** What a tile renders once its rows are reduced: the chart series plus the headline number. */
+type QueueHeaderTileSummary = {
+  sparkline: number[];
+  total: number;
+  formatTotal?: (total: number) => string;
+  totalClassName?: string;
+};
+
+/** Static description of one header tile: identity, colour, query and how to reduce its rows. */
+type QueueHeaderTile = {
+  id: string;
+  label: string;
+  color: string;
+  query: string;
+  /** Formats a single bucket's value in the chart tooltip. */
+  formatValue?: (value: number) => string;
+  derive: (rows: MetricTileRow[]) => QueueHeaderTileSummary;
+};
+
+/** Coerces a row cell to a number; anything that is not finite after coercion becomes 0. */
+function tileNumber(value: number | string | null): number {
+  const coerced = typeof value === "number" ? value : Number(value);
+  if (!Number.isFinite(coerced)) {
+    return 0;
+  }
+  return coerced;
+}
+
+/**
+ * Converts a row's time cell to epoch milliseconds.
+ * The first space is swapped for `T` and a trailing `Z` is appended when missing, so values
+ * without a zone designator are parsed as UTC. Unparsable input yields NaN.
+ */
+function tileTimeToMs(value: number | string | null): number {
+  const isoLike = String(value).replace(" ", "T");
+  const utc = isoLike.endsWith("Z") ? isoLike : `${isoLike}Z`;
+  return Date.parse(utc);
+}
+
+/** Largest value in `values`, never below 0 (an empty series peaks at 0). */
+function tilePeak(values: number[]): number {
+  let peak = 0;
+  for (const v of values) {
+    peak = Math.max(peak, v);
+  }
+  return peak;
+}
+
+// Each header tile runs its own TRQL query client-side (resources.metric) with fillGaps,
+// mirroring the metrics dashboard widgets: the gauges (saturation inputs, backlog) carry,
+// counters/p95 zero-fill.
+const QUEUE_HEADER_TILES: QueueHeaderTile[] = [
+  {
+    id: "saturation",
+    label: "Env saturation",
+    color: "#6366F1",
+    query: `SELECT timeBucket() AS t,\n  max(max_env_running) AS used,\n  max(max_env_limit) AS env_limit\nFROM env_metrics\nGROUP BY t\nORDER BY t`,
+    formatValue: (v) => `${v}%`,
+    // Per bucket: running as a rounded percentage of the limit; 0 when the limit is not positive.
+    derive: (rows) => {
+      const sparkline = rows.map((row) => {
+        const envLimit = tileNumber(row.env_limit);
+        if (envLimit <= 0) {
+          return 0;
+        }
+        return Math.round((tileNumber(row.used) / envLimit) * 100);
+      });
+      return {
+        sparkline,
+        total: tilePeak(sparkline),
+        formatTotal: (v) => `${v}% peak`,
+      };
+    },
+  },
+  {
+    id: "backlog",
+    label: "Backlog",
+    color: "#A78BFA",
+    query: `SELECT timeBucket() AS t,\n  max(max_env_queued) AS queued\nFROM env_metrics\nGROUP BY t\nORDER BY t`,
+    // Headline is the highest queued count seen in any bucket.
+    derive: (rows) => {
+      const sparkline = rows.map((row) => tileNumber(row.queued));
+      return {
+        sparkline,
+        total: tilePeak(sparkline),
+        formatTotal: (v) => `${v.toLocaleString()} peak`,
+      };
+    },
+  },
+  {
+    id: "p95",
+    label: "Scheduling delay p95",
+    color: "#F59E0B",
+    query: `SELECT timeBucket() AS t,\n  round(quantilesTDigestMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[3]) AS p95\nFROM env_metrics\nGROUP BY t\nORDER BY t`,
+    formatValue: formatWaitMs,
+    // Headline is the worst per-bucket p95; it turns to the warning colour at 60 000 ms or more.
+    derive: (rows) => {
+      const sparkline = rows.map((row) => tileNumber(row.p95));
+      const worst = tilePeak(sparkline);
+      const totalClassName = worst >= 60_000 ? "text-warning" : undefined;
+      return {
+        sparkline,
+        total: worst,
+        formatTotal: (v) => (v > 0 ? formatWaitMs(v) : "–"),
+        totalClassName,
+      };
+    },
+  },
+  {
+    id: "throttled",
+    label: "Throttled",
+    color: "#F59E0B",
+    query: `SELECT timeBucket() AS t,\n  sum(throttled_count) AS throttled\nFROM env_metrics\nGROUP BY t\nORDER BY t`,
+    // Headline is the sum across buckets; any throttling at all gets the warning colour.
+    derive: (rows) => {
+      const sparkline = rows.map((row) => tileNumber(row.throttled));
+      let total = 0;
+      for (const count of sparkline) {
+        total += count;
+      }
+      return {
+        sparkline,
+        total,
+        totalClassName: total > 0 ? "text-warning" : undefined,
+      };
+    },
+  },
+];
+
+type TileTimeRange = MetricResourceTimeRange;
+
+/**
+ * One environment-level header tile. Runs `tile.query` through `useMetricResourceQuery` for the
+ * current organization/project/environment and `timeRange`, reduces the rows with `tile.derive`,
+ * and renders a HeaderTile with the headline value plus a small line chart.
+ *
+ * Body states, in priority order: loading skeleton, "Unable to load metrics" on failure, the
+ * chart when at least one point is positive, otherwise "No activity".
+ *
+ * @param tile - Static tile definition (query, colours, formatters, row reducer).
+ * @param timeRange - period/from/to passed through to the metric query.
+ * @param referenceLines - Optional horizontal markers forwarded to the line chart.
+ */
+function QueueEnvMetricTile({
+  tile,
+  timeRange,
+  referenceLines,
+}: {
+  tile: QueueHeaderTile;
+  timeRange: TileTimeRange;
+  referenceLines?: Array<{ y: number; label?: string }>;
+}) {
+  const organization = useOrganization();
+  const project = useProject();
+  const environment = useEnvironment();
+
+  const { rows, isLoading, showLoading, failed } = useMetricResourceQuery(tile.query, {
+    organizationId: organization.id,
+    projectId: project.id,
+    environmentId: environment.id,
+    timeRange,
+    defaultPeriod: QUEUE_METRICS_DEFAULT_PERIOD,
+    fillGaps: true,
+  });
+
+  // Derive the summary and the chart points together, and only when the rows or the tile
+  // change. Rebuilding `data` on every render would hand the axis memo below a fresh dependency
+  // each time and defeat it.
+  // NOTE(review): this only helps if the hook returns the same `rows` array between renders
+  // when nothing changed — confirm.
+  const { summary, data } = useMemo(() => {
+    const derived = tile.derive(rows);
+    // Same point shape the full-size charts use so the shared axis/tooltip helpers apply.
+    // Rows whose time cell cannot be parsed are dropped.
+    const points = rows
+      .map((r, i) => ({ bucket: tileTimeToMs(r.t), [tile.id]: derived.sparkline[i] ?? 0 }))
+      .filter((p) => Number.isFinite(p.bucket));
+    return { summary: derived, data: points };
+  }, [tile, rows]);
+  const { sparkline, total, formatTotal, totalClassName } = summary;
+
+  const chartConfig = useMemo<ChartConfig>(
+    () => ({ [tile.id]: { label: tile.label, color: tile.color } }),
+    [tile.id, tile.label, tile.color]
+  );
+
+  const { tooltipLabelFormatter } = useMemo(() => buildActivityTimeAxis(data), [data]);
+  // An all-zero series is shown as "No activity" rather than a flat line.
+  const hasData = data.length > 0 && sparkline.some((v) => v > 0);
+
+  return (
+    <HeaderTile
+      label={tile.label}
+      value={
+        showLoading ? (
+          <span className="inline-block h-3 w-12 animate-pulse rounded bg-grid-bright" />
+        ) : failed ? undefined : formatTotal ? (
+          formatTotal(total)
+        ) : (
+          total.toLocaleString()
+        )
+      }
+      valueClassName={totalClassName}
+    >
+      <LoadingBarDivider isLoading={isLoading} className="bg-transparent" />
+      {showLoading ? (
+        <div className="h-16 w-full animate-pulse rounded bg-grid-bright/60" />
+      ) : failed ? (
+        <div className="flex h-16 items-center text-xs text-text-dimmed">
+          Unable to load metrics
+        </div>
+      ) : hasData ? (
+        <div className="h-16 w-full">
+          <Chart.Root
+            config={chartConfig}
+            data={data}
+            dataKey="bucket"
+            series={[tile.id]}
+            fillContainer
+          >
+            <Chart.Line
+              lineType="monotone"
+              showDots={false}
+              referenceLines={referenceLines}
+              xAxisProps={{ hide: true }}
+              yAxisProps={{ hide: true }}
+              tooltipLabelFormatter={tooltipLabelFormatter}
+              tooltipValueFormatter={tile.formatValue}
+            />
+          </Chart.Root>
+        </div>
+      ) : (
+        <div className="flex h-16 items-center text-xs text-text-dimmed">No activity</div>
+      )}
+    </HeaderTile>
+  );
+}
+
+/**
+ * Bordered card for a header metric: a label row with an optional right-aligned value,
+ * followed by whatever body content the caller supplies.
+ *
+ * The value slot is left out only when `value` is exactly `undefined`.
+ */
+function HeaderTile(props: {
+  label: ReactNode;
+  value?: ReactNode;
+  valueClassName?: string;
+  children: ReactNode;
+}) {
+  const { label, value, valueClassName, children } = props;
+
+  const valueSlot =
+    value === undefined ? null : (
+      <span className={cn("shrink-0 text-sm tabular-nums text-text-bright", valueClassName)}>
+        {value}
+      </span>
+    );
+
+  return (
+    <div className="flex flex-col gap-1.5 rounded-sm border border-grid-dimmed bg-background-bright px-3 py-2">
+      <div className="flex items-baseline justify-between gap-2">
+        <span className="truncate text-xs text-text-dimmed">{label}</span>
+        {valueSlot}
+      </div>
+      {children}
+    </div>
+  );
+}
+
+/**
+ * One-badge summary of a queue's live state. The first matching rule wins:
+ * paused, then at capacity (running has reached the limit while runs are still queued),
+ * then backlogged, then active, and finally idle.
+ */
+function QueueHealthBadge({
+  paused,
+  running,
+  queued,
+  limit,
+}: {
+  paused: boolean;
+  running: number;
+  queued: number;
+  limit: number;
+}) {
+  let text: string;
+  let tone: string;
+
+  if (paused) {
+    text = "Paused";
+    tone = "text-warning";
+  } else if (running >= limit && queued > 0) {
+    text = "At capacity";
+    tone = "text-warning";
+  } else if (queued > 0) {
+    text = "Backlogged";
+    tone = "text-blue-500";
+  } else if (running > 0) {
+    text = "Active";
+    tone = "text-success";
+  } else {
+    text = "Idle";
+    tone = "text-text-dimmed";
+  }
+
+  return (
+    <Badge variant="extra-small" className={tone}>
+      {text}
+    </Badge>
+  );
+}
+
+/**
+ * Formats a wait duration given in milliseconds as a compact label in the largest
+ * fitting unit: whole milliseconds below one second, otherwise one decimal place of
+ * seconds, minutes, or hours.
+ *
+ * Rounding is taken into account when the unit is picked, so a value just under a unit
+ * boundary is promoted (59,990ms becomes "1.0m") instead of rendering as "60.0s" or
+ * "1000ms".
+ */
+function formatWaitMs(ms: number): string {
+  const wholeMs = Math.round(ms);
+  if (wholeMs < 1000) return `${wholeMs}ms`;
+
+  const seconds = (ms / 1000).toFixed(1);
+  // "60.0" means rounding reached the next unit, so fall through to it.
+  if (ms < 60_000 && seconds !== "60.0") return `${seconds}s`;
+
+  const minutes = (ms / 60_000).toFixed(1);
+  if (ms < 3_600_000 && minutes !== "60.0") return `${minutes}m`;
+
+  return `${(ms / 3_600_000).toFixed(1)}h`;
+}
+
+// Classic Queues page, restored verbatim from before the Queue Metrics feature. Rendered
+// when queueMetricsUiEnabled is off so a gated org sees exactly the pre-metrics UI.
+//
+// Layout: three environment-level BigNumber tiles (Queued, Running, Concurrency limit),
+// then either the filterable/paginated queue table (when the loader reports success) or an
+// empty/error state chosen from totalQueues and the loader's error code.
+//
+// NOTE(review): in the row menu's popoverContent, both branches of the `queue.paused`
+// ternary render the same QueuePauseResumeButton with identical props.
+// NOTE(review): the empty-state row uses colSpan={7} while the header declares six cells.
+function ClassicQueuesView() {
+  const {
+    environment,
+    queues,
+    success,
+    pagination,
+    code,
+    totalQueues,
+    hasFilters,
+    autoReloadPollIntervalMs,
+  } = useTypedLoaderData<typeof loader>();
+
+  const organization = useOrganization();
+  const project = useProject();
+  const env = useEnvironment();
+  const plan = useCurrentPlan();
+
+  // Auto-revalidate using the loader-provided interval, with onFocus enabled.
+  useAutoRevalidate({ interval: autoReloadPollIntervalMs, onFocus: true });
+
+  // "limit": running exactly equals concurrencyLimit * burstFactor.
+  // "burst": running exceeds the base concurrencyLimit.
+  // "within": everything else. The equality check is evaluated first.
+  const limitStatus =
+    environment.running === environment.concurrencyLimit * environment.burstFactor
+      ? "limit"
+      : environment.running > environment.concurrencyLimit
+        ? "burst"
+        : "within";
+
+  // Burst uses the warning color, limit the error color; "within" adds no class.
+  const limitClassName =
+    limitStatus === "burst" ? "text-warning" : limitStatus === "limit" ? "text-error" : undefined;
+
+  return (
+    <PageContainer>
+      <NavBar>
+        <PageTitle title="Queues" />
+        <PageAccessories>
+          <AdminDebugTooltip />
+          <LinkButton
+            variant={"docs/small"}
+            LeadingIcon={BookOpenIcon}
+            to={docsPath("/queue-concurrency")}
+          >
+            Queues docs
+          </LinkButton>
+        </PageAccessories>
+      </NavBar>
+      <PageBody scrollable={false}>
+        <div className="grid max-h-full grid-rows-[auto_1fr] overflow-hidden">
+          <div className="grid grid-cols-3 gap-3 p-3">
+            <BigNumber
+              title="Queued"
+              value={environment.queued}
+              suffix={env.paused ? <span className="text-warning">paused</span> : undefined}
+              animate
+              accessory={
+                <div className="flex items-start gap-1">
+                  {environment.runsEnabled &&
+                  env.pauseSource !== ENVIRONMENT_PAUSE_SOURCE_BILLING_LIMIT ? (
+                    <EnvironmentPauseResumeButton env={env} />
+                  ) : null}
+                  <LinkButton
+                    variant="secondary/small"
+                    LeadingIcon={RunsIcon}
+                    leadingIconClassName="text-runs"
+                    className="px-2"
+                    to={v3RunsPath(organization, project, env, {
+                      statuses: ["PENDING"],
+                      period: "30d",
+                      rootOnly: false,
+                    })}
+                    tooltip="View queued runs"
+                  />
+                </div>
+              }
+              valueClassName={env.paused ? "text-warning tabular-nums" : "tabular-nums"}
+              compactThreshold={1000000}
+            />
+            <BigNumber
+              title="Running"
+              value={environment.running}
+              animate
+              valueClassName={cn(limitClassName, "tabular-nums")}
+              suffix={
+                limitStatus === "burst" ? (
+                  <span className={cn(limitClassName, "flex items-center gap-1")}>
+                    Including {environment.running - environment.concurrencyLimit} burst runs{" "}
+                    <BurstFactorTooltip environment={environment} />
+                  </span>
+                ) : limitStatus === "limit" ? (
+                  "At concurrency limit"
+                ) : undefined
+              }
+              accessory={
+                <LinkButton
+                  variant="secondary/small"
+                  LeadingIcon={RunsIcon}
+                  leadingIconClassName="text-runs"
+                  className="px-2"
+                  to={v3RunsPath(organization, project, env, {
+                    statuses: ["DEQUEUED", "EXECUTING"],
+                    period: "30d",
+                    rootOnly: false,
+                  })}
+                  tooltip="View running runs"
+                />
+              }
+              compactThreshold={1000000}
+            />
+            <BigNumber
+              title="Concurrency limit"
+              value={environment.concurrencyLimit}
+              animate
+              valueClassName={limitClassName}
+              suffix={
+                environment.burstFactor > 1 ? (
+                  <span className={cn(limitClassName, "flex items-center gap-1")}>
+                    Burst limit {environment.burstFactor * environment.concurrencyLimit}{" "}
+                    <BurstFactorTooltip environment={environment} />
+                  </span>
+                ) : undefined
+              }
+              accessory={
+                plan ? (
+                  plan?.v3Subscription?.plan?.limits.concurrentRuns.canExceed ? (
+                    <LinkButton
+                      to={concurrencyPath(organization, project, env)}
+                      variant="tertiary/small"
+                      LeadingIcon={ConcurrencyIcon}
+                      leadingIconClassName="text-amber-500"
+                    >
+                      Increase limit
+                    </LinkButton>
+                  ) : (
+                    <LinkButton
+                      to={v3BillingPath(organization, "Upgrade your plan for more concurrency")}
+                      variant="secondary/small"
+                      LeadingIcon={ArrowUpCircleIcon}
+                      leadingIconClassName="text-indigo-500"
+                    >
+                      Increase limit
+                    </LinkButton>
+                  )
+                ) : null
+              }
+            />
+          </div>
+
+          {success ? (
+            <div className="grid max-h-full min-h-full grid-rows-[auto_1fr] overflow-x-auto">
+              <div className="flex items-center justify-between gap-2 border-t border-grid-dimmed px-1.5 py-1.5">
+                <QueueFilters />
+                <PaginationControls
+                  currentPage={pagination.currentPage}
+                  totalPages={pagination.mode === "unfiltered" ? pagination.totalPages : 1}
+                  hasNextPage={pagination.mode === "filtered" ? pagination.hasMore : undefined}
+                  showPageNumbers={false}
+                />
+              </div>
+              <Table containerClassName="border-t">
+                <TableHeader>
+                  <TableRow>
+                    <TableHeaderCell>Name</TableHeaderCell>
+                    <TableHeaderCell alignment="right">Queued</TableHeaderCell>
+                    <TableHeaderCell alignment="right">Running</TableHeaderCell>
+                    <TableHeaderCell alignment="right">Limit</TableHeaderCell>
+                    <TableHeaderCell
+                      alignment="right"
+                      tooltip={
+                        <div className="max-w-xs space-y-2 p-1 text-left">
+                          <div className="space-y-0.5">
+                            <Header3>Environment</Header3>
+                            <Paragraph
+                              variant="small"
+                              className="!text-wrap text-text-dimmed"
+                              spacing
+                            >
+                              This queue is limited by your environment's concurrency limit of{" "}
+                              {environment.concurrencyLimit}.
+                            </Paragraph>
+                          </div>
+                          <div className="space-y-0.5">
+                            <Header3>User</Header3>
+                            <Paragraph
+                              variant="small"
+                              className="!text-wrap text-text-dimmed"
+                              spacing
+                            >
+                              This queue is limited by a concurrency limit set in your code.
+                            </Paragraph>
+                          </div>
+                          <div className="space-y-0.5">
+                            <Header3>Override</Header3>
+                            <Paragraph
+                              variant="small"
+                              className="!text-wrap text-text-dimmed"
+                              spacing
+                            >
+                              This queue's concurrency limit has been manually overridden from the
+                              dashboard or API.
+                            </Paragraph>
+                          </div>
+                        </div>
+                      }
+                    >
+                      Limited by
+                    </TableHeaderCell>
+                    <TableHeaderCell className="w-[1%] pl-32">
+                      <span className="sr-only">Pause/resume</span>
+                    </TableHeaderCell>
+                  </TableRow>
+                </TableHeader>
+                <TableBody>
+                  {queues.length > 0 ? (
+                    queues.map((queue) => {
+                      // Queue-level limit when set, otherwise the environment's limit.
+                      const limit = queue.concurrencyLimit ?? environment.concurrencyLimit;
+                      const isAtConcurrencyLimit = queue.running >= limit;
+                      // Only flagged when the environment defines a queue size limit.
+                      const isAtQueueLimit =
+                        environment.queueSizeLimit !== null &&
+                        queue.queued >= environment.queueSizeLimit;
+                      // Task queues get a "task/" prefix; used as the `queues` filter in the runs links below.
+                      const queueFilterableName = `${queue.type === "task" ? "task/" : ""}${
+                        queue.name
+                      }`;
+                      return (
+                        <TableRow key={queue.name}>
+                          <TableCell>
+                            <span className="flex items-center gap-2">
+                              <QueueName {...queue} />
+                              {queue.concurrency?.overriddenAt ? (
+                                <SimpleTooltip
+                                  button={
+                                    <Badge variant="extra-small" className="text-text-bright">
+                                      Concurrency limit overridden
+                                    </Badge>
+                                  }
+                                  content="This queue's concurrency limit has been manually overridden from the dashboard or API."
+                                  className="max-w-xs"
+                                  disableHoverableContent
+                                />
+                              ) : null}
+                              {queue.paused ? (
+                                <Badge variant="extra-small" className="text-warning">
+                                  Paused
+                                </Badge>
+                              ) : null}
+                              {isAtQueueLimit ? (
+                                <Badge variant="extra-small" className="text-error">
+                                  At queue limit
+                                </Badge>
+                              ) : null}
+                              {isAtConcurrencyLimit ? (
+                                <Badge variant="extra-small" className="text-warning">
+                                  At concurrency limit
+                                </Badge>
+                              ) : null}
+                            </span>
+                          </TableCell>
+                          <TableCell
+                            alignment="right"
+                            className={cn(
+                              "w-[1%] pl-16 tabular-nums",
+                              queue.paused ? "opacity-50" : undefined,
+                              isAtQueueLimit && "text-error"
+                            )}
+                          >
+                            {queue.queued}
+                          </TableCell>
+                          <TableCell
+                            alignment="right"
+                            className={cn(
+                              "w-[1%] pl-16 tabular-nums",
+                              queue.paused ? "opacity-50" : undefined,
+                              queue.running > 0 && "text-text-bright",
+                              isAtConcurrencyLimit && "text-warning"
+                            )}
+                          >
+                            {queue.running}
+                          </TableCell>
+                          <TableCell
+                            alignment="right"
+                            className={cn(
+                              "w-[1%] pl-16 tabular-nums",
+                              queue.paused ? "opacity-50" : undefined,
+                              queue.concurrency?.overriddenAt && "font-medium text-text-bright"
+                            )}
+                          >
+                            {limit}
+                          </TableCell>
+                          <TableCell
+                            alignment="right"
+                            className={cn(
+                              "w-[1%] pl-16",
+                              queue.paused ? "opacity-50" : undefined,
+                              isAtConcurrencyLimit && "text-warning",
+                              queue.concurrency?.overriddenAt && "font-medium text-text-bright"
+                            )}
+                          >
+                            {queue.concurrency?.overriddenAt ? (
+                              <span className="text-text-bright">Override</span>
+                            ) : queue.concurrencyLimit ? (
+                              "User"
+                            ) : (
+                              "Environment"
+                            )}
+                          </TableCell>
+                          <TableCellMenu
+                            isSticky
+                            visibleButtons={
+                              queue.paused && <QueuePauseResumeButton queue={queue} />
+                            }
+                            hiddenButtons={
+                              !queue.paused && <QueuePauseResumeButton queue={queue} />
+                            }
+                            popoverContent={
+                              <>
+                                {queue.paused ? (
+                                  <QueuePauseResumeButton
+                                    queue={queue}
+                                    variant="minimal/small"
+                                    fullWidth
+                                    showTooltip={false}
+                                  />
+                                ) : (
+                                  <QueuePauseResumeButton
+                                    queue={queue}
+                                    variant="minimal/small"
+                                    fullWidth
+                                    showTooltip={false}
+                                  />
+                                )}
+
+                                <PopoverMenuItem
+                                  icon={RunsIcon}
+                                  leadingIconClassName="text-runs"
+                                  title="View all runs"
+                                  to={v3RunsPath(organization, project, env, {
+                                    queues: [queueFilterableName],
+                                    period: "30d",
+                                    rootOnly: false,
+                                  })}
+                                />
+                                <PopoverMenuItem
+                                  icon={RectangleStackIcon}
+                                  leadingIconClassName="text-queues"
+                                  title="View queued runs"
+                                  to={v3RunsPath(organization, project, env, {
+                                    queues: [queueFilterableName],
+                                    statuses: ["PENDING"],
+                                    period: "30d",
+                                    rootOnly: false,
+                                  })}
+                                />
+                                <PopoverMenuItem
+                                  icon={Spinner}
+                                  leadingIconClassName="text-queues animate-none"
+                                  title="View running runs"
+                                  to={v3RunsPath(organization, project, env, {
+                                    queues: [queueFilterableName],
+                                    statuses: ["DEQUEUED", "EXECUTING"],
+                                    period: "30d",
+                                    rootOnly: false,
+                                  })}
+                                />
+                                <QueueOverrideConcurrencyButton
+                                  queue={queue}
+                                  environmentConcurrencyLimit={environment.concurrencyLimit}
+                                />
+                              </>
+                            }
+                          />
+                        </TableRow>
+                      );
+                    })
+                  ) : (
+                    <TableRow>
+                      <TableCell colSpan={7}>
+                        <div className="grid place-items-center py-6 text-text-dimmed">
+                          <Paragraph>
+                            {hasFilters
+                              ? "No queues found matching your filters"
+                              : "No queues found"}
+                          </Paragraph>
+                        </div>
+                      </TableCell>
+                    </TableRow>
+                  )}
+                </TableBody>
+              </Table>
+            </div>
+          ) : (
+            <div className="grid place-items-center py-6 text-text-dimmed">
+              {totalQueues === 0 ? (
+                <div className="pt-12">
+                  <QueuesHasNoTasks />
+                </div>
+              ) : code === "engine-version" ? (
+                <EngineVersionUpgradeCallout />
+              ) : (
+                <Callout variant="error">Something went wrong</Callout>
+              )}
+            </div>
+          )}
+        </div>
+      </PageBody>
+    </PageContainer>
+  );
+}
+
 function BurstFactorTooltip({
   environment,
 }: {
diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues_.$queueParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues_.$queueParam/route.tsx
new file mode 100644
index 00000000000..e6a21c6514f
--- /dev/null
+++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues_.$queueParam/route.tsx
@@ -0,0 +1,783 @@
+import { type MetaFunction } from "@remix-run/react";
+import { type LoaderFunctionArgs } from "@remix-run/server-runtime";
+import { useMemo } from "react";
+import { typedjson, useTypedLoaderData } from "remix-typedjson";
+import { z } from "zod";
+import { PageBody, PageContainer } from "~/components/layout/AppLayout";
+import { NavBar, PageTitle } from "~/components/primitives/PageHeader";
+import { buildActivityTimeAxis } from "~/components/primitives/charts/activityTimeAxis";
+import {
+  Chart,
+  type ChartConfig,
+  type ChartState,
+} from "~/components/primitives/charts/ChartCompound";
+import { ChartCard } from "~/components/primitives/charts/ChartCard";
+import {
+  useMetricResourceQuery,
+  type MetricResourceTimeRange,
+} from "~/hooks/useMetricResourceQuery";
+import { findProjectBySlug } from "~/models/project.server";
+import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server";
+import { QueueRetrievePresenter } from "~/presenters/v3/QueueRetrievePresenter.server";
+import {
+  Table,
+  TableBody,
+  TableCell,
+  TableHeader,
+  TableHeaderCell,
+  TableRow,
+} from "~/components/primitives/Table";
+import { TabButton, TabContainer } from "~/components/primitives/Tabs";
+import { engine } from "~/v3/runEngine.server";
+import { TimeFilter } from "~/components/runs/v3/SharedFilters";
+import { useSearchParams } from "~/hooks/useSearchParam";
+import { useCurrentPlan } from "../_app.orgs.$organizationSlug/route";
+import { canAccessQueueMetricsUi } from "~/v3/canAccessQueueMetricsUi.server";
+import { requireUserId } from "~/services/session.server";
+import { cn } from "~/utils/cn";
+import { EnvironmentParamSchema } from "~/utils/pathBuilder";
+
+/** Static document title for the queue detail page. */
+export const meta: MetaFunction = () => {
+  return [{ title: "Queue metrics | Trigger.dev" }];
+};
+
+/** Route params: the shared environment params plus the queue identifier segment. */
+const ParamsSchema = EnvironmentParamSchema.extend({
+  queueParam: z.string(),
+});
+
+/**
+ * Loader for the per-queue detail page.
+ *
+ * Requires a signed-in user, then responds 404 when the org is not allowed to see the
+ * queue metrics UI or when the project, environment, or queue cannot be resolved.
+ * On success it returns only the live queue snapshot, the concurrency-key breakdown and
+ * the identifiers the client-side metric fetches need.
+ */
+export const loader = async ({ request, params }: LoaderFunctionArgs) => {
+  const userId = await requireUserId(request);
+  const routeParams = ParamsSchema.parse(params);
+  const { organizationSlug } = routeParams;
+
+  // The entire page belongs to the metrics UI, so it is gated per-org. The list page
+  // already hides the only link here; this check is defense in depth.
+  const allowed = await canAccessQueueMetricsUi({ userId, organizationSlug });
+  if (!allowed) {
+    throw new Response(undefined, { status: 404, statusText: "Not found" });
+  }
+
+  const requestUrl = new URL(request.url);
+
+  const project = await findProjectBySlug(organizationSlug, routeParams.projectParam, userId);
+  if (!project) {
+    throw new Response(undefined, { status: 404, statusText: "Project not found" });
+  }
+
+  const environment = await findEnvironmentBySlug(project.id, routeParams.envParam, userId);
+  if (!environment) {
+    throw new Response(undefined, { status: 404, statusText: "Environment not found" });
+  }
+
+  const presenter = new QueueRetrievePresenter();
+  const result = await presenter.call({ environment, queueInput: routeParams.queueParam });
+  if (!result.success) {
+    throw new Response(undefined, { status: 404, statusText: "Queue not found" });
+  }
+
+  const { queue } = result;
+  // Task queues are addressed with a "task/" prefix.
+  const fullName = queue.type === "task" ? `task/${queue.name}` : queue.name;
+
+  const ckBreakdown = await engine.concurrencyKeyBreakdown(environment, fullName, {
+    limit: CK_LIVE_LIMIT,
+  });
+
+  // Charts and CH-derived stats are fetched client-side, one request per card (see
+  // QueueDetailChartCard / useQueueMetric), so the drill-down renders instantly. Only the
+  // live "now" counts and the identifiers those fetches need are returned from here.
+  const ids = {
+    organizationId: environment.organizationId,
+    projectId: environment.projectId,
+    environmentId: environment.id,
+  };
+
+  return typedjson({
+    queue,
+    fullName,
+    ckBreakdown,
+    loadedAt: Date.now(),
+    // Drop the last path segment to get the link back to the queue list.
+    backPath: requestUrl.pathname.replace(/\/[^/]+$/, ""),
+    ids,
+  });
+};
+
+// Hex colors for the queue detail chart series, keyed by series name.
+const COLORS = {
+  running: "#6366F1",
+  limit: "#4D525B",
+  queued: "#A78BFA",
+  p50: "#22D3EE",
+  p95: "#F59E0B",
+  p99: "#EF4444",
+  throttled: "#F59E0B",
+  ckKeys: "#34D399",
+  ckWait: "#F59E0B",
+};
+
+// Passed as `limit` to engine.concurrencyKeyBreakdown in the loader.
+const CK_LIVE_LIMIT = 50;
+
+// Identifiers returned by the loader as `ids` and handed to the per-card metric queries.
+type Ids = { organizationId: string; projectId: string; environmentId: string };
+
+// Local alias for the time-range shape exported by useMetricResourceQuery.
+type TimeRangeParams = MetricResourceTimeRange;
+
+// Passed to TimeFilter as its defaultPeriod.
+const QUEUE_METRICS_DEFAULT_PERIOD = "1d";
+
+/**
+ * Queue detail page: live stats and a period picker on top, then either the overview
+ * charts or the concurrency-keys view, chosen by the `view` search param.
+ */
+export default function Page() {
+  const loaderData = useTypedLoaderData<typeof loader>();
+  const { queue, ckBreakdown, ids } = loaderData;
+  const queueName = loaderData.fullName;
+
+  const plan = useCurrentPlan();
+  const maxPeriodDays = plan?.v3Subscription?.plan?.limits?.queryPeriodDays?.number;
+
+  const { value, replace } = useSearchParams();
+  const readParam = (name: string) => value(name) ?? null;
+  const timeRange: TimeRangeParams = {
+    period: readParam("period"),
+    from: readParam("from"),
+    to: readParam("to"),
+  };
+
+  // The Concurrency keys tab is offered only to queues with key activity: either live keys
+  // in the ckIndex, or nonzero CK history inside the selected range. A single cached scalar
+  // query answers the history half.
+  const gate = useQueueMetric(
+    `SELECT max(max_ck_backlogged) AS peak_keys, max(max_ck_wait_ms) AS peak_wait\nFROM queue_metrics`,
+    { ids, timeRange, queueName }
+  );
+  const gateRow = gate.rows[0];
+
+  let hasHistory = false;
+  if (gateRow) {
+    hasHistory = toNumber(gateRow.peak_keys) > 0 || toNumber(gateRow.peak_wait) > 0;
+  }
+
+  const hasLiveKeys = ckBreakdown.keys.length > 0;
+  const showKeysTab = hasLiveKeys || (!gate.showLoading && hasHistory);
+  // A `view=keys` URL falls back to the overview while the tab is not available.
+  const view = value("view") === "keys" && showKeysTab ? "keys" : "overview";
+
+  const selectOverview = () => replace({ view: undefined, key: undefined });
+  const selectKeys = () => replace({ view: "keys" });
+
+  const tabs = showKeysTab && (
+    <TabContainer>
+      <TabButton
+        isActive={view === "overview"}
+        layoutId="queue-detail-view"
+        onClick={selectOverview}
+      >
+        Overview
+      </TabButton>
+      <TabButton isActive={view === "keys"} layoutId="queue-detail-view" onClick={selectKeys}>
+        Concurrency keys
+      </TabButton>
+    </TabContainer>
+  );
+
+  const content =
+    view === "keys" ? (
+      <ConcurrencyKeysView
+        breakdown={ckBreakdown}
+        loadedAt={loaderData.loadedAt}
+        ids={ids}
+        timeRange={timeRange}
+        queueName={queueName}
+      />
+    ) : (
+      <OverviewCharts ids={ids} timeRange={timeRange} queueName={queueName} />
+    );
+
+  return (
+    <PageContainer>
+      <NavBar>
+        <PageTitle title={queue.name} backButton={{ to: loaderData.backPath, text: "Queues" }} />
+      </NavBar>
+      <PageBody>
+        <div className="flex flex-col gap-4 p-3">
+          <div className="flex items-start justify-between gap-2">
+            <QueueStats
+              queue={{ running: queue.running, queued: queue.queued }}
+              ids={ids}
+              timeRange={timeRange}
+              queueName={queueName}
+            />
+            <TimeFilter
+              defaultPeriod={QUEUE_METRICS_DEFAULT_PERIOD}
+              labelName="Period"
+              hideLabel
+              maxPeriodDays={maxPeriodDays}
+              valueClassName="text-text-bright"
+              shortcut={{ key: "d" }}
+            />
+          </div>
+
+          {tabs}
+
+          {content}
+        </div>
+      </PageBody>
+    </PageContainer>
+  );
+}
+
+/**
+ * Overview tab: four stacked time-series cards (running vs limit, queue depth,
+ * scheduling-delay percentiles, throttled count). Every card is scoped to the same
+ * queue and time range and is gap-filled.
+ */
+function OverviewCharts(props: { ids: Ids; timeRange: TimeRangeParams; queueName: string }) {
+  // Props shared by every card on this tab.
+  const common = {
+    ids: props.ids,
+    timeRange: props.timeRange,
+    queueName: props.queueName,
+    fillGaps: true,
+  };
+
+  const concurrencyQuery = `SELECT timeBucket() AS t, max(max_running) AS running, max(max_limit) AS limit\nFROM queue_metrics\nGROUP BY t\nORDER BY t`;
+  const depthQuery = `SELECT timeBucket() AS t, max(max_queued) AS queued\nFROM queue_metrics\nGROUP BY t\nORDER BY t`;
+  // The merged quantile array is indexed from 1: [1] = p50, [3] = p95, [4] = p99.
+  const delayQuery = `SELECT timeBucket() AS t,\n  round(quantilesMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[1]) AS p50,\n  round(quantilesMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[3]) AS p95,\n  round(quantilesMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[4]) AS p99\nFROM queue_metrics\nGROUP BY t\nORDER BY t`;
+  const throttledQuery = `SELECT timeBucket() AS t, sum(throttled_count) AS throttled\nFROM queue_metrics\nGROUP BY t\nORDER BY t`;
+
+  return (
+    <>
+      <QueueDetailChartCard
+        {...common}
+        title="Concurrency"
+        query={concurrencyQuery}
+        series={[
+          { key: "running", label: "Running", color: COLORS.running },
+          { key: "limit", label: "Limit", color: COLORS.limit },
+        ]}
+      />
+      <QueueDetailChartCard
+        {...common}
+        title="Queue depth (backlog)"
+        query={depthQuery}
+        series={[{ key: "queued", label: "Queued", color: COLORS.queued }]}
+      />
+      <QueueDetailChartCard
+        {...common}
+        title="Scheduling delay"
+        query={delayQuery}
+        valueFormat={formatWaitMs}
+        series={[
+          { key: "p50", label: "p50", color: COLORS.p50 },
+          { key: "p95", label: "p95", color: COLORS.p95 },
+          { key: "p99", label: "p99", color: COLORS.p99 },
+        ]}
+      />
+      <QueueDetailChartCard
+        {...common}
+        title="Throttled buckets"
+        query={throttledQuery}
+        series={[{ key: "throttled", label: "Throttled", color: COLORS.throttled }]}
+      />
+    </>
+  );
+}
+
+// Shape of the concurrency-key breakdown handed to ConcurrencyKeysView (the loader's
+// `ckBreakdown`).
+type CkBreakdown = {
+  // NOTE(review): may exceed keys.length, since the loader caps the key list with
+  // CK_LIVE_LIMIT — confirm against the engine.
+  totalBackloggedKeys: number;
+  keys: Array<{
+    concurrencyKey: string;
+    queued: number;
+    running: number;
+    // NOTE(review): presumably an epoch timestamp in milliseconds — confirm.
+    oldestEnqueuedAt: number;
+  }>;
+};
+
+/**
+ * Concurrency keys tab: two grouped per-key charts (backlog and started throughput),
+ * two queue-wide key charts (count of keys with queued runs, and the longest key wait),
+ * and the per-key stats table.
+ */
+function ConcurrencyKeysView(props: {
+  breakdown: CkBreakdown;
+  loadedAt: number;
+  ids: Ids;
+  timeRange: TimeRangeParams;
+  queueName: string;
+}) {
+  // Query scope shared by every card and the table.
+  const scope = {
+    ids: props.ids,
+    timeRange: props.timeRange,
+    queueName: props.queueName,
+  };
+
+  const backlogExpr = "max(max_queued)";
+  const startedExpr = "deltaSumTimestampMerge(started_delta)";
+  const keyCountQuery = `SELECT timeBucket() AS t, max(max_ck_backlogged) AS keys\nFROM queue_metrics\nGROUP BY t\nORDER BY t`;
+  const keyWaitQuery = `SELECT timeBucket() AS t, max(max_ck_wait_ms) AS wait\nFROM queue_metrics\nGROUP BY t\nORDER BY t`;
+
+  return (
+    <>
+      <GroupedKeyChartCard
+        {...scope}
+        title="Backlog by key"
+        rankExpr={backlogExpr}
+        seriesExpr={backlogExpr}
+        fillGaps
+      />
+      <GroupedKeyChartCard
+        {...scope}
+        title="Throughput by key (started)"
+        rankExpr={startedExpr}
+        seriesExpr={startedExpr}
+      />
+      <QueueDetailChartCard
+        {...scope}
+        title="Keys with queued runs (count)"
+        query={keyCountQuery}
+        fillGaps
+        series={[{ key: "keys", label: "Keys", color: COLORS.ckKeys }]}
+      />
+      <QueueDetailChartCard
+        {...scope}
+        title="Most-starved key wait (max across all keys)"
+        query={keyWaitQuery}
+        fillGaps
+        valueFormat={formatWaitMs}
+        series={[{ key: "wait", label: "Max wait", color: COLORS.ckWait }]}
+      />
+      <KeyStatsTable {...scope} breakdown={props.breakdown} loadedAt={props.loadedAt} />
+    </>
+  );
+}
+
+// Escapes a value for use inside a single-quoted TRQL literal by doubling every quote.
+function trqlString(value: string): string {
+  return value.split("'").join("''"); // NOTE(review): backslashes pass through as-is — confirm TRQL ignores them
+}
+
+const KEY_SERIES_COLORS = [ // Line colors for per-key series; indexed modulo its length, so colors repeat past 8 keys.
+  "#34D399",
+  "#6366F1",
+  "#F59E0B",
+  "#22D3EE",
+  "#A78BFA",
+  "#EF4444",
+  "#F472B6",
+  "#84CC16",
+];
+
+type GroupedKeyChartProps = {
+  title: string;
+  /** Aggregate expression used to rank keys over the whole range; the ranking query keeps the top 8. */
+  rankExpr: string;
+  /** Aggregate expression evaluated per (time bucket, concurrency key) and drawn as one line per key. */
+  seriesExpr: string;
+  fillGaps?: boolean; // forwarded to the series query only, not to the ranking query
+  valueFormat?: (value: number) => string; // formats y-axis ticks and tooltip values
+  ids: Ids;
+  timeRange: TimeRangeParams;
+  queueName: string;
+};
+
+// Top-N in two steps: rank concurrency keys across the whole range, then render the ones with a
+// positive rank value as grouped series (the per-key table is activity-bound, so ranking is cheap).
+function GroupedKeyChartCard(props: GroupedKeyChartProps) {
+  const ranking = useQueueMetric(
+    `SELECT concurrency_key, ${props.rankExpr} AS peak\nFROM queue_metrics_by_key\nGROUP BY concurrency_key\nORDER BY peak DESC\nLIMIT 8`,
+    { ids: props.ids, timeRange: props.timeRange, queueName: props.queueName }
+  );
+  const keys = useMemo(() => {
+    const active = ranking.rows.filter((row) => toNumber(row.peak) > 0);
+    return active.map((row) => String(row.concurrency_key));
+  }, [ranking.rows]);
+
+  if (ranking.showLoading || ranking.failed || keys.length === 0) return null;
+  return <GroupedKeySeries keys={keys} {...props} />;
+}
+
+function GroupedKeySeries({ // Charts seriesExpr over time as one line per entry in `keys`.
+  keys,
+  title,
+  seriesExpr,
+  fillGaps,
+  valueFormat,
+  ids,
+  timeRange,
+  queueName,
+}: GroupedKeyChartProps & { keys: string[] }) {
+  const inList = keys.map((k) => `'${trqlString(k)}'`).join(", "); // quoted, escaped literals for the IN (...) filter
+  const { rows, showLoading, failed } = useQueueMetric(
+    `SELECT timeBucket() AS t, concurrency_key, ${seriesExpr} AS v\nFROM queue_metrics_by_key\nWHERE concurrency_key IN (${inList})\nGROUP BY t, concurrency_key\nORDER BY t`,
+    { ids, timeRange, queueName, fillGaps }
+  );
+
+  const data = useMemo(() => { // pivot (t, key, v) rows into one point per bucket: { bucket, [key]: v }
+    const buckets = new Map<number, { bucket: number } & Record<string, number>>();
+    for (const r of rows) {
+      const bucket = clickhouseTimeToMs(r.t);
+      if (!Number.isFinite(bucket)) continue; // drop rows whose timestamp failed to parse
+      let point = buckets.get(bucket);
+      if (!point) {
+        point = { bucket } as { bucket: number } & Record<string, number>;
+        buckets.set(bucket, point);
+      }
+      point[String(r.concurrency_key)] = toNumber(r.v); // NOTE(review): a key literally named "bucket" would overwrite the timestamp
+    }
+    return [...buckets.values()].sort((a, b) => a.bucket - b.bucket);
+  }, [rows]);
+
+  const chartConfig = useMemo(() => { // one config entry per key, labelled with the key itself
+    const cfg: ChartConfig = {};
+    keys.forEach((k, i) => {
+      cfg[k] = { label: k, color: KEY_SERIES_COLORS[i % KEY_SERIES_COLORS.length]! };
+    });
+    return cfg;
+  }, [keys]);
+
+  const { tickFormatter, tooltipLabelFormatter } = useMemo(
+    () => buildActivityTimeAxis(data),
+    [data]
+  );
+  const state: ChartState = showLoading ? "loading" : failed ? "invalid" : undefined;
+
+  return (
+    <div className="h-64">
+      <ChartCard title={title}>
+        <Chart.Root
+          config={chartConfig}
+          data={data}
+          dataKey="bucket"
+          series={keys}
+          state={state}
+          fillContainer
+        >
+          <Chart.Line
+            lineType="monotone"
+            xAxisProps={{ tickFormatter }}
+            yAxisProps={valueFormat ? { tickFormatter: (v: number) => valueFormat(v) } : undefined}
+            tooltipLabelFormatter={tooltipLabelFormatter}
+            tooltipValueFormatter={valueFormat}
+          />
+        </Chart.Root>
+      </ChartCard>
+    </div>
+  );
+}
+
+type KeyRangeStats = { started: number; peakBacklog: number; meanWaitMs: number }; // per-key range-query columns: started, peak_backlog, mean_wait
+
+// Merges the live breakdown (queued/running now, oldest wait) with per-key range stats from
+// queue_metrics_by_key; keys that only appear in the range query are appended after the live ones.
+// Clicking a row toggles the `key` search param, which pins the drill-down charts to that key.
+function KeyStatsTable({
+  breakdown,
+  loadedAt,
+  ids,
+  timeRange,
+  queueName,
+}: {
+  breakdown: CkBreakdown;
+  loadedAt: number;
+  ids: Ids;
+  timeRange: TimeRangeParams;
+  queueName: string;
+}) {
+  const { value, replace, del } = useSearchParams();
+  const selectedKey = value("key");
+
+  const { rows, showLoading } = useQueueMetric( // showLoading drives the "…" placeholder in all three range columns
+    `SELECT concurrency_key,\n  deltaSumTimestampMerge(started_delta) AS started,\n  max(max_queued) AS peak_backlog,\n  if(sum(wait_ms_count) > 0, round(sum(wait_ms_sum) / sum(wait_ms_count)), 0) AS mean_wait\nFROM queue_metrics_by_key\nGROUP BY concurrency_key\nORDER BY peak_backlog DESC\nLIMIT 50`,
+    { ids, timeRange, queueName }
+  );
+
+  const merged = useMemo(() => {
+    const range = new Map<string, KeyRangeStats>(); // range stats indexed by concurrency key
+    for (const r of rows) {
+      range.set(String(r.concurrency_key), {
+        started: toNumber(r.started),
+        peakBacklog: toNumber(r.peak_backlog),
+        meanWaitMs: toNumber(r.mean_wait),
+      });
+    }
+    const liveKeys = new Set(breakdown.keys.map((k) => k.concurrencyKey));
+    const live = breakdown.keys.map((k) => ({
+      key: k.concurrencyKey,
+      queued: k.queued,
+      running: k.running,
+      oldestWaitMs: Math.max(0, loadedAt - k.oldestEnqueuedAt), // clamped so a timestamp after loadedAt never shows a negative wait
+      range: range.get(k.concurrencyKey),
+    }));
+    const historyOnly = [...range.entries()]
+      .filter(([key]) => !liveKeys.has(key))
+      .map(([key, stats]) => ({
+        key,
+        queued: 0,
+        running: 0,
+        oldestWaitMs: null as number | null, // no live entry, so there is no "oldest wait" to show
+        range: stats,
+      }));
+    return [...live, ...historyOnly].slice(0, 50); // live keys first, capped at 50 rows
+  }, [rows, breakdown, loadedAt]);
+
+  if (merged.length === 0) return null;
+
+  return (
+    <>
+      <div className="rounded-sm border border-grid-dimmed bg-background-bright">
+        <div className="flex items-baseline justify-between px-3 pt-2">
+          <div className="text-sm text-text-bright">Concurrency keys</div>
+          <div className="text-xs text-text-dimmed">
+            {breakdown.totalBackloggedKeys > 0
+              ? `${breakdown.totalBackloggedKeys.toLocaleString()} ${
+                  breakdown.totalBackloggedKeys === 1 ? "key" : "keys"
+                } with queued runs now`
+              : "No keys with queued runs right now"}
+          </div>
+        </div>
+        <Table>
+          <TableHeader>
+            <TableRow>
+              <TableHeaderCell>Key</TableHeaderCell>
+              <TableHeaderCell alignment="right">Queued now</TableHeaderCell>
+              <TableHeaderCell alignment="right">Running now</TableHeaderCell>
+              <TableHeaderCell alignment="right">Oldest wait</TableHeaderCell>
+              <TableHeaderCell alignment="right">Started</TableHeaderCell>
+              <TableHeaderCell alignment="right">Peak backlog</TableHeaderCell>
+              <TableHeaderCell alignment="right">Mean delay</TableHeaderCell>
+            </TableRow>
+          </TableHeader>
+          <TableBody>
+            {merged.map((row) => (
+              <TableRow
+                key={row.key}
+                isSelected={selectedKey === row.key}
+                className="cursor-pointer"
+                onClick={() => (selectedKey === row.key ? del("key") : replace({ key: row.key }))}
+              >
+                <TableCell>{row.key}</TableCell>
+                <TableCell alignment="right">{row.queued.toLocaleString()}</TableCell>
+                <TableCell alignment="right">{row.running.toLocaleString()}</TableCell>
+                <TableCell alignment="right">
+                  {row.oldestWaitMs === null ? "–" : formatWaitMs(row.oldestWaitMs)}
+                </TableCell>
+                <TableCell alignment="right">
+                  {row.range ? row.range.started.toLocaleString() : showLoading ? "…" : "–"}
+                </TableCell>
+                <TableCell alignment="right">
+                  {row.range ? row.range.peakBacklog.toLocaleString() : showLoading ? "…" : "–"}
+                </TableCell>
+                <TableCell alignment="right">
+                  {!row.range ? (showLoading ? "…" : "–") : row.range.meanWaitMs > 0 ? formatWaitMs(row.range.meanWaitMs) : "–"}
+                </TableCell>
+              </TableRow>
+            ))}
+          </TableBody>
+        </Table>
+      </div>
+      {selectedKey && (
+        <KeyDrilldown keyName={selectedKey} ids={ids} timeRange={timeRange} queueName={queueName} />
+      )}
+    </>
+  );
+}
+
+function KeyDrilldown({ // Three charts for one concurrency key: backlog/running, throughput, mean scheduling delay.
+  keyName,
+  ids,
+  timeRange,
+  queueName,
+}: {
+  keyName: string;
+  ids: Ids;
+  timeRange: TimeRangeParams;
+  queueName: string;
+}) {
+  const pin = `concurrency_key = '${trqlString(keyName)}'`; // WHERE fragment restricting each query below to this key
+  return (
+    <>
+      <QueueDetailChartCard
+        title={`Key ${keyName}: backlog and running`}
+        query={`SELECT timeBucket() AS t, max(max_queued) AS queued, max(max_running) AS running\nFROM queue_metrics_by_key\nWHERE ${pin}\nGROUP BY t\nORDER BY t`}
+        fillGaps
+        ids={ids}
+        timeRange={timeRange}
+        queueName={queueName}
+        series={[
+          { key: "queued", label: "Queued", color: COLORS.queued },
+          { key: "running", label: "Running", color: COLORS.running },
+        ]}
+      />
+      <QueueDetailChartCard
+        title={`Key ${keyName}: throughput`}
+        query={`SELECT timeBucket() AS t, deltaSumTimestampMerge(started_delta) AS started\nFROM queue_metrics_by_key\nWHERE ${pin}\nGROUP BY t\nORDER BY t`}
+        ids={ids}
+        timeRange={timeRange}
+        queueName={queueName}
+        series={[{ key: "started", label: "Started", color: COLORS.ckKeys }]}
+      />
+      <QueueDetailChartCard
+        title={`Key ${keyName}: mean scheduling delay`}
+        query={`SELECT timeBucket() AS t, if(sum(wait_ms_count) > 0, round(sum(wait_ms_sum) / sum(wait_ms_count)), 0) AS wait\nFROM queue_metrics_by_key\nWHERE ${pin}\nGROUP BY t\nORDER BY t`}
+        ids={ids}
+        timeRange={timeRange}
+        queueName={queueName}
+        valueFormat={formatWaitMs}
+        series={[{ key: "wait", label: "Mean delay", color: COLORS.p95 }]}
+      />
+    </>
+  );
+}
+
+// Thin wrapper over useMetricResourceQuery for this page: restricts the query to a single
+// queue and supplies QUEUE_METRICS_DEFAULT_PERIOD as the default period.
+function useQueueMetric(
+  query: string,
+  opts: { ids: Ids; timeRange: TimeRangeParams; queueName: string; fillGaps?: boolean }
+) {
+  const { ids, timeRange, queueName, fillGaps } = opts;
+
+  const queues = [queueName];
+  const defaultPeriod = QUEUE_METRICS_DEFAULT_PERIOD;
+  return useMetricResourceQuery(query, { ...ids, timeRange, defaultPeriod, queues, fillGaps });
+}
+
+// Coerces a query cell to a finite number; anything that is not finite (NaN, ±Infinity, undefined) becomes 0.
+function toNumber(value: number | string | null | undefined): number {
+  return Number.isFinite(Number(value)) ? Number(value) : 0;
+}
+
+// Parses a timestamp cell as UTC epoch ms: the first space becomes "T" and a trailing "Z" is added if missing.
+function clickhouseTimeToMs(value: unknown): number {
+  return Date.parse(String(value).replace(" ", "T").replace(/Z?$/, "Z")); // NaN when the text is not a date
+}
+
+type SeriesConfig = { key: string; label: string; color: string }; // key = result column to plot; label and color feed the chart config
+
+function QueueDetailChartCard({ // Runs one time-bucketed query and draws each configured series as a line.
+  title,
+  query,
+  series,
+  ids,
+  timeRange,
+  queueName,
+  valueFormat,
+  fillGaps,
+}: {
+  title: string;
+  query: string;
+  series: SeriesConfig[];
+  ids: Ids;
+  timeRange: TimeRangeParams;
+  queueName: string;
+  valueFormat?: (value: number) => string;
+  fillGaps?: boolean;
+}) {
+  const { rows, showLoading, failed } = useQueueMetric(query, {
+    ids,
+    timeRange,
+    queueName,
+    fillGaps,
+  });
+
+  const data = useMemo(() => { // one point per row: { bucket: epoch ms from column t, [series key]: number }
+    return rows
+      .map((r) => {
+        const point: { bucket: number } & Record<string, number> = {
+          bucket: clickhouseTimeToMs(r.t),
+        };
+        for (const s of series) point[s.key] = toNumber(r[s.key]);
+        return point;
+      })
+      .filter((p) => Number.isFinite(p.bucket)); // drop rows whose timestamp failed to parse
+  }, [rows, series]);
+
+  const chartConfig = useMemo(() => {
+    const cfg: ChartConfig = {};
+    for (const s of series) cfg[s.key] = { label: s.label, color: s.color };
+    return cfg;
+  }, [series]);
+
+  const { tickFormatter, tooltipLabelFormatter } = useMemo(
+    () => buildActivityTimeAxis(data),
+    [data]
+  );
+
+  const state: ChartState = showLoading ? "loading" : failed ? "invalid" : undefined; // loading takes precedence over failed
+
+  return (
+    <div className="h-64">
+      <ChartCard title={title}>
+        <Chart.Root
+          config={chartConfig}
+          data={data}
+          dataKey="bucket"
+          series={series.map((s) => s.key)}
+          state={state}
+          fillContainer
+        >
+          <Chart.Line
+            lineType="monotone"
+            xAxisProps={{ tickFormatter }}
+            yAxisProps={valueFormat ? { tickFormatter: (v: number) => valueFormat(v) } : undefined}
+            tooltipLabelFormatter={tooltipLabelFormatter}
+            tooltipValueFormatter={valueFormat}
+          />
+        </Chart.Root>
+      </ChartCard>
+    </div>
+  );
+}
+
+function QueueStats({ // Summary tiles: running/queued now, plus limit, peak queued, started and p95 delay over the range.
+  queue,
+  ids,
+  timeRange,
+  queueName,
+}: {
+  queue: { running: number; queued: number };
+  ids: Ids;
+  timeRange: TimeRangeParams;
+  queueName: string;
+}) {
+  // One ungrouped query feeds the four range stats; the two "now" counts come from the `queue` prop (loader data, live).
+  const { rows, showLoading } = useQueueMetric(
+    `SELECT max(max_limit) AS lim, max(max_queued) AS peak_queued, deltaSumTimestampMerge(started_delta) AS started,\n  round(quantilesMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[3]) AS worst_p95\nFROM queue_metrics`,
+    { ids, timeRange, queueName }
+  );
+  const row = rows[0];
+  const worstP95 = row ? toNumber(row.worst_p95) : 0; // 0 doubles as "no data" and renders as "–"
+
+  return (
+    <div className="grid grid-cols-2 gap-2 sm:grid-cols-4 lg:grid-cols-6">
+      <Stat label="Running now" value={queue.running.toLocaleString()} />
+      <Stat label="Queued now" value={queue.queued.toLocaleString()} />
+      <Stat
+        label="Limit"
+        value={row ? toNumber(row.lim).toLocaleString() : "–"}
+        loading={showLoading}
+      />
+      <Stat
+        label="Peak queued"
+        value={row ? toNumber(row.peak_queued).toLocaleString() : "–"}
+        loading={showLoading}
+      />
+      <Stat
+        label="Started"
+        value={row ? toNumber(row.started).toLocaleString() : "–"}
+        loading={showLoading}
+      />
+      <Stat
+        label="Delay p95"
+        value={worstP95 > 0 ? formatWaitMs(worstP95) : "–"}
+        loading={showLoading}
+        className={worstP95 >= 60_000 ? "text-warning" : undefined}
+      />
+    </div>
+  );
+}
+
+// One summary tile: a dimmed label on top and a large value below. While `loading` is set the
+// value is replaced by a pulsing placeholder; `className` is merged onto the value element only.
+function Stat(props: {
+  label: string;
+  value: string;
+  className?: string;
+  loading?: boolean;
+}) {
+  const { label, value, className, loading } = props;
+  const content = loading ? (
+    <div className="mt-1 h-6 w-12 animate-pulse rounded bg-grid-bright/50" />
+  ) : (
+    <div className={cn("text-2xl tabular-nums text-text-bright", className)}>{value}</div>
+  );
+
+  return (
+    <div className="rounded-sm border border-grid-dimmed bg-background-bright px-3 py-2">
+      <div className="text-xs text-text-dimmed">{label}</div>
+      {content}
+    </div>
+  );
+}
+
+function formatWaitMs(ms: number): string { // Compact duration label in ms, s, m or h; the unit is chosen after rounding.
+  if (Math.round(ms) < 1000) return `${Math.round(ms)}ms`; // 999.5+ rolls up to "1.0s" instead of "1000ms"
+  if (ms < 59_950) return `${(ms / 1000).toFixed(1)}s`; // from 59.95s toFixed(1) would print "60.0s", so switch to minutes
+  if (ms < 3_597_000) return `${(ms / 60_000).toFixed(1)}m`; // from 59.95m toFixed(1) would print "60.0m", so switch to hours
+  return `${(ms / 3_600_000).toFixed(1)}h`;
+}
diff --git a/apps/webapp/app/routes/admin.api.v1.queue-metrics.ts b/apps/webapp/app/routes/admin.api.v1.queue-metrics.ts
new file mode 100644
index 00000000000..69e4e8c1fac
--- /dev/null
+++ b/apps/webapp/app/routes/admin.api.v1.queue-metrics.ts
@@ -0,0 +1,45 @@
+import { type ActionFunctionArgs, type LoaderFunctionArgs, json } from "@remix-run/server-runtime";
+import { z } from "zod";
+import { requireAdminApiRequest } from "~/services/personalAccessToken.server";
+import {
+  probeQueueMetricsStreams,
+  readQueueMetricsControls,
+  writeQueueMetricsControls,
+} from "~/v3/queueMetrics.server";
+
+// Loader, guarded by requireAdminApiRequest: returns the current controls together with the stream probe.
+export async function loader({ request }: LoaderFunctionArgs) {
+  await requireAdminApiRequest(request);
+  // The two reads do not depend on each other, so both are started before awaiting.
+  const reads = [readQueueMetricsControls(), probeQueueMetricsStreams()] as const;
+  const [controls, streams] = await Promise.all(reads);
+  return json({ controls, streams });
+}
+
+const BodySchema = z.object({ // Both fields are optional, so a request may set either control on its own.
+  enabled: z.boolean().optional(),
+  sampleRate: z.number().min(0).max(1).optional(), // inclusive 0..1
+});
+
+// POST only, after the admin check: validate the JSON body, write it, then return the controls read back.
+export async function action({ request }: ActionFunctionArgs) {
+  await requireAdminApiRequest(request);
+  if (request.method !== "POST") {
+    return json({ error: "Method not allowed" }, { status: 405 });
+  }
+
+  let raw: unknown;
+  try {
+    raw = await request.json();
+  } catch {
+    return json({ error: "Invalid JSON body" }, { status: 400 });
+  }
+  const result = BodySchema.safeParse(raw);
+  if (result.success === false) {
+    return json({ error: "Invalid payload", details: result.error.issues }, { status: 400 });
+  }
+  // The response carries the controls as read back after the write, not the submitted payload.
+  await writeQueueMetricsControls(result.data);
+  const controls = await readQueueMetricsControls();
+  return json({ ok: true, controls });
+}
diff --git a/apps/webapp/app/routes/admin.queue-metrics.tsx b/apps/webapp/app/routes/admin.queue-metrics.tsx
new file mode 100644
index 00000000000..6deaedce66e
--- /dev/null
+++ b/apps/webapp/app/routes/admin.queue-metrics.tsx
@@ -0,0 +1,190 @@
+import { useFetcher, useRevalidator } from "@remix-run/react";
+import { json } from "@remix-run/server-runtime";
+import { useEffect, useState } from "react";
+import { typedjson, useTypedLoaderData } from "remix-typedjson";
+import { z } from "zod";
+import { Button } from "~/components/primitives/Buttons";
+import { Callout } from "~/components/primitives/Callout";
+import { Header1, Header2 } from "~/components/primitives/Headers";
+import { Input } from "~/components/primitives/Input";
+import { Paragraph } from "~/components/primitives/Paragraph";
+import {
+  Table,
+  TableBody,
+  TableCell,
+  TableHeader,
+  TableHeaderCell,
+  TableRow,
+} from "~/components/primitives/Table";
+import { dashboardAction, dashboardLoader } from "~/services/routeBuilders/dashboardBuilder";
+import {
+  probeQueueMetricsStreams,
+  readQueueMetricsControls,
+  writeQueueMetricsControls,
+} from "~/v3/queueMetrics.server";
+
+// Page loader (requireSuper): current controls plus the stream probe, with both reads started
+// before either is awaited.
+export const loader = dashboardLoader({ authorization: { requireSuper: true } }, async () => {
+  const pending = [readQueueMetricsControls(), probeQueueMetricsStreams()] as const;
+  const [controls, streams] = await Promise.all(pending);
+  return typedjson({ controls, streams });
+});
+
+const BodySchema = z.object({ // Shape of the JSON posted by the controls form; both fields are optional.
+  enabled: z.boolean().optional(),
+  sampleRate: z.number().min(0).max(1).optional(), // inclusive 0..1
+});
+
+// Saves the controls form (requireSuper): JSON body in, { success: true } or a 400 { error } out.
+export const action = dashboardAction(
+  { authorization: { requireSuper: true } },
+  async ({ request }) => {
+    const badRequest = (error: string) => json({ error }, { status: 400 });
+    let payload: unknown;
+    try {
+      payload = await request.json();
+    } catch {
+      return badRequest("Invalid JSON body");
+    }
+    const result = BodySchema.safeParse(payload);
+    if (!result.success) return badRequest("Invalid payload");
+    await writeQueueMetricsControls(result.data);
+    return json({ success: true });
+  }
+);
+
+export default function AdminQueueMetricsRoute() { // Admin page: edit the ingest controls and inspect per-shard stream health.
+  const { controls, streams } = useTypedLoaderData<typeof loader>();
+  const saveFetcher = useFetcher<{ success?: boolean; error?: string }>();
+  const revalidator = useRevalidator();
+
+  const [enabled, setEnabled] = useState(controls.enabled);
+  const [sampleRate, setSampleRate] = useState(String(controls.sampleRate)); // kept as text so partial input is preserved while typing
+  const [error, setError] = useState<string | null>(null);
+
+  useEffect(() => { // re-sync the form whenever the loader returns different control values
+    setEnabled(controls.enabled);
+    setSampleRate(String(controls.sampleRate));
+  }, [controls.enabled, controls.sampleRate]);
+
+  useEffect(() => { // react to the save response: clear the error and reload on success, show the error otherwise
+    if (saveFetcher.data?.success) {
+      setError(null);
+      revalidator.revalidate();
+    } else if (saveFetcher.data?.error) {
+      setError(saveFetcher.data.error);
+    }
+  }, [saveFetcher.data]);
+
+  const isSaving = saveFetcher.state === "submitting";
+
+  const handleSave = () => { // client-side range check, then POST both controls as JSON to this route's action
+    const rate = Number(sampleRate);
+    if (!Number.isFinite(rate) || rate < 0 || rate > 1) {
+      setError("Sample rate must be a number between 0 and 1");
+      return;
+    }
+    saveFetcher.submit(JSON.stringify({ enabled, sampleRate: rate }), {
+      method: "POST",
+      encType: "application/json",
+    });
+  };
+
+  const totalLag = streams.reduce((sum, s) => sum + (s.lag ?? 0), 0); // null (unknown) lag counts as 0 in the total
+  const lagUnknownCount = streams.filter((s) => s.lag === null).length; // shards with unknown lag get their own callout
+
+  return (
+    <main className="flex h-full min-w-0 flex-1 flex-col overflow-y-auto px-4 pb-4 lg:order-last">
+      <div className="max-w-2xl space-y-4 py-4">
+        <Header1>Queue metrics ingest</Header1>
+        <Callout variant="warning">
+          Live controls for the queue-metrics ingest pipeline on the run-queue Redis. Changes take
+          effect within ~10s across all instances (no redeploy). Watch EngineCPU on the run-queue
+          Redis when enabling or raising the sample rate.
+        </Callout>
+
+        <div className="space-y-3 rounded-md border border-grid-bright p-4">
+          <Header2>Controls</Header2>
+          <label className="flex items-center gap-2 text-sm text-text-bright">
+            <input
+              type="checkbox"
+              checked={enabled}
+              onChange={(e) => setEnabled(e.target.checked)}
+            />
+            Emission enabled <span className="text-text-dimmed">(queue_metrics:enabled)</span>
+          </label>
+          <div className="flex flex-col gap-1">
+            <label className="text-sm text-text-dimmed">
+              Gauge sample rate 0–1 (queue_metrics:gauge_sample_rate); default{" "}
+              {controls.sampleRateDefault}
+            </label>
+            <Input
+              type="number"
+              min={0}
+              max={1}
+              step={0.05}
+              value={sampleRate}
+              onChange={(e) => setSampleRate(e.target.value)}
+              className="w-32"
+            />
+          </div>
+          {error && <Callout variant="error">{error}</Callout>}
+          <div className="flex justify-end">
+            <Button variant="primary/small" onClick={handleSave} disabled={isSaving}>
+              {isSaving ? "Saving..." : "Save controls"}
+            </Button>
+          </div>
+        </div>
+
+        <div className="space-y-3 rounded-md border border-grid-bright p-4">
+          <div className="flex items-center justify-between">
+            <Header2>Stream health{totalLag > 0 ? ` (lag ${totalLag})` : ""}</Header2>
+            <Button
+              variant="tertiary/small"
+              onClick={() => revalidator.revalidate()}
+              disabled={revalidator.state === "loading"}
+            >
+              Refresh
+            </Button>
+          </div>
+          <Paragraph variant="extra-small">
+            Depth = entries buffered in the shard stream; Lag = entries not yet delivered to the
+            consumer group (rising = consumer falling behind; "unknown" = entries were trimmed past
+            the group, i.e. data was lost); Pending = unacked entries. Gauges and counters share one
+            stream family on the metrics Redis.
+          </Paragraph>
+          {lagUnknownCount > 0 && (
+            <Callout variant="error">
+              Lag is unknown on {lagUnknownCount} shard{lagUnknownCount === 1 ? "" : "s"}: entries
+              were trimmed past the consumer group's read position, so stream data was lost. Check
+              consumer health.
+            </Callout>
+          )}
+          <Table>
+            <TableHeader>
+              <TableRow>
+                <TableHeaderCell>Stream</TableHeaderCell>
+                <TableHeaderCell>Shard</TableHeaderCell>
+                <TableHeaderCell alignment="right">Depth</TableHeaderCell>
+                <TableHeaderCell alignment="right">Lag</TableHeaderCell>
+                <TableHeaderCell alignment="right">Pending</TableHeaderCell>
+              </TableRow>
+            </TableHeader>
+            <TableBody>
+              {streams.map((s) => (
+                <TableRow key={`${s.stream}-${s.shard}`}>
+                  <TableCell>{s.stream}</TableCell>
+                  <TableCell>{s.shard}</TableCell>
+                  <TableCell alignment="right">{s.depth}</TableCell>
+                  <TableCell alignment="right">{s.lag ?? "unknown"}</TableCell>
+                  <TableCell alignment="right">{s.pending}</TableCell>
+                </TableRow>
+              ))}
+            </TableBody>
+          </Table>
+        </div>
+      </div>
+    </main>
+  );
+}
diff --git a/apps/webapp/app/routes/admin.tsx b/apps/webapp/app/routes/admin.tsx
index a95b016ca5b..7d24fe312fa 100644
--- a/apps/webapp/app/routes/admin.tsx
+++ b/apps/webapp/app/routes/admin.tsx
@@ -38,6 +38,10 @@ export default function Page() {
               label: "Global Feature Flags",
               to: "/admin/feature-flags",
             },
+            {
+              label: "Queue Metrics",
+              to: "/admin/queue-metrics",
+            },
             {
               label: "Notifications",
               to: "/admin/notifications",
diff --git a/apps/webapp/app/routes/api.v1.query.schema.ts b/apps/webapp/app/routes/api.v1.query.schema.ts
index 3e95d16818d..976fa72b267 100644
--- a/apps/webapp/app/routes/api.v1.query.schema.ts
+++ b/apps/webapp/app/routes/api.v1.query.schema.ts
@@ -1,7 +1,7 @@
 import { json } from "@remix-run/server-runtime";
 import type { ColumnSchema, TableSchema } from "@internal/tsql";
 import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server";
-import { querySchemas } from "~/v3/querySchemas";
+import { visibleQuerySchemas } from "~/v3/querySchemas";
 
 function serializeColumn(col: ColumnSchema) {
   const result: Record<string, unknown> = {
@@ -51,7 +51,7 @@ export const loader = createLoaderApiRoute(
     },
   },
   async () => {
-    const tables = querySchemas.map(serializeTable);
+    const tables = visibleQuerySchemas.map(serializeTable);
     return json({ tables });
   }
 );
diff --git a/apps/webapp/app/routes/resources.metric.tsx b/apps/webapp/app/routes/resources.metric.tsx
index d456ba1ce1b..5bf0ed693ad 100644
--- a/apps/webapp/app/routes/resources.metric.tsx
+++ b/apps/webapp/app/routes/resources.metric.tsx
@@ -50,6 +50,8 @@ const MetricWidgetQuery = z.object({
   operations: z.array(z.string()).optional(),
   providers: z.array(z.string()).optional(),
   tags: z.array(z.string()).optional(),
+  // Opt into server-side gap fill (carry-forward for gauges, zero-fill for counters).
+  fillGaps: z.boolean().optional(),
 });
 
 export const action = async ({ request }: ActionFunctionArgs) => {
@@ -85,6 +87,7 @@ export const action = async ({ request }: ActionFunctionArgs) => {
     operations,
     providers,
     tags: _tags,
+    fillGaps,
   } = submission.data;
 
   // Check they should be able to access it
@@ -122,6 +125,7 @@ export const action = async ({ request }: ActionFunctionArgs) => {
     promptVersions,
     operations,
     providers,
+    fillGaps,
     // Set higher concurrency if many widgets are on screen at once
     customOrgConcurrencyLimit: env.METRIC_WIDGET_DEFAULT_ORG_CONCURRENCY_LIMIT,
   });
diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query.ai-generate.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query.ai-generate.tsx
index c1626b966d2..4a9ab462dcf 100644
--- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query.ai-generate.tsx
+++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query.ai-generate.tsx
@@ -8,7 +8,7 @@ import type { AITimeFilter } from "~/routes/_app.orgs.$organizationSlug.projects
 import { requireUserId } from "~/services/session.server";
 import { EnvironmentParamSchema } from "~/utils/pathBuilder";
 import { AIQueryService } from "~/v3/services/aiQueryService.server";
-import { querySchemas } from "~/v3/querySchemas";
+import { visibleQuerySchemas } from "~/v3/querySchemas";
 
 const RequestSchema = z.object({
   prompt: z.string().min(1, "Prompt is required"),
@@ -85,7 +85,7 @@ export async function action({ request, params }: ActionFunctionArgs) {
   const { prompt, mode, currentQuery } = submission.data;
 
   const service = new AIQueryService(
-    querySchemas,
+    visibleQuerySchemas,
     openai(env.AI_RUN_FILTER_MODEL ?? "gpt-4o-mini")
   );
 
diff --git a/apps/webapp/app/services/queryService.server.ts b/apps/webapp/app/services/queryService.server.ts
index 57b877ed876..70af40ec89f 100644
--- a/apps/webapp/app/services/queryService.server.ts
+++ b/apps/webapp/app/services/queryService.server.ts
@@ -7,7 +7,12 @@ import {
   type TSQLQueryResult,
 } from "@internal/clickhouse";
 import type { CustomerQuerySource } from "@trigger.dev/database";
-import type { TableSchema, WhereClauseCondition } from "@internal/tsql";
+import {
+  calculateTimeBucketInterval,
+  type TableSchema,
+  type TimeBucketInterval,
+  type WhereClauseCondition,
+} from "@internal/tsql";
 import { z } from "zod";
 import { prisma } from "~/db.server";
 import { env } from "~/env.server";
@@ -110,6 +115,41 @@ export type ExecuteQueryResult<T> =
     }
   | { success: false; error: Error };
 
+const INTERVAL_UNIT_SECONDS: Record<TimeBucketInterval["unit"], number> = { // seconds per bucket-interval unit
+  SECOND: 1,
+  MINUTE: 60,
+  HOUR: 3_600,
+  DAY: 86_400,
+  WEEK: 604_800,
+  MONTH: 2_592_000, // 30 days; months are approximated as fixed length
+};
+
+// Rounds a date down to the nearest multiple of alignSeconds, counted from the Unix epoch.
+function floorToSeconds(date: Date, alignSeconds: number): Date {
+  return new Date(Math.floor(date.getTime() / (alignSeconds * 1000)) * (alignSeconds * 1000));
+}
+
+/**
+ * Redirect a table to one of its rollups when the query's bucket interval is coarse enough.
+ * Among rollups whose `minIntervalSeconds` fits within the interval, the largest wins; only
+ * `clickhouseName` changes on the returned schema. Without a match the schema is returned as is.
+ */
+function resolveRollup(schema: TableSchema, timeRange: { from: Date; to: Date }): TableSchema {
+  const rollups = schema.rollups;
+  if (!rollups || rollups.length === 0) return schema;
+
+  const { from, to } = timeRange;
+  const interval = calculateTimeBucketInterval(from, to, schema.timeBucketThresholds);
+  const intervalSeconds = interval.value * INTERVAL_UNIT_SECONDS[interval.unit];
+  let best: (typeof rollups)[number] | undefined;
+  for (const candidate of rollups) {
+    if (candidate.minIntervalSeconds > intervalSeconds) continue;
+    // Strict comparison keeps the earliest entry when two rollups share a granularity.
+    if (!best || candidate.minIntervalSeconds > best.minIntervalSeconds) best = candidate;
+  }
+  return best ? { ...schema, clickhouseName: best.clickhouseName } : schema;
+}
+
 export async function getDefaultPeriod(organizationId: string): Promise<string> {
   const idealDefaultPeriodDays = 7;
   const maxQueryPeriod = await getLimit(organizationId, "queryPeriodDays", 30);
@@ -183,6 +223,14 @@ export async function executeQuery<TOut extends z.ZodSchema>(
     defaultPeriod,
   });
 
+  // Align the time bounds so repeated auto-refresh queries produce identical query
+  // params and can share ClickHouse query-cache entries (params are part of the key).
+  const alignSeconds = matchedSchema?.queryCache?.alignSeconds;
+  if (alignSeconds) {
+    if (timeFilter.from) timeFilter.from = floorToSeconds(timeFilter.from, alignSeconds);
+    if (timeFilter.to) timeFilter.to = floorToSeconds(timeFilter.to, alignSeconds);
+  }
+
   // Calculate the effective "from" date the user is requesting (for period clipping check)
   // This is null only when the user specifies just a "to" date (rare case)
   let requestedFromDate: Date | null = null;
@@ -192,6 +240,9 @@ export async function executeQuery<TOut extends z.ZodSchema>(
     // Period specified (or default) - calculate from now
     const periodMs = parse(timeFilter.period ?? defaultPeriod) ?? 7 * 24 * 60 * 60 * 1000;
     requestedFromDate = new Date(Date.now() - periodMs);
+    if (alignSeconds) {
+      requestedFromDate = floorToSeconds(requestedFromDate, alignSeconds);
+    }
   }
 
   // Build the fallback WHERE condition based on what the user specified
@@ -207,7 +258,10 @@ export async function executeQuery<TOut extends z.ZodSchema>(
   }
 
   const maxQueryPeriod = await getLimit(organizationId, "queryPeriodDays", 30);
-  const maxQueryPeriodDate = new Date(Date.now() - maxQueryPeriod * 24 * 60 * 60 * 1000);
+  let maxQueryPeriodDate = new Date(Date.now() - maxQueryPeriod * 24 * 60 * 60 * 1000);
+  if (alignSeconds) {
+    maxQueryPeriodDate = floorToSeconds(maxQueryPeriodDate, alignSeconds);
+  }
 
   // Check if the requested time period exceeds the plan limit
   const periodClipped = requestedFromDate !== null && requestedFromDate < maxQueryPeriodDate;
@@ -255,6 +309,10 @@ export async function executeQuery<TOut extends z.ZodSchema>(
     to: to ?? undefined,
     defaultPeriod,
   });
+  if (alignSeconds) {
+    timeRange.from = floorToSeconds(timeRange.from, alignSeconds);
+    timeRange.to = floorToSeconds(timeRange.to, alignSeconds);
+  }
 
   try {
     // Build field mappings for project_ref → project_id and environment_id → slug translation
@@ -277,10 +335,19 @@ export async function executeQuery<TOut extends z.ZodSchema>(
       organizationId,
       "query"
     );
+    // Serve coarse-bucket queries from the table's rollup when one qualifies.
+    const effectiveSchemas = matchedSchema?.rollups
+      ? querySchemas.map((s) => (s === matchedSchema ? resolveRollup(s, timeRange) : s))
+      : querySchemas;
+
+    const queryCacheSettings: ClickHouseSettings = matchedSchema?.queryCache
+      ? { use_query_cache: 1, query_cache_ttl: matchedSchema.queryCache.ttlSeconds }
+      : {};
+
     const result = await executeTSQL(queryClickhouse.reader, {
       ...baseOptions,
       schema: z.record(z.any()),
-      tableSchema: querySchemas,
+      tableSchema: effectiveSchemas,
       transformValues: true,
       enforcedWhereClause,
       fieldMappings,
@@ -290,6 +357,7 @@ export async function executeQuery<TOut extends z.ZodSchema>(
       timeRange,
       clickhouseSettings: {
         ...getDefaultClickhouseSettings(),
+        ...queryCacheSettings,
         ...baseOptions.clickhouseSettings, // Allow caller overrides if needed
       },
       querySettings: {
diff --git a/apps/webapp/app/utils/pathBuilder.ts b/apps/webapp/app/utils/pathBuilder.ts
index 187bc50b549..edd65f8bde4 100644
--- a/apps/webapp/app/utils/pathBuilder.ts
+++ b/apps/webapp/app/utils/pathBuilder.ts
@@ -522,6 +522,15 @@ export function v3QueuesPath(
   return `${v3EnvironmentPath(organization, project, environment)}/queues`;
 }
 
+export function v3QueuePath(
+  organization: OrgForPath,
+  project: ProjectForPath,
+  environment: EnvironmentForPath,
+  queue: { friendlyId: string }
+) {
+  return v3QueuesPath(organization, project, environment) + "/" + queue.friendlyId;
+}
+
 export function v3WaitpointTokensPath(
   organization: OrgForPath,
   project: ProjectForPath,
diff --git a/apps/webapp/app/v3/canAccessQueueMetricsUi.server.ts b/apps/webapp/app/v3/canAccessQueueMetricsUi.server.ts
new file mode 100644
index 00000000000..0e3c142b272
--- /dev/null
+++ b/apps/webapp/app/v3/canAccessQueueMetricsUi.server.ts
@@ -0,0 +1,26 @@
+import { prisma } from "~/db.server";
+import { FEATURE_FLAG } from "~/v3/featureFlags";
+import { makeFlag } from "~/v3/featureFlags.server";
+
+// Per-org gate for the Queue Metrics dashboard UI (view only; ingestion/emission is a
+// separate global flag). Precedence: org override, then the global FeatureFlag table
+// value, then the default of off. Orgs the user is not a member of contribute no overrides.
+export async function canAccessQueueMetricsUi(options: {
+  userId: string;
+  organizationSlug: string;
+}): Promise<boolean> {
+  const { userId, organizationSlug } = options;
+  const membership = { some: { userId } };
+
+  const org = await prisma.organization.findFirst({
+    where: { slug: organizationSlug, members: membership },
+    select: { featureFlags: true },
+  });
+  const overrides = (org?.featureFlags as Record<string, unknown>) ?? {};
+
+  return makeFlag()({
+    key: FEATURE_FLAG.queueMetricsUiEnabled,
+    defaultValue: false,
+    overrides,
+  });
+}
diff --git a/apps/webapp/app/v3/featureFlags.ts b/apps/webapp/app/v3/featureFlags.ts
index 637830aef06..fa17e504a37 100644
--- a/apps/webapp/app/v3/featureFlags.ts
+++ b/apps/webapp/app/v3/featureFlags.ts
@@ -19,6 +19,7 @@ export const FEATURE_FLAG = {
   computeMigrationRequireTemplate: "computeMigrationRequireTemplate",
   devBranchesEnabled: "devBranchesEnabled",
   runOpsMintKind: "runOpsMintKind",
+  queueMetricsUiEnabled: "queueMetricsUiEnabled",
 } as const;
 
 export const FeatureFlagCatalog = {
@@ -54,6 +55,9 @@ export const FeatureFlagCatalog = {
   // Per-org run-ops-id mint cutover. Defaults to "cuid"; only honored when
   // RUN_OPS_MINT_ENABLED is on AND isSplitEnabled() is true.
   [FEATURE_FLAG.runOpsMintKind]: z.enum(["cuid", "runOpsId"]),
+  // Per-org access to the Queue Metrics dashboard UI (view only; emission is global and
+  // separate). Off unless enabled for the org.
+  [FEATURE_FLAG.queueMetricsUiEnabled]: z.coerce.boolean(),
 };
 
 export type FeatureFlagKey = keyof typeof FeatureFlagCatalog;
diff --git a/apps/webapp/app/v3/querySchemas.ts b/apps/webapp/app/v3/querySchemas.ts
index 4784ad75629..540ae670091 100644
--- a/apps/webapp/app/v3/querySchemas.ts
+++ b/apps/webapp/app/v3/querySchemas.ts
@@ -614,8 +614,333 @@ export const metricsSchema: TableSchema = {
 };
 
 /**
- * All available schemas for the query editor
+ * Schema definition for the queue_metrics table (trigger_dev.queue_metrics_v1), which is
+ * pre-aggregated into 10-second buckets. Re-aggregate plain counts with sum(), gauges with max(),
+ * *_delta states with deltaSumTimestampMerge(), wait_quantiles with quantilesMerge(). Never FINAL.
  */
+export const queueMetricsSchema: TableSchema = {
+  name: "queue_metrics",
+  clickhouseName: "trigger_dev.queue_metrics_v1",
+  description: "Per-queue depth, concurrency, throttling, and scheduling-delay metrics",
+  timeConstraint: "bucket_start",
+  tenantColumns: {
+    organizationId: "organization_id",
+    projectId: "project_id",
+    environmentId: "environment_id",
+  },
+  columns: {
+    environment: {
+      name: "environment",
+      clickhouseName: "environment_id",
+      ...column("String", { description: "The environment slug", example: "prod" }),
+      fieldMapping: "environment",
+      customRenderType: "environment",
+    },
+    project: {
+      name: "project",
+      clickhouseName: "project_id",
+      ...column("String", {
+        description: "The project reference, they always start with `proj_`.",
+        example: "proj_howcnaxbfxdmwmxazktx",
+      }),
+      fieldMapping: "project",
+      customRenderType: "project",
+    },
+    queue: {
+      name: "queue",
+      clickhouseName: "queue_name",
+      ...column("LowCardinality(String)", {
+        description: "The queue name",
+        example: "my-queue",
+        coreColumn: true,
+      }),
+    },
+    bucket_start: {
+      name: "bucket_start",
+      ...column("DateTime", {
+        description: "The start of the 10-second aggregation bucket",
+        example: "2024-01-15 09:30:00",
+        coreColumn: true,
+      }),
+    },
+    // Cumulative-counter delta states. Read with deltaSumTimestampMerge(<col>) (loss-tolerant,
+    // reset-safe), never sum(); opaque like wait_quantiles. Merging across queues is
+    // invalid (mixes unrelated odometers): totals must GROUP BY queue, then sum outside.
+    enqueue_delta: {
+      name: "enqueue_delta",
+      mergeGroupKey: "queue",
+      ...column("String", {
+        description:
+          "Runs enqueued (cumulative-counter delta). Read with deltaSumTimestampMerge(enqueue_delta) grouped by queue. For totals across queues, sum the per-queue results in an outer query, never merge across queues. Per-bucket values can undercount by one inter-reading delta at bucket boundaries (the bridge lives in the prior bucket's state); totals over the whole range are exact.",
+      }),
+      groupable: false,
+      sortable: false,
+      filterable: false,
+    },
+    started_delta: {
+      name: "started_delta",
+      mergeGroupKey: "queue",
+      ...column("String", {
+        description:
+          "Runs dequeued/started (throughput). Read with deltaSumTimestampMerge(started_delta) grouped by queue. For totals across queues, sum the per-queue results in an outer query, never merge across queues. Per-bucket values can undercount by one inter-reading delta at bucket boundaries (the bridge lives in the prior bucket's state); totals over the whole range are exact.",
+        coreColumn: true,
+      }),
+      groupable: false,
+      sortable: false,
+      filterable: false,
+    },
+    ack_delta: {
+      name: "ack_delta",
+      mergeGroupKey: "queue",
+      ...column("String", {
+        description:
+          "Runs acked (completed). Read with deltaSumTimestampMerge(ack_delta) grouped by queue; sum per-queue results for totals.",
+      }),
+      groupable: false,
+      sortable: false,
+      filterable: false,
+    },
+    nack_delta: {
+      name: "nack_delta",
+      mergeGroupKey: "queue",
+      ...column("String", {
+        description:
+          "Runs nacked. Read with deltaSumTimestampMerge(nack_delta) grouped by queue; sum per-queue results for totals.",
+      }),
+      groupable: false,
+      sortable: false,
+      filterable: false,
+    },
+    dlq_delta: {
+      name: "dlq_delta",
+      mergeGroupKey: "queue",
+      ...column("String", {
+        description:
+          "Runs dead-lettered. Read with deltaSumTimestampMerge(dlq_delta) grouped by queue; sum per-queue results for totals.",
+      }),
+      groupable: false,
+      sortable: false,
+      filterable: false,
+    },
+    throttled_count: {
+      name: "throttled_count",
+      ...column("UInt64", {
+        description: "Gauge emissions where running>=limit and queued>0. Aggregate with sum().",
+        coreColumn: true,
+      }),
+    },
+    max_queued: {
+      name: "max_queued",
+      ...column("UInt32", {
+        description: "Peak queue depth in the bucket. Aggregate with max().",
+        coreColumn: true,
+        fillMode: "carry",
+      }),
+    },
+    max_running: {
+      name: "max_running",
+      ...column("UInt32", {
+        description: "Peak running (concurrency) in the bucket. Aggregate with max().",
+        coreColumn: true,
+        fillMode: "carry",
+      }),
+    },
+    max_limit: {
+      name: "max_limit",
+      ...column("UInt32", {
+        description: "The queue concurrency limit. Aggregate with max().",
+        coreColumn: true,
+        fillMode: "carry",
+      }),
+    },
+    max_env_queued: {
+      name: "max_env_queued",
+      ...column("UInt32", {
+        description: "Peak environment-wide queued in the bucket. Aggregate with max().",
+        fillMode: "carry",
+      }),
+    },
+    max_env_running: {
+      name: "max_env_running",
+      ...column("UInt32", {
+        description: "Peak environment-wide running in the bucket. Aggregate with max().",
+        fillMode: "carry",
+      }),
+    },
+    max_env_limit: {
+      name: "max_env_limit",
+      ...column("UInt32", {
+        description: "The environment concurrency limit. Aggregate with max().",
+        fillMode: "carry",
+      }),
+    },
+    max_ck_backlogged: {
+      name: "max_ck_backlogged",
+      ...column("UInt32", {
+        description:
+          "Peak number of distinct concurrency keys with queued runs in the bucket. Aggregate with max(). Zero for queues that do not use concurrency keys.",
+        fillMode: "carry",
+      }),
+    },
+    max_ck_wait_ms: {
+      name: "max_ck_wait_ms",
+      ...column("UInt32", {
+        description:
+          "Worst head-of-line wait (ms) across concurrency keys in the bucket: how long the most-starved key's oldest queued run has been waiting. Aggregate with max(). Zero for queues that do not use concurrency keys.",
+        fillMode: "carry",
+      }),
+    },
+    wait_ms_sum: {
+      name: "wait_ms_sum",
+      ...column("UInt64", {
+        description: "Sum of scheduling delays (ms). Mean = wait_ms_sum/wait_ms_count.",
+      }),
+    },
+    wait_ms_count: {
+      name: "wait_ms_count",
+      ...column("UInt64", {
+        description: "Count of scheduling-delay samples. Aggregate with sum().",
+      }),
+    },
+    wait_quantiles: {
+      name: "wait_quantiles",
+      ...column("String", {
+        description:
+          "Scheduling-delay (dequeue minus eligible-at) quantile state. Read with quantilesMerge(0.5,0.9,0.95,0.99)(wait_quantiles)[n].",
+      }),
+      groupable: false,
+      sortable: false,
+      filterable: false,
+    },
+  },
+  timeBucketThresholds: [
+    { maxRangeSeconds: 3 * 60 * 60, interval: { value: 10, unit: "SECOND" } },
+    { maxRangeSeconds: 12 * 60 * 60, interval: { value: 1, unit: "MINUTE" } },
+    { maxRangeSeconds: 2 * 24 * 60 * 60, interval: { value: 5, unit: "MINUTE" } },
+    { maxRangeSeconds: 7 * 24 * 60 * 60, interval: { value: 15, unit: "MINUTE" } },
+    { maxRangeSeconds: 30 * 24 * 60 * 60, interval: { value: 1, unit: "HOUR" } },
+    { maxRangeSeconds: 90 * 24 * 60 * 60, interval: { value: 6, unit: "HOUR" } },
+    { maxRangeSeconds: 180 * 24 * 60 * 60, interval: { value: 1, unit: "DAY" } },
+    { maxRangeSeconds: 365 * 24 * 60 * 60, interval: { value: 1, unit: "WEEK" } },
+  ] satisfies BucketThreshold[],
+  // Ranges whose bucket interval is >= 5 minutes read the 5m rollup instead (same
+  // logical columns, ~30x fewer rows).
+  rollups: [{ minIntervalSeconds: 300, clickhouseName: "trigger_dev.queue_metrics_5m_v1" }],
+  queryCache: { ttlSeconds: 30, alignSeconds: 30 },
+};
+
+/**
+ * Schema definition for the env_metrics table (trigger_dev.env_metrics_v1).
+ * Environment-level rollup of queue_metrics with the queue dimension dropped, so
+ * header tiles and saturation charts cost the same regardless of how many queues
+ * the environment has. Keeps the full 10-second granularity: row count is
+ * queue-independent, so even 30-day ranges stay small.
+ */
+export const envMetricsSchema: TableSchema = {
+  name: "env_metrics",
+  clickhouseName: "trigger_dev.env_metrics_v1",
+  description:
+    "Environment-level concurrency, saturation, throttling, and scheduling-delay metrics (10-second buckets)",
+  timeConstraint: "bucket_start",
+  tenantColumns: {
+    organizationId: "organization_id",
+    projectId: "project_id",
+    environmentId: "environment_id",
+  },
+  columns: {
+    environment: {
+      name: "environment",
+      clickhouseName: "environment_id",
+      ...column("String", { description: "The environment slug", example: "prod" }),
+      fieldMapping: "environment",
+      customRenderType: "environment",
+    },
+    project: {
+      name: "project",
+      clickhouseName: "project_id",
+      ...column("String", {
+        description: "The project reference, they always start with `proj_`.",
+        example: "proj_howcnaxbfxdmwmxazktx",
+      }),
+      fieldMapping: "project",
+      customRenderType: "project",
+    },
+    bucket_start: {
+      name: "bucket_start",
+      ...column("DateTime", {
+        description: "The start of the 10-second aggregation bucket",
+        example: "2024-01-15 09:30:00",
+        coreColumn: true,
+      }),
+    },
+    max_env_queued: {
+      name: "max_env_queued",
+      ...column("UInt32", {
+        description: "Peak environment-wide queued in the bucket. Aggregate with max().",
+        coreColumn: true,
+        fillMode: "carry",
+      }),
+    },
+    max_env_running: {
+      name: "max_env_running",
+      ...column("UInt32", {
+        description: "Peak environment-wide running in the bucket. Aggregate with max().",
+        coreColumn: true,
+        fillMode: "carry",
+      }),
+    },
+    max_env_limit: {
+      name: "max_env_limit",
+      ...column("UInt32", {
+        description: "The environment concurrency limit. Aggregate with max().",
+        coreColumn: true,
+        fillMode: "carry",
+      }),
+    },
+    throttled_count: {
+      name: "throttled_count",
+      ...column("UInt64", {
+        description:
+          "Gauge emissions where a queue was at its limit with work queued. Aggregate with sum().",
+        coreColumn: true,
+      }),
+    },
+    wait_ms_sum: {
+      name: "wait_ms_sum",
+      ...column("UInt64", {
+        description: "Sum of scheduling delays (ms). Mean = wait_ms_sum/wait_ms_count.",
+      }),
+    },
+    wait_ms_count: {
+      name: "wait_ms_count",
+      ...column("UInt64", {
+        description: "Count of scheduling-delay samples. Aggregate with sum().",
+      }),
+    },
+    wait_quantiles: {
+      name: "wait_quantiles",
+      ...column("String", {
+        description:
+          "Scheduling-delay quantile state (TDigest). Read with quantilesTDigestMerge(0.5,0.9,0.95,0.99)(wait_quantiles)[n].",
+      }),
+      groupable: false,
+      sortable: false,
+      filterable: false,
+    },
+  },
+  timeBucketThresholds: [
+    { maxRangeSeconds: 3 * 60 * 60, interval: { value: 10, unit: "SECOND" } },
+    { maxRangeSeconds: 12 * 60 * 60, interval: { value: 1, unit: "MINUTE" } },
+    { maxRangeSeconds: 2 * 24 * 60 * 60, interval: { value: 5, unit: "MINUTE" } },
+    { maxRangeSeconds: 7 * 24 * 60 * 60, interval: { value: 15, unit: "MINUTE" } },
+    { maxRangeSeconds: 30 * 24 * 60 * 60, interval: { value: 1, unit: "HOUR" } },
+    { maxRangeSeconds: 90 * 24 * 60 * 60, interval: { value: 6, unit: "HOUR" } },
+    { maxRangeSeconds: 180 * 24 * 60 * 60, interval: { value: 1, unit: "DAY" } },
+    { maxRangeSeconds: 365 * 24 * 60 * 60, interval: { value: 1, unit: "WEEK" } },
+  ] satisfies BucketThreshold[],
+  queryCache: { ttlSeconds: 30, alignSeconds: 30 },
+};
+
 /**
  * Schema definition for the llm_metrics table (trigger_dev.llm_metrics_v1)
  */
@@ -975,13 +1300,154 @@ export const llmModelsSchema: TableSchema = {
   },
 };
 
+/**
+ * Per-concurrency-key drill-down for queues that shard work with `concurrencyKey`
+ * (e.g. per-tenant fairness). Rows are activity-bound: a (queue, key, bucket) row exists
+ * only when that key had events, so key cardinality cannot inflate the table.
+ */
+export const queueMetricsByKeySchema: TableSchema = {
+  name: "queue_metrics_by_key",
+  clickhouseName: "trigger_dev.queue_metrics_ck_v1",
+  description: "Per-concurrency-key queue metrics: backlog, throughput, and wait by key",
+  hidden: true,
+  timeConstraint: "bucket_start",
+  tenantColumns: {
+    organizationId: "organization_id",
+    projectId: "project_id",
+    environmentId: "environment_id",
+  },
+  columns: {
+    environment: {
+      name: "environment",
+      clickhouseName: "environment_id",
+      ...column("String", { description: "The environment slug", example: "prod" }),
+      fieldMapping: "environment",
+      customRenderType: "environment",
+    },
+    project: {
+      name: "project",
+      clickhouseName: "project_id",
+      ...column("String", {
+        description: "The project reference, they always start with `proj_`.",
+        example: "proj_howcnaxbfxdmwmxazktx",
+      }),
+      fieldMapping: "project",
+      customRenderType: "project",
+    },
+    queue: {
+      name: "queue",
+      clickhouseName: "queue_name",
+      ...column("LowCardinality(String)", {
+        description: "The queue name",
+        example: "my-queue",
+        coreColumn: true,
+      }),
+    },
+    concurrency_key: {
+      name: "concurrency_key",
+      ...column("String", {
+        description: "The concurrency key the run was sharded by (e.g. a tenant id)",
+        example: "tenant-42",
+        coreColumn: true,
+      }),
+    },
+    bucket_start: {
+      name: "bucket_start",
+      ...column("DateTime", {
+        description: "The start of the 10-second aggregation bucket",
+        example: "2024-01-15 09:30:00",
+        coreColumn: true,
+      }),
+    },
+    enqueue_delta: {
+      name: "enqueue_delta",
+      mergeGroupKey: ["queue", "concurrency_key"],
+      ...column("String", {
+        description:
+          "Runs enqueued for this key (cumulative-counter delta). Read with deltaSumTimestampMerge(enqueue_delta) grouped by queue and concurrency_key, or with both pinned; never merge across keys.",
+      }),
+      groupable: false,
+      sortable: false,
+      filterable: false,
+    },
+    started_delta: {
+      name: "started_delta",
+      mergeGroupKey: ["queue", "concurrency_key"],
+      ...column("String", {
+        description:
+          "Runs dequeued/started for this key (throughput). Read with deltaSumTimestampMerge(started_delta) grouped by queue and concurrency_key, or with both pinned; never merge across keys.",
+        coreColumn: true,
+      }),
+      groupable: false,
+      sortable: false,
+      filterable: false,
+    },
+    ack_delta: {
+      name: "ack_delta",
+      mergeGroupKey: ["queue", "concurrency_key"],
+      ...column("String", {
+        description:
+          "Runs acked (completed) for this key. Read with deltaSumTimestampMerge(ack_delta) grouped by queue and concurrency_key, or with both pinned.",
+      }),
+      groupable: false,
+      sortable: false,
+      filterable: false,
+    },
+    max_queued: {
+      name: "max_queued",
+      ...column("UInt32", {
+        description: "Peak backlog for this key in the bucket. Aggregate with max().",
+        coreColumn: true,
+        fillMode: "carry",
+      }),
+    },
+    max_running: {
+      name: "max_running",
+      ...column("UInt32", {
+        description: "Peak running for this key in the bucket. Aggregate with max().",
+        fillMode: "carry",
+      }),
+    },
+    wait_ms_sum: {
+      name: "wait_ms_sum",
+      ...column("UInt64", {
+        description:
+          "Sum of scheduling delays (ms) for this key. Mean = wait_ms_sum/wait_ms_count.",
+      }),
+    },
+    wait_ms_count: {
+      name: "wait_ms_count",
+      ...column("UInt64", {
+        description: "Count of scheduling-delay samples for this key. Aggregate with sum().",
+      }),
+    },
+  },
+  timeBucketThresholds: [
+    { maxRangeSeconds: 3 * 60 * 60, interval: { value: 10, unit: "SECOND" } },
+    { maxRangeSeconds: 12 * 60 * 60, interval: { value: 1, unit: "MINUTE" } },
+    { maxRangeSeconds: 2 * 24 * 60 * 60, interval: { value: 5, unit: "MINUTE" } },
+    { maxRangeSeconds: 7 * 24 * 60 * 60, interval: { value: 15, unit: "MINUTE" } },
+    { maxRangeSeconds: 30 * 24 * 60 * 60, interval: { value: 1, unit: "HOUR" } },
+    { maxRangeSeconds: 90 * 24 * 60 * 60, interval: { value: 6, unit: "HOUR" } },
+    { maxRangeSeconds: 180 * 24 * 60 * 60, interval: { value: 1, unit: "DAY" } },
+    { maxRangeSeconds: 365 * 24 * 60 * 60, interval: { value: 1, unit: "WEEK" } },
+  ] satisfies BucketThreshold[],
+  queryCache: { ttlSeconds: 30, alignSeconds: 30 },
+};
+
 export const querySchemas: TableSchema[] = [
   runsSchema,
   metricsSchema,
   llmMetricsSchema,
   llmModelsSchema,
+  queueMetricsSchema,
+  envMetricsSchema,
+  queueMetricsByKeySchema,
 ];
 
+/** Schemas shown in user-facing listings (editor autocomplete, schema docs, schema API). */
+export const visibleQuerySchemas: TableSchema[] = querySchemas.filter((s) => !s.hidden);
+
 /**
  * Default query for the query editor
  */
diff --git a/apps/webapp/app/v3/queueMetrics.server.ts b/apps/webapp/app/v3/queueMetrics.server.ts
new file mode 100644
index 00000000000..14d9c4dc93d
--- /dev/null
+++ b/apps/webapp/app/v3/queueMetrics.server.ts
@@ -0,0 +1,247 @@
+import { type ClickHouse, type QueueMetricsRawV1Input } from "@internal/clickhouse";
+import {
+  allStreamKeys,
+  CachedRedisFlag,
+  CachedRedisNumber,
+  MetricsStreamConsumer,
+  MetricsStreamEmitter,
+  probeShardStates,
+  type MetricDefinition,
+  type ShardState,
+  type StreamEntry,
+} from "@internal/metrics-pipeline";
+import { createRedisClient, type Redis, type RedisOptions } from "@internal/redis";
+import os from "node:os";
+import { env } from "~/env.server";
+import { getDefaultClickhouseClient } from "~/services/clickhouse/clickhouseFactory.server";
+import { logger } from "~/services/logger.server";
+import { signalsEmitter } from "~/services/signals.server";
+import { singleton } from "~/utils/singleton";
+import { mapEntryToRows, QueueNameLimiter } from "./queueMetricsMapping";
+import { meter } from "./tracer.server";
+
+const FLAG_KEY = "queue_metrics:enabled";
+const SAMPLE_RATE_KEY = "queue_metrics:gauge_sample_rate";
+const TRUTHY = new Set(["1", "true", "on", "enabled", "yes"]);
+
+// Connection details (host/port/auth/TLS) for the same physical Redis as the RunQueue.
+// No keyPrefix here: stream keys are kept out of the keyPrefix on every access path.
+function runQueueRedisOptions(): RedisOptions {
+  const options: RedisOptions = {
+    port: env.RUN_ENGINE_RUN_QUEUE_REDIS_PORT ?? undefined,
+    host: env.RUN_ENGINE_RUN_QUEUE_REDIS_HOST ?? undefined,
+    username: env.RUN_ENGINE_RUN_QUEUE_REDIS_USERNAME ?? undefined,
+    password: env.RUN_ENGINE_RUN_QUEUE_REDIS_PASSWORD ?? undefined,
+    enableAutoPipelining: true,
+  };
+  return env.RUN_ENGINE_RUN_QUEUE_REDIS_TLS_DISABLED === "true" ? options : { ...options, tls: {} };
+}
+
+// Redis that carries the metrics stream (gauges AND counters). A dedicated instance when
+// QUEUE_METRICS_REDIS_HOST is set, so the metrics backlog never competes with the run queue;
+// otherwise the run-queue Redis itself. Gauges are read inside the queue-op Lua, returned on
+// the reply, then XADDed by Node, so only the fallback puts a stream on the run-queue Redis.
+function metricsRedisOptions(): RedisOptions {
+  if (!env.QUEUE_METRICS_REDIS_HOST) return runQueueRedisOptions();
+  const dedicated: RedisOptions = {
+    host: env.QUEUE_METRICS_REDIS_HOST,
+    port: env.QUEUE_METRICS_REDIS_PORT ?? undefined,
+    username: env.QUEUE_METRICS_REDIS_USERNAME ?? undefined,
+    password: env.QUEUE_METRICS_REDIS_PASSWORD ?? undefined,
+    enableAutoPipelining: true,
+  };
+  return env.QUEUE_METRICS_REDIS_TLS_DISABLED === "true" ? dedicated : { ...dedicated, tls: {} };
+}
+
+// Definition of the single "queue_metrics" stream family (gauge snapshots plus cumulative
+// counter readings) and the one consumer group, "queue_metrics_cg", that reads it.
+function metricsDefinition(): MetricDefinition {
+  // A stalled consumer holds up to maxLen entries per shard: cap lower on the shared Redis.
+  const fallbackMaxLen = env.QUEUE_METRICS_REDIS_HOST ? 8_000_000 : 2_000_000;
+  const maxLen = env.QUEUE_METRICS_COUNTER_STREAM_MAXLEN ?? fallbackMaxLen;
+  return {
+    name: "queue_metrics",
+    shardCount: env.QUEUE_METRICS_STREAM_SHARD_COUNT,
+    consumerGroup: "queue_metrics_cg",
+    maxLen,
+  };
+}
+
+// Dedicated client for the admin read/write surface of the control keys; usable whether or
+// not this instance runs the emitter/consumer. keyPrefix is cleared to match the raw keys.
+function adminRedis(): Redis {
+  return singleton("queueMetricsAdminRedis", () => {
+    const options = { ...runQueueRedisOptions(), keyPrefix: undefined };
+    return createRedisClient(options, {
+      onError: (error) => logger.error("queue metrics admin redis error", { error }),
+    });
+  });
+}
+
+function metricsAdminRedis(): Redis {
+  return singleton("queueMetricsCounterAdminRedis", () => {
+    const options = { ...metricsRedisOptions(), keyPrefix: undefined };
+    return createRedisClient(options, {
+      onError: (error) => logger.error("queue metrics counter admin redis error", { error }),
+    });
+  });
+}
+
+export type QueueMetricsControls = {
+  enabled: boolean;
+  enabledKeySet: boolean;
+  sampleRate: number;
+  sampleRateKeySet: boolean;
+  sampleRateDefault: number;
+};
+
+// Reads both control keys with one MGET; an unset or non-finite rate uses the env default.
+export async function readQueueMetricsControls(): Promise<QueueMetricsControls> {
+  const raw = (await adminRedis().mget(FLAG_KEY, SAMPLE_RATE_KEY)) as (string | null)[];
+  const flagValue = raw[0] ?? null;
+  const rateValue = raw[1] ?? null;
+  const rate = rateValue === null ? Number.NaN : Number(rateValue);
+  const sampleRateDefault = env.QUEUE_METRICS_GAUGE_SAMPLE_RATE;
+  return {
+    enabled: flagValue !== null && TRUTHY.has(flagValue.trim().toLowerCase()),
+    enabledKeySet: flagValue !== null,
+    sampleRate: Number.isFinite(rate) ? Math.min(1, Math.max(0, rate)) : sampleRateDefault,
+    sampleRateKeySet: rateValue !== null,
+    sampleRateDefault,
+  };
+}
+
+// Writes whichever controls are given. The rate is clamped to [0, 1]; NaN is logged, not stored.
+export async function writeQueueMetricsControls(update: {
+  enabled?: boolean;
+  sampleRate?: number;
+}): Promise<void> {
+  // Math.min/Math.max propagate NaN, which would otherwise be written as the string "NaN".
+  if (Number.isNaN(update.sampleRate)) logger.error("queue metrics sample rate ignored", update);
+  const ops: Promise<unknown>[] = [];
+  if (update.enabled !== undefined) ops.push(adminRedis().set(FLAG_KEY, update.enabled ? "1" : "0"));
+  if (update.sampleRate !== undefined && !Number.isNaN(update.sampleRate)) {
+    ops.push(adminRedis().set(SAMPLE_RATE_KEY, String(Math.min(1, Math.max(0, update.sampleRate)))));
+  }
+  await Promise.all(ops);
+}
+
+export type LabeledShardState = ShardState & { stream: "queue_metrics" };
+
+export async function probeQueueMetricsStreams(): Promise<LabeledShardState[]> {
+  const spec = metricsDefinition();
+  const rows = await probeShardStates(metricsAdminRedis(), allStreamKeys(spec), spec.consumerGroup);
+  return rows.map((shard): LabeledShardState => ({ ...shard, stream: "queue_metrics" }));
+}
+
+/** Injected into the RunQueue when QUEUE_METRICS_EMIT_ENABLED=1; emits only while the flag is on. */
+export function getQueueMetricsEmitter(): MetricsStreamEmitter {
+  return singleton("queueMetricsEmitter", () => {
+    // Control keys stay on the run-queue Redis (the admin surface + docs point there).
+    const redis = runQueueRedisOptions();
+    const cacheTtlMs = 10_000; // both controls are live-tunable through a 10s cache
+    const flag = new CachedRedisFlag({ redis, key: FLAG_KEY, cacheTtlMs });
+    const gaugeSampleRate = new CachedRedisNumber({
+      redis,
+      key: SAMPLE_RATE_KEY,
+      defaultValue: env.QUEUE_METRICS_GAUGE_SAMPLE_RATE, // used while the Redis key is unset
+      min: 0,
+      max: 1,
+      cacheTtlMs,
+    });
+    return new MetricsStreamEmitter({
+      redis: metricsRedisOptions(),
+      definition: metricsDefinition(),
+      flag,
+      meter,
+      gaugeSampleRate,
+      counterOdometerTtlMs: env.QUEUE_METRICS_COUNTER_ODOMETER_TTL_SECONDS * 1000,
+    });
+  });
+}
+
+const queueNameLimiter = singleton(
+  "queueMetricsQueueNameLimiter",
+  () => new QueueNameLimiter(env.QUEUE_METRICS_MAX_QUEUE_NAMES_PER_ENV)
+);
+
+const concurrencyKeyLimiter = singleton(
+  "queueMetricsConcurrencyKeyLimiter",
+  () => new QueueNameLimiter(env.QUEUE_METRICS_MAX_CONCURRENCY_KEYS_PER_QUEUE, 50_000)
+);
+
+// Maps one stream entry to insert rows, sharing the module-level limiters that bound
+// queue-name and concurrency-key cardinality.
+function mapEntry(entry: StreamEntry): QueueMetricsRawV1Input[] {
+  const limiters = { queueNames: queueNameLimiter, concurrencyKeys: concurrencyKeyLimiter };
+  return mapEntryToRows(entry, limiters);
+}
+
+function makeInsert(): (
+  rows: QueueMetricsRawV1Input[],
+  opts: { dedupToken: string }
+) => Promise<void> {
+  const ch: ClickHouse = getDefaultClickhouseClient();
+  const insertRaw = ch.queueMetrics.insertRaw;
+  return async (rows, { dedupToken }) => {
+    const [error] = await insertRaw(rows, {
+      params: {
+        clickhouse_settings: {
+          insert_deduplication_token: dedupToken,
+          async_insert: 0,
+          // Propagate the token through the MV so a raw-deduped retry can't leave
+          // queue_metrics_v1 short when the MV insert failed on the first attempt.
+          deduplicate_blocks_in_dependent_materialized_views: 1,
+        },
+      },
+    });
+    if (error) throw error;
+  };
+}
+
+// Returns the consumer list cached under the "queueMetricsConsumers" singleton key: a single
+// stream consumer named "<hostname>-<pid>".
+function getQueueMetricsConsumers(): MetricsStreamConsumer<QueueMetricsRawV1Input>[] {
+  return singleton("queueMetricsConsumers", () => {
+    const consumer = new MetricsStreamConsumer<QueueMetricsRawV1Input>({
+      consumerName: `${os.hostname()}-${process.pid}`,
+      batchSize: env.QUEUE_METRICS_CONSUMER_BATCH_SIZE,
+      meter,
+      mapEntry,
+      insert: makeInsert(),
+      redis: metricsRedisOptions(),
+      definition: metricsDefinition(),
+    });
+    return [consumer];
+  });
+}
+
+// Builds the emitter eagerly at boot when QUEUE_METRICS_EMIT_ENABLED is "1", rather than
+// lazily on the first enqueue, so its flag has warmed before any traffic — otherwise the
+// first op after boot reads the default and is dropped.
+export function initQueueMetricsEmitter(): void {
+  if (env.QUEUE_METRICS_EMIT_ENABLED === "1") getQueueMetricsEmitter();
+}
+
+declare global {
+  // eslint-disable-next-line no-var
+  var __queueMetricsConsumerRegistered__: boolean | undefined;
+}
+
+// Starts the stream consumers at most once per process when QUEUE_METRICS_CONSUMER_ENABLED is
+// "1" and stops them on SIGTERM/SIGINT. Rejected start/stop promises are logged, not thrown.
+export function initQueueMetricsConsumer(): void {
+  const enabled = env.QUEUE_METRICS_CONSUMER_ENABLED === "1";
+  if (!enabled || global.__queueMetricsConsumerRegistered__) return;
+  global.__queueMetricsConsumerRegistered__ = true;
+  const consumers = getQueueMetricsConsumers();
+  const stopAll = () =>
+    Promise.all(consumers.map((consumer) => consumer.stop())).catch((error) =>
+      logger.error("queue metrics consumer stop failed", { error })
+    );
+  for (const signal of ["SIGTERM", "SIGINT"] as const) signalsEmitter.on(signal, stopAll);
+
+  void Promise.all(consumers.map((consumer) => consumer.start()))
+    .then(() => logger.info("Queue metrics consumer started"))
+    .catch((error) => logger.error("queue metrics consumers failed to start", { error }));
+}
diff --git a/apps/webapp/app/v3/queueMetricsMapping.ts b/apps/webapp/app/v3/queueMetricsMapping.ts
new file mode 100644
index 00000000000..9433b361a88
--- /dev/null
+++ b/apps/webapp/app/v3/queueMetricsMapping.ts
@@ -0,0 +1,164 @@
+import { type QueueMetricsRawV1Input } from "@internal/clickhouse";
+import { entryOrderKey, entryTimeMs, type StreamEntry } from "@internal/metrics-pipeline";
+
+const OPS = new Set(["gauge", "enqueue", "started", "ack", "nack", "dlq"]);
+
+// {org:ORGID}:proj:PROJECTID:env:ENVID:queue:QUEUENAME[:ck:CK]. Anchored (not a
+// positional split) so a queue name containing ":" survives; the lazy name capture
+// stops before an optional ":ck:" suffix, which is captured (the ":ck:*" wildcard of
+// aggregate CK-dequeue gauges maps to no key).
+const DESCRIPTOR = /^\{org:([^}]+)\}:proj:([^:]+):env:([^:]+):queue:(.+?)(?::ck:(.+))?$/;
+
+export function descriptorFromQueue(q: string): {
+  organization_id: string;
+  project_id: string;
+  environment_id: string;
+  queue_name: string;
+  concurrency_key: string;
+} | null {
+  const match = DESCRIPTOR.exec(q);
+  if (match === null) return null;
+  const [, organization_id, project_id, environment_id, queue_name, rawKey] = match;
+  return {
+    organization_id: organization_id!,
+    project_id: project_id!,
+    environment_id: environment_id!,
+    queue_name: queue_name!,
+    concurrency_key: rawKey && rawKey !== "*" ? rawKey : "", // absent key or "*" → no key
+  };
+}
+
+export const OVERFLOW_QUEUE_NAME = "__overflow__";
+
+/**
+ * Bounds per-scope name cardinality (both queue_name per env and concurrency_key per
+ * queue are user-controlled GROUP BY keys). Names beyond the cap map to OVERFLOW_QUEUE_NAME.
+ * Per-process and reset on restart, so the cap is approximate: a protective bound, not a quota.
+ */
+export class QueueNameLimiter {
+  private readonly byScope = new Map<string, Set<string>>();
+
+  constructor(
+    private readonly maxPerScope: number,
+    private readonly maxScopes = 10_000
+  ) {}
+
+  limit(scope: string, name: string): string {
+    if (this.maxPerScope <= 0) return name; // a non-positive cap disables limiting
+    let seen = this.byScope.get(scope);
+    if (seen === undefined) {
+      const atCapacity = this.byScope.size >= this.maxScopes;
+      const oldest = atCapacity ? this.byScope.keys().next().value : undefined;
+      if (oldest !== undefined) this.byScope.delete(oldest); // evict the oldest-inserted scope
+      seen = new Set<string>();
+      this.byScope.set(scope, seen);
+    }
+    if (!seen.has(name)) {
+      if (seen.size >= this.maxPerScope) return OVERFLOW_QUEUE_NAME;
+      seen.add(name);
+    }
+    return name;
+  }
+}
+
+function num(value: string | undefined): number | undefined {
+  // Blank is rejected explicitly: Number("") is 0, which would record a spurious zero.
+  if (value == null || value.trim() === "") return undefined;
+  return Number.isFinite(Number(value)) ? Number(value) : undefined;
+}
+
+export type QueueMetricsLimiters = {
+  queueNames?: QueueNameLimiter;
+  concurrencyKeys?: QueueNameLimiter;
+};
+
+/**
+ * One stream entry maps to 1..2 raw rows: gauges are single rows carrying their parsed
+ * concurrency_key; a counter entry yields a base row when `cum` is present plus a per-key
+ * row when `ck`/`ckcum` are present (the emitter's dual-odometer entry). Baseline entries
+ * carry only one of the two, by design.
+ */
+export function mapEntryToRows(
+  entry: StreamEntry,
+  limiters?: QueueMetricsLimiters
+): QueueMetricsRawV1Input[] {
+  const f = entry.fields;
+  const op = f.op;
+  if (!op || !OPS.has(op) || !f.q) return []; // unknown op or missing queue descriptor: drop
+  const descriptor = descriptorFromQueue(f.q);
+  if (!descriptor || !descriptor.queue_name) return [];
+
+  let queueOverflowed = false;
+  if (limiters?.queueNames) {
+    descriptor.queue_name = limiters.queueNames.limit(
+      descriptor.environment_id,
+      descriptor.queue_name
+    );
+    queueOverflowed = descriptor.queue_name === OVERFLOW_QUEUE_NAME;
+  }
+
+  // Counter entries carry the key as a field (q is base-normalized); gauges carry it in q.
+  // Overflowed queue names share one row, so per-key attribution under them is meaningless:
+  let ck = descriptor.concurrency_key || (typeof f.ck === "string" ? f.ck : "");
+  if (queueOverflowed) ck = ""; // cleared first so no key is registered under the overflow scope
+  if (ck && limiters?.concurrencyKeys) {
+    const scope = `${descriptor.environment_id}:${descriptor.queue_name}`;
+    if (limiters.concurrencyKeys.limit(scope, ck) === OVERFLOW_QUEUE_NAME) ck = "";
+  }
+
+  const eventMs = entryTimeMs(entry.id) ?? Date.now();
+  const eventTime = new Date(eventMs).toISOString().slice(0, 19).replace("T", " ");
+  const base = {
+    organization_id: descriptor.organization_id,
+    project_id: descriptor.project_id,
+    environment_id: descriptor.environment_id,
+    queue_name: descriptor.queue_name,
+    event_time: eventTime,
+    op: op as QueueMetricsRawV1Input["op"],
+  };
+
+  if (op === "gauge") {
+    return [
+      {
+        ...base,
+        concurrency_key: ck,
+        queued: num(f.ql),
+        running: num(f.cc),
+        queue_limit: num(f.lim),
+        env_queued: num(f.eql),
+        env_running: num(f.ec),
+        env_limit: num(f.elim),
+        throttled: num(f.thr),
+        ck_backlogged: num(f.ckq),
+        ck_max_wait_ms: num(f.ckw),
+      },
+    ];
+  }
+
+  // Overflowed names drop counters entirely: merging distinct odometers under one shared
+  // name produces garbage deltas (gauges above stay, max across the overflow set is
+  // still meaningful).
+  if (queueOverflowed) return [];
+
+  const rows: QueueMetricsRawV1Input[] = [];
+  const orderKey = entryOrderKey(entry.id);
+  const waitMs = op === "started" && f.wait != null ? num(f.wait) : undefined;
+  if (f.cum != null) {
+    rows.push({
+      ...base,
+      cumulative: num(f.cum),
+      order_key: orderKey,
+      ...(waitMs !== undefined ? { wait_ms: waitMs } : {}),
+    });
+  }
+  if (ck && f.ckcum != null) {
+    rows.push({
+      ...base,
+      concurrency_key: ck,
+      cumulative: num(f.ckcum),
+      order_key: orderKey,
+      ...(waitMs !== undefined ? { wait_ms: waitMs } : {}),
+    });
+  }
+  return rows;
+}
diff --git a/apps/webapp/app/v3/runEngine.server.ts b/apps/webapp/app/v3/runEngine.server.ts
index 4d9e263d6be..85986933290 100644
--- a/apps/webapp/app/v3/runEngine.server.ts
+++ b/apps/webapp/app/v3/runEngine.server.ts
@@ -7,6 +7,7 @@ import { logger } from "~/services/logger.server";
 import { defaultMachine, getCurrentPlan } from "~/services/platform.v3.server";
 import { singleton } from "~/utils/singleton";
 import { allMachines } from "./machinePresets.server";
+import { getQueueMetricsEmitter } from "./queueMetrics.server";
 import { runEnginePendingVersionLookup } from "./runEnginePendingVersionLookup.server";
 import { pickRunOpsStoreForCompletion } from "./runOpsMigration/crossSeamGuard.server";
 import { runEngineControlPlaneResolver } from "./runOpsMigration/runEngineControlPlaneResolver.server";
@@ -83,6 +84,7 @@ function createRunEngine() {
         tracer,
       },
       shardCount: env.RUN_ENGINE_RUN_QUEUE_SHARD_COUNT,
+      queueMetrics: env.QUEUE_METRICS_EMIT_ENABLED === "1" ? getQueueMetricsEmitter() : undefined,
       processWorkerQueueDebounceMs: env.RUN_ENGINE_PROCESS_WORKER_QUEUE_DEBOUNCE_MS,
       dequeueBlockingTimeoutSeconds: env.RUN_ENGINE_DEQUEUE_BLOCKING_TIMEOUT_SECONDS,
       masterQueueConsumersIntervalMs: env.RUN_ENGINE_MASTER_QUEUE_CONSUMERS_INTERVAL_MS,
diff --git a/apps/webapp/package.json b/apps/webapp/package.json
index 643093624b4..90dc92447f7 100644
--- a/apps/webapp/package.json
+++ b/apps/webapp/package.json
@@ -17,6 +17,7 @@
     "typecheck": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" tsc --noEmit -p ./tsconfig.check.json",
     "db:seed": "tsx seed.ts",
     "db:seed:ai-spans": "tsx seed-ai-spans.mts",
+    "db:seed:queue-metrics": "tsx seed-queue-metrics.mts",
     "upload:sourcemaps": "bash ./upload-sourcemaps.sh",
     "test": "vitest --no-file-parallelism",
     "eval:dev": "evalite watch"
@@ -57,6 +58,7 @@
     "@internal/dashboard-agent": "workspace:*",
     "@internal/dashboard-agent-db": "workspace:*",
     "@internal/llm-model-catalog": "workspace:*",
+    "@internal/metrics-pipeline": "workspace:*",
     "@internal/redis": "workspace:*",
     "@internal/run-engine": "workspace:*",
     "@internal/run-ops-database": "workspace:*",
diff --git a/apps/webapp/seed-queue-metrics.mts b/apps/webapp/seed-queue-metrics.mts
new file mode 100644
index 00000000000..709ba8f25ed
--- /dev/null
+++ b/apps/webapp/seed-queue-metrics.mts
@@ -0,0 +1,947 @@
+import { prisma } from "./app/db.server";
+import { createOrganization } from "./app/models/organization.server";
+import { createProject } from "./app/models/project.server";
+import { ClickHouse } from "@internal/clickhouse";
+import type { QueueMetricsRawV1Input } from "@internal/clickhouse";
+import { generateFriendlyId } from "./app/v3/friendlyIdentifiers";
+
+// Queue metrics simulator: writes realistic raw rows into a synthetic tenant's
+// queue_metrics_raw_v1 and lets the MV build queue_metrics_v1 (the same path the real
+// consumer uses), so the dashboard can be built without the run engine. See TRI-10407.
+
+const ORG_TITLE = "Queue Metrics Dev";
+const PROJECT_NAME = "queue-metrics-demo";
+
+type Rng = () => number;
+type QueueProfile = {
+  name: string;
+  limit: (bucket: number) => number;
+  arrivals: (bucket: number, rng: Rng) => number; // expected new runs enqueued this bucket
+  waitBaseMs: number;
+  sparse?: boolean; // emit no rows when the queue is fully idle (tests carry-forward gaps)
+  // Concurrency-key queue: adds CK-health gauge fields + live ckIndex staging (--usage)
+  ck?: {
+    backlogged: (bucket: number, rng: Rng) => number;
+    maxWaitMs: (bucket: number, rng: Rng) => number;
+  };
+};
+type Scenario = {
+  description: string;
+  envLimit: (bucket: number) => number;
+  queues: QueueProfile[];
+};
+
+// ---------------------------------------------------------------------------
+// CLI args
+// ---------------------------------------------------------------------------
+
+// Collects "--key value" and bare "--key" (stored as "true") pairs into a flag map.
+function parseArgs(argv: string[]) {
+  const flags: Record<string, string> = {};
+  let index = 0;
+  while (index < argv.length) {
+    const token = argv[index++];
+    if (!token.startsWith("--")) continue; // tokens not consumed as a flag's value are ignored
+    const value = argv[index];
+    // A following token that is not itself a flag is this flag's value.
+    const hasValue = Boolean(value) && !value.startsWith("--");
+    flags[token.slice(2)] = hasValue ? value : "true";
+    if (hasValue) index++;
+  }
+  return flags;
+}
+
+function parseDuration(s: string): number {
+  // <digits>[s|m|h|d] → seconds; a bare number is taken as seconds.
+  const parsed = /^(\d+)\s*(s|m|h|d)?$/.exec(s);
+  if (parsed === null) throw new Error(`bad duration: ${s}`);
+  const secondsPerUnit = { s: 1, m: 60, h: 3600, d: 86400 }[parsed[2] ?? "s"]!;
+  return Number(parsed[1]) * secondsPerUnit;
+}
+
+// ---------------------------------------------------------------------------
+// Deterministic RNG + distributions
+// ---------------------------------------------------------------------------
+
+function mulberry32(seed: number): Rng {
+  let a = seed >>> 0; // coerce the seed to an unsigned 32-bit integer
+  return () => {
+    a |= 0;
+    a = (a + 0x6d2b79f5) | 0;
+    let t = Math.imul(a ^ (a >>> 15), 1 | a);
+    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
+    return ((t ^ (t >>> 14)) >>> 0) / 4294967296; // unsigned 32-bit value scaled into [0, 1)
+  };
+}
+
+function standardNormal(rng: Rng): number {
+  let radiusDraw = rng();
+  while (radiusDraw === 0) radiusDraw = rng(); // Math.log(0) would be -Infinity
+  let angleDraw = rng();
+  while (angleDraw === 0) angleDraw = rng();
+  return Math.sqrt(-2 * Math.log(radiusDraw)) * Math.cos(2 * Math.PI * angleDraw); // Box-Muller
+}
+
+function lognormal(medianMs: number, sigma: number, rng: Rng): number {
+  return Math.exp(Math.log(Math.max(medianMs, 1)) + sigma * standardNormal(rng)); // median floored at 1
+}
+
+function poisson(lambda: number, rng: Rng): number {
+  if (lambda <= 0) return 0;
+  if (lambda > 30) {
+    // Large lambda: one rounded normal draw (spread sqrt(lambda)), clamped at zero.
+    return Math.max(0, Math.round(lambda + standardNormal(rng) * Math.sqrt(lambda)));
+  }
+  // Otherwise multiply uniform draws until the product drops to exp(-lambda) or below.
+  const threshold = Math.exp(-lambda);
+  let count = 0;
+  for (let product = rng(); product > threshold; product *= rng()) count++;
+  return count;
+}
+
+function formatChDateTime(date: Date): string {
+  return date.toISOString().slice(0, 19).replace("T", " "); // UTC "YYYY-MM-DD HH:MM:SS"
+}
+
+// ---------------------------------------------------------------------------
+// Scenarios
+// ---------------------------------------------------------------------------
+
+const steady = (): QueueProfile[] => [ // three queues, fixed limits, constant arrival rate
+  { name: "emails", limit: () => 20, arrivals: (_b, r) => poisson(12, r), waitBaseMs: 40 },
+  { name: "webhooks", limit: () => 15, arrivals: (_b, r) => poisson(9, r), waitBaseMs: 40 },
+  { name: "reports", limit: () => 10, arrivals: (_b, r) => poisson(5, r), waitBaseMs: 60 },
+];
+
+// Fixed-limit queue whose arrival rate is 5x `base` for the first 4 of every 30 buckets.
+const bursty = (name: string, limit: number, base: number): QueueProfile => ({
+  name,
+  limit: () => limit,
+  arrivals: (b, r) => poisson(b % 30 < 4 ? base * 5 : base, r),
+  waitBaseMs: 50,
+});
+
+const scenarios: Record<string, (totalBuckets: number, bucketSec: number) => Scenario> = {
+  steady: () => ({
+    description: "all queues below capacity, no throttling",
+    envLimit: () => 60,
+    queues: steady(),
+  }),
+
+  burst: () => ({
+    description: "periodic arrival bursts -> backlog + wait spikes + throttling",
+    envLimit: () => 60,
+    queues: [bursty("ingest", 20, 6), bursty("transform", 20, 7)],
+  }),
+
+  // Tela case: sum of per-queue limits far exceeds the env limit, so queues compete.
+  "over-allocated-env": () => ({
+    description: "Sum(queue limits)=120 >> env limit=40; env saturates, queues env-limited",
+    envLimit: () => 40,
+    queues: Array.from({ length: 6 }, (_v, i) => ({
+      name: `worker-${i + 1}`,
+      limit: () => 20,
+      arrivals: (_b: number, r: Rng) => poisson(14, r),
+      waitBaseMs: 50,
+    })),
+  }),
+
+  "single-queue-starves-others": () => ({
+    description: "one greedy queue consumes most of a small env limit, starving the rest",
+    envLimit: () => 30,
+    queues: [
+      { name: "greedy", limit: () => 40, arrivals: (_b, r) => poisson(45, r), waitBaseMs: 60 },
+      { name: "polite-1", limit: () => 10, arrivals: (_b, r) => poisson(6, r), waitBaseMs: 50 },
+      { name: "polite-2", limit: () => 10, arrivals: (_b, r) => poisson(6, r), waitBaseMs: 50 },
+    ],
+  }),
+
+  "throttled-backlog": () => ({
+    description:
+      "arrival rate persistently above the queue limit -> permanent backlog + throttling",
+    envLimit: () => 50,
+    queues: [
+      { name: "overloaded", limit: () => 10, arrivals: (_b, r) => poisson(16, r), waitBaseMs: 80 },
+    ],
+  }),
+
+  "idle-sparse": () => ({
+    description: "sparse arrivals with many empty buckets (carry-forward gaps)",
+    envLimit: () => 50,
+    queues: Array.from({ length: 4 }, (_v, i) => ({
+      name: `sparse-${i + 1}`,
+      limit: () => 5,
+      arrivals: (_b: number, r: Rng) => (r() < 0.12 ? poisson(3, r) : 0),
+      waitBaseMs: 30,
+      sparse: true,
+    })),
+  }),
+
+  "spike-then-drain": (totalBuckets) => ({
+    description: "heavy arrivals for the first third, then zero; backlog builds then drains",
+    envLimit: () => 60,
+    queues: [
+      {
+        name: "batch-job",
+        limit: () => 15,
+        arrivals: (b, r) => (b < totalBuckets / 3 ? poisson(30, r) : 0),
+        waitBaseMs: 70,
+      },
+    ],
+  }),
+
+  // Pagination + relevance-ranking design surface: one runaway queue, a busy-but-healthy
+  // head, a bursty middle, and a long sparse tail across 61 queues (the list pages at 25).
+  "many-queues": () => ({
+    description:
+      "61 queues: one runaway, busy head, bursty middle, long sparse tail (pagination + ranking)",
+    envLimit: () => 150,
+    queues: [
+      { name: "imports", limit: () => 8, arrivals: (_b, r) => poisson(14, r), waitBaseMs: 80 },
+      ...["checkout", "notifications", "emails"].map((name, i) => ({
+        name,
+        limit: () => 15,
+        arrivals: (_b: number, r: Rng) => poisson(7 + i, r),
+        waitBaseMs: 60,
+      })),
+      ...Array.from({ length: 12 }, (_v, i) =>
+        bursty(`service-${String(i + 1).padStart(2, "0")}`, 10, 2)
+      ),
+      ...Array.from({ length: 20 }, (_v, i) => ({
+        name: `job-${String(i + 1).padStart(2, "0")}`,
+        limit: () => 5,
+        arrivals: (_b: number, r: Rng) => poisson(1, r),
+        waitBaseMs: 40,
+      })),
+      ...Array.from({ length: 25 }, (_v, i) => ({ // sparse tail: arrivals in ~5% of buckets
+        name: `tenant-${String(i + 1).padStart(2, "0")}`,
+        limit: () => 3,
+        arrivals: (_b: number, r: Rng) => (r() < 0.05 ? poisson(2, r) : 0),
+        waitBaseMs: 30,
+        sparse: true,
+      })),
+    ],
+  }),
+
+  // Per-tenant concurrency keys: a hog tenant periodically floods the queue and starves
+  // the others, so the CK charts (keys with backlog, most-starved wait) and the live
+  // per-key table on the queue detail page have something to show. Use with --usage.
+  "tenant-hotspot": () => ({
+    description:
+      "CK queue where a hog tenant starves others: CK charts + live key table (use --usage)",
+    envLimit: () => 40,
+    queues: [
+      {
+        name: "per-tenant",
+        limit: () => 10,
+        arrivals: (b, r) => poisson(b % 60 < 20 ? 25 : 8, r), // flood for 20 of every 60 buckets
+        waitBaseMs: 60,
+        ck: {
+          backlogged: (b, r) => (b % 60 < 20 ? 6 + Math.round(r() * 6) : Math.round(r() * 3)),
+          maxWaitMs: (b, r) =>
+            b % 60 < 20
+              ? Math.round(lognormal(90_000, 0.5, r))
+              : Math.round(lognormal(3_000, 0.6, r)),
+        },
+      },
+      { name: "background", limit: () => 10, arrivals: (_b, r) => poisson(5, r), waitBaseMs: 40 },
+    ],
+  }),
+
+  // Default: one env with a variety of queue behaviours + occasional env saturation.
+  mixed: (totalBuckets) => ({
+    description: "variety of queue profiles in one env, with occasional env saturation",
+    envLimit: (b) => (b % 40 < 12 ? 45 : 70), // dips low periodically to flip env saturation
+    queues: [
+      { name: "emails", limit: () => 20, arrivals: (_b, r) => poisson(12, r), waitBaseMs: 40 },
+      bursty("webhooks", 20, 6),
+      { name: "reports", limit: () => 10, arrivals: (_b, r) => poisson(8, r), waitBaseMs: 80 },
+      {
+        name: "cleanup",
+        limit: () => 5,
+        arrivals: (_b, r) => (r() < 0.12 ? poisson(3, r) : 0),
+        waitBaseMs: 30,
+        sparse: true,
+      },
+      {
+        name: "nightly-batch",
+        limit: () => 15,
+        arrivals: (b, r) => (b < totalBuckets / 5 ? poisson(18, r) : 0), // first fifth only
+        waitBaseMs: 70,
+      },
+    ],
+  }),
+};
+
+// ---------------------------------------------------------------------------
+// Simulation
+// ---------------------------------------------------------------------------
+
+type Ids = { organization_id: string; project_id: string; environment_id: string };
+const WAIT_SIGMA = 0.6;
+const NACK_RATE = 0.02;
+const DLQ_RATE = 0.004;
+
+type CounterOp = "enqueue" | "started" | "ack" | "nack" | "dlq";
+// Per-(queue, op) odometers, mirroring the production emitter: cumulative readings with a
+// cum=0 baseline on the first one, so deltaSumTimestamp captures the 0->1 delta.
+type CounterState = Record<CounterOp, number>[];
+
+function counterRows(
+  counters: CounterState,
+  q: number,
+  ids: Ids,
+  queueName: string,
+  eventTime: string,
+  orderKey: () => number,
+  op: CounterOp,
+  wait_ms?: number
+): QueueMetricsRawV1Input[] {
+  // Emits the odometer rows for one `op` event on queue index `q`: an optional cumulative=0
+  // baseline followed by the incremented reading.
+  const odometer = counters[q];
+  // Fields common to the baseline row and the reading row.
+  const shared = {
+    ...ids,
+    queue_name: queueName,
+    event_time: eventTime,
+    op,
+  };
+
+  const emitted: QueueMetricsRawV1Input[] = [];
+  // An odometer still at zero has not produced a reading yet: emit its baseline first.
+  if (odometer[op] === 0) {
+    emitted.push({ ...shared, cumulative: 0, order_key: orderKey() });
+  }
+
+  // Advance the odometer and emit the new reading; only this row can carry wait_ms.
+  odometer[op] += 1;
+  const waitField = wait_ms !== undefined ? { wait_ms } : {};
+  emitted.push({ ...shared, cumulative: odometer[op], order_key: orderKey(), ...waitField });
+  return emitted;
+}
+
+function newCounterState(n: number): CounterState {
+  return Array.from({ length: n }, () => ({ enqueue: 0, started: 0, ack: 0, nack: 0, dlq: 0 })); // n zeroed odometer sets
+}
+
+// Per-key simulation for CK profiles: 12 tenants (tenant-01 is the hog, matching
+// stageRedisUsage), per-tenant backlog drained round-robin, per-tenant odometers.
+const CK_TENANT_COUNT = 12;
+type CkSimState = { backlog: number[]; counters: Map<number, Record<CounterOp, number>> };
+const ckSim = new Map<number, CkSimState>();
+
+function ckTenantName(t: number): string {
+  return `tenant-${String(t + 1).padStart(2, "0")}`; // 0-based index → "tenant-01", "tenant-02", …
+}
+
+function ckCounterRows(
+  state: CkSimState,
+  tenant: number,
+  ids: Ids,
+  queueName: string,
+  eventTime: string,
+  orderKey: () => number,
+  op: CounterOp,
+  wait_ms?: number
+): QueueMetricsRawV1Input[] {
+  // Per-key variant of the counter rows: same baseline-then-reading shape, tagged with the
+  // tenant's concurrency key. Tenant odometers are created lazily with every op at zero.
+  const existing = state.counters.get(tenant);
+  const zeroed = { enqueue: 0, started: 0, ack: 0, nack: 0, dlq: 0 };
+  const odometer: Record<CounterOp, number> = existing ?? zeroed;
+  if (!existing) state.counters.set(tenant, odometer);
+
+  const shared = {
+    ...ids,
+    queue_name: queueName,
+    concurrency_key: ckTenantName(tenant),
+    event_time: eventTime,
+    op,
+  };
+
+  const emitted: QueueMetricsRawV1Input[] = [];
+  // The first reading of an op is preceded by a cumulative=0 baseline row.
+  if (odometer[op] === 0) emitted.push({ ...shared, cumulative: 0, order_key: orderKey() });
+  odometer[op] += 1;
+  const waitField = wait_ms !== undefined ? { wait_ms } : {};
+  emitted.push({ ...shared, cumulative: odometer[op], order_key: orderKey(), ...waitField });
+  return emitted;
+}
+
+// Advance one bucket of the simulation for every queue, returning the raw rows to insert.
+// `backlog` and `counters` are mutated in place so state carries across buckets (and into
+// live mode). Per-key state for CK profiles lives in the module-level `ckSim` map.
+function simulateBucket(
+  scenario: Scenario,
+  bucket: number,
+  bucketSec: number,
+  eventTime: string,
+  bucketEpochSec: number,
+  ids: Ids,
+  backlog: number[],
+  counters: CounterState,
+  rng: Rng
+): QueueMetricsRawV1Input[] {
+  const envLimit = scenario.envLimit(bucket);
+  const n = scenario.queues.length;
+
+  const limit: number[] = new Array(n);
+  const desired: { arrivals: number; prior: number; want: number }[] = new Array(n);
+  for (let q = 0; q < n; q++) {
+    limit[q] = scenario.queues[q].limit(bucket);
+    const arrivals = Math.min(500, scenario.queues[q].arrivals(bucket, rng));
+    const prior = backlog[q]; // backlog carried from earlier buckets, before this bucket's arrivals
+    backlog[q] += arrivals; // arrivals join the backlog; recorded as enqueues below
+    desired[q] = { arrivals, prior, want: Math.min(limit[q], backlog[q]) };
+  }
+
+  // Env cap: if the queues collectively want more concurrency than the env allows, scale down.
+  const sumWant = desired.reduce((s, d) => s + d.want, 0);
+  const scale = sumWant > envLimit && sumWant > 0 ? envLimit / sumWant : 1;
+
+  const running: number[] = new Array(n);
+  const queued: number[] = new Array(n);
+  let envRunning = 0;
+  let envQueued = 0;
+  for (let q = 0; q < n; q++) {
+    const d = desired[q];
+    running[q] = Math.floor(d.want * scale);
+    queued[q] = backlog[q] - running[q];
+    envRunning += running[q];
+    envQueued += queued[q];
+  }
+
+  // Order keys are time-based (like the production stream ids) so appended runs and live
+  // mode stay monotonic; the per-bucket sequence keeps them unique within a bucket.
+  let bucketSeq = 0;
+  const orderKey = () => bucketEpochSec * 1_000_000 + bucketSeq++;
+
+  const rows: QueueMetricsRawV1Input[] = [];
+  for (let q = 0; q < n; q++) {
+    const profile = scenario.queues[q];
+    const started = running[q];
+    const arrivals = desired[q].arrivals;
+    const prior = desired[q].prior; // depth a starting run actually queued behind
+    backlog[q] = queued[q]; // carry the unserved remainder forward
+
+    if (profile.sparse && arrivals === 0 && started === 0 && prior === 0) {
+      continue; // fully idle: leave a gap so carry-forward is exercised
+    }
+
+    // CK-health fields stay coherent with the depth: no queued runs means no backlogged keys.
+    const ckBacklogged = profile.ck
+      ? queued[q] > 0
+        ? Math.max(1, Math.min(profile.ck.backlogged(bucket, rng), queued[q]))
+        : 0
+      : undefined;
+    const ckMaxWaitMs =
+      profile.ck && ckBacklogged ? Math.round(profile.ck.maxWaitMs(bucket, rng)) : undefined;
+
+    const gauge: QueueMetricsRawV1Input = {
+      ...ids,
+      queue_name: profile.name,
+      event_time: eventTime,
+      op: "gauge",
+      running: running[q],
+      queued: queued[q],
+      queue_limit: limit[q],
+      env_running: envRunning,
+      env_queued: envQueued,
+      env_limit: envLimit,
+      throttled: queued[q] > 0 && (running[q] >= limit[q] || scale < 1) ? 1 : 0,
+      ...(ckBacklogged !== undefined
+        ? { ck_backlogged: ckBacklogged, ck_max_wait_ms: ckMaxWaitMs ?? 0 }
+        : {}),
+    };
+    rows.push(gauge);
+
+    for (let a = 0; a < arrivals; a++) {
+      rows.push(...counterRows(counters, q, ids, profile.name, eventTime, orderKey, "enqueue"));
+    }
+
+    // Per-key rows for CK profiles: assign arrivals hog-weighted, drain round-robin
+    // (fair share), then emit per-tenant odometers + a per-key gauge per active tenant.
+    if (profile.ck) {
+      let ckq = ckSim.get(q);
+      if (!ckq) {
+        ckq = { backlog: new Array(CK_TENANT_COUNT).fill(0), counters: new Map() };
+        ckSim.set(q, ckq);
+      }
+      const hogShare = bucket % 60 < 20 ? 0.6 : 0.15;
+      const arrivalsPerTenant = new Array(CK_TENANT_COUNT).fill(0);
+      for (let a = 0; a < arrivals; a++) {
+        const t = rng() < hogShare ? 0 : 1 + Math.floor(rng() * (CK_TENANT_COUNT - 1));
+        arrivalsPerTenant[t]++;
+        ckq.backlog[t]++;
+      }
+      const drainedPerTenant = new Array(CK_TENANT_COUNT).fill(0);
+      let remaining = started;
+      while (remaining > 0 && ckq.backlog.some((v) => v > 0)) {
+        for (let t = 0; t < CK_TENANT_COUNT && remaining > 0; t++) {
+          if (ckq.backlog[t] > 0) {
+            ckq.backlog[t]--;
+            drainedPerTenant[t]++;
+            remaining--;
+          }
+        }
+      }
+      const fairShare = Math.max(1, limit[q] / CK_TENANT_COUNT); // same for every tenant
+      for (let t = 0; t < CK_TENANT_COUNT; t++) {
+        const ckMedianWait = profile.waitBaseMs + (ckq.backlog[t] / fairShare) * bucketSec * 1000;
+        for (let a = 0; a < arrivalsPerTenant[t]; a++) {
+          rows.push(...ckCounterRows(ckq, t, ids, profile.name, eventTime, orderKey, "enqueue"));
+        }
+        for (let d = 0; d < drainedPerTenant[t]; d++) {
+          rows.push(
+            ...ckCounterRows(
+              ckq,
+              t,
+              ids,
+              profile.name,
+              eventTime,
+              orderKey,
+              "started",
+              Math.round(lognormal(ckMedianWait, WAIT_SIGMA, rng))
+            )
+          );
+          rows.push(...ckCounterRows(ckq, t, ids, profile.name, eventTime, orderKey, "ack"));
+        }
+        if (ckq.backlog[t] > 0 || drainedPerTenant[t] > 0) {
+          rows.push({
+            ...ids,
+            queue_name: profile.name,
+            concurrency_key: ckTenantName(t),
+            event_time: eventTime,
+            op: "gauge",
+            queued: ckq.backlog[t],
+            running: drainedPerTenant[t],
+          });
+        }
+      }
+    }
+
+    const medianWait = profile.waitBaseMs + (prior / Math.max(limit[q], 1)) * bucketSec * 1000;
+    for (let s = 0; s < started; s++) {
+      rows.push(
+        ...counterRows(
+          counters,
+          q,
+          ids,
+          profile.name,
+          eventTime,
+          orderKey,
+          "started",
+          Math.round(lognormal(medianWait, WAIT_SIGMA, rng))
+        )
+      );
+      const roll = rng();
+      const op: CounterOp = roll < DLQ_RATE ? "dlq" : roll < DLQ_RATE + NACK_RATE ? "nack" : "ack";
+      rows.push(...counterRows(counters, q, ids, profile.name, eventTime, orderKey, op));
+    }
+  }
+  return rows;
+}
+
+// ---------------------------------------------------------------------------
+// ClickHouse
+// ---------------------------------------------------------------------------
+
+function clickhouse(): ClickHouse {
+  const clickhouseUrl = process.env.CLICKHOUSE_URL ?? process.env.EVENTS_CLICKHOUSE_URL;
+  if (!clickhouseUrl) {
+    console.error("CLICKHOUSE_URL not set");
+    process.exit(1);
+  }
+  const url = new URL(clickhouseUrl);
+  // Local hosts only (this script TRUNCATEs); URL.hostname brackets IPv6. Never echo the URL (creds).
+  const localHosts = new Set(["localhost", "127.0.0.1", "::1", "[::1]", "0.0.0.0"]);
+  if (!localHosts.has(url.hostname)) {
+    console.error(`Refusing to run against a non-local ClickHouse host: ${url.hostname}`);
+    process.exit(1);
+  }
+  url.searchParams.delete("secure");
+  return new ClickHouse({ url: url.toString(), name: "queue-metrics-simulator" });
+}
+
+async function insertBatched(ch: ClickHouse, rows: QueueMetricsRawV1Input[], nonce: string) {
+  const batchSize = 25_000;
+  for (let offset = 0; offset < rows.length; offset += batchSize) {
+    // Each batch's token is the caller's nonce plus the batch's start offset.
+    const token = `${nonce}:${offset}`;
+    const [failure] = await ch.queueMetrics.insertRaw(rows.slice(offset, offset + batchSize), {
+      params: { clickhouse_settings: { insert_deduplication_token: token } },
+    });
+    if (!failure) continue;
+    console.error("insert failed:", failure.message);
+    process.exit(1);
+  }
+}
+
+async function resetEnv(ch: ClickHouse, environmentId: string) {
+  type CommandClient = { command: (a: { query: string }) => Promise<unknown> };
+  const { client } = ch.writer as unknown as { client: CommandClient };
+  const tables = [
+    "queue_metrics_raw_v1",
+    "queue_metrics_v1",
+    "queue_metrics_5m_v1",
+    "env_metrics_v1",
+    "queue_metrics_ck_v1",
+  ];
+  // One DELETE per table, awaited in order, scoped to this environment's rows.
+  for (const table of tables) {
+    const query = `DELETE FROM trigger_dev.${table} WHERE environment_id = '${environmentId}'`;
+    await client.command({ query });
+  }
+  console.log(`Reset queue metrics for environment ${environmentId}`);
+}
+
+// Fake running counts in the run-queue Redis (Running column + allocation usage bars).
+// Reconciled every run: staged with --usage, cleared otherwise.
+async function stageRedisUsage(scenario: Scenario, ids: Ids, seed: number, clear: boolean) {
+  const host = process.env.RUN_ENGINE_RUN_QUEUE_REDIS_HOST ?? process.env.REDIS_HOST ?? "localhost";
+  const port = Number(
+    process.env.RUN_ENGINE_RUN_QUEUE_REDIS_PORT ?? process.env.REDIS_PORT ?? 6379
+  );
+  const localHosts = new Set(["localhost", "127.0.0.1", "::1", "0.0.0.0"]);
+  if (!localHosts.has(host)) {
+    console.warn(`Skipping Redis usage staging on a non-local host: ${host}`);
+    return;
+  }
+  try {
+    const { createRedisClient } = await import("@internal/redis");
+    const redis = createRedisClient({ host, port });
+    const rng = mulberry32(seed + 1);
+    const prefix = "engine:runqueue:";
+    const logicalBase = `{org:${ids.organization_id}}:proj:${ids.project_id}:env:${ids.environment_id}:queue:`;
+    const base = `${prefix}${logicalBase}`;
+    for (const [q, profile] of scenario.queues.entries()) {
+      const key = `${base}${profile.name}:currentDequeued`;
+      await redis.del(key);
+
+      // CK staging (ckIndex + per-key subqueues) feeds the live per-key table on the queue
+      // detail page. Members are stored unprefixed, exactly like the run-queue Lua does.
+      const ckIndexKey = `${base}${profile.name}:ckIndex`;
+      const lengthCounterKey = `${base}${profile.name}:lengthCounter`;
+      const staleMembers = await redis.zrange(ckIndexKey, 0, -1);
+      for (const member of staleMembers) {
+        await redis.del(`${prefix}${member}`, `${prefix}${member}:currentConcurrency`);
+      }
+      await redis.del(ckIndexKey, lengthCounterKey);
+
+      if (clear) continue;
+      const limit = profile.limit(0);
+      // First queue rides at/over its limit, the rest at 30-90%, sparse mostly idle.
+      const count = profile.sparse
+        ? rng() < 0.3
+          ? 1
+          : 0
+        : q === 0
+          ? limit + Math.round(rng() * 2)
+          : Math.round(limit * (0.3 + 0.6 * rng()));
+      if (count > 0) {
+        await redis.sadd(key, ...Array.from({ length: count }, (_v, i) => `sim_run_${i}`));
+      }
+
+      if (profile.ck) {
+        const now = Date.now();
+        let totalCkQueued = 0;
+        // Same tenant set and names as the per-key simulation (index 0 = tenant-01 = the hog).
+        for (let t = 0; t < CK_TENANT_COUNT; t++) {
+          const tenant = ckTenantName(t);
+          const member = `${logicalBase}${profile.name}:ck:${tenant}`;
+          const hog = t === 0;
+          const queuedCount = hog ? 40 : 1 + Math.round(rng() * 5);
+          const runningCount = hog ? limit : Math.round(rng() * 2);
+          const oldestAgeMs = hog ? 15 * 60_000 : 5_000 + Math.round(rng() * 55_000);
+          const zargs: Array<string | number> = [];
+          for (let i = 0; i < queuedCount; i++) {
+            zargs.push(now - oldestAgeMs + i * 250, `sim_${tenant}_run_${i}`);
+          }
+          await redis.zadd(`${prefix}${member}`, ...zargs);
+          if (runningCount > 0) {
+            await redis.sadd(
+              `${prefix}${member}:currentConcurrency`,
+              ...Array.from({ length: runningCount }, (_v, i) => `sim_${tenant}_running_${i}`)
+            );
+          }
+          await redis.zadd(ckIndexKey, now - oldestAgeMs, member);
+          totalCkQueued += queuedCount;
+        }
+        // The aggregate "Queued now" reads ZCARD(base) + this counter; keep them coherent.
+        await redis.set(lengthCounterKey, totalCkQueued, "EX", 24 * 3600);
+      }
+    }
+    await redis.quit(); // NOTE(review): skipped if anything above throws; the client stays open
+    console.log(
+      clear
+        ? "Cleared staged Redis usage."
+        : "Staged fake running counts in Redis (Running column + allocation usage bars)."
+    );
+  } catch (error) {
+    console.warn("Redis usage staging skipped:", error instanceof Error ? error.message : error);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
+// Puts the synthetic project into the shape the /queues list needs: engine V2 on the
+// project, a current dev worker, and one Postgres TaskQueue per simulated queue (the list
+// pages from Postgres and gates on engine version; ClickHouse only holds the metrics).
+async function ensureTaskQueues(
+  scenario: Scenario,
+  projectId: string,
+  runtimeEnvironmentId: string
+) {
+  const simVersion = "queue-metrics-sim";
+
+  await prisma.project.update({ where: { id: projectId }, data: { engine: "V2" } });
+
+  // Empty update: a simulator worker left by an earlier run is reused as-is.
+  await prisma.backgroundWorker.upsert({
+    where: {
+      projectId_runtimeEnvironmentId_version: {
+        projectId,
+        runtimeEnvironmentId,
+        version: simVersion,
+      },
+    },
+    update: {},
+    create: {
+      friendlyId: generateFriendlyId("worker"),
+      engine: "V2",
+      contentHash: simVersion,
+      sdkVersion: "4.0.0",
+      cliVersion: "4.0.0",
+      projectId,
+      runtimeEnvironmentId,
+      version: simVersion,
+      metadata: {},
+    },
+  });
+
+  // New queues are created; existing ones only get their concurrency limit refreshed.
+  for (const profile of scenario.queues) {
+    const concurrencyLimit = profile.limit(0);
+    await prisma.taskQueue.upsert({
+      where: { runtimeEnvironmentId_name: { runtimeEnvironmentId, name: profile.name } },
+      update: { concurrencyLimit },
+      create: {
+        friendlyId: generateFriendlyId("queue"),
+        version: "V2",
+        name: profile.name,
+        orderableName: profile.name,
+        concurrencyLimit,
+        runtimeEnvironmentId,
+        projectId,
+        type: "NAMED",
+      },
+    });
+  }
+
+  // Drop queues left over from a previously seeded scenario so switching scenarios
+  // does not leave metric-less rows in the list.
+  const pruneResult = await prisma.taskQueue.deleteMany({
+    where: { runtimeEnvironmentId, name: { notIn: scenario.queues.map((q) => q.name) } },
+  });
+  const prunedNote = pruneResult.count > 0 ? `, pruned ${pruneResult.count} stale` : "";
+  console.log(`Ensured ${scenario.queues.length} task queues in Postgres${prunedNote}.`);
+}
+
+// Prints CLI usage: flags, one aligned line per registered scenario, and example invocations.
+function printHelp() {
+  const entries = Object.entries(scenarios);
+  const rows = entries.map(([id, make]) => `  ${id.padEnd(28)}${make(720, 10).description}`);
+  console.log(`Queue metrics simulator: seeds a synthetic tenant with realistic queue metrics.
+
+Usage: pnpm --filter webapp run db:seed:queue-metrics -- [flags]
+
+Flags:
+  --scenario <name>   which scenario to seed (default: mixed)
+  --project <name>    project to seed into (default: ${PROJECT_NAME}); use one
+                      project per scenario to browse them side by side
+  --window <dur>      how much history to backfill, e.g. 30m, 6h, 1d (default: 2h)
+  --bucket <sec>      seconds per simulated bucket (default: 10)
+  --seed <n>          RNG seed for reproducible data (default: 1)
+  --usage             stage fake running counts in Redis so the Running column and
+                      the Allocation tab's usage bars have data (cleared when omitted)
+  --live              after backfilling, keep appending one bucket per interval
+  --reset             clear this environment's metrics before seeding
+  --reset-only        clear and exit without seeding
+  --help              this text
+
+Scenarios:
+${rows.join("\n")}
+
+Example designer setup (one project per scenario):
+  pnpm --filter webapp run db:seed:queue-metrics -- --scenario mixed --reset
+  pnpm --filter webapp run db:seed:queue-metrics -- --scenario many-queues --project qm-many-queues --reset
+  pnpm --filter webapp run db:seed:queue-metrics -- --scenario throttled-backlog --project qm-throttled --reset
+  pnpm --filter webapp run db:seed:queue-metrics -- --scenario tenant-hotspot --project qm-tenants --usage --reset`);
+}
+
+// CLI entry point. Validates flags, finds or creates the synthetic org/project, optionally
+// resets its metrics, backfills --window worth of simulated buckets into ClickHouse, and with
+// --live keeps appending one bucket per interval until interrupted. Ends via process.exit().
+async function main() {
+  const flags = parseArgs(process.argv.slice(2));
+  // --help wins over every other flag.
+  if (flags.help === "true") {
+    printHelp();
+    process.exit(0);
+  }
+  const scenarioName = flags.scenario ?? "mixed";
+  const build = scenarios[scenarioName];
+  if (!build) {
+    console.error(
+      `Unknown scenario "${scenarioName}". Options: ${Object.keys(scenarios).join(", ")}`
+    );
+    process.exit(1);
+  }
+  const bucketSec = Number(flags.bucket ?? 10);
+  if (!Number.isFinite(bucketSec) || bucketSec <= 0) {
+    console.error(`--bucket must be a positive number of seconds, got: ${flags.bucket}`);
+    process.exit(1);
+  }
+  const windowSec = parseDuration(flags.window ?? "2h");
+  const totalBuckets = Math.floor(windowSec / bucketSec);
+  if (!Number.isFinite(totalBuckets) || totalBuckets <= 0) {
+    console.error(
+      `--window must be longer than --bucket (got ${windowSec}s window, ${bucketSec}s bucket)`
+    );
+    process.exit(1);
+  }
+  // Validate --seed like --bucket: a non-numeric value would otherwise reach mulberry32() and
+  // the insert nonce as NaN.
+  const seed = Number(flags.seed ?? 1);
+  if (!Number.isFinite(seed)) {
+    console.error(`--seed must be a finite number, got: ${flags.seed}`);
+    process.exit(1);
+  }
+  const live = flags.live === "true";
+
+  // The org lookup is scoped to the local dev user (created by `pnpm run db:seed`).
+  const user = await prisma.user.findUnique({ where: { email: "local@trigger.dev" } });
+  if (!user) {
+    console.error("User local@trigger.dev not found. Run `pnpm run db:seed` first.");
+    process.exit(1);
+  }
+
+  // Find-or-create the synthetic org and project so repeated runs reuse the same tenant.
+  let org = await prisma.organization.findFirst({
+    where: { title: ORG_TITLE, members: { some: { userId: user.id } } },
+  });
+  if (!org)
+    org = await createOrganization({ title: ORG_TITLE, userId: user.id, companySize: "1-10" });
+
+  const projectName = flags.project ?? PROJECT_NAME;
+  let project = await prisma.project.findFirst({
+    where: { name: projectName, organizationId: org.id },
+  });
+  if (!project) {
+    project = await createProject({
+      organizationSlug: org.slug,
+      name: projectName,
+      userId: user.id,
+      version: "v3",
+    });
+  }
+
+  // Metrics are seeded into the project's DEVELOPMENT environment.
+  const runtimeEnv = await prisma.runtimeEnvironment.findFirst({
+    where: { projectId: project.id, type: "DEVELOPMENT" },
+  });
+  if (!runtimeEnv) {
+    console.error("No DEVELOPMENT environment found for project.");
+    process.exit(1);
+  }
+
+  const ids: Ids = {
+    organization_id: org.id,
+    project_id: project.id,
+    environment_id: runtimeEnv.id,
+  };
+  const ch = clickhouse();
+  // Per-run token (timestamp + seed) handed to insertBatched; live inserts append ":live:<b>".
+  const nonce = `qmsim-${Date.now()}-${seed}`;
+
+  // --reset clears this environment before seeding; --reset-only stops right after clearing.
+  if (flags.reset === "true" || flags["reset-only"] === "true") {
+    await resetEnv(ch, runtimeEnv.id);
+    if (flags["reset-only"] === "true") {
+      await ch.close();
+      process.exit(0);
+    }
+  }
+
+  const scenario = build(totalBuckets, bucketSec);
+  await ensureTaskQueues(scenario, project.id, runtimeEnv.id);
+  // The last argument is the "clear" switch: without --usage, staged Redis usage is removed.
+  await stageRedisUsage(scenario, ids, seed, flags.usage !== "true");
+  const rng = mulberry32(seed);
+  const backlog = new Array(scenario.queues.length).fill(0);
+
+  console.log(`Scenario "${scenarioName}": ${scenario.description}`);
+  console.log(
+    `Backfilling ${totalBuckets} x ${bucketSec}s buckets (${flags.window ?? "2h"}) for ${scenario.queues.length} queues...`
+  );
+
+  // Backfill: buckets from (now - window) up to now, aligned to the bucket grid.
+  const nowBucket = Math.floor(Date.now() / 1000 / bucketSec) * bucketSec;
+  const startBucket = nowBucket - totalBuckets * bucketSec;
+  const counters = newCounterState(scenario.queues.length);
+  // Shared by the backfill and live loops so both advance the same backlog/counter/RNG state.
+  const simulateAt = (index: number, epochSec: number, eventTime: string) =>
+    simulateBucket(scenario, index, bucketSec, eventTime, epochSec, ids, backlog, counters, rng);
+  const rows: QueueMetricsRawV1Input[] = [];
+  for (let b = 0; b < totalBuckets; b++) {
+    const bucketEpochSec = startBucket + b * bucketSec;
+    const eventTime = formatChDateTime(new Date(bucketEpochSec * 1000));
+    rows.push(...simulateAt(b, bucketEpochSec, eventTime));
+  }
+  await insertBatched(ch, rows, nonce);
+  console.log(`Inserted ${rows.length} raw rows.`);
+
+  // Merge the AggregatingMergeTree partials so argMax "current value" widgets read cleanly.
+  // The real pipeline relies on background merges; the simulator forces it for a tidy demo.
+  const raw = (
+    ch.writer as unknown as { client: { command: (a: { query: string }) => Promise<unknown> } }
+  ).client;
+  await raw.command({ query: `OPTIMIZE TABLE trigger_dev.queue_metrics_v1 FINAL` });
+  await raw.command({ query: `OPTIMIZE TABLE trigger_dev.queue_metrics_5m_v1 FINAL` });
+  await raw.command({ query: `OPTIMIZE TABLE trigger_dev.env_metrics_v1 FINAL` });
+  await raw.command({ query: `OPTIMIZE TABLE trigger_dev.queue_metrics_ck_v1 FINAL` });
+
+  const origin = process.env.APP_ORIGIN ?? "http://localhost:3030";
+  console.log(
+    `\nQueues dashboard: ${origin}/orgs/${org.slug}/projects/${project.slug}/env/dev/dashboards/queues`
+  );
+
+  // Live mode never returns; the close/exit at the bottom only runs for one-shot seeding.
+  if (live) {
+    console.log(`\nLive mode: appending one bucket every ${bucketSec}s (Ctrl-C to stop)...`);
+    let b = totalBuckets; // bucket indices continue where the backfill stopped
+    // eslint-disable-next-line no-constant-condition
+    while (true) {
+      await new Promise((r) => setTimeout(r, bucketSec * 1000));
+      // Snap "now" back onto the bucket grid for this tick's timestamp.
+      const bucketEpochSec = Math.floor(Date.now() / 1000 / bucketSec) * bucketSec;
+      const eventTime = formatChDateTime(new Date(bucketEpochSec * 1000));
+      const liveRows = simulateAt(b, bucketEpochSec, eventTime);
+      await insertBatched(ch, liveRows, `${nonce}:live:${b}`);
+      console.log(`bucket ${b}: ${liveRows.length} rows @ ${eventTime}`);
+      b++;
+    }
+  }
+
+  await ch.close();
+  process.exit(0);
+}
+
+// Any failure in main() is printed and turned into a non-zero exit code.
+main().catch((e) => {
+  console.error(e);
+  process.exit(1);
+});
diff --git a/apps/webapp/test/queueMetricsMapping.test.ts b/apps/webapp/test/queueMetricsMapping.test.ts
new file mode 100644
index 00000000000..61e3893c7fb
--- /dev/null
+++ b/apps/webapp/test/queueMetricsMapping.test.ts
@@ -0,0 +1,239 @@
+import { describe, expect, it } from "vitest";
+import {
+  descriptorFromQueue,
+  mapEntryToRows,
+  OVERFLOW_QUEUE_NAME,
+  QueueNameLimiter,
+} from "~/v3/queueMetricsMapping";
+
+describe("descriptorFromQueue", () => {
+  // Every well-formed case shares this org/project/env head; only the tail after it varies.
+  const envPrefix = "{org:o1}:proj:p1:env:e1";
+  const parse = (rest: string) => descriptorFromQueue(`${envPrefix}${rest}`);
+
+  it("parses a plain descriptor", () => {
+    expect(parse(":queue:task/my-task")).toEqual({
+      organization_id: "o1",
+      project_id: "p1",
+      environment_id: "e1",
+      queue_name: "task/my-task",
+      concurrency_key: "",
+    });
+  });
+
+  it("captures a concurrency-key suffix", () => {
+    const expected = { queue_name: "task/t", concurrency_key: "tenant-3" };
+    expect(parse(":queue:task/t:ck:tenant-3")).toEqual(expect.objectContaining(expected));
+  });
+
+  it("maps the ck wildcard to no key", () => {
+    const expected = { queue_name: "task/t", concurrency_key: "" };
+    expect(parse(":queue:task/t:ck:*")).toEqual(expect.objectContaining(expected));
+  });
+
+  it("keeps colons inside the queue name", () => {
+    const expected = { queue_name: "my:odd:queue", concurrency_key: "" };
+    expect(parse(":queue:my:odd:queue")).toEqual(expect.objectContaining(expected));
+  });
+
+  it("keeps colons in the name while capturing a real ck suffix", () => {
+    const expected = { queue_name: "a:b", concurrency_key: "t9" };
+    expect(parse(":queue:a:b:ck:t9")).toEqual(expect.objectContaining(expected));
+  });
+
+  it("rejects malformed descriptors", () => {
+    expect(descriptorFromQueue("not-a-descriptor")).toBeNull();
+    expect(parse("")).toBeNull();
+    expect(descriptorFromQueue("")).toBeNull();
+  });
+});
+
+describe("QueueNameLimiter", () => {
+  it("passes names through under the cap and overflows past it, per scope", () => {
+    const limiter = new QueueNameLimiter(2);
+    // With a cap of 2, the third distinct env1 name overflows; an admitted name keeps passing.
+    const env1 = ["a", "b", "c", "a"].map((name) => limiter.limit("env1", name));
+    expect(env1).toEqual(["a", "b", OVERFLOW_QUEUE_NAME, "a"]);
+    // A different scope has its own budget, so "c" fits there.
+    expect(limiter.limit("env2", "c")).toBe("c");
+  });
+
+  it("is unlimited when the cap is 0", () => {
+    const limiter = new QueueNameLimiter(0);
+    Array.from({ length: 100 }, (_v, i) => `q${i}`).forEach((name) => {
+      expect(limiter.limit("env1", name)).toBe(name);
+    });
+  });
+
+  it("evicts the oldest scope when the scope map is full", () => {
+    const limiter = new QueueNameLimiter(1, 2);
+    const firstNames = ["env1", "env2", "env3"].map((scope) => limiter.limit(scope, "a"));
+    expect(firstNames).toEqual(["a", "a", "a"]);
+    // Only 2 scopes are tracked, so env3 evicted env1 and env1's cap of 1 is free again.
+    expect(limiter.limit("env1", "b")).toBe("b");
+  });
+});
+
+describe("mapEntryToRows", () => {
+  const q = "{org:o1}:proj:p1:env:e1:queue:task/t";
+
+  it("maps a gauge entry with numeric fields", () => {
+    const gaugeFields = {
+      op: "gauge",
+      q,
+      ql: "5",
+      cc: "2",
+      lim: "10",
+      eql: "7",
+      ec: "3",
+      elim: "20",
+      thr: "1",
+    };
+    const rows = mapEntryToRows({ id: "1700000000000-0", fields: gaugeFields });
+    expect(rows).toHaveLength(1);
+    const gauge = rows[0]!;
+    expect(gauge).toEqual(
+      expect.objectContaining({
+        op: "gauge",
+        organization_id: "o1",
+        queue_name: "task/t",
+        concurrency_key: "",
+        queued: 5,
+        running: 2,
+        queue_limit: 10,
+        env_queued: 7,
+        env_running: 3,
+        env_limit: 20,
+        throttled: 1,
+      })
+    );
+    // The id's millisecond part (1700000000000) is 2023-11-14 22:13:20 UTC.
+    expect(gauge.event_time).toBe("2023-11-14 22:13:20");
+    expect(gauge.ck_backlogged).toBeUndefined();
+    expect(gauge.ck_max_wait_ms).toBeUndefined();
+  });
+
+  it("keeps the key on per-subqueue gauges and maps the CK-health tail", () => {
+    // The descriptor ends in :ck:tenant-1, so the row stays attributed to that key;
+    // ckq/ckw surface as ck_backlogged/ck_max_wait_ms.
+    const fields = { op: "gauge", q: `${q}:ck:tenant-1`, ql: "4", ckq: "3", ckw: "2500" };
+    const rows = mapEntryToRows({ id: "1700000000000-0", fields });
+    expect(rows).toHaveLength(1);
+    expect(rows[0]).toEqual(
+      expect.objectContaining({
+        op: "gauge",
+        queue_name: "task/t",
+        concurrency_key: "tenant-1",
+        queued: 4,
+        ck_backlogged: 3,
+        ck_max_wait_ms: 2500,
+      })
+    );
+  });
+
+  it("maps started with wait_ms + cumulative and drops unknown ops", () => {
+    const startedRows = mapEntryToRows({
+      id: "1700000000000-0",
+      fields: { op: "started", q, wait: "48", cum: "512" },
+    });
+    expect(startedRows).toHaveLength(1);
+    // order_key is the stream id's ms part scaled by 1e6 (plus the sequence, 0 here).
+    const expectedStarted = {
+      op: "started",
+      wait_ms: 48,
+      cumulative: 512,
+      order_key: (1700000000000n * 1000000n).toString(),
+    };
+    expect(startedRows[0]).toEqual(expect.objectContaining(expectedStarted));
+    const [ackRow] = mapEntryToRows({ id: "1-0", fields: { op: "ack", q, cum: "9" } });
+    expect(ackRow).toEqual(expect.objectContaining({ op: "ack", cumulative: 9 }));
+    // An unknown op, or an entry without a queue descriptor, maps to no rows.
+    expect(mapEntryToRows({ id: "1-0", fields: { op: "bogus", q } })).toEqual([]);
+    expect(mapEntryToRows({ id: "1-0", fields: { op: "ack" } })).toEqual([]);
+  });
+
+  it("expands a dual-odometer counter entry into base + per-key rows", () => {
+    const rows = mapEntryToRows({
+      id: "1700000000000-3",
+      fields: { op: "started", q, ck: "tenant-9", wait: "80", cum: "41", ckcum: "7" },
+    });
+    expect(rows).toHaveLength(2);
+    const [baseRow, keyRow] = rows;
+    expect(baseRow).toEqual(
+      expect.objectContaining({ queue_name: "task/t", cumulative: 41, wait_ms: 80 })
+    );
+    expect(baseRow!.concurrency_key).toBeUndefined();
+    expect(keyRow).toEqual(
+      expect.objectContaining({
+        queue_name: "task/t",
+        concurrency_key: "tenant-9",
+        cumulative: 7,
+        wait_ms: 80,
+      })
+    );
+    // Both rows come from the same stream entry, so they share one order_key.
+    expect(baseRow!.order_key).toBe(keyRow!.order_key);
+
+    // Baseline entries carry exactly one odometer each.
+    const baseBaseline = mapEntryToRows({ id: "1-0", fields: { op: "started", q, cum: "0" } });
+    expect(baseBaseline).toHaveLength(1);
+    expect(baseBaseline[0]!.concurrency_key).toBeUndefined();
+    const ckFields = { op: "started", q, ck: "tenant-9", ckcum: "0" };
+    const ckBaseline = mapEntryToRows({ id: "1-1", fields: ckFields });
+    expect(ckBaseline).toHaveLength(1);
+    expect(ckBaseline[0]).toEqual(
+      expect.objectContaining({ concurrency_key: "tenant-9", cumulative: 0 })
+    );
+  });
+
+  it("applies the queue-name limiter: gauges overflow, counters drop", () => {
+    const otherQueue = "{org:o1}:proj:p1:env:e1:queue:task/other";
+    const limiters = { queueNames: new QueueNameLimiter(1) };
+    // With a cap of 1, the first queue seen ("task/t") keeps its own name.
+    const [admitted] = mapEntryToRows({ id: "1-0", fields: { op: "ack", q, cum: "1" } }, limiters);
+    expect(admitted!.queue_name).toBe("task/t");
+
+    // Overflowed gauges keep flowing under the shared name (max stays meaningful),
+    // with per-key attribution stripped.
+    const overflowGauge = mapEntryToRows(
+      { id: "1-1", fields: { op: "gauge", q: `${otherQueue}:ck:t1`, ql: "3" } },
+      limiters
+    );
+    const [gaugeRow] = overflowGauge;
+    expect(gaugeRow!.queue_name).toBe(OVERFLOW_QUEUE_NAME);
+    expect(gaugeRow!.concurrency_key).toBe("");
+
+    // Overflowed counters are dropped: merging distinct odometers under one key
+    // produces garbage deltas.
+    const overflowCounter = mapEntryToRows(
+      { id: "1-2", fields: { op: "ack", q: otherQueue, cum: "4" } },
+      limiters
+    );
+    expect(overflowCounter).toEqual([]);
+  });
+
+  it("applies the concurrency-key limiter: overflow drops the per-key row, keeps base", () => {
+    const limiters = { concurrencyKeys: new QueueNameLimiter(1) };
+    // Cap of one concurrency key: t1 is admitted, so its entry yields base + per-key rows.
+    const t1Fields = { op: "ack", q, ck: "t1", cum: "5", ckcum: "2" };
+    expect(mapEntryToRows({ id: "1-0", fields: t1Fields }, limiters)).toHaveLength(2);
+
+    // t2 exceeds the cap: its per-key row is dropped while the base row (queue odometer
+    // cum = 6) is kept without a concurrency_key.
+    const t2Fields = { op: "ack", q, ck: "t2", cum: "6", ckcum: "1" };
+    const overflowed = mapEntryToRows({ id: "1-1", fields: t2Fields }, limiters);
+    expect(overflowed).toHaveLength(1);
+    const [baseOnly] = overflowed;
+    expect(baseOnly!.cumulative).toBe(6);
+    expect(baseOnly!.concurrency_key).toBeUndefined();
+
+    // Gauge for an overflowed key keeps the row but loses the attribution.
+    const overflowGauge = mapEntryToRows(
+      { id: "1-2", fields: { op: "gauge", q: `${q}:ck:t3`, ql: "2" } },
+      limiters
+    );
+    expect(overflowGauge).toHaveLength(1);
+    const [gaugeRow] = overflowGauge;
+    expect(gaugeRow!.concurrency_key).toBe("");
+  });
+});
diff --git a/internal-packages/clickhouse/schema/035_create_queue_metrics_v1.sql b/internal-packages/clickhouse/schema/035_create_queue_metrics_v1.sql
new file mode 100644
index 00000000000..8ea1e65f09f
--- /dev/null
+++ b/internal-packages/clickhouse/schema/035_create_queue_metrics_v1.sql
@@ -0,0 +1,267 @@
+-- +goose Up
+
+-- Queue metrics: raw landing table -> MV -> aggregated read target (mirrors
+-- llm_model_aggregates_v1, migration 027). Raw rows feed an MV on insert, and
+-- reads hit the aggregated table.
+
+-- (1) Short-TTL raw landing, one row per stream entry. non_replicated_deduplication_window
+-- makes consumer replays idempotent via insert_deduplication_token.
+CREATE TABLE IF NOT EXISTS trigger_dev.queue_metrics_raw_v1
+(
+  organization_id  LowCardinality(String),
+  project_id       LowCardinality(String),
+  environment_id   String CODEC(ZSTD(1)),
+  queue_name       String CODEC(ZSTD(1)),
+  concurrency_key  String DEFAULT '' CODEC(ZSTD(1)),  -- per-key attribution ('' = base/whole-queue row)
+  event_time       DateTime CODEC(Delta(4), ZSTD(1)),
+  order_key        UInt64 DEFAULT 0,                 -- stream-id composite (ms*1e6+seq), deltaSumTimestamp ordering key
+  op               LowCardinality(String),          -- gauge | enqueue | started | ack | nack | dlq
+  running          UInt32 DEFAULT 0,
+  queued           UInt32 DEFAULT 0,
+  queue_limit      UInt32 DEFAULT 0,
+  env_running      UInt32 DEFAULT 0,
+  env_queued       UInt32 DEFAULT 0,
+  env_limit        UInt32 DEFAULT 0,
+  throttled        UInt8  DEFAULT 0,                 -- 1 on a gauge emission with running>=limit AND queued>0
+  ck_backlogged    UInt32 DEFAULT 0,                 -- gauge on CK queues: distinct concurrency keys with queued work
+  ck_max_wait_ms   UInt32 DEFAULT 0,                 -- gauge on CK queues: most-starved key's head-of-line wait
+  wait_ms          UInt32 DEFAULT 0,                 -- set on op='started' (scheduling delay)
+  cumulative       UInt64 DEFAULT 0                  -- monotonic odometer on a counter op (per queue+op; per key on keyed rows), differenced by deltaSumTimestamp in the MVs
+)
+ENGINE = MergeTree()
+PARTITION BY toDate(event_time)
+ORDER BY (organization_id, project_id, environment_id, queue_name, event_time)
+TTL event_time + INTERVAL 6 HOUR
+SETTINGS non_replicated_deduplication_window = 1000, ttl_only_drop_parts = 1;
+
+-- (2) Aggregated read target (TRQL/dashboards query this).
+CREATE TABLE IF NOT EXISTS trigger_dev.queue_metrics_v1
+(
+  organization_id  LowCardinality(String),
+  project_id       LowCardinality(String),
+  environment_id   String CODEC(ZSTD(1)),
+  queue_name       String CODEC(ZSTD(1)),
+  bucket_start     DateTime CODEC(Delta(4), ZSTD(1)),
+
+  -- Cumulative-counter deltas: each op maintains a monotonic odometer, and deltaSumTimestamp
+  -- sums positive consecutive deltas (ignoring resets) ordered by order_key (the stream-id
+  -- composite, not event_time), so a lost reading self-heals (the next surviving reading
+  -- restates the total). Read with deltaSumTimestampMerge(<col>), never sum().
+  enqueue_delta    AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  started_delta    AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  ack_delta        AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  nack_delta       AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  dlq_delta        AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  throttled_count  SimpleAggregateFunction(sum, UInt64),
+
+  max_queued       SimpleAggregateFunction(max, UInt32),
+  max_running      SimpleAggregateFunction(max, UInt32),
+  max_limit        SimpleAggregateFunction(max, UInt32),
+  max_env_queued   SimpleAggregateFunction(max, UInt32),
+  max_env_running  SimpleAggregateFunction(max, UInt32),
+  max_env_limit    SimpleAggregateFunction(max, UInt32),
+  max_ck_backlogged SimpleAggregateFunction(max, UInt32),
+  max_ck_wait_ms   SimpleAggregateFunction(max, UInt32),
+
+  wait_ms_sum      SimpleAggregateFunction(sum, UInt64),
+  wait_ms_count    SimpleAggregateFunction(sum, UInt64),
+  wait_quantiles   AggregateFunction(quantiles(0.5, 0.9, 0.95, 0.99), UInt32)
+)
+ENGINE = AggregatingMergeTree()
+PARTITION BY toDate(bucket_start)
+ORDER BY (organization_id, project_id, environment_id, queue_name, bucket_start)
+TTL bucket_start + INTERVAL 30 DAY
+SETTINGS ttl_only_drop_parts = 1, non_replicated_deduplication_window = 1000;
+
+-- (3) MV: raw -> aggregated, 10s buckets. Counter deltas and wait stats use base rows only (concurrency_key = '').
+CREATE MATERIALIZED VIEW IF NOT EXISTS trigger_dev.queue_metrics_mv_v1
+TO trigger_dev.queue_metrics_v1 AS
+SELECT
+  organization_id, project_id, environment_id, queue_name,
+  toStartOfInterval(event_time, INTERVAL 10 SECOND) AS bucket_start,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'enqueue' AND concurrency_key = '') AS enqueue_delta,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'started' AND concurrency_key = '') AS started_delta,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'ack' AND concurrency_key = '')     AS ack_delta,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'nack' AND concurrency_key = '')    AS nack_delta,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'dlq' AND concurrency_key = '')     AS dlq_delta,
+  sum(throttled)          AS throttled_count,
+  max(queued)             AS max_queued,
+  max(running)            AS max_running,
+  max(queue_limit)        AS max_limit,
+  max(env_queued)         AS max_env_queued,
+  max(env_running)        AS max_env_running,
+  max(env_limit)          AS max_env_limit,
+  max(ck_backlogged)      AS max_ck_backlogged,
+  max(ck_max_wait_ms)     AS max_ck_wait_ms,
+  sumIf(wait_ms, op = 'started' AND concurrency_key = '')                 AS wait_ms_sum,
+  countIf(op = 'started' AND wait_ms > 0 AND concurrency_key = '')        AS wait_ms_count,
+  quantilesStateIf(0.5, 0.9, 0.95, 0.99)(wait_ms, op = 'started' AND wait_ms > 0 AND concurrency_key = '') AS wait_quantiles
+FROM trigger_dev.queue_metrics_raw_v1
+GROUP BY organization_id, project_id, environment_id, queue_name, bucket_start;
+
+-- (4) Env-level 10s rollup (no queue dimension) for header tiles/saturation charts.
+-- Row count is queue-independent (~8640/day/env, i.e. one row per 10s bucket), so full
+-- granularity stays cheap at any range. No counter deltas on purpose: cross-queue
+-- deltaSumTimestamp state merges mix unrelated odometers (env totals must GROUP BY queue
+-- then sum). TDigest because an env-level reservoir absorbs every sample in the environment.
+CREATE TABLE IF NOT EXISTS trigger_dev.env_metrics_v1
+(
+  organization_id  LowCardinality(String),
+  project_id       LowCardinality(String),
+  environment_id   String CODEC(ZSTD(1)),
+  bucket_start     DateTime CODEC(Delta(4), ZSTD(1)),
+
+  max_env_queued   SimpleAggregateFunction(max, UInt32),
+  max_env_running  SimpleAggregateFunction(max, UInt32),
+  max_env_limit    SimpleAggregateFunction(max, UInt32),
+  throttled_count  SimpleAggregateFunction(sum, UInt64),
+
+  wait_ms_sum      SimpleAggregateFunction(sum, UInt64),
+  wait_ms_count    SimpleAggregateFunction(sum, UInt64),
+  wait_quantiles   AggregateFunction(quantilesTDigest(0.5, 0.9, 0.95, 0.99), UInt32)
+)
+ENGINE = AggregatingMergeTree()
+PARTITION BY toDate(bucket_start)
+ORDER BY (organization_id, project_id, environment_id, bucket_start)
+TTL bucket_start + INTERVAL 30 DAY
+SETTINGS ttl_only_drop_parts = 1, non_replicated_deduplication_window = 1000;
+
+-- (5) MV: raw -> env rollup, 10s buckets. Wait stats come from base 'started' rows only (concurrency_key = '').
+CREATE MATERIALIZED VIEW IF NOT EXISTS trigger_dev.env_metrics_mv_v1
+TO trigger_dev.env_metrics_v1 AS
+SELECT
+  organization_id, project_id, environment_id,
+  toStartOfInterval(event_time, INTERVAL 10 SECOND) AS bucket_start,
+  max(env_queued)         AS max_env_queued,
+  max(env_running)        AS max_env_running,
+  max(env_limit)          AS max_env_limit,
+  sum(throttled)          AS throttled_count,
+  sumIf(wait_ms, op = 'started' AND concurrency_key = '')                 AS wait_ms_sum,
+  countIf(op = 'started' AND wait_ms > 0 AND concurrency_key = '')        AS wait_ms_count,
+  quantilesTDigestStateIf(0.5, 0.9, 0.95, 0.99)(wait_ms, op = 'started' AND wait_ms > 0 AND concurrency_key = '') AS wait_quantiles
+FROM trigger_dev.queue_metrics_raw_v1
+GROUP BY organization_id, project_id, environment_id, bucket_start;
+
+-- (6) Per-queue 5m rollup, exact column mirror of queue_metrics_v1 (so *_delta columns are read
+-- with deltaSumTimestampMerge here too), for ranking and env-wide GROUP BY queue reads at long ranges.
+CREATE TABLE IF NOT EXISTS trigger_dev.queue_metrics_5m_v1
+(
+  organization_id  LowCardinality(String),
+  project_id       LowCardinality(String),
+  environment_id   String CODEC(ZSTD(1)),
+  queue_name       String CODEC(ZSTD(1)),
+  bucket_start     DateTime CODEC(Delta(4), ZSTD(1)),
+
+  enqueue_delta    AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  started_delta    AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  ack_delta        AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  nack_delta       AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  dlq_delta        AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  throttled_count  SimpleAggregateFunction(sum, UInt64),
+
+  max_queued       SimpleAggregateFunction(max, UInt32),
+  max_running      SimpleAggregateFunction(max, UInt32),
+  max_limit        SimpleAggregateFunction(max, UInt32),
+  max_env_queued   SimpleAggregateFunction(max, UInt32),
+  max_env_running  SimpleAggregateFunction(max, UInt32),
+  max_env_limit    SimpleAggregateFunction(max, UInt32),
+  max_ck_backlogged SimpleAggregateFunction(max, UInt32),
+  max_ck_wait_ms   SimpleAggregateFunction(max, UInt32),
+
+  wait_ms_sum      SimpleAggregateFunction(sum, UInt64),
+  wait_ms_count    SimpleAggregateFunction(sum, UInt64),
+  wait_quantiles   AggregateFunction(quantiles(0.5, 0.9, 0.95, 0.99), UInt32)
+)
+ENGINE = AggregatingMergeTree()
+PARTITION BY toDate(bucket_start)
+ORDER BY (organization_id, project_id, environment_id, queue_name, bucket_start)
+TTL bucket_start + INTERVAL 30 DAY
+SETTINGS ttl_only_drop_parts = 1, non_replicated_deduplication_window = 1000;
+
+-- (7) MV: raw -> 5m rollup; same expressions as (3) with 5-minute buckets. MUST read raw, never
+-- cascade off queue_metrics_v1 with -MergeState: MV GROUP BY merges states in hash order, and
+-- out-of-time-order deltaSumTimestamp merges double-count bridging spans (verified 3x inflation).
+CREATE MATERIALIZED VIEW IF NOT EXISTS trigger_dev.queue_metrics_5m_mv_v1
+TO trigger_dev.queue_metrics_5m_v1 AS
+SELECT
+  organization_id, project_id, environment_id, queue_name,
+  toStartOfInterval(event_time, INTERVAL 5 MINUTE) AS bucket_start,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'enqueue' AND concurrency_key = '') AS enqueue_delta,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'started' AND concurrency_key = '') AS started_delta,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'ack' AND concurrency_key = '')     AS ack_delta,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'nack' AND concurrency_key = '')    AS nack_delta,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'dlq' AND concurrency_key = '')     AS dlq_delta,
+  sum(throttled)          AS throttled_count,
+  max(queued)             AS max_queued,
+  max(running)            AS max_running,
+  max(queue_limit)        AS max_limit,
+  max(env_queued)         AS max_env_queued,
+  max(env_running)        AS max_env_running,
+  max(env_limit)          AS max_env_limit,
+  max(ck_backlogged)      AS max_ck_backlogged,
+  max(ck_max_wait_ms)     AS max_ck_wait_ms,
+  sumIf(wait_ms, op = 'started' AND concurrency_key = '')                 AS wait_ms_sum,
+  countIf(op = 'started' AND wait_ms > 0 AND concurrency_key = '')        AS wait_ms_count,
+  quantilesStateIf(0.5, 0.9, 0.95, 0.99)(wait_ms, op = 'started' AND wait_ms > 0 AND concurrency_key = '') AS wait_quantiles
+FROM trigger_dev.queue_metrics_raw_v1
+GROUP BY organization_id, project_id, environment_id, queue_name, bucket_start;
+
+
+-- (8) Per-concurrency-key 10s tier. Rows are activity-bound (a (queue, key, bucket) row
+-- exists only when that key had an event in that bucket), so user-controlled key
+-- cardinality cannot inflate it beyond event volume (~19 bytes/event measured).
+-- Lean columns: no nack/dlq deltas, no per-key quantile states (mean wait = wait_ms_sum / wait_ms_count).
+CREATE TABLE IF NOT EXISTS trigger_dev.queue_metrics_ck_v1
+(
+  organization_id  LowCardinality(String),
+  project_id       LowCardinality(String),
+  environment_id   String CODEC(ZSTD(1)),
+  queue_name       String CODEC(ZSTD(1)),
+  concurrency_key  String CODEC(ZSTD(1)),
+  bucket_start     DateTime CODEC(Delta(4), ZSTD(1)),
+
+  enqueue_delta    AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  started_delta    AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+  ack_delta        AggregateFunction(deltaSumTimestamp, UInt64, UInt64),
+
+  max_queued       SimpleAggregateFunction(max, UInt32),
+  max_running      SimpleAggregateFunction(max, UInt32),
+
+  wait_ms_sum      SimpleAggregateFunction(sum, UInt64),
+  wait_ms_count    SimpleAggregateFunction(sum, UInt64)
+)
+ENGINE = AggregatingMergeTree()
+PARTITION BY toDate(bucket_start)
+ORDER BY (organization_id, project_id, environment_id, queue_name, concurrency_key, bucket_start)
+TTL bucket_start + INTERVAL 30 DAY
+SETTINGS ttl_only_drop_parts = 1, non_replicated_deduplication_window = 1000;
+
+-- (9) MV: raw -> per-key tier. Only rows with a real key (WHERE concurrency_key != ''):
+-- per-key counter rows carry per-key odometers (safe to merge within their own (queue, key)
+-- group), and per-key gauge rows carry per-subqueue depth/running.
+CREATE MATERIALIZED VIEW IF NOT EXISTS trigger_dev.queue_metrics_ck_mv_v1
+TO trigger_dev.queue_metrics_ck_v1 AS
+SELECT
+  organization_id, project_id, environment_id, queue_name, concurrency_key,
+  toStartOfInterval(event_time, INTERVAL 10 SECOND) AS bucket_start,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'enqueue') AS enqueue_delta,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'started') AS started_delta,
+  deltaSumTimestampStateIf(cumulative, order_key, op = 'ack')     AS ack_delta,
+  maxIf(queued, op = 'gauge')  AS max_queued,
+  maxIf(running, op = 'gauge') AS max_running,
+  sumIf(wait_ms, op = 'started')          AS wait_ms_sum,
+  countIf(op = 'started' AND wait_ms > 0) AS wait_ms_count
+FROM trigger_dev.queue_metrics_raw_v1
+WHERE concurrency_key != ''
+GROUP BY organization_id, project_id, environment_id, queue_name, concurrency_key, bucket_start;
+-- +goose Down
+-- Reverse dependency order: each materialized view is dropped before its target table; raw goes last.
+DROP VIEW IF EXISTS trigger_dev.queue_metrics_ck_mv_v1;
+DROP TABLE IF EXISTS trigger_dev.queue_metrics_ck_v1;
+DROP VIEW IF EXISTS trigger_dev.queue_metrics_5m_mv_v1;
+DROP TABLE IF EXISTS trigger_dev.queue_metrics_5m_v1;
+DROP VIEW IF EXISTS trigger_dev.env_metrics_mv_v1;
+DROP TABLE IF EXISTS trigger_dev.env_metrics_v1;
+DROP VIEW IF EXISTS trigger_dev.queue_metrics_mv_v1;
+DROP TABLE IF EXISTS trigger_dev.queue_metrics_v1;
+DROP TABLE IF EXISTS trigger_dev.queue_metrics_raw_v1;
diff --git a/internal-packages/clickhouse/src/client/tsql.ts b/internal-packages/clickhouse/src/client/tsql.ts
index c712820812f..ddf1d059b97 100644
--- a/internal-packages/clickhouse/src/client/tsql.ts
+++ b/internal-packages/clickhouse/src/client/tsql.ts
@@ -108,6 +108,11 @@ export interface ExecuteTSQLOptions<TOut extends z.ZodSchema> {
    * based on the span of the time range.
    */
   timeRange?: TimeRange;
+  /**
+   * Opt-in: emit rows for empty time buckets in a top-level time-bucketed query
+   * (counters zero-fill, gauges carry forward). Off by default.
+   */
+  fillGaps?: boolean;
 }
 
 /**
@@ -192,6 +197,7 @@ export async function executeTSQL<TOut extends z.ZodSchema>(
       fieldMappings: options.fieldMappings,
       whereClauseFallback: options.whereClauseFallback,
       timeRange: options.timeRange,
+      fillGaps: options.fillGaps,
     });
 
     generatedSql = sql;
diff --git a/internal-packages/clickhouse/src/index.ts b/internal-packages/clickhouse/src/index.ts
index 0b252a98f67..97c2209b1cb 100644
--- a/internal-packages/clickhouse/src/index.ts
+++ b/internal-packages/clickhouse/src/index.ts
@@ -32,6 +32,14 @@ import {
 } from "./taskEvents.js";
 import { insertMetrics } from "./metrics.js";
 import { insertLlmMetrics } from "./llmMetrics.js";
+import {
+  insertQueueMetricsRaw,
+  getQueueListMetricsSummary,
+  getQueueDepthSparklines,
+  getQueueRanking,
+  getQueueRankingNames,
+  getQueueRankingCount,
+} from "./queueMetrics.js";
 import {
   getSessionTagsQueryBuilder,
   getSessionsCountQueryBuilder,
@@ -65,6 +73,7 @@ export type * from "./taskRuns.js";
 export type * from "./taskEvents.js";
 export type * from "./metrics.js";
 export type * from "./llmMetrics.js";
+export type * from "./queueMetrics.js";
 export type * from "./llmModelAggregates.js";
 export type * from "./errors.js";
 export type * from "./sessions.js";
@@ -260,6 +269,17 @@ export class ClickHouse {
     };
   }
 
+  /** Queue-metrics accessors: the raw insert is bound to the writer, every read to the reader. */
+  get queueMetrics() {
+    const { writer, reader } = this;
+    return {
+      insertRaw: insertQueueMetricsRaw(writer),
+      listSummary: getQueueListMetricsSummary(reader),
+      depthSparklines: getQueueDepthSparklines(reader),
+      ranking: getQueueRanking(reader),
+      rankingNames: getQueueRankingNames(reader),
+      rankingCount: getQueueRankingCount(reader),
+    };
+  }
+
   get llmModelAggregates() {
     return {
       globalMetrics: getGlobalModelMetrics(this.reader),
diff --git a/internal-packages/clickhouse/src/queueMetrics.test.ts b/internal-packages/clickhouse/src/queueMetrics.test.ts
new file mode 100644
index 00000000000..00532041e44
--- /dev/null
+++ b/internal-packages/clickhouse/src/queueMetrics.test.ts
@@ -0,0 +1,525 @@
+import { clickhouseTest } from "@internal/testcontainers";
+import { z } from "zod";
+import { ClickHouse } from "./index.js";
+import type { QueueMetricsRawV1Input } from "./queueMetrics.js";
+
+const ORG = "org_qm";
+const PROJECT = "project_qm";
+const ENV = "env_qm";
+const EVENT_TIME = "2026-06-30 12:00:05"; // all rows land in the 10s bucket starting 12:00:00
+
+// Skeleton fixture row: the shared org/project/env ids and EVENT_TIME, plus the given queue/op.
+function base(op: QueueMetricsRawV1Input["op"], queue: string): QueueMetricsRawV1Input {
+  const scope = { organization_id: ORG, project_id: PROJECT, environment_id: ENV };
+  return { ...scope, queue_name: queue, event_time: EVENT_TIME, op };
+}
+
+// Counter fixtures. Every op keeps a monotonic odometer per (queue, op), and each counter row
+// reports the running total in `cumulative`. deltaSumTimestamp recovers the increase
+// (last - first), so a cum=0 baseline row is emitted ahead of the readings 1..total.
+// `order_key` comes from a module-wide sequence and orders the readings within an op.
+let orderKey = 0;
+function counter(
+  op: QueueMetricsRawV1Input["op"],
+  queue: string,
+  total: number,
+  waits?: number[]
+): QueueMetricsRawV1Input[] {
+  const baseline: QueueMetricsRawV1Input = {
+    ...base(op, queue),
+    cumulative: 0,
+    order_key: orderKey++,
+  };
+  const readings = Array.from({ length: total }, (_, index): QueueMetricsRawV1Input => {
+    const reading: QueueMetricsRawV1Input = {
+      ...base(op, queue),
+      cumulative: index + 1,
+      order_key: orderKey++,
+    };
+    // Only counters created with a waits list carry wait_ms; waits[index] pairs with cum index+1.
+    if (waits) reading.wait_ms = waits[index];
+    return reading;
+  });
+  return [baseline, ...readings];
+}
+
+// Shape of one row read back by readAggregated(); every column is coerced to a number.
+const asNumber = z.coerce.number();
+const aggregatedRow = z.object({
+  enqueue_count: asNumber,
+  started_count: asNumber,
+  ack_count: asNumber,
+  nack_count: asNumber,
+  dlq_count: asNumber,
+  throttled_count: asNumber,
+  max_running: asNumber,
+  max_queued: asNumber,
+  max_limit: asNumber,
+  max_env_running: asNumber,
+  max_env_queued: asNumber,
+  max_env_limit: asNumber,
+  max_ck_backlogged: asNumber,
+  max_ck_wait_ms: asNumber,
+  wait_ms_sum: asNumber,
+  wait_ms_count: asNumber,
+  wait_p50: asNumber,
+  wait_p90: asNumber,
+  wait_p95: asNumber,
+  wait_p99: asNumber,
+});
+
+// Builds the read-back query used by the assertions below. For one queue name it merges the
+// counter delta states, sums the throttled/wait columns, takes the max of every gauge column
+// and merges the wait quantile state into p50/p90/p95/p99 (wait_arr is 1-indexed). Rows are
+// grouped per (org, project, env, queue, bucket_start), so every bucket comes back as its
+// own row. The returned function is called with { queueName }.
+function readAggregated(ch: ClickHouse) {
+  return ch.reader.query({
+    name: "read-queue-metrics-aggregated",
+    query: `SELECT
+        deltaSumTimestampMerge(enqueue_delta) AS enqueue_count,
+        deltaSumTimestampMerge(started_delta) AS started_count,
+        deltaSumTimestampMerge(ack_delta) AS ack_count,
+        deltaSumTimestampMerge(nack_delta) AS nack_count,
+        deltaSumTimestampMerge(dlq_delta) AS dlq_count,
+        sum(throttled_count) AS throttled_count,
+        max(max_running) AS max_running,
+        max(max_queued) AS max_queued,
+        max(max_limit) AS max_limit,
+        max(max_env_running) AS max_env_running,
+        max(max_env_queued) AS max_env_queued,
+        max(max_env_limit) AS max_env_limit,
+        max(max_ck_backlogged) AS max_ck_backlogged,
+        max(max_ck_wait_ms) AS max_ck_wait_ms,
+        sum(wait_ms_sum) AS wait_ms_sum,
+        sum(wait_ms_count) AS wait_ms_count,
+        quantilesMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles) AS wait_arr,
+        wait_arr[1] AS wait_p50,
+        wait_arr[2] AS wait_p90,
+        wait_arr[3] AS wait_p95,
+        wait_arr[4] AS wait_p99
+      FROM trigger_dev.queue_metrics_v1
+      WHERE queue_name = {queueName: String}
+      GROUP BY organization_id, project_id, environment_id, queue_name, bucket_start`,
+    schema: aggregatedRow,
+    params: z.object({ queueName: z.string() }),
+  });
+}
+
+// Insert options that turn async_insert off, so each insert is synchronous and the
+// MV-populated rows are queryable immediately afterwards.
+const SYNC = { params: { clickhouse_settings: { async_insert: 0 as const } } };
+
+describe("queue_metrics_v1", () => {
+  clickhouseTest(
+    "buckets counters, gauges and wait percentiles via the MV",
+    async ({ clickhouseContainer }) => {
+      const ch = new ClickHouse({ url: clickhouseContainer.getConnectionUrl(), name: "test" });
+      const queue = "queue-a";
+
+      // One odometer per op (each seeded with a cum=0 baseline) plus two gauge readings.
+      // Every row uses EVENT_TIME, so they all fall into the same bucket.
+      const rows: QueueMetricsRawV1Input[] = [
+        ...counter("enqueue", queue, 3),
+        ...counter("started", queue, 10, [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]),
+        ...counter("ack", queue, 2),
+        ...counter("nack", queue, 1),
+        ...counter("dlq", queue, 1),
+        {
+          ...base("gauge", queue),
+          running: 8,
+          queued: 4,
+          queue_limit: 10,
+          env_running: 40,
+          env_queued: 10,
+          env_limit: 50,
+          throttled: 0,
+          ck_backlogged: 3,
+          ck_max_wait_ms: 2500,
+        },
+        {
+          ...base("gauge", queue),
+          running: 10,
+          queued: 6,
+          queue_limit: 10,
+          env_running: 50,
+          env_queued: 20,
+          env_limit: 50,
+          throttled: 1, // running >= limit AND queued > 0
+          ck_backlogged: 2,
+          ck_max_wait_ms: 1500,
+        },
+      ];
+
+      const [insertError] = await ch.queueMetrics.insertRaw(rows, SYNC);
+      expect(insertError).toBeNull();
+
+      const [queryError, result] = await readAggregated(ch)({ queueName: queue });
+      expect(queryError).toBeNull();
+      expect(result).toHaveLength(1);
+      const row = result![0]!;
+
+      // Counter columns equal the odometer increase: the `total` passed to counter().
+      expect(row.enqueue_count).toBe(3);
+      expect(row.started_count).toBe(10);
+      expect(row.ack_count).toBe(2);
+      expect(row.nack_count).toBe(1);
+      expect(row.dlq_count).toBe(1);
+      // The two gauge rows report throttled 0 and 1.
+      expect(row.throttled_count).toBe(1);
+
+      // Gauge columns keep the larger of the two readings, column by column.
+      expect(row.max_running).toBe(10);
+      expect(row.max_queued).toBe(6);
+      expect(row.max_limit).toBe(10);
+      expect(row.max_env_running).toBe(50);
+      expect(row.max_env_queued).toBe(20);
+      expect(row.max_env_limit).toBe(50);
+      expect(row.max_ck_backlogged).toBe(3);
+      expect(row.max_ck_wait_ms).toBe(2500);
+
+      // 100 + 200 + ... + 1000 = 5500 across the ten started readings that carry a wait.
+      expect(row.wait_ms_sum).toBe(5500);
+      expect(row.wait_ms_count).toBe(10);
+
+      // Percentiles over [100..1000]: monotonic and within the value range.
+      expect(row.wait_p50).toBeGreaterThanOrEqual(400);
+      expect(row.wait_p50).toBeLessThanOrEqual(650);
+      expect(row.wait_p90).toBeGreaterThanOrEqual(row.wait_p50);
+      expect(row.wait_p95).toBeGreaterThanOrEqual(row.wait_p90);
+      expect(row.wait_p99).toBeGreaterThanOrEqual(row.wait_p95);
+      expect(row.wait_p99).toBeLessThanOrEqual(1000);
+
+      await ch.close();
+    }
+  );
+
+  clickhouseTest(
+    "merges wait-quantile state across separate insert blocks",
+    async ({ clickhouseContainer }) => {
+      const ch = new ClickHouse({ url: clickhouseContainer.getConnectionUrl(), name: "test" });
+      const queue = "queue-b";
+
+      // Cumulative odometer continues across the two insert blocks (baseline 0, then 1..10);
+      // deltaSumTimestamp state and quantile state merge across the parts into one bucket.
+      const startedRow = (cum: number, wait_ms?: number): QueueMetricsRawV1Input => ({
+        ...base("started", queue),
+        cumulative: cum,
+        order_key: orderKey++,
+        ...(wait_ms !== undefined ? { wait_ms } : {}),
+      });
+
+      // First insert: the baseline (cum 0, no wait) followed by readings 1..5.
+      const [e1] = await ch.queueMetrics.insertRaw(
+        [startedRow(0), ...[100, 200, 300, 400, 500].map((w, i) => startedRow(i + 1, w))],
+        SYNC
+      );
+      expect(e1).toBeNull();
+      // Second insert: readings 6..10, with no new baseline.
+      const [e2] = await ch.queueMetrics.insertRaw(
+        [600, 700, 800, 900, 1000].map((w, i) => startedRow(i + 6, w)),
+        SYNC
+      );
+      expect(e2).toBeNull();
+
+      const [queryError, result] = await readAggregated(ch)({ queueName: queue });
+      expect(queryError).toBeNull();
+      expect(result).toHaveLength(1);
+      const row = result![0]!;
+
+      // Both blocks contribute to one bucket: counts and sums add, quantile state merges.
+      expect(row.started_count).toBe(10);
+      expect(row.wait_ms_sum).toBe(5500);
+      expect(row.wait_ms_count).toBe(10);
+      expect(row.wait_p50).toBeGreaterThanOrEqual(400);
+      expect(row.wait_p50).toBeLessThanOrEqual(650);
+      expect(row.wait_p99).toBeGreaterThanOrEqual(row.wait_p50);
+      expect(row.wait_p99).toBeLessThanOrEqual(1000);
+
+      await ch.close();
+    }
+  );
+
+  clickhouseTest(
+    "5m and env rollups agree with the 10s tier, and env buckets are 10s",
+    async ({ clickhouseContainer }) => {
+      const ch = new ClickHouse({ url: clickhouseContainer.getConnectionUrl(), name: "test" });
+
+      // Own org so the env-level read (no queue filter) stays isolated from other tests.
+      const rollOrg = "org_qm_roll";
+      // Two queues with their own started odometers, one gauge each at EVENT_TIME, and a
+      // later roll-a gauge at 12:00:15 that belongs to the following 10s bucket.
+      const rows: QueueMetricsRawV1Input[] = [
+        ...counter("started", "roll-a", 7, [100, 150, 200, 250, 300, 350, 400]),
+        ...counter("started", "roll-b", 3, [500, 600, 700]),
+        {
+          ...base("gauge", "roll-a"),
+          running: 4,
+          queued: 9,
+          env_running: 30,
+          env_limit: 50,
+          ck_backlogged: 5,
+          ck_max_wait_ms: 9000,
+        },
+        { ...base("gauge", "roll-b"), running: 2, queued: 1, env_running: 45, env_limit: 50 },
+        {
+          ...base("gauge", "roll-a"),
+          event_time: "2026-06-30 12:00:15",
+          running: 1,
+          queued: 2,
+          env_running: 20,
+          env_limit: 50,
+          ck_backlogged: 2,
+          ck_max_wait_ms: 3000,
+        },
+      ].map((row) => ({ ...row, organization_id: rollOrg }));
+      const [insertError] = await ch.queueMetrics.insertRaw(rows, SYNC);
+      expect(insertError).toBeNull();
+
+      // Same per-queue read against either tier; the table name is the only difference.
+      const perQueue = (table: string) =>
+        ch.reader.query({
+          name: "per-queue-both-tiers",
+          query: `SELECT queue_name, deltaSumTimestampMerge(started_delta) AS started
+          FROM ${table}
+          WHERE queue_name IN ('roll-a', 'roll-b')
+          GROUP BY queue_name ORDER BY queue_name`,
+          schema: z.object({ queue_name: z.string(), started: z.coerce.number() }),
+        })({});
+      const [e10, rows10] = await perQueue("trigger_dev.queue_metrics_v1");
+      const [e5m, rows5m] = await perQueue("trigger_dev.queue_metrics_5m_v1");
+      expect(e10).toBeNull();
+      expect(e5m).toBeNull();
+      expect(rows10).toEqual([
+        { queue_name: "roll-a", started: 7 },
+        { queue_name: "roll-b", started: 3 },
+      ]);
+      expect(rows5m).toEqual(rows10);
+
+      // CK-health gauges roll into the 5m mirror too.
+      const [ckError, ckRows] = await ch.reader.query({
+        name: "ck-5m-read",
+        query: `SELECT max(max_ck_backlogged) AS ck_keys, max(max_ck_wait_ms) AS ck_wait
+          FROM trigger_dev.queue_metrics_5m_v1
+          WHERE queue_name = 'roll-a'`,
+        schema: z.object({ ck_keys: z.coerce.number(), ck_wait: z.coerce.number() }),
+      })({});
+      expect(ckError).toBeNull();
+      // The larger of roll-a's two gauge readings (5 vs 2 keys, 9000 vs 3000 ms).
+      expect(ckRows![0]).toEqual({ ck_keys: 5, ck_wait: 9000 });
+
+      // Env-wide totals: sum of per-queue merges (a single merge across queues would mix
+      // odometers and double-count).
+      const [envTotalError, envTotal] = await ch.reader.query({
+        name: "env-total-per-queue-sum",
+        query: `SELECT sum(started) AS started FROM (
+            SELECT queue_name, deltaSumTimestampMerge(started_delta) AS started
+            FROM trigger_dev.queue_metrics_5m_v1
+            WHERE queue_name IN ('roll-a', 'roll-b')
+            GROUP BY queue_name
+          )`,
+        schema: z.object({ started: z.coerce.number() }),
+      })({});
+      expect(envTotalError).toBeNull();
+      expect(envTotal![0]!.started).toBe(10);
+
+      const [envError, envRows] = await ch.reader.query({
+        name: "env-rollup-read",
+        query: `SELECT
+            max(max_env_running) AS max_env_running,
+            max(max_env_limit) AS max_env_limit,
+            uniqExact(bucket_start) AS buckets,
+            round(quantilesTDigestMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[4]) AS wait_p99
+          FROM trigger_dev.env_metrics_v1
+          WHERE organization_id = {org: String}`,
+        schema: z.object({
+          max_env_running: z.coerce.number(),
+          max_env_limit: z.coerce.number(),
+          buckets: z.coerce.number(),
+          wait_p99: z.coerce.number(),
+        }),
+        params: z.object({ org: z.string() }),
+      })({ org: rollOrg });
+      expect(envError).toBeNull();
+      // 45 is the highest env_running among the three gauge rows (30, 45, 20).
+      expect(envRows![0]!.max_env_running).toBe(45);
+      expect(envRows![0]!.max_env_limit).toBe(50);
+      // 12:00:05 and 12:00:15 land in separate 10s env buckets (12:00:00 and 12:00:10).
+      expect(envRows![0]!.buckets).toBe(2);
+      expect(envRows![0]!.wait_p99).toBeGreaterThanOrEqual(600);
+      expect(envRows![0]!.wait_p99).toBeLessThanOrEqual(1000);
+
+      await ch.close();
+    }
+  );
+
+  clickhouseTest(
+    "merged ranking returns the page and the windowed total in one query",
+    async ({ clickhouseContainer }) => {
+      const ch = new ClickHouse({ url: clickhouseContainer.getConnectionUrl(), name: "test" });
+
+      // Gauge-only fixtures: the ranking queries order by max_queued / max_running.
+      const gauge = (queue: string, queued: number, running: number): QueueMetricsRawV1Input => ({
+        ...base("gauge", queue),
+        queued,
+        running,
+      });
+      const [insertError] = await ch.queueMetrics.insertRaw(
+        [gauge("rank-low", 1, 0), gauge("rank-high", 50, 3), gauge("rank-mid", 10, 2)],
+        SYNC
+      );
+      expect(insertError).toBeNull();
+
+      // byQueuedOnly: 0 ranks by peak queued + peak running:
+      // rank-high 53, rank-mid 12, rank-low 1. nameContains keeps only the rank-* queues.
+      const args = {
+        organizationId: ORG,
+        projectId: PROJECT,
+        environmentId: ENV,
+        startTime: "2026-06-30 11:50:00",
+        nameContains: "rank-",
+        byQueuedOnly: 0,
+      };
+      // limit 2 keeps the top two, while ranked_total still counts all three ranked queues.
+      const [pageError, page] = await ch.queueMetrics.ranking({ ...args, limit: 2, offset: 0 });
+      expect(pageError).toBeNull();
+      expect(page).toEqual([
+        { queue_name: "rank-high", ranked_total: 3 },
+        { queue_name: "rank-mid", ranked_total: 3 },
+      ]);
+
+      const [countError, count] = await ch.queueMetrics.rankingCount(args);
+      expect(countError).toBeNull();
+      expect(count![0]!.ranked).toBe(3);
+
+      const [namesError, names] = await ch.queueMetrics.rankingNames({ ...args, limit: 10 });
+      expect(namesError).toBeNull();
+      expect(names!.map((r) => r.queue_name)).toEqual(["rank-high", "rank-mid", "rank-low"]);
+
+      await ch.close();
+    }
+  );
+});
+
+// Simulates a consumer that retries the same batch: the identical rows are inserted three
+// times under one insert_deduplication_token, and every tier must look as if it ran once.
+describe("consumer retry idempotency", () => {
+  clickhouseTest(
+    "re-inserting a batch with the same dedup token does not inflate any tier",
+    async ({ clickhouseContainer }) => {
+      const ch = new ClickHouse({ url: clickhouseContainer.getConnectionUrl(), name: "test" });
+
+      // Own org so the tier counts below see only this test's rows.
+      const dedupOrg = "org_qm_dedup";
+      const rows: QueueMetricsRawV1Input[] = [
+        ...counter("started", "dedup-q", 3, [100, 200, 300]),
+        { ...base("gauge", "dedup-q"), running: 2, queued: 1, env_running: 5, env_limit: 10 },
+      ].map((row) => ({ ...row, organization_id: dedupOrg }));
+
+      // Synchronous insert with a fixed token, and dedup extended to the dependent MVs.
+      const retrySettings = {
+        params: {
+          clickhouse_settings: {
+            async_insert: 0 as const,
+            insert_deduplication_token: "qm-test-retry-batch",
+            deduplicate_blocks_in_dependent_materialized_views: 1 as const,
+          },
+        },
+      };
+      for (let attempt = 0; attempt < 3; attempt++) {
+        const [error] = await ch.queueMetrics.insertRaw(rows, retrySettings);
+        expect(error).toBeNull();
+      }
+
+      // One scalar subquery per tier, all scoped to dedupOrg.
+      const [tiersError, tiers] = await ch.reader.query({
+        name: "dedup-tier-counts",
+        query: `SELECT
+            (SELECT count() FROM trigger_dev.queue_metrics_v1 WHERE organization_id = {org: String}) AS rows_10s,
+            (SELECT count() FROM trigger_dev.queue_metrics_5m_v1 WHERE organization_id = {org: String}) AS rows_5m,
+            (SELECT count() FROM trigger_dev.env_metrics_v1 WHERE organization_id = {org: String}) AS rows_env,
+            (SELECT sum(wait_ms_count) FROM trigger_dev.env_metrics_v1 WHERE organization_id = {org: String}) AS wait_count,
+            (SELECT deltaSumTimestampMerge(started_delta) FROM trigger_dev.queue_metrics_v1 WHERE organization_id = {org: String}) AS started`,
+        schema: z.object({
+          rows_10s: z.coerce.number(),
+          rows_5m: z.coerce.number(),
+          rows_env: z.coerce.number(),
+          wait_count: z.coerce.number(),
+          started: z.coerce.number(),
+        }),
+        params: z.object({ org: z.string() }),
+      })({ org: dedupOrg });
+      expect(tiersError).toBeNull();
+      const t = tiers![0]!;
+      // Without dedup windows on the MV targets, retries append copies: rows and sums triple.
+      expect(t.rows_10s).toBe(1);
+      expect(t.rows_5m).toBe(1);
+      expect(t.rows_env).toBe(1);
+      expect(t.wait_count).toBe(3);
+      expect(t.started).toBe(3);
+
+      await ch.close();
+    }
+  );
+});
+
+// Checks the split between the per-queue tier (queue_metrics_v1) and the per-key tier
+// (queue_metrics_ck_v1) when every event is written both without and with a concurrency key.
+describe("per-concurrency-key tier", () => {
+  clickhouseTest(
+    "per-key rows feed the ck tier without polluting per-queue counters or waits",
+    async ({ clickhouseContainer }) => {
+      const ch = new ClickHouse({ url: clickhouseContainer.getConnectionUrl(), name: "test" });
+      const ckOrg = "org_qm_ck";
+      const queue = "ck-tier-q";
+      const withCk = (row: QueueMetricsRawV1Input, ck: string): QueueMetricsRawV1Input => ({
+        ...row,
+        concurrency_key: ck,
+      });
+
+      // 5 started events on one queue across two keys (t1 x3, t2 x2). Each event lands as
+      // a base row (base odometer) + a per-key row (per-key odometer), both carrying wait,
+      // exactly like the consumer expansion. Baselines seed each odometer.
+      const rows: QueueMetricsRawV1Input[] = [];
+      // Local sequence for order_key; the base row and its per-key twin share one value.
+      let ok = 0;
+      const started = (cum: number, ck: string, ckcum: number, wait: number) => {
+        rows.push({ ...base("started", queue), cumulative: cum, order_key: ok, wait_ms: wait });
+        rows.push(
+          withCk({ ...base("started", queue), cumulative: ckcum, order_key: ok, wait_ms: wait }, ck)
+        );
+        ok++;
+      };
+      rows.push({ ...base("started", queue), cumulative: 0, order_key: ok++ });
+      rows.push(withCk({ ...base("started", queue), cumulative: 0, order_key: ok++ }, "t1"));
+      rows.push(withCk({ ...base("started", queue), cumulative: 0, order_key: ok++ }, "t2"));
+      // Arguments: base odometer value, key, that key's odometer value, wait in ms.
+      started(1, "t1", 1, 100);
+      started(2, "t1", 2, 200);
+      started(3, "t2", 1, 300);
+      started(4, "t1", 3, 400);
+      started(5, "t2", 2, 500);
+      // Per-subqueue gauges carry the key.
+      rows.push(withCk({ ...base("gauge", queue), queued: 4, running: 1 }, "t1"));
+      rows.push(withCk({ ...base("gauge", queue), queued: 2, running: 0 }, "t2"));
+
+      const [insertError] = await ch.queueMetrics.insertRaw(
+        rows.map((r) => ({ ...r, organization_id: ckOrg })),
+        SYNC
+      );
+      expect(insertError).toBeNull();
+
+      const [perQueueError, perQueue] = await ch.reader.query({
+        name: "ck-per-queue-read",
+        query: `SELECT
+            deltaSumTimestampMerge(started_delta) AS started,
+            sum(wait_ms_sum) AS wait_sum,
+            sum(wait_ms_count) AS wait_count,
+            max(max_queued) AS peak_queued
+          FROM trigger_dev.queue_metrics_v1
+          WHERE organization_id = {org: String}`,
+        schema: z.object({
+          started: z.coerce.number(),
+          wait_sum: z.coerce.number(),
+          wait_count: z.coerce.number(),
+          peak_queued: z.coerce.number(),
+        }),
+        params: z.object({ org: z.string() }),
+      })({ org: ckOrg });
+      expect(perQueueError).toBeNull();
+      // Base rows only: 5 events (not 10), waits counted once, per-key gauges still max in.
+      expect(perQueue![0]).toEqual({ started: 5, wait_sum: 1500, wait_count: 5, peak_queued: 4 });
+
+      const [ckError, ckRows] = await ch.reader.query({
+        name: "ck-tier-read",
+        query: `SELECT concurrency_key,
+            deltaSumTimestampMerge(started_delta) AS started,
+            max(max_queued) AS peak_queued,
+            sum(wait_ms_sum) AS wait_sum
+          FROM trigger_dev.queue_metrics_ck_v1
+          WHERE organization_id = {org: String}
+          GROUP BY concurrency_key ORDER BY concurrency_key`,
+        schema: z.object({
+          concurrency_key: z.string(),
+          started: z.coerce.number(),
+          peak_queued: z.coerce.number(),
+          wait_sum: z.coerce.number(),
+        }),
+        params: z.object({ org: z.string() }),
+      })({ org: ckOrg });
+      expect(ckError).toBeNull();
+      // t1 waits: 100 + 200 + 400 = 700; t2 waits: 300 + 500 = 800.
+      expect(ckRows).toEqual([
+        { concurrency_key: "t1", started: 3, peak_queued: 4, wait_sum: 700 },
+        { concurrency_key: "t2", started: 2, peak_queued: 2, wait_sum: 800 },
+      ]);
+
+      await ch.close();
+    }
+  );
+});
diff --git a/internal-packages/clickhouse/src/queueMetrics.ts b/internal-packages/clickhouse/src/queueMetrics.ts
new file mode 100644
index 00000000000..dce9323ef26
--- /dev/null
+++ b/internal-packages/clickhouse/src/queueMetrics.ts
@@ -0,0 +1,214 @@
+import { z } from "zod";
+import type { ClickhouseReader, ClickhouseWriter } from "./client/types.js";
+
+/**
+ * Shape of one row written to trigger_dev.queue_metrics_raw_v1 (the row type used by
+ * insertQueueMetricsRaw). Only the identifiers, `event_time` and `op` are required; every
+ * measurement field is optional.
+ * NOTE(review): which fields accompany which `op` is not enforced here — confirm against
+ * the producer.
+ */
+export const QueueMetricsRawV1Input = z.object({
+  organization_id: z.string(),
+  project_id: z.string(),
+  environment_id: z.string(),
+  queue_name: z.string(),
+  concurrency_key: z.string().optional(),
+  event_time: z.string(),
+  // Exact UInt64 ordering key; a string preserves precision past JS safe-integer range
+  // (see entryOrderKey). A plain number is still accepted for small test values.
+  order_key: z.union([z.string(), z.number()]).optional(),
+  op: z.enum(["gauge", "enqueue", "started", "ack", "nack", "dlq"]),
+  running: z.number().optional(),
+  queued: z.number().optional(),
+  queue_limit: z.number().optional(),
+  env_running: z.number().optional(),
+  env_queued: z.number().optional(),
+  env_limit: z.number().optional(),
+  throttled: z.number().optional(),
+  ck_backlogged: z.number().optional(),
+  ck_max_wait_ms: z.number().optional(),
+  wait_ms: z.number().optional(),
+  cumulative: z.number().optional(),
+});
+
+/** Input (pre-parse) type of the schema above; shares its name with the schema constant. */
+export type QueueMetricsRawV1Input = z.input<typeof QueueMetricsRawV1Input>;
+
+/** Builds the inserter for raw queue-metric rows, targeting trigger_dev.queue_metrics_raw_v1. */
+export function insertQueueMetricsRaw(ch: ClickhouseWriter) {
+  const target = {
+    name: "insertQueueMetricsRaw",
+    table: "trigger_dev.queue_metrics_raw_v1",
+  } as const;
+  return ch.insertUnsafe<QueueMetricsRawV1Input>(target);
+}
+
+// --- Reads (Queues list metrics + health) ---
+
+// Parameters shared by the per-queue list reads. startTime/endTime are bound as ClickHouse
+// DateTime values and form a half-open window: bucket_start >= startTime AND < endTime.
+const QueueMetricsListParams = z.object({
+  organizationId: z.string(),
+  projectId: z.string(),
+  environmentId: z.string(),
+  queueNames: z.array(z.string()),
+  startTime: z.string(),
+  endTime: z.string(),
+});
+
+// One row per queue from getQueueListMetricsSummary; numeric columns are coerced to number.
+const QueueMetricsSummaryRow = z.object({
+  queue_name: z.string(),
+  p50_wait_ms: z.coerce.number(),
+  p95_wait_ms: z.coerce.number(),
+  peak_queued: z.coerce.number(),
+  started_count: z.coerce.number(),
+});
+
+// Callers align window bounds to the bucket grid so repeated loads share cache entries.
+// Turns on the ClickHouse query cache for these reads; query_cache_ttl is in seconds.
+const QUEUE_METRICS_CACHE_SETTINGS = {
+  use_query_cache: 1,
+  query_cache_ttl: 30,
+} as const;
+
+/**
+ * Per-queue rollups over a window, for a fixed set of queues (the visible page).
+ *
+ * Reads trigger_dev.queue_metrics_v1 and returns one row per queue name that has buckets
+ * in [startTime, endTime): rounded p50/p95 wait, peak queued depth and the started count.
+ * The quantile state is merged at levels (0.5, 0.9, 0.95, 0.99); ClickHouse arrays are
+ * 1-indexed, so [1] is p50 and [3] is p95. Queues without rows in the window are simply
+ * absent from the result.
+ */
+export function getQueueListMetricsSummary(reader: ClickhouseReader) {
+  return reader.query({
+    name: "getQueueListMetricsSummary",
+    query: `SELECT
+        queue_name,
+        round(quantilesMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[1]) AS p50_wait_ms,
+        round(quantilesMerge(0.5, 0.9, 0.95, 0.99)(wait_quantiles)[3]) AS p95_wait_ms,
+        max(max_queued) AS peak_queued,
+        deltaSumTimestampMerge(started_delta) AS started_count
+      FROM trigger_dev.queue_metrics_v1
+      WHERE organization_id = {organizationId: String}
+        AND project_id = {projectId: String}
+        AND environment_id = {environmentId: String}
+        AND queue_name IN {queueNames: Array(String)}
+        AND bucket_start >= {startTime: DateTime}
+        AND bucket_start < {endTime: DateTime}
+      GROUP BY queue_name`,
+    params: QueueMetricsListParams,
+    schema: QueueMetricsSummaryRow,
+    settings: QUEUE_METRICS_CACHE_SETTINGS,
+  });
+}
+
+// Same filters as the list summary, plus the width in seconds of each sparkline bucket.
+const QueueDepthSparklineParams = QueueMetricsListParams.extend({
+  bucketSeconds: z.number(),
+});
+
+// `bucket` is the start of the regrouped interval; `depth` is the peak queued value in it.
+const QueueDepthSparklineRow = z.object({
+  queue_name: z.string(),
+  bucket: z.string(),
+  depth: z.coerce.number(),
+});
+
+/**
+ * Per-queue, per-bucket peak depth for inline sparklines (carry-forward filled by the caller).
+ *
+ * Stored buckets are regrouped with toStartOfInterval(bucket_start, bucketSeconds) and the
+ * max of max_queued is taken per (queue, bucket). Only buckets that have rows are returned.
+ * Rows are ordered by bucket only, so the order of queues within one bucket is unspecified.
+ */
+export function getQueueDepthSparklines(reader: ClickhouseReader) {
+  return reader.query({
+    name: "getQueueDepthSparklines",
+    query: `SELECT
+        queue_name,
+        toStartOfInterval(bucket_start, toIntervalSecond({bucketSeconds: UInt32})) AS bucket,
+        max(max_queued) AS depth
+      FROM trigger_dev.queue_metrics_v1
+      WHERE organization_id = {organizationId: String}
+        AND project_id = {projectId: String}
+        AND environment_id = {environmentId: String}
+        AND queue_name IN {queueNames: Array(String)}
+        AND bucket_start >= {startTime: DateTime}
+        AND bucket_start < {endTime: DateTime}
+      GROUP BY queue_name, bucket
+      ORDER BY bucket`,
+    params: QueueDepthSparklineParams,
+    schema: QueueDepthSparklineRow,
+    settings: QUEUE_METRICS_CACHE_SETTINGS,
+  });
+}
+
+// Parameters for the ranking reads. There is no end bound: the window is everything from
+// startTime onwards. An empty nameContains disables the name filter.
+const QueueRankingParams = z.object({
+  organizationId: z.string(),
+  projectId: z.string(),
+  environmentId: z.string(),
+  startTime: z.string(),
+  /** 1 = rank by peak backlog only; 0 = backlog + running ("busiest"). */
+  byQueuedOnly: z.number(),
+  nameContains: z.string(),
+  limit: z.number(),
+  offset: z.number(),
+});
+
+// `ranked_total` is the count of all ranked queues, repeated on every row of a page.
+const QueueRankingRow = z.object({
+  queue_name: z.string(),
+  ranked_total: z.coerce.number(),
+});
+
+// Ranking reads the 5m rollup: a 15-minute window there costs ~30x fewer rows than the
+// 10s table.
+// Shared WHERE body for the ranking queries: scopes to one environment, keeps buckets at or
+// after startTime, skips the '__overflow__' queue name, and applies a case-insensitive
+// substring match on queue_name when nameContains is non-empty.
+const RANKING_WHERE = `organization_id = {organizationId: String}
+        AND project_id = {projectId: String}
+        AND environment_id = {environmentId: String}
+        AND bucket_start >= {startTime: DateTime}
+        AND queue_name != '__overflow__'
+        AND ({nameContains: String} = '' OR positionCaseInsensitive(queue_name, {nameContains: String}) > 0)`;
+
+/**
+ * One page of queue names ranked by recent activity, with the total ranked count on
+ * every row (window function), so page + count cost a single scan.
+ *
+ * The score is computed in the inner aggregate and the ORDER BY sits on the outer query,
+ * next to LIMIT/OFFSET: without an ORDER BY on the paged query itself, the rows LIMIT picks
+ * are not guaranteed, so ordering only the subquery left page contents to chance.
+ * `count() OVER ()` is evaluated before ORDER BY/LIMIT, so `ranked_total` still counts every
+ * ranked queue, not just the page. An offset past the end returns no rows (and no total).
+ *
+ * Ranking: byQueuedOnly = 1 orders by peak queued; otherwise by peak queued + peak running.
+ * Ties are broken by queue_name ascending.
+ */
+export function getQueueRanking(reader: ClickhouseReader) {
+  return reader.query({
+    name: "getQueueRanking",
+    query: `SELECT queue_name, count() OVER () AS ranked_total
+      FROM (
+        SELECT
+          queue_name,
+          if({byQueuedOnly: UInt8} = 1, max(max_queued), max(max_queued) + max(max_running)) AS rank_score
+        FROM trigger_dev.queue_metrics_5m_v1
+        WHERE ${RANKING_WHERE}
+        GROUP BY queue_name
+      )
+      ORDER BY rank_score DESC, queue_name ASC
+      LIMIT {limit: UInt32} OFFSET {offset: UInt32}`,
+    params: QueueRankingParams,
+    schema: QueueRankingRow,
+    settings: QUEUE_METRICS_CACHE_SETTINGS,
+  });
+}
+
+// Ranking params without byQueuedOnly and offset: the names query has a fixed order and
+// always starts from the top.
+const QueueRankingNamesParams = QueueRankingParams.omit({ byQueuedOnly: true, offset: true });
+
+const QueueRankingNameRow = z.object({
+  queue_name: z.string(),
+});
+
+/**
+ * All ranked queue names (activity order), used to exclude them from the alphabetical tail.
+ *
+ * Returns at most `limit` names, ordered by peak queued + peak running, then by name.
+ * NOTE(review): the order ignores byQueuedOnly, so when `limit` truncates the list the
+ * retained names follow the "busiest" order even if the page was ranked by backlog only —
+ * confirm that is intended.
+ */
+export function getQueueRankingNames(reader: ClickhouseReader) {
+  return reader.query({
+    name: "getQueueRankingNames",
+    query: `SELECT queue_name
+      FROM trigger_dev.queue_metrics_5m_v1
+      WHERE ${RANKING_WHERE}
+      GROUP BY queue_name
+      ORDER BY max(max_queued) + max(max_running) DESC, queue_name ASC
+      LIMIT {limit: UInt32}`,
+    params: QueueRankingNamesParams,
+    schema: QueueRankingNameRow,
+    settings: QUEUE_METRICS_CACHE_SETTINGS,
+  });
+}
+
+// Only the filter parameters: the count needs neither ordering nor paging.
+const QueueRankingCountParams = QueueRankingParams.omit({
+  byQueuedOnly: true,
+  limit: true,
+  offset: true,
+});
+
+const QueueRankingCountRow = z.object({
+  ranked: z.coerce.number(),
+});
+
+/**
+ * Ranked-queue count alone, for pages past the ranked head (approximate uniq is fine).
+ *
+ * Applies the same RANKING_WHERE filters as the ranking page and returns a single row whose
+ * `ranked` is uniq(queue_name) over the 5m rollup.
+ */
+export function getQueueRankingCount(reader: ClickhouseReader) {
+  return reader.query({
+    name: "getQueueRankingCount",
+    query: `SELECT uniq(queue_name) AS ranked
+      FROM trigger_dev.queue_metrics_5m_v1
+      WHERE ${RANKING_WHERE}`,
+    params: QueueRankingCountParams,
+    schema: QueueRankingCountRow,
+    settings: QUEUE_METRICS_CACHE_SETTINGS,
+  });
+}
+
+// (per-queue detail series is now fetched via TRQL + fillGaps from the metric resource route)
diff --git a/internal-packages/metrics-pipeline/package.json b/internal-packages/metrics-pipeline/package.json
new file mode 100644
index 00000000000..10a7c137a1f
--- /dev/null
+++ b/internal-packages/metrics-pipeline/package.json
@@ -0,0 +1,33 @@
+{
+  "name": "@internal/metrics-pipeline",
+  "private": true,
+  "version": "0.0.1",
+  "main": "./dist/src/index.js",
+  "types": "./dist/src/index.d.ts",
+  "type": "module",
+  "exports": {
+    ".": {
+      "@triggerdotdev/source": "./src/index.ts",
+      "import": "./dist/src/index.js",
+      "types": "./dist/src/index.d.ts",
+      "default": "./dist/src/index.js"
+    }
+  },
+  "dependencies": {
+    "@internal/redis": "workspace:*",
+    "@internal/tracing": "workspace:*",
+    "@trigger.dev/core": "workspace:*"
+  },
+  "devDependencies": {
+    "@internal/testcontainers": "workspace:*",
+    "rimraf": "6.0.1"
+  },
+  "scripts": {
+    "clean": "rimraf dist",
+    "typecheck": "tsc --noEmit -p tsconfig.build.json",
+    "test": "vitest --sequence.concurrent=false --no-file-parallelism",
+    "test:coverage": "vitest --sequence.concurrent=false --no-file-parallelism --coverage.enabled",
+    "build": "pnpm run clean && tsc -p tsconfig.build.json",
+    "dev": "tsc --watch  -p tsconfig.build.json"
+  }
+}
diff --git a/internal-packages/metrics-pipeline/src/cachedValue.ts b/internal-packages/metrics-pipeline/src/cachedValue.ts
new file mode 100644
index 00000000000..7f7bbb07903
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/cachedValue.ts
@@ -0,0 +1,125 @@
+import { createRedisClient, type Redis, type RedisOptions } from "@internal/redis";
+import { Logger } from "@trigger.dev/core/logger";
+
+export type CachedRedisValueOptions<T> = {
+  redis: RedisOptions; // keyPrefix is overridden to undefined, so `key` is read verbatim
+  key: string;
+  parse: (raw: string | null) => T; // gets the raw GET result; if it throws, the old value is kept
+  defaultValue: T; // served by get() until a refresh succeeds
+  cacheTtlMs?: number; // age after which get() starts a background refresh; default 10_000
+  logger?: Logger; // default: new Logger(loggerName ?? "CachedRedisValue", "warn")
+  loggerName?: string; // only used when `logger` is omitted
+};
+
+// Reads a Redis key with a short stale-while-revalidate cache and a synchronous getter for
+// hot paths. Warms eagerly on construction; concurrent refreshes dedupe onto one GET so an
+// awaited refresh always resolves to a completed read.
+export class CachedRedisValue<T> {
+  private readonly redis: Redis;
+  private readonly key: string;
+  private readonly parse: (raw: string | null) => T;
+  private readonly cacheTtlMs: number;
+  private readonly logger: Logger;
+  private value: T; // last successfully parsed value (defaultValue until then)
+  private lastFetchedAt = 0; // epoch ms of the last refresh attempt, successful or not
+  private refreshPromise?: Promise<T>; // in-flight refresh shared by concurrent callers
+
+  constructor(options: CachedRedisValueOptions<T>) {
+    this.logger = options.logger ?? new Logger(options.loggerName ?? "CachedRedisValue", "warn");
+    this.redis = createRedisClient(
+      { ...options.redis, keyPrefix: undefined }, // read `key` verbatim, ignoring any prefix
+      {
+        onError: (error) =>
+          this.logger.error("cached value redis error", { error, key: options.key }),
+      }
+    );
+    this.key = options.key;
+    this.parse = options.parse;
+    this.cacheTtlMs = options.cacheTtlMs ?? 10_000;
+    this.value = options.defaultValue;
+    void this.refresh(); // eager warm-up; read/parse errors are caught inside #doRefresh
+  }
+
+  get(): T {
+    if (Date.now() - this.lastFetchedAt > this.cacheTtlMs) { // stale: revalidate in background
+      void this.refresh();
+    }
+    return this.value; // always the current cached value, never awaits
+  }
+
+  async refresh(): Promise<T> {
+    if (this.refreshPromise) return this.refreshPromise; // join the read already in flight
+    this.refreshPromise = this.#doRefresh();
+    try {
+      return await this.refreshPromise;
+    } finally {
+      this.refreshPromise = undefined; // the next refresh() issues a fresh GET
+    }
+  }
+
+  async #doRefresh(): Promise<T> {
+    try {
+      this.value = this.parse(await this.redis.get(this.key));
+    } catch (error) {
+      this.logger.debug("cached value refresh failed, keeping cached value", {
+        error,
+        key: this.key,
+      });
+    } finally {
+      this.lastFetchedAt = Date.now(); // stamped on failure too: get() retries once per TTL
+    }
+    return this.value;
+  }
+
+  async close(): Promise<void> {
+    await this.redis.quit();
+  }
+}
+
+export type CachedRedisNumberOptions = {
+  redis: RedisOptions;
+  key: string;
+  defaultValue: number; // clamped to [min,max] before it is used as the fallback
+  min?: number; // default Number.NEGATIVE_INFINITY (no lower bound)
+  max?: number; // default Number.POSITIVE_INFINITY (no upper bound)
+  cacheTtlMs?: number; // forwarded to CachedRedisValue
+  logger?: Logger; // forwarded to CachedRedisValue
+};
+
+// Live-tunable numeric value, clamped to [min,max]; falls back to defaultValue on a
+// missing/unparseable key. Exposes a synchronous value() for hot paths.
+export class CachedRedisNumber {
+  private readonly inner: CachedRedisValue<number>;
+
+  constructor(options: CachedRedisNumberOptions) {
+    const min = options.min ?? Number.NEGATIVE_INFINITY;
+    const max = options.max ?? Number.POSITIVE_INFINITY;
+    const clamp = (n: number) => Math.min(max, Math.max(min, n)); // if min > max, yields max
+    const fallback = clamp(options.defaultValue); // the default is clamped as well
+    this.inner = new CachedRedisValue<number>({
+      redis: options.redis,
+      key: options.key,
+      parse: (raw) => {
+        // Number("") is 0 (not NaN), so treat blank/whitespace as missing => fallback.
+        const n = raw == null || raw.trim() === "" ? Number.NaN : Number(raw);
+        return Number.isFinite(n) ? clamp(n) : fallback; // NaN and +/-Infinity => fallback
+      },
+      defaultValue: fallback,
+      cacheTtlMs: options.cacheTtlMs,
+      logger: options.logger,
+      loggerName: "CachedRedisNumber",
+    });
+  }
+
+  value(): number {
+    return this.inner.get(); // synchronous; may start a background refresh when stale
+  }
+
+  refresh(): Promise<number> {
+    return this.inner.refresh();
+  }
+
+  close(): Promise<void> {
+    return this.inner.close();
+  }
+}
diff --git a/internal-packages/metrics-pipeline/src/consumer.test.ts b/internal-packages/metrics-pipeline/src/consumer.test.ts
new file mode 100644
index 00000000000..672fa426999
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/consumer.test.ts
@@ -0,0 +1,392 @@
+import { createRedisClient } from "@internal/redis";
+import { redisTest } from "@internal/testcontainers";
+import { expect } from "vitest";
+import { CachedRedisFlag } from "./flag.js";
+import { CachedRedisNumber } from "./cachedValue.js";
+import { MetricsStreamConsumer } from "./consumer.js";
+import { MetricsStreamEmitter } from "./emitter.js";
+import { shardFor } from "./hash.js";
+import { streamKey, type MetricDefinition } from "./types.js";
+
+async function waitFor(cond: () => boolean, timeoutMs = 5000): Promise<void> {
+  const deadline = Date.now() + timeoutMs; // poll every 50ms until cond() holds or we pass this
+  for (let met = cond(); !met; met = cond()) {
+    if (Date.now() > deadline) throw new Error("waitFor timed out");
+    await new Promise((resolve) => setTimeout(resolve, 50));
+  }
+}
+
+function definitionFor(suffix: string, shardCount = 2): MetricDefinition { // timestamped name, presumably so each test gets its own streams
+  return { name: `qm_${Date.now()}_${suffix}`, shardCount, consumerGroup: "cg", maxLen: 1000 };
+}
+
+redisTest(
+  "emitter -> consumer round trip maps rows, dedups, and acks",
+  async ({ redisOptions }) => {
+    const definition = definitionFor("rt");
+    const emitter = new MetricsStreamEmitter({
+      redis: redisOptions,
+      definition,
+      flag: { enabled: () => true },
+    });
+    const inserted: Array<{ rows: Array<Record<string, string>>; dedupToken: string }> = [];
+
+    const consumer = new MetricsStreamConsumer<Record<string, string>>({
+      redis: redisOptions,
+      definition,
+      consumerName: "c1",
+      mapEntry: (e) => ({ id: e.id, ...e.fields }),
+      insert: async (rows, { dedupToken }) => {
+        inserted.push({ rows, dedupToken });
+      },
+      blockMs: 200,
+    });
+
+    await Promise.all([consumer.start(), emitter.waitUntilReady()]); // pre-ready emits are dropped
+    emitter.emit("queueA", { op: "enqueue", q: "queueA" });
+    emitter.emit("queueB", { op: "started", q: "queueB", wait: 42 });
+
+    await waitFor(() => inserted.flatMap((i) => i.rows).length >= 2);
+    await consumer.stop();
+
+    const rows = inserted.flatMap((i) => i.rows);
+    expect(rows).toContainEqual(expect.objectContaining({ op: "enqueue", q: "queueA" }));
+    expect(rows).toContainEqual(
+      expect.objectContaining({ op: "started", q: "queueB", wait: "42" }) // values arrive as strings
+    );
+    expect(inserted[0]!.dedupToken).toMatch(/^[0-9a-f]{40}$/); // 40 lowercase hex characters
+
+    const admin = createRedisClient({ ...redisOptions, keyPrefix: undefined });
+    for (const key of consumer.streamKeys()) {
+      const pending = (await admin.xpending(key, definition.consumerGroup)) as [
+        number,
+        ...unknown[],
+      ];
+      expect(pending[0]).toBe(0); // every delivered entry was acked
+    }
+    await admin.quit();
+    await emitter.close();
+  }
+);
+
+redisTest("emit is a no-op when the flag is disabled", async ({ redisOptions }) => {
+  const definition = definitionFor("off");
+  const emitter = new MetricsStreamEmitter({
+    redis: redisOptions,
+    definition,
+    flag: { enabled: () => false },
+  });
+
+  emitter.emit("q", { op: "enqueue", q: "q" });
+  await new Promise((r) => setTimeout(r, 200)); // give any (unexpected) write time to land
+
+  const admin = createRedisClient({ ...redisOptions, keyPrefix: undefined });
+  const len = await admin.xlen(streamKey(definition, shardFor("q", definition.shardCount)));
+  expect(len).toBe(0); // the shard stream that "q" maps to stayed empty
+  await admin.quit();
+  await emitter.close();
+});
+
+redisTest("reclaims stale pending entries from a dead consumer", async ({ redisOptions }) => {
+  const definition = definitionFor("claim", 1);
+  const admin = createRedisClient({ ...redisOptions, keyPrefix: undefined });
+  const key = streamKey(definition, 0);
+
+  await admin.xgroup("CREATE", key, definition.consumerGroup, "$", "MKSTREAM"); // group first...
+  await admin.xadd(key, "*", "op", "ack", "q", "qZ"); // ...so both entries are new to it
+  await admin.xadd(key, "*", "op", "nack", "q", "qZ");
+  await admin.xreadgroup( // deliver both to "zombie", which never acks => 2 pending entries
+    "GROUP",
+    definition.consumerGroup,
+    "zombie",
+    "COUNT",
+    10,
+    "STREAMS",
+    key,
+    ">"
+  );
+
+  const inserted: Array<Record<string, string>> = [];
+  const consumer = new MetricsStreamConsumer<Record<string, string>>({
+    redis: redisOptions,
+    definition,
+    consumerName: "live",
+    mapEntry: (e) => ({ id: e.id, ...e.fields }),
+    insert: async (rows) => {
+      inserted.push(...rows);
+    },
+    blockMs: 200,
+    claimIdleMs: 0, // any pending entry is immediately old enough to be claimed
+  });
+
+  await consumer.start();
+  await waitFor(() => inserted.length >= 2);
+  await consumer.stop();
+
+  expect(inserted.map((r) => r.op).sort()).toEqual(["ack", "nack"]);
+  const pending = (await admin.xpending(key, definition.consumerGroup)) as [number, ...unknown[]];
+  expect(pending[0]).toBe(0); // the reclaimed entries were acked by "live"
+  await admin.quit();
+});
+
+redisTest(
+  "per-stream batches: one insert + distinct dedup token per shard stream",
+  async ({ redisOptions }) => {
+    const definition = definitionFor("pershard", 2);
+    const emitter = new MetricsStreamEmitter({
+      redis: redisOptions,
+      definition,
+      flag: { enabled: () => true },
+    });
+    // Two shard keys that land on different shards.
+    const a = "shardkey-a";
+    let b = "shardkey-b0";
+    for (let i = 1; shardFor(b, 2) === shardFor(a, 2); i++) b = `shardkey-b${i}`;
+
+    const inserted: Array<{ rows: Array<Record<string, string>>; dedupToken: string }> = [];
+    const consumer = new MetricsStreamConsumer<Record<string, string>>({
+      redis: redisOptions,
+      definition,
+      consumerName: "c1",
+      mapEntry: (e) => ({ id: e.id, ...e.fields }),
+      insert: async (rows, { dedupToken }) => {
+        inserted.push({ rows, dedupToken });
+      },
+      blockMs: 200,
+    });
+
+    await Promise.all([consumer.start(), emitter.waitUntilReady()]); // pre-ready emits are dropped
+    emitter.emit(a, { op: "enqueue", q: a });
+    emitter.emit(b, { op: "enqueue", q: b });
+    await waitFor(() => inserted.flatMap((i) => i.rows).length >= 2);
+    await consumer.stop();
+    await emitter.close();
+
+    // Each shard's batch is its own dedup block with its own (stream-scoped) token.
+    const batchesWithRows = inserted.filter((i) => i.rows.length > 0);
+    expect(batchesWithRows.length).toBe(2);
+    expect(new Set(batchesWithRows.map((i) => i.dedupToken)).size).toBe(2);
+  }
+);
+
+redisTest(
+  "probe reports lag as null (not 0) when Redis cannot compute it",
+  async ({ redisOptions }) => {
+    const definition = definitionFor("nillag", 1);
+    const admin = createRedisClient({ ...redisOptions, keyPrefix: undefined });
+    const key = streamKey(definition, 0);
+
+    await admin.xgroup("CREATE", key, definition.consumerGroup, "0", "MKSTREAM");
+    const ids: string[] = [];
+    for (let i = 0; i < 5; i++) {
+      ids.push((await admin.xadd(key, "*", "op", "enqueue", "q", "qT")) as string);
+    }
+    // SETID to an arbitrary id makes the group's entries-read unknown => lag is nil
+    // (severe trimming can do the same in prod); the probe must NOT report that as 0.
+    await admin.xgroup("SETID", key, definition.consumerGroup, ids[2]!);
+
+    const consumer = new MetricsStreamConsumer<Record<string, string>>({
+      redis: redisOptions,
+      definition,
+      consumerName: "c1",
+      mapEntry: (e) => ({ id: e.id, ...e.fields }),
+      insert: async () => {},
+    });
+    try {
+      const states = await consumer.streamState(); // probe only; the consumer is never started
+      expect(states[0]!.lag).toBeNull();
+    } finally {
+      await consumer.stop(); // still needed: closes the consumer's redis clients
+      await admin.quit();
+    }
+  }
+);
+
+redisTest(
+  "emitGauge XADDs an op=gauge snapshot onto the shared metrics stream",
+  async ({ redisOptions }) => {
+    const definition = definitionFor("gauge", 2);
+    const emitter = new MetricsStreamEmitter({
+      redis: redisOptions,
+      definition,
+      flag: { enabled: () => true },
+    });
+
+    // Emits before the connection is ready are dropped by design (loss-tolerant).
+    await emitter.waitUntilReady();
+    emitter.emitGauge("q1", {
+      op: "gauge",
+      q: "q1",
+      ql: 5,
+      cc: 2,
+      lim: 10,
+      eql: 3,
+      ec: 1,
+      elim: 20,
+      thr: 0,
+    });
+
+    const admin = createRedisClient({ ...redisOptions, keyPrefix: undefined });
+    const key = streamKey(definition, shardFor("q1", 2));
+    // Plain XADD (no odometer, no cum=0 seed) => exactly one entry, unlike counter emit().
+    await waitFor2(async () => (await admin.xlen(key)) === 1);
+    const raw = (await admin.xrange(key, "-", "+")) as Array<[string, string[]]>;
+    const flat = raw[0]![1]; // flat [name, value, name, value, ...] list of the only entry
+    const fields: Record<string, string> = {};
+    for (let i = 0; i + 1 < flat.length; i += 2) fields[flat[i]!] = flat[i + 1]!;
+    expect(fields.op).toBe("gauge");
+    expect(fields.q).toBe("q1");
+    expect(fields.ql).toBe("5"); // numbers are stored as strings
+    expect(fields.thr).toBe("0");
+    await admin.quit();
+    await emitter.close();
+  }
+);
+
+async function waitFor2(cond: () => Promise<boolean>, timeoutMs = 5000): Promise<void> {
+  const deadline = Date.now() + timeoutMs; // same 50ms polling as waitFor, for async predicates
+  for (let met = await cond(); !met; met = await cond()) {
+    if (Date.now() > deadline) throw new Error("waitFor2 timed out");
+    await new Promise((resolve) => setTimeout(resolve, 50));
+  }
+}
+
+redisTest("sampledSync gates on both the flag and the sample rate", async ({ redisOptions }) => {
+  const definition = definitionFor("sample"); // shared by all three emitters; nothing is emitted
+  const off = new MetricsStreamEmitter({
+    redis: redisOptions,
+    definition,
+    flag: { enabled: () => true },
+    gaugeSampleRate: 0,
+  });
+  const on = new MetricsStreamEmitter({
+    redis: redisOptions,
+    definition,
+    flag: { enabled: () => true },
+    gaugeSampleRate: 1,
+  });
+  const disabled = new MetricsStreamEmitter({
+    redis: redisOptions,
+    definition,
+    flag: { enabled: () => false },
+    gaugeSampleRate: 1,
+  });
+
+  expect(off.sampledSync()).toBe(false); // rate 0 => never sampled in
+  expect(on.sampledSync()).toBe(true); // rate 1 + enabled => always
+  expect(disabled.sampledSync()).toBe(false); // disabled => never, regardless of rate
+  expect(on.enabledSync()).toBe(true); // enabledSync (counters) is unaffected by sampling
+
+  await Promise.all([off.close(), on.close(), disabled.close()]);
+});
+
+redisTest("sampledSync honors a live rate provider (no reconstruct)", async ({ redisOptions }) => {
+  const definition = definitionFor("live");
+  let rate = 1;
+  const emitter = new MetricsStreamEmitter({
+    redis: redisOptions,
+    definition,
+    flag: { enabled: () => true },
+    gaugeSampleRate: { value: () => rate }, // provider form: reads the mutable `rate` above
+  });
+  expect(emitter.sampledSync()).toBe(true);
+  rate = 0; // changed after construction; the same emitter must pick it up
+  expect(emitter.sampledSync()).toBe(false);
+  await emitter.close();
+});
+
+redisTest("CachedRedisNumber reads live, clamps, and falls back", async ({ redisOptions }) => {
+  const key = `rate_${Date.now()}`;
+  const admin = createRedisClient({ ...redisOptions, keyPrefix: undefined });
+  const num = new CachedRedisNumber({ redis: redisOptions, key, defaultValue: 1, min: 0, max: 1 });
+
+  await num.refresh(); // explicit refresh after each write, so the cache TTL never matters here
+  expect(num.value()).toBe(1); // missing key => default
+  await admin.set(key, "0.25");
+  await num.refresh();
+  expect(num.value()).toBe(0.25);
+  await admin.set(key, "5");
+  await num.refresh();
+  expect(num.value()).toBe(1); // out of range => clamped
+  await admin.set(key, "nonsense");
+  await num.refresh();
+  expect(num.value()).toBe(1); // unparseable => default
+
+  await num.close();
+  await admin.quit();
+});
+
+redisTest("streamState reports depth, lag, and pending per shard", async ({ redisOptions }) => {
+  const definition = definitionFor("state", 1);
+  const admin = createRedisClient({ ...redisOptions, keyPrefix: undefined });
+  const key = streamKey(definition, 0);
+
+  await admin.xgroup("CREATE", key, definition.consumerGroup, "$", "MKSTREAM");
+  await admin.xadd(key, "*", "op", "enqueue", "q", "qX");
+  await admin.xadd(key, "*", "op", "ack", "q", "qX");
+  // Read one entry as some consumer and leave it unacked -> 1 pending, 1 still undelivered.
+  await admin.xreadgroup(
+    "GROUP",
+    definition.consumerGroup,
+    "reader",
+    "COUNT",
+    1,
+    "STREAMS",
+    key,
+    ">"
+  );
+
+  const consumer = new MetricsStreamConsumer<Record<string, string>>({
+    redis: redisOptions,
+    definition,
+    consumerName: "c1",
+    mapEntry: (e) => ({ id: e.id, ...e.fields }),
+    insert: async () => {},
+  });
+
+  try {
+    const states = await consumer.streamState(); // probe only; the consumer is never started
+    expect(states).toHaveLength(1);
+    expect(states[0]!.depth).toBe(2); // both entries are still in the stream
+    expect(states[0]!.pending).toBe(1); // delivered to "reader", never acked
+    expect(states[0]!.lag).toBe(1); // one entry not yet delivered to the group
+  } finally {
+    await consumer.stop();
+    await admin.quit();
+  }
+});
+
+redisTest("CachedRedisFlag reads a redis key with caching", async ({ redisOptions }) => {
+  const key = `flag_${Date.now()}`;
+  const admin = createRedisClient({ ...redisOptions, keyPrefix: undefined });
+  const flag = new CachedRedisFlag({ redis: redisOptions, key, cacheTtlMs: 10_000 });
+
+  expect(flag.enabled()).toBe(false); // the key has not been set yet
+  await flag.refresh();
+  expect(flag.enabled()).toBe(false);
+
+  await admin.set(key, "1");
+  await flag.refresh(); // explicit refresh: the 10s cache TTL would otherwise hide the write
+  expect(flag.enabled()).toBe(true);
+
+  await admin.set(key, "0");
+  await flag.refresh();
+  expect(flag.enabled()).toBe(false);
+
+  await flag.close();
+  await admin.quit();
+});
+
+redisTest("CachedRedisFlag warms eagerly on construction", async ({ redisOptions }) => {
+  const key = `flag_eager_${Date.now()}`;
+  const admin = createRedisClient({ ...redisOptions, keyPrefix: undefined });
+  await admin.set(key, "1"); // written before construction so the eager read can see it
+
+  const flag = new CachedRedisFlag({ redis: redisOptions, key });
+  // No manual refresh(): the constructor kicks one off so the first real read is warm.
+  await waitFor(() => flag.enabled() === true);
+  expect(flag.enabled()).toBe(true);
+
+  await flag.close();
+  await admin.quit();
+});
diff --git a/internal-packages/metrics-pipeline/src/consumer.ts b/internal-packages/metrics-pipeline/src/consumer.ts
new file mode 100644
index 00000000000..9e333e70ab1
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/consumer.ts
@@ -0,0 +1,336 @@
+import { createRedisClient, type Redis, type RedisOptions } from "@internal/redis";
+import {
+  getMeter,
+  type Counter,
+  type Histogram,
+  type Meter,
+  type ObservableGauge,
+  ValueType,
+} from "@internal/tracing";
+import { Logger } from "@trigger.dev/core/logger";
+import { dedupTokenFromEntryIds } from "./idempotency.js";
+import { allStreamKeys, type MetricDefinition, type StreamEntry } from "./types.js";
+
+export type MetricsStreamConsumerOptions<TRow> = {
+  redis: RedisOptions; // keyPrefix is overridden to undefined for both internal clients
+  definition: MetricDefinition;
+  /** Unique per process; distinct replicas MUST use distinct names (PEL ownership). */
+  consumerName: string;
+  /** Map a stream entry to one row, several rows (array), or null to drop it (still acked). */
+  mapEntry: (entry: StreamEntry) => TRow | TRow[] | null;
+  /** Insert a batch. Must be idempotent w.r.t. dedupToken; throw to retry the batch. */
+  insert: (rows: TRow[], opts: { dedupToken: string }) => Promise<void>;
+  batchSize?: number; // COUNT for XREADGROUP and XAUTOCLAIM; default 1000
+  blockMs?: number; // XREADGROUP BLOCK timeout; default 1000
+  claimIdleMs?: number; // min idle time passed to XAUTOCLAIM; default 60_000
+  /** How often to scan for stale pending entries (XAUTOCLAIM); not every poll. */
+  reclaimIntervalMs?: number; // default 15_000
+  errorBackoffMs?: number; // sleep after a failed loop iteration; default 1000
+  logger?: Logger; // default: new Logger("MetricsStreamConsumer", "info")
+  meter?: Meter; // default: getMeter("metrics-pipeline")
+};
+
+type RawEntry = [id: string, fields: string[]]; // fields: flat [name, value, name, value, ...]
+type RawStream = [key: string, entries: RawEntry[]]; // one per-stream element of an XREADGROUP reply
+
+/** Per-shard stream health, surfaced as observable gauges and usable directly in tests.
+ * `lag: null` means lag is unknown: Redis reported none (e.g. entries trimmed past the group's
+ * read position, i.e. data loss) or the group lookup failed. Treat as an alert, NOT as zero. */
+export type ShardState = { shard: number; depth: number; lag: number | null; pending: number };
+
+function parseFields(flat: string[]): Record<string, string> {
+  // Folds a flat [name, value, name, value, ...] field list into a record; later names win.
+  const fields: Record<string, string> = {};
+  const pairCount = Math.floor(flat.length / 2); // an unpaired trailing name is ignored
+  for (let p = 0; p < pairCount; p++) fields[flat[2 * p]!] = flat[2 * p + 1]!;
+  return fields;
+}
+
+const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms)); // resolves after `ms` milliseconds
+
+/**
+ * Reads a sharded metrics stream via a consumer group, inserting each stream's poll-batch
+ * as its own dedup block (so an XAUTOCLAIM-reclaimed batch re-forms the same id set and
+ * token), acking only after a successful insert. Sequential read/insert/ack per process.
+ */
+export class MetricsStreamConsumer<TRow> {
+  private readonly redis: Redis; // loop client: XGROUP / XREADGROUP / XAUTOCLAIM / XACK
+  private readonly probeRedis: Redis; // health probes only, see streamState()
+  private readonly def: MetricDefinition;
+  private readonly keys: string[]; // shard stream keys, from allStreamKeys(definition)
+  private readonly consumerName: string;
+  private readonly batchSize: number;
+  private readonly blockMs: number;
+  private readonly claimIdleMs: number;
+  private readonly reclaimIntervalMs: number;
+  private lastReclaimAt = 0; // 0 => the first loop iteration runs a reclaim pass
+  private readonly errorBackoffMs: number;
+  private readonly logger: Logger;
+  private readonly mapEntry: (entry: StreamEntry) => TRow | TRow[] | null;
+  private readonly insert: (rows: TRow[], opts: { dedupToken: string }) => Promise<void>;
+
+  private readonly meter: Meter;
+  private readonly entriesCounter: Counter;
+  private readonly rowsCounter: Counter;
+  private readonly insertErrorCounter: Counter;
+  private readonly insertDuration: Histogram;
+  private readonly observables: ObservableGauge[];
+  private readonly batchCallback: Parameters<Meter["addBatchObservableCallback"]>[0];
+
+  private running = false;
+  private loopPromise?: Promise<void>;
+
+  constructor(options: MetricsStreamConsumerOptions<TRow>) {
+    this.logger = options.logger ?? new Logger("MetricsStreamConsumer", "info");
+    const redisConfig = { ...options.redis, keyPrefix: undefined };
+    this.redis = createRedisClient(redisConfig, {
+      onError: (error) => this.logger.error("consumer redis error", { error }),
+    });
+    // Separate client so the observable-gauge probes never queue behind the blocking XREADGROUP.
+    this.probeRedis = createRedisClient(redisConfig, {
+      onError: (error) => this.logger.error("consumer probe redis error", { error }),
+    });
+    this.def = options.definition;
+    this.keys = allStreamKeys(options.definition);
+    this.consumerName = options.consumerName;
+    this.batchSize = options.batchSize ?? 1000;
+    this.blockMs = options.blockMs ?? 1000;
+    this.claimIdleMs = options.claimIdleMs ?? 60_000;
+    this.reclaimIntervalMs = options.reclaimIntervalMs ?? 15_000;
+    this.errorBackoffMs = options.errorBackoffMs ?? 1000;
+    this.mapEntry = options.mapEntry;
+    this.insert = options.insert;
+
+    this.meter = options.meter ?? getMeter("metrics-pipeline");
+    this.entriesCounter = this.meter.createCounter("queue_metrics.consumer.entries", {
+      description: "Stream entries read (attr source=new|reclaimed)",
+      valueType: ValueType.INT,
+    });
+    this.rowsCounter = this.meter.createCounter("queue_metrics.consumer.rows_inserted", {
+      description: "Rows inserted into the sink",
+      valueType: ValueType.INT,
+    });
+    this.insertErrorCounter = this.meter.createCounter("queue_metrics.consumer.insert_errors", {
+      description: "Failed inserts (batch left pending for retry)",
+      valueType: ValueType.INT,
+    });
+    this.insertDuration = this.meter.createHistogram("queue_metrics.consumer.insert_duration", {
+      description: "Sink insert latency",
+      unit: "ms",
+      valueType: ValueType.INT,
+    });
+
+    const depthGauge = this.meter.createObservableGauge("queue_metrics.consumer.stream_depth", {
+      description: "Entries currently in each shard stream (approaches MAXLEN => trimming)",
+      valueType: ValueType.INT,
+    });
+    const lagGauge = this.meter.createObservableGauge("queue_metrics.consumer.group_lag", {
+      description: "Entries not yet delivered to the consumer group (consumer falling behind)",
+      valueType: ValueType.INT,
+    });
+    const pendingGauge = this.meter.createObservableGauge("queue_metrics.consumer.pending", {
+      description: "Unacked (in-flight or stuck) entries in the group PEL",
+      valueType: ValueType.INT,
+    });
+    const lagUnknownGauge = this.meter.createObservableGauge("queue_metrics.consumer.lag_unknown", {
+      description:
+        "1 when Redis cannot compute group lag (entries trimmed => data loss); alert on this",
+      valueType: ValueType.INT,
+    });
+    this.observables = [depthGauge, lagGauge, pendingGauge, lagUnknownGauge];
+    this.batchCallback = async (result) => {
+      const states = await this.streamState();
+      for (const s of states) {
+        const attrs = { stream: this.def.name, shard: String(s.shard) };
+        result.observe(depthGauge, s.depth, attrs);
+        if (s.lag !== null) result.observe(lagGauge, s.lag, attrs); // unknown lag is not reported as a number
+        result.observe(lagUnknownGauge, s.lag === null ? 1 : 0, attrs);
+        result.observe(pendingGauge, s.pending, attrs);
+      }
+    };
+    this.meter.addBatchObservableCallback(this.batchCallback, this.observables); // registered from construction, not from start()
+  }
+
+  async start(): Promise<void> {
+    if (this.running) return; // no-op while the loop is already running
+    await this.ensureGroups();
+    this.running = true;
+    this.loopPromise = this.loop();
+  }
+
+  async stop(): Promise<void> {
+    this.running = false; // the loop exits once its current iteration (XREADGROUP block) returns
+    this.meter.removeBatchObservableCallback(this.batchCallback, this.observables);
+    await this.loopPromise?.catch(() => {});
+    await Promise.all([this.redis.quit().catch(() => {}), this.probeRedis.quit().catch(() => {})]);
+  }
+
+  private async ensureGroups(): Promise<void> {
+    for (const key of this.keys) {
+      try {
+        // "0" (not "$"): a brand-new stream's group must not skip entries emitted
+        // between emitter boot and the first consumer's group creation.
+        await this.redis.xgroup("CREATE", key, this.def.consumerGroup, "0", "MKSTREAM");
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        if (!message.includes("BUSYGROUP")) throw error; // BUSYGROUP = group already exists
+      }
+    }
+  }
+
+  private async loop(): Promise<void> {
+    while (this.running) {
+      try {
+        if (Date.now() - this.lastReclaimAt >= this.reclaimIntervalMs) {
+          this.lastReclaimAt = Date.now(); // stamped first, so a failing pass is not retried every poll
+          await this.reclaimStale();
+        }
+        await this.readNew();
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        // Self-heal a missing group (stream trimmed to nothing / deleted / Redis flushed):
+        // recreate it rather than wedging the loop on NOGROUP forever.
+        if (message.includes("NOGROUP")) {
+          this.logger.warn("consumer group missing; recreating", { error });
+          await this.ensureGroups().catch(() => {});
+        } else {
+          this.logger.error("consumer loop iteration failed", { error });
+        }
+        await sleep(this.errorBackoffMs);
+      }
+    }
+  }
+
+  private async readNew(): Promise<number> {
+    const ids = this.keys.map(() => ">"); // ">" = entries never delivered to this group
+    const response = (await this.redis.xreadgroup(
+      "GROUP",
+      this.def.consumerGroup,
+      this.consumerName,
+      "COUNT",
+      this.batchSize,
+      "BLOCK",
+      this.blockMs,
+      "STREAMS",
+      ...this.keys,
+      ...ids
+    )) as RawStream[] | null;
+
+    if (!response) return 0; // BLOCK timed out with nothing new
+    return this.processStreams(response, "new");
+  }
+
+  private async reclaimStale(): Promise<void> {
+    // Claim every shard before processing so one failing shard cannot starve the ones after it.
+    const claimed: RawStream[] = [];
+    for (const key of this.keys) {
+      const result = (await this.redis.xautoclaim(
+        key,
+        this.def.consumerGroup,
+        this.consumerName,
+        this.claimIdleMs,
+        "0",
+        "COUNT",
+        this.batchSize
+      )) as [string, RawEntry[], string[]] | null;
+      claimed.push([key, result?.[1] ?? []]); // empty claims are skipped by processStreams
+    }
+    await this.processStreams(claimed, "reclaimed");
+  }
+
+  // One insert (dedup block) and XACK per stream, so a reclaimed batch re-forms the
+  // original per-stream id set and token. On insert failure that stream's entries stay
+  // pending for a later XAUTOCLAIM; other streams still progress.
+  private async processStreams(streams: RawStream[], source: "new" | "reclaimed"): Promise<number> {
+    let processed = 0;
+    let firstError: unknown;
+
+    for (const [key, entries] of streams) {
+      if (entries.length === 0) continue;
+      const keyIds: string[] = [];
+      const rows: TRow[] = [];
+      for (const [id, flat] of entries) {
+        keyIds.push(id); // every id is acked below, including entries mapEntry drops
+        const mapped = this.mapEntry({ id, fields: parseFields(flat) });
+        if (Array.isArray(mapped)) rows.push(...mapped);
+        else if (mapped !== null) rows.push(mapped);
+      }
+      this.entriesCounter.add(keyIds.length, { source });
+
+      if (rows.length > 0) {
+        const startedAt = Date.now();
+        try {
+          await this.insert(rows, { dedupToken: dedupTokenFromEntryIds(keyIds, key) });
+        } catch (error) {
+          this.insertErrorCounter.add(1);
+          firstError ??= error;
+          continue; // skip the XACK: this stream's entries stay pending
+        } finally {
+          this.insertDuration.record(Date.now() - startedAt); // recorded for failed inserts too
+        }
+        this.rowsCounter.add(rows.length);
+      }
+
+      await this.redis.xack(key, this.def.consumerGroup, ...keyIds);
+      processed += keyIds.length;
+    }
+
+    if (firstError !== undefined) throw firstError; // surfaced only after every stream was attempted
+    return processed;
+  }
+
+  /** Per-shard depth (XLEN), group lag, and pending — the consumer-health signals. */
+  async streamState(): Promise<ShardState[]> {
+    return probeShardStates(this.probeRedis, this.keys, this.def.consumerGroup);
+  }
+
+  /** All shard stream keys this consumer reads (for diagnostics/tests). */
+  streamKeys(): string[] {
+    return this.keys.slice();
+  }
+}
+
+/**
+ * Per-shard depth/lag/pending for a metric stream — usable without a running consumer
+ * (e.g. from an admin route). `redis` should have keyPrefix unset, matching the stream keys.
+ */
+export async function probeShardStates(
+  redis: Redis,
+  keys: string[],
+  consumerGroup: string
+): Promise<ShardState[]> {
+  const out: ShardState[] = [];
+  for (let shard = 0; shard < keys.length; shard++) { // shard number = index into `keys`
+    const key = keys[shard]!;
+    const depth = Number(await redis.xlen(key)) || 0; // not inside the try: an XLEN error propagates
+    // lag defaults to null (unknown) and only becomes a number when the group is found and
+    // Redis reports one: a nil lag (or a missing group on an existing stream) means we can't
+    // compute it, e.g. entries were trimmed past the group's read position (data loss).
+    let lag: number | null = null;
+    let pending = 0;
+    try {
+      const groups = (await redis.call("XINFO", "GROUPS", key)) as unknown[];
+      for (const raw of groups) {
+        const info = flatToMap(raw as unknown[]);
+        if (info.name === consumerGroup) {
+          const rawLag = info.lag;
+          lag = rawLag == null ? null : Number(rawLag);
+          if (lag !== null && !Number.isFinite(lag)) lag = null; // non-numeric reply => unknown
+          pending = Number(info.pending) || 0;
+        }
+      }
+    } catch {
+      // Stream/group may not exist yet (or XINFO failed): pending stays 0 and lag stays null.
+    }
+    out.push({ shard, depth, lag, pending });
+  }
+  return out;
+}
+
+function flatToMap(flat: unknown[]): Record<string, unknown> {
+  // Folds a flat [name, value, name, value, ...] reply into an object keyed by String(name).
+  const map: Record<string, unknown> = {};
+  const pairCount = Math.floor(flat.length / 2); // an unpaired trailing name is dropped
+  for (let p = 0; p < pairCount; p++) map[String(flat[2 * p])] = flat[2 * p + 1];
+  return map;
+}
diff --git a/internal-packages/metrics-pipeline/src/emitter.ts b/internal-packages/metrics-pipeline/src/emitter.ts
new file mode 100644
index 00000000000..692956d98cb
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/emitter.ts
@@ -0,0 +1,242 @@
+import { createRedisClient, type Redis, type RedisOptions } from "@internal/redis";
+import { getMeter, type Counter, type Meter, ValueType } from "@internal/tracing";
+import { Logger } from "@trigger.dev/core/logger";
+import { shardFor } from "./hash.js";
+import { streamKey, type MetricDefinition, type MetricFields } from "./types.js";
+
+/** Construction options for MetricsStreamEmitter. */
+export type MetricsStreamEmitterOptions = {
+  /** Connection options for the emitter's Redis client; the emitter overrides `keyPrefix`
+   * to undefined so stream and odometer keys are written verbatim. */
+  redis: RedisOptions;
+  /** Supplies the stream name, shard count and optional MAXLEN cap used when emitting. */
+  definition: MetricDefinition;
+  /** Synchronous enabled check (e.g. CachedRedisFlag); emits are no-ops when false. */
+  flag: { enabled(): boolean };
+  /** Probability (0..1) that a sampled emission fires; applies to `sampledSync()`, not
+   * `emit()`. Pass a `{ value() }` provider (e.g. CachedRedisNumber) to tune it live
+   * without a redeploy. A fixed number is clamped to [0, 1]. Default 1 (always). */
+  gaugeSampleRate?: number | { value(): number };
+  /** TTL (ms) refreshed on every counter write on the per-(queue,op) odometer key.
+   * Active queues never expire; idle-past-TTL queues purge and self-heal on return.
+   * Default 7 days. */
+  counterOdometerTtlMs?: number;
+  /** TTL (ms) for per-concurrency-key odometers; short because key cardinality is
+   * user-controlled and cumulative counters make idle-gap expiry loss-free. Default 24h. */
+  ckOdometerTtlMs?: number;
+  /** Defaults to `new Logger("MetricsStreamEmitter", "warn")`. */
+  logger?: Logger;
+  /** Defaults to `getMeter("metrics-pipeline")`. */
+  meter?: Meter;
+};
+
+// Call shape of the `qmEmitCumulative` custom command (registered with numberOfKeys 2):
+// the first two arguments are the script's KEYS (odometer, stream), the rest its ARGV.
+type CumulativeCommand = (
+  odometerKey: string,
+  streamKey: string,
+  ttlMs: string,
+  maxLen: string,
+  op: string,
+  q: string,
+  ...extraFields: string[]
+) => Promise<unknown>;
+
+// Call shape of the `qmEmitCumulativeCk` custom command (registered with numberOfKeys 3):
+// the first three arguments are the script's KEYS (base odometer, per-concurrency-key
+// odometer, stream), the rest its ARGV.
+type CumulativeCkCommand = (
+  odometerKey: string,
+  ckOdometerKey: string,
+  streamKey: string,
+  ttlMs: string,
+  ckTtlMs: string,
+  maxLen: string,
+  op: string,
+  q: string,
+  ck: string,
+  ...extraFields: string[]
+) => Promise<unknown>;
+
+// INCR the odometer, refresh its TTL, and XADD the reading (new value as `cum`) in one round
+// trip. Refresh-on-write is load-bearing: only genuinely idle queues expire. On first creation
+// (v==1) XADD a cum=0 baseline first (smaller stream id => sorts first) so deltaSum captures the
+// 0->1 transition and the total reconstructs exactly. The baseline entry carries only
+// op/q/cum; the extra field/value pairs ride on the reading entry alone.
+// KEYS: [1]=odometer [2]=stream.
+// ARGV: [1]=ttlMs [2]=maxLen [3]=op [4]=q [5..]=extra field/value pairs (e.g. wait).
+// A maxLen that is not a number above 0 omits the MAXLEN clause from the XADD.
+const CUMULATIVE_LUA = `
+local v = redis.call('INCR', KEYS[1])
+redis.call('PEXPIRE', KEYS[1], ARGV[1])
+local maxlen = tonumber(ARGV[2]) or 0
+local function xadd(cum, withExtra)
+  local x = {'XADD', KEYS[2]}
+  if maxlen > 0 then x[#x+1]='MAXLEN'; x[#x+1]='~'; x[#x+1]=ARGV[2] end
+  x[#x+1]='*'
+  x[#x+1]='op'; x[#x+1]=ARGV[3]
+  x[#x+1]='q';  x[#x+1]=ARGV[4]
+  if withExtra then for i=5,#ARGV do x[#x+1]=ARGV[i] end end
+  x[#x+1]='cum'; x[#x+1]=cum
+  redis.call(unpack(x))
+end
+if v == 1 then xadd(0, false) end
+xadd(v, true)
+`;
+
+// CK variant: advances base + per-key odometers, ONE reading entry carries both (cum +
+// ck/ckcum), so per-key attribution adds no stream volume. Baselines seed independently:
+// cum-only entry = base row, ck+ckcum-only entry = per-key row, reading entry = both.
+// When both odometers are new in the same call, three entries are appended: the two
+// baselines followed by the reading. Only the reading entry carries the extra pairs.
+// KEYS: [1]=baseOdometer [2]=ckOdometer [3]=stream. ARGV: [1]=baseTtlMs [2]=ckTtlMs
+// [3]=maxLen [4]=op [5]=q [6]=ck [7..]=extra field/value pairs.
+const CUMULATIVE_CK_LUA = `
+local v = redis.call('INCR', KEYS[1])
+redis.call('PEXPIRE', KEYS[1], ARGV[1])
+local ckv = redis.call('INCR', KEYS[2])
+redis.call('PEXPIRE', KEYS[2], ARGV[2])
+local maxlen = tonumber(ARGV[3]) or 0
+local function xadd(fields, withExtra)
+  local x = {'XADD', KEYS[3]}
+  if maxlen > 0 then x[#x+1]='MAXLEN'; x[#x+1]='~'; x[#x+1]=ARGV[3] end
+  x[#x+1]='*'
+  x[#x+1]='op'; x[#x+1]=ARGV[4]
+  x[#x+1]='q';  x[#x+1]=ARGV[5]
+  if withExtra then for i=7,#ARGV do x[#x+1]=ARGV[i] end end
+  for i=1,#fields do x[#x+1]=fields[i] end
+  redis.call(unpack(x))
+end
+if v == 1 then xadd({'cum', 0}, false) end
+if ckv == 1 then xadd({'ck', ARGV[6], 'ckcum', 0}, false) end
+xadd({'ck', ARGV[6], 'cum', v, 'ckcum', ckv}, true)
+`;
+
+/**
+ * Node-side producer: XADDs events to a sharded metrics stream, gated on a flag.
+ *
+ * Emits are fire-and-forget: they are dropped (not queued) while the flag is off or the
+ * Redis connection is not ready, and failures are counted and logged at debug level
+ * instead of being thrown into the caller.
+ */
+export class MetricsStreamEmitter {
+  private readonly redis: Redis;
+  private readonly def: MetricDefinition;
+  private readonly flag: { enabled(): boolean };
+  private readonly sampleRate: () => number;
+  private readonly odometerTtlMs: number;
+  private readonly ckOdometerTtlMs: number;
+  private readonly logger: Logger;
+  private readonly emittedCounter: Counter;
+  private readonly errorCounter: Counter;
+
+  /**
+   * Opens a dedicated Redis client (keyPrefix forced off), registers the two cumulative
+   * Lua commands on it and creates the emitted/error counters.
+   */
+  constructor(options: MetricsStreamEmitterOptions) {
+    this.logger = options.logger ?? new Logger("MetricsStreamEmitter", "warn");
+    this.redis = createRedisClient(
+      { ...options.redis, keyPrefix: undefined },
+      { onError: (error) => this.logger.error("emitter redis error", { error }) }
+    );
+    this.redis.defineCommand("qmEmitCumulative", { numberOfKeys: 2, lua: CUMULATIVE_LUA });
+    this.redis.defineCommand("qmEmitCumulativeCk", { numberOfKeys: 3, lua: CUMULATIVE_CK_LUA });
+    this.odometerTtlMs = options.counterOdometerTtlMs ?? 7 * 24 * 60 * 60 * 1000;
+    this.ckOdometerTtlMs = options.ckOdometerTtlMs ?? 24 * 60 * 60 * 1000;
+    this.def = options.definition;
+    this.flag = options.flag;
+    const rate = options.gaugeSampleRate;
+    if (typeof rate === "object") {
+      // Live provider: read on every sampledSync() call so it can be tuned at runtime.
+      this.sampleRate = () => rate.value();
+    } else {
+      // Fixed rate: clamp once into [0, 1]; an omitted rate means "always".
+      const fixed = Math.min(1, Math.max(0, rate ?? 1));
+      this.sampleRate = () => fixed;
+    }
+
+    const meter = options.meter ?? getMeter("metrics-pipeline");
+    this.emittedCounter = meter.createCounter("queue_metrics.emitter.emitted", {
+      description: "Node-side metric events XADDed to the stream",
+      valueType: ValueType.INT,
+    });
+    this.errorCounter = meter.createCounter("queue_metrics.emitter.errors", {
+      description: "Failed metric-event XADDs (dropped)",
+      valueType: ValueType.INT,
+    });
+  }
+
+  /** Whether the feature flag currently reports enabled. */
+  enabledSync(): boolean {
+    return this.flag.enabled();
+  }
+
+  // Enabled AND (probabilistically) sampled-in. For high-frequency sampled emissions
+  // (e.g. Lua gauges); exact-count events use enabledSync()/emit() and are never sampled.
+  sampledSync(): boolean {
+    if (!this.flag.enabled()) return false;
+    const rate = this.sampleRate();
+    if (rate >= 1) return true;
+    if (rate <= 0) return false;
+    return Math.random() < rate;
+  }
+
+  // Fire-and-forget gauge emit: a plain XADD of an op=gauge snapshot (no odometer). The
+  // gauge value was read atomically inside the queue op's Lua and returned on the reply;
+  // this just lands it on the metrics stream. Loss-tolerant (sampled), never throws into
+  // the caller. Shares the counter stream (one stream family on the metrics Redis).
+  emitGauge(shardKey: string, fields: MetricFields): void {
+    if (!this.flag.enabled()) return;
+    // Drop rather than queue while the metrics Redis is unreachable: ioredis would hold
+    // every command in its offline queue until rejection, and metrics are loss-tolerant.
+    if (this.redis.status !== "ready") return;
+    const op = String(fields.op ?? "gauge");
+    const stream = streamKey(this.def, shardFor(shardKey, this.def.shardCount));
+    const args: string[] = [];
+    if (this.def.maxLen) args.push("MAXLEN", "~", String(this.def.maxLen));
+    // Write `op` explicitly so an entry emitted without one still carries the same
+    // default that the emitted-counter label reports.
+    args.push("*", "op", op);
+    for (const [field, value] of Object.entries(fields)) {
+      if (field === "op") continue;
+      args.push(field, String(value));
+    }
+    this.emittedCounter.add(1, { op });
+    this.redis.xadd(stream, ...(args as [string, ...string[]])).catch((error) => {
+      this.errorCounter.add(1);
+      this.logger.debug("metrics gauge emit failed", { error, stream });
+    });
+  }
+
+  // Fire-and-forget cumulative counter emit: advances the per-(queue,op) odometer and
+  // XADDs its new absolute value. No-op when disabled, never throws into the caller. A
+  // lost XADD self-heals (the next reading restates the total); the INCR is never sampled.
+  // A non-empty `fields.ck` also advances a per-concurrency-key odometer and rides the
+  // same entry as ck/ckcum (see CUMULATIVE_CK_LUA for the baseline/row mapping).
+  emit(shardKey: string, fields: MetricFields): void {
+    if (!this.flag.enabled()) return;
+    if (this.redis.status !== "ready") return;
+    const op = String(fields.op ?? "unknown");
+    const q = String(fields.q ?? "");
+    const ck = fields.ck != null && String(fields.ck) !== "" ? String(fields.ck) : null;
+    const shard = shardFor(shardKey, this.def.shardCount);
+    const stream = streamKey(this.def, shard);
+    // The odometer carries the stream's {shard} hash tag so INCR + XADD stay in one
+    // Cluster slot (the shard is derived from the queue, so the mapping is stable).
+    // The key format is part of the rolling-deploy data shape: concurrent old/new
+    // emitters with different formats split an odometer and corrupt its deltas.
+    const odometerKey = `${this.def.name}_cum:{${shard}}:${op}:${q}`;
+    // op/q/ck travel as dedicated ARGV slots; everything else is passed as extra pairs.
+    const extra: string[] = [];
+    for (const [field, value] of Object.entries(fields)) {
+      if (field === "op" || field === "q" || field === "ck") continue;
+      extra.push(field, String(value));
+    }
+    this.emittedCounter.add(1, { op });
+    const maxLen = String(this.def.maxLen ?? 0);
+    const onError = (error: unknown) => {
+      this.errorCounter.add(1);
+      this.logger.debug("metrics emit failed", { error, stream });
+    };
+    if (ck) {
+      const client = this.redis as unknown as { qmEmitCumulativeCk: CumulativeCkCommand };
+      client
+        .qmEmitCumulativeCk(
+          odometerKey,
+          `${odometerKey}:ck:${ck}`,
+          stream,
+          String(this.odometerTtlMs),
+          String(this.ckOdometerTtlMs),
+          maxLen,
+          op,
+          q,
+          ck,
+          ...extra
+        )
+        .catch(onError);
+      return;
+    }
+    const client = this.redis as unknown as { qmEmitCumulative: CumulativeCommand };
+    client
+      .qmEmitCumulative(odometerKey, stream, String(this.odometerTtlMs), maxLen, op, q, ...extra)
+      .catch(onError);
+  }
+
+  // Resolves once the metrics Redis connection is ready (emits before that are dropped).
+  waitUntilReady(): Promise<void> {
+    if (this.redis.status === "ready") return Promise.resolve();
+    return new Promise((resolve) => this.redis.once("ready", () => resolve()));
+  }
+
+  /** Closes the emitter's Redis connection via QUIT. */
+  async close(): Promise<void> {
+    await this.redis.quit();
+  }
+}
diff --git a/internal-packages/metrics-pipeline/src/flag.ts b/internal-packages/metrics-pipeline/src/flag.ts
new file mode 100644
index 00000000000..5931e088939
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/flag.ts
@@ -0,0 +1,46 @@
+import type { RedisOptions } from "@internal/redis";
+import type { Logger } from "@trigger.dev/core/logger";
+import { CachedRedisValue } from "./cachedValue.js";
+
+/** Construction options for CachedRedisFlag. */
+export type CachedRedisFlagOptions = {
+  redis: RedisOptions;
+  /** Redis key holding the flag. A value of "1"/"true"/"on"/"enabled"/"yes" is truthy;
+   * matching ignores case and surrounding whitespace. Anything else reads as false. */
+  key: string;
+  /** Passed through to the underlying CachedRedisValue. */
+  cacheTtlMs?: number;
+  /** Default handed to the underlying CachedRedisValue; false when omitted. */
+  defaultValue?: boolean;
+  logger?: Logger;
+};
+
+// Accepted truthy spellings; raw values are trimmed and lower-cased before the lookup.
+const TRUTHY = new Set(["1", "true", "on", "enabled", "yes"]);
+
+/**
+ * Boolean feature flag from a Redis key with a short stale-while-revalidate cache,
+ * exposing a synchronous getter for hot paths (building Lua ARGV on every op).
+ *
+ * A null raw value, or any value outside TRUTHY, parses to false.
+ */
+export class CachedRedisFlag {
+  private readonly inner: CachedRedisValue<boolean>;
+
+  constructor(options: CachedRedisFlagOptions) {
+    this.inner = new CachedRedisValue<boolean>({
+      redis: options.redis,
+      key: options.key,
+      parse: (raw) => raw != null && TRUTHY.has(raw.trim().toLowerCase()),
+      defaultValue: options.defaultValue ?? false,
+      cacheTtlMs: options.cacheTtlMs,
+      logger: options.logger,
+      loggerName: "CachedRedisFlag",
+    });
+  }
+
+  /** Synchronous read: returns whatever the inner cached value's `get()` yields. */
+  enabled(): boolean {
+    return this.inner.get();
+  }
+
+  /** Delegates to the inner cached value's `refresh()` and resolves with its result. */
+  refresh(): Promise<boolean> {
+    return this.inner.refresh();
+  }
+
+  /** Awaits the inner cached value's `close()`. */
+  async close(): Promise<void> {
+    await this.inner.close();
+  }
+}
diff --git a/internal-packages/metrics-pipeline/src/hash.ts b/internal-packages/metrics-pipeline/src/hash.ts
new file mode 100644
index 00000000000..b14324c138a
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/hash.ts
@@ -0,0 +1,15 @@
+/**
+ * FNV-1a 32-bit hash, folded over the string's UTF-16 code units. Deterministic across
+ * processes; used only for sharding.
+ *
+ * @returns The hash as an unsigned 32-bit integer.
+ */
+export function fnv1a32(str: string): number {
+  let hash = 0x811c9dc5; // FNV offset basis
+  for (let i = 0; i < str.length; i++) {
+    hash ^= str.charCodeAt(i);
+    hash = Math.imul(hash, 0x01000193); // FNV prime, 32-bit wrapping multiply
+  }
+  return hash >>> 0;
+}
+
+/**
+ * Deterministic shard index in [0, shardCount) for a key.
+ *
+ * `shardCount` is truncated to an integer first, and a count that is non-finite or not
+ * above 1 maps every key to shard 0. This guarantees an integer result, so a fractional
+ * or NaN count can never produce a stream key such as `name:{1.7}` or `name:{NaN}`.
+ */
+export function shardFor(key: string, shardCount: number): number {
+  const shards = Number.isFinite(shardCount) ? Math.floor(shardCount) : 1;
+  if (shards <= 1) return 0;
+  return fnv1a32(key) % shards;
+}
diff --git a/internal-packages/metrics-pipeline/src/idempotency.ts b/internal-packages/metrics-pipeline/src/idempotency.ts
new file mode 100644
index 00000000000..60cbd661f53
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/idempotency.ts
@@ -0,0 +1,11 @@
+import { createHash } from "node:crypto";
+
+/**
+ * Deterministic, order-independent token over a batch of entry ids. A redelivered
+ * batch yields the same token, so ClickHouse's raw-table dedup window drops the replay.
+ *
+ * @param ids Entry ids of the batch; the caller's array is not mutated.
+ * @param scope Disambiguates id sets that could collide across streams (the stream key).
+ * @returns Hex SHA-1 digest of `<scope>|<sorted ids joined by ",">`.
+ */
+export function dedupTokenFromEntryIds(ids: string[], scope = ""): string {
+  const canonicalIds = ids.slice().sort().join(",");
+  const hasher = createHash("sha1");
+  hasher.update(`${scope}|${canonicalIds}`);
+  return hasher.digest("hex");
+}
diff --git a/internal-packages/metrics-pipeline/src/index.ts b/internal-packages/metrics-pipeline/src/index.ts
new file mode 100644
index 00000000000..223c5feab17
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/index.ts
@@ -0,0 +1,26 @@
+export { CachedRedisFlag, type CachedRedisFlagOptions } from "./flag.js";
+export {
+  CachedRedisNumber,
+  type CachedRedisNumberOptions,
+  CachedRedisValue,
+  type CachedRedisValueOptions,
+} from "./cachedValue.js";
+export { MetricsStreamEmitter, type MetricsStreamEmitterOptions } from "./emitter.js";
+export {
+  MetricsStreamConsumer,
+  type MetricsStreamConsumerOptions,
+  type ShardState,
+  probeShardStates,
+} from "./consumer.js";
+export { createMetricsGaugeComputeLua, type GaugeComputeLuaParams } from "./lua.js";
+export { dedupTokenFromEntryIds } from "./idempotency.js";
+export { shardFor, fnv1a32 } from "./hash.js";
+export {
+  streamKey,
+  allStreamKeys,
+  entryTimeMs,
+  entryOrderKey,
+  type MetricDefinition,
+  type MetricFields,
+  type StreamEntry,
+} from "./types.js";
diff --git a/internal-packages/metrics-pipeline/src/lua.ts b/internal-packages/metrics-pipeline/src/lua.ts
new file mode 100644
index 00000000000..64f3b896c0d
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/lua.ts
@@ -0,0 +1,50 @@
+/**
+ * Inputs for createMetricsGaugeComputeLua. Each field is a Lua expression evaluated inside
+ * the target script; the numeric ones are wrapped as `tonumber(<expr>) or 0`. queueLimit/
+ * envLimit must be the EFFECTIVE enforced limit, else an unset limit reads as throttled.
+ */
+export type GaugeComputeLuaParams = {
+  /** Lua boolean expression; when true the gauge is computed (else the extra reads are skipped). */
+  enabledArg: string;
+  /** Becomes `__ql`, gauge slot 1. */
+  queued: string;
+  /** Becomes `__cc`, gauge slot 2. */
+  running: string;
+  /** Becomes `__lim`, gauge slot 3. */
+  queueLimit: string;
+  /** Becomes `__eql`, gauge slot 4. */
+  envQueued: string;
+  /** Becomes `__ec`, gauge slot 5. */
+  envRunning: string;
+  /** Becomes `__elim`, gauge slot 6. */
+  envLimit: string;
+  /** Lua statements run first inside the pcall (e.g. to compute aggregate locals). */
+  preamble?: string;
+  /** Lua boolean expression (in __cc/__lim/__ql) for the throttled flag (gauge slot 7).
+   * Defaults to `__cc >= __lim and __ql > 0`. Pass "false" where cc >= lim is not a valid
+   * throttle signal (e.g. summed CK aggregates). */
+  throttledExpr?: string;
+  /** CK-health extras (both or neither): appended as an optional gauge tail, gauge[8]/gauge[9].
+   * If only one of the two is set, the tail is omitted. */
+  ckBacklogged?: string;
+  ckMaxWaitMs?: string;
+};
+
+/**
+ * Generates the Lua fragment that computes an op=gauge snapshot into the enclosing
+ * script's `__qm_g` local: a flat {ql, cc, lim, eql, ec, elim, thr} array, plus ckq/ckw
+ * when both CK params are given. The script can then RETURN it and Node XADDs it to the
+ * metrics Redis. The fragment performs no Redis write of its own (the run-queue Redis
+ * carries no metrics stream). It is gated on `enabledArg` and pcall-wrapped; the
+ * enclosing script MUST declare `local __qm_g` first.
+ */
+export function createMetricsGaugeComputeLua(params: GaugeComputeLuaParams): string {
+  const { ckBacklogged, ckMaxWaitMs } = params;
+  const throttledExpr = params.throttledExpr ?? "__cc >= __lim and __ql > 0";
+
+  // Statements executed inside the pcall'd function, one generated line per entry.
+  const body: string[] = [
+    `    ${params.preamble ?? ""}`,
+    `    local __ql = tonumber(${params.queued}) or 0`,
+    `    local __cc = tonumber(${params.running}) or 0`,
+    `    local __lim = tonumber(${params.queueLimit}) or 0`,
+    `    local __eql = tonumber(${params.envQueued}) or 0`,
+    `    local __ec = tonumber(${params.envRunning}) or 0`,
+    `    local __elim = tonumber(${params.envLimit}) or 0`,
+    `    local __thr = 0`,
+    `    if ${throttledExpr} then __thr = 1 end`,
+  ];
+
+  if (ckBacklogged != null && ckMaxWaitMs != null) {
+    // CK-health tail: only emitted when BOTH expressions are supplied.
+    body.push(
+      `    local __ckq = tonumber(${ckBacklogged}) or 0`,
+      `    local __ckw = tonumber(${ckMaxWaitMs}) or 0`,
+      `    __qm_g = {__ql, __cc, __lim, __eql, __ec, __elim, __thr, __ckq, __ckw}`
+    );
+  } else {
+    body.push(`    __qm_g = {__ql, __cc, __lim, __eql, __ec, __elim, __thr}`);
+  }
+
+  // The leading empty element reproduces the fragment's initial newline.
+  return ["", `if ${params.enabledArg} then`, "  pcall(function()", ...body, "  end)", "end"].join(
+    "\n"
+  );
+}
diff --git a/internal-packages/metrics-pipeline/src/pipeline.test.ts b/internal-packages/metrics-pipeline/src/pipeline.test.ts
new file mode 100644
index 00000000000..73979310798
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/pipeline.test.ts
@@ -0,0 +1,116 @@
+import { describe, expect, it } from "vitest";
+import { createMetricsGaugeComputeLua } from "./lua.js";
+import { dedupTokenFromEntryIds } from "./idempotency.js";
+import { fnv1a32, shardFor } from "./hash.js";
+import { allStreamKeys, entryOrderKey, entryTimeMs, streamKey } from "./types.js";
+
+// shardFor: a single shard always maps to 0; otherwise the index is stable across calls
+// and falls within [0, shardCount).
+describe("shardFor", () => {
+  it("is deterministic and in range", () => {
+    expect(shardFor("queueA", 1)).toBe(0);
+    const s = shardFor("queueA", 4);
+    expect(s).toBeGreaterThanOrEqual(0);
+    expect(s).toBeLessThan(4);
+    expect(shardFor("queueA", 4)).toBe(s);
+    expect(fnv1a32("queueA")).toBe(fnv1a32("queueA"));
+  });
+});
+
+// The token ignores id order, differs for different id sets, and is a 40-hex-char digest.
+describe("dedupTokenFromEntryIds", () => {
+  it("is order-independent and set-sensitive", () => {
+    expect(dedupTokenFromEntryIds(["1-0", "2-0"])).toBe(dedupTokenFromEntryIds(["2-0", "1-0"]));
+    expect(dedupTokenFromEntryIds(["1-0"])).not.toBe(dedupTokenFromEntryIds(["2-0"]));
+    expect(dedupTokenFromEntryIds(["1-0"])).toMatch(/^[0-9a-f]{40}$/);
+  });
+});
+
+// Stream key naming plus the two helpers that parse a `<ms>-<seq>` entry id.
+describe("stream keys", () => {
+  it("names and parses entry time", () => {
+    expect(streamKey({ name: "queue_metrics" }, 3)).toBe("queue_metrics:{3}");
+    expect(allStreamKeys({ name: "qm", shardCount: 2, consumerGroup: "cg" })).toEqual([
+      "qm:{0}",
+      "qm:{1}",
+    ]);
+    expect(entryTimeMs("1717000000000-5")).toBe(1717000000000);
+    expect(entryTimeMs("nope")).toBeNull();
+  });
+
+  it("entryOrderKey stays exact and strictly monotonic at real epoch magnitudes", () => {
+    // ~2026: ms*1e6 is past the JS safe-integer range, so a number-typed key would lose
+    // precision; the keys are compared as BigInt instead.
+    const ms = 1783000000000;
+    const k = (seq: number) => BigInt(entryOrderKey(`${ms}-${seq}`));
+    // adjacent seq within one ms must not collapse to the same key (the float bug)
+    expect(k(0)).toBe(BigInt(ms) * 1000000n);
+    expect(k(1) - k(0)).toBe(1n);
+    expect(k(2) - k(1)).toBe(1n);
+    // a later ms always outranks any seq of an earlier ms (up to the 1M/ms factor)
+    expect(BigInt(entryOrderKey(`${ms + 1}-0`))).toBeGreaterThan(k(999999));
+  });
+});
+
+// String-level checks on the generated Lua fragment: the gate and pcall wrapper, the
+// default and custom throttle expressions, and the optional CK-health tail.
+describe("createMetricsGaugeComputeLua", () => {
+  it("assigns __qm_g inside a gated, pcall-wrapped block and never XADDs", () => {
+    const lua = createMetricsGaugeComputeLua({
+      enabledArg: "ARGV[#ARGV] == '1'",
+      queued: "redis.call('ZCARD', KEYS[2])",
+      running: "queueCurrent",
+      queueLimit: "queueLimit",
+      envQueued: "redis.call('ZCARD', KEYS[8])",
+      envRunning: "envCurrent",
+      envLimit: "envLimit",
+    });
+
+    expect(lua).toContain("if ARGV[#ARGV] == '1' then");
+    expect(lua).toContain("pcall(function()");
+    expect(lua).toContain("__qm_g = {__ql, __cc, __lim, __eql, __ec, __elim, __thr}");
+    expect(lua).toContain("if __cc >= __lim and __ql > 0 then __thr = 1 end");
+    // The whole point of the refactor: no Redis write happens in the run-queue script.
+    expect(lua).not.toContain("XADD");
+  });
+
+  it("honors a custom throttled expression and preamble", () => {
+    const lua = createMetricsGaugeComputeLua({
+      enabledArg: "true",
+      preamble: "local agg = 1",
+      queued: "0",
+      running: "0",
+      queueLimit: "0",
+      envQueued: "0",
+      envRunning: "0",
+      envLimit: "0",
+      throttledExpr: "false",
+    });
+    expect(lua).toContain("local agg = 1");
+    expect(lua).toContain("if false then __thr = 1 end");
+    expect(lua).not.toContain("XADD");
+  });
+
+  it("appends the CK-health tail only when both CK params are set", () => {
+    const withCk = createMetricsGaugeComputeLua({
+      enabledArg: "true",
+      queued: "0",
+      running: "0",
+      queueLimit: "0",
+      envQueued: "0",
+      envRunning: "0",
+      envLimit: "0",
+      ckBacklogged: "redis.call('ZCARD', ckIndexKey)",
+      ckMaxWaitMs: "__ckwait",
+    });
+    expect(withCk).toContain(
+      "__qm_g = {__ql, __cc, __lim, __eql, __ec, __elim, __thr, __ckq, __ckw}"
+    );
+    expect(withCk).toContain("local __ckq = tonumber(redis.call('ZCARD', ckIndexKey)) or 0");
+
+    // Only one of the two CK params is given, so the tail must be left out.
+    const withoutCk = createMetricsGaugeComputeLua({
+      enabledArg: "true",
+      queued: "0",
+      running: "0",
+      queueLimit: "0",
+      envQueued: "0",
+      envRunning: "0",
+      envLimit: "0",
+      ckBacklogged: "0",
+    });
+    expect(withoutCk).toContain("__qm_g = {__ql, __cc, __lim, __eql, __ec, __elim, __thr}");
+    expect(withoutCk).not.toContain("__ckq");
+  });
+});
diff --git a/internal-packages/metrics-pipeline/src/types.ts b/internal-packages/metrics-pipeline/src/types.ts
new file mode 100644
index 00000000000..d9e9e43f554
--- /dev/null
+++ b/internal-packages/metrics-pipeline/src/types.ts
@@ -0,0 +1,42 @@
+/** Field/value pairs of one metric event, as handed to the emitter. */
+export type MetricFields = Record<string, string | number>;
+
+/** One stream entry: its id (`<ms>-<seq>`) and its fields, all as strings. */
+export type StreamEntry = {
+  id: string;
+  fields: Record<string, string>;
+};
+
+/** Describes one sharded metric stream family. */
+export type MetricDefinition = {
+  /** Logical name, e.g. "queue_metrics". Used as the stream key prefix. */
+  name: string;
+  /** Number of stream shards; shard indices start at 0. */
+  shardCount: number;
+  /** Consumer group name — presumably the group the stream consumer reads with; confirm
+   * against the consumer. */
+  consumerGroup: string;
+  /** Approximate MAXLEN cap applied on XADD (`MAXLEN ~ N`). Omit for unbounded. */
+  maxLen?: number;
+};
+
+// Keys are used verbatim on every access path (Lua ARGV, emitter, consumer), so
+// they must NOT be subject to an ioredis keyPrefix. `{shard}` is a Cluster hash tag.
+/** Stream key for one shard: `<name>:{<shard>}`. */
+export function streamKey(definition: Pick<MetricDefinition, "name">, shard: number): string {
+  return `${definition.name}:{${shard}}`;
+}
+
+/**
+ * Stream keys for every shard of a definition, in shard order.
+ *
+ * `shardCount` is truncated to an integer and floored at 1, and a non-finite count
+ * (NaN/Infinity, e.g. from a mis-parsed setting) is treated as a single shard. So the
+ * result is never empty and never unbounded.
+ */
+export function allStreamKeys(definition: MetricDefinition): string[] {
+  const { shardCount } = definition;
+  const shards = Number.isFinite(shardCount) ? Math.max(1, Math.floor(shardCount)) : 1;
+  return Array.from({ length: shards }, (_, shard) => streamKey(definition, shard));
+}
+
+/**
+ * Emission time of a stream entry: the millisecond part of its `<ms>-<seq>` id.
+ *
+ * @returns The ms value, or null when the part before the first `-` is not a plain run
+ *   of decimal digits (empty ids and ids starting with `-` included).
+ */
+export function entryTimeMs(id: string): number | null {
+  const msPart = id.split("-")[0] ?? "";
+  // Validate before converting: Number("") is 0 and Number() also accepts hex, exponent
+  // and whitespace-padded forms, none of which is a stream id timestamp.
+  if (!/^\d+$/.test(msPart)) return null;
+  const ms = Number(msPart);
+  return Number.isFinite(ms) ? ms : null;
+}
+
+/**
+ * Ordering key from a stream id (`<ms>-<seq>`) = ms*1e6+seq, for deltaSumTimestamp.
+ *
+ * Returned as a decimal string built with BigInt because ms*1e6 exceeds the JS
+ * safe-integer range at real epoch magnitudes (a number would collapse nearby seq
+ * values); the ClickHouse order_key column is UInt64 and takes the string. The 1e6 factor
+ * (1M entries/ms/shard, far above any single Redis stream) stays within UInt64.
+ *
+ * Each part is parsed straight from its digits, so large values stay exact. A part that
+ * is missing or not a plain run of decimal digits counts as 0 rather than throwing.
+ */
+export function entryOrderKey(id: string): string {
+  const part = (raw: string | undefined): bigint =>
+    raw !== undefined && /^\d+$/.test(raw) ? BigInt(raw) : 0n;
+  const [ms, seq] = id.split("-");
+  return (part(ms) * 1000000n + part(seq)).toString();
+}
diff --git a/internal-packages/metrics-pipeline/test/setup.ts b/internal-packages/metrics-pipeline/test/setup.ts
new file mode 100644
index 00000000000..b2bacd6baf5
--- /dev/null
+++ b/internal-packages/metrics-pipeline/test/setup.ts
@@ -0,0 +1,4 @@
+import { vi } from "vitest";
+
+// Set extended timeout for container tests.
+// NOTE(review): vitest.config.ts also sets testTimeout (30000); presumably this call takes
+// precedence for test files that load this setup — confirm which value is intended.
+vi.setConfig({ testTimeout: 60_000 });
diff --git a/internal-packages/metrics-pipeline/tsconfig.build.json b/internal-packages/metrics-pipeline/tsconfig.build.json
new file mode 100644
index 00000000000..89c87a3dc67
--- /dev/null
+++ b/internal-packages/metrics-pipeline/tsconfig.build.json
@@ -0,0 +1,21 @@
+{
+  "include": ["src/**/*.ts"],
+  "exclude": ["src/**/*.test.ts"],
+  "compilerOptions": {
+    "composite": true,
+    "target": "ES2020",
+    "lib": ["ES2020", "DOM", "DOM.Iterable", "DOM.AsyncIterable"],
+    "outDir": "dist",
+    "module": "Node16",
+    "moduleResolution": "Node16",
+    "moduleDetection": "force",
+    "verbatimModuleSyntax": false,
+    "esModuleInterop": true,
+    "forceConsistentCasingInFileNames": true,
+    "isolatedModules": true,
+    "preserveWatchOutput": true,
+    "skipLibCheck": true,
+    "strict": true,
+    "declaration": true
+  }
+}
diff --git a/internal-packages/metrics-pipeline/tsconfig.json b/internal-packages/metrics-pipeline/tsconfig.json
new file mode 100644
index 00000000000..af630abe1f1
--- /dev/null
+++ b/internal-packages/metrics-pipeline/tsconfig.json
@@ -0,0 +1,8 @@
+{
+  "references": [{ "path": "./tsconfig.src.json" }, { "path": "./tsconfig.test.json" }],
+  "compilerOptions": {
+    "moduleResolution": "Node16",
+    "module": "Node16",
+    "customConditions": ["@triggerdotdev/source"]
+  }
+}
diff --git a/internal-packages/metrics-pipeline/tsconfig.src.json b/internal-packages/metrics-pipeline/tsconfig.src.json
new file mode 100644
index 00000000000..0df3d2d222f
--- /dev/null
+++ b/internal-packages/metrics-pipeline/tsconfig.src.json
@@ -0,0 +1,20 @@
+{
+  "include": ["src/**/*.ts"],
+  "exclude": ["node_modules", "src/**/*.test.ts"],
+  "compilerOptions": {
+    "composite": true,
+    "target": "ES2020",
+    "lib": ["ES2020", "DOM", "DOM.Iterable", "DOM.AsyncIterable"],
+    "module": "Node16",
+    "moduleResolution": "Node16",
+    "moduleDetection": "force",
+    "verbatimModuleSyntax": false,
+    "esModuleInterop": true,
+    "forceConsistentCasingInFileNames": true,
+    "isolatedModules": true,
+    "preserveWatchOutput": true,
+    "skipLibCheck": true,
+    "strict": true,
+    "customConditions": ["@triggerdotdev/source"]
+  }
+}
diff --git a/internal-packages/metrics-pipeline/tsconfig.test.json b/internal-packages/metrics-pipeline/tsconfig.test.json
new file mode 100644
index 00000000000..4c06c9f57bb
--- /dev/null
+++ b/internal-packages/metrics-pipeline/tsconfig.test.json
@@ -0,0 +1,21 @@
+{
+  "include": ["src/**/*.test.ts"],
+  "references": [{ "path": "./tsconfig.src.json" }],
+  "compilerOptions": {
+    "composite": true,
+    "target": "ES2020",
+    "lib": ["ES2020", "DOM", "DOM.Iterable", "DOM.AsyncIterable"],
+    "module": "Node16",
+    "moduleResolution": "Node16",
+    "moduleDetection": "force",
+    "verbatimModuleSyntax": false,
+    "types": ["vitest/globals"],
+    "esModuleInterop": true,
+    "forceConsistentCasingInFileNames": true,
+    "isolatedModules": true,
+    "preserveWatchOutput": true,
+    "skipLibCheck": true,
+    "strict": true,
+    "customConditions": ["@triggerdotdev/source"]
+  }
+}
diff --git a/internal-packages/metrics-pipeline/vitest.config.ts b/internal-packages/metrics-pipeline/vitest.config.ts
new file mode 100644
index 00000000000..daafd294fa8
--- /dev/null
+++ b/internal-packages/metrics-pipeline/vitest.config.ts
@@ -0,0 +1,17 @@
+import { defineConfig } from "vitest/config";
+import { DurationShardingSequencer } from "@internal/testcontainers/sequencer";
+
+// Vitest configuration for the metrics-pipeline package.
+export default defineConfig({
+  test: {
+    sequence: { sequencer: DurationShardingSequencer },
+    globals: true,
+    // Retry twice when the CI env var is set; no retries otherwise.
+    retry: process.env.CI ? 2 : 0,
+    environment: "node",
+    setupFiles: ["./test/setup.ts"],
+    // NOTE(review): test/setup.ts calls vi.setConfig({ testTimeout: 60_000 }), which
+    // presumably supersedes this value at runtime — confirm which one is intended.
+    testTimeout: 30000,
+    hookTimeout: 30000,
+  },
+  esbuild: {
+    target: "node18",
+  },
+});
diff --git a/internal-packages/run-engine/package.json b/internal-packages/run-engine/package.json
index 8d53974d10b..516e6a18696 100644
--- a/internal-packages/run-engine/package.json
+++ b/internal-packages/run-engine/package.json
@@ -21,6 +21,7 @@
   },
   "dependencies": {
     "@internal/redis": "workspace:*",
+    "@internal/metrics-pipeline": "workspace:*",
     "@internal/run-store": "workspace:*",
     "@trigger.dev/redis-worker": "workspace:*",
     "@internal/tracing": "workspace:*",
diff --git a/internal-packages/run-engine/src/engine/index.ts b/internal-packages/run-engine/src/engine/index.ts
index f3091c93b88..c55184e594f 100644
--- a/internal-packages/run-engine/src/engine/index.ts
+++ b/internal-packages/run-engine/src/engine/index.ts
@@ -218,6 +218,7 @@ export class RunEngine {
         callback: this.#concurrencySweeperCallback.bind(this),
       },
       shardCount: options.queue?.shardCount,
+      queueMetrics: options.queue?.queueMetrics,
       masterQueueConsumersDisabled: options.queue?.masterQueueConsumersDisabled,
       masterQueueConsumersIntervalMs: options.queue?.masterQueueConsumersIntervalMs,
       processWorkerQueueDebounceMs: options.queue?.processWorkerQueueDebounceMs,
@@ -1628,6 +1629,14 @@ export class RunEngine {
     return this.runQueue.currentConcurrencyOfQueues(environment, queues);
   }
 
+  /**
+   * Forwards to `runQueue.concurrencyKeyBreakdown` for the given environment and queue,
+   * passing `options` (e.g. `limit`) through unchanged.
+   */
+  async concurrencyKeyBreakdown(
+    environment: MinimalAuthenticatedEnvironment,
+    queue: string,
+    options?: { limit?: number }
+  ) {
+    return this.runQueue.concurrencyKeyBreakdown(environment, queue, options);
+  }
+
   async removeEnvironmentQueuesFromMasterQueue({
     runtimeEnvironmentId,
     organizationId,
diff --git a/internal-packages/run-engine/src/engine/systems/enqueueSystem.ts b/internal-packages/run-engine/src/engine/systems/enqueueSystem.ts
index dc9d029c38c..4b236aefc16 100644
--- a/internal-packages/run-engine/src/engine/systems/enqueueSystem.ts
+++ b/internal-packages/run-engine/src/engine/systems/enqueueSystem.ts
@@ -98,10 +98,16 @@ export class EnqueueSystem {
       // Force development runs to use the environment id as the worker queue.
       const workerQueue = env.type === "DEVELOPMENT" ? env.id : run.workerQueue;
 
-      const timestamp = (run.queueTimestamp ?? run.createdAt).getTime() - run.priorityMs;
+      // Ordering keeps the run's original position; the scheduling-delay anchor is the
+      // trigger/delay time only on first enqueue (includeTtl). Re-enqueues anchor to now,
+      // else the wait metric absorbs the whole waitpoint/checkpoint duration.
+      const queuePositionMs = (run.queueTimestamp ?? run.createdAt).getTime();
+      const timestamp = queuePositionMs - run.priorityMs;
+      const eligibleAtMs = includeTtl ? queuePositionMs : Date.now();
 
-      // Include TTL only when explicitly requested (first enqueue from trigger).
-      // Re-enqueues (waitpoint, checkpoint, delayed, pending version) must not add TTL.
+      // Include TTL only when explicitly requested (first enqueue from trigger or the
+      // delayed-run system). Re-enqueues (waitpoint, checkpoint, pending version) must
+      // not add TTL.
       let ttlExpiresAt: number | undefined;
       if (includeTtl && run.ttl) {
         const expireAt = parseNaturalLanguageDuration(run.ttl);
@@ -124,6 +130,7 @@ export class EnqueueSystem {
           queue: run.queue,
           concurrencyKey: run.concurrencyKey ?? undefined,
           timestamp,
+          eligibleAtMs,
           attempt: 0,
           ttlExpiresAt,
         },
diff --git a/internal-packages/run-engine/src/engine/tests/ttl.test.ts b/internal-packages/run-engine/src/engine/tests/ttl.test.ts
index 949e47f8574..e33b361abdb 100644
--- a/internal-packages/run-engine/src/engine/tests/ttl.test.ts
+++ b/internal-packages/run-engine/src/engine/tests/ttl.test.ts
@@ -293,7 +293,12 @@ describe("RunEngine ttl", () => {
         );
         assertNonNullable(messageAfterTrigger);
         expect(messageAfterTrigger.ttlExpiresAt).toBeDefined();
+        // First enqueue anchors the scheduling-delay clock at the trigger time.
+        expect(messageAfterTrigger.eligibleAtMs).toBe(
+          (run.queueTimestamp ?? run.createdAt).getTime()
+        );
 
+        const beforeReenqueue = Date.now();
         await engine.enqueueSystem.enqueueRun({
           run,
           env: authenticatedEnvironment,
@@ -308,6 +313,10 @@ describe("RunEngine ttl", () => {
         );
         assertNonNullable(messageAfterReenqueue);
         expect(messageAfterReenqueue.ttlExpiresAt).toBeUndefined();
+        // Re-enqueues anchor to now so the wait metric measures only this queue stint,
+        // while the ordering timestamp keeps the run's original position.
+        expect(messageAfterReenqueue.eligibleAtMs).toBeGreaterThanOrEqual(beforeReenqueue);
+        expect(messageAfterReenqueue.timestamp).toBe(messageAfterTrigger.timestamp);
       } finally {
         await engine.quit();
       }
diff --git a/internal-packages/run-engine/src/engine/types.ts b/internal-packages/run-engine/src/engine/types.ts
index bb1d6eb2fa9..f37ec7df50a 100644
--- a/internal-packages/run-engine/src/engine/types.ts
+++ b/internal-packages/run-engine/src/engine/types.ts
@@ -16,6 +16,7 @@ import {
 } from "@trigger.dev/redis-worker";
 import type { ControlPlaneResolver } from "./controlPlaneResolver.js";
 import type { FairQueueSelectionStrategyOptions } from "../run-queue/fairQueueSelectionStrategy.js";
+import type { RunQueueMetricsEmitter } from "../run-queue/index.js";
 import type { MinimalAuthenticatedEnvironment } from "../shared/index.js";
 import type { LockRetryConfig } from "./locking.js";
 import type { workerCatalog } from "./workerCatalog.js";
@@ -90,6 +91,8 @@ export type RunEngineOptions = {
     defaultEnvConcurrency?: number;
     defaultEnvConcurrencyBurstFactor?: number;
     logLevel?: LogLevel;
+    /** Optional queue-metrics emitter; enables gauge + counter emission from the RunQueue. */
+    queueMetrics?: RunQueueMetricsEmitter;
     queueSelectionStrategyOptions?: Pick<
       FairQueueSelectionStrategyOptions,
       "parentQueueLimit" | "tracer" | "biases" | "reuseSnapshotCount" | "maximumEnvCount"
diff --git a/internal-packages/run-engine/src/run-queue/index.ts b/internal-packages/run-engine/src/run-queue/index.ts
index a0571206538..4e6ca89d847 100644
--- a/internal-packages/run-engine/src/run-queue/index.ts
+++ b/internal-packages/run-engine/src/run-queue/index.ts
@@ -5,6 +5,7 @@ import {
   type RedisOptions,
   type Result,
 } from "@internal/redis";
+import { createMetricsGaugeComputeLua } from "@internal/metrics-pipeline";
 import type {
   Attributes,
   Meter,
@@ -57,6 +58,99 @@ const SemanticAttributes = {
   ORG_ID: "runqueue.orgId",
 };
 
+// Lua prelude spliced at the top of every gauge-carrying script. It declares the gauge slot
+// (__qm_g, false until a gauge splice fills it) and the return wrapper __qmret; every return
+// routed through __qmret replies {original, gauge}. A nil original is mapped to false: Redis
+// truncates a Lua table reply at the first nil, which would swallow the gauge on throttle paths.
+const QUEUE_METRICS_GAUGE_PRELUDE = `
+local __qm_g = false
+local function __qmret(r) if r == nil then r = false end return {r, __qm_g} end`;
+
+// Fresh-read gauge (values read from Redis at the splice point) for places with no reusable
+// locals: the enqueue slow path (before return 0) and the top of the base dequeue. Inert unless
+// the last ARGV is '1'. CK queues emit per-subqueue depth (queue_name aggregates via the MV).
+const QUEUE_METRICS_GAUGE_LUA = createMetricsGaugeComputeLua({
+  enabledArg: "ARGV[#ARGV] == '1'",
+  queued: "redis.call('ZCARD', queueKey)",
+  running: "redis.call('SCARD', queueCurrentConcurrencyKey)",
+  queueLimit: "redis.call('GET', queueConcurrencyLimitKey) or '1000000'",
+  envQueued: "redis.call('ZCARD', envQueueKey)",
+  envRunning: "redis.call('SCARD', envCurrentConcurrencyKey)",
+  envLimit: "redis.call('GET', envConcurrencyLimitKey) or defaultEnvConcurrencyLimit",
+});
+
+// Enqueue fast-path gauge. The admission check already computed queueCurrent, envCurrent,
+// queueLimit and envLimit, so they are reused (only the 2 ZCARDs are fresh reads). The fast
+// path implies cc < lim, so thr is always 0; the effective queueLimit is fine (max() recovers raw).
+const QUEUE_METRICS_ENQUEUE_FASTPATH_GAUGE_LUA = createMetricsGaugeComputeLua({
+  enabledArg: "ARGV[#ARGV] == '1'",
+  queued: "redis.call('ZCARD', queueKey)",
+  running: "queueCurrent",
+  queueLimit: "queueLimit",
+  envQueued: "redis.call('ZCARD', envQueueKey)",
+  envRunning: "envCurrent",
+  envLimit: "envLimit",
+});
+
+// CK-health extras: backlogged-key count (ZCARD ckIndex) and most-starved head-of-line wait
+// (currentTime - lowest ckIndex score, clamped >= 0). Needs ckIndexKey/currentTime Lua locals.
+const QUEUE_METRICS_CK_GAUGE_EXTRAS = {
+  preamble: `local __ckhead = redis.call('ZRANGE', ckIndexKey, 0, 0, 'WITHSCORES')
+    local __ckwait = 0
+    if #__ckhead > 0 then __ckwait = math.floor(math.max(0, (tonumber(currentTime) or 0) - (tonumber(__ckhead[2]) or 0))) end`,
+  ckBacklogged: "redis.call('ZCARD', ckIndexKey)",
+  ckMaxWaitMs: "__ckwait",
+};
+
+// CK enqueue variants of the two gauges above (fresh-read, then fast-path) + the CK-health tail.
+const QUEUE_METRICS_CK_ENQUEUE_GAUGE_LUA = createMetricsGaugeComputeLua({
+  enabledArg: "ARGV[#ARGV] == '1'",
+  queued: "redis.call('ZCARD', queueKey)",
+  running: "redis.call('SCARD', queueCurrentConcurrencyKey)",
+  queueLimit: "redis.call('GET', queueConcurrencyLimitKey) or '1000000'",
+  envQueued: "redis.call('ZCARD', envQueueKey)",
+  envRunning: "redis.call('SCARD', envCurrentConcurrencyKey)",
+  envLimit: "redis.call('GET', envConcurrencyLimitKey) or defaultEnvConcurrencyLimit",
+  ...QUEUE_METRICS_CK_GAUGE_EXTRAS,
+});
+
+const QUEUE_METRICS_CK_ENQUEUE_FASTPATH_GAUGE_LUA = createMetricsGaugeComputeLua({
+  enabledArg: "ARGV[#ARGV] == '1'",
+  queued: "redis.call('ZCARD', queueKey)",
+  running: "queueCurrent",
+  queueLimit: "queueLimit",
+  envQueued: "redis.call('ZCARD', envQueueKey)",
+  envRunning: "envCurrent",
+  envLimit: "envLimit",
+  ...QUEUE_METRICS_CK_GAUGE_EXTRAS,
+});
+
+// CK dequeue gauge: depth and running come from the per-base-queue aggregate counters the
+// run-queue already maintains (two O(1) GETs, no per-variant scan). thr is forced to false:
+// aggregate cc >= per-CK limit would over-report; per-subqueue enqueue gauges catch CK throttle.
+const QUEUE_METRICS_CK_DEQUEUE_GAUGE_LUA = createMetricsGaugeComputeLua({
+  enabledArg: "ARGV[#ARGV] == '1'",
+  queued: "redis.call('GET', lengthCounterKey) or '0'",
+  running: "redis.call('GET', runningCounterKey) or '0'",
+  queueLimit: "redis.call('GET', queueConcurrencyLimitKey) or '1000000'",
+  envQueued: "redis.call('ZCARD', envQueueKey)",
+  envRunning: "redis.call('SCARD', envCurrentConcurrencyKey)",
+  envLimit: "redis.call('GET', envConcurrencyLimitKey) or defaultEnvConcurrencyLimit",
+  throttledExpr: "false",
+  ...QUEUE_METRICS_CK_GAUGE_EXTRAS,
+});
+
+/** Injected queue-metrics stream emitter. Contract: every call is a no-op while metrics are disabled. */
+export interface RunQueueMetricsEmitter {
+  enabledSync(): boolean;
+  /** True when enabled AND sampled-in; RunQueue uses it to gate the in-script (Lua) gauge read. */
+  sampledSync(): boolean;
+  /** Counter event (cumulative odometer). RunQueue passes the base queue key as `shardKey`. */
+  emit(shardKey: string, fields: Record<string, string | number>): void;
+  /** Gauge snapshot read inside the queue-op Lua and returned on the script reply. */
+  emitGauge(shardKey: string, fields: Record<string, string | number>): void;
+}
+
 export type RunQueueOptions = {
   name: string;
   tracer: Tracer;
@@ -93,6 +187,8 @@ export type RunQueueOptions = {
     disabled?: boolean;
   };
   meter?: Meter;
+  /** When set, enqueue/dequeue/ack/nack/dlq emit queue-metrics events (gated on the emitter's flag). */
+  queueMetrics?: RunQueueMetricsEmitter;
   dequeueBlockingTimeoutSeconds?: number;
   concurrencySweeper?: {
     scanSchedule?: string;
@@ -458,6 +554,65 @@ export class RunQueue {
     );
   }
 
+  /**
+   * Live per-concurrency-key breakdown of a queue's backlog, most-starved first.
+   * Reads the ckIndex zset (members = CK subqueue names, scores = oldest-message
+   * timestamps), so only keys with queued work appear; running-only keys do not.
+   * `options.limit` caps the keys returned (default 50; floored, clamped to >= 0).
+   */
+  public async concurrencyKeyBreakdown(
+    env: MinimalAuthenticatedEnvironment,
+    queue: string,
+    options?: { limit?: number }
+  ): Promise<{
+    totalBackloggedKeys: number;
+    keys: Array<{
+      concurrencyKey: string;
+      queued: number;
+      running: number;
+      oldestEnqueuedAt: number;
+    }>;
+  }> {
+    const limit = Math.max(0, Math.floor(options?.limit ?? 50));
+    const ckIndexKey = this.keys.ckIndexKeyFromQueue(this.keys.queueKey(env, queue));
+
+    const indexPipeline = this.redis.pipeline();
+    indexPipeline.zcard(ckIndexKey);
+    // ZRANGE 0 -1 means "everything", so a zero limit must skip the range read entirely.
+    if (limit > 0) indexPipeline.zrange(ckIndexKey, 0, limit - 1, "WITHSCORES");
+    const indexResults = await indexPipeline.exec();
+    if (!indexResults) return { totalBackloggedKeys: 0, keys: [] };
+
+    const [totalErr, totalVal] = indexResults[0];
+    const [rangeErr, rangeVal] = indexResults[1] ?? [null, null];
+    const totalBackloggedKeys = totalErr || totalVal == null ? 0 : (totalVal as number);
+    const flat = rangeErr || rangeVal == null ? [] : (rangeVal as string[]);
+
+    const members: Array<{ member: string; score: number }> = [];
+    for (let i = 0; i < flat.length; i += 2) {
+      members.push({ member: flat[i], score: Number(flat[i + 1]) });
+    }
+    if (members.length === 0) return { totalBackloggedKeys, keys: [] };
+
+    const statsPipeline = this.redis.pipeline();
+    for (const { member } of members) {
+      statsPipeline.zcard(member);
+      statsPipeline.scard(this.keys.queueCurrentConcurrencyKeyFromQueue(member));
+    }
+    const stats = await statsPipeline.exec();
+
+    const okCount = (reply?: [Error | null, unknown]): number =>
+      reply && !reply[0] ? ((reply[1] as number) ?? 0) : 0;
+    const keys = members.map(({ member, score }, i) => ({
+      concurrencyKey: this.#concurrencyKeyFromQueue(member) ?? "",
+      queued: okCount(stats?.[i * 2]),
+      running: okCount(stats?.[i * 2 + 1]),
+      oldestEnqueuedAt: score,
+    }));
+
+    return { totalBackloggedKeys, keys };
+  }
+
   public async lengthOfEnvQueue(env: MinimalAuthenticatedEnvironment) {
     return this.redis.zcard(this.keys.envQueueKey(env));
   }
@@ -751,6 +906,8 @@ export class RunQueue {
 
         span.setAttribute("fastPath", fastPathTaken);
 
+        this.#emitQueueMetric(queueKey, { op: "enqueue", q: queueKey });
+
         if (!fastPathTaken && !skipDequeueProcessing) {
           // Slow path: schedule the dequeue job to move the message from queue to worker queue
           await this.worker.enqueueOnce({
@@ -810,6 +967,15 @@ export class RunQueue {
           ...flattenAttributes(dequeuedMessage.message, "message"),
         });
 
+        const startedFields: Record<string, string | number> = {
+          op: "started",
+          q: dequeuedMessage.message.queue,
+        };
+        if (typeof dequeuedMessage.message.eligibleAtMs === "number") {
+          startedFields.wait = Math.max(0, Date.now() - dequeuedMessage.message.eligibleAtMs);
+        }
+        this.#emitQueueMetric(dequeuedMessage.message.queue, startedFields);
+
         return dequeuedMessage;
       },
       {
@@ -877,6 +1043,8 @@ export class RunQueue {
           message,
           removeFromWorkerQueue: options?.removeFromWorkerQueue,
         });
+
+        this.#emitQueueMetric(message.queue, { op: "ack", q: message.queue });
       },
       {
         kind: SpanKind.CONSUMER,
@@ -934,6 +1102,7 @@ export class RunQueue {
           message.attempt = message.attempt + 1;
           if (message.attempt >= maxAttempts) {
             await this.#callMoveToDeadLetterQueue({ message });
+            this.#emitQueueMetric(message.queue, { op: "dlq", q: message.queue });
             return false;
           }
         }
@@ -960,6 +1129,8 @@ export class RunQueue {
 
         await this.#callNackMessage({ message, retryAt });
 
+        this.#emitQueueMetric(message.queue, { op: "nack", q: message.queue });
+
         return true;
       },
       {
@@ -1831,6 +2002,57 @@ export class RunQueue {
    *
    * @returns true if the fast path was taken (message pushed directly to worker queue)
    */
+  /** Gauge gate ARGV: "1" only when enabled AND sampled-in (sampling gates gauges, not counters). */
+  #queueMetricsGaugeArg(): string {
+    return this.options.queueMetrics?.sampledSync() ? "1" : "0";
+  }
+
+  // Forwards a gauge that a queue-op script returned on its reply to the metrics emitter.
+  // The gauge is a flat [ql, cc, lim, eql, ec, elim, thr] array; CK-path scripts append an
+  // optional [ckq, ckw] tail. Unlike counters, gauges are NOT base-normalized: q keeps its
+  // :ck: suffix so CK-aggregate and per-subqueue readings stay distinguishable; the consumer's
+  // mapEntry strips :ck: to the base queue_name and the MV maxes them into one row.
+  #emitGauge(queue: string, gauge: number[]): void {
+    if (!Array.isArray(gauge) || gauge.length < 7) return;
+
+    const [ql, cc, lim, eql, ec, elim, thr, ckq, ckw] = gauge;
+    const ckTail = gauge.length >= 9 ? { ckq, ckw } : {};
+    const fields: Record<string, string | number> = {
+      op: "gauge",
+      q: queue,
+      ql,
+      cc,
+      lim,
+      eql,
+      ec,
+      elim,
+      thr,
+      ...ckTail,
+    };
+
+    this.options.queueMetrics?.emitGauge(queue, fields);
+  }
+
+  #concurrencyKeyFromQueue(queue: string): string | undefined {
+    // Text after the first ":ck:" marker; undefined when the marker is absent or nothing follows.
+    return queue.split(":ck:").slice(1).join(":ck:") || undefined;
+  }
+
+  #emitQueueMetric(shardKey: string, fields: Record<string, string | number>): void {
+    // Counters roll up per BASE queue: the CK-qualified queue is normalized to its base so all
+    // concurrency keys share one odometer and shard key; a real concurrency key (not the "*"
+    // placeholder) rides along as `ck`, driving a separate per-key odometer on the same entry.
+    const emitter = this.options.queueMetrics;
+    if (!emitter) return; // no emitter configured: skip key parsing on this per-operation path
+    let baseFields = fields;
+    if (typeof fields.q === "string") {
+      baseFields = { ...fields, q: this.keys.baseQueueKeyFromQueue(fields.q) };
+      const ck = this.#concurrencyKeyFromQueue(fields.q);
+      if (ck && ck !== "*") baseFields.ck = ck;
+    }
+    emitter.emit(this.keys.baseQueueKeyFromQueue(shardKey), baseFields);
+  }
+
   async #callEnqueueMessage(
     message: OutputPayloadV2,
     ttlInfo?: {
@@ -1869,6 +2091,7 @@ export class RunQueue {
     const messageScore = String(message.timestamp);
     const currentTime = String(Date.now());
     const enableFastPathArg = enableFastPath ? "1" : "0";
+    const metricsGaugeArg = this.#queueMetricsGaugeArg();
     const defaultEnvConcurrencyLimit = String(this.options.defaultEnvConcurrency);
     const defaultEnvConcurrencyBurstFactor = String(
       this.options.defaultEnvConcurrencyBurstFactor ?? 1.0
@@ -1892,7 +2115,8 @@ export class RunQueue {
       service: this.name,
     });
 
-    let result: number;
+    // Every gauge-carrying script returns a 2-tuple [originalReturn, gauge|null].
+    let result: [number, number[] | null];
 
     // Use CK-aware enqueue for messages with concurrency keys
     if (message.concurrencyKey) {
@@ -1935,7 +2159,8 @@ export class RunQueue {
           currentTime,
           enableFastPathArg,
           ckKeyPrefix,
-          String(this.counterTtlSeconds)
+          String(this.counterTtlSeconds),
+          metricsGaugeArg
         );
       } else {
         result = await this.redis.enqueueMessageCkTracked(
@@ -1967,7 +2192,8 @@ export class RunQueue {
           currentTime,
           enableFastPathArg,
           ckKeyPrefix,
-          String(this.counterTtlSeconds)
+          String(this.counterTtlSeconds),
+          metricsGaugeArg
         );
       }
     } else if (ttlInfo) {
@@ -1998,7 +2224,8 @@ export class RunQueue {
         defaultEnvConcurrencyLimit,
         defaultEnvConcurrencyBurstFactor,
         currentTime,
-        enableFastPathArg
+        enableFastPathArg,
+        metricsGaugeArg
       );
     } else {
       result = await this.redis.enqueueMessage(
@@ -2024,11 +2251,14 @@ export class RunQueue {
         defaultEnvConcurrencyLimit,
         defaultEnvConcurrencyBurstFactor,
         currentTime,
-        enableFastPathArg
+        enableFastPathArg,
+        metricsGaugeArg
       );
     }
 
-    return result === 1;
+    const [enqueueResult, gauge] = result;
+    if (gauge) this.#emitGauge(queueName, gauge);
+    return enqueueResult === 1;
   }
 
   async #callDequeueMessagesFromQueue({
@@ -2081,7 +2311,9 @@ export class RunQueue {
         maxCount,
       });
 
-      const result = await this.redis.dequeueMessagesFromQueue(
+      const metricsGaugeArg = this.#queueMetricsGaugeArg();
+
+      const reply = await this.redis.dequeueMessagesFromQueue(
         //keys
         messageQueue,
         queueConcurrencyLimitKey,
@@ -2099,9 +2331,16 @@ export class RunQueue {
         String(this.options.defaultEnvConcurrency),
         String(this.options.defaultEnvConcurrencyBurstFactor ?? 1),
         this.options.redis.keyPrefix ?? "",
-        String(maxCount)
+        String(maxCount),
+        metricsGaugeArg
       );
 
+      // Reply is [flatMessages|null, gauge|null]: emit the gauge (read atomically inside
+      // the script, present on the throttle/empty paths too) and keep element 0 as the array.
+      const gauge = reply?.[1] ?? null;
+      if (gauge) this.#emitGauge(messageQueue, gauge);
+      const result = reply?.[0] ?? null;
+
       if (!result) {
         span.setAttribute("message_count", 0);
 
@@ -2202,8 +2441,11 @@ export class RunQueue {
       });
 
       const lengthCounterKey = this.keys.queueLengthCounterKeyFromQueue(ckWildcardQueue);
+      const runningCounterKey = this.keys.queueRunningCounterKeyFromQueue(ckWildcardQueue);
+
+      const metricsGaugeArg = this.#queueMetricsGaugeArg();
 
-      const result = await this.redis.dequeueMessagesFromCkQueueTracked(
+      const reply = await this.redis.dequeueMessagesFromCkQueueTracked(
         //keys
         ckIndexKey,
         queueConcurrencyLimitKey,
@@ -2215,15 +2457,22 @@ export class RunQueue {
         masterQueueKey,
         ttlQueueKey,
         lengthCounterKey,
+        runningCounterKey,
         //args
         ckWildcardQueue,
         String(Date.now()),
         String(this.options.defaultEnvConcurrency),
         String(this.options.defaultEnvConcurrencyBurstFactor ?? 1),
         this.options.redis.keyPrefix ?? "",
-        String(maxCount)
+        String(maxCount),
+        metricsGaugeArg
       );
 
+      // Reply is [flatMessages|null, gauge|null]; the CK aggregate gauge rides here.
+      const gauge = reply?.[1] ?? null;
+      if (gauge) this.#emitGauge(ckWildcardQueue, gauge);
+      const result = reply?.[0] ?? null;
+
       if (!result) {
         span.setAttribute("message_count", 0);
         return [];
@@ -3062,6 +3311,8 @@ local defaultEnvConcurrencyBurstFactor = ARGV[7]
 local currentTime = ARGV[8]
 local enableFastPath = ARGV[9]
 
+${QUEUE_METRICS_GAUGE_PRELUDE}
+
 -- Fast path: check if we can skip the queue and go directly to worker queue
 if enableFastPath == '1' then
   local available = redis.call('ZRANGEBYSCORE', queueKey, '-inf', currentTime, 'LIMIT', 0, 1)
@@ -3083,7 +3334,8 @@ if enableFastPath == '1' then
         redis.call('SADD', queueCurrentConcurrencyKey, messageId)
         redis.call('SADD', envCurrentConcurrencyKey, messageId)
         redis.call('RPUSH', workerQueueKey, messageKeyValue)
-        return 1
+${QUEUE_METRICS_ENQUEUE_FASTPATH_GAUGE_LUA}
+        return __qmret(1)
       end
     end
   end
@@ -3113,8 +3365,9 @@ redis.call('SREM', queueCurrentConcurrencyKey, messageId)
 redis.call('SREM', envCurrentConcurrencyKey, messageId)
 redis.call('SREM', queueCurrentDequeuedKey, messageId)
 redis.call('SREM', envCurrentDequeuedKey, messageId)
+${QUEUE_METRICS_GAUGE_LUA}
 
-return 0
+return __qmret(0)
       `,
     });
 
@@ -3153,6 +3406,8 @@ local defaultEnvConcurrencyBurstFactor = ARGV[9]
 local currentTime = ARGV[10]
 local enableFastPath = ARGV[11]
 
+${QUEUE_METRICS_GAUGE_PRELUDE}
+
 -- Fast path: check if we can skip the queue and go directly to worker queue
 if enableFastPath == '1' then
   local available = redis.call('ZRANGEBYSCORE', queueKey, '-inf', currentTime, 'LIMIT', 0, 1)
@@ -3174,8 +3429,9 @@ if enableFastPath == '1' then
         redis.call('SADD', queueCurrentConcurrencyKey, messageId)
         redis.call('SADD', envCurrentConcurrencyKey, messageId)
         redis.call('RPUSH', workerQueueKey, messageKeyValue)
+${QUEUE_METRICS_ENQUEUE_FASTPATH_GAUGE_LUA}
         -- Skip TTL sorted set: the expireRun worker job handles TTL expiry independently
-        return 1
+        return __qmret(1)
       end
     end
   end
@@ -3208,8 +3464,9 @@ redis.call('SREM', queueCurrentConcurrencyKey, messageId)
 redis.call('SREM', envCurrentConcurrencyKey, messageId)
 redis.call('SREM', queueCurrentDequeuedKey, messageId)
 redis.call('SREM', envCurrentDequeuedKey, messageId)
+${QUEUE_METRICS_GAUGE_LUA}
 
-return 0
+return __qmret(0)
       `,
     });
 
@@ -3246,6 +3503,8 @@ local defaultEnvConcurrencyBurstFactor = ARGV[8]
 local currentTime = ARGV[9]
 local enableFastPath = ARGV[10]
 
+${QUEUE_METRICS_GAUGE_PRELUDE}
+
 -- Fast path: check if we can skip the queue and go directly to worker queue
 if enableFastPath == '1' then
   local available = redis.call('ZRANGEBYSCORE', queueKey, '-inf', currentTime, 'LIMIT', 0, 1)
@@ -3268,7 +3527,8 @@ if enableFastPath == '1' then
         redis.call('SADD', queueCurrentConcurrencyKey, messageId)
         redis.call('SADD', envCurrentConcurrencyKey, messageId)
         redis.call('RPUSH', workerQueueKey, messageKeyValue)
-        return 1
+${QUEUE_METRICS_CK_ENQUEUE_FASTPATH_GAUGE_LUA}
+        return __qmret(1)
       end
     end
   end
@@ -3304,8 +3564,9 @@ redis.call('SREM', queueCurrentConcurrencyKey, messageId)
 redis.call('SREM', envCurrentConcurrencyKey, messageId)
 redis.call('SREM', queueCurrentDequeuedKey, messageId)
 redis.call('SREM', envCurrentDequeuedKey, messageId)
+${QUEUE_METRICS_CK_ENQUEUE_GAUGE_LUA}
 
-return 0
+return __qmret(0)
       `,
     });
 
@@ -3344,6 +3605,8 @@ local defaultEnvConcurrencyBurstFactor = ARGV[10]
 local currentTime = ARGV[11]
 local enableFastPath = ARGV[12]
 
+${QUEUE_METRICS_GAUGE_PRELUDE}
+
 -- Fast path: check if we can skip the queue and go directly to worker queue
 if enableFastPath == '1' then
   local available = redis.call('ZRANGEBYSCORE', queueKey, '-inf', currentTime, 'LIMIT', 0, 1)
@@ -3365,8 +3628,9 @@ if enableFastPath == '1' then
         redis.call('SADD', queueCurrentConcurrencyKey, messageId)
         redis.call('SADD', envCurrentConcurrencyKey, messageId)
         redis.call('RPUSH', workerQueueKey, messageKeyValue)
+${QUEUE_METRICS_CK_ENQUEUE_FASTPATH_GAUGE_LUA}
         -- Skip TTL sorted set: the expireRun worker job handles TTL expiry independently
-        return 1
+        return __qmret(1)
       end
     end
   end
@@ -3405,8 +3669,9 @@ redis.call('SREM', queueCurrentConcurrencyKey, messageId)
 redis.call('SREM', envCurrentConcurrencyKey, messageId)
 redis.call('SREM', queueCurrentDequeuedKey, messageId)
 redis.call('SREM', envCurrentDequeuedKey, messageId)
+${QUEUE_METRICS_CK_ENQUEUE_GAUGE_LUA}
 
-return 0
+return __qmret(0)
       `,
     });
 
@@ -3455,6 +3720,8 @@ local keyPrefix = ARGV[11]
 -- TTL (seconds) applied to counter lazy-init SETs
 local counterTtl = ARGV[12]
 
+${QUEUE_METRICS_GAUGE_PRELUDE}
+
 -- Fast path: check if we can skip the queue and go directly to worker queue
 if enableFastPath == '1' then
   local available = redis.call('ZRANGEBYSCORE', queueKey, '-inf', currentTime, 'LIMIT', 0, 1)
@@ -3476,10 +3743,11 @@ if enableFastPath == '1' then
         redis.call('SADD', queueCurrentConcurrencyKey, messageId)
         redis.call('SADD', envCurrentConcurrencyKey, messageId)
         redis.call('RPUSH', workerQueueKey, messageKeyValue)
+${QUEUE_METRICS_CK_ENQUEUE_FASTPATH_GAUGE_LUA}
         -- Fast-path skips the CK variant zset entirely; lengthCounter is unchanged.
         -- runningCounter is bumped later by dequeueMessageFromKeyTracked when the
         -- worker pulls the message from the worker queue.
-        return 1
+        return __qmret(1)
       end
     end
   end
@@ -3531,8 +3799,9 @@ redis.call('SREM', queueCurrentConcurrencyKey, messageId)
 redis.call('SREM', envCurrentConcurrencyKey, messageId)
 redis.call('SREM', queueCurrentDequeuedKey, messageId)
 redis.call('SREM', envCurrentDequeuedKey, messageId)
+${QUEUE_METRICS_CK_ENQUEUE_GAUGE_LUA}
 
-return 0
+return __qmret(0)
       `,
     });
 
@@ -3576,6 +3845,8 @@ local keyPrefix = ARGV[13]
 -- TTL (seconds) applied to counter lazy-init SETs
 local counterTtl = ARGV[14]
 
+${QUEUE_METRICS_GAUGE_PRELUDE}
+
 -- Fast path: check if we can skip the queue and go directly to worker queue
 if enableFastPath == '1' then
   local available = redis.call('ZRANGEBYSCORE', queueKey, '-inf', currentTime, 'LIMIT', 0, 1)
@@ -3597,7 +3868,8 @@ if enableFastPath == '1' then
         redis.call('SADD', queueCurrentConcurrencyKey, messageId)
         redis.call('SADD', envCurrentConcurrencyKey, messageId)
         redis.call('RPUSH', workerQueueKey, messageKeyValue)
-        return 1
+${QUEUE_METRICS_CK_ENQUEUE_FASTPATH_GAUGE_LUA}
+        return __qmret(1)
       end
     end
   end
@@ -3645,8 +3917,9 @@ redis.call('SREM', queueCurrentConcurrencyKey, messageId)
 redis.call('SREM', envCurrentConcurrencyKey, messageId)
 redis.call('SREM', queueCurrentDequeuedKey, messageId)
 redis.call('SREM', envCurrentDequeuedKey, messageId)
+${QUEUE_METRICS_CK_ENQUEUE_GAUGE_LUA}
 
-return 0
+return __qmret(0)
       `,
     });
 
@@ -3891,6 +4164,8 @@ local defaultEnvConcurrencyLimit = ARGV[3]
 local defaultEnvConcurrencyBurstFactor = ARGV[4]
 local keyPrefix = ARGV[5]
 local maxCount = tonumber(ARGV[6] or '1')
+${QUEUE_METRICS_GAUGE_PRELUDE}
+${QUEUE_METRICS_GAUGE_LUA}
 
 -- Check current env concurrency against the limit
 local envCurrentConcurrency = tonumber(redis.call('SCARD', envCurrentConcurrencyKey) or '0')
@@ -3899,7 +4174,7 @@ local envConcurrencyLimitBurstFactor = tonumber(redis.call('GET', envConcurrency
 local envConcurrencyLimitWithBurstFactor = math.floor(envConcurrencyLimit * envConcurrencyLimitBurstFactor)
 
 if envCurrentConcurrency >= envConcurrencyLimitWithBurstFactor then
-    return nil
+    return __qmret(nil)
 end
 
 -- Check current queue concurrency against the limit
@@ -3909,7 +4184,7 @@ local totalQueueConcurrencyLimit = queueConcurrencyLimit
 
 -- Check condition only if concurrencyLimit exists
 if queueCurrentConcurrency >= totalQueueConcurrencyLimit then
-    return nil
+    return __qmret(nil)
 end
 
 -- Calculate how many messages we can actually dequeue based on concurrency limits
@@ -3918,14 +4193,14 @@ local queueAvailableCapacity = totalQueueConcurrencyLimit - queueCurrentConcurre
 local actualMaxCount = math.min(maxCount, envAvailableCapacity, queueAvailableCapacity)
 
 if actualMaxCount <= 0 then
-    return nil
+    return __qmret(nil)
 end
 
 -- Attempt to dequeue messages up to actualMaxCount
 local messages = redis.call('ZRANGEBYSCORE', queueKey, '-inf', currentTime, 'WITHSCORES', 'LIMIT', 0, actualMaxCount)
 
 if #messages == 0 then
-    return nil
+    return __qmret(nil)
 end
 
 local results = {}
@@ -3991,7 +4266,7 @@ else
 end
 
 -- Return results as a flat array: [messageId1, messageScore1, messagePayload1, messageId2, messageScore2, messagePayload2, ...]
-return results
+return __qmret(results)
       `,
     });
 
@@ -4145,7 +4420,7 @@ return results
     // (normal dequeue, TTL-expired, or stale-orphan path — all of which were
     // counted at enqueue time).
     this.redis.defineCommand("dequeueMessagesFromCkQueueTracked", {
-      numberOfKeys: 10,
+      numberOfKeys: 11,
       lua: `
 local ckIndexKey = KEYS[1]
 local queueConcurrencyLimitKey = KEYS[2]
@@ -4157,6 +4432,7 @@ local envQueueKey = KEYS[7]
 local masterQueueKey = KEYS[8]
 local ttlQueueKey = KEYS[9]
 local lengthCounterKey = KEYS[10]
+local runningCounterKey = KEYS[11]
 
 local ckWildcardName = ARGV[1]
 local currentTime = tonumber(ARGV[2])
@@ -4164,6 +4440,8 @@ local defaultEnvConcurrencyLimit = ARGV[3]
 local defaultEnvConcurrencyBurstFactor = ARGV[4]
 local keyPrefix = ARGV[5]
 local maxCount = tonumber(ARGV[6] or '1')
+${QUEUE_METRICS_GAUGE_PRELUDE}
+${QUEUE_METRICS_CK_DEQUEUE_GAUGE_LUA}
 
 local function decrLengthCounter()
   if tonumber(redis.call('GET', lengthCounterKey) or '0') > 0 then
@@ -4178,7 +4456,7 @@ local envConcurrencyLimitBurstFactor = tonumber(redis.call('GET', envConcurrency
 local envConcurrencyLimitWithBurstFactor = math.floor(envConcurrencyLimit * envConcurrencyLimitBurstFactor)
 
 if envCurrentConcurrency >= envConcurrencyLimitWithBurstFactor then
-  return nil
+  return __qmret(nil)
 end
 
 local queueConcurrencyLimit = math.min(tonumber(redis.call('GET', queueConcurrencyLimitKey) or '1000000'), envConcurrencyLimit)
@@ -4187,7 +4465,7 @@ local envAvailableCapacity = envConcurrencyLimitWithBurstFactor - envCurrentConc
 local actualMaxCount = math.min(maxCount, envAvailableCapacity)
 
 if actualMaxCount <= 0 then
-  return nil
+  return __qmret(nil)
 end
 
 local ckQueues = redis.call('ZRANGEBYSCORE', ckIndexKey, '-inf', tostring(currentTime), 'LIMIT', 0, actualMaxCount * 3)
@@ -4199,7 +4477,7 @@ if #ckQueues == 0 then
   else
     redis.call('ZADD', masterQueueKey, anyIdx[2], ckWildcardName)
   end
-  return nil
+  return __qmret(nil)
 end
 
 local results = {}
@@ -4281,7 +4559,7 @@ else
   redis.call('ZADD', masterQueueKey, earliestIdx[2], ckWildcardName)
 end
 
-return results
+return __qmret(results)
       `,
     });
 
@@ -5199,8 +5477,9 @@ declare module "@internal/redis" {
       defaultEnvConcurrencyBurstFactor: string,
       currentTime: string,
       enableFastPath: string,
-      callback?: Callback<number>
-    ): Result<number, Context>;
+      metricsEnabled: string,
+      callback?: Callback<[number, number[] | null]>
+    ): Result<[number, number[] | null], Context>;
 
     enqueueMessageWithTtl(
       //keys
@@ -5229,8 +5508,9 @@ declare module "@internal/redis" {
       defaultEnvConcurrencyBurstFactor: string,
       currentTime: string,
       enableFastPath: string,
-      callback?: Callback<number>
-    ): Result<number, Context>;
+      metricsEnabled: string,
+      callback?: Callback<[number, number[] | null]>
+    ): Result<[number, number[] | null], Context>;
 
     expireTtlRuns(
       //keys
@@ -5265,8 +5545,9 @@ declare module "@internal/redis" {
       defaultEnvConcurrencyBurstFactor: string,
       keyPrefix: string,
       maxCount: string,
-      callback?: Callback<string[]>
-    ): Result<string[], Context>;
+      metricsEnabled: string,
+      callback?: Callback<[string[] | null, number[] | null]>
+    ): Result<[string[] | null, number[] | null], Context>;
 
     dequeueMessageFromWorkerQueueNonBlocking(
       workerQueueKey: string,
@@ -5405,8 +5686,9 @@ declare module "@internal/redis" {
       defaultEnvConcurrencyBurstFactor: string,
       currentTime: string,
       enableFastPath: string,
-      callback?: Callback<number>
-    ): Result<number, Context>;
+      metricsEnabled: string,
+      callback?: Callback<[number, number[] | null]>
+    ): Result<[number, number[] | null], Context>;
 
     enqueueMessageWithTtlCk(
       //keys
@@ -5437,8 +5719,9 @@ declare module "@internal/redis" {
       defaultEnvConcurrencyBurstFactor: string,
       currentTime: string,
       enableFastPath: string,
-      callback?: Callback<number>
-    ): Result<number, Context>;
+      metricsEnabled: string,
+      callback?: Callback<[number, number[] | null]>
+    ): Result<[number, number[] | null], Context>;
 
     dequeueMessagesFromCkQueue(
       //keys
@@ -5551,8 +5834,9 @@ declare module "@internal/redis" {
       enableFastPath: string,
       keyPrefix: string,
       counterTtl: string,
-      callback?: Callback<number>
-    ): Result<number, Context>;
+      metricsEnabled: string,
+      callback?: Callback<[number, number[] | null]>
+    ): Result<[number, number[] | null], Context>;
 
     enqueueMessageWithTtlCkTracked(
       masterQueueKey: string,
@@ -5585,8 +5869,9 @@ declare module "@internal/redis" {
       enableFastPath: string,
       keyPrefix: string,
       counterTtl: string,
-      callback?: Callback<number>
-    ): Result<number, Context>;
+      metricsEnabled: string,
+      callback?: Callback<[number, number[] | null]>
+    ): Result<[number, number[] | null], Context>;
 
     dequeueMessagesFromCkQueueTracked(
       ckIndexKey: string,
@@ -5599,14 +5884,16 @@ declare module "@internal/redis" {
       masterQueueKey: string,
       ttlQueueKey: string,
       lengthCounterKey: string,
+      runningCounterKey: string,
       ckWildcardName: string,
       currentTime: string,
       defaultEnvConcurrencyLimit: string,
       defaultEnvConcurrencyBurstFactor: string,
       keyPrefix: string,
       maxCount: string,
-      callback?: Callback<string[]>
-    ): Result<string[], Context>;
+      metricsEnabled: string,
+      callback?: Callback<[string[] | null, number[] | null]>
+    ): Result<[string[] | null, number[] | null], Context>;
 
     dequeueMessageFromKeyTracked(
       messageKey: string,
diff --git a/internal-packages/run-engine/src/run-queue/keyProducer.ts b/internal-packages/run-engine/src/run-queue/keyProducer.ts
index b185435f6f6..18a2727b7e4 100644
--- a/internal-packages/run-engine/src/run-queue/keyProducer.ts
+++ b/internal-packages/run-engine/src/run-queue/keyProducer.ts
@@ -141,8 +141,7 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer {
   }
 
   queueConcurrencyLimitKeyFromQueue(queue: string) {
-    const concurrencyQueueName = queue.replace(/:ck:.+$/, "");
-    return `${concurrencyQueueName}:${constants.CONCURRENCY_LIMIT_PART}`;
+    return [this.baseQueueKeyFromQueue(queue), constants.CONCURRENCY_LIMIT_PART].join(":");
   }
 
   queueCurrentConcurrencyKeyFromQueue(queue: string) {
@@ -313,12 +312,14 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer {
   }
 
   ckIndexKeyFromQueue(queue: string): string {
-    const baseQueue = queue.replace(/:ck:.+$/, "");
-    return `${baseQueue}:${constants.CK_INDEX_PART}`;
+    return [this.baseQueueKeyFromQueue(queue), constants.CK_INDEX_PART].join(":");
   }
 
+  // Uses indexOf/slice, not /:ck:.+$/: queue names are user-controlled and the regex is
+  // polynomial. As before, ":ck:" is stripped only when at least one character follows it.
   baseQueueKeyFromQueue(queue: string): string {
-    return queue.replace(/:ck:.+$/, "");
+    const ckStart = queue.indexOf(":ck:");
+    return ckStart !== -1 && ckStart + 4 < queue.length ? queue.slice(0, ckStart) : queue;
   }
 
   queueLengthCounterKey(env: RunQueueKeyProducerEnvironment, queue: string): string {
@@ -342,7 +343,8 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer {
   }
 
   toCkWildcard(queue: string): string {
-    return queue.replace(/:ck:.+$/, ":ck:*");
+    const withoutCk = this.baseQueueKeyFromQueue(queue);
+    return withoutCk !== queue ? `${withoutCk}:ck:*` : queue;
   }
 
   descriptorFromQueue(queue: string): QueueDescriptor {
diff --git a/internal-packages/run-engine/src/run-queue/metrics.test.ts b/internal-packages/run-engine/src/run-queue/metrics.test.ts
new file mode 100644
index 00000000000..ebfc295470e
--- /dev/null
+++ b/internal-packages/run-engine/src/run-queue/metrics.test.ts
@@ -0,0 +1,397 @@
+import { createRedisClient } from "@internal/redis";
+import { redisTest } from "@internal/testcontainers";
+import { trace } from "@internal/tracing";
+import {
+  allStreamKeys,
+  MetricsStreamEmitter,
+  type MetricDefinition,
+} from "@internal/metrics-pipeline";
+import { Logger } from "@trigger.dev/core/logger";
+import { Decimal } from "@trigger.dev/database";
+import { setTimeout } from "node:timers/promises";
+import { describe, expect } from "vitest";
+import { FairQueueSelectionStrategy } from "./fairQueueSelectionStrategy.js";
+import { RunQueue } from "./index.js";
+import { RunQueueFullKeyProducer } from "./keyProducer.js";
+import type { InputPayload } from "./types.js";
+
+const authenticatedEnvDev = {
+  id: "e1234", // reused below as each message's environmentId and as the workerQueue name
+  type: "DEVELOPMENT" as const,
+  maximumConcurrencyLimit: 10,
+  concurrencyLimitBurstFactor: new Decimal(1.0),
+  project: { id: "p1234" },
+  organization: { id: "o1234" }, // same id the test messages carry in `orgId`
+};
+
+async function readAllEntries(
+  redisOptions: { host: string; port: number },
+  definition: MetricDefinition
+) {
+  const client = createRedisClient({ ...redisOptions, keyPrefix: undefined });
+  const entries: Array<{ id: string; fields: Record<string, string> }> = [];
+  try {
+    for (const key of allStreamKeys(definition)) {
+      const raw = (await client.xrange(key, "-", "+")) as Array<[string, string[]]>;
+      for (const [id, flat] of raw) {
+        const fields: Record<string, string> = {}; // flat = [field, value, field, value, ...]
+        for (let i = 0; i + 1 < flat.length; i += 2) fields[flat[i]!] = flat[i + 1]!;
+        entries.push({ id, fields });
+      }
+    }
+  } finally {
+    await client.quit(); // always release the connection, even when an XRANGE rejects
+  }
+  return entries;
+}
+
+// Gauges are written by a fire-and-forget Node XADD issued after the script reply (not
+// synchronously inside the Lua), so tests cannot read once: re-read every 50ms until
+// `predicate` accepts the entries or `timeoutMs` has elapsed, then return the last read.
+async function waitForEntries(
+  redisOptions: { host: string; port: number },
+  definition: MetricDefinition,
+  predicate: (entries: Array<{ id: string; fields: Record<string, string> }>) => boolean,
+  timeoutMs = 5000
+) {
+  const startedAt = Date.now();
+  while (true) {
+    const snapshot = await readAllEntries(redisOptions, definition);
+    // Timing out is not an error here; the caller's own expectations fail on the snapshot.
+    if (predicate(snapshot) || Date.now() - startedAt > timeoutMs) return snapshot;
+    await setTimeout(50);
+  }
+}
+
+describe("RunQueue queue-metrics emission", () => {
+  redisTest("emits gauge + enqueue/started/ack events when enabled", async ({ redisContainer }) => {
+    const redis = {
+      keyPrefix: "runqueue:test:",
+      host: redisContainer.getHost(),
+      port: redisContainer.getPort(),
+    };
+    const definition: MetricDefinition = {
+      name: `qm_test_${Date.now()}`,
+      shardCount: 2,
+      consumerGroup: "cg",
+      maxLen: 1000,
+    };
+    const emitter = new MetricsStreamEmitter({
+      redis,
+      definition,
+      flag: { enabled: () => true }, // metrics flag always on for this test
+    });
+
+    const queue = new RunQueue({
+      name: "rq",
+      tracer: trace.getTracer("rq"),
+      defaultEnvConcurrency: 25,
+      logger: new Logger("RunQueue", "error"),
+      keys: new RunQueueFullKeyProducer(),
+      queueSelectionStrategy: new FairQueueSelectionStrategy({
+        redis,
+        keys: new RunQueueFullKeyProducer(),
+      }),
+      redis,
+      queueMetrics: emitter,
+    });
+
+    const message: InputPayload = {
+      runId: "r-metrics",
+      taskIdentifier: "task/my-task",
+      orgId: "o1234",
+      projectId: "p1234",
+      environmentId: authenticatedEnvDev.id,
+      environmentType: "DEVELOPMENT",
+      queue: "task/my-task",
+      timestamp: Date.now(),
+      eligibleAtMs: Date.now() - 500, // eligible 500ms before enqueue
+      attempt: 0,
+    };
+
+    try {
+      await queue.enqueueMessage({
+        env: authenticatedEnvDev,
+        message,
+        workerQueue: authenticatedEnvDev.id,
+      });
+      await setTimeout(1000);
+      const dequeued = await queue.dequeueMessageFromWorkerQueue("c1", authenticatedEnvDev.id);
+      expect(dequeued?.messageId).toBe(message.runId);
+      await queue.acknowledgeMessage(message.orgId, message.runId);
+      await setTimeout(100);
+
+      const entries = await waitForEntries(redis, definition, (es) => {
+        const seen = es.map((e) => e.fields.op);
+        return ["enqueue", "gauge", "started", "ack"].every((o) => seen.includes(o));
+      });
+      const ops = entries.map((e) => e.fields.op);
+      expect(ops).toContain("enqueue");
+      expect(ops).toContain("gauge");
+      expect(ops).toContain("started");
+      expect(ops).toContain("ack");
+
+      const gauge = entries.find((e) => e.fields.op === "gauge");
+      assertGauge(gauge); // narrows `gauge` to a defined entry for the checks below
+      expect(gauge.fields.q).toContain("task/my-task");
+      for (const f of ["ql", "cc", "lim", "eql", "ec", "elim", "thr"]) {
+        expect(gauge.fields[f]).toBeDefined();
+      }
+      // Non-CK scripts keep the 7-field gauge (no CK-health tail).
+      expect(gauge.fields.ckq).toBeUndefined();
+      expect(gauge.fields.ckw).toBeUndefined();
+
+      // The first counter emission also seeds a cum=0 baseline (no wait); the real reading
+      // carries wait. Pick the reading (cum > 0).
+      const started = entries.find((e) => e.fields.op === "started" && Number(e.fields.cum) > 0);
+      expect(started?.fields.wait).toBeDefined(); // also fails cleanly when no reading landed
+      expect(Number(started!.fields.wait)).toBeGreaterThanOrEqual(0);
+      expect(Number(started!.fields.cum)).toBeGreaterThan(0);
+    } finally {
+      await queue.quit();
+      await emitter.close();
+    }
+  });
+
+  redisTest(
+    "emits a fast-path gauge reusing the admission-check locals",
+    async ({ redisContainer }) => {
+      const redis = {
+        keyPrefix: "runqueue:test:",
+        host: redisContainer.getHost(),
+        port: redisContainer.getPort(),
+      };
+      const definition: MetricDefinition = {
+        name: `qm_fp_${Date.now()}`,
+        shardCount: 2,
+        consumerGroup: "cg",
+        maxLen: 1000,
+      };
+      const emitter = new MetricsStreamEmitter({
+        redis,
+        definition,
+        flag: { enabled: () => true }, // metrics flag always on for this test
+      });
+      const queue = new RunQueue({
+        name: "rq",
+        tracer: trace.getTracer("rq"),
+        defaultEnvConcurrency: 25,
+        logger: new Logger("RunQueue", "error"),
+        keys: new RunQueueFullKeyProducer(),
+        queueSelectionStrategy: new FairQueueSelectionStrategy({
+          redis,
+          keys: new RunQueueFullKeyProducer(),
+        }),
+        redis,
+        queueMetrics: emitter,
+      });
+
+      const message: InputPayload = {
+        runId: "r-fastpath",
+        taskIdentifier: "task/my-task",
+        orgId: "o1234",
+        projectId: "p1234",
+        environmentId: authenticatedEnvDev.id,
+        environmentType: "DEVELOPMENT",
+        queue: "task/my-task",
+        timestamp: Date.now(),
+        attempt: 0,
+      };
+
+      try {
+        // enableFastPath + empty queue + zero concurrency => the Lua takes the fast path,
+        // so the gauge runs the reuse snippet (queueCurrent/envCurrent/queueLimit/envLimit).
+        await queue.enqueueMessage({
+          env: authenticatedEnvDev,
+          message,
+          workerQueue: authenticatedEnvDev.id,
+          enableFastPath: true,
+        });
+        const dequeued = await queue.dequeueMessageFromWorkerQueue("c1", authenticatedEnvDev.id);
+        expect(dequeued?.messageId).toBe(message.runId);
+
+        const entries = await waitForEntries(
+          redis,
+          definition,
+          (es) =>
+            es.some((e) => e.fields.op === "gauge") && es.some((e) => e.fields.op === "enqueue")
+        );
+        const gauge = entries.find((e) => e.fields.op === "gauge");
+        assertGauge(gauge); // throws when the poll timed out without a gauge entry
+        for (const f of ["ql", "cc", "lim", "eql", "ec", "elim", "thr"]) {
+          expect(gauge!.fields[f]).toBeDefined(); // every base gauge field is present
+        }
+        // Fast path was taken => capacity was available => not throttled.
+        expect(gauge!.fields.thr).toBe("0");
+        expect(entries.some((e) => e.fields.op === "enqueue")).toBe(true);
+      } finally {
+        await queue.quit();
+        await emitter.close();
+      }
+    }
+  );
+
+  redisTest("emits an aggregate gauge for CK queues at dequeue", async ({ redisContainer }) => {
+    const redis = {
+      keyPrefix: "runqueue:test:",
+      host: redisContainer.getHost(),
+      port: redisContainer.getPort(),
+    };
+    const definition: MetricDefinition = {
+      name: `qm_ck_${Date.now()}`,
+      shardCount: 2,
+      consumerGroup: "cg",
+      maxLen: 1000,
+    };
+    const emitter = new MetricsStreamEmitter({ redis, definition, flag: { enabled: () => true } });
+    const queue = new RunQueue({
+      name: "rq",
+      tracer: trace.getTracer("rq"),
+      defaultEnvConcurrency: 25,
+      logger: new Logger("RunQueue", "error"),
+      keys: new RunQueueFullKeyProducer(),
+      queueSelectionStrategy: new FairQueueSelectionStrategy({
+        redis,
+        keys: new RunQueueFullKeyProducer(),
+      }),
+      redis,
+      queueMetrics: emitter,
+    });
+
+    const message: InputPayload = {
+      runId: "r-ck",
+      taskIdentifier: "task/my-task",
+      orgId: "o1234",
+      projectId: "p1234",
+      environmentId: authenticatedEnvDev.id,
+      environmentType: "DEVELOPMENT",
+      queue: "task/my-task",
+      concurrencyKey: "tenant-1", // asserted below as the `ck` field of the counter entries
+      timestamp: Date.now(),
+      eligibleAtMs: Date.now() - 300,
+      attempt: 0,
+    };
+
+    try {
+      await queue.enqueueMessage({
+        env: authenticatedEnvDev,
+        message,
+        workerQueue: authenticatedEnvDev.id,
+      });
+      await setTimeout(1000);
+      const dequeued = await queue.dequeueMessageFromWorkerQueue("c1", authenticatedEnvDev.id);
+      expect(dequeued?.messageId).toBe(message.runId);
+
+      const entries = await waitForEntries(redis, definition, (es) =>
+        es.some(
+          (e) => e.fields.op === "gauge" && e.fields.q.includes(":ck:") && e.fields.thr === "0"
+        )
+      );
+      const gauges = entries.filter((e) => e.fields.op === "gauge");
+      expect(gauges.length).toBeGreaterThan(0);
+      // The aggregate CK dequeue gauge targets the CK wildcard and never sets thr.
+      const aggregate = gauges.find((e) => e.fields.q.includes(":ck:") && e.fields.thr === "0");
+      assertGauge(aggregate); // throws when the poll timed out without a matching gauge
+      expect(Number(aggregate!.fields.ql)).toBeGreaterThanOrEqual(0);
+      expect(Number(aggregate!.fields.cc)).toBeGreaterThanOrEqual(0);
+
+      // Every CK-path gauge carries the CK-health tail; the enqueue-time reading (and the
+      // pre-dequeue aggregate reading) sees the backlogged key.
+      const ckGauges = gauges.filter((e) => e.fields.q.includes(":ck:"));
+      for (const g of ckGauges) {
+        expect(g.fields.ckq).toBeDefined();
+        expect(g.fields.ckw).toBeDefined();
+        expect(Number(g.fields.ckw)).toBeGreaterThanOrEqual(0);
+      }
+      expect(ckGauges.some((g) => Number(g.fields.ckq) >= 1)).toBe(true);
+
+      // CK counter entries carry both odometers: the reading has cum + ck/ckcum, and each
+      // odometer seeds its own baseline entry (cum-only vs ck+ckcum-only).
+      const enqueues = entries.filter((e) => e.fields.op === "enqueue");
+      const reading = enqueues.find((e) => e.fields.cum != null && e.fields.ckcum != null);
+      expect(reading).toBeDefined();
+      expect(reading!.fields.ck).toBe("tenant-1");
+      expect(reading!.fields.q).not.toContain(":ck:"); // counter entries name the base queue
+      expect(Number(reading!.fields.cum)).toBe(1);
+      expect(Number(reading!.fields.ckcum)).toBe(1);
+      const baseBaseline = enqueues.find((e) => e.fields.cum === "0" && e.fields.ck == null);
+      expect(baseBaseline).toBeDefined();
+      const ckBaseline = enqueues.find((e) => e.fields.ckcum === "0" && e.fields.cum == null);
+      expect(ckBaseline).toBeDefined();
+      expect(ckBaseline!.fields.ck).toBe("tenant-1");
+    } finally {
+      await queue.quit();
+      await emitter.close();
+    }
+  });
+
+  redisTest("gauge sampling gates gauges but not counters", async ({ redisContainer }) => {
+    const redis = {
+      keyPrefix: "runqueue:test:",
+      host: redisContainer.getHost(),
+      port: redisContainer.getPort(),
+    };
+    const definition: MetricDefinition = {
+      name: `qm_sample_${Date.now()}`,
+      shardCount: 2,
+      consumerGroup: "cg",
+      maxLen: 1000,
+    };
+    // gaugeSampleRate 0 => sampledSync() always false => Lua gauge never fires; counters still do.
+    const emitter = new MetricsStreamEmitter({
+      redis,
+      definition,
+      flag: { enabled: () => true },
+      gaugeSampleRate: 0,
+    });
+    const queue = new RunQueue({
+      name: "rq",
+      tracer: trace.getTracer("rq"),
+      defaultEnvConcurrency: 25,
+      logger: new Logger("RunQueue", "error"),
+      keys: new RunQueueFullKeyProducer(),
+      queueSelectionStrategy: new FairQueueSelectionStrategy({
+        redis,
+        keys: new RunQueueFullKeyProducer(),
+      }),
+      redis,
+      queueMetrics: emitter,
+    });
+
+    const message: InputPayload = {
+      runId: "r-sample",
+      taskIdentifier: "task/my-task",
+      orgId: "o1234",
+      projectId: "p1234",
+      environmentId: authenticatedEnvDev.id,
+      environmentType: "DEVELOPMENT",
+      queue: "task/my-task",
+      timestamp: Date.now(),
+      attempt: 0,
+    };
+
+    try {
+      await queue.enqueueMessage({
+        env: authenticatedEnvDev,
+        message,
+        workerQueue: authenticatedEnvDev.id,
+      });
+      await setTimeout(1000);
+      await queue.dequeueMessageFromWorkerQueue("c1", authenticatedEnvDev.id);
+
+      // Poll until the counter (enqueue) lands; by then a gauge would have too, if sampled in.
+      const entries = await waitForEntries(redis, definition, (es) =>
+        es.some((e) => e.fields.op === "enqueue")
+      );
+      expect(entries.some((e) => e.fields.op === "gauge")).toBe(false); // gauges sampled out
+      expect(entries.some((e) => e.fields.op === "enqueue")).toBe(true); // counters unaffected
+    } finally {
+      await queue.quit();
+      await emitter.close();
+    }
+  });
+});
+
+function assertGauge(gauge: unknown): asserts gauge {
+  if (!gauge) throw new Error("expected a gauge entry"); // on return, `gauge` is truthy
+}
diff --git a/internal-packages/run-engine/src/run-queue/tests/ckIndex.test.ts b/internal-packages/run-engine/src/run-queue/tests/ckIndex.test.ts
index 224540f4efb..4eb47d59bc0 100644
--- a/internal-packages/run-engine/src/run-queue/tests/ckIndex.test.ts
+++ b/internal-packages/run-engine/src/run-queue/tests/ckIndex.test.ts
@@ -471,4 +471,46 @@ describe("CK Index", () => {
       await queue.quit();
     }
   });
+
+  redisTest(
+    "concurrencyKeyBreakdown lists backlogged keys most-starved first",
+    async ({ redisContainer }) => {
+      const queue = createQueue(redisContainer);
+      try {
+        const now = Date.now();
+        const enqueue = (runId: string, concurrencyKey: string, timestamp: number) =>
+          queue.enqueueMessage({
+            env: authenticatedEnvDev,
+            message: makeMessage({ runId, concurrencyKey, timestamp }),
+            workerQueue: authenticatedEnvDev.id,
+            skipDequeueProcessing: true,
+          });
+
+        // ck-a: 2 queued, oldest head at now-10s (most starved). ck-b: 1 queued, head at now-2s.
+        await enqueue("r1", "ck-a", now - 10_000);
+        await enqueue("r2", "ck-a", now - 5_000);
+        await enqueue("r3", "ck-b", now - 2_000);
+
+        const breakdown = await queue.concurrencyKeyBreakdown(authenticatedEnvDev, "task/my-task");
+        expect(breakdown.totalBackloggedKeys).toBe(2);
+        expect(breakdown.keys).toEqual([
+          { concurrencyKey: "ck-a", queued: 2, running: 0, oldestEnqueuedAt: now - 10_000 },
+          { concurrencyKey: "ck-b", queued: 1, running: 0, oldestEnqueuedAt: now - 2_000 },
+        ]);
+
+        const limited = await queue.concurrencyKeyBreakdown(authenticatedEnvDev, "task/my-task", {
+          limit: 1,
+        });
+        expect(limited.totalBackloggedKeys).toBe(2); // the total is not capped by `limit`
+        expect(limited.keys).toHaveLength(1); // only the returned keys are capped
+        expect(limited.keys[0]!.concurrencyKey).toBe("ck-a");
+
+        // Queues with no CK backlog return an empty breakdown.
+        const empty = await queue.concurrencyKeyBreakdown(authenticatedEnvDev, "task/other-task");
+        expect(empty).toEqual({ totalBackloggedKeys: 0, keys: [] });
+      } finally {
+        await queue.quit();
+      }
+    }
+  );
 });
diff --git a/internal-packages/run-engine/src/run-queue/types.ts b/internal-packages/run-engine/src/run-queue/types.ts
index 0905f3971de..8a7d3c93ec5 100644
--- a/internal-packages/run-engine/src/run-queue/types.ts
+++ b/internal-packages/run-engine/src/run-queue/types.ts
@@ -13,6 +13,9 @@ export const InputPayload = z.object({
   queue: z.string(),
   concurrencyKey: z.string().optional(),
   timestamp: z.number(),
+  // Unix ms the run became eligible (delayUntil if set, else triggered-at), pre-priority.
+  // Dequeue scheduling delay = dequeueTime - eligibleAtMs. Optional for old-payload compat.
+  eligibleAtMs: z.number().optional(),
   attempt: z.number(),
   /** TTL expiration timestamp (unix ms). If set, run will be expired when this time is reached. */
   ttlExpiresAt: z.number().optional(),
diff --git a/internal-packages/tsql/src/index.test.ts b/internal-packages/tsql/src/index.test.ts
index f9aca2f236d..ce358e6ac08 100644
--- a/internal-packages/tsql/src/index.test.ts
+++ b/internal-packages/tsql/src/index.test.ts
@@ -231,6 +231,26 @@ describe("injectFallbackConditions", () => {
       expect(modified.where.expression_type).toBe("and");
     }
   });
+
+  it("should inject into a FROM subquery, where the fallback column's table lives", () => {
+    const parsed = parseTSQLSelect(
+      "SELECT t, sum(total) AS total FROM (SELECT time AS t, status, count(*) AS total FROM task_runs GROUP BY t, status) GROUP BY t"
+    );
+    const timeFallback: Record<string, WhereClauseCondition> = {
+      time: { op: "gte", value: "2024-01-01" },
+    };
+
+    const result = injectFallbackConditions(parsed, timeFallback);
+    expect(result.expression_type).toBe("select_query");
+    if (result.expression_type === "select_query") {
+      expect(result.where).toBeUndefined(); // nothing is injected at the outer level
+      const subquery = result.select_from?.table;
+      expect(subquery?.expression_type).toBe("select_query");
+      if (subquery?.expression_type === "select_query") {
+        expect(isColumnReferencedInExpression(subquery.where, "time")).toBe(true);
+      }
+    }
+  });
 });
 
 describe("compileTSQL with whereClauseFallback", () => {
diff --git a/internal-packages/tsql/src/index.ts b/internal-packages/tsql/src/index.ts
index 1d8759c108c..1ebd1a60a5d 100644
--- a/internal-packages/tsql/src/index.ts
+++ b/internal-packages/tsql/src/index.ts
@@ -429,6 +429,24 @@ export function injectFallbackConditions(
 
   // Handle SelectQuery
   const selectQuery = ast as SelectQuery;
+
+  // When the FROM is a subquery, the fallback columns belong to the inner query's
+  // table, not this level; descend so e.g. a time fallback lands next to the table ref.
+  const fromTable = selectQuery.select_from?.table;
+  if (
+    fromTable &&
+    (fromTable.expression_type === "select_query" ||
+      fromTable.expression_type === "select_set_query")
+  ) {
+    return {
+      ...selectQuery,
+      select_from: {
+        ...selectQuery.select_from!,
+        table: injectFallbackConditions(fromTable, fallbacks) as SelectQuery | SelectSetQuery,
+      },
+    };
+  }
+
   const existingWhere = selectQuery.where;
 
   // Collect fallback expressions for columns not already in WHERE
@@ -541,6 +559,12 @@ export interface CompileTSQLOptions {
    * ```
    */
   timeRange?: TimeRange;
+  /**
+   * Opt-in: emit rows for empty time buckets in a top-level time-bucketed query.
+   * Counters zero-fill, gauges (columns with `fillMode: "carry"`) carry forward.
+   * Off by default; output is unchanged when not set.
+   */
+  fillGaps?: boolean;
 }
 
 /**
@@ -599,6 +623,7 @@ export function compileTSQL(query: string, options: CompileTSQLOptions): PrintRe
     fieldMappings: options.fieldMappings,
     enforcedWhereClause,
     timeRange: options.timeRange,
+    fillGaps: options.fillGaps,
   });
 
   // 6. Print the AST to ClickHouse SQL (enforced conditions applied at printer level)
diff --git a/internal-packages/tsql/src/query/functions.ts b/internal-packages/tsql/src/query/functions.ts
index 2f2b9278454..a6dadf0f609 100644
--- a/internal-packages/tsql/src/query/functions.ts
+++ b/internal-packages/tsql/src/query/functions.ts
@@ -645,11 +645,24 @@ export const TSQL_AGGREGATIONS: Record<string, TSQLFunctionMeta> = {
     maxParams: 1,
     aggregate: true,
   },
+  quantilesTDigestMerge: {
+    clickhouseName: "quantilesTDigestMerge",
+    minArgs: 1,
+    maxArgs: 1,
+    minParams: 1,
+    aggregate: true,
+  },
   sumMerge: { clickhouseName: "sumMerge", minArgs: 1, maxArgs: 1, aggregate: true },
   avgMerge: { clickhouseName: "avgMerge", minArgs: 1, maxArgs: 1, aggregate: true },
   countMerge: { clickhouseName: "countMerge", minArgs: 1, maxArgs: 1, aggregate: true },
   minMerge: { clickhouseName: "minMerge", minArgs: 1, maxArgs: 1, aggregate: true },
   maxMerge: { clickhouseName: "maxMerge", minArgs: 1, maxArgs: 1, aggregate: true },
+  deltaSumTimestampMerge: {
+    clickhouseName: "deltaSumTimestampMerge",
+    minArgs: 1,
+    maxArgs: 1,
+    aggregate: true,
+  },
 
   // Statistical functions
   simpleLinearRegression: {
diff --git a/internal-packages/tsql/src/query/printer.test.ts b/internal-packages/tsql/src/query/printer.test.ts
index 0efa0d34fc4..dbc14818cae 100644
--- a/internal-packages/tsql/src/query/printer.test.ts
+++ b/internal-packages/tsql/src/query/printer.test.ts
@@ -3831,3 +3831,388 @@ describe("timeBucket()", () => {
     });
   });
 });
+
+// ============================================================
+// fillGaps Tests
+// ============================================================
+
+// Covers the opt-in `fillGaps` printer flag: which query shapes get WITH FILL /
+// INTERPOLATE (or the windowed per-group rewrite) and which are left untouched.
+describe("timeBucket() fillGaps", () => {
+  // Schema with a gauge column (fillMode: "carry"), a counter, and a groupable dim.
+  const metricsSchema: TableSchema = {
+    name: "metrics",
+    clickhouseName: "trigger_dev.queue_metrics_v1",
+    timeConstraint: "bucket_at",
+    columns: {
+      bucket_at: { name: "bucket_at", clickhouseName: "created_at", ...column("DateTime64") },
+      queue_name: { name: "queue_name", ...column("String") },
+      max_running: { name: "max_running", ...column("UInt64"), fillMode: "carry" },
+      enqueued: { name: "enqueued", ...column("UInt64"), fillMode: "zero" },
+      organization_id: { name: "organization_id", ...column("String") },
+      project_id: { name: "project_id", ...column("String") },
+      environment_id: { name: "environment_id", ...column("String") },
+    },
+    tenantColumns: {
+      organizationId: "organization_id",
+      projectId: "project_id",
+      environmentId: "environment_id",
+    },
+  };
+
+  // 7-day range -> 6 HOUR buckets (same as the timeBucket() block).
+  const sevenDayRange = {
+    from: new Date("2024-01-01T00:00:00Z"),
+    to: new Date("2024-01-08T00:00:00Z"),
+  };
+
+  // Builds a printer context over metricsSchema with the three tenant filters enforced,
+  // the 7-day range, and the given fillGaps flag.
+  function ctx(fillGaps: boolean): PrinterContext {
+    return createPrinterContext({
+      schema: createSchemaRegistry([metricsSchema]),
+      enforcedWhereClause: {
+        organization_id: { op: "eq", value: "org_test123" },
+        project_id: { op: "eq", value: "proj_test456" },
+        environment_id: { op: "eq", value: "env_test789" },
+      },
+      timeRange: sevenDayRange,
+      fillGaps,
+    });
+  }
+
+  // Parses and prints `query`, returning the print result together with the warnings
+  // collected on the context during printing.
+  function run(query: string, fillGaps: boolean) {
+    const context = ctx(fillGaps);
+    const result = printToClickHouse(parseTSQLSelect(query), context);
+    return { ...result, warnings: context.warnings };
+  }
+
+  it("emits no WITH FILL when fillGaps is off (unchanged)", () => {
+    const query =
+      "SELECT timeBucket(), max(max_running), count() FROM metrics GROUP BY timeBucket ORDER BY timeBucket";
+    const { sql } = run(query, false);
+    expect(sql).not.toContain("WITH FILL");
+    expect(sql).not.toContain("INTERPOLATE");
+  });
+
+  it("single-series gauge + counter: WITH FILL plus INTERPOLATE for the gauge only", () => {
+    const query =
+      "SELECT timeBucket(), max(max_running) AS max_running, count() AS runs FROM metrics GROUP BY timeBucket ORDER BY timeBucket";
+    const { sql, params } = run(query, true);
+
+    // STEP matches the 6 HOUR bucket interval, FROM/TO snapped + parameterized.
+    expect(sql).toContain("WITH FILL FROM toStartOfInterval({");
+    expect(sql).toContain("STEP INTERVAL 6 HOUR");
+    expect(sql).toMatch(/TO toStartOfInterval\(\{[^}]+: DateTime64\(6\)\}, INTERVAL 6 HOUR\)/);
+
+    // Gauge carried forward; counter omitted (defaults to 0).
+    expect(sql).toContain("INTERPOLATE (max_running AS max_running)");
+    expect(sql).not.toContain("runs AS runs");
+
+    // FROM/TO bounds are real parameters carrying the time range.
+    const dateParams = Object.values(params).filter((v) => v instanceof Date);
+    expect(dateParams).toContainEqual(sevenDayRange.from);
+    expect(dateParams).toContainEqual(sevenDayRange.to);
+  });
+
+  it("single-series counter only: WITH FILL but no INTERPOLATE", () => {
+    const query =
+      "SELECT timeBucket(), count() AS runs FROM metrics GROUP BY timeBucket ORDER BY timeBucket";
+    const { sql } = run(query, true);
+    expect(sql).toContain("WITH FILL FROM toStartOfInterval({");
+    expect(sql).toContain("STEP INTERVAL 6 HOUR");
+    expect(sql).not.toContain("INTERPOLATE");
+  });
+
+  it("grouped counter only: group dim first, then WITH FILL, no INTERPOLATE", () => {
+    const query =
+      "SELECT timeBucket(), queue_name, count() AS runs FROM metrics GROUP BY timeBucket, queue_name ORDER BY timeBucket";
+    const { sql } = run(query, true);
+    expect(sql).toMatch(/ORDER BY queue_name, timebucket ASC WITH FILL/);
+    expect(sql).toContain("STEP INTERVAL 6 HOUR");
+    expect(sql).not.toContain("INTERPOLATE");
+  });
+
+  it("grouped + carry gauge: per-group LOCF via window functions, no INTERPOLATE", () => {
+    const query =
+      "SELECT timeBucket(), queue_name, max(max_running) AS max_running FROM metrics GROUP BY timeBucket, queue_name ORDER BY timeBucket";
+    const { sql, warnings } = run(query, true);
+
+    // Inner query densifies per group (dims first, then the bucket WITH FILL) + sentinel.
+    expect(sql).toMatch(/ORDER BY queue_name, timebucket ASC WITH FILL/);
+    expect(sql).toContain("STEP INTERVAL 6 HOUR");
+    expect(sql).toContain("1 AS __tsql_present");
+
+    // Block id increments at each real row, partitioned by the group dim.
+    expect(sql).toContain(
+      "sum(__tsql_present) OVER (PARTITION BY queue_name ORDER BY timebucket ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS __tsql_block"
+    );
+
+    // Gauge carried within each (group, block); never INTERPOLATE (which bleeds across groups).
+    expect(sql).toContain(
+      "max(if(__tsql_present = 1, max_running, NULL)) OVER (PARTITION BY queue_name, __tsql_block) AS max_running"
+    );
+    expect(sql).not.toContain("INTERPOLATE");
+
+    // Final result re-ordered by the user's ORDER BY, and not skipped.
+    expect(sql).toMatch(/\)\s*ORDER BY timebucket ASC$/);
+    expect(warnings.some((w) => w.code === "fill_skipped_grouped_gauge")).toBe(false);
+  });
+
+  it("grouped + carry gauge with a non-plain group dim: fill is skipped", () => {
+    // The group dim is `upper(queue_name)`, a call rather than a bare column.
+    const query =
+      "SELECT timeBucket(), upper(queue_name) AS q, max(max_running) AS max_running FROM metrics GROUP BY timeBucket, upper(queue_name) ORDER BY timeBucket";
+    const { sql, warnings } = run(query, true);
+    expect(sql).not.toContain("WITH FILL");
+    expect(sql).not.toContain("__tsql_block");
+    expect(warnings.some((w) => w.code === "fill_skipped_grouped_gauge")).toBe(true);
+  });
+
+  it("user ORDER BY not led by timeBucket: fill is skipped", () => {
+    const query =
+      "SELECT timeBucket(), count() AS runs FROM metrics GROUP BY timeBucket ORDER BY runs DESC";
+    const { sql } = run(query, true);
+    expect(sql).not.toContain("WITH FILL");
+    expect(sql).not.toContain("INTERPOLATE");
+  });
+
+  it("bucket-led ORDER BY DESC: fill is skipped (ascending fill would be invalid)", () => {
+    const query =
+      "SELECT timeBucket(), count() AS runs FROM metrics GROUP BY timeBucket ORDER BY timeBucket DESC";
+    const { sql } = run(query, true);
+    expect(sql).not.toContain("WITH FILL");
+    expect(sql).not.toContain("INTERPOLATE");
+    // The plain descending order still stands.
+    expect(sql).toContain("ORDER BY timebucket DESC");
+  });
+});
+
+// Checks that the two-layer query shape (merge per queue inside a subquery, sum in the
+// outer query) compiles and keeps the enforced tenant filter inside the subquery.
+describe("cross-queue counter totals via subquery (env-wide throughput shape)", () => {
+  // deltaSumTimestamp states must merge per queue, then sum outside; this is the
+  // supported shape for env-wide totals.
+  const metricsSchema: TableSchema = {
+    name: "metrics",
+    clickhouseName: "trigger_dev.queue_metrics_v1",
+    timeConstraint: "bucket_at",
+    columns: {
+      bucket_at: { name: "bucket_at", clickhouseName: "created_at", ...column("DateTime64") },
+      queue_name: { name: "queue_name", ...column("String") },
+      started_delta: {
+        name: "started_delta",
+        mergeGroupKey: "queue_name",
+        ...column("String"),
+        groupable: false,
+        sortable: false,
+        filterable: false,
+      },
+      organization_id: { name: "organization_id", ...column("String") },
+      project_id: { name: "project_id", ...column("String") },
+      environment_id: { name: "environment_id", ...column("String") },
+    },
+    tenantColumns: {
+      organizationId: "organization_id",
+      projectId: "project_id",
+      environmentId: "environment_id",
+    },
+  };
+
+  // Prints `query` with only the organization filter enforced and a 7-day range (no
+  // fillGaps option is passed); returns the print result plus the context's warnings.
+  function runSubquery(query: string) {
+    const context = createPrinterContext({
+      schema: createSchemaRegistry([metricsSchema]),
+      enforcedWhereClause: {
+        organization_id: { op: "eq", value: "org_test123" },
+      },
+      timeRange: {
+        from: new Date("2024-01-01T00:00:00Z"),
+        to: new Date("2024-01-08T00:00:00Z"),
+      },
+    });
+    const result = printToClickHouse(parseTSQLSelect(query), context);
+    return { ...result, warnings: context.warnings };
+  }
+
+  it("compiles per-queue merge + outer sum, with tenant scoping inside the subquery", () => {
+    const { sql, params } = runSubquery(`
+      SELECT t, sum(started) AS started
+      FROM (
+        SELECT timeBucket() AS t, queue_name, deltaSumTimestampMerge(started_delta) AS started
+        FROM metrics
+        GROUP BY t, queue_name
+      )
+      GROUP BY t
+      ORDER BY t
+    `);
+
+    expect(sql).toContain("deltaSumTimestampMerge(started_delta)");
+    expect(sql).toContain("toStartOfInterval(created_at, INTERVAL 6 HOUR)");
+    // The first mention of organization_id must come after the subquery opens.
+    const subqueryStart = sql.indexOf("FROM (");
+    const tenantFilter = sql.indexOf("organization_id");
+    expect(subqueryStart).toBeGreaterThan(-1);
+    expect(tenantFilter).toBeGreaterThan(subqueryStart);
+    expect(Object.values(params)).toContain("org_test123");
+  });
+});
+
+// Exercises validation of a column that declares a single `mergeGroupKey`: merging it
+// must be rejected unless the key column is grouped by or pinned to one value, either
+// in the query's WHERE or through the enforced clause.
+describe("mergeGroupKey validation", () => {
+  // `queue` is exposed under a different ClickHouse name (`queue_name`), so these tests
+  // also cover the user-facing name vs. printed name split.
+  const metricsSchema: TableSchema = {
+    name: "metrics",
+    clickhouseName: "trigger_dev.queue_metrics_v1",
+    timeConstraint: "bucket_at",
+    columns: {
+      bucket_at: { name: "bucket_at", ...column("DateTime64") },
+      queue: { name: "queue", clickhouseName: "queue_name", ...column("String") },
+      started_delta: {
+        name: "started_delta",
+        mergeGroupKey: "queue",
+        ...column("String"),
+        groupable: false,
+        sortable: false,
+        filterable: false,
+      },
+      organization_id: { name: "organization_id", ...column("String") },
+      project_id: { name: "project_id", ...column("String") },
+      environment_id: { name: "environment_id", ...column("String") },
+    },
+    tenantColumns: {
+      organizationId: "organization_id",
+      projectId: "project_id",
+      environmentId: "environment_id",
+    },
+  };
+
+  // Prints `query` against metricsSchema. `enforced` defaults to an organization-only
+  // clause; tests override it to pin `queue` through the enforced clause. The loosely
+  // typed fixture is cast to `never` when handed to the context.
+  function compile(
+    query: string,
+    enforced: Record<string, unknown> = { organization_id: { op: "eq", value: "org_x" } }
+  ) {
+    const context = createPrinterContext({
+      schema: createSchemaRegistry([metricsSchema]),
+      enforcedWhereClause: enforced as never,
+      timeRange: {
+        from: new Date("2024-01-01T00:00:00Z"),
+        to: new Date("2024-01-08T00:00:00Z"),
+      },
+    });
+    return printToClickHouse(parseTSQLSelect(query), context);
+  }
+
+  it("rejects an ungrouped, unpinned merge with an actionable message", () => {
+    // The message must name the column and spell out all three suggested remedies.
+    expect(() =>
+      compile(
+        "SELECT timeBucket() AS t, deltaSumTimestampMerge(started_delta) AS started FROM metrics GROUP BY t"
+      )
+    ).toThrowError(
+      /Merging 'started_delta' across every queue[\s\S]*GROUP BY queue\)[\s\S]*WHERE queue = 'my-queue'[\s\S]*inner GROUP BY t, queue and outer GROUP BY t/
+    );
+  });
+
+  it("allows the merge when queue is in the GROUP BY", () => {
+    const { sql } = compile(
+      "SELECT timeBucket() AS t, queue, deltaSumTimestampMerge(started_delta) AS started FROM metrics GROUP BY t, queue"
+    );
+    expect(sql).toContain("deltaSumTimestampMerge(started_delta)");
+  });
+
+  it("allows the merge when queue is pinned by an equality filter", () => {
+    const { sql } = compile(
+      "SELECT deltaSumTimestampMerge(started_delta) AS started FROM metrics WHERE queue = 'emails'"
+    );
+    expect(sql).toContain("deltaSumTimestampMerge(started_delta)");
+  });
+
+  it("allows the merge when the enforced clause pins queue to one value", () => {
+    const { sql } = compile(
+      "SELECT deltaSumTimestampMerge(started_delta) AS started FROM metrics",
+      { organization_id: { op: "eq", value: "org_x" }, queue: { op: "in", values: ["emails"] } }
+    );
+    expect(sql).toContain("deltaSumTimestampMerge(started_delta)");
+  });
+
+  it("rejects the merge when the enforced clause spans several queues", () => {
+    expect(() =>
+      compile("SELECT deltaSumTimestampMerge(started_delta) AS started FROM metrics", {
+        organization_id: { op: "eq", value: "org_x" },
+        queue: { op: "in", values: ["emails", "webhooks"] },
+      })
+    ).toThrowError(/only combine correctly within one queue/);
+  });
+
+  it("allows a grouped inner merge summed by the outer query", () => {
+    const { sql } = compile(
+      "SELECT t, sum(started) AS started FROM (SELECT timeBucket() AS t, queue, deltaSumTimestampMerge(started_delta) AS started FROM metrics GROUP BY t, queue) GROUP BY t ORDER BY t"
+    );
+    // The inner GROUP BY is printed with the ClickHouse column name.
+    expect(sql).toContain("GROUP BY t, queue_name");
+  });
+
+  it("rejects an ungrouped merge inside a subquery", () => {
+    expect(() =>
+      compile(
+        "SELECT t, sum(started) AS started FROM (SELECT timeBucket() AS t, deltaSumTimestampMerge(started_delta) AS started FROM metrics GROUP BY t) GROUP BY t"
+      )
+    ).toThrowError(/only combine correctly within one queue/);
+  });
+});
+
+// Exercises a column whose `mergeGroupKey` lists two key columns: each listed key must
+// independently be grouped by or pinned before the merge is accepted.
+describe("compound mergeGroupKey validation", () => {
+  const byKeySchema: TableSchema = {
+    name: "metrics_by_key",
+    clickhouseName: "trigger_dev.queue_metrics_ck_v1",
+    timeConstraint: "bucket_at",
+    columns: {
+      bucket_at: { name: "bucket_at", ...column("DateTime64") },
+      queue: { name: "queue", clickhouseName: "queue_name", ...column("String") },
+      concurrency_key: { name: "concurrency_key", ...column("String") },
+      started_delta: {
+        name: "started_delta",
+        mergeGroupKey: ["queue", "concurrency_key"],
+        ...column("String"),
+        groupable: false,
+        sortable: false,
+        filterable: false,
+      },
+      organization_id: { name: "organization_id", ...column("String") },
+      project_id: { name: "project_id", ...column("String") },
+      environment_id: { name: "environment_id", ...column("String") },
+    },
+    tenantColumns: {
+      organizationId: "organization_id",
+      projectId: "project_id",
+      environmentId: "environment_id",
+    },
+  };
+
+  // Prints `query` against byKeySchema with only the organization filter enforced.
+  function compile(query: string) {
+    const context = createPrinterContext({
+      schema: createSchemaRegistry([byKeySchema]),
+      enforcedWhereClause: { organization_id: { op: "eq", value: "org_x" } } as never,
+      timeRange: {
+        from: new Date("2024-01-01T00:00:00Z"),
+        to: new Date("2024-01-08T00:00:00Z"),
+      },
+    });
+    return printToClickHouse(parseTSQLSelect(query), context);
+  }
+
+  it("requires EVERY listed key grouped or pinned", () => {
+    // Only `queue` is pinned, so the error names the uncovered `concurrency_key`.
+    expect(() =>
+      compile(
+        "SELECT deltaSumTimestampMerge(started_delta) AS started FROM metrics_by_key WHERE queue = 'emails'"
+      )
+    ).toThrowError(/only combine correctly within one concurrency_key/);
+    // Only `concurrency_key` is grouped, so the error names the uncovered `queue`.
+    expect(() =>
+      compile(
+        "SELECT concurrency_key, deltaSumTimestampMerge(started_delta) AS started FROM metrics_by_key GROUP BY concurrency_key"
+      )
+    ).toThrowError(/only combine correctly within one queue/);
+  });
+
+  it("allows pin + group combinations covering both keys", () => {
+    // One key pinned, the other grouped.
+    const grouped = compile(
+      "SELECT concurrency_key, deltaSumTimestampMerge(started_delta) AS started FROM metrics_by_key WHERE queue = 'emails' GROUP BY concurrency_key"
+    );
+    expect(grouped.sql).toContain("deltaSumTimestampMerge(started_delta)");
+    // Both keys pinned.
+    const pinned = compile(
+      "SELECT deltaSumTimestampMerge(started_delta) AS started FROM metrics_by_key WHERE queue = 'emails' AND concurrency_key = 't1'"
+    );
+    expect(pinned.sql).toContain("deltaSumTimestampMerge(started_delta)");
+    // Both keys grouped.
+    const bothGrouped = compile(
+      "SELECT queue, concurrency_key, deltaSumTimestampMerge(started_delta) AS started FROM metrics_by_key GROUP BY queue, concurrency_key"
+    );
+    expect(bothGrouped.sql).toContain("GROUP BY queue_name, concurrency_key");
+  });
+});
diff --git a/internal-packages/tsql/src/query/printer.ts b/internal-packages/tsql/src/query/printer.ts
index 82d97f5491b..3ee9a0ab76a 100644
--- a/internal-packages/tsql/src/query/printer.ts
+++ b/internal-packages/tsql/src/query/printer.ts
@@ -385,6 +385,8 @@ export class ClickHousePrinter {
       nextJoin = nextJoin.next_join;
     }
 
+    this.validateMergeScopedColumns(node);
+
     // Extract SELECT column aliases BEFORE visiting columns
     // This allows ORDER BY/HAVING to reference aliased columns
     const savedAliases = this.selectAliases;
@@ -459,6 +461,25 @@ export class ClickHousePrinter {
       this.inProjectionContext = false;
     }
 
+    // Opt-in gap-fill: emit rows for empty time buckets via WITH FILL / INTERPOLATE.
+    // No-op unless enabled, top-level, and the query is fill-eligible.
+    let interpolateClause: string | null = null;
+    let groupedFillWrap: ((inner: string) => string) | null = null;
+    if (this.context.fillGaps && isTopLevelQuery) {
+      const fill = this.buildGapFill(node, orderBy, groupBy);
+      if (fill) {
+        orderBy = fill.orderBy;
+        if (fill.kind === "inline") {
+          interpolateClause = fill.interpolate;
+        } else {
+          // Grouped per-group LOCF: add the `present` sentinel to this (now inner) query
+          // and wrap the rendered SQL in the block-id + carry window layers below.
+          columns.push(fill.presentColumn);
+          groupedFillWrap = fill.wrap;
+        }
+      }
+    }
+
     // Process ARRAY JOIN
     let arrayJoin = "";
     if (node.array_join_op) {
@@ -487,6 +508,8 @@ export class ClickHousePrinter {
       having ? `HAVING${space}${having}` : null,
       windowClause ? `WINDOW${space}${windowClause}` : null,
       orderBy && orderBy.length > 0 ? `ORDER BY${space}${orderBy.join(comma)}` : null,
+      // INTERPOLATE must follow the full ORDER BY (including WITH FILL)
+      interpolateClause,
     ];
 
     // Process LIMIT
@@ -549,6 +572,11 @@ export class ClickHousePrinter {
       response = this.pretty ? `(${response.trim()})` : `(${response})`;
     }
 
+    // Grouped per-group gap fill wraps this query in the block-id + carry window layers.
+    if (groupedFillWrap) {
+      response = groupedFillWrap(response);
+    }
+
     // Restore saved contexts (for nested queries)
     this.selectAliases = savedAliases;
     this.queryHasGroupBy = savedQueryHasGroupBy;
@@ -559,6 +587,183 @@ export class ClickHousePrinter {
     return response;
   }
 
+  /**
+   * Build the gap-fill rewrite for a top-level time-bucketed query so that empty
+   * time buckets still produce rows.
+   *
+   * Returns null when the query is not fill-eligible (correct-by-construction:
+   * emit nothing extra rather than risk wrong values). Eligibility: a time range
+   * and a time-constraint table are available, SELECT contains exactly one
+   * timeBucket() column, and ORDER BY is led by that column in ascending order.
+   *
+   * Three shapes are produced:
+   * - Single series: WITH FILL on the bucket; carry (gauge) columns are LOCF'd via
+   *   INTERPOLATE, everything else takes WITH FILL's default.
+   * - Grouped, no carry columns: the group dimensions are placed ahead of the
+   *   bucket in ORDER BY so the fill runs per group.
+   * - Grouped with carry columns: INTERPOLATE would bleed across groups, so the
+   *   query is wrapped in window layers that carry the last real value per group.
+   *   This needs every GROUP BY dimension to be a plain selected column; otherwise
+   *   a `fill_skipped_grouped_gauge` warning is recorded and null is returned.
+   *
+   * No query parameters are registered unless a fill is actually returned.
+   *
+   * @param node - The SELECT being printed.
+   * @param orderBy - Already-printed ORDER BY terms, or null when absent.
+   * @param groupBy - Already-printed GROUP BY expressions, or null when absent.
+   * @returns `inline` (replacement ORDER BY plus optional INTERPOLATE clause),
+   *   `wrap` (inner ORDER BY, sentinel column to append, and a function that wraps
+   *   the rendered inner SQL), or null when no fill is applied.
+   */
+  private buildGapFill(
+    node: SelectQuery,
+    orderBy: string[] | null,
+    groupBy: string[] | null
+  ):
+    | { kind: "inline"; orderBy: string[]; interpolate: string | null }
+    | { kind: "wrap"; orderBy: string[]; presentColumn: string; wrap: (inner: string) => string }
+    | null {
+    if (!orderBy || orderBy.length === 0 || !node.select || node.select.length === 0) {
+      return null;
+    }
+
+    const timeRange = this.context.timeRange;
+    if (!timeRange) {
+      return null;
+    }
+
+    // Need a time-constraint table to derive the bucket column + interval.
+    const tableWithConstraint = this.findTimeConstraintTable();
+    if (!tableWithConstraint) {
+      return null;
+    }
+    const { tableSchema, clickhouseColumnName } = tableWithConstraint;
+    const interval = calculateTimeBucketInterval(
+      timeRange.from,
+      timeRange.to,
+      tableSchema.timeBucketThresholds
+    );
+    const intervalSql = `INTERVAL ${interval.value} ${interval.unit}`;
+    const bucketSql = `toStartOfInterval(${escapeClickHouseIdentifier(clickhouseColumnName)}, ${intervalSql})`;
+
+    // Find exactly one timeBucket() column in SELECT and its output alias.
+    let bucketAlias: string | null = null;
+    let bucketCount = 0;
+    for (const col of node.select) {
+      const isAliased = (col as Alias).expression_type === "alias";
+      const inner = isAliased ? (col as Alias).expr : col;
+      if (
+        (inner as Call).expression_type === "call" &&
+        (inner as Call).name.toLowerCase() === "timebucket"
+      ) {
+        bucketCount++;
+        bucketAlias = isAliased ? (col as Alias).alias : "timebucket";
+      }
+    }
+    if (bucketCount !== 1 || !bucketAlias) {
+      return null;
+    }
+    // Narrowed copy so the closures below need no non-null assertion.
+    const bucketName: string = bucketAlias;
+    const bucketNameLower = bucketName.toLowerCase();
+
+    // ORDER BY must be led by the timeBucket column (alias or full expression).
+    // Don't fight a user ordering like `ORDER BY count DESC`.
+    const leadTerm = orderBy[0];
+    // Strip a trailing ASC/DESC direction without a regex: an unanchored `\s+` before the
+    // keyword backtracks polynomially across start positions on whitespace runs (CodeQL
+    // js/polynomial-redos). endsWith + slice is linear.
+    const trimmedLead = leadTerm.trim();
+    const upperLead = trimmedLead.toUpperCase();
+    const isDescending = upperLead.endsWith(" DESC");
+    const leadExpr = upperLead.endsWith(" ASC")
+      ? trimmedLead.slice(0, -4).trimEnd()
+      : isDescending
+        ? trimmedLead.slice(0, -5).trimEnd()
+        : trimmedLead;
+    const matchesBucket = (expr: string): boolean =>
+      expr.toLowerCase() === bucketNameLower || expr === bucketSql;
+    if (!matchesBucket(leadExpr)) {
+      return null;
+    }
+    // WITH FILL is emitted with ascending bounds and a positive STEP, which is
+    // only valid for an ascending bucket order. A descending order would need
+    // swapped bounds and a negative step (newer ClickHouse only), so skip the
+    // gap-fill rewrite and let the plain descending ORDER BY stand.
+    if (isDescending) {
+      return null;
+    }
+
+    // Group dims = GROUP BY expressions that are NOT the timeBucket column.
+    const groupDims = (groupBy ?? []).filter((g) => !matchesBucket(g.trim()));
+
+    // Classify each SELECT output column. Carry (gauge) columns survive through
+    // aliases + value-preserving aggregates (see analyzeSelectColumn). A bare column
+    // that isn't the bucket is a GROUP BY dimension; everything else is a counter or
+    // derived value that zero-fills.
+    const carryAliases: string[] = [];
+    const dimNames: string[] = [];
+    const orderedOutputs: Array<{ name: string; carry: boolean }> = [];
+    for (const col of node.select) {
+      const { outputName, sourceColumn } = this.analyzeSelectColumn(col);
+      if (!outputName) continue;
+      const carry = sourceColumn?.fillMode === "carry";
+      orderedOutputs.push({ name: outputName, carry });
+      if (carry) carryAliases.push(outputName);
+      const inner = (col as Alias).expression_type === "alias" ? (col as Alias).expr : col;
+      if (!matchesBucket(outputName) && (inner as Field).expression_type === "field") {
+        dimNames.push(outputName);
+      }
+    }
+
+    const isGrouped = groupDims.length > 0;
+    const hasCarry = carryAliases.length > 0;
+
+    // Grouped + gauge is only safe when every GROUP BY dim is a plain column we can
+    // PARTITION BY. Decide this before registering the FROM/TO parameters so a skipped
+    // fill leaves no unused parameters behind in the compiled query.
+    if (isGrouped && hasCarry && dimNames.length !== groupDims.length) {
+      this.context.addWarning(
+        "fill_skipped_grouped_gauge",
+        "fillGaps was skipped: per-group gap fill needs every GROUP BY dimension to be a plain column."
+      );
+      return null;
+    }
+
+    // Snap FROM/TO to the bucket grid and parameterize the bounds.
+    // NOTE(review): ClickHouse documents the WITH FILL `TO` bound as exclusive, so when
+    // `timeRange.to` is not bucket-aligned the trailing partial bucket would not be
+    // filled. Confirm this is intended.
+    const fromBound = this.context.addValue(timeRange.from);
+    const toBound = this.context.addValue(timeRange.to);
+    const withFill =
+      `WITH FILL FROM toStartOfInterval(${fromBound}, ${intervalSql})` +
+      ` TO toStartOfInterval(${toBound}, ${intervalSql})` +
+      ` STEP ${intervalSql}`;
+    const filledLeadTerm = `${leadTerm} ${withFill}`;
+
+    const esc = escapeClickHouseIdentifier;
+
+    // Single series: WITH FILL on the bucket + INTERPOLATE the carry columns (LOCF);
+    // counters omitted from INTERPOLATE so they zero-fill.
+    if (!isGrouped) {
+      const newOrderBy = [...orderBy];
+      newOrderBy[0] = filledLeadTerm;
+      const interpolate = hasCarry
+        ? `INTERPOLATE (${carryAliases.map((a) => `${esc(a)} AS ${esc(a)}`).join(", ")})`
+        : null;
+      return { kind: "inline", orderBy: newOrderBy, interpolate };
+    }
+
+    // Grouped, counters only: per-group zero-fill via WITH FILL ordered by the dims.
+    // NOTE(review): only the leading ORDER BY term is kept here; any further user
+    // ORDER BY terms are not carried over. Confirm that is intended.
+    if (!hasCarry) {
+      return {
+        kind: "inline",
+        orderBy: [...groupDims, filledLeadTerm],
+        interpolate: null,
+      };
+    }
+
+    // Grouped + gauge: per-group LOCF. INTERPOLATE bleeds across groups, so densify per
+    // group (WITH FILL + a `present` sentinel that is 0 on filled rows), assign a block id
+    // that increments at each real row, then carry the block's real value via window max.
+    const presentCol = "__tsql_present";
+    const blockCol = "__tsql_block";
+    const partitionDims = dimNames.map(esc).join(", ");
+    const blockExpr =
+      `sum(${esc(presentCol)}) OVER (PARTITION BY ${partitionDims} ORDER BY ${esc(bucketName)}` +
+      ` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS ${esc(blockCol)}`;
+    const finalColumns = orderedOutputs.map(({ name, carry }) =>
+      carry
+        ? `max(if(${esc(presentCol)} = 1, ${esc(name)}, NULL)) OVER (PARTITION BY ${partitionDims}, ${esc(
+            blockCol
+          )}) AS ${esc(name)}`
+        : esc(name)
+    );
+    // The user's full ORDER BY (non-empty, checked on entry) is re-applied on the
+    // outermost layer because the inner query is ordered for the fill instead.
+    const finalOrderBy = ` ORDER BY ${orderBy.join(", ")}`;
+    const wrap = (inner: string): string =>
+      `SELECT ${finalColumns.join(", ")} FROM (SELECT *, ${blockExpr} FROM (${inner.trim()}))${finalOrderBy}`;
+
+    return {
+      kind: "wrap",
+      orderBy: [...dimNames.map(esc), filledLeadTerm],
+      presentColumn: `1 AS ${esc(presentCol)}`,
+      wrap,
+    };
+  }
+
   /**
    * Extract column aliases from a SELECT expression.
    * Handles explicit aliases (AS name) and implicit names from aggregations/functions.
@@ -1014,11 +1219,12 @@ export class ClickHousePrinter {
         if ((firstArg as Field).expression_type === "field") {
           const field = firstArg as Field;
           const columnInfo = this.resolveFieldToColumn(field.chain);
-          // Only propagate customRenderType, not the full column schema
-          if (columnInfo.column?.customRenderType) {
+          // Propagate customRenderType and fillMode (gauge-ness), not the full column schema
+          if (columnInfo.column?.customRenderType || columnInfo.column?.fillMode) {
             sourceColumn = {
               type: inferredType,
               customRenderType: columnInfo.column.customRenderType,
+              fillMode: columnInfo.column.fillMode,
             };
           }
         }
@@ -1679,6 +1885,138 @@ export class ClickHousePrinter {
     // Note: projectId and environmentId are optional - no validation needed
   }
 
+  /**
+   * Guard against merging a scope-keyed aggregate state column (one declaring
+   * `mergeGroupKey`) across different values of its key, which would silently
+   * produce wrong numbers.
+   *
+   * For every such column referenced in this SELECT scope, each key must be covered
+   * by one of: a GROUP BY on the key, a pin in the query's WHERE, or a pin in the
+   * enforced clause. Subqueries are not inspected here; they validate themselves
+   * when they are printed.
+   *
+   * @param node - The SELECT scope to validate.
+   * @throws QueryError naming the first uncovered key, with suggested rewrites.
+   */
+  private validateMergeScopedColumns(node: SelectQuery): void {
+    for (const tableSchema of this.tableContexts.values()) {
+      for (const column of Object.values(tableSchema.columns)) {
+        const scopeKey = column.mergeGroupKey;
+        if (!scopeKey) continue;
+        if (!this.scopeReferencesColumn(node, column.name)) continue;
+
+        const requiredKeys = Array.isArray(scopeKey) ? scopeKey : [scopeKey];
+        // First key that is neither grouped nor pinned anywhere.
+        const key = requiredKeys.find(
+          (candidate) =>
+            !(
+              this.groupByIncludesColumn(node, candidate) ||
+              this.wherePinsColumn(node.where, candidate) ||
+              this.enforcedPinsColumn(tableSchema, candidate)
+            )
+        );
+        if (key === undefined) continue;
+
+        throw new QueryError(
+          `Merging '${column.name}' across every ${key} returns wrong totals: its aggregate ` +
+            `states are kept per ${key} and only combine correctly within one ${key}. Either ` +
+            `add '${key}' to the GROUP BY and sum the per-${key} results in an outer query, ` +
+            `for example: SELECT sum(v) AS total FROM (SELECT ${key}, ` +
+            `deltaSumTimestampMerge(${column.name}) AS v FROM ${tableSchema.name} ` +
+            `GROUP BY ${key}). Or filter to a single ${key}, for example: ` +
+            `WHERE ${key} = 'my-${key}'. For a time series, bucket both layers: ` +
+            `inner GROUP BY t, ${key} and outer GROUP BY t.`
+        );
+      }
+    }
+  }
+
+  /**
+   * Whether any clause of this SELECT (select list, PREWHERE, WHERE, GROUP BY,
+   * HAVING, ORDER BY) contains a field whose last chain segment is `name`.
+   */
+  private scopeReferencesColumn(node: SelectQuery, name: string): boolean {
+    const clauses: unknown[] = [
+      node.select,
+      node.prewhere,
+      node.where,
+      node.group_by,
+      node.having,
+      node.order_by,
+    ];
+    for (const clause of clauses) {
+      if (this.expressionReferencesColumn(clause, name)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Recursively search an AST fragment for a field whose last chain segment is `name`.
+   *
+   * Nested SELECTs (`select_query` / `select_set_query`) are not descended into, and
+   * the `type` and `parent` properties of a node are not followed.
+   *
+   * @param expr - Any value from the AST: node, array of nodes, or primitive.
+   * @param name - Column name to look for.
+   * @param seen - Objects already visited in this search, so each is inspected once.
+   */
+  private expressionReferencesColumn(
+    expr: unknown,
+    name: string,
+    seen = new WeakSet<object>()
+  ): boolean {
+    // Primitives and null cannot contain a field reference.
+    if (typeof expr !== "object" || expr === null) return false;
+    if (seen.has(expr)) return false;
+    seen.add(expr);
+
+    if (Array.isArray(expr)) {
+      for (const element of expr) {
+        if (this.expressionReferencesColumn(element, name, seen)) return true;
+      }
+      return false;
+    }
+
+    const astNode = expr as { expression_type?: string; chain?: unknown[] };
+    switch (astNode.expression_type) {
+      case "select_query":
+      case "select_set_query":
+        // A nested query is its own scope.
+        return false;
+      case "field":
+        if (Array.isArray(astNode.chain) && astNode.chain[astNode.chain.length - 1] === name) {
+          return true;
+        }
+        break;
+    }
+
+    for (const [property, value] of Object.entries(expr)) {
+      if (property === "type" || property === "parent") continue;
+      if (this.expressionReferencesColumn(value, name, seen)) return true;
+    }
+    return false;
+  }
+
+  /**
+   * Whether the GROUP BY lists `name` as a bare field (matched on the last chain
+   * segment). Expressions wrapping the column do not count.
+   */
+  private groupByIncludesColumn(node: SelectQuery, name: string): boolean {
+    for (const groupExpr of node.group_by ?? []) {
+      const candidate = groupExpr as Field;
+      if (candidate.expression_type !== "field" || !Array.isArray(candidate.chain)) continue;
+      if (candidate.chain[candidate.chain.length - 1] === name) return true;
+    }
+    return false;
+  }
+
+  /**
+   * Whether the WHERE expression pins `name` through a comparison on its top-level
+   * AND chain. Anything nested under another node type (such as OR) is ignored,
+   * since a pin there guarantees nothing.
+   *
+   * A comparison counts when one side is the key field and the operator is `=`, or
+   * is IN / GLOBAL IN against a constant or a one-element tuple.
+   */
+  private wherePinsColumn(where: Expression | undefined, name: string): boolean {
+    if (!where) return false;
+
+    if (where.expression_type === "and") {
+      for (const conjunct of (where as And).exprs) {
+        if (this.wherePinsColumn(conjunct, name)) return true;
+      }
+      return false;
+    }
+
+    if (where.expression_type !== "compare_operation") return false;
+    const comparison = where as CompareOperation;
+
+    const refersToKey = (side: Expression): boolean => {
+      const candidate = side as Field;
+      if (candidate.expression_type !== "field" || !Array.isArray(candidate.chain)) return false;
+      return candidate.chain[candidate.chain.length - 1] === name;
+    };
+
+    // Prefer the left side as the key field; fall back to the right.
+    const keyOnLeft = refersToKey(comparison.left);
+    if (!keyOnLeft && !refersToKey(comparison.right)) return false;
+
+    switch (comparison.op) {
+      case CompareOperationOp.Eq:
+        return true;
+      case CompareOperationOp.In:
+      case CompareOperationOp.GlobalIn: {
+        const operand = keyOnLeft ? comparison.right : comparison.left;
+        if ((operand as Constant).expression_type === "constant") return true;
+        const list = operand as Tuple;
+        return list.expression_type === "tuple" && list.exprs.length === 1;
+      }
+      default:
+        return false;
+    }
+  }
+
+  /**
+   * Whether the enforced WHERE clause restricts `key` to a single value: an `eq`
+   * condition, or an `in` condition with exactly one value. The clause is looked up
+   * under the schema column name and, when the column declares one, its
+   * `clickhouseName`.
+   */
+  private enforcedPinsColumn(tableSchema: TableSchema, key: string): boolean {
+    const physicalName = tableSchema.columns[key]?.clickhouseName;
+    const lookupNames = physicalName ? [key, physicalName] : [key];
+    return lookupNames.some((lookupName) => {
+      const enforced = this.context.enforcedWhereClause[lookupName] as
+        | { op?: string; values?: unknown[] }
+        | undefined;
+      if (!enforced) return false;
+      return enforced.op === "eq" || (enforced.op === "in" && enforced.values?.length === 1);
+    });
+  }
+
   /**
    * Format a Date as a ClickHouse-compatible DateTime64 string.
    * ClickHouse expects format: 'YYYY-MM-DD HH:MM:SS.mmm' (in UTC)
diff --git a/internal-packages/tsql/src/query/printer_context.ts b/internal-packages/tsql/src/query/printer_context.ts
index d0fb41b5327..a964e2e04af 100644
--- a/internal-packages/tsql/src/query/printer_context.ts
+++ b/internal-packages/tsql/src/query/printer_context.ts
@@ -125,6 +125,9 @@ export class PrinterContext {
    */
   readonly timeRange?: TimeRange;
 
+  /** When true, time-bucketed queries emit rows for empty buckets (opt-in). */
+  readonly fillGaps?: boolean;
+
   constructor(
     /** Schema registry containing allowed tables and columns */
     public readonly schema: SchemaRegistry,
@@ -138,13 +141,16 @@ export class PrinterContext {
      */
     enforcedWhereClause: Record<string, WhereClauseCondition> = {},
     /** Time range for timeBucket() interval calculation */
-    timeRange?: TimeRange
+    timeRange?: TimeRange,
+    /** Opt-in gap-fill for time-bucketed queries */
+    fillGaps?: boolean
   ) {
     // Initialize with default settings
     this.settings = { ...DEFAULT_QUERY_SETTINGS, ...settings };
     this.fieldMappings = fieldMappings;
     this.enforcedWhereClause = enforcedWhereClause;
     this.timeRange = timeRange;
+    this.fillGaps = fillGaps;
   }
 
   /**
@@ -225,7 +231,8 @@ export class PrinterContext {
       this.settings,
       this.fieldMappings,
       this.enforcedWhereClause,
-      this.timeRange
+      this.timeRange,
+      this.fillGaps
     );
     // Share the same values map so parameters are unified
     child.values = this.values;
@@ -277,6 +284,8 @@ export interface PrinterContextOptions {
    * When provided, `timeBucket()` uses this to determine the appropriate bucket size.
    */
   timeRange?: TimeRange;
+  /** When true, time-bucketed queries emit rows for empty buckets (opt-in). */
+  fillGaps?: boolean;
 }
 
 /**
@@ -288,6 +297,7 @@ export function createPrinterContext(options: PrinterContextOptions): PrinterCon
     options.settings,
     options.fieldMappings,
     options.enforcedWhereClause,
-    options.timeRange
+    options.timeRange,
+    options.fillGaps
   );
 }
diff --git a/internal-packages/tsql/src/query/schema.ts b/internal-packages/tsql/src/query/schema.ts
index 9a1e2d2ddfe..a32b8ea142c 100644
--- a/internal-packages/tsql/src/query/schema.ts
+++ b/internal-packages/tsql/src/query/schema.ts
@@ -122,6 +122,18 @@ export interface ColumnSchema {
    * ```
    */
   customRenderType?: string;
+  /**
+   * Gap-fill behavior when the opt-in `fillGaps` feature emits rows for empty
+   * time buckets: `"carry"` = gauge (last value carried forward via INTERPOLATE),
+   * `"zero"` (default) = counter (missing buckets get 0).
+   */
+  fillMode?: "zero" | "carry";
+  /**
+   * Aggregate-state column whose states only merge correctly within one value of the
+   * named column(s) (e.g. per-queue counter states). Queries referencing it must GROUP BY
+   * every listed column or pin each to a single value; other shapes fail to compile.
+   */
+  mergeGroupKey?: string | string[];
   /**
    * Example value for documentation purposes.
    *
@@ -409,6 +421,21 @@ export interface TableSchema {
    * is needed to get correct results. Not needed for plain MergeTree tables.
    */
   useFinal?: boolean;
+  /**
+   * Coarser physical rollups with an identical logical schema, substituted by callers
+   * (not the printer) when the timeBucket() interval is at least minIntervalSeconds.
+   */
+  rollups?: Array<{ minIntervalSeconds: number; clickhouseName: string }>;
+  /**
+   * Opt into the ClickHouse query cache; callers align time bounds to alignSeconds
+   * so repeated auto-refresh queries share cache entries.
+   */
+  queryCache?: { ttlSeconds: number; alignSeconds: number };
+  /**
+   * Excluded from user-facing listings (query editor, schema docs, schema API) by
+   * callers; the engine still compiles queries against it.
+   */
+  hidden?: boolean;
 }
 
 /**
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 1a56a054f42..a49afc04da5 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -365,6 +365,9 @@ importers:
       '@internal/llm-model-catalog':
         specifier: workspace:*
         version: link:../../internal-packages/llm-model-catalog
+      '@internal/metrics-pipeline':
+        specifier: workspace:*
+        version: link:../../internal-packages/metrics-pipeline
       '@internal/redis':
         specifier: workspace:*
         version: link:../../internal-packages/redis
@@ -1255,6 +1258,25 @@ importers:
         specifier: 4.1.7
         version: 4.1.7(@opentelemetry/api@1.9.1)(@types/node@22.20.0)(@vitest/coverage-v8@4.1.7)(vite@6.4.2(@types/node@22.20.0)(jiti@2.6.1)(lightningcss@1.29.2)(terser@5.46.1)(tsx@4.22.4)(yaml@2.9.0))
 
+  internal-packages/metrics-pipeline:
+    dependencies:
+      '@internal/redis':
+        specifier: workspace:*
+        version: link:../redis
+      '@internal/tracing':
+        specifier: workspace:*
+        version: link:../tracing
+      '@trigger.dev/core':
+        specifier: workspace:*
+        version: link:../../packages/core
+    devDependencies:
+      '@internal/testcontainers':
+        specifier: workspace:*
+        version: link:../testcontainers
+      rimraf:
+        specifier: 6.0.1
+        version: 6.0.1
+
   internal-packages/otlp-importer:
     dependencies:
       long:
@@ -1335,6 +1357,9 @@ importers:
       '@internal/cache':
         specifier: workspace:*
         version: link:../cache
+      '@internal/metrics-pipeline':
+        specifier: workspace:*
+        version: link:../metrics-pipeline
       '@internal/redis':
         specifier: workspace:*
         version: link:../redis