Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
9f6128b
Overhaul missing_docs skill: fix audit blind spots, add change detection
hongyi-chen Jun 11, 2026
9e35135
Expand missing_docs audit: settings, web app, tools, skills, structur…
hongyi-chen Jun 11, 2026
3ea28b6
Add completeness accounting and map integrity checks; reclassify GA f…
hongyi-chen Jun 11, 2026
c671067
Target the public warp repo instead of warp-internal
hongyi-chen Jun 11, 2026
4da460d
Merge main into oz/missing-docs-skill-overhaul
hongyi-chen Jun 12, 2026
3a3b3fe
First drift-watch burn-down: settings, keybindings, slash commands, S…
hongyi-chen Jun 30, 2026
03ac886
Merge origin/main into oz/missing-docs-skill-overhaul
hongyi-chen Jun 30, 2026
888cf87
docs(missing_docs): add reviewer routing from code ownership
hongyi-chen Jun 30, 2026
5863b8c
docs(missing_docs): address review comments
hongyi-chen Jun 30, 2026
94fa141
demo(missing_docs): CLI drift burn-down sample (api-key / schedule / …
hongyi-chen Jun 30, 2026
9af8fde
test(missing_docs): add stdlib test suite for the skill scripts
hongyi-chen Jun 30, 2026
80e107f
docs(missing_docs): encode public vs. private surface boundary
hongyi-chen Jun 30, 2026
2132990
test(missing_docs): cover public/private boundary + run skill tests i…
hongyi-chen Jun 30, 2026
5f08115
feat(missing_docs): rollout-gate CLI/API surfaces via gated:<Flag>
hongyi-chen Jun 30, 2026
bddaea6
docs(missing_docs): sync SKILL.md accounting buckets with gated_non_ga
hongyi-chen Jun 30, 2026
738b7b8
docs(cli): resolve missing_docs CLI drift (oz agent, oz run messaging…
hongyi-chen Jun 30, 2026
6bf2f74
docs(cli): cross-link run-cloud --agent to named-agent management
hongyi-chen Jun 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
335 changes: 308 additions & 27 deletions .agents/skills/missing_docs/SKILL.md

Large diffs are not rendered by default.

337 changes: 241 additions & 96 deletions .agents/skills/missing_docs/references/feature_surface_map.md

Large diffs are not rendered by default.

1,211 changes: 1,211 additions & 0 deletions .agents/skills/missing_docs/references/surface_snapshot.json

Large diffs are not rendered by default.

2,771 changes: 2,443 additions & 328 deletions .agents/skills/missing_docs/scripts/audit_docs.py

Large diffs are not rendered by default.

153 changes: 153 additions & 0 deletions .agents/skills/missing_docs/scripts/suggest_reviewers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
"""Suggest PR reviewers for missing_docs drift-watch changes.

Every docs change the drift-watch flow makes traces back to a concrete source
surface (a feature flag, CLI command, API route, slash command, or setting). This
script maps those *source* files to the engineers who own them, using the
CODEOWNERS-format ownership files that already live in the code repos:

- warp client repo : .github/STAKEHOLDERS (advisory, broad coverage)
- warp-server : .github/STAKEHOLDERS (advisory) + .github/CODEOWNERS (enforced)

Those files are the source of truth for ownership (warp-server keeps STAKEHOLDERS
fresh via the `sync-stakeholders` skill), so this script never duplicates owner
lists — it just resolves against them with standard CODEOWNERS precedence
(last matching rule wins).

Usage:
python3 suggest_reviewers.py \
--warp ../warp --warp-server ../warp-server \
warp:app/src/settings/ssh.rs \
warp:app/src/search/slash_command_menu/static_commands/commands.rs \
warp-server:router/handlers/public_api/runs.go

Source paths may also be piped on stdin (one `repo:relpath` per line). `repo`
is `warp` (the client repo passed via --warp; `warp-internal` is accepted as an
alias) or `warp-server`.

Output: a per-path resolution table, the deduped reviewer set (users and teams),
and a ready-to-run `gh pr edit --add-reviewer` snippet. Exit code is always 0;
unresolved paths are reported but never fatal (so a scheduled run is not blocked
by an ownership gap — it just falls back to the default owners or none).
"""

import argparse
import fnmatch
import sys
from pathlib import Path


def parse_ownership(path):
    """Parse a CODEOWNERS-format file into an ordered list of (pattern, [owners]).

    Args:
        path: A ``pathlib.Path`` to the ownership file, or a falsy value.

    Returns:
        A list of ``(pattern, owners)`` tuples in file order, where ``owners``
        is the non-empty list of ``@``-prefixed tokens on that line. Blank
        lines, full-line comments, and rules without any ``@`` owner are
        skipped. A missing file (or falsy ``path``) yields an empty list.
    """
    rules = []
    if not path or not path.is_file():
        return rules
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        tokens = line.split()
        pattern = tokens[0]
        owners = []
        for tok in tokens[1:]:
            if tok.startswith("#"):
                # Inline comment: everything after it is commentary, so an
                # "@mention" in the comment must not be treated as an owner.
                break
            if tok.startswith("@"):
                owners.append(tok)
        if owners:
            rules.append((pattern, owners))
    return rules


def pattern_matches(pattern, rel_path):
    """Practical CODEOWNERS matching for a repo-relative POSIX path.

    Leading slashes are stripped from both arguments, so every pattern is
    treated as anchored at the repository root.

    Args:
        pattern: A CODEOWNERS pattern (e.g. ``/app/src/``, ``*.rs``,
            ``**/Cargo.toml``).
        rel_path: The repo-relative path to test.

    Returns:
        True when ``pattern`` covers ``rel_path``, otherwise False.
    """
    pat = pattern.lstrip("/")  # all our rules are root-anchored
    p = rel_path.lstrip("/")
    if pat in ("", "*", "**"):
        return True  # default fallback rule (e.g. `/ @org/team`)

    is_dir_rule = pat.endswith("/")
    # Directory prefix: matches the dir and everything under it. The literal
    # comparison runs first so directory names containing glob characters
    # (e.g. `[id]`) keep matching themselves.
    if is_dir_rule and (p == pat[:-1] or p.startswith(pat)):
        return True

    if any(ch in pat for ch in "*?["):  # glob pattern
        # A trailing slash only marks "directory"; drop it so the glob itself
        # and "<glob>/*" can be tested below.
        glob = pat[:-1] if is_dir_rule else pat
        candidates = [glob]
        if "**/" in glob:
            # `**/` may also match zero directories (`**/x` covers root `x`).
            candidates.append(glob.replace("**/", ""))
        # fnmatchcase keeps matching case-sensitive and platform-independent.
        return any(
            fnmatch.fnmatchcase(p, cand) or fnmatch.fnmatchcase(p, cand + "/*")
            for cand in candidates
        )

    if is_dir_rule:
        return False
    # bare path: exact file, or a directory given without a trailing slash
    return p == pat or p.startswith(pat + "/")


def owners_for(rel_path, rules):
    """Resolve the owners of ``rel_path`` with last-match-wins precedence.

    Args:
        rel_path: Repo-relative path to resolve.
        rules: Iterable of ``(pattern, owners)`` pairs in file order.

    Returns:
        ``(owners, matched_pattern)`` for the last rule whose pattern matches,
        or ``(None, None)`` when no rule matches.
    """
    hits = [
        (rule_owners, rule_pattern)
        for rule_pattern, rule_owners in rules
        if pattern_matches(rule_pattern, rel_path)
    ]
    if not hits:
        return (None, None)
    # Later rules override earlier ones, so the final hit is the winner.
    return hits[-1]


def main():
    """Resolve reviewers for the requested source paths and print a report.

    Source paths come from the positional arguments and, when stdin is not a
    TTY, from stdin as well (one ``repo:relpath`` per line). Prints a per-path
    resolution table, the deduplicated user and team reviewers, and a
    suggested ``gh pr edit`` command.

    Returns:
        Always 0; unresolved paths are reported but are never fatal.
    """
    parser = argparse.ArgumentParser(description="Suggest PR reviewers from code ownership.")
    parser.add_argument("--warp", help="Path to the warp client repo root (warp-internal accepted).")
    parser.add_argument("--warp-server", dest="warp_server", help="Path to the warp-server repo root.")
    parser.add_argument("paths", nargs="*", help="Source paths as repo:relpath.")
    opts = parser.parse_args()

    def load_rules(repo_root):
        # STAKEHOLDERS first, then CODEOWNERS, so enforced rules take
        # precedence as later matches.
        github_dir = Path(repo_root) / ".github"
        return parse_ownership(github_dir / "STAKEHOLDERS") + parse_ownership(
            github_dir / "CODEOWNERS"
        )

    rules_by_repo = {}
    if opts.warp:
        rules_by_repo["warp"] = load_rules(opts.warp)
        rules_by_repo["warp-internal"] = rules_by_repo["warp"]  # alias
    if opts.warp_server:
        rules_by_repo["warp-server"] = load_rules(opts.warp_server)

    requested = list(opts.paths)
    if not sys.stdin.isatty():
        for stdin_line in sys.stdin:
            entry = stdin_line.strip()
            if entry:
                requested.append(entry)

    if not requested:
        print("No source paths given. Pass repo:relpath args or pipe them on stdin.", file=sys.stderr)
        return 0

    users = []
    teams = []
    unresolved = []
    print("Reviewer resolution:")
    for item in requested:
        repo, sep, rel = item.partition(":")
        if not sep:
            unresolved.append(item)
            print(f" ? {item} — missing repo prefix (use warp: or warp-server:)")
            continue

        repo_rules = rules_by_repo.get(repo)
        if repo_rules is None:
            unresolved.append(item)
            print(f" ? {item} — no ownership file loaded for repo '{repo}'")
            continue

        owners, matched = owners_for(rel, repo_rules)
        if not owners:
            unresolved.append(item)
            print(f" ? {repo}:{rel} — no owner match")
            continue

        print(f" - {repo}:{rel} -> {' '.join(owners)} (matched: {matched})")
        for owner in owners:
            handle = owner.lstrip("@")
            # Team handles look like `org/team`; plain logins have no slash.
            target = teams if "/" in handle else users
            if handle not in target:
                target.append(handle)

    user_text = ", ".join(users) if users else "(none)"
    team_text = ", ".join(teams) if teams else "(none)"
    print()
    print(f"Reviewers (users): {user_text}")
    print(f"Reviewers (teams): {team_text}")
    if unresolved:
        print(f"Unresolved paths: {len(unresolved)} (left for manual assignment)")

    # gh accepts users by login and teams as org/team; both via --add-reviewer.
    reviewers = users + teams
    if reviewers:
        joined = ",".join(reviewers)
        print()
        print("Suggested command (replace <PR> with the PR number):")
        print(f" gh pr edit <PR> --add-reviewer {joined}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
227 changes: 227 additions & 0 deletions .agents/skills/missing_docs/scripts/test_audit_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
#!/usr/bin/env python3
"""Integration tests for audit_docs.py.

These run the audit as a subprocess against the sibling code repos (warp client +
warp-server) and assert behavioral invariants: clean exit, completeness
accounting totality, category/severity scoping, fail-loud on a missing repo, and
that --update-snapshot honors --snapshot without mutating the committed snapshot.

Tests are skipped (not failed) when the sibling code repos aren't checked out, so
the suite is safe to run anywhere.

Run with: python3 .agents/skills/missing_docs/scripts/test_audit_docs.py
(stdlib unittest only; no third-party deps).
"""

import hashlib
import importlib.util
import json
import subprocess
import sys
import tempfile
import unittest
from pathlib import Path

# Directory containing this test file.
_HERE = Path(__file__).resolve().parent
# The audit script under test, expected next to this file.
_AUDIT = _HERE / "audit_docs.py"
_DOCS_ROOT = _HERE.parents[3] # scripts -> missing_docs -> skills -> .agents -> docs
# Snapshot file in the sibling references/ directory.
_DEFAULT_SNAPSHOT = _HERE.parent / "references" / "surface_snapshot.json"
# Parent directory of the docs root; the code repos are looked up here.
_SIBLINGS = _DOCS_ROOT.parent


def _find_warp():
    """Return the sibling warp client checkout, or None when none is found.

    Tries ``warp`` first, then ``warp-internal``; a candidate counts when it
    contains a ``.github`` or an ``app`` entry.
    """
    for repo_name in ("warp", "warp-internal"):
        candidate = _SIBLINGS / repo_name
        if any((candidate / marker).exists() for marker in (".github", "app")):
            return candidate
    return None


def _find_server():
    """Return the sibling ``warp-server`` path when it exists, else None."""
    server_dir = _SIBLINGS / "warp-server"
    if server_dir.exists():
        return server_dir
    return None


# Resolve the sibling checkouts once at import time; either may be None.
WARP = _find_warp()
SERVER = _find_server()
# True only when both repos were found.
_REPOS_AVAILABLE = WARP is not None and SERVER is not None

# Import the audit module directly for repo-free unit tests of pure logic.
# It is loaded from its file path and bound to the name `audit_docs`.
_spec = importlib.util.spec_from_file_location("audit_docs", _AUDIT)
audit_docs = importlib.util.module_from_spec(_spec)
# Runs audit_docs.py's module-level code inside the new module object.
_spec.loader.exec_module(audit_docs)


def _run_audit(extra_args, capture_report=True):
    """Run audit_docs.py as a subprocess against the sibling repos.

    Args:
        extra_args: Extra command-line arguments appended after the repo (and
            optional ``--output``) arguments.
        capture_report: When True, pass ``--output`` pointing at a temporary
            file and parse that file as JSON after the run.

    Returns:
        ``(returncode, report, stderr)``. ``report`` is the parsed JSON report,
        or None when no report was requested, the file is missing or empty, or
        its content is not valid JSON.
    """
    cmd = [sys.executable, str(_AUDIT), "--warp", str(WARP), "--warp-server", str(SERVER)]
    # A TemporaryDirectory (instead of mkstemp) avoids leaking an open file
    # descriptor and leaving a stray report file behind on every call.
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = Path(tmp_dir) / "report.json"
        if capture_report:
            cmd += ["--output", str(out_path)]
        cmd += extra_args
        proc = subprocess.run(cmd, capture_output=True, text=True, stdin=subprocess.DEVNULL)
        report = None
        if capture_report and out_path.exists() and out_path.stat().st_size > 0:
            try:
                report = json.loads(out_path.read_text())
            except json.JSONDecodeError:
                report = None
    return proc.returncode, report, proc.stderr


def _sha(path):
    """Return the hex-encoded SHA-256 digest of the file at ``path``."""
    digest = hashlib.sha256()
    digest.update(Path(path).read_bytes())
    return digest.hexdigest()


@unittest.skipUnless(_REPOS_AVAILABLE, "warp/warp-server repos not checked out as siblings")
class TestAuditBehavior(unittest.TestCase):
    """Subprocess-level tests that run audit_docs.py against the sibling repos.

    The whole class is skipped when the repos are not checked out. Each test
    asserts that a report was produced before reading it, so a missing report
    fails with a clear message rather than a TypeError.
    """

    def test_full_run_is_clean_and_accounts_for_everything(self):
        rc, report, stderr = _run_audit([])
        self.assertEqual(rc, 0, f"audit should exit 0 on a healthy run; stderr={stderr}")
        self.assertIsNotNone(report, "audit should emit a JSON report")
        summary = report["summary"]
        self.assertEqual(summary.get("audits_skipped"), [], "no audits should be skipped")
        self.assertEqual(
            summary["accounting"].get("unaccounted"), {}, "every surface must be accounted for"
        )

    def test_category_scopes_to_one_audit(self):
        rc, report, stderr = _run_audit(["--category", "settings"])
        self.assertEqual(rc, 0, stderr)
        self.assertIsNotNone(report, "audit should emit a JSON report")
        audits_run = report["summary"].get("audits_run", [])
        self.assertIn("settings", audits_run)
        self.assertNotIn("cli", audits_run)
        # CLI category did not run, so its findings should be absent/zero.
        self.assertEqual(report["summary"]["by_category"].get("undocumented_cli_commands", 0), 0)

    def test_severity_filter_excludes_lower_severities(self):
        rc, report, _ = _run_audit(["--severity", "high"])
        self.assertEqual(rc, 0)
        self.assertIsNotNone(report, "audit should emit a JSON report")
        bad = []
        for key, value in report.items():
            if isinstance(value, list):
                for item in value:
                    if isinstance(item, dict) and item.get("severity") in ("low", "medium"):
                        bad.append((key, item.get("severity")))
        self.assertEqual(bad, [], f"--severity high must drop low/medium findings, found: {bad[:5]}")

    def test_fail_loud_on_missing_repo(self):
        # Point --warp at a nonexistent path; the script must exit 2, not pretend "no gaps".
        # The output path lives in a TemporaryDirectory so that no file
        # descriptor or temp file is leaked by this test.
        with tempfile.TemporaryDirectory() as d:
            out_path = Path(d) / "report.json"
            proc = subprocess.run(
                [
                    sys.executable,
                    str(_AUDIT),
                    "--warp",
                    str(_SIBLINGS / "definitely-not-a-real-repo"),
                    "--warp-server",
                    str(SERVER),
                    "--output",
                    str(out_path),
                ],
                capture_output=True,
                text=True,
                stdin=subprocess.DEVNULL,
            )
        self.assertEqual(proc.returncode, 2, f"missing repo must exit 2; stderr={proc.stderr}")

    def test_diff_against_committed_snapshot_is_current(self):
        # The committed snapshot should reflect current code (no pending surface drift).
        rc, report, stderr = _run_audit(["--diff"])
        self.assertEqual(rc, 0, stderr)
        self.assertIsNotNone(report, "audit should emit a JSON report")
        self.assertEqual(
            report["summary"]["by_category"].get("surface_changes", 0),
            0,
            "committed snapshot is stale; regenerate with --update-snapshot",
        )

    def test_update_snapshot_respects_snapshot_flag_and_roundtrips(self):
        before = _sha(_DEFAULT_SNAPSHOT)
        with tempfile.TemporaryDirectory() as d:
            tmp_snap = Path(d) / "snap.json"
            # Regenerate into the temp path (must NOT touch the committed snapshot).
            rc, _, stderr = _run_audit(
                ["--update-snapshot", "--snapshot", str(tmp_snap)], capture_report=False
            )
            self.assertEqual(rc, 0, stderr)
            self.assertTrue(
                tmp_snap.exists() and tmp_snap.stat().st_size > 0,
                "--update-snapshot should write to the --snapshot path",
            )
            self.assertEqual(
                _sha(_DEFAULT_SNAPSHOT), before, "--update-snapshot must not mutate the committed snapshot"
            )
            # Diffing current code against the just-generated snapshot shows no drift.
            rc2, report2, _ = _run_audit(["--diff", "--snapshot", str(tmp_snap)])
            self.assertEqual(rc2, 0)
            self.assertIsNotNone(report2, "audit should emit a JSON report")
            self.assertEqual(report2["summary"]["by_category"].get("surface_changes", 0), 0)

    def test_research_preview_surfaces_are_deferred(self):
        # Public vs. private boundary: Agent Memory is research preview (not public),
        # so its CLI (`oz memory*`) and REST API (`/memory_stores/*`) must never be
        # flagged for documentation. Guards the surface-map deferrals from regressing.
        rc, report, _ = _run_audit([])
        self.assertEqual(rc, 0)
        self.assertIsNotNone(report, "audit should emit a JSON report")
        flagged = []
        for cat in ("undocumented_cli_commands", "undocumented_api_endpoints"):
            for item in report.get(cat, []):
                name = item.get("command") or item.get("endpoint") or ""
                if "memory" in name.lower():
                    flagged.append(name)
        self.assertEqual(
            flagged, [], f"research-preview Agent Memory surfaces must stay deferred, found: {flagged}"
        )


class TestGatedLogic(unittest.TestCase):
    """Repo-free unit tests for the `gated:<Flag>` rollout-aware deferral."""

    def test_gated_flag_helper(self):
        """`_gated_flag` extracts the flag name, or yields None for other targets."""
        gated_flag = audit_docs._gated_flag
        self.assertEqual(gated_flag("gated:AIMemories"), "AIMemories")
        self.assertEqual(gated_flag("gated: Spaced "), "Spaced")
        self.assertIsNone(gated_flag("internal"))
        self.assertIsNone(gated_flag("src/content/docs/x.mdx"))
        self.assertIsNone(gated_flag(None))

    def _run_cli(self, status_map):
        """Run audit_cli on one gated command with the given flag statuses."""
        gated_command = {
            "command": "oz memx",
            "hidden": False,
            "subcommands": [],
            "source_file": None,
        }
        mapping = {"cli_to_doc": {"oz memx": "gated:MemFlag"}}
        with tempfile.TemporaryDirectory() as scratch:
            return audit_docs.audit_cli(
                None,
                Path(scratch),
                mapping,
                {},
                cli_commands=[gated_command],
                flag_statuses=status_map,
            )

    def test_gated_non_ga_cli_is_deferred(self):
        """A command gated on a non-GA flag yields no findings."""
        result = self._run_cli({"MemFlag": "other"})
        self.assertEqual(result, [], "non-GA gated CLI command must be deferred")

    def test_gated_ga_cli_auto_surfaces(self):
        """Once the gating flag is GA, the command appears as a finding."""
        surfaced = [finding["command"] for finding in self._run_cli({"MemFlag": "ga"})]
        self.assertIn("oz memx", surfaced, "a GA gated command must surface as a finding")

    def test_gated_unknown_flag_cli_surfaces(self):
        # Unknown gating flag is treated conservatively (not silently deferred).
        surfaced = [finding["command"] for finding in self._run_cli({})]
        self.assertIn("oz memx", surfaced)

    def test_map_hygiene_flags_unknown_gated_flag(self):
        """Map hygiene reports only the entry whose gating flag is unknown."""
        mapping = {
            "cli_to_doc": {"oz good": "gated:KnownFlag", "oz bad": "gated:BogusFlag"},
            "feature_to_doc": {},
            "api_to_doc": {},
            "slash_to_doc": {},
            "settings_to_doc": {},
            "ignore_flags": set(),
            "duplicates": [],
        }
        known_commands = [
            {"command": "oz good", "hidden": False, "subcommands": []},
            {"command": "oz bad", "hidden": False, "subcommands": []},
        ]
        with tempfile.TemporaryDirectory() as scratch:
            all_findings = audit_docs.audit_map_hygiene(
                mapping, {"KnownFlag": "other"}, known_commands, [], [], {}, Path(scratch)
            )
        gated_only = [entry for entry in all_findings if "Gated target" in entry["reason"]]
        self.assertEqual(len(gated_only), 1, gated_only)
        self.assertEqual(gated_only[0]["entry"], "oz bad")


if __name__ == "__main__":
    unittest.main(verbosity=2)
Loading
Loading