From 6c826a4976681e61bd4934425c190d7ce42eb242 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Fri, 26 Jun 2026 09:49:25 +0800 Subject: [PATCH] Add change_localize: attribute a screen change to the element boxes that changed Existing diffs return raw pixel regions or a11y-element diffs; the gap is 'given a frame diff and a list of element boxes, which of those changed?'. localize_changes diffs reference vs current and scores each element box by its mean per-pixel change; rank_changes is the pure ranker (changed when score >= threshold, sorted most-changed first). cv2/numpy lazy. --- WHATS_NEW.md | 6 ++ .../doc/new_features/v218_features_doc.rst | 50 ++++++++++++ .../Zh/doc/new_features/v218_features_doc.rst | 44 +++++++++++ je_auto_control/__init__.py | 3 + .../gui/script_builder/command_schema.py | 26 ++++++ .../utils/change_localize/__init__.py | 6 ++ .../utils/change_localize/change_localize.py | 74 +++++++++++++++++ .../utils/executor/action_executor.py | 22 ++++++ .../utils/mcp_server/tools/_factories.py | 30 +++++++ .../utils/mcp_server/tools/_handlers.py | 13 +++ .../headless/test_change_localize_batch.py | 79 +++++++++++++++++++ 11 files changed, 353 insertions(+) create mode 100644 docs/source/Eng/doc/new_features/v218_features_doc.rst create mode 100644 docs/source/Zh/doc/new_features/v218_features_doc.rst create mode 100644 je_auto_control/utils/change_localize/__init__.py create mode 100644 je_auto_control/utils/change_localize/change_localize.py create mode 100644 test/unit_test/headless/test_change_localize_batch.py diff --git a/WHATS_NEW.md b/WHATS_NEW.md index 25fe4b24..3ebd77c4 100644 --- a/WHATS_NEW.md +++ b/WHATS_NEW.md @@ -2,6 +2,12 @@ ## What's new (2026-06-26) +### Localize a Change to the Elements That Changed + +Turn a raw screen diff into "element 3 changed" by scoring a list of element boxes. Full reference: [`docs/source/Eng/doc/new_features/v218_features_doc.rst`](docs/source/Eng/doc/new_features/v218_features_doc.rst). 
+ +- **`localize_changes` / `rank_changes`** (`AC_localize_changes`, `AC_rank_changes`): existing diffs answer *where* pixels changed (`motion_regions`, `perceptual_diff`, `ssim_changed_regions` → raw pixel regions) or which *accessibility* elements differ (`element_diff`, needs metadata) — but not "given a frame diff **and a list of element boxes**, which of *those* changed?". `localize_changes` diffs a reference against the current screen and scores each supplied element box by its mean per-pixel change; `rank_changes` is the pure ranker that flags `changed` (score ≥ `threshold`) and sorts most-changed first. Pairs with `set_of_marks`/accessibility boxes to give a per-element "what changed" feedback signal after a click. cv2/numpy imported lazily; ranking is pure and fully testable. Fifth feature of the ROUND-15 perception lane. No `PySide6`. + ### Theme-Invariant Matching (Light Template, Dark Mode) Find a button captured in light mode even after the app switches to dark mode. Full reference: [`docs/source/Eng/doc/new_features/v217_features_doc.rst`](docs/source/Eng/doc/new_features/v217_features_doc.rst). diff --git a/docs/source/Eng/doc/new_features/v218_features_doc.rst b/docs/source/Eng/doc/new_features/v218_features_doc.rst new file mode 100644 index 00000000..694ce0dd --- /dev/null +++ b/docs/source/Eng/doc/new_features/v218_features_doc.rst @@ -0,0 +1,50 @@ +Localize a Change to the Elements That Changed +============================================== + +The existing diffs answer "*where* did pixels change" (``motion_regions``, +``perceptual_diff``, ``ssim_changed_regions`` return raw pixel regions) or "which +*accessibility* elements differ" (``element_diff``, needs a11y metadata). The +missing middle is: given a frame diff **and a list of element boxes**, which of +*those* elements changed? ``change_localize`` scores each supplied box by how +much it changed and ranks them. 
+ +* :func:`rank_changes` — pure: take ``[{box, score}]`` and mark each box + ``changed`` (score at or above ``threshold``), sorted most-changed first. +* :func:`localize_changes` — diff a reference against the current screen, score + each element box by its mean pixel change, and rank them. + +``cv2`` / ``numpy`` are imported lazily (the module stays importable without +them) and the loaders reuse :mod:`visual_match`. The ranking is pure and fully +testable. Imports no ``PySide6``. + +Headless API +------------ + +.. code-block:: python + + from je_auto_control import localize_changes, rank_changes, mark_elements + + boxes = [mark["bbox"] for mark in mark_elements(elements)] + + # After an action, which of those elements actually changed? + changed = localize_changes("before.png", boxes, current="after.png") + for entry in changed: + if entry["changed"]: + print("element changed:", entry["box"], entry["score"]) + + # Or rank pre-computed scores yourself: + rank_changes([{"box": [0, 0, 40, 20], "score": 0.6}], threshold=0.1) + +``localize_changes`` returns ``[{box, score, changed}]`` sorted most-changed +first, where ``score`` is the box's mean per-pixel change (0..1). It pairs with +``set_of_marks`` / accessibility element boxes to turn a raw screen diff into a +per-element "what changed" signal — an agent feedback channel after a click. + +Executor commands +----------------- + +``AC_localize_changes`` (``reference`` + ``boxes`` JSON list + ``current`` / +``threshold`` / ``region`` → ``{changes}``) and ``AC_rank_changes`` +(``scored_boxes`` JSON list + ``threshold`` → ``{changes}``, pure). Both are also +exposed as matching read-only ``ac_*`` MCP tools and as Script Builder commands +under **Image**.
diff --git a/docs/source/Zh/doc/new_features/v218_features_doc.rst b/docs/source/Zh/doc/new_features/v218_features_doc.rst new file mode 100644 index 00000000..bf69978d --- /dev/null +++ b/docs/source/Zh/doc/new_features/v218_features_doc.rst @@ -0,0 +1,44 @@ +把變化歸因到實際改變的元素 +========================== + +既有的 diff 回答「像素在*哪裡*改變」(``motion_regions``、``perceptual_diff``、 +``ssim_changed_regions`` 回傳原始像素區域),或「哪些*無障礙*元素不同」(``element_diff``,需 a11y 中介資料)。 +缺少的中段是:給定一個畫面 diff **與一份元素方框清單**,*那些*元素中哪些改變了?``change_localize`` 依 +每個提供的方框改變多少評分並排序。 + +* :func:`rank_changes` ——純函式:接受 ``[{box, score}]`` 並把每個方框標記為 ``changed`` + (分數達到或超過 ``threshold``),依改變最多排在最前。 +* :func:`localize_changes` ——把參考影像對目前螢幕做 diff,依每個元素方框的平均像素改變評分,再排序。 + +``cv2`` / ``numpy`` 採延遲匯入(模組無需它們即可匯入),載入器重用 :mod:`visual_match`。 +排序為純函式且可完整測試。不匯入 ``PySide6``。 + +無頭 API +-------- + +.. code-block:: python + + from je_auto_control import localize_changes, rank_changes, mark_elements + + boxes = [mark["bbox"] for mark in mark_elements(elements)] + + # 某動作後,那些元素中哪些真的改變了? 
+ changed = localize_changes("before.png", boxes, current="after.png") + for entry in changed: + if entry["changed"]: + print("元素改變:", entry["box"], entry["score"]) + + # 或自行排序預先算好的分數: + rank_changes([{"box": [0, 0, 40, 20], "score": 0.6}], threshold=0.1) + +``localize_changes`` 回傳 ``[{box, score, changed}]`` 依改變最多排序,``score`` 是方框的平均 +逐像素改變(0..1)。它與 ``set_of_marks`` / 無障礙元素方框搭配,把原始螢幕 diff 轉成逐元素的 +「什麼改變了」訊號——點擊後的 agent 回饋通道。 + +執行器指令 +---------- + +``AC_localize_changes``(``reference`` 加上 ``boxes`` JSON 清單加上 ``current`` / +``threshold`` / ``region`` → ``{changes}``)與 ``AC_rank_changes``(``scored_boxes`` JSON 清單加上 +``threshold`` → ``{changes}``,純函式)。皆以對應的唯讀 ``ac_*`` MCP 工具及 Script Builder 指令 +(位於 **Image** 分類下)形式提供。 diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index bfb1a716..8686170d 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -143,6 +143,8 @@ ) # Theme-invariant matching so a light template matches dark mode from je_auto_control.utils.theme_normalize import match_theme, normalize_theme +# Attribute a screen change to the specific element boxes that changed +from je_auto_control.utils.change_localize import localize_changes, rank_changes # Rich clipboard formats — RTF + CSV/TSV codecs and Windows get / set from je_auto_control.utils.clipboard_rich_formats import ( build_rtf, csv_to_rows, get_clipboard_csv, get_clipboard_rtf, rows_to_csv, @@ -1771,6 +1773,7 @@ def start_autocontrol_gui(*args, **kwargs): "place_labels", "label_color", "grade_contrast", "dominant_pair", "region_contrast", "normalize_theme", "match_theme", + "localize_changes", "rank_changes", "build_rtf", "rtf_to_text", "rows_to_csv", "csv_to_rows", "set_clipboard_rtf", "get_clipboard_rtf", "set_clipboard_csv", "get_clipboard_csv", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index 1d1f6f0f..ac0cf971 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ 
b/je_auto_control/gui/script_builder/command_schema.py @@ -4606,6 +4606,32 @@ def _add_work_queue_specs(specs: List[CommandSpec]) -> None: ), description="Locate a template across a light/dark theme flip.", )) + specs.append(CommandSpec( + "AC_rank_changes", "Image", "Rank Changed Boxes", + fields=( + FieldSpec("scored_boxes", FieldType.STRING, + placeholder="JSON list of {box, score}"), + FieldSpec("threshold", FieldType.FLOAT, optional=True, + default=0.1), + ), + description="Rank scored element boxes by how much they changed.", + )) + specs.append(CommandSpec( + "AC_localize_changes", "Image", "Localize Changed Elements", + fields=( + FieldSpec("reference", FieldType.STRING, + placeholder="reference image path"), + FieldSpec("boxes", FieldType.STRING, + placeholder="JSON list of [x, y, w, h]"), + FieldSpec("current", FieldType.STRING, optional=True, + placeholder="current image path (else screen)"), + FieldSpec("threshold", FieldType.FLOAT, optional=True, + default=0.1), + FieldSpec("region", FieldType.STRING, optional=True, + placeholder="[x, y, w, h]"), + ), + description="Rank which element boxes changed between two frames.", + )) specs.append(CommandSpec( "AC_normalize_ext", "Shell", "Normalize Extension", fields=( diff --git a/je_auto_control/utils/change_localize/__init__.py b/je_auto_control/utils/change_localize/__init__.py new file mode 100644 index 00000000..c11b398b --- /dev/null +++ b/je_auto_control/utils/change_localize/__init__.py @@ -0,0 +1,6 @@ +"""Attribute a screen change to the specific element boxes that changed.""" +from je_auto_control.utils.change_localize.change_localize import ( + localize_changes, rank_changes, +) + +__all__ = ["localize_changes", "rank_changes"] diff --git a/je_auto_control/utils/change_localize/change_localize.py b/je_auto_control/utils/change_localize/change_localize.py new file mode 100644 index 00000000..efb38cc6 --- /dev/null +++ b/je_auto_control/utils/change_localize/change_localize.py @@ -0,0 +1,74 @@ 
+"""Attribute a screen change to the specific elements that changed. + +The existing diffs answer "*where* did pixels change" (``motion_regions``, +``perceptual_diff``, ``ssim_changed_regions`` return raw pixel regions) or "which +*accessibility* elements differ" (``element_diff``, needs a11y metadata). The +missing middle is: given a frame diff **and a list of element boxes**, which of +*those* elements changed? ``change_localize`` scores each supplied box by how +much it changed and ranks them. + +* :func:`rank_changes` — pure: take ``[{box, score}]`` and mark each box + ``changed`` (score at or above ``threshold``), sorted most-changed first. +* :func:`localize_changes` — diff a reference against the current screen, score + each element box by its mean pixel change, and rank them. + +cv2 / numpy are imported lazily (the module stays importable without them) and +the loaders reuse :mod:`visual_match`. The ranking is pure and fully testable. +Imports no ``PySide6``. +""" +from typing import Any, Dict, List, Optional, Sequence + + +def _unpack(item: Any) -> tuple: + """Return ``(box, score)`` from a ``{box, score}`` dict or a ``(box, score)``.""" + if isinstance(item, dict): + return item["box"], item["score"] + return item[0], item[1] + + +def rank_changes(scored_boxes: Sequence[Any], *, + threshold: float = 0.1) -> List[Dict[str, Any]]: + """Mark and rank scored element boxes by how much they changed (pure). + + ``scored_boxes`` is a sequence of ``{box, score}`` (or ``(box, score)``). + Returns ``[{box, score, changed}]`` sorted by descending score; ``changed`` + is ``True`` when the score is at or above ``threshold``. 
+ """ + limit = float(threshold) + result = [ + {"box": [int(value) for value in box], + "score": round(float(score), 4), + "changed": float(score) >= limit} + for box, score in (_unpack(item) for item in scored_boxes) + ] + result.sort(key=lambda entry: entry["score"], reverse=True) + return result + + +def _box_mean(diff: Any, box: Sequence[int]) -> float: + """Mean change (0..1) of the diff map inside ``box`` (numpy).""" + x, y, w, h = (int(box[0]), int(box[1]), int(box[2]), int(box[3])) + patch = diff[max(0, y):y + h, max(0, x):x + w] + return float(patch.mean()) if patch.size else 0.0 + + +def localize_changes(reference: Any, boxes: Sequence[Sequence[int]], *, + current: Optional[Any] = None, threshold: float = 0.1, + region: Optional[Sequence[int]] = None + ) -> List[Dict[str, Any]]: + """Score and rank which of ``boxes`` changed between two frames. + + Diffs ``reference`` against ``current`` (a fresh screen grab of ``region`` + by default), takes each box's mean per-pixel change (0..1), and ranks them + via :func:`rank_changes`. Returns ``[{box, score, changed}]``. 
+ """ + import numpy as np + from je_auto_control.utils.visual_match.visual_match import ( + _grab_gray, _to_gray) + ref = _to_gray(reference).astype("float64") + other = current if current is not None else _grab_gray(region) + cur = _to_gray(other).astype("float64") + diff = np.abs(ref - cur) / 255.0 + scored = [{"box": list(box), "score": _box_mean(diff, box)} + for box in boxes] + return rank_changes(scored, threshold=threshold) diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index 6ba236f4..fc271047 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -2912,6 +2912,26 @@ def _match_theme(template: Any, region: Any = None, method: Any = "sobel", return {"found": True, **match} +def _rank_changes(scored_boxes: Any, threshold: Any = 0.1) -> Dict[str, Any]: + """Adapter: rank scored element boxes by how much they changed (pure).""" + from je_auto_control.utils.change_localize import rank_changes + items = _coerce_list(scored_boxes) if scored_boxes else [] + return {"changes": rank_changes(items, threshold=float(threshold))} + + +def _localize_changes(reference: Any, boxes: Any, current: Any = None, + threshold: Any = 0.1, region: Any = None + ) -> Dict[str, Any]: + """Adapter: rank which element boxes changed between two frames (device).""" + from je_auto_control.utils.change_localize import localize_changes + box_list = _coerce_list(boxes) if boxes else [] + changes = localize_changes(str(reference), box_list, + current=str(current) if current else None, + threshold=float(threshold), + region=_coerce_region(region)) + return {"changes": changes} + + def _normalize_ext(target: str) -> Dict[str, Any]: """Adapter: the lowercased extension of a path / bare ext (pure).""" from je_auto_control.utils.file_assoc import normalize_ext @@ -6951,6 +6971,8 @@ def __init__(self): "AC_dominant_pair": _dominant_pair, "AC_region_contrast": 
_region_contrast, "AC_match_theme": _match_theme, + "AC_rank_changes": _rank_changes, + "AC_localize_changes": _localize_changes, "AC_normalize_ext": _normalize_ext, "AC_file_association": _file_association, "AC_get_control_text": _get_control_text, diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index 4369522b..41e8400d 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -4107,6 +4107,36 @@ def img_histogram_tools() -> List[MCPTool]: handler=h.match_theme, annotations=READ_ONLY, ), + MCPTool( + name="ac_rank_changes", + description=("Rank scored element boxes by how much they changed. " + "'scored_boxes' is a list of {box:[x,y,w,h], score}. " + "Pure. Returns {changes:[{box, score, changed}]} " + "sorted most-changed first."), + input_schema=schema({"scored_boxes": {"type": "array", + "items": {"type": "object"}}, + "threshold": {"type": "number"}}, + required=["scored_boxes"]), + handler=h.rank_changes, + annotations=READ_ONLY, + ), + MCPTool( + name="ac_localize_changes", + description=("Which of the supplied element 'boxes' changed between " + "a 'reference' image and the current screen (or " + "'current' image). 
Returns {changes:[{box, score, " + "changed}]}."), + input_schema=schema({"reference": {"type": "string"}, + "boxes": {"type": "array", + "items": {"type": "array"}}, + "current": {"type": "string"}, + "threshold": {"type": "number"}, + "region": {"type": "array", + "items": {"type": "integer"}}}, + required=["reference", "boxes"]), + handler=h.localize_changes, + annotations=READ_ONLY, + ), ] diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index 5006b182..a91d0644 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -774,6 +774,19 @@ def match_theme(template, region=None, method="sobel", min_score=0.5): return _match_theme(template, region, method, min_score) +def rank_changes(scored_boxes, threshold=0.1): + from je_auto_control.utils.executor.action_executor import _rank_changes + return _rank_changes(scored_boxes, threshold) + + +def localize_changes(reference, boxes, current=None, threshold=0.1, + region=None): + from je_auto_control.utils.executor.action_executor import ( + _localize_changes, + ) + return _localize_changes(reference, boxes, current, threshold, region) + + def normalize_ext(target): from je_auto_control.utils.executor.action_executor import _normalize_ext return _normalize_ext(target) diff --git a/test/unit_test/headless/test_change_localize_batch.py b/test/unit_test/headless/test_change_localize_batch.py new file mode 100644 index 00000000..84110e53 --- /dev/null +++ b/test/unit_test/headless/test_change_localize_batch.py @@ -0,0 +1,79 @@ +"""Headless tests for change_localize (pure ranking + cv2 localization).""" +import pytest + +import je_auto_control as ac +from je_auto_control.utils.change_localize import localize_changes, rank_changes + + +# --- pure rank_changes ---------------------------------------------------- + +def test_rank_changes_marks_and_sorts(): + scored = [{"box": [0, 0, 10, 10], 
"score": 0.02}, + {"box": [20, 20, 10, 10], "score": 0.5}, + {"box": [40, 40, 10, 10], "score": 0.2}] + ranked = rank_changes(scored, threshold=0.1) + # sorted most-changed first + assert [entry["score"] for entry in ranked] == pytest.approx([0.5, 0.2, + 0.02]) + assert [entry["changed"] for entry in ranked] == [True, True, False] + + +def test_rank_changes_accepts_tuples(): + ranked = rank_changes([([0, 0, 5, 5], 0.3), ([1, 1, 5, 5], 0.05)], + threshold=0.1) + assert ranked[0]["changed"] is True + assert ranked[1]["changed"] is False + + +def test_rank_changes_empty(): + assert rank_changes([]) == [] + + +def test_rank_changes_threshold_boundary(): + # a score exactly at the threshold counts as changed (>=) + ranked = rank_changes([{"box": [0, 0, 1, 1], "score": 0.1}], threshold=0.1) + assert ranked[0]["changed"] is True + + +# --- cv2 localize_changes (per-function importorskip) --------------------- + +def test_localize_changes_attributes_to_the_right_box(): + np = pytest.importorskip("numpy") + pytest.importorskip("cv2") + reference = np.zeros((100, 100), dtype="uint8") + current = reference.copy() + current[40:60, 40:60] = 255 # change inside this box only + boxes = [[40, 40, 20, 20], [0, 0, 20, 20]] + ranked = localize_changes(reference, boxes, current=current, + threshold=0.05) + # the changed box ranks first and is flagged; the untouched one is not + assert ranked[0]["box"] == [40, 40, 20, 20] + assert ranked[0]["changed"] is True + untouched = [r for r in ranked if r["box"] == [0, 0, 20, 20]][0] + assert untouched["changed"] is False + + +# --- wiring (cv2-free) ---------------------------------------------------- + +def test_executor_pure_rank_path(): + from je_auto_control.utils.executor.action_executor import _rank_changes + out = _rank_changes('[{"box": [0,0,4,4], "score": 0.4}]', 0.1) + assert out["changes"][0]["changed"] is True + + +def test_wiring(): + known = set(ac.executor.known_commands()) + assert {"AC_rank_changes", "AC_localize_changes"} <= 
known + from je_auto_control.utils.mcp_server.tools import ( + build_default_tool_registry, + ) + names = {t.name for t in build_default_tool_registry()} + assert {"ac_rank_changes", "ac_localize_changes"} <= names + from je_auto_control.gui.script_builder.command_schema import _build_specs + specs = {s.command for s in _build_specs()} + assert {"AC_rank_changes", "AC_localize_changes"} <= specs + + +def test_facade_exports(): + for name in ("localize_changes", "rank_changes"): + assert hasattr(ac, name) and name in ac.__all__