diff --git a/WHATS_NEW.md b/WHATS_NEW.md index 18ba3291..d7bf15b2 100644 --- a/WHATS_NEW.md +++ b/WHATS_NEW.md @@ -2,6 +2,12 @@ ## What's new (2026-06-26) +### Act In View — Scroll to a Target, Then Act When Actionable + +Click the row three pages down: scroll it into view, then gate on actionability before clicking. Full reference: [`docs/source/Eng/doc/new_features/v221_features_doc.rst`](docs/source/Eng/doc/new_features/v221_features_doc.rst). + +- **`act_in_view` / `ScrollPlan`** (`AC_act_in_view`): two reliability primitives stayed separate — `scroll_find.scroll_until_visible` brings an off-screen target on-screen, and `actionability.act_when_ready` waits for it to be visible/stable/enabled/unoccluded before acting. A real "click the off-screen row" step needs both. `act_in_view` composes them: scroll until the target is located, then run the actionability gate at its point and perform the action. `ScrollPlan` bundles the scroll search + its `locator`/`scroller` seams so the call stays within the argument limit; the actionability probes (`region_sampler`/`enabled_probe`/`hit_tester`) and gate `config` are injectable too, so the whole flow is testable without a screen. Closes the input-fidelity lane's composition gap. No `PySide6`. + ### Template-Free Element Proposal (Pixels to Elements) Get a clean numbered element list straight from the screen when there's no accessibility tree. Full reference: [`docs/source/Eng/doc/new_features/v220_features_doc.rst`](docs/source/Eng/doc/new_features/v220_features_doc.rst). 
diff --git a/docs/source/Eng/doc/new_features/v221_features_doc.rst b/docs/source/Eng/doc/new_features/v221_features_doc.rst new file mode 100644 index 00000000..e3d9711a --- /dev/null +++ b/docs/source/Eng/doc/new_features/v221_features_doc.rst @@ -0,0 +1,49 @@ +Act In View — Scroll to a Target, Then Act When Actionable +========================================================== + +Two reliability primitives stayed separate: ``scroll_find.scroll_until_visible`` +brings an off-screen target on-screen, and ``actionability.act_when_ready`` waits +for a target to be visible / stable / enabled / unoccluded before acting. A real +"click the row three pages down" step needs *both* — scroll to it, then gate +before clicking. ``act_in_view`` composes them into one call. + +* :class:`ScrollPlan` — bundles the scroll search (``kind`` / ``direction`` / + ``max_scrolls`` / ``scroll_amount``) and its injectable ``locator`` / + ``scroller`` seams, so the composed call stays within a sane argument count. +* :func:`act_in_view` — scroll until the target is found, then run the + actionability gate at its location and perform ``action`` on it. + +Every seam — the scroll locator / scroller, the action, the actionability probes +(``region_sampler`` / ``enabled_probe`` / ``hit_tester``) and the gate ``config`` +— is injectable, so the whole flow is testable without a screen. Reuses +:func:`scroll_find.scroll_until_visible` and +:func:`actionability.act_when_ready`. Imports no ``PySide6``. + +Headless API +------------ + +.. 
code-block:: python + + from je_auto_control import act_in_view, ScrollPlan + + # Scroll down to the "Submit" button image, then click it once it's actionable + act_in_view("submit.png", lambda point: click(point[0], point[1]), + scroll=ScrollPlan(kind="image", direction="down", + max_scrolls=20)) + +``act_in_view`` returns ``{acted, coords, scrolls, result}`` (``result`` is the +action's return value) and raises ``AutoControlActionException`` if the target +never comes into view. Pass ``enabled_probe`` / ``hit_tester`` / ``config`` to +have the actionability gate actually wait for the control to be enabled and +unoccluded before the action fires — otherwise it acts as soon as the target is +located. + +Executor commands +----------------- + +``AC_act_in_view`` (``target`` + ``kind`` / ``direction`` / ``max_scrolls`` / +``scroll_amount`` / ``button`` → ``{acted, coords, scrolls}``) scrolls a template +or text target into view and clicks it. It is also exposed as the ``ac_act_in_view`` +MCP tool and as a Script Builder command under **Flow**. :func:`act_in_view` (which +takes an arbitrary action and the actionability probes) is the Python-API +surface. 
diff --git a/docs/source/Zh/doc/new_features/v221_features_doc.rst b/docs/source/Zh/doc/new_features/v221_features_doc.rst new file mode 100644 index 00000000..986ae7b9 --- /dev/null +++ b/docs/source/Zh/doc/new_features/v221_features_doc.rst @@ -0,0 +1,38 @@ +在視野內操作——捲動到目標,再於可操作時動作 +============================================ + +兩個可靠性原語原本各自獨立:``scroll_find.scroll_until_visible`` 把螢幕外的目標捲進畫面, +``actionability.act_when_ready`` 則在目標可見 / 穩定 / 啟用 / 未被遮擋前等待再動作。真實的 +「點選下三頁的那一列」步驟需要*兩者*——先捲到它,再閘控後才點擊。``act_in_view`` 把它們組合成單一呼叫。 + +* :class:`ScrollPlan` ——把捲動搜尋(``kind`` / ``direction`` / ``max_scrolls`` / + ``scroll_amount``)與其可注入的 ``locator`` / ``scroller`` 接縫打包,讓組合後的呼叫維持在合理的參數數量內。 +* :func:`act_in_view` ——捲動直到找到目標,接著在其位置執行 actionability 閘控,並對其執行 ``action``。 + +每個接縫——捲動的 locator / scroller、action、actionability 探針(``region_sampler`` / +``enabled_probe`` / ``hit_tester``)與閘控 ``config``——皆可注入,故整個流程能在沒有螢幕的情況下測試。 +重用 :func:`scroll_find.scroll_until_visible` 與 :func:`actionability.act_when_ready`。不匯入 ``PySide6``。 + +無頭 API +-------- + +.. 
code-block:: python + + from je_auto_control import act_in_view, ScrollPlan + + # 向下捲動到「Submit」按鈕影像,於可操作時點擊 + act_in_view("submit.png", lambda point: click(point[0], point[1]), + scroll=ScrollPlan(kind="image", direction="down", + max_scrolls=20)) + +``act_in_view`` 回傳 ``{acted, coords, scrolls, result}``(``result`` 為 action 的回傳值), +若目標始終未進入畫面則丟出 ``AutoControlActionException``。傳入 ``enabled_probe`` / ``hit_tester`` / +``config`` 可讓 actionability 閘控真正等到控制項已啟用且未被遮擋才觸發動作——否則一旦定位到目標即動作。 + +執行器指令 +---------- + +``AC_act_in_view``(``target`` 加上 ``kind`` / ``direction`` / ``max_scrolls`` / +``scroll_amount`` / ``button`` → ``{acted, coords, scrolls}``)把 template 或文字目標捲入畫面並點擊。 +以對應的 ``ac_act_in_view`` MCP 工具及 Script Builder 指令(位於 **Flow** 分類下)形式提供。 +:func:`act_in_view`(接受任意 action 與 actionability 探針)則是 Python API 介面。 diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 1ccf1d6d..7817f77d 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -151,6 +151,8 @@ ) # Propose a clean element list from raw pixels (template-free) from je_auto_control.utils.element_proposal import propose_elements, tag_kinds +# Scroll a target into view, then act on it once it is actionable +from je_auto_control.utils.act_in_view import ScrollPlan, act_in_view # Rich clipboard formats — RTF + CSV/TSV codecs and Windows get / set from je_auto_control.utils.clipboard_rich_formats import ( build_rtf, csv_to_rows, get_clipboard_csv, get_clipboard_rtf, rows_to_csv, @@ -1782,6 +1784,7 @@ def start_autocontrol_gui(*args, **kwargs): "localize_changes", "rank_changes", "classify_widget", "box_features", "classify_icon", "propose_elements", "tag_kinds", + "act_in_view", "ScrollPlan", "build_rtf", "rtf_to_text", "rows_to_csv", "csv_to_rows", "set_clipboard_rtf", "get_clipboard_rtf", "set_clipboard_csv", "get_clipboard_csv", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index 
50e2031b..29885136 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -4522,6 +4522,23 @@ def _add_work_queue_specs(specs: List[CommandSpec]) -> None: ), description="Index where a busy/idle series first settles idle.", )) + specs.append(CommandSpec( + "AC_act_in_view", "Flow", "Act In View (scroll + click)", + fields=( + FieldSpec("target", FieldType.STRING, + placeholder="template path or text"), + FieldSpec("kind", FieldType.STRING, optional=True, + default="image", placeholder="image / text"), + FieldSpec("direction", FieldType.STRING, optional=True, + default="down", placeholder="up / down"), + FieldSpec("max_scrolls", FieldType.INT, optional=True, default=10), + FieldSpec("scroll_amount", FieldType.INT, optional=True, + default=3), + FieldSpec("button", FieldType.STRING, optional=True, + default="left"), + ), + description="Scroll a target into view, then click it when actionable.", + )) specs.append(CommandSpec( "AC_simulate_cvd", "Image", "Simulate Colour-Vision Deficiency", fields=( diff --git a/je_auto_control/utils/act_in_view/__init__.py b/je_auto_control/utils/act_in_view/__init__.py new file mode 100644 index 00000000..43640bad --- /dev/null +++ b/je_auto_control/utils/act_in_view/__init__.py @@ -0,0 +1,6 @@ +"""Scroll a target into view, then act on it once it is actionable.""" +from je_auto_control.utils.act_in_view.act_in_view import ( + ScrollPlan, act_in_view, +) + +__all__ = ["act_in_view", "ScrollPlan"] diff --git a/je_auto_control/utils/act_in_view/act_in_view.py b/je_auto_control/utils/act_in_view/act_in_view.py new file mode 100644 index 00000000..c0cc903f --- /dev/null +++ b/je_auto_control/utils/act_in_view/act_in_view.py @@ -0,0 +1,70 @@ +"""Scroll a target into view, then act on it only once it is actionable. 
+ +Two reliability primitives the framework already had were still separate: +``scroll_find.scroll_until_visible`` brings an off-screen target on-screen, and +``actionability.act_when_ready`` waits for a target to be visible / stable / +enabled / unoccluded before acting. A real "click the row three pages down" step +needs *both* — scroll to it, then gate before clicking. ``act_in_view`` composes +them into one call. + +* :class:`ScrollPlan` — bundles the scroll search (``kind`` / ``direction`` / + ``max_scrolls`` / ``scroll_amount``) and its injectable ``locator`` / + ``scroller`` seams, so the composed call stays within a sane argument count. +* :func:`act_in_view` — scroll until the target is found, then run the + actionability gate at its location and perform ``action`` on it. + +All seams (locator / scroller / action / actionability probes, plus the gate +``config`` and its clock) are injectable, so the whole flow is testable +without a screen. Reuses :func:`scroll_find.scroll_until_visible` and +:func:`actionability.act_when_ready`. Imports no ``PySide6``. 
+""" +from dataclasses import dataclass +from typing import Any, Callable, Dict, List, Optional + +from je_auto_control.utils.actionability import GateConfig, act_when_ready +from je_auto_control.utils.exception.exceptions import AutoControlActionException +from je_auto_control.utils.scroll_find import scroll_until_visible +from je_auto_control.utils.scroll_find.scroll_find import Locator, Scroller + + +@dataclass +class ScrollPlan: + """How to scroll while searching for the target (with injectable seams).""" + + kind: str = "image" + direction: str = "down" + max_scrolls: int = 10 + scroll_amount: int = 3 + locator: Optional[Locator] = None + scroller: Optional[Scroller] = None + + +def act_in_view(target: str, action: Callable[[List[int]], Any], *, + scroll: Optional[ScrollPlan] = None, + region_sampler: Optional[Callable[[Any], Any]] = None, + enabled_probe: Optional[Callable[[], Optional[bool]]] = None, + hit_tester: Optional[Callable[[List[int]], bool]] = None, + config: Optional[GateConfig] = None) -> Dict[str, Any]: + """Scroll ``target`` into view, gate on actionability, then ``action`` it. + + Scrolls per ``scroll`` (a :class:`ScrollPlan`) until ``target`` is located, + then runs :func:`actionability.act_when_ready` at the found point and calls + ``action(center_point)``. Raises ``AutoControlActionException`` if the target + never comes into view. The actionability probes / ``config`` are injectable + and forwarded to the gate. Returns ``{acted, coords, scrolls, result}``. 
+ """ + plan = scroll if scroll is not None else ScrollPlan() + found = scroll_until_visible( + target, kind=plan.kind, direction=plan.direction, + max_scrolls=plan.max_scrolls, scroll_amount=plan.scroll_amount, + locator=plan.locator, scroller=plan.scroller) + if not found["found"]: + raise AutoControlActionException( + f"target {target!r} not in view after {found['scrolls']} scrolls") + cx, cy = int(found["coords"][0]), int(found["coords"][1]) + result = act_when_ready(action, lambda: (cx, cy, 1, 1), + region_sampler=region_sampler, + enabled_probe=enabled_probe, hit_tester=hit_tester, + config=config) + return {"acted": True, "coords": [cx, cy], "scrolls": found["scrolls"], + "result": result} diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index 7f5a8761..4f566c17 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -2963,6 +2963,22 @@ def _tag_kinds(elements: Any) -> Dict[str, Any]: return {"elements": tag_kinds(items)} +def _act_in_view(target: Any, kind: Any = "image", direction: Any = "down", + max_scrolls: Any = 10, scroll_amount: Any = 3, + button: Any = "left") -> Dict[str, Any]: + """Adapter: scroll a target into view then click it when actionable.""" + from je_auto_control.utils.act_in_view import ScrollPlan, act_in_view + plan = ScrollPlan(kind=str(kind), direction=str(direction), + max_scrolls=int(max_scrolls), + scroll_amount=int(scroll_amount)) + out = act_in_view( + str(target), + lambda point: click_mouse(str(button), int(point[0]), int(point[1])), + scroll=plan) + return {"acted": out["acted"], "coords": out["coords"], + "scrolls": out["scrolls"]} + + def _normalize_ext(target: str) -> Dict[str, Any]: """Adapter: the lowercased extension of a path / bare ext (pure).""" from je_auto_control.utils.file_assoc import normalize_ext @@ -7008,6 +7024,7 @@ def __init__(self): "AC_classify_icon": _classify_icon, 
"AC_propose_elements": _propose_elements, "AC_tag_kinds": _tag_kinds, + "AC_act_in_view": _act_in_view, "AC_normalize_ext": _normalize_ext, "AC_file_association": _file_association, "AC_get_control_text": _get_control_text, diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index 26c2427e..e2910c74 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -1898,6 +1898,21 @@ def smart_wait_tools() -> List[MCPTool]: handler=h.idle_point, annotations=READ_ONLY, ), + MCPTool( + name="ac_act_in_view", + description=("Scroll a 'target' (kind=image template path / text) " + "into view, wait until it is actionable, then click it " + "('button'). Returns {acted, coords, scrolls}."), + input_schema=schema({"target": {"type": "string"}, + "kind": {"type": "string"}, + "direction": {"type": "string"}, + "max_scrolls": {"type": "integer"}, + "scroll_amount": {"type": "integer"}, + "button": {"type": "string"}}, + required=["target"]), + handler=h.act_in_view, + annotations=SIDE_EFFECT_ONLY, + ), ] diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index 91d38a68..4eadd1d0 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -809,6 +809,13 @@ def tag_kinds(elements): return _tag_kinds(elements) +def act_in_view(target, kind="image", direction="down", max_scrolls=10, + scroll_amount=3, button="left"): + from je_auto_control.utils.executor.action_executor import _act_in_view + return _act_in_view(target, kind, direction, max_scrolls, scroll_amount, + button) + + def normalize_ext(target): from je_auto_control.utils.executor.action_executor import _normalize_ext return _normalize_ext(target) diff --git a/test/unit_test/headless/test_act_in_view_batch.py b/test/unit_test/headless/test_act_in_view_batch.py 
new file mode 100644 index 00000000..4c18ba76 --- /dev/null +++ b/test/unit_test/headless/test_act_in_view_batch.py @@ -0,0 +1,93 @@ +"""Headless tests for act_in_view (injected locator / scroller / action / gate).""" +import pytest + +import je_auto_control as ac +from je_auto_control.utils.act_in_view import ScrollPlan, act_in_view +from je_auto_control.utils.actionability import GateConfig +from je_auto_control.utils.exception.exceptions import AutoControlActionException + + +def _locator_found_after(scrolls_needed, coords=(100, 200)): + """A locator that returns None until ``scrolls_needed`` scrolls, then coords.""" + state = {"calls": 0} + + def locator(_target): + result = coords if state["calls"] >= scrolls_needed else None + state["calls"] += 1 + return result + + return locator + + +# --- scroll then act ------------------------------------------------------ + +def test_act_in_view_scrolls_then_acts(): + scrolled = [] + clicked = [] + plan = ScrollPlan(locator=_locator_found_after(2, (100, 200)), + scroller=lambda direction, amount: scrolled.append( + (direction, amount)), + max_scrolls=5) + out = act_in_view("target.png", clicked.append, scroll=plan) + assert out["acted"] is True + assert out["coords"] == [100, 200] + assert out["scrolls"] == 2 + assert clicked == [[100, 200]] # acted at the located point + assert len(scrolled) == 2 # scrolled twice before finding + + +def test_act_in_view_acts_immediately_when_already_visible(): + clicked = [] + plan = ScrollPlan(locator=lambda _t: (50, 60), + scroller=lambda d, a: None) + out = act_in_view("here", clicked.append, scroll=plan) + assert out["scrolls"] == 0 + assert clicked == [[50, 60]] + + +def test_act_in_view_raises_when_never_found(): + plan = ScrollPlan(locator=lambda _t: None, + scroller=lambda d, a: None, max_scrolls=3) + with pytest.raises(AutoControlActionException): + act_in_view("missing", lambda point: None, scroll=plan) + + +# --- actionability gate is honoured 
--------------------------------------- + +def test_act_in_view_waits_for_enabled(): + enabled_calls = {"n": 0} + + def enabled_probe(): + enabled_calls["n"] += 1 + return enabled_calls["n"] >= 2 # disabled on the first poll + + ticks = iter([0.0, 0.0, 1.0, 2.0, 3.0, 4.0]) + config = GateConfig(timeout_s=10.0, stable_for_s=0.0, poll_interval_s=1.0, + clock=lambda: next(ticks), sleep=lambda _s: None) + clicked = [] + plan = ScrollPlan(locator=lambda _t: (10, 20), scroller=lambda d, a: None) + out = act_in_view("x", clicked.append, scroll=plan, + enabled_probe=enabled_probe, config=config) + assert out["acted"] is True + assert clicked == [[10, 20]] + assert enabled_calls["n"] >= 2 # gated until the probe reported enabled + + +# --- wiring --------------------------------------------------------------- + +def test_wiring(): + known = set(ac.executor.known_commands()) + assert "AC_act_in_view" in known + from je_auto_control.utils.mcp_server.tools import ( + build_default_tool_registry, + ) + names = {t.name for t in build_default_tool_registry()} + assert "ac_act_in_view" in names + from je_auto_control.gui.script_builder.command_schema import _build_specs + specs = {s.command for s in _build_specs()} + assert "AC_act_in_view" in specs + + +def test_facade_exports(): + for name in ("act_in_view", "ScrollPlan"): + assert hasattr(ac, name) and name in ac.__all__