From eb7f8cc43da272bafb423a90b0529a4c1dd7e8cd Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 25 Jun 2026 15:36:54 +0000 Subject: [PATCH 1/4] yeast: Add `@@name` raw-capture syntax to `rule!` The `@@name` capture marker in `rule!` queries skips the auto-translate prefix for that specific capture, letting the body see the original capture (and thus delay its translation using `ctx.translate` until it becomes convenient). Regular `@name` captures continue to be auto-translated as before. Specifically these are translated _eagerly_, before the main body of the rewrite rule is run. I settled on `@@` as the syntax because it did not add new symbols that the user has to keep track of (it's still a kind of capture), but it's still visually distinct enough that the user should be able to tell that there's something special going on. In principle one could accidentally write one form of capture where the other was intended, but in practice this would result in code that did not compile (because the types would not match). --- shared/yeast-macros/src/parse.rs | 57 ++++++++++++----- shared/yeast/doc/yeast.md | 31 +++++++++ shared/yeast/src/captures.rs | 22 +++++++ shared/yeast/src/lib.rs | 16 ++--- shared/yeast/tests/test.rs | 104 +++++++++++++++++++++++++++++++ 5 files changed, 209 insertions(+), 21 deletions(-) diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index fc6031eb39d2..2b5c4f530032 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -22,10 +22,9 @@ pub fn parse_query_top(input: TokenStream) -> Result { /// Parse a single query node (possibly with a trailing `@capture`). fn parse_query_node(tokens: &mut Tokens) -> Result { let base = parse_query_atom(tokens)?; - // Check for trailing @capture + // Check for trailing @capture or @@capture if peek_is_at(tokens) { - tokens.next(); // consume @ - let capture_name = expect_ident(tokens, "expected capture name after @")?; + let capture_name = consume_capture_marker(tokens)?; let name_str = capture_name.to_string(); Ok(quote! { yeast::query::QueryNode::Capture { @@ -159,8 +158,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result> { push_field_elem(&mut field_order, &mut field_elems, field_str, elem); } else { let child = if peek_is_at(tokens) { - tokens.next(); - let capture_name = expect_ident(tokens, "expected capture name after @")?; + let capture_name = consume_capture_marker(tokens)?; let name_str = capture_name.to_string(); quote! { yeast::query::QueryNode::Capture { @@ -650,6 +648,9 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result { + // `@@name` marks the capture as raw (skip auto-translate). + let raw = matches!( + tokens.peek(), + Some(TokenTree::Punct(p)) if p.as_char() == '@' + ); + if raw { + tokens.next(); // consume the second `@` + } if let Some(TokenTree::Ident(name)) = tokens.next() { let mult = if parent_mult == CaptureMultiplicity::Repeated || last_mult == CaptureMultiplicity::Repeated @@ -723,6 +732,7 @@ fn extract_captures_inner( captures.push(CaptureInfo { name: name.to_string(), multiplicity: mult, + raw, }); } last_mult = CaptureMultiplicity::Single; @@ -776,6 +786,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result { // Parse query let query_code = parse_query_top(query_stream.clone())?; + // Capture names marked `@@name` (raw) — passed to the auto-translate + // prefix as a skip list so those captures keep their input-schema ids. + let raw_capture_names: Vec<&str> = captures + .iter() + .filter(|c| c.raw) + .map(|c| c.name.as_str()) + .collect(); + // Generate capture bindings let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site()); let bindings: Vec = captures @@ -891,11 +909,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result { let __query = #query_code; yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| { // Auto-translation prefix: recursively translate every - // captured node before invoking the user's transform body. + // captured node before invoking the user's transform body, + // except for `@@name` captures listed in `__skip` which the + // body consumes raw. // For OneShot rules this preserves the legacy behaviour // (input-schema captures translated to output-schema // nodes); for Repeating rules it is a no-op. - __translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?; + let __skip: &[&str] = &[#(#raw_capture_names),*]; + __translator.auto_translate_captures(&mut __captures, __ast, __user_ctx, __skip)?; #(#bindings)* let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator); let __result: Vec = { #transform_body }; @@ -1013,6 +1034,16 @@ fn peek_is_at(tokens: &mut Tokens) -> bool { matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@') } +/// Consume an `@` or `@@` capture marker and the following name ident. +/// Caller has already verified `peek_is_at(tokens)`. +fn consume_capture_marker(tokens: &mut Tokens) -> Result { + tokens.next(); // consume the first `@` + if peek_is_at(tokens) { + tokens.next(); // consume the second `@` of `@@` + } + expect_ident(tokens, "expected capture name after `@` or `@@`") +} + fn peek_is_literal(tokens: &mut Tokens) -> bool { matches!(tokens.peek(), Some(TokenTree::Literal(_))) } @@ -1113,8 +1144,7 @@ fn expect_repetition(tokens: &mut Tokens) -> Result { fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result { if peek_is_at(tokens) { - tokens.next(); // consume @ - let name = expect_ident(tokens, "expected capture name after @")?; + let name = consume_capture_marker(tokens)?; let name_str = name.to_string(); Ok(quote! { yeast::query::QueryNode::Capture { @@ -1141,13 +1171,12 @@ fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result Result { if peek_is_at(tokens) { - tokens.next(); - let name = expect_ident(tokens, "expected capture name after @")?; + let name = consume_capture_marker(tokens)?; let name_str = name.to_string(); // Re-parse the element isn't practical, so we generate a wrapper // that creates a new Repeated with each child wrapped in a capture. diff --git a/shared/yeast/doc/yeast.md b/shared/yeast/doc/yeast.md index 1700029b43c0..3c122e7ebf9c 100644 --- a/shared/yeast/doc/yeast.md +++ b/shared/yeast/doc/yeast.md @@ -292,6 +292,37 @@ Inside `rule!`, captures are Rust variables, so `{name}` inserts a single capture (`Id`) and `{..name}` splices a repeated capture (`Vec`). +### Raw captures (`@@name`) + +The default `@name` capture marker is *auto-translated*: in OneShot +phases the macro recursively translates the captured node before +binding it, so `{name}` in the output template splices a node that +already conforms to the output schema. + +For rules that need the raw (input-schema) capture — typically to read +its source text or to translate it explicitly with mutable context +state between calls — use `@@name` instead. The body sees the original +input-schema `NodeRef`: + +```rust +yeast::rule!( + (assignment left: (_) @@raw_lhs right: (_) @rhs) + => + { + // raw_lhs is untranslated: read its original source text. + let text = ctx.ast.source_text(raw_lhs.into()); + // rhs is already translated by the auto-translate prefix. + tree!((call + method: (identifier #{text.as_str()}) + receiver: {rhs})) + } +); +``` + +Mix `@` and `@@` freely in the same rule. In a Repeating phase both +markers are equivalent (auto-translation is a no-op for repeating +rules). + ## Complete example: for-loop desugaring This rule rewrites Ruby's `for pat in val do body end` into diff --git a/shared/yeast/src/captures.rs b/shared/yeast/src/captures.rs index 404d402a5016..101ab329220d 100644 --- a/shared/yeast/src/captures.rs +++ b/shared/yeast/src/captures.rs @@ -80,6 +80,28 @@ impl Captures { } Ok(()) } + + /// Like [`try_map_all_captures`] but leaves captures whose name appears + /// in `skip` untouched. Used by the `rule!` macro to support `@@name` + /// (raw) captures alongside the default auto-translated `@name` + /// captures. + pub fn try_map_captures_except( + &mut self, + skip: &[&str], + mut f: impl FnMut(Id) -> Result, E>, + ) -> Result<(), E> { + for (name, ids) in self.captures.iter_mut() { + if skip.contains(name) { + continue; + } + let mut new_ids = Vec::with_capacity(ids.len()); + for &id in ids.iter() { + new_ids.extend(f(id)?); + } + *ids = new_ids; + } + Ok(()) + } pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) { if let Some(from_ids) = self.captures.get(from) { let new_values = from_ids.iter().copied().map(f).collect(); diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index e0fffc551f34..2c08c12276fb 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -757,13 +757,14 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> { } /// Translate every captured node in `captures` in place (OneShot phase - /// only). In a Repeating phase this is a no-op — Repeating rules - /// receive raw captures. + /// only), except for captures whose name appears in `skip` — those are + /// left as raw (input-schema) ids for the rule body to consume + /// directly. In a Repeating phase this is a no-op — Repeating rules + /// receive raw captures regardless of `skip`. /// - /// Used by the `rule!` macro's generated prefix to preserve the - /// pre-existing "auto-translate captures before running the transform - /// body" behavior. Manually-written transforms typically translate - /// captures selectively via [`translate`] instead. + /// Used by the `rule!` macro's generated prefix. `skip` is populated + /// from the macro's `@@name` capture markers; for plain `@name` + /// captures (and rules with no `@@` markers) it is empty. /// /// To avoid infinite recursion, a capture whose id matches the rule's /// matched root (e.g. from a `(_) @_` pattern) is left unchanged. @@ -772,11 +773,12 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> { captures: &mut Captures, ast: &mut Ast, user_ctx: &mut C, + skip: &[&str], ) -> Result<(), String> { match &self.inner { TranslatorImpl::OneShot { matched_root, .. } => { let root = *matched_root; - captures.try_map_all_captures(|cid| { + captures.try_map_captures_except(skip, |cid| { if cid == root { Ok(vec![cid]) } else { diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 99471f129abf..1444b7c2a46e 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -1058,6 +1058,110 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() { ); } +/// Verify that `@@name` capture markers skip the auto-translate prefix: +/// the body sees the *raw* (input-schema) NodeRef and can read its +/// source text or call `ctx.translate(...)` explicitly. Compare with +/// the bare `@name` form, where the auto-translate prefix runs the +/// same translation up front and the body sees the post-translate id. +#[test] +fn test_raw_capture_marker() { + let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); + let rules: Vec = vec![ + yeast::rule!( + (program (_)* @stmts) + => + (program stmt: {..stmts}) + ), + // `@@raw_lhs` is untranslated: the body reads its source text + // ("x") and embeds it directly as the identifier content. `@rhs` + // is auto-translated (rhs already points to (integer "INT")). + yeast::rule!( + (assignment left: (_) @@raw_lhs right: (_) @rhs) + => + { + let text = ctx.ast.source_text(raw_lhs.into()); + tree!((call + method: (identifier #{text.as_str()}) + receiver: {rhs})) + } + ), + yeast::rule!((identifier) => (identifier "ID")), + yeast::rule!((integer) => (integer "INT")), + ]; + let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)]; + let runner: Runner = Runner::with_schema(lang, &schema, &phases); + + let input = "x = 1"; + let ast = runner.run(input).unwrap(); + let dump = dump_ast(&ast, ast.get_root(), input); + // `method:` uses the raw source text ("x"); if `@@` were broken and + // auto-translation ran on `raw_lhs`, it would still produce the + // string "x" (source_text inherits the input range), so the dump + // wouldn't change. Add a second assertion: explicitly translating + // the raw NodeRef inside the body must succeed and produce + // `(identifier "ID")`. + assert_dump_eq( + &dump, + r#" + program + stmt: + call + method: identifier "x" + receiver: integer "INT" + "#, + ); +} + +/// Companion to `test_raw_capture_marker`: confirms that calling +/// `ctx.translate(raw)` on a `@@`-captured NodeRef from the rule body +/// produces the correctly-translated output-schema node. With `@`, the +/// translation has already happened, so `ctx.translate(...)` inside the +/// body would attempt to re-translate an output node (which has no +/// matching rule and would error). +#[test] +fn test_raw_capture_marker_explicit_translate() { + let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); + let rules: Vec = vec![ + yeast::rule!( + (program (_)* @stmts) + => + (program stmt: {..stmts}) + ), + yeast::rule!( + (assignment left: (_) @@raw_lhs right: (_) @rhs) + => + { + let translated_lhs = ctx.translate(raw_lhs)?; + tree!((call + method: {..translated_lhs} + receiver: {rhs})) + } + ), + yeast::rule!((identifier) => (identifier "ID")), + yeast::rule!((integer) => (integer "INT")), + ]; + let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)]; + let runner: Runner = Runner::with_schema(lang, &schema, &phases); + + let input = "x = 1"; + let ast = runner.run(input).unwrap(); + let dump = dump_ast(&ast, ast.get_root(), input); + assert_dump_eq( + &dump, + r#" + program + stmt: + call + method: identifier "ID" + receiver: integer "INT" + "#, + ); +} + // ---- Cursor tests ---- #[test] From 1b7f589000b36f0b9c40997e47802d346f67578e Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 25 Jun 2026 15:37:58 +0000 Subject: [PATCH 2/4] unified/swift: Migrate `manual_rule!` sites to `rule!` + `@@` With `@@name` available, there's no longer a need to use `manual_rule!`. Every place where it is used, we can instead just mark the relevant raw captures as such. This results in quite a lot of cleanup! (Also, to me at least, it makes these rules a lot easier to reason about.) A first iteration of this approach resulted in a lot of `.map(Into::into)` being needed, because `SwiftContext` stores `Id`s, but captures produce `NodeRef`s. To avoid this, I swapped it around so that the context stores `NodeRef`s. This does require adding `.into()` in a few places, but it makes the rest of the code a lot more ergonomic. --- shared/yeast/src/lib.rs | 6 + .../extractor/src/languages/swift/swift.rs | 134 ++++++++---------- 2 files changed, 64 insertions(+), 76 deletions(-) diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 2c08c12276fb..63850f097d4c 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -48,6 +48,12 @@ impl From for Id { } } +impl From for NodeRef { + fn from(value: Id) -> Self { + NodeRef(value) + } +} + /// Like [`std::fmt::Display`], but the formatting routine is given access to /// the [`Ast`] so that node references can resolve to their source text. /// diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index c84e3cf38676..4c07618d1bb2 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -1,5 +1,5 @@ use codeql_extractor::extractor::simple; -use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, manual_rule, rule, tree}; +use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, rule, tree}; /// User context propagated from outer rules down to the inner rules that /// emit the corresponding output declarations, so that each emitted node @@ -15,26 +15,26 @@ struct SwiftContext { /// (`computed_getter`/`computed_setter`/`computed_modify`/ /// `willset_clause`/`didset_clause`/`getter_specifier`/ /// `setter_specifier`). - property_name: Option, + property_name: Option, /// Translated type node for the property type. Set by the outer /// `property_binding` rule (computed accessors variant) and /// `protocol_property_declaration` when present; read by the /// accessor inner rules. - property_type: Option, + property_type: Option, /// Default-value expression for the next translated `parameter`. Set /// by the outer `function_parameter` rule; read by the `parameter` /// rules. - default_value: Option, + default_value: Option, /// Translated outer modifiers (e.g. visibility, attributes) to /// attach to each child of a flattening outer rule. Set by /// `property_declaration`, `enum_entry`, and /// `protocol_property_declaration`. - outer_modifiers: Vec, + outer_modifiers: Vec, /// The `let`/`var` binding modifier for a `property_declaration`. /// Set by `property_declaration`; read by the inner declaration /// rules (`property_binding` variants, accessor rules) so they /// emit it as part of the output node's `modifier:` field. - binding_modifier: Option, + binding_modifier: Option, /// True when the current child of a flattening outer rule is not /// the first one — its inner rule should emit a /// `chained_declaration` modifier so the original grouping can be @@ -45,10 +45,10 @@ struct SwiftContext { /// Build a freshly-created `chained_declaration` modifier node if /// `ctx.is_chained`, else `None`. Used by inner declaration rules to /// emit the chained tag for non-first children of a flattening outer -/// rule. Returns `Option` so it splices via `{..…}` to 0 or 1 ids. -fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option { +/// rule. Returns `Option` so it splices via `{..…}` to 0 or 1 ids. +fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option { if ctx.is_chained { - Some(ctx.literal("modifier", "chained_declaration")) + Some(ctx.literal("modifier", "chained_declaration").into()) } else { None } @@ -192,21 +192,15 @@ fn translation_rules() -> Vec> { // this whole property_binding is itself a non-first declarator // of a containing property_declaration); subsequent accessors // always emit `chained_declaration`. - manual_rule!( + rule!( (property_binding name: @pattern type: _? @ty - computed_value: (computed_property accessor: _+ @accessors)) - { - // Translate `ty` first so the context holds an - // output-schema node id. - let translated_ty = ctx.translate_opt(ty)?; - // Build the property-name identifier from the - // (untranslated) pattern leaf. - let name_id = tree!((identifier #{pattern})); - - ctx.property_name = Some(name_id); - ctx.property_type = translated_ty; + computed_value: (computed_property accessor: _+ @@accessors)) + => + {..{ + ctx.property_name = Some(tree!((identifier #{pattern})).into()); + ctx.property_type = ty; let mut result = Vec::new(); for (i, acc) in accessors.into_iter().enumerate() { @@ -215,8 +209,8 @@ fn translation_rules() -> Vec> { } result.extend(ctx.translate(acc)?); } - Ok(result) - } + result + }} ), // Computed property: shorthand getter (no explicit get/set, just // statements) → a single accessor_declaration with kind "get". @@ -248,30 +242,26 @@ fn translation_rules() -> Vec> { // The `variable_declaration` itself inherits the outer rule's // chained state; observers always get `chained_declaration` // because they're subsequent outputs of this flattening rule. - manual_rule!( + rule!( (property_binding name: (pattern bound_identifier: @name) type: _? @ty value: _? @val - observers: (willset_didset_block willset: _? @ws didset: _? @ds)) - { - // Translate ty and val so the variable_declaration - // below contains output-schema nodes. - let translated_ty = ctx.translate_opt(ty)?; - let translated_val = ctx.translate_opt(val)?; - + observers: (willset_didset_block willset: _? @@ws didset: _? @@ds)) + => + {..{ let var_decl = tree!( (variable_declaration modifier: {..ctx.binding_modifier} modifier: {..ctx.outer_modifiers.clone()} modifier: {..chained_modifier(&mut ctx)} pattern: (name_pattern identifier: (identifier #{name})) - type: {..translated_ty} - value: {..translated_val}) + type: {..ty} + value: {..val}) ); // Publish the property name for the observer rules. - ctx.property_name = Some(tree!((identifier #{name}))); + ctx.property_name = Some(tree!((identifier #{name})).into()); // Observers are subsequent outputs of this flattening // rule, so they always get `chained_declaration`. ctx.is_chained = true; @@ -280,8 +270,8 @@ fn translation_rules() -> Vec> { for obs in ws.into_iter().chain(ds) { result.extend(ctx.translate(obs)?); } - Ok(result) - } + result + }} ), // property_binding with any pattern name (identifier or // destructuring). Reads outer modifiers / chained tag from `ctx`. @@ -309,27 +299,24 @@ fn translation_rules() -> Vec> { // inner declaration rules (`property_binding` variants, // accessor inner rules) read these fields and emit complete // `modifier:` lists from the start. - manual_rule!( + rule!( (property_declaration binding: (value_binding_pattern mutability: @binding_kind) - declarator: _* @decls + declarator: _* @@decls (modifiers)* @mods) - { - let binding_text = ctx.ast.source_text(binding_kind.0); - ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text)); - let mut modifiers = Vec::new(); - for m in mods { - modifiers.extend(ctx.translate(m)?); - } - ctx.outer_modifiers = modifiers; + => + {..{ + let binding_text = ctx.ast.source_text(binding_kind.into()); + ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text).into()); + ctx.outer_modifiers = mods; let mut result = Vec::new(); for (i, decl) in decls.into_iter().enumerate() { ctx.is_chained = i > 0; result.extend(ctx.translate(decl)?); } - Ok(result) - } + result + }} ), // ---- Enums ---- // enum_type_parameter → parameter (with optional name as pattern). @@ -386,22 +373,19 @@ fn translation_rules() -> Vec> { // into `ctx` and translate each case with `ctx.is_chained` // toggled per iteration so the inner `enum_case_entry` rules // emit complete `modifier:` lists from the start. - manual_rule!( - (enum_entry case: _+ @cases (modifiers)* @mods) - { - let mut modifiers = Vec::new(); - for m in mods { - modifiers.extend(ctx.translate(m)?); - } - ctx.outer_modifiers = modifiers; + rule!( + (enum_entry case: _+ @@cases (modifiers)* @mods) + => + {..{ + ctx.outer_modifiers = mods; let mut result = Vec::new(); for (i, case) in cases.into_iter().enumerate() { ctx.is_chained = i > 0; result.extend(ctx.translate(case)?); } - Ok(result) - } + result + }} ), // Plain assignment: `x = expr` rule!( @@ -476,12 +460,13 @@ fn translation_rules() -> Vec> { // optional default values. Publishes the default value into `ctx` // before translating the inner `parameter` so the `parameter` // rules can include it as a `default:` field directly. - manual_rule!( - (function_parameter parameter: @p default_value: _? @def) - { - ctx.default_value = ctx.translate_opt(def)?; - ctx.translate(p) - } + rule!( + (function_parameter parameter: @@p default_value: _? @def) + => + {..{ + ctx.default_value = def; + ctx.translate(p)? + }} ), // Parameter with external name and type rule!( @@ -1017,28 +1002,25 @@ fn translation_rules() -> Vec> { // inner `getter_specifier`/`setter_specifier` rules emit // complete nodes from the start (including the // `chained_declaration` tag for non-first accessors). - manual_rule!( + rule!( (protocol_property_declaration name: (pattern bound_identifier: @name) - requirements: (protocol_property_requirements accessor: _+ @accessors) + requirements: (protocol_property_requirements accessor: _+ @@accessors) type: _? @ty (modifiers)* @mods) - { - ctx.property_name = Some(tree!((identifier #{name}))); - ctx.property_type = ctx.translate_opt(ty)?; - let mut modifiers = Vec::new(); - for m in mods { - modifiers.extend(ctx.translate(m)?); - } - ctx.outer_modifiers = modifiers; + => + {..{ + ctx.property_name = Some(tree!((identifier #{name})).into()); + ctx.property_type = ty; + ctx.outer_modifiers = mods; let mut result = Vec::new(); for (i, acc) in accessors.into_iter().enumerate() { ctx.is_chained = i > 0; result.extend(ctx.translate(acc)?); } - Ok(result) - } + result + }} ), // getter_specifier / setter_specifier → bodyless accessor_declaration // getter_specifier / setter_specifier → bodyless From 664f0125b96e1fe18280d25344a0ef04a8b4c3a8 Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 25 Jun 2026 15:38:41 +0000 Subject: [PATCH 3/4] yeast: Remove now-unused `manual_rule!` The `manual_rule!` macro is now fully subsumed by `rule!` + `@@name`, so this commit simply gets rid of the now no longer needed code. --- shared/yeast-macros/src/lib.rs | 34 ----------- shared/yeast-macros/src/parse.rs | 100 ------------------------------- shared/yeast/src/lib.rs | 2 +- 3 files changed, 1 insertion(+), 135 deletions(-) diff --git a/shared/yeast-macros/src/lib.rs b/shared/yeast-macros/src/lib.rs index 7153cf306443..07077be51f04 100644 --- a/shared/yeast-macros/src/lib.rs +++ b/shared/yeast-macros/src/lib.rs @@ -121,37 +121,3 @@ pub fn rule(input: TokenStream) -> TokenStream { Err(err) => err.to_compile_error().into(), } } - -/// Define a desugaring rule whose transform is a hand-written Rust block. -/// -/// Use `manual_rule!` when the transform needs control over capture -/// translation timing — for example, when an outer rule needs to set -/// state in `ctx` (the `BuildCtx`'s user context) before recursive -/// translation reaches inner rules that read that state. -/// -/// ```text -/// manual_rule!( -/// (query_pattern field: (_) @name) -/// { -/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables -/// // (`name: NodeRef`, etc.) are bound from the query. -/// let translated = ctx.translate(name)?; -/// Ok(translated) -/// } -/// ) -/// ``` -/// -/// Differences from [`rule!`]: -/// - Captures are **not** auto-translated before the body runs; they -/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or -/// [`BuildCtx::translate_opt`]) to translate them when you choose. -/// - The body is plain Rust returning `Result, String>` — no -/// tree template, no `Ok(...)` wrap. -#[proc_macro] -pub fn manual_rule(input: TokenStream) -> TokenStream { - let input2: TokenStream2 = input.into(); - match parse::parse_manual_rule_top(input2) { - Ok(output) => output.into(), - Err(err) => err.to_compile_error().into(), - } -} diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index 2b5c4f530032..d02556b5cdfe 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -926,106 +926,6 @@ pub fn parse_rule_top(input: TokenStream) -> Result { }) } -/// Parse `manual_rule!( query { body } )`. -/// -/// Like [`parse_rule_top`] but: -/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow). -/// - Generates code that does NOT auto-translate captures before -/// running the body. Capture variables refer to raw (input-schema) -/// nodes; the body is responsible for explicit translation via -/// `ctx.translate(...)`. -/// - The body is included verbatim and must evaluate to -/// `Result, String>`. -pub fn parse_manual_rule_top(input: TokenStream) -> Result { - let mut tokens = input.into_iter().peekable(); - - // Collect query tokens up to the body block `{ ... }`. - let mut query_tokens = Vec::new(); - loop { - match tokens.peek() { - None => { - return Err(syn::Error::new( - Span::call_site(), - "expected a Rust block `{ ... }` after the query in manual_rule!", - )) - } - Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break, - _ => { - query_tokens.push(tokens.next().unwrap()); - } - } - } - - let query_stream: TokenStream = query_tokens.into_iter().collect(); - - // Extract captures from the query (same as in `rule!`). - let captures = extract_captures(&query_stream); - - // Parse the query into the QueryNode-building expression. - let query_code = parse_query_top(query_stream)?; - - // Generate capture bindings (same as in `rule!`). - let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site()); - let bindings: Vec = captures - .iter() - .map(|cap| { - let name = Ident::new(&cap.name, Span::call_site()); - let name_str = &cap.name; - match cap.multiplicity { - CaptureMultiplicity::Repeated => quote! { - let #name: Vec = __captures.get_all(#name_str) - .into_iter() - .map(yeast::NodeRef) - .collect(); - }, - CaptureMultiplicity::Optional => quote! { - let #name: Option = - __captures.get_opt(#name_str).map(yeast::NodeRef); - }, - CaptureMultiplicity::Single => quote! { - let #name: yeast::NodeRef = - yeast::NodeRef(__captures.get_var(#name_str).unwrap()); - }, - } - }) - .collect(); - - // Consume the body block. - let body_group = match tokens.next() { - Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g, - other => { - return Err(syn::Error::new( - Span::call_site(), - format!( - "expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}" - ), - )) - } - }; - let body_stream = body_group.stream(); - - // No tokens should follow the body. - if let Some(tok) = tokens.next() { - return Err(syn::Error::new_spanned( - tok, - "unexpected token after manual_rule! body", - )); - } - - Ok(quote! { - { - let __query = #query_code; - yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| { - // No auto-translate prefix for manual rules — the body - // is responsible for translating captures explicitly. - #(#bindings)* - let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator); - #body_stream - })) - } - }) -} - // --------------------------------------------------------------------------- // Token utilities // --------------------------------------------------------------------------- diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 63850f097d4c..004a8408cb67 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -16,7 +16,7 @@ pub mod schema; pub mod tree_builder; mod visitor; -pub use yeast_macros::{manual_rule, query, rule, tree, trees}; +pub use yeast_macros::{query, rule, tree, trees}; use captures::Captures; pub use cursor::Cursor; From 70ca7af04c78519928254b04d4c5785bb9132326 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 26 Jun 2026 13:30:01 +0000 Subject: [PATCH 4/4] Address PR review comments - unified/swift: Mark `binding_kind` as a raw `@@` capture in the property_declaration rule. It is only used to read its source text (`ctx.ast.source_text`), never as a translated node. With `@` the auto-translate prefix would route the unnamed `let`/`var` token through the catch-all `_ @node => {node}` fallback for a no-op roundtrip; `@@` makes the intent explicit and removes that reliance. - shared/yeast/tests: Reword a stale comment in test_raw_capture_marker. The text claimed a "second assertion" exists in this test, but the explicit-translation check actually lives in the companion test_raw_capture_marker_explicit_translate. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- shared/yeast/tests/test.rs | 7 ++++--- unified/extractor/src/languages/swift/swift.rs | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 1444b7c2a46e..73243a09ab76 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -1099,9 +1099,10 @@ fn test_raw_capture_marker() { // `method:` uses the raw source text ("x"); if `@@` were broken and // auto-translation ran on `raw_lhs`, it would still produce the // string "x" (source_text inherits the input range), so the dump - // wouldn't change. Add a second assertion: explicitly translating - // the raw NodeRef inside the body must succeed and produce - // `(identifier "ID")`. + // wouldn't change here. The companion test + // `test_raw_capture_marker_explicit_translate` exercises the + // stronger property that `ctx.translate(raw_lhs)?` succeeds and + // produces the translated `(identifier "ID")`. assert_dump_eq( &dump, r#" diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index 4c07618d1bb2..c5025228cc97 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -301,7 +301,7 @@ fn translation_rules() -> Vec> { // `modifier:` lists from the start. rule!( (property_declaration - binding: (value_binding_pattern mutability: @binding_kind) + binding: (value_binding_pattern mutability: @@binding_kind) declarator: _* @@decls (modifiers)* @mods) =>