Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 0 additions & 34 deletions shared/yeast-macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,37 +121,3 @@ pub fn rule(input: TokenStream) -> TokenStream {
Err(err) => err.to_compile_error().into(),
}
}

/// Define a desugaring rule whose transform is a hand-written Rust block.
///
/// Use `manual_rule!` when the transform needs control over capture
/// translation timing — for example, when an outer rule needs to set
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
/// translation reaches inner rules that read that state.
///
/// ```text
/// manual_rule!(
/// (query_pattern field: (_) @name)
/// {
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
/// // (`name: NodeRef`, etc.) are bound from the query.
/// let translated = ctx.translate(name)?;
/// Ok(translated)
/// }
/// )
/// ```
///
/// Differences from [`rule!`]:
/// - Captures are **not** auto-translated before the body runs; they
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
/// tree template, no `Ok(...)` wrap.
#[proc_macro]
pub fn manual_rule(input: TokenStream) -> TokenStream {
let input2: TokenStream2 = input.into();
match parse::parse_manual_rule_top(input2) {
Ok(output) => output.into(),
Err(err) => err.to_compile_error().into(),
}
}
157 changes: 43 additions & 114 deletions shared/yeast-macros/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,9 @@ pub fn parse_query_top(input: TokenStream) -> Result<TokenStream> {
/// Parse a single query node (possibly with a trailing `@capture`).
fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
let base = parse_query_atom(tokens)?;
// Check for trailing @capture
// Check for trailing @capture or @@capture
if peek_is_at(tokens) {
tokens.next(); // consume @
let capture_name = expect_ident(tokens, "expected capture name after @")?;
let capture_name = consume_capture_marker(tokens)?;
let name_str = capture_name.to_string();
Ok(quote! {
yeast::query::QueryNode::Capture {
Expand Down Expand Up @@ -159,8 +158,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
} else {
let child = if peek_is_at(tokens) {
tokens.next();
let capture_name = expect_ident(tokens, "expected capture name after @")?;
let capture_name = consume_capture_marker(tokens)?;
let name_str = capture_name.to_string();
quote! {
yeast::query::QueryNode::Capture {
Expand Down Expand Up @@ -650,6 +648,9 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
struct CaptureInfo {
name: String,
multiplicity: CaptureMultiplicity,
/// `true` for `@@name` captures: the auto-translate prefix skips them,
/// so the bound `NodeRef` refers to the raw (input-schema) node.
raw: bool,
}

#[derive(Clone, Copy, PartialEq)]
Expand Down Expand Up @@ -708,6 +709,14 @@ fn extract_captures_inner(
extract_captures_inner(&mut inner, captures, child_mult);
}
TokenTree::Punct(p) if p.as_char() == '@' => {
// `@@name` marks the capture as raw (skip auto-translate).
let raw = matches!(
tokens.peek(),
Some(TokenTree::Punct(p)) if p.as_char() == '@'
);
if raw {
tokens.next(); // consume the second `@`
}
if let Some(TokenTree::Ident(name)) = tokens.next() {
let mult = if parent_mult == CaptureMultiplicity::Repeated
|| last_mult == CaptureMultiplicity::Repeated
Expand All @@ -723,6 +732,7 @@ fn extract_captures_inner(
captures.push(CaptureInfo {
name: name.to_string(),
multiplicity: mult,
raw,
});
}
last_mult = CaptureMultiplicity::Single;
Expand Down Expand Up @@ -776,6 +786,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
// Parse query
let query_code = parse_query_top(query_stream.clone())?;

// Capture names marked `@@name` (raw) — passed to the auto-translate
// prefix as a skip list so those captures keep their input-schema ids.
let raw_capture_names: Vec<&str> = captures
.iter()
.filter(|c| c.raw)
.map(|c| c.name.as_str())
.collect();

// Generate capture bindings
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
let bindings: Vec<TokenStream> = captures
Expand Down Expand Up @@ -891,11 +909,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
let __query = #query_code;
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
// Auto-translation prefix: recursively translate every
// captured node before invoking the user's transform body.
// captured node before invoking the user's transform body,
// except for `@@name` captures listed in `__skip` which the
// body consumes raw.
// For OneShot rules this preserves the legacy behaviour
// (input-schema captures translated to output-schema
// nodes); for Repeating rules it is a no-op.
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?;
let __skip: &[&str] = &[#(#raw_capture_names),*];
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx, __skip)?;
#(#bindings)*
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
let __result: Vec<usize> = { #transform_body };
Expand All @@ -905,106 +926,6 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
})
}

/// Parse `manual_rule!( query { body } )`.
///
/// Like [`parse_rule_top`] but:
/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow).
/// - Generates code that does NOT auto-translate captures before
/// running the body. Capture variables refer to raw (input-schema)
/// nodes; the body is responsible for explicit translation via
/// `ctx.translate(...)`.
/// - The body is included verbatim and must evaluate to
/// `Result<Vec<usize>, String>`.
pub fn parse_manual_rule_top(input: TokenStream) -> Result<TokenStream> {
let mut tokens = input.into_iter().peekable();

// Collect query tokens up to the body block `{ ... }`.
let mut query_tokens = Vec::new();
loop {
match tokens.peek() {
None => {
return Err(syn::Error::new(
Span::call_site(),
"expected a Rust block `{ ... }` after the query in manual_rule!",
))
}
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break,
_ => {
query_tokens.push(tokens.next().unwrap());
}
}
}

let query_stream: TokenStream = query_tokens.into_iter().collect();

// Extract captures from the query (same as in `rule!`).
let captures = extract_captures(&query_stream);

// Parse the query into the QueryNode-building expression.
let query_code = parse_query_top(query_stream)?;

// Generate capture bindings (same as in `rule!`).
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
let bindings: Vec<TokenStream> = captures
.iter()
.map(|cap| {
let name = Ident::new(&cap.name, Span::call_site());
let name_str = &cap.name;
match cap.multiplicity {
CaptureMultiplicity::Repeated => quote! {
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
.into_iter()
.map(yeast::NodeRef)
.collect();
},
CaptureMultiplicity::Optional => quote! {
let #name: Option<yeast::NodeRef> =
__captures.get_opt(#name_str).map(yeast::NodeRef);
},
CaptureMultiplicity::Single => quote! {
let #name: yeast::NodeRef =
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
},
}
})
.collect();

// Consume the body block.
let body_group = match tokens.next() {
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g,
other => {
return Err(syn::Error::new(
Span::call_site(),
format!(
"expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}"
),
))
}
};
let body_stream = body_group.stream();

// No tokens should follow the body.
if let Some(tok) = tokens.next() {
return Err(syn::Error::new_spanned(
tok,
"unexpected token after manual_rule! body",
));
}

Ok(quote! {
{
let __query = #query_code;
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
// No auto-translate prefix for manual rules — the body
// is responsible for translating captures explicitly.
#(#bindings)*
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
#body_stream
}))
}
})
}

// ---------------------------------------------------------------------------
// Token utilities
// ---------------------------------------------------------------------------
Expand All @@ -1013,6 +934,16 @@ fn peek_is_at(tokens: &mut Tokens) -> bool {
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@')
}

/// Consume an `@` or `@@` capture marker and the following name ident.
/// Caller has already verified `peek_is_at(tokens)`.
fn consume_capture_marker(tokens: &mut Tokens) -> Result<Ident> {
tokens.next(); // consume the first `@`
if peek_is_at(tokens) {
tokens.next(); // consume the second `@` of `@@`
}
expect_ident(tokens, "expected capture name after `@` or `@@`")
}

fn peek_is_literal(tokens: &mut Tokens) -> bool {
matches!(tokens.peek(), Some(TokenTree::Literal(_)))
}
Expand Down Expand Up @@ -1113,8 +1044,7 @@ fn expect_repetition(tokens: &mut Tokens) -> Result<TokenStream> {

fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result<TokenStream> {
if peek_is_at(tokens) {
tokens.next(); // consume @
let name = expect_ident(tokens, "expected capture name after @")?;
let name = consume_capture_marker(tokens)?;
let name_str = name.to_string();
Ok(quote! {
yeast::query::QueryNode::Capture {
Expand All @@ -1141,13 +1071,12 @@ fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result<Tok
}
}

/// If `@name` follows a Repeated list element, wrap each child SingleNode
/// inside the repetition with a Capture. This matches tree-sitter semantics
/// where `(_)* @name` captures each matched node.
/// If `@name` (or `@@name`) follows a Repeated list element, wrap each
/// child SingleNode inside the repetition with a Capture. This matches
/// tree-sitter semantics where `(_)* @name` captures each matched node.
fn maybe_wrap_list_capture(tokens: &mut Tokens, elem: TokenStream) -> Result<TokenStream> {
if peek_is_at(tokens) {
tokens.next();
let name = expect_ident(tokens, "expected capture name after @")?;
let name = consume_capture_marker(tokens)?;
let name_str = name.to_string();
// Re-parse the element isn't practical, so we generate a wrapper
// that creates a new Repeated with each child wrapped in a capture.
Expand Down
31 changes: 31 additions & 0 deletions shared/yeast/doc/yeast.md
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,37 @@ Inside `rule!`, captures are Rust variables, so `{name}` inserts a
single capture (`Id`) and `{..name}` splices a repeated capture
(`Vec<Id>`).

### Raw captures (`@@name`)

The default `@name` capture marker is *auto-translated*: in OneShot
phases the macro recursively translates the captured node before
binding it, so `{name}` in the output template splices a node that
already conforms to the output schema.

For rules that need the raw (input-schema) capture — typically to read
its source text or to translate it explicitly with mutable context
state between calls — use `@@name` instead. The body sees the original
input-schema `NodeRef`:

```rust
yeast::rule!(
(assignment left: (_) @@raw_lhs right: (_) @rhs)
=>
{
// raw_lhs is untranslated: read its original source text.
let text = ctx.ast.source_text(raw_lhs.into());
// rhs is already translated by the auto-translate prefix.
tree!((call
method: (identifier #{text.as_str()})
receiver: {rhs}))
}
);
```

Mix `@` and `@@` freely in the same rule. In a Repeating phase both
markers are equivalent (auto-translation is a no-op for repeating
rules).

## Complete example: for-loop desugaring

This rule rewrites Ruby's `for pat in val do body end` into
Expand Down
22 changes: 22 additions & 0 deletions shared/yeast/src/captures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,28 @@ impl Captures {
}
Ok(())
}

/// Like [`try_map_all_captures`] but leaves captures whose name appears
/// in `skip` untouched. Used by the `rule!` macro to support `@@name`
/// (raw) captures alongside the default auto-translated `@name`
/// captures.
pub fn try_map_captures_except<E>(
&mut self,
skip: &[&str],
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
) -> Result<(), E> {
for (name, ids) in self.captures.iter_mut() {
if skip.contains(name) {
continue;
}
let mut new_ids = Vec::with_capacity(ids.len());
for &id in ids.iter() {
new_ids.extend(f(id)?);
}
*ids = new_ids;
}
Ok(())
}
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
if let Some(from_ids) = self.captures.get(from) {
let new_values = from_ids.iter().copied().map(f).collect();
Expand Down
Loading
Loading