Skip to content

Commit 52acaec

Browse files
authored
Merge pull request #22054 from github/tausbn/yeast-context-reification
2 parents cacdc46 + af7ae8c commit 52acaec

18 files changed

Lines changed: 1410 additions & 431 deletions

File tree

shared/tree-sitter-extractor/src/extractor/mod.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -280,10 +280,11 @@ pub fn location_label(writer: &mut trap::Writer, location: trap::Location) -> tr
280280
}
281281

282282
/// Extracts the source file at `path`, which is assumed to be canonicalized.
283-
/// When `yeast_runner` is `Some`, the parsed tree is first transformed
284-
/// through the supplied yeast `Runner` before TRAP extraction. Building the
285-
/// `Runner` (which parses YAML and constructs the schema) is the caller's
286-
/// responsibility, allowing it to be done once and shared across files.
283+
/// When `desugarer` is `Some`, the parsed tree is first transformed
284+
/// through the supplied yeast desugarer before TRAP extraction. Building
285+
/// the desugarer (which parses YAML and constructs the schema) is the
286+
/// caller's responsibility, allowing it to be done once and shared across
287+
/// files.
287288
#[allow(clippy::too_many_arguments)]
288289
pub fn extract(
289290
language: &Language,
@@ -295,7 +296,7 @@ pub fn extract(
295296
path: &Path,
296297
source: &[u8],
297298
ranges: &[Range],
298-
yeast_runner: Option<&yeast::Runner<'_>>,
299+
desugarer: Option<&dyn yeast::Desugarer>,
299300
) {
300301
let path_str = file_paths::normalize_and_transform_path(path, transformer);
301302
let source_root = std::env::current_dir()
@@ -328,8 +329,8 @@ pub fn extract(
328329
schema,
329330
);
330331

331-
if let Some(yeast_runner) = yeast_runner {
332-
let ast = yeast_runner
332+
if let Some(desugarer) = desugarer {
333+
let ast = desugarer
333334
.run_from_tree(&tree, source)
334335
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
335336
traverse_yeast(&ast, &mut visitor);

shared/tree-sitter-extractor/src/extractor/simple.rs

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,14 @@ pub struct LanguageSpec {
1313
pub prefix: &'static str,
1414
pub ts_language: tree_sitter::Language,
1515
pub node_types: &'static str,
16-
/// Optional yeast desugaring configuration. When set, the parsed
17-
/// tree is rewritten through yeast before TRAP extraction. The
18-
/// config's `output_node_types_yaml` (if set) provides the schema
19-
/// used both at runtime (for the rewriter) and for TRAP validation.
20-
pub desugar: Option<yeast::DesugaringConfig>,
16+
/// Optional desugarer. When set, the parsed tree is rewritten through
17+
/// the desugarer before TRAP extraction. The desugarer's
18+
/// `output_node_types_yaml()` (when it returns `Some`) provides the schema used both
19+
/// at runtime (for the rewriter) and for TRAP validation.
20+
///
21+
/// Stored as `Box<dyn yeast::Desugarer>` so the shared extractor is agnostic to
22+
/// the user-defined context type the desugarer uses internally.
23+
pub desugar: Option<Box<dyn yeast::Desugarer>>,
2124
pub file_globs: Vec<String>,
2225
}
2326

@@ -91,35 +94,22 @@ impl Extractor {
9194
.collect();
9295

9396
let mut schemas = vec![];
94-
let mut yeast_runners = Vec::new();
9597
for lang in &self.languages {
96-
let effective_node_types: String =
97-
match lang.desugar.as_ref().and_then(|c| c.output_node_types_yaml) {
98-
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
99-
std::io::Error::other(format!(
100-
"Failed to convert YAML node-types to JSON for {}: {e}",
101-
lang.prefix
102-
))
103-
})?,
104-
None => lang.node_types.to_string(),
105-
};
106-
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
107-
schemas.push(schema);
108-
109-
// Build the yeast runner once per language so the YAML schema
110-
// isn't re-parsed for every file.
111-
let yeast_runner = lang
98+
let effective_node_types: String = match lang
11299
.desugar
113100
.as_ref()
114-
.map(|config| yeast::Runner::from_config(lang.ts_language.clone(), config))
115-
.transpose()
116-
.map_err(|e| {
101+
.and_then(|d| d.output_node_types_yaml())
102+
{
103+
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
117104
std::io::Error::other(format!(
118-
"Failed to build desugaring runner for {}: {e}",
105+
"Failed to convert YAML node-types to JSON for {}: {e}",
119106
lang.prefix
120107
))
121-
})?;
122-
yeast_runners.push(yeast_runner);
108+
})?,
109+
None => lang.node_types.to_string(),
110+
};
111+
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
112+
schemas.push(schema);
123113
}
124114

125115
// Construct a single globset containing all language globs,
@@ -194,7 +184,7 @@ impl Extractor {
194184
&path,
195185
&source,
196186
&[],
197-
yeast_runners[i].as_ref(),
187+
lang.desugar.as_deref(),
198188
);
199189
std::fs::create_dir_all(src_archive_file.parent().unwrap())?;
200190
std::fs::copy(&path, &src_archive_file)?;

shared/yeast-macros/src/lib.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,37 @@ pub fn rule(input: TokenStream) -> TokenStream {
121121
Err(err) => err.to_compile_error().into(),
122122
}
123123
}
124+
125+
/// Define a desugaring rule whose transform is a hand-written Rust block.
126+
///
127+
/// Use `manual_rule!` when the transform needs control over capture
128+
/// translation timing — for example, when an outer rule needs to set
129+
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
130+
/// translation reaches inner rules that read that state.
131+
///
132+
/// ```text
133+
/// manual_rule!(
134+
/// (query_pattern field: (_) @name)
135+
/// {
136+
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
137+
/// // (`name: NodeRef`, etc.) are bound from the query.
138+
/// let translated = ctx.translate(name)?;
139+
/// Ok(translated)
140+
/// }
141+
/// )
142+
/// ```
143+
///
144+
/// Differences from [`rule!`]:
145+
/// - Captures are **not** auto-translated before the body runs; they
146+
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
147+
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
148+
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
149+
/// tree template, and no implicit `Ok(...)` wrap (the body returns the `Result` itself).
150+
#[proc_macro]
151+
pub fn manual_rule(input: TokenStream) -> TokenStream {
152+
let input2: TokenStream2 = input.into();
153+
match parse::parse_manual_rule_top(input2) {
154+
Ok(output) => output.into(),
155+
Err(err) => err.to_compile_error().into(),
156+
}
157+
}

0 commit comments

Comments
 (0)