Skip to content

Commit 258a8ef

Browse files
rustdoc: Add tree-sitter syntax highlighting for non-Rust code blocks
Integrate arborium (tree-sitter-based highlighting) to provide syntax highlighting for non-Rust code blocks in documentation. Previously, code blocks like ```python or ```javascript were rendered as plain text. Supported languages: bash, c, cpp, css, go, html, java, javascript, json, python, ruby, sql, toml, typescript, yaml. The highlighting uses custom HTML elements (a-k for keywords, a-s for strings, etc.), which are styled via CSS to match rustdoc's existing color scheme across all themes (light, dark, ayu).
1 parent dc47a69 commit 258a8ef

File tree

5 files changed

+343
-7
lines changed

5 files changed

+343
-7
lines changed

Cargo.lock

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,240 @@ dependencies = [
170170
"object 0.37.3",
171171
]
172172

173+
[[package]]
174+
name = "arborium"
175+
version = "1.2.3"
176+
source = "registry+https://github.com/rust-lang/crates.io-index"
177+
checksum = "d7ae3fd5b382cf9f09e1613da6214d8020c4868eb1358627eb3628bf87bb30fb"
178+
dependencies = [
179+
"arborium-bash",
180+
"arborium-c",
181+
"arborium-cpp",
182+
"arborium-css",
183+
"arborium-go",
184+
"arborium-highlight",
185+
"arborium-html",
186+
"arborium-java",
187+
"arborium-javascript",
188+
"arborium-json",
189+
"arborium-python",
190+
"arborium-ruby",
191+
"arborium-sql",
192+
"arborium-theme",
193+
"arborium-toml",
194+
"arborium-tree-sitter",
195+
"arborium-typescript",
196+
"arborium-yaml",
197+
"dlmalloc",
198+
]
199+
200+
[[package]]
201+
name = "arborium-bash"
202+
version = "1.2.3"
203+
source = "registry+https://github.com/rust-lang/crates.io-index"
204+
checksum = "b4be4da1f6f020cbed670305d2942818323064f1e0667fc5a780cfe48638ee5a"
205+
dependencies = [
206+
"arborium-sysroot",
207+
"cc",
208+
"tree-sitter-language",
209+
]
210+
211+
[[package]]
212+
name = "arborium-c"
213+
version = "1.2.3"
214+
source = "registry+https://github.com/rust-lang/crates.io-index"
215+
checksum = "4931371886b262191c6679945103b3679d358a70697e6c7d59cdf3cbe8b6e4e9"
216+
dependencies = [
217+
"arborium-sysroot",
218+
"cc",
219+
"tree-sitter-language",
220+
]
221+
222+
[[package]]
223+
name = "arborium-cpp"
224+
version = "1.2.3"
225+
source = "registry+https://github.com/rust-lang/crates.io-index"
226+
checksum = "01a0e5539a2cc0e8ed13b20bd52fa7511e2843baac1a775a577013e6e645c644"
227+
dependencies = [
228+
"arborium-c",
229+
"arborium-sysroot",
230+
"cc",
231+
"tree-sitter-language",
232+
]
233+
234+
[[package]]
235+
name = "arborium-css"
236+
version = "1.2.3"
237+
source = "registry+https://github.com/rust-lang/crates.io-index"
238+
checksum = "f5f81a6fa983b8f12118ec33cda27da8e06e63fdeaeb1c71532590c399513102"
239+
dependencies = [
240+
"arborium-sysroot",
241+
"cc",
242+
"tree-sitter-language",
243+
]
244+
245+
[[package]]
246+
name = "arborium-go"
247+
version = "1.2.3"
248+
source = "registry+https://github.com/rust-lang/crates.io-index"
249+
checksum = "12618f8df880cfbf268e2d35f392302ec8b26397addff3fb154db79e981ef391"
250+
dependencies = [
251+
"arborium-sysroot",
252+
"cc",
253+
"tree-sitter-language",
254+
]
255+
256+
[[package]]
257+
name = "arborium-highlight"
258+
version = "1.2.3"
259+
source = "registry+https://github.com/rust-lang/crates.io-index"
260+
checksum = "e185c18bb04a51252e2c60d0d19324c438a6d3a71468ffa63674a116685d45de"
261+
dependencies = [
262+
"arborium-theme",
263+
"arborium-tree-sitter",
264+
"streaming-iterator",
265+
]
266+
267+
[[package]]
268+
name = "arborium-html"
269+
version = "1.2.3"
270+
source = "registry+https://github.com/rust-lang/crates.io-index"
271+
checksum = "17704d9e8ebc4a338e810cb1cc2aaedd770041773762902a397f17ac245f371e"
272+
dependencies = [
273+
"arborium-sysroot",
274+
"cc",
275+
"tree-sitter-language",
276+
]
277+
278+
[[package]]
279+
name = "arborium-java"
280+
version = "1.2.3"
281+
source = "registry+https://github.com/rust-lang/crates.io-index"
282+
checksum = "1ca33a5c1bbe5b3e38f47cf82461b04bdfad8b4fe0362824805f7447ea96a0f8"
283+
dependencies = [
284+
"arborium-sysroot",
285+
"cc",
286+
"tree-sitter-language",
287+
]
288+
289+
[[package]]
290+
name = "arborium-javascript"
291+
version = "1.2.3"
292+
source = "registry+https://github.com/rust-lang/crates.io-index"
293+
checksum = "b2069654043459d341aded42e5061242ab1298806a1603617ad5286d4cf2af73"
294+
dependencies = [
295+
"arborium-sysroot",
296+
"cc",
297+
"tree-sitter-language",
298+
]
299+
300+
[[package]]
301+
name = "arborium-json"
302+
version = "1.2.3"
303+
source = "registry+https://github.com/rust-lang/crates.io-index"
304+
checksum = "c85dacbcb3dc01dd26ba6004307873d4d3a2376adc218793d28e55f14758a5e9"
305+
dependencies = [
306+
"arborium-sysroot",
307+
"cc",
308+
"tree-sitter-language",
309+
]
310+
311+
[[package]]
312+
name = "arborium-python"
313+
version = "1.2.3"
314+
source = "registry+https://github.com/rust-lang/crates.io-index"
315+
checksum = "411a3f3a95b92f41b41c07b8b62cd0abbe42f5dcf7178223928b9f2e793b1f65"
316+
dependencies = [
317+
"arborium-sysroot",
318+
"cc",
319+
"tree-sitter-language",
320+
]
321+
322+
[[package]]
323+
name = "arborium-ruby"
324+
version = "1.2.3"
325+
source = "registry+https://github.com/rust-lang/crates.io-index"
326+
checksum = "45c9a92f48efc376408177358507e22406ad8338cb371d79b696fba94dd9ab20"
327+
dependencies = [
328+
"arborium-sysroot",
329+
"cc",
330+
"tree-sitter-language",
331+
]
332+
333+
[[package]]
334+
name = "arborium-sql"
335+
version = "1.2.3"
336+
source = "registry+https://github.com/rust-lang/crates.io-index"
337+
checksum = "d479480c7beac1e789be25235f1dea72e27ddb11919789befb84c51bb1139ba5"
338+
dependencies = [
339+
"arborium-sysroot",
340+
"cc",
341+
"tree-sitter-language",
342+
]
343+
344+
[[package]]
345+
name = "arborium-sysroot"
346+
version = "1.2.3"
347+
source = "registry+https://github.com/rust-lang/crates.io-index"
348+
checksum = "5942714de3eb1c3d0c5607e30284e314ddea3698a76a0cf3e7686274ac57802f"
349+
350+
[[package]]
351+
name = "arborium-theme"
352+
version = "1.2.3"
353+
source = "registry+https://github.com/rust-lang/crates.io-index"
354+
checksum = "ac1ce1ffd21913c61768880c08567e3fbe3a966e7501ce1751f1238316b6fb45"
355+
dependencies = [
356+
"toml 0.8.23",
357+
]
358+
359+
[[package]]
360+
name = "arborium-toml"
361+
version = "1.2.3"
362+
source = "registry+https://github.com/rust-lang/crates.io-index"
363+
checksum = "8671c543a4c77cf32cd39e446c7d25e8cfd74cf6032873a25b35b76b663c9fe5"
364+
dependencies = [
365+
"arborium-sysroot",
366+
"cc",
367+
"tree-sitter-language",
368+
]
369+
370+
[[package]]
371+
name = "arborium-tree-sitter"
372+
version = "1.2.3"
373+
source = "registry+https://github.com/rust-lang/crates.io-index"
374+
checksum = "e056bac05e8c35fdc66754d7028e32f14bbf190e30ff6b2e5a67162749ccfe26"
375+
dependencies = [
376+
"arborium-sysroot",
377+
"cc",
378+
"regex",
379+
"regex-syntax",
380+
"streaming-iterator",
381+
"tree-sitter-language",
382+
]
383+
384+
[[package]]
385+
name = "arborium-typescript"
386+
version = "1.2.3"
387+
source = "registry+https://github.com/rust-lang/crates.io-index"
388+
checksum = "243273d737c8c0d26b5e7ed2a09c3eb772b95df508313678494058551e3ad53d"
389+
dependencies = [
390+
"arborium-javascript",
391+
"arborium-sysroot",
392+
"cc",
393+
"tree-sitter-language",
394+
]
395+
396+
[[package]]
397+
name = "arborium-yaml"
398+
version = "1.2.3"
399+
source = "registry+https://github.com/rust-lang/crates.io-index"
400+
checksum = "748abb1f9d238ae3f3be5b9adb9416edf68a2b331fe7c2b5ef566c72fc20391f"
401+
dependencies = [
402+
"arborium-sysroot",
403+
"cc",
404+
"tree-sitter-language",
405+
]
406+
173407
[[package]]
174408
name = "arrayref"
175409
version = "0.3.9"
@@ -1265,6 +1499,17 @@ version = "1.0.10"
12651499
source = "registry+https://github.com/rust-lang/crates.io-index"
12661500
checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921"
12671501

1502+
[[package]]
1503+
name = "dlmalloc"
1504+
version = "0.2.12"
1505+
source = "registry+https://github.com/rust-lang/crates.io-index"
1506+
checksum = "6738d2e996274e499bc7b0d693c858b7720b9cd2543a0643a3087e6cb0a4fa16"
1507+
dependencies = [
1508+
"cfg-if",
1509+
"libc",
1510+
"windows-sys 0.61.2",
1511+
]
1512+
12681513
[[package]]
12691514
name = "dyn-clone"
12701515
version = "1.0.20"
@@ -4861,6 +5106,7 @@ dependencies = [
48615106
name = "rustdoc"
48625107
version = "0.0.0"
48635108
dependencies = [
5109+
"arborium",
48645110
"arrayvec",
48655111
"askama",
48665112
"base64",
@@ -5329,6 +5575,12 @@ version = "1.1.0"
53295575
source = "registry+https://github.com/rust-lang/crates.io-index"
53305576
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
53315577

5578+
[[package]]
5579+
name = "streaming-iterator"
5580+
version = "0.1.9"
5581+
source = "registry+https://github.com/rust-lang/crates.io-index"
5582+
checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
5583+
53325584
[[package]]
53335585
name = "string_cache"
53345586
version = "0.8.9"
@@ -5851,6 +6103,12 @@ dependencies = [
58516103
"tracing-subscriber",
58526104
]
58536105

6106+
[[package]]
6107+
name = "tree-sitter-language"
6108+
version = "0.1.6"
6109+
source = "registry+https://github.com/rust-lang/crates.io-index"
6110+
checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce"
6111+
58546112
[[package]]
58556113
name = "twox-hash"
58566114
version = "1.6.3"

src/librustdoc/Cargo.toml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,23 @@ path = "lib.rs"
99

1010
[dependencies]
1111
# tidy-alphabetical-start
12+
arborium = { version = "1.2.3", default-features = false, features = [
13+
"lang-bash",
14+
"lang-c",
15+
"lang-cpp",
16+
"lang-css",
17+
"lang-go",
18+
"lang-html",
19+
"lang-java",
20+
"lang-javascript",
21+
"lang-json",
22+
"lang-python",
23+
"lang-ruby",
24+
"lang-sql",
25+
"lang-toml",
26+
"lang-typescript",
27+
"lang-yaml",
28+
] }
1229
arrayvec = { version = "0.7", default-features = false }
1330
askama = { version = "0.14", default-features = false, features = ["alloc", "config", "derive"] }
1431
base64 = "0.21.7"

src/librustdoc/html/highlight.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,5 +1475,31 @@ fn string_without_closing_tag<T: Display>(
14751475
}
14761476
}
14771477

1478+
/// Highlights non-Rust code using arborium (tree-sitter based).
1479+
/// Returns `None` if the language is not supported, in which case
1480+
/// the caller should fall back to plain escaped text.
1481+
pub(crate) fn highlight_foreign_code(lang: &str, code: &str) -> Option<String> {
1482+
use std::cell::RefCell;
1483+
1484+
thread_local! {
1485+
static HIGHLIGHTER: RefCell<arborium::Highlighter> =
1486+
RefCell::new(arborium::Highlighter::new());
1487+
}
1488+
1489+
// Map common language aliases to arborium grammar names
1490+
let lang = match lang {
1491+
"js" => "javascript",
1492+
"ts" => "typescript",
1493+
"py" => "python",
1494+
"rb" => "ruby",
1495+
"sh" | "shell" | "zsh" => "bash",
1496+
"yml" => "yaml",
1497+
"c++" | "cxx" => "cpp",
1498+
other => other,
1499+
};
1500+
1501+
HIGHLIGHTER.with_borrow_mut(|h| h.highlight_to_html(lang, code).ok())
1502+
}
1503+
14781504
#[cfg(test)]
14791505
mod tests;

src/librustdoc/html/markdown.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -250,21 +250,27 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
250250
LangString::parse_without_check(lang, self.check_error_codes);
251251
if !parse_result.rust {
252252
let added_classes = parse_result.added_classes;
253-
let lang_string = if let Some(lang) = parse_result.unknown.first() {
254-
format!("language-{lang}")
255-
} else {
256-
String::new()
257-
};
253+
let lang = parse_result.unknown.first().map(|s| s.as_str());
254+
let lang_string = lang.map(|l| format!("language-{l}")).unwrap_or_default();
258255
let whitespace = if added_classes.is_empty() { "" } else { " " };
256+
257+
// Try to highlight with arborium if we have a language
258+
let code_html = lang
259+
.and_then(|l| {
260+
highlight::highlight_foreign_code(l, original_text.trim_suffix('\n'))
261+
})
262+
.unwrap_or_else(|| {
263+
Escape(original_text.trim_suffix('\n')).to_string()
264+
});
265+
259266
return Some(Event::Html(
260267
format!(
261268
"<div class=\"example-wrap\">\
262269
<pre class=\"{lang_string}{whitespace}{added_classes}\">\
263-
<code>{text}</code>\
270+
<code>{code_html}</code>\
264271
</pre>\
265272
</div>",
266273
added_classes = added_classes.join(" "),
267-
text = Escape(original_text.trim_suffix('\n')),
268274
)
269275
.into(),
270276
));

0 commit comments

Comments
 (0)