diff --git a/__tests__/compilers/html-block.test.ts b/__tests__/compilers/html-block.test.ts
index 6f739bb64..7b62977fb 100644
--- a/__tests__/compilers/html-block.test.ts
+++ b/__tests__/compilers/html-block.test.ts
@@ -1,15 +1,4 @@
-import type { Element } from 'hast';
-
-import { mdast, mdx, mdxish } from '../../index';
-
-function findHTMLBlock(element: Element): Element | undefined {
- if (element.tagName === 'HTMLBlock' || element.tagName === 'html-block') {
- return element;
- }
- return element.children
- .filter((child): child is Element => child.type === 'element')
- .reduce((found, child) => found || findHTMLBlock(child), undefined);
-}
+import { mdast, mdx } from '../../index';
describe('html-block compiler', () => {
it('compiles html blocks within containers', () => {
@@ -51,138 +40,3 @@ const foo = () => {
expect(mdx(mdast(markdown)).trim()).toBe(expected.trim());
});
});
-
-describe('mdxish html-block compiler', () => {
- it('compiles html blocks within containers', () => {
- const markdown = `
-> 🚧 It compiles!
->
-> {\`
-> Hello, World!
-> \`}
-`;
-
- const hast = mdxish(markdown.trim());
- const callout = hast.children[0] as Element;
-
- expect(callout.type).toBe('element');
- expect(callout.tagName).toBe('Callout');
-
- // Find HTMLBlock within the callout
- const htmlBlock = findHTMLBlock(callout);
- expect(htmlBlock).toBeDefined();
- expect(htmlBlock?.tagName).toBe('html-block');
- });
-
- it('compiles html blocks preserving newlines', () => {
- const markdown = `
-{\`
-
-const foo = () => {
- const bar = {
- baz: 'blammo'
- }
-
- return bar
-}
-
-\`}
-`;
-
- const hast = mdxish(markdown.trim());
- const paragraph = hast.children[0] as Element;
-
- expect(paragraph.type).toBe('element');
- const htmlBlock = findHTMLBlock(paragraph);
- expect(htmlBlock).toBeDefined();
- expect(htmlBlock?.tagName).toBe('html-block');
- });
-
- it('adds newlines for readability', () => {
- const markdown = '{`Hello, World!
`}';
-
- const hast = mdxish(markdown);
- const paragraph = hast.children[0] as Element;
-
- expect(paragraph.type).toBe('element');
- const htmlBlock = findHTMLBlock(paragraph);
- expect(htmlBlock).toBeDefined();
- expect(htmlBlock?.tagName).toBe('html-block');
- });
-
- it('unescapes backticks in HTML content', () => {
- const markdown = '{`\\`example\\``}';
-
- const hast = mdxish(markdown);
- const paragraph = hast.children[0] as Element;
-
- expect(paragraph.type).toBe('element');
- const htmlBlock = findHTMLBlock(paragraph);
- expect(htmlBlock).toBeDefined();
- expect(htmlBlock?.tagName).toBe('html-block');
-
- // Verify that escaped backticks \` are unescaped to ` in the HTML
- const htmlProp = htmlBlock?.properties?.html as string;
- expect(htmlProp).toBeDefined();
- expect(htmlProp).toContain('`example`');
- expect(htmlProp).not.toContain('\\`');
- });
-
- it('passes safeMode property correctly', () => {
- // Test with both JSX expression and string syntax
- const markdown = '{`Content
`}';
-
- const hast = mdxish(markdown);
- const paragraph = hast.children[0] as Element;
-
- expect(paragraph.type).toBe('element');
- const htmlBlock = findHTMLBlock(paragraph);
- expect(htmlBlock).toBeDefined();
-
- const allProps = htmlBlock?.properties;
- expect(allProps).toBeDefined();
-
- const safeMode = allProps?.safeMode;
- expect(safeMode).toBe('true');
-
- // Verify that html property is still present (for safeMode to render as escaped text)
- const htmlProp = allProps?.html as string;
- expect(htmlProp).toBeDefined();
- expect(htmlProp).toContain('');
- expect(htmlProp).toContain('Content
');
- });
-
- it('should handle template literal with variables', () => {
- // eslint-disable-next-line quotes
- const markdown = `{\`const x = \${variable}\`}`;
-
- const hast = mdxish(markdown);
- const paragraph = hast.children[0] as Element;
-
- expect(paragraph.type).toBe('element');
- const htmlBlock = findHTMLBlock(paragraph);
- expect(htmlBlock).toBeDefined();
- // eslint-disable-next-line no-template-curly-in-string
- expect(htmlBlock?.properties?.html).toBe('const x = ${variable}');
- });
-
- it('should handle nested template literals', () => {
- // Use a regular string to avoid nested template literal syntax error
- // The content should be: ```javascript\nconst x = 1;\n```
- const markdown = '{`\\`\\`\\`javascript\\nconst x = 1;\\n\\`\\`\\`
`}';
-
- const hast = mdxish(markdown);
- const paragraph = hast.children[0] as Element;
-
- expect(paragraph.type).toBe('element');
- const htmlBlock = findHTMLBlock(paragraph);
- expect(htmlBlock).toBeDefined();
-
- // Verify that the HTML content is preserved correctly with newlines
- const htmlProp = htmlBlock?.properties?.html as string;
- expect(htmlProp).toBeDefined();
-
- // The expected content should have triple backticks
- expect(htmlProp).toBe('```javascript\nconst x = 1;\n```
');
- });
-});
diff --git a/__tests__/transformers/mdxish-html-blocks.test.ts b/__tests__/transformers/mdxish-html-blocks.test.ts
new file mode 100644
index 000000000..e0407d29f
--- /dev/null
+++ b/__tests__/transformers/mdxish-html-blocks.test.ts
@@ -0,0 +1,587 @@
+import type { Element } from 'hast';
+import type { Root } from 'mdast';
+
+import remarkParse from 'remark-parse';
+import { unified } from 'unified';
+
+import { mdxish } from '../../index';
+import { htmlBlockComponentFromMarkdown } from '../../lib/mdast-util/html-block-component';
+import { htmlBlockComponent } from '../../lib/micromark/html-block-component/syntax';
+import mdxishHtmlBlocks from '../../processor/transform/mdxish/mdxish-html-blocks';
+import { collectNodes } from '../helpers';
+
+interface HTMLBlockNode { // test-local view of the `html-block` nodes the assertions below inspect
+ children: { type: string; value: string }[];
+ data: {
+ hName: string;
+ hProperties: Record<string, unknown>; // values are narrowed or cast by each assertion
+ };
+ type: string;
+}
+
+const parseWithPlugin = (markdown: string): Root => { // test helper: remark-parse + the HTMLBlock micromark/mdast extensions + mdxishHtmlBlocks
+ const processor = unified()
+ .data('micromarkExtensions', [htmlBlockComponent()])
+ .data('fromMarkdownExtensions', [htmlBlockComponentFromMarkdown()])
+ .use(remarkParse)
+ .use(mdxishHtmlBlocks);
+ const tree = processor.parse(markdown);
+ processor.runSync(tree); // return value is ignored; `tree` itself is returned, so the transformer presumably mutates it in place (verify)
+ return tree as Root;
+};
+
+const findHtmlBlockNodes = (tree: Root): HTMLBlockNode[] =>
+ collectNodes(tree, node => node.type === 'html-block') as unknown as HTMLBlockNode[]; // nodes typed 'html-block', cast to the test-local shape
+
+function findHTMLBlock(element: Element): Element | undefined { // depth-first search for the first `HTMLBlock` / `html-block` element
+ if (element.tagName === 'HTMLBlock' || element.tagName === 'html-block') {
+ return element;
+ }
+ return element.children
+ .filter((child): child is Element => child.type === 'element')
+ .reduce<Element | undefined>((found, child) => found || findHTMLBlock(child), undefined); // explicit accumulator type: the seed alone would infer `undefined`
+}
+
+describe('mdxish-html-blocks transformer', () => {
+ describe('attribute extraction', () => {
+ it('extracts safeMode from JSX syntax', () => {
+ const tree = parseWithPlugin('{`content
`}');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.data.hProperties).toMatchObject({ safeMode: 'true', html: 'content
' });
+ });
+
+ it('extracts safeMode from string syntax', () => {
+ const tree = parseWithPlugin('{`content
`}');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.data.hProperties).toMatchObject({ safeMode: 'false', html: 'content
' });
+ });
+
+ it('extracts runScripts boolean true', () => {
+ const tree = parseWithPlugin('{`content
`}');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.data.hProperties.runScripts).toBe(true);
+ });
+
+ it('extracts runScripts boolean false', () => {
+ const tree = parseWithPlugin('{`content
`}');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.data.hProperties.runScripts).toBe(false);
+ });
+
+ it('extracts runScripts string value', () => {
+ const tree = parseWithPlugin('{`content
`}');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.data.hProperties.runScripts).toBe('afterRender');
+ });
+
+ it('extracts multiple attributes', () => {
+ const tree = parseWithPlugin(
+ '{`content
`}',
+ );
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.data.hProperties).toMatchObject({ safeMode: 'true', runScripts: true });
+ });
+
+ it('omits runScripts and safeMode when absent', () => {
+ const tree = parseWithPlugin('{`content
`}');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.data.hProperties).toStrictEqual({ html: 'content
' });
+ });
+ });
+
+ describe('content extraction', () => {
+ it('strips template literal delimiters', () => {
+ const tree = parseWithPlugin('{`hello
`}');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.data.hProperties.html).toBe('hello
');
+ });
+
+ it('handles content without template literal syntax', () => {
+ const tree = parseWithPlugin('plain');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.data.hProperties.html).toBe('plain');
+ });
+
+ it('unescapes backticks in HTML content', () => {
+ const tree = parseWithPlugin('{`\\`example\\``}');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.data.hProperties.html).toBe('`example`');
+ });
+
+ it('preserves multiline content', () => {
+ const markdown = `{\`
+
+\`}`;
+ const tree = parseWithPlugin(markdown);
+ const [node] = findHtmlBlockNodes(tree);
+ const html = node.data.hProperties.html as string;
+ expect(html).toContain('one');
+ expect(html).toContain('two');
+ });
+ });
+
+ describe('node structure', () => {
+ it('produces correct node type and hName', () => {
+ const tree = parseWithPlugin('{`test
`}');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.type).toBe('html-block');
+ expect(node.data.hName).toBe('html-block');
+ });
+
+ it('sets children text node matching html property', () => {
+ const tree = parseWithPlugin('{`test
`}');
+ const [node] = findHtmlBlockNodes(tree);
+ expect(node.children).toStrictEqual([{ type: 'text', value: 'test
' }]);
+ });
+
+ it('does not transform non-HTMLBlock html nodes', () => {
+ const tree = parseWithPlugin('just html
');
+ const htmlBlockNodes = findHtmlBlockNodes(tree);
+ expect(htmlBlockNodes).toHaveLength(0);
+ });
+ });
+});
+
+describe('mdxish html-block integration', () => {
+ it('compiles html blocks within containers', () => {
+ const markdown = `
+> 🚧 It compiles!
+>
+> {\`
+> Hello, World!
+> \`}
+`;
+
+ const hast = mdxish(markdown.trim());
+ const callout = hast.children[0] as Element;
+
+ expect(callout.tagName).toBe('Callout');
+
+ const htmlBlock = findHTMLBlock(callout);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: ' Hello, World!' },
+ });
+ });
+
+ it('compiles html blocks preserving newlines', () => {
+ const markdown = `{\`
+
+const foo = () => {
+ const bar = {
+ baz: 'blammo'
+ }
+
+ return bar
+}
+
+\`}`;
+
+ const hast = mdxish(markdown);
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+
+ const htmlProp = htmlBlock?.properties?.html as string;
+ expect(htmlProp).toContain('');
+ expect(htmlProp).toContain('const foo = () => {');
+ expect(htmlProp).toContain("baz: 'blammo'");
+ expect(htmlProp).toContain('
');
+ });
+
+ it('adds newlines for readability', () => {
+ const hast = mdxish('{`Hello, World!
`}');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'Hello, World!
' },
+ });
+ });
+
+ it('unescapes backticks in HTML content', () => {
+ const hast = mdxish('{`\\`example\\``}');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: '`example`' },
+ });
+ });
+
+ it('passes safeMode property correctly', () => {
+ const hast = mdxish('{`Content
`}');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { safeMode: 'true', html: 'Content
' },
+ });
+ });
+
+ it('handles template literal with variables', () => {
+ // eslint-disable-next-line quotes
+ const hast = mdxish(`{\`const x = \${variable}\`}`);
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ // eslint-disable-next-line no-template-curly-in-string
+ properties: { html: 'const x = ${variable}' },
+ });
+ });
+
+ it('handles nested template literals', () => {
+ const hast = mdxish('{`\\`\\`\\`javascript\\nconst x = 1;\\n\\`\\`\\`
`}');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: '```javascript\nconst x = 1;\n```
' },
+ });
+ });
+
+ describe('flow-level (standalone block)', () => {
+ it('handles simple single-line HTMLBlock', () => {
+ const hast = mdxish('{`hello
`}');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'hello
' },
+ });
+ });
+
+ it('handles multiline content', () => {
+ const markdown = `{\`
+
+\`}`;
+
+ const hast = mdxish(markdown);
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+
+ const htmlProp = htmlBlock?.properties?.html as string;
+ expect(htmlProp).toContain('one');
+ expect(htmlProp).toContain('two');
+ });
+
+ it('handles content with blank lines', () => {
+ const markdown = `{\`
+before
+
+after
+\`}`;
+
+ const hast = mdxish(markdown);
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+
+ const htmlProp = htmlBlock?.properties?.html as string;
+ expect(htmlProp).toContain('before
');
+ expect(htmlProp).toContain('after
');
+ });
+
+ it('handles script tags without being consumed by the markdown parser', () => {
+ const markdown = `{\`
+
+visible
+\`}`;
+
+ const hast = mdxish(markdown);
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+
+ const htmlProp = htmlBlock?.properties?.html as string;
+ expect(htmlProp).toContain('');
+ expect(htmlProp).toContain('visible
');
+ });
+
+ it('handles style tags without being consumed by the markdown parser', () => {
+ const markdown = `{\`
+
+styled
+\`}`;
+
+ const hast = mdxish(markdown);
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+
+ const htmlProp = htmlBlock?.properties?.html as string;
+ expect(htmlProp).toContain('');
+ expect(htmlProp).toContain('styled
');
+ });
+
+ it('handles HTMLBlock without template literal syntax', () => {
+ const hast = mdxish('plain');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'plain' },
+ });
+ });
+
+ it('handles HTMLBlock with runScripts attribute', () => {
+ const hast = mdxish('{``}');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { runScripts: true, html: '' },
+ });
+ });
+
+ it('handles opening tag with attributes on a new line', () => {
+ const markdown = '{``}';
+ const hast = mdxish(markdown);
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { runScripts: true, html: '' },
+ });
+ });
+
+ it('handles trailing whitespace after closing tag', () => {
+ const hast = mdxish('{`hello
`} ');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'hello
' },
+ });
+ });
+ });
+
+ describe('text-level (inline)', () => {
+ it('handles inline HTMLBlock surrounded by text', () => {
+ const hast = mdxish('before {`middle`} after');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'middle' },
+ });
+ });
+
+ it('handles inline HTMLBlock at the end of a paragraph', () => {
+ const hast = mdxish('some text {`italic`}');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'italic' },
+ });
+ });
+
+ it('handles inline HTMLBlock with attributes', () => {
+ const hast = mdxish('text {``} more');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { safeMode: 'true', html: '' },
+ });
+ });
+
+ it('handles multiline inline HTMLBlock with trailing text', () => {
+ const markdown = 'hello {`Hello, World!`} world';
+ const hast = mdxish(markdown);
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+ expect(htmlBlock?.properties?.html).toContain('Hello, World!');
+ });
+ });
+
+ describe('trailing text preservation', () => {
+ it('preserves trailing text after single-line HTMLBlock', () => {
+ const hast = mdxish('{`Hello`} trailing');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'Hello' },
+ });
+ });
+
+ it('preserves trailing text after multiline HTMLBlock', () => {
+ const markdown = '{`Hello`}. trailing';
+ const hast = mdxish(markdown);
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+ expect(htmlBlock?.properties?.html).toContain('Hello');
+ });
+
+ it('does not crash with trailing text after closing tag', () => {
+ expect(() => mdxish('{`content
`} hello world')).not.toThrow();
+ });
+
+ it('does not crash with trailing punctuation after closing tag', () => {
+ expect(() => mdxish('{`content
`}. stuff')).not.toThrow();
+ });
+ });
+
+ describe('inside callouts', () => {
+ it('handles HTMLBlock in a callout without stray > characters', () => {
+ const markdown = `> 🚧 It compiles!
+>
+> {\`
+> Hello, World!
+> \`}`;
+
+ const hast = mdxish(markdown);
+ expect((hast.children[0] as Element).tagName).toBe('Callout');
+
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: ' Hello, World!' },
+ });
+ });
+
+ it('handles HTMLBlock in a callout with script tags', () => {
+ const markdown = `> ⚠️ Warning
+>
+> {\`
+>
+> safe content
+> \`}`;
+
+ const hast = mdxish(markdown);
+ expect((hast.children[0] as Element).tagName).toBe('Callout');
+
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+
+ const htmlProp = htmlBlock?.properties?.html as string;
+ expect(htmlProp).toContain('');
+ expect(htmlProp).toContain('safe content
');
+ expect(htmlProp).not.toMatch(/^>/m);
+ });
+
+ it('handles HTMLBlock in a callout with blank lines in content', () => {
+ const markdown = `> 🚧 Test
+>
+> {\`
+> first
+>
+> second
+> \`}`;
+
+ const hast = mdxish(markdown);
+ expect((hast.children[0] as Element).tagName).toBe('Callout');
+
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+
+ const htmlProp = htmlBlock?.properties?.html as string;
+ expect(htmlProp).toContain('first
');
+ expect(htmlProp).toContain('second
');
+ expect(htmlProp).not.toMatch(/^>/m);
+ });
+
+ it('handles HTMLBlock in a callout with trailing whitespace', () => {
+ const markdown = `> 🚧 It compiles!
+>
+> {\`
+> Hello
+> \`}${' '}`;
+
+ const hast = mdxish(markdown);
+ expect((hast.children[0] as Element).tagName).toBe('Callout');
+
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: ' Hello' },
+ });
+ });
+
+ it('handles HTMLBlock in an empty callout (no title text)', () => {
+ const markdown = `> 📘
+>
+> {\`body only
\`}`;
+
+ const hast = mdxish(markdown);
+ expect((hast.children[0] as Element).tagName).toBe('Callout');
+
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'body only
' },
+ });
+ });
+ });
+
+ describe('inside blockquotes', () => {
+ it('handles single-line HTMLBlock in a blockquote', () => {
+ const hast = mdxish('> {`quoted
`}');
+ expect((hast.children[0] as Element).tagName).toBe('blockquote');
+
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'quoted
' },
+ });
+ });
+
+ it('handles multiline HTMLBlock in a blockquote without stray > characters', () => {
+ const markdown = `> {\`
+> line1
+> line2
+> \`}`;
+
+ const hast = mdxish(markdown);
+ expect((hast.children[0] as Element).tagName).toBe('blockquote');
+
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+
+ const htmlProp = htmlBlock?.properties?.html as string;
+ expect(htmlProp).toContain('line1
');
+ expect(htmlProp).toContain('line2
');
+ expect(htmlProp).not.toMatch(/^>/m);
+ });
+ });
+
+ describe('inside lists', () => {
+ it('handles HTMLBlock in an unordered list item', () => {
+ const hast = mdxish('- {`listed`}');
+ expect((hast.children[0] as Element).tagName).toBe('ul');
+
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'listed' },
+ });
+ });
+
+ it('handles HTMLBlock in an ordered list item', () => {
+ const hast = mdxish('1. {`ordered`}');
+ expect((hast.children[0] as Element).tagName).toBe('ol');
+
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({
+ tagName: 'html-block',
+ properties: { html: 'ordered' },
+ });
+ });
+ });
+
+ describe('edge cases', () => {
+ it('handles standalone multiline HTMLBlock with surrounding paragraphs', () => {
+ const markdown = `Hello
+
+{\`
+Hello, World!
+\`}
+
+there`;
+ const hast = mdxish(markdown);
+ const htmlBlock = (hast.children as Element[])
+ .filter(child => child.type === 'element')
+ .reduce((found, child) => found || findHTMLBlock(child), undefined);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+ expect(htmlBlock?.properties?.html).toContain('Hello, World!
');
+ });
+
+ it('handles nested HTMLBlock tags in content', () => {
+ const hast = mdxish('{`{Hello}`}');
+ const htmlBlock = findHTMLBlock(hast.children[0] as Element);
+ expect(htmlBlock).toMatchObject({ tagName: 'html-block' });
+ expect(htmlBlock?.properties?.html).toContain('{Hello}');
+ });
+ });
+});
diff --git a/components/HTMLBlock/index.tsx b/components/HTMLBlock/index.tsx
index ed3ff975a..c73bfbfef 100644
--- a/components/HTMLBlock/index.tsx
+++ b/components/HTMLBlock/index.tsx
@@ -25,10 +25,7 @@ const HTMLBlock = ({ children = '', html: htmlProp, runScripts, safeMode: safeMo
let html: string = '';
if (htmlProp !== undefined) {
html = htmlProp;
- } else {
- if (typeof children !== 'string') {
- throw new TypeError('HTMLBlock: children must be a string');
- }
+ } else if (typeof children === 'string') {
html = children;
}
diff --git a/lib/mdast-util/html-block-component/index.ts b/lib/mdast-util/html-block-component/index.ts
new file mode 100644
index 000000000..d2d27240b
--- /dev/null
+++ b/lib/mdast-util/html-block-component/index.ts
@@ -0,0 +1,54 @@
+import type { Html } from 'mdast';
+import type { CompileContext, Extension as FromMarkdownExtension, Handle, Token } from 'mdast-util-from-markdown';
+
+const contextMap = new WeakMap<Token, { chunks: string[]; lastEndLine: number }>(); // per-component accumulator, keyed by the `htmlBlockComponent` token
+
+function findHtmlBlockComponentToken(this: CompileContext): Token | undefined { // nearest-to-top `htmlBlockComponent` token on `this.tokenStack`, if any
+ const events = this.tokenStack; // despite the name these are tokenStack entries; index 0 of each entry is the token
+ for (let i = events.length - 1; i >= 0; i -= 1) {
+ if (events[i][0].type === 'htmlBlockComponent') return events[i][0];
+ }
+ return undefined;
+}
+
+// Produces an MDAST `html` node; mdxishHtmlBlocks later transforms it into an `html-block` node.
+function enterHtmlBlockComponent(this: CompileContext, token: Parameters<Handle>[0]): void {
+ contextMap.set(token, { chunks: [], lastEndLine: token.start.line }); // accumulator that the data exit handler appends to
+ this.enter({ type: 'html', value: '' } as Html, token); // `value` stays empty until the component token exits
+}
+
+function exitHtmlBlockComponentData(this: CompileContext, token: Parameters<Handle>[0]): void { // appends one data chunk to its component's accumulator
+ const componentToken = findHtmlBlockComponentToken.call(this);
+ if (!componentToken) return;
+ const ctx = contextMap.get(componentToken);
+ if (ctx) {
+ const gap = token.start.line - ctx.lastEndLine; // number of lines between the previous chunk's end and this chunk's start
+ if (ctx.chunks.length > 0 && gap > 0) {
+ ctx.chunks.push('\n'.repeat(gap)); // flow-mode line endings are separate tokens, so re-insert them between chunks
+ }
+ ctx.chunks.push(this.sliceSerialize(token));
+ ctx.lastEndLine = token.end.line;
+ }
+}
+
+function exitHtmlBlockComponent(this: CompileContext, token: Parameters<Handle>[0]): void { // writes the joined chunks into the open `html` node, then closes it
+ const ctx = contextMap.get(token);
+ const node = this.stack[this.stack.length - 1] as Html; // top of the node stack: the node opened by the enter handler
+ if (ctx) {
+ node.value = ctx.chunks.join('');
+ contextMap.delete(token);
+ }
+ this.exit(token);
+}
+
+export function htmlBlockComponentFromMarkdown(): FromMarkdownExtension { // wires the handlers above to the `htmlBlockComponent*` token types
+ return {
+ enter: {
+ htmlBlockComponent: enterHtmlBlockComponent,
+ },
+ exit: {
+ htmlBlockComponentData: exitHtmlBlockComponentData, // data tokens have no enter handler; only their exit appends text
+ htmlBlockComponent: exitHtmlBlockComponent,
+ },
+ };
+}
diff --git a/lib/mdxish.ts b/lib/mdxish.ts
index c0b5efea3..d9ca5a32f 100644
--- a/lib/mdxish.ts
+++ b/lib/mdxish.ts
@@ -58,11 +58,13 @@ import tailwindTransformer from '../processor/transform/tailwind';
import { emptyTaskListItemFromMarkdown } from './mdast-util/empty-task-list-item';
import { gemojiFromMarkdown } from './mdast-util/gemoji';
+import { htmlBlockComponentFromMarkdown } from './mdast-util/html-block-component';
import { jsxTableFromMarkdown } from './mdast-util/jsx-table';
import { legacyVariableFromMarkdown } from './mdast-util/legacy-variable';
import { magicBlockFromMarkdown } from './mdast-util/magic-block';
import { mdxComponentFromMarkdown } from './mdast-util/mdx-component';
import { gemoji } from './micromark/gemoji';
+import { htmlBlockComponent } from './micromark/html-block-component';
import { jsxComment } from './micromark/jsx-comment';
import { jsxTable } from './micromark/jsx-table';
import { legacyVariable } from './micromark/legacy-variable';
@@ -158,8 +160,9 @@ export function mdxishAstProcessor(mdContent: string, opts: MdxishOpts = {}) {
text: mdxExprExt.text,
};
- const micromarkExts = [jsxTable(), magicBlock(), mdxComponent(), gemoji(), legacyVariable(), looseHtmlEntity()];
+ const micromarkExts = [htmlBlockComponent(), jsxTable(), magicBlock(), mdxComponent(), gemoji(), legacyVariable(), looseHtmlEntity()];
const fromMarkdownExts = [
+ htmlBlockComponentFromMarkdown(),
jsxTableFromMarkdown(),
magicBlockFromMarkdown(),
mdxComponentFromMarkdown(),
diff --git a/lib/micromark/html-block-component/index.ts b/lib/micromark/html-block-component/index.ts
new file mode 100644
index 000000000..1a86842bd
--- /dev/null
+++ b/lib/micromark/html-block-component/index.ts
@@ -0,0 +1 @@
+export { htmlBlockComponent } from './syntax';
diff --git a/lib/micromark/html-block-component/syntax.ts b/lib/micromark/html-block-component/syntax.ts
new file mode 100644
index 000000000..fd665533e
--- /dev/null
+++ b/lib/micromark/html-block-component/syntax.ts
@@ -0,0 +1,329 @@
+/* eslint-disable @typescript-eslint/no-use-before-define */
+import type { Code, Construct, Effects, Extension, Resolver, State, TokenizeContext } from 'micromark-util-types';
+
+import { markdownLineEnding } from 'micromark-util-character';
+import { codes, types } from 'micromark-util-symbol';
+
+declare module 'micromark-util-types' { // module augmentation: adds this extension's two token names to TokenTypeMap
+ interface TokenTypeMap {
+ htmlBlockComponent: 'htmlBlockComponent';
+ htmlBlockComponentData: 'htmlBlockComponentData';
+ }
+}
+
+const TAG_SUFFIX: Code[] = [ // "TMLBlock": the tag name after its leading "H", which matchTagName checks separately
+ codes.uppercaseT,
+ codes.uppercaseM,
+ codes.uppercaseL,
+ codes.uppercaseB,
+ codes.lowercaseL,
+ codes.lowercaseO,
+ codes.lowercaseC,
+ codes.lowercaseK,
+];
+
+// ---------------------------------------------------------------------------
+// Shared tokenizer factory
+// ---------------------------------------------------------------------------
+
+/**
+ * Creates a tokenize function for `...`.
+ *
+ * - **flow** (block-level): supports multiline content via line continuations,
+ * consumes trailing whitespace after the closing tag.
+ * - **text** (inline): single-line only, exits immediately after the closing tag.
+ */
+function createTokenize(mode: 'flow' | 'text') {
+ return function tokenize(this: TokenizeContext, effects: Effects, ok: State, nok: State) {
+ let depth = 1;
+
+ function matchChars(chars: Code[], onMatch: State, onFail: (code: Code) => State | undefined): State {
+ if (chars.length === 0) return onMatch;
+ const next: State = (code: Code): State | undefined => {
+ if (code === chars[0]) {
+ effects.consume(code);
+ return matchChars(chars.slice(1), onMatch, onFail);
+ }
+ return onFail(code);
+ };
+ return next;
+ }
+
+ function matchTagName(onMatch: State, onFail: (code: Code) => State | undefined): State {
+ const next: State = (code: Code): State | undefined => {
+ if (code === codes.uppercaseH) {
+ effects.consume(code);
+ return matchChars(TAG_SUFFIX, onMatch, onFail);
+ }
+ return onFail(code);
+ };
+ return next;
+ }
+
+ return start;
+
+ function start(code: Code): State | undefined {
+ if (code !== codes.lessThan) return nok(code);
+ effects.enter('htmlBlockComponent');
+ effects.enter('htmlBlockComponentData');
+ effects.consume(code);
+ return matchTagName(afterTagName, nok);
+ }
+
+ function afterTagName(code: Code): State | undefined {
+ if (code === codes.greaterThan) {
+ effects.consume(code);
+ return body;
+ }
+ if (code === codes.space || code === codes.horizontalTab) {
+ effects.consume(code);
+ return inAttributes;
+ }
+ if (mode === 'flow' && markdownLineEnding(code)) {
+ effects.exit('htmlBlockComponentData');
+ return attributeContinuationStart(code);
+ }
+ if (code === codes.slash) {
+ effects.consume(code);
+ return selfClose;
+ }
+ return nok(code);
+ }
+
+ function inAttributes(code: Code): State | undefined {
+ if (code === codes.greaterThan) {
+ effects.consume(code);
+ return body;
+ }
+ if (code === null) {
+ return nok(code);
+ }
+ if (markdownLineEnding(code)) {
+ if (mode === 'text') return nok(code);
+ effects.exit('htmlBlockComponentData');
+ return attributeContinuationStart(code);
+ }
+ effects.consume(code);
+ return inAttributes;
+ }
+
+ function attributeContinuationStart(code: Code): State | undefined {
+ return effects.check(nonLazyContinuationStart, attributeContinuationNonLazy, continuationAfter)(code);
+ }
+
+ function attributeContinuationNonLazy(code: Code): State | undefined {
+ effects.enter(types.lineEnding);
+ effects.consume(code);
+ effects.exit(types.lineEnding);
+ return attributeContinuationBefore;
+ }
+
+ function attributeContinuationBefore(code: Code): State | undefined {
+ if (code === null || markdownLineEnding(code)) {
+ return attributeContinuationStart(code);
+ }
+ effects.enter('htmlBlockComponentData');
+ return inAttributes(code);
+ }
+
+ function selfClose(code: Code): State | undefined {
+ if (code === codes.greaterThan) {
+ effects.consume(code);
+ return mode === 'flow' ? afterClose : done(code);
+ }
+ return nok(code);
+ }
+
+ function body(code: Code): State | undefined {
+ if (code === null) return nok(code);
+
+ if (markdownLineEnding(code)) {
+ if (mode === 'text') {
+ // Text constructs operate on paragraph content which spans lines
+ effects.consume(code);
+ return body;
+ }
+ effects.exit('htmlBlockComponentData');
+ return continuationStart(code);
+ }
+
+ if (code === codes.lessThan) {
+ effects.consume(code);
+ return closeSlash;
+ }
+
+ effects.consume(code);
+ return body;
+ }
+
+ function closeSlash(code: Code): State | undefined {
+ if (code === codes.slash) {
+ effects.consume(code);
+ return matchTagName(closeGt, body);
+ }
+ return matchTagName(openAfterTagName, body)(code);
+ }
+
+ /**
+  * Runs after the tag name of a nested opening tag has been matched. The
+  * nesting depth is incremented only when the name is followed by `>`, a
+  * space, or a tab; any other character is handed back to `body` unchanged.
+  */
+ function openAfterTagName(code: Code): State | undefined {
+   const endsTagName = code === codes.greaterThan || code === codes.space || code === codes.horizontalTab;
+   if (!endsTagName) return body(code);
+
+   depth += 1;
+   effects.consume(code);
+   return body;
+ }
+
+ /**
+  * Runs after the tag name of a closing tag has been matched. A `>` decrements
+  * the nesting depth. At depth zero the construct finishes, via `afterClose`
+  * in flow mode or immediately in text mode; otherwise scanning continues in
+  * `body`. Any other character is handed back to `body` unchanged.
+  */
+ function closeGt(code: Code): State | undefined {
+   if (code !== codes.greaterThan) return body(code);
+
+   depth -= 1;
+   effects.consume(code);
+
+   // Still inside an outer element: keep scanning.
+   if (depth !== 0) return body;
+
+   return mode === 'flow' ? afterClose : done(code);
+ }
+
+ // -- flow-only states ---------------------------------------------------
+
+ /**
+  * Flow-only state after the final `>`: only spaces and tabs may follow
+  * before the line (or the input) ends.
+  */
+ function afterClose(code: Code): State | undefined {
+   const isInlineWhitespace = code === codes.space || code === codes.horizontalTab;
+   if (isInlineWhitespace) {
+     effects.consume(code);
+     return afterClose;
+   }
+
+   const atLineEnd = code === null || markdownLineEnding(code);
+   // Anything else on the line: reject so the block re-parses as a paragraph,
+   // deferring to the text tokenizer which preserves trailing content in the
+   // same line.
+   return atLineEnd ? done(code) : nok(code);
+ }
+
+ // -- flow-only: line continuation ---------------------------------------
+
+ /**
+  * At a line ending inside the body (flow mode): probes with
+  * `nonLazyContinuationStart` and continues with `continuationStartNonLazy`
+  * on success or `continuationAfter` on failure.
+  */
+ function continuationStart(code: Code): State | undefined {
+   const probe = effects.check(nonLazyContinuationStart, continuationStartNonLazy, continuationAfter);
+   return probe(code);
+ }
+
+ // Success branch of the check in `continuationStart`: emits the line ending
+ // as its own token, then resumes the body on the next line.
+ function continuationStartNonLazy(code: Code): State | undefined {
+ effects.enter(types.lineEnding);
+ effects.consume(code);
+ effects.exit(types.lineEnding);
+ return continuationBefore;
+ }
+
+ /**
+  * First character of a continued body line. Opens a fresh data token and
+  * resumes `body`, unless the line is empty or the input has ended, in which
+  * case the continuation check runs again.
+  */
+ function continuationBefore(code: Code): State | undefined {
+   const hasContent = code !== null && !markdownLineEnding(code);
+   if (hasContent) {
+     effects.enter('htmlBlockComponentData');
+     return body(code);
+   }
+   return continuationStart(code);
+ }
+
+ /**
+  * Failure branch of the continuation checks. At end of input the construct
+  * is rejected; otherwise the component token is closed and the construct
+  * succeeds, even though no closing tag was seen on this path.
+  */
+ function continuationAfter(code: Code): State | undefined {
+   if (code !== null) {
+     effects.exit('htmlBlockComponent');
+     return ok(code);
+   }
+   return nok(code);
+ }
+
+ // -- shared exit --------------------------------------------------------
+
+ /**
+  * Shared success exit: closes the open data token, then the component token,
+  * and reports success with the given code.
+  */
+ function done(code: Code): State | undefined {
+   effects.exit('htmlBlockComponentData');
+   effects.exit('htmlBlockComponent');
+   return ok(code);
+ }
+ };
+}
+
+// ---------------------------------------------------------------------------
+// Flow construct (block-level)
+// ---------------------------------------------------------------------------
+
+/**
+ * Partial construct handed to `effects.check` by the continuation states to
+ * look past a line ending before committing to the next line.
+ */
+const nonLazyContinuationStart: Construct = {
+ tokenize: tokenizeNonLazyContinuationStart,
+ partial: true,
+};
+
+/**
+ * `resolveTo` hook for the flow construct. When a line prefix directly
+ * precedes the component, the prefix is folded into the component's tokens
+ * and its own events are removed.
+ *
+ * @param events Event list produced so far; mutated in place.
+ * @returns The same `events` array.
+ */
+function resolveToHtmlBlockComponent(events: Parameters<NonNullable<Construct['resolveTo']>>[0]) {
+  // Walk backwards to the most recent `enter` event of the component token.
+  let index = events.length;
+
+  while (index > 0) {
+    index -= 1;
+    if (events[index][0] === 'enter' && events[index][1].type === 'htmlBlockComponent') {
+      break;
+    }
+  }
+
+  // The two events before the component are presumably the prefix's
+  // enter/exit pair; only the first is inspected.
+  if (index > 1 && events[index - 2][1].type === types.linePrefix) {
+    // Start the component token where the prefix started.
+    events[index][1].start = events[index - 2][1].start;
+    // Do the same for the token entered right after it.
+    events[index + 1][1].start = events[index - 2][1].start;
+    // Drop the two prefix events.
+    events.splice(index - 2, 2);
+  }
+
+  return events;
+}
+
+/**
+ * Block-level construct: the shared tokenizer in `'flow'` mode, plus a
+ * `resolveTo` step that absorbs a preceding line prefix.
+ */
+const htmlBlockComponentFlowConstruct: Construct = {
+ name: 'htmlBlockComponent',
+ tokenize: createTokenize('flow'),
+ resolveTo: resolveToHtmlBlockComponent,
+ // NOTE(review): micromark describes `concrete` constructs as ones containers
+ // cannot pierce into; confirm that is the intent here.
+ concrete: true,
+};
+
+/**
+ * Tokenizer for the `nonLazyContinuationStart` look-ahead. Succeeds only when
+ * the input is at a line ending and the line that follows is not flagged in
+ * `parser.lazy`; fails otherwise.
+ */
+function tokenizeNonLazyContinuationStart(this: TokenizeContext, effects: Effects, ok: State, nok: State) {
+  // Arrow functions capture `this`, so no alias is needed.
+  const afterLineEnding = (code: Code): State | undefined =>
+    this.parser.lazy[this.now().line] ? nok(code) : ok(code);
+
+  return (code: Code): State | undefined => {
+    if (!markdownLineEnding(code)) return nok(code);
+
+    effects.enter(types.lineEnding);
+    effects.consume(code);
+    effects.exit(types.lineEnding);
+    return afterLineEnding;
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Text construct (inline)
+// ---------------------------------------------------------------------------
+
+/** Inline construct: the shared tokenizer in `'text'` mode, with no resolver. */
+const htmlBlockComponentTextConstruct: Construct = {
+ name: 'htmlBlockComponent',
+ tokenize: createTokenize('text'),
+};
+
+// ---------------------------------------------------------------------------
+// Extension
+// ---------------------------------------------------------------------------
+
+/**
+ * Micromark extension that tokenizes `<HTMLBlock>...</HTMLBlock>` as a single
+ * token at both flow (block) and text (inline) levels.
+ *
+ * Prevents the markdown parser from consuming ``}';
- * protectHTMLBlockContent(input)
- * // Returns: ''
- * ```
- */
-function protectHTMLBlockContent(content: string): string {
-  // Matches `<HTMLBlock ...>{`...`}</HTMLBlock>`: group 1 is the opening tag
-  // with any attributes, group 2 the template-literal body (backslash escapes
-  // allowed), group 3 the closing tag.
-  return content.replace(
-    /(<HTMLBlock[^>]*>)\{\s*`((?:[^`\\]|\\.)*)`\s*\}(<\/HTMLBlock>)/g,
-    (_match, openTag: string, templateContent: string, closeTag: string) => {
-      // Only the body is encoded; the tags are kept verbatim around the
-      // START/END markers.
-      const encoded = base64Encode(templateContent);
-      return `${openTag}${HTML_BLOCK_CONTENT_START}${encoded}${HTML_BLOCK_CONTENT_END}${closeTag}`;
-    },
-  );
-}
-
/**
* Removes JSX-style comments (e.g., { /* comment *\/ }) from content.
*
@@ -336,16 +294,13 @@ function evaluateAttributeExpressions(content: string, context: JSXContext, prot
* @returns Preprocessed content ready for markdown parsing
*/
export function preprocessJSXExpressions(content: string, context: JSXContext = {}): string {
- // Step 0: Base64 encode HTMLBlock content
- let processed = protectHTMLBlockContent(content);
-
// Step 1: Protect code blocks and inline code
- const { protectedCode, protectedContent } = protectCodeBlocks(processed);
+ const { protectedCode, protectedContent } = protectCodeBlocks(content);
// Step 2: Evaluate attribute expressions (JSX attribute syntax: href={baseUrl})
// For inline expressions, we use a library to parse the expression & evaluate it later
// For attribute expressions, it was difficult to use a library to parse them, so do it manually
- processed = evaluateAttributeExpressions(protectedContent, context, protectedCode);
+ let processed = evaluateAttributeExpressions(protectedContent, context, protectedCode);
// Step 3: Escape problematic braces to prevent MDX expression parsing errors
// This handles both unbalanced braces and paragraph-spanning expressions in one pass