diff --git a/__tests__/compilers/html-block.test.ts b/__tests__/compilers/html-block.test.ts index 6f739bb64..7b62977fb 100644 --- a/__tests__/compilers/html-block.test.ts +++ b/__tests__/compilers/html-block.test.ts @@ -1,15 +1,4 @@ -import type { Element } from 'hast'; - -import { mdast, mdx, mdxish } from '../../index'; - -function findHTMLBlock(element: Element): Element | undefined { - if (element.tagName === 'HTMLBlock' || element.tagName === 'html-block') { - return element; - } - return element.children - .filter((child): child is Element => child.type === 'element') - .reduce((found, child) => found || findHTMLBlock(child), undefined); -} +import { mdast, mdx } from '../../index'; describe('html-block compiler', () => { it('compiles html blocks within containers', () => { @@ -51,138 +40,3 @@ const foo = () => { expect(mdx(mdast(markdown)).trim()).toBe(expected.trim()); }); }); - -describe('mdxish html-block compiler', () => { - it('compiles html blocks within containers', () => { - const markdown = ` -> 🚧 It compiles! -> -> {\` -> Hello, World! -> \`} -`; - - const hast = mdxish(markdown.trim()); - const callout = hast.children[0] as Element; - - expect(callout.type).toBe('element'); - expect(callout.tagName).toBe('Callout'); - - // Find HTMLBlock within the callout - const htmlBlock = findHTMLBlock(callout); - expect(htmlBlock).toBeDefined(); - expect(htmlBlock?.tagName).toBe('html-block'); - }); - - it('compiles html blocks preserving newlines', () => { - const markdown = ` -{\` -

-const foo = () => {
-  const bar = {
-    baz: 'blammo'
-  }
-
-  return bar
-}
-
-\`}
-`; - - const hast = mdxish(markdown.trim()); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - expect(htmlBlock?.tagName).toBe('html-block'); - }); - - it('adds newlines for readability', () => { - const markdown = '{`

Hello, World!

`}
'; - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - expect(htmlBlock?.tagName).toBe('html-block'); - }); - - it('unescapes backticks in HTML content', () => { - const markdown = '{`\\`example\\``}'; - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - expect(htmlBlock?.tagName).toBe('html-block'); - - // Verify that escaped backticks \` are unescaped to ` in the HTML - const htmlProp = htmlBlock?.properties?.html as string; - expect(htmlProp).toBeDefined(); - expect(htmlProp).toContain('`example`'); - expect(htmlProp).not.toContain('\\`'); - }); - - it('passes safeMode property correctly', () => { - // Test with both JSX expression and string syntax - const markdown = '{`

Content

`}
'; - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - - const allProps = htmlBlock?.properties; - expect(allProps).toBeDefined(); - - const safeMode = allProps?.safeMode; - expect(safeMode).toBe('true'); - - // Verify that html property is still present (for safeMode to render as escaped text) - const htmlProp = allProps?.html as string; - expect(htmlProp).toBeDefined(); - expect(htmlProp).toContain(''); - expect(htmlProp).toContain('

Content

'); - }); - - it('should handle template literal with variables', () => { - // eslint-disable-next-line quotes - const markdown = `{\`const x = \${variable}\`}`; - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - // eslint-disable-next-line no-template-curly-in-string - expect(htmlBlock?.properties?.html).toBe('const x = ${variable}'); - }); - - it('should handle nested template literals', () => { - // Use a regular string to avoid nested template literal syntax error - // The content should be:
```javascript\nconst x = 1;\n```
- const markdown = '{`
\\`\\`\\`javascript\\nconst x = 1;\\n\\`\\`\\`
`}
'; - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - - // Verify that the HTML content is preserved correctly with newlines - const htmlProp = htmlBlock?.properties?.html as string; - expect(htmlProp).toBeDefined(); - - // The expected content should have triple backticks - expect(htmlProp).toBe('
```javascript\nconst x = 1;\n```
'); - }); -}); diff --git a/__tests__/transformers/mdxish-html-blocks.test.ts b/__tests__/transformers/mdxish-html-blocks.test.ts new file mode 100644 index 000000000..e0407d29f --- /dev/null +++ b/__tests__/transformers/mdxish-html-blocks.test.ts @@ -0,0 +1,587 @@ +import type { Element } from 'hast'; +import type { Root } from 'mdast'; + +import remarkParse from 'remark-parse'; +import { unified } from 'unified'; + +import { mdxish } from '../../index'; +import { htmlBlockComponentFromMarkdown } from '../../lib/mdast-util/html-block-component'; +import { htmlBlockComponent } from '../../lib/micromark/html-block-component/syntax'; +import mdxishHtmlBlocks from '../../processor/transform/mdxish/mdxish-html-blocks'; +import { collectNodes } from '../helpers'; + +interface HTMLBlockNode { + children: { type: string; value: string }[]; + data: { + hName: string; + hProperties: Record; + }; + type: string; +} + +const parseWithPlugin = (markdown: string): Root => { + const processor = unified() + .data('micromarkExtensions', [htmlBlockComponent()]) + .data('fromMarkdownExtensions', [htmlBlockComponentFromMarkdown()]) + .use(remarkParse) + .use(mdxishHtmlBlocks); + const tree = processor.parse(markdown); + processor.runSync(tree); + return tree as Root; +}; + +const findHtmlBlockNodes = (tree: Root): HTMLBlockNode[] => + collectNodes(tree, node => node.type === 'html-block') as unknown as HTMLBlockNode[]; + +function findHTMLBlock(element: Element): Element | undefined { + if (element.tagName === 'HTMLBlock' || element.tagName === 'html-block') { + return element; + } + return element.children + .filter((child): child is Element => child.type === 'element') + .reduce((found, child) => found || findHTMLBlock(child), undefined); +} + +describe('mdxish-html-blocks transformer', () => { + describe('attribute extraction', () => { + it('extracts safeMode from JSX syntax', () => { + const tree = parseWithPlugin('{`

content

`}
'); + const [node] = findHtmlBlockNodes(tree); + expect(node.data.hProperties).toMatchObject({ safeMode: 'true', html: '

content

' }); + }); + + it('extracts safeMode from string syntax', () => { + const tree = parseWithPlugin('{`

content

`}
'); + const [node] = findHtmlBlockNodes(tree); + expect(node.data.hProperties).toMatchObject({ safeMode: 'false', html: '

content

' }); + }); + + it('extracts runScripts boolean true', () => { + const tree = parseWithPlugin('{`

content

`}
'); + const [node] = findHtmlBlockNodes(tree); + expect(node.data.hProperties.runScripts).toBe(true); + }); + + it('extracts runScripts boolean false', () => { + const tree = parseWithPlugin('{`

content

`}
'); + const [node] = findHtmlBlockNodes(tree); + expect(node.data.hProperties.runScripts).toBe(false); + }); + + it('extracts runScripts string value', () => { + const tree = parseWithPlugin('{`

content

`}
'); + const [node] = findHtmlBlockNodes(tree); + expect(node.data.hProperties.runScripts).toBe('afterRender'); + }); + + it('extracts multiple attributes', () => { + const tree = parseWithPlugin( + '{`

content

`}
', + ); + const [node] = findHtmlBlockNodes(tree); + expect(node.data.hProperties).toMatchObject({ safeMode: 'true', runScripts: true }); + }); + + it('omits runScripts and safeMode when absent', () => { + const tree = parseWithPlugin('{`

content

`}
'); + const [node] = findHtmlBlockNodes(tree); + expect(node.data.hProperties).toStrictEqual({ html: '

content

' }); + }); + }); + + describe('content extraction', () => { + it('strips template literal delimiters', () => { + const tree = parseWithPlugin('{`
hello
`}
'); + const [node] = findHtmlBlockNodes(tree); + expect(node.data.hProperties.html).toBe('
hello
'); + }); + + it('handles content without template literal syntax', () => { + const tree = parseWithPlugin('plain'); + const [node] = findHtmlBlockNodes(tree); + expect(node.data.hProperties.html).toBe('plain'); + }); + + it('unescapes backticks in HTML content', () => { + const tree = parseWithPlugin('{`\\`example\\``}'); + const [node] = findHtmlBlockNodes(tree); + expect(node.data.hProperties.html).toBe('`example`'); + }); + + it('preserves multiline content', () => { + const markdown = `{\` +
    +
  • one
  • +
  • two
  • +
+\`}
`; + const tree = parseWithPlugin(markdown); + const [node] = findHtmlBlockNodes(tree); + const html = node.data.hProperties.html as string; + expect(html).toContain('
  • one
  • '); + expect(html).toContain('
  • two
  • '); + }); + }); + + describe('node structure', () => { + it('produces correct node type and hName', () => { + const tree = parseWithPlugin('{`

    test

    `}
    '); + const [node] = findHtmlBlockNodes(tree); + expect(node.type).toBe('html-block'); + expect(node.data.hName).toBe('html-block'); + }); + + it('sets children text node matching html property', () => { + const tree = parseWithPlugin('{`

    test

    `}
    '); + const [node] = findHtmlBlockNodes(tree); + expect(node.children).toStrictEqual([{ type: 'text', value: '

    test

    ' }]); + }); + + it('does not transform non-HTMLBlock html nodes', () => { + const tree = parseWithPlugin('
    just html
    '); + const htmlBlockNodes = findHtmlBlockNodes(tree); + expect(htmlBlockNodes).toHaveLength(0); + }); + }); +}); + +describe('mdxish html-block integration', () => { + it('compiles html blocks within containers', () => { + const markdown = ` +> 🚧 It compiles! +> +> {\` +> Hello, World! +> \`} +`; + + const hast = mdxish(markdown.trim()); + const callout = hast.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + + const htmlBlock = findHTMLBlock(callout); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: ' Hello, World!' }, + }); + }); + + it('compiles html blocks preserving newlines', () => { + const markdown = `{\` +
    
    +const foo = () => {
    +  const bar = {
    +    baz: 'blammo'
    +  }
    +
    +  return bar
    +}
    +
    +\`}
    `; + + const hast = mdxish(markdown); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + + const htmlProp = htmlBlock?.properties?.html as string; + expect(htmlProp).toContain('
    ');
    +    expect(htmlProp).toContain('const foo = () => {');
    +    expect(htmlProp).toContain("baz: 'blammo'");
    +    expect(htmlProp).toContain('
    '); + }); + + it('adds newlines for readability', () => { + const hast = mdxish('{`

    Hello, World!

    `}
    '); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: '

    Hello, World!

    ' }, + }); + }); + + it('unescapes backticks in HTML content', () => { + const hast = mdxish('{`\\`example\\``}'); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: '`example`' }, + }); + }); + + it('passes safeMode property correctly', () => { + const hast = mdxish('{`

    Content

    `}
    '); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { safeMode: 'true', html: '

    Content

    ' }, + }); + }); + + it('handles template literal with variables', () => { + // eslint-disable-next-line quotes + const hast = mdxish(`{\`const x = \${variable}\`}`); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + // eslint-disable-next-line no-template-curly-in-string + properties: { html: 'const x = ${variable}' }, + }); + }); + + it('handles nested template literals', () => { + const hast = mdxish('{`
    \\`\\`\\`javascript\\nconst x = 1;\\n\\`\\`\\`
    `}
    '); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: '
    ```javascript\nconst x = 1;\n```
    ' }, + }); + }); + + describe('flow-level (standalone block)', () => { + it('handles simple single-line HTMLBlock', () => { + const hast = mdxish('{`
    hello
    `}
    '); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: '
    hello
    ' }, + }); + }); + + it('handles multiline content', () => { + const markdown = `{\` +
      +
    • one
    • +
    • two
    • +
    +\`}
    `; + + const hast = mdxish(markdown); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + + const htmlProp = htmlBlock?.properties?.html as string; + expect(htmlProp).toContain('
  • one
  • '); + expect(htmlProp).toContain('
  • two
  • '); + }); + + it('handles content with blank lines', () => { + const markdown = `{\` +
    before
    + +
    after
    +\`}
    `; + + const hast = mdxish(markdown); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + + const htmlProp = htmlBlock?.properties?.html as string; + expect(htmlProp).toContain('
    before
    '); + expect(htmlProp).toContain('
    after
    '); + }); + + it('handles script tags without being consumed by the markdown parser', () => { + const markdown = `{\` + +

    visible

    +\`}
    `; + + const hast = mdxish(markdown); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + + const htmlProp = htmlBlock?.properties?.html as string; + expect(htmlProp).toContain(''); + expect(htmlProp).toContain('

    visible

    '); + }); + + it('handles style tags without being consumed by the markdown parser', () => { + const markdown = `{\` + +

    styled

    +\`}
    `; + + const hast = mdxish(markdown); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + + const htmlProp = htmlBlock?.properties?.html as string; + expect(htmlProp).toContain(''); + expect(htmlProp).toContain('

    styled

    '); + }); + + it('handles HTMLBlock without template literal syntax', () => { + const hast = mdxish('plain'); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: 'plain' }, + }); + }); + + it('handles HTMLBlock with runScripts attribute', () => { + const hast = mdxish('{``}'); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { runScripts: true, html: '' }, + }); + }); + + it('handles opening tag with attributes on a new line', () => { + const markdown = '{``}'; + const hast = mdxish(markdown); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { runScripts: true, html: '' }, + }); + }); + + it('handles trailing whitespace after closing tag', () => { + const hast = mdxish('{`
    hello
    `}
    '); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: '
    hello
    ' }, + }); + }); + }); + + describe('text-level (inline)', () => { + it('handles inline HTMLBlock surrounded by text', () => { + const hast = mdxish('before {`middle`} after'); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: 'middle' }, + }); + }); + + it('handles inline HTMLBlock at the end of a paragraph', () => { + const hast = mdxish('some text {`italic`}'); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: 'italic' }, + }); + }); + + it('handles inline HTMLBlock with attributes', () => { + const hast = mdxish('text {``} more'); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { safeMode: 'true', html: '' }, + }); + }); + + it('handles multiline inline HTMLBlock with trailing text', () => { + const markdown = 'hello {`Hello, World!`} world'; + const hast = mdxish(markdown); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + expect(htmlBlock?.properties?.html).toContain('Hello, World!'); + }); + }); + + describe('trailing text preservation', () => { + it('preserves trailing text after single-line HTMLBlock', () => { + const hast = mdxish('{`Hello`} trailing'); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: 'Hello' }, + }); + }); + + it('preserves trailing text after multiline HTMLBlock', () => { + const markdown = '{`Hello`}. trailing'; + const hast = mdxish(markdown); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + expect(htmlBlock?.properties?.html).toContain('Hello'); + }); + + it('does not crash with trailing text after closing tag', () => { + expect(() => mdxish('{`

    content

    `}
    hello world')).not.toThrow(); + }); + + it('does not crash with trailing punctuation after closing tag', () => { + expect(() => mdxish('{`

    content

    `}
    . stuff')).not.toThrow(); + }); + }); + + describe('inside callouts', () => { + it('handles HTMLBlock in a callout without stray > characters', () => { + const markdown = `> 🚧 It compiles! +> +> {\` +> Hello, World! +> \`}`; + + const hast = mdxish(markdown); + expect((hast.children[0] as Element).tagName).toBe('Callout'); + + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: ' Hello, World!' }, + }); + }); + + it('handles HTMLBlock in a callout with script tags', () => { + const markdown = `> ⚠️ Warning +> +> {\` +> +>

    safe content

    +> \`}
    `; + + const hast = mdxish(markdown); + expect((hast.children[0] as Element).tagName).toBe('Callout'); + + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + + const htmlProp = htmlBlock?.properties?.html as string; + expect(htmlProp).toContain(''); + expect(htmlProp).toContain('

    safe content

    '); + expect(htmlProp).not.toMatch(/^>/m); + }); + + it('handles HTMLBlock in a callout with blank lines in content', () => { + const markdown = `> 🚧 Test +> +> {\` +>
    first
    +> +>
    second
    +> \`}
    `; + + const hast = mdxish(markdown); + expect((hast.children[0] as Element).tagName).toBe('Callout'); + + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + + const htmlProp = htmlBlock?.properties?.html as string; + expect(htmlProp).toContain('
    first
    '); + expect(htmlProp).toContain('
    second
    '); + expect(htmlProp).not.toMatch(/^>/m); + }); + + it('handles HTMLBlock in a callout with trailing whitespace', () => { + const markdown = `> 🚧 It compiles! +> +> {\` +> Hello +> \`}${' '}`; + + const hast = mdxish(markdown); + expect((hast.children[0] as Element).tagName).toBe('Callout'); + + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: ' Hello' }, + }); + }); + + it('handles HTMLBlock in an empty callout (no title text)', () => { + const markdown = `> 📘 +> +> {\`

    body only

    \`}
    `; + + const hast = mdxish(markdown); + expect((hast.children[0] as Element).tagName).toBe('Callout'); + + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: '

    body only

    ' }, + }); + }); + }); + + describe('inside blockquotes', () => { + it('handles single-line HTMLBlock in a blockquote', () => { + const hast = mdxish('> {`

    quoted

    `}
    '); + expect((hast.children[0] as Element).tagName).toBe('blockquote'); + + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: '

    quoted

    ' }, + }); + }); + + it('handles multiline HTMLBlock in a blockquote without stray > characters', () => { + const markdown = `> {\` +>
    line1
    +>
    line2
    +> \`}
    `; + + const hast = mdxish(markdown); + expect((hast.children[0] as Element).tagName).toBe('blockquote'); + + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + + const htmlProp = htmlBlock?.properties?.html as string; + expect(htmlProp).toContain('
    line1
    '); + expect(htmlProp).toContain('
    line2
    '); + expect(htmlProp).not.toMatch(/^>/m); + }); + }); + + describe('inside lists', () => { + it('handles HTMLBlock in an unordered list item', () => { + const hast = mdxish('- {`listed`}'); + expect((hast.children[0] as Element).tagName).toBe('ul'); + + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: 'listed' }, + }); + }); + + it('handles HTMLBlock in an ordered list item', () => { + const hast = mdxish('1. {`ordered`}'); + expect((hast.children[0] as Element).tagName).toBe('ol'); + + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ + tagName: 'html-block', + properties: { html: 'ordered' }, + }); + }); + }); + + describe('edge cases', () => { + it('handles standalone multiline HTMLBlock with surrounding paragraphs', () => { + const markdown = `Hello + +{\` +

    Hello, World!

    +\`}
    + +there`; + const hast = mdxish(markdown); + const htmlBlock = (hast.children as Element[]) + .filter(child => child.type === 'element') + .reduce((found, child) => found || findHTMLBlock(child), undefined); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + expect(htmlBlock?.properties?.html).toContain('Hello, World!

    '); + }); + + it('handles nested HTMLBlock tags in content', () => { + const hast = mdxish('{`{Hello}`}'); + const htmlBlock = findHTMLBlock(hast.children[0] as Element); + expect(htmlBlock).toMatchObject({ tagName: 'html-block' }); + expect(htmlBlock?.properties?.html).toContain('{Hello}'); + }); + }); +}); diff --git a/components/HTMLBlock/index.tsx b/components/HTMLBlock/index.tsx index ed3ff975a..c73bfbfef 100644 --- a/components/HTMLBlock/index.tsx +++ b/components/HTMLBlock/index.tsx @@ -25,10 +25,7 @@ const HTMLBlock = ({ children = '', html: htmlProp, runScripts, safeMode: safeMo let html: string = ''; if (htmlProp !== undefined) { html = htmlProp; - } else { - if (typeof children !== 'string') { - throw new TypeError('HTMLBlock: children must be a string'); - } + } else if (typeof children === 'string') { html = children; } diff --git a/lib/mdast-util/html-block-component/index.ts b/lib/mdast-util/html-block-component/index.ts new file mode 100644 index 000000000..d2d27240b --- /dev/null +++ b/lib/mdast-util/html-block-component/index.ts @@ -0,0 +1,54 @@ +import type { Html } from 'mdast'; +import type { CompileContext, Extension as FromMarkdownExtension, Handle, Token } from 'mdast-util-from-markdown'; + +const contextMap = new WeakMap(); + +function findHtmlBlockComponentToken(this: CompileContext): Token | undefined { + const events = this.tokenStack; + for (let i = events.length - 1; i >= 0; i -= 1) { + if (events[i][0].type === 'htmlBlockComponent') return events[i][0]; + } + return undefined; +} + +// Produces an MDAST `html` node, mdxishHtmlBlocks then transforms it into an `html-block` node +function enterHtmlBlockComponent(this: CompileContext, token: Parameters[0]): void { + contextMap.set(token, { chunks: [], lastEndLine: token.start.line }); + this.enter({ type: 'html', value: '' } as Html, token); +} + +function exitHtmlBlockComponentData(this: CompileContext, token: Parameters[0]): void { + const componentToken = findHtmlBlockComponentToken.call(this); + if (!componentToken) return; + const ctx = contextMap.get(componentToken); + if (ctx) { + const gap = token.start.line - ctx.lastEndLine; + if (ctx.chunks.length > 0 && gap > 0) { + ctx.chunks.push('\n'.repeat(gap)); + } + ctx.chunks.push(this.sliceSerialize(token)); + ctx.lastEndLine = token.end.line; + } +} + +function exitHtmlBlockComponent(this: CompileContext, token: Parameters[0]): void { + const ctx = contextMap.get(token); + const node = this.stack[this.stack.length - 1] as Html; + if (ctx) { + node.value = ctx.chunks.join(''); + contextMap.delete(token); + } + this.exit(token); +} + +export function htmlBlockComponentFromMarkdown(): FromMarkdownExtension { + return { + enter: { + htmlBlockComponent: enterHtmlBlockComponent, + }, + exit: { + htmlBlockComponentData: exitHtmlBlockComponentData, + htmlBlockComponent: exitHtmlBlockComponent, + }, + }; +} diff --git a/lib/mdxish.ts b/lib/mdxish.ts index c0b5efea3..d9ca5a32f 100644 --- a/lib/mdxish.ts +++ b/lib/mdxish.ts @@ -58,11 +58,13 @@ import tailwindTransformer from '../processor/transform/tailwind'; import { emptyTaskListItemFromMarkdown } from './mdast-util/empty-task-list-item'; import { gemojiFromMarkdown } from './mdast-util/gemoji'; +import { htmlBlockComponentFromMarkdown } from './mdast-util/html-block-component'; import { jsxTableFromMarkdown } from './mdast-util/jsx-table'; import { legacyVariableFromMarkdown } from './mdast-util/legacy-variable'; import { magicBlockFromMarkdown } from './mdast-util/magic-block'; import { mdxComponentFromMarkdown } from './mdast-util/mdx-component'; import { gemoji } from './micromark/gemoji'; +import { htmlBlockComponent } from './micromark/html-block-component'; import { jsxComment } from './micromark/jsx-comment'; import { jsxTable } from './micromark/jsx-table'; import { legacyVariable } from './micromark/legacy-variable'; @@ -158,8 +160,9 @@ export function mdxishAstProcessor(mdContent: string, opts: MdxishOpts = {}) { text: mdxExprExt.text, }; - const micromarkExts = [jsxTable(), magicBlock(), mdxComponent(), gemoji(), legacyVariable(), looseHtmlEntity()]; + const micromarkExts = [htmlBlockComponent(), jsxTable(), magicBlock(), mdxComponent(), gemoji(), legacyVariable(), looseHtmlEntity()]; const fromMarkdownExts = [ + htmlBlockComponentFromMarkdown(), jsxTableFromMarkdown(), magicBlockFromMarkdown(), mdxComponentFromMarkdown(), diff --git a/lib/micromark/html-block-component/index.ts b/lib/micromark/html-block-component/index.ts new file mode 100644 index 000000000..1a86842bd --- /dev/null +++ b/lib/micromark/html-block-component/index.ts @@ -0,0 +1 @@ +export { htmlBlockComponent } from './syntax'; diff --git a/lib/micromark/html-block-component/syntax.ts b/lib/micromark/html-block-component/syntax.ts new file mode 100644 index 000000000..fd665533e --- /dev/null +++ b/lib/micromark/html-block-component/syntax.ts @@ -0,0 +1,329 @@ +/* eslint-disable @typescript-eslint/no-use-before-define */ +import type { Code, Construct, Effects, Extension, Resolver, State, TokenizeContext } from 'micromark-util-types'; + +import { markdownLineEnding } from 'micromark-util-character'; +import { codes, types } from 'micromark-util-symbol'; + +declare module 'micromark-util-types' { + interface TokenTypeMap { + htmlBlockComponent: 'htmlBlockComponent'; + htmlBlockComponentData: 'htmlBlockComponentData'; + } +} + +const TAG_SUFFIX: Code[] = [ + codes.uppercaseT, + codes.uppercaseM, + codes.uppercaseL, + codes.uppercaseB, + codes.lowercaseL, + codes.lowercaseO, + codes.lowercaseC, + codes.lowercaseK, +]; + +// --------------------------------------------------------------------------- +// Shared tokenizer factory +// --------------------------------------------------------------------------- + +/** + * Creates a tokenize function for `...`. + * + * - **flow** (block-level): supports multiline content via line continuations, + * consumes trailing whitespace after the closing tag. + * - **text** (inline): single-line only, exits immediately after the closing tag. + */ +function createTokenize(mode: 'flow' | 'text') { + return function tokenize(this: TokenizeContext, effects: Effects, ok: State, nok: State) { + let depth = 1; + + function matchChars(chars: Code[], onMatch: State, onFail: (code: Code) => State | undefined): State { + if (chars.length === 0) return onMatch; + const next: State = (code: Code): State | undefined => { + if (code === chars[0]) { + effects.consume(code); + return matchChars(chars.slice(1), onMatch, onFail); + } + return onFail(code); + }; + return next; + } + + function matchTagName(onMatch: State, onFail: (code: Code) => State | undefined): State { + const next: State = (code: Code): State | undefined => { + if (code === codes.uppercaseH) { + effects.consume(code); + return matchChars(TAG_SUFFIX, onMatch, onFail); + } + return onFail(code); + }; + return next; + } + + return start; + + function start(code: Code): State | undefined { + if (code !== codes.lessThan) return nok(code); + effects.enter('htmlBlockComponent'); + effects.enter('htmlBlockComponentData'); + effects.consume(code); + return matchTagName(afterTagName, nok); + } + + function afterTagName(code: Code): State | undefined { + if (code === codes.greaterThan) { + effects.consume(code); + return body; + } + if (code === codes.space || code === codes.horizontalTab) { + effects.consume(code); + return inAttributes; + } + if (mode === 'flow' && markdownLineEnding(code)) { + effects.exit('htmlBlockComponentData'); + return attributeContinuationStart(code); + } + if (code === codes.slash) { + effects.consume(code); + return selfClose; + } + return nok(code); + } + + function inAttributes(code: Code): State | undefined { + if (code === codes.greaterThan) { + effects.consume(code); + return body; + } + if (code === null) { + return nok(code); + } + if (markdownLineEnding(code)) { + if (mode === 'text') return nok(code); + effects.exit('htmlBlockComponentData'); + return attributeContinuationStart(code); + } + effects.consume(code); + return inAttributes; + } + + function attributeContinuationStart(code: Code): State | undefined { + return effects.check(nonLazyContinuationStart, attributeContinuationNonLazy, continuationAfter)(code); + } + + function attributeContinuationNonLazy(code: Code): State | undefined { + effects.enter(types.lineEnding); + effects.consume(code); + effects.exit(types.lineEnding); + return attributeContinuationBefore; + } + + function attributeContinuationBefore(code: Code): State | undefined { + if (code === null || markdownLineEnding(code)) { + return attributeContinuationStart(code); + } + effects.enter('htmlBlockComponentData'); + return inAttributes(code); + } + + function selfClose(code: Code): State | undefined { + if (code === codes.greaterThan) { + effects.consume(code); + return mode === 'flow' ? afterClose : done(code); + } + return nok(code); + } + + function body(code: Code): State | undefined { + if (code === null) return nok(code); + + if (markdownLineEnding(code)) { + if (mode === 'text') { + // Text constructs operate on paragraph content which spans lines + effects.consume(code); + return body; + } + effects.exit('htmlBlockComponentData'); + return continuationStart(code); + } + + if (code === codes.lessThan) { + effects.consume(code); + return closeSlash; + } + + effects.consume(code); + return body; + } + + function closeSlash(code: Code): State | undefined { + if (code === codes.slash) { + effects.consume(code); + return matchTagName(closeGt, body); + } + return matchTagName(openAfterTagName, body)(code); + } + + function openAfterTagName(code: Code): State | undefined { + if (code === codes.greaterThan || code === codes.space || code === codes.horizontalTab) { + depth += 1; + effects.consume(code); + return body; + } + return body(code); + } + + function closeGt(code: Code): State | undefined { + if (code === codes.greaterThan) { + depth -= 1; + effects.consume(code); + if (depth === 0) { + return mode === 'flow' ? afterClose : done(code); + } + return body; + } + return body(code); + } + + // -- flow-only states --------------------------------------------------- + + function afterClose(code: Code): State | undefined { + if (code === null || markdownLineEnding(code)) { + return done(code); + } + if (code === codes.space || code === codes.horizontalTab) { + effects.consume(code); + return afterClose; + } + // Reject so the block re-parses as a paragraph, deferring to the + // text tokenizer which preserves trailing content in the same line. + return nok(code); + } + + // -- flow-only: line continuation --------------------------------------- + + function continuationStart(code: Code): State | undefined { + return effects.check(nonLazyContinuationStart, continuationStartNonLazy, continuationAfter)(code); + } + + function continuationStartNonLazy(code: Code): State | undefined { + effects.enter(types.lineEnding); + effects.consume(code); + effects.exit(types.lineEnding); + return continuationBefore; + } + + function continuationBefore(code: Code): State | undefined { + if (code === null || markdownLineEnding(code)) { + return continuationStart(code); + } + effects.enter('htmlBlockComponentData'); + return body(code); + } + + function continuationAfter(code: Code): State | undefined { + if (code === null) return nok(code); + effects.exit('htmlBlockComponent'); + return ok(code); + } + + // -- shared exit -------------------------------------------------------- + + function done(_code: Code): State | undefined { + effects.exit('htmlBlockComponentData'); + effects.exit('htmlBlockComponent'); + return ok(_code); + } + }; +} + +// --------------------------------------------------------------------------- +// Flow construct (block-level) +// --------------------------------------------------------------------------- + +const nonLazyContinuationStart: Construct = { + tokenize: tokenizeNonLazyContinuationStart, + partial: true, +}; + +function resolveToHtmlBlockComponent(events: Parameters[0]) { + let index = events.length; + + while (index > 0) { + index -= 1; + if (events[index][0] === 'enter' && events[index][1].type === 'htmlBlockComponent') { + break; + } + } + + if (index > 1 && events[index - 2][1].type === types.linePrefix) { + events[index][1].start = events[index - 2][1].start; + events[index + 1][1].start = events[index - 2][1].start; + events.splice(index - 2, 2); + } + + return events; +} + +const htmlBlockComponentFlowConstruct: Construct = { + name: 'htmlBlockComponent', + tokenize: createTokenize('flow'), + resolveTo: resolveToHtmlBlockComponent, + concrete: true, +}; + +function tokenizeNonLazyContinuationStart(this: TokenizeContext, effects: Effects, ok: State, nok: State) { + // eslint-disable-next-line @typescript-eslint/no-this-alias + const self = this; + + return start; + + function start(code: Code): State | undefined { + if (markdownLineEnding(code)) { + effects.enter(types.lineEnding); + effects.consume(code); + effects.exit(types.lineEnding); + return after; + } + return nok(code); + } + + function after(code: Code): State | undefined { + if (self.parser.lazy[self.now().line]) { + return nok(code); + } + return ok(code); + } +} + +// --------------------------------------------------------------------------- +// Text construct (inline) +// --------------------------------------------------------------------------- + +const htmlBlockComponentTextConstruct: Construct = { + name: 'htmlBlockComponent', + tokenize: createTokenize('text'), +}; + +// --------------------------------------------------------------------------- +// Extension +// --------------------------------------------------------------------------- + +/** + * Micromark extension that tokenizes `...` as a single + * token at both flow (block) and text (inline) levels. + * + * Prevents the markdown parser from consuming ``}'; - * protectHTMLBlockContent(input) - * // Returns: '' - * ``` - */ -function protectHTMLBlockContent(content: string): string { - return content.replace( - /(]*>)\{\s*`((?:[^`\\]|\\.)*)`\s*\}(<\/HTMLBlock>)/g, - (_match, openTag: string, templateContent: string, closeTag: string) => { - const encoded = base64Encode(templateContent); - return `${openTag}${HTML_BLOCK_CONTENT_START}${encoded}${HTML_BLOCK_CONTENT_END}${closeTag}`; - }, - ); -} - /** * Removes JSX-style comments (e.g., { /* comment *\/ }) from content. * @@ -336,16 +294,13 @@ function evaluateAttributeExpressions(content: string, context: JSXContext, prot * @returns Preprocessed content ready for markdown parsing */ export function preprocessJSXExpressions(content: string, context: JSXContext = {}): string { - // Step 0: Base64 encode HTMLBlock content - let processed = protectHTMLBlockContent(content); - // Step 1: Protect code blocks and inline code - const { protectedCode, protectedContent } = protectCodeBlocks(processed); + const { protectedCode, protectedContent } = protectCodeBlocks(content); // Step 2: Evaluate attribute expressions (JSX attribute syntax: href={baseUrl}) // For inline expressions, we use a library to parse the expression & evaluate it later // For attribute expressions, it was difficult to use a library to parse them, so do it manually - processed = evaluateAttributeExpressions(protectedContent, context, protectedCode); + let processed = evaluateAttributeExpressions(protectedContent, context, protectedCode); // Step 3: Escape problematic braces to prevent MDX expression parsing errors // This handles both unbalanced braces and paragraph-spanning expressions in one pass