Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions lib/collectors/codebase.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,10 @@ function extractSymbols(content) {
symbols.functions.push(match[1]);
}

const arrowPattern = /(?:const|let)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/g;
// ReDoS fix: bound the unbounded \s* / async runs and the parameter list so the
// matcher cannot backtrack polynomially on pathological input. Bounds are large
// enough that all realistic source matches identically to the prior \s*/[^)]* form.
const arrowPattern = /(?:const|let)\s{1,1000}([a-zA-Z_$][a-zA-Z0-9_$]*)\s{0,1000}=\s{0,1000}(?:async\s{0,1000})?\([^)]{0,2000}\)\s{0,1000}=>/g;
while ((match = arrowPattern.exec(content)) !== null) {
symbols.functions.push(match[1]);
}
Expand All @@ -141,7 +144,9 @@ function extractSymbols(content) {
symbols.exports.push(match[1]);
}

const moduleExportsPattern = /module\.exports\s*=\s*\{([^}]+)\}/;
// ReDoS fix: bound the \s* runs and capture length so the matcher stays linear;
// bounds exceed any realistic module.exports declaration so matches are unchanged.
const moduleExportsPattern = /module\.exports\s{0,1000}=\s{0,1000}\{([^}]{1,100000})\}/;
Comment on lines +147 to +149

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The original regular expression /module\.exports\s*=\s*\{([^}]+)\}/ is completely safe from ReDoS. The negated character class [^}]+ is terminated by the literal \}, so there is no ambiguity and there are no overlapping match paths that could make the regex engine backtrack exponentially.

Limiting the capture to 100,000 characters ({1,100000}) will cause the parser to fail on large, valid JavaScript files (such as large configuration files, localized translation dictionaries, or auto-generated asset maps) where the exported object exceeds 100KB.

We should revert this to the original safe pattern.

Suggested change
// ReDoS fix: bound the \s* runs and capture length so the matcher stays linear;
// bounds exceed any realistic module.exports declaration so matches are unchanged.
const moduleExportsPattern = /module\.exports\s{0,1000}=\s{0,1000}\{([^}]{1,100000})\}/;
const moduleExportsPattern = /module\.exports\s*=\s*\{([^}]+)\}/;

const moduleMatch = content.match(moduleExportsPattern);
if (moduleMatch) {
const keys = moduleMatch[1].split(',').map(k => k.trim().split(':')[0].trim());
Expand Down
10 changes: 8 additions & 2 deletions lib/collectors/documentation.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ function safeReadFile(filePath, basePath) {
* Analyze a single markdown file
*/
function analyzeMarkdownFile(content, filePath) {
const sectionMatches = content.match(/^##\s+(.+)$/gm) || [];
// ReDoS fix: bound the \s+ run after the ## marker; line-anchored (.+) cannot
// cross newlines so this matches the same headings as before.
const sectionMatches = content.match(/^##\s{1,1000}(.+)$/gm) || [];
const sections = sectionMatches.slice(0, 10).map(s => s.replace(/^##\s+/, ''));
const sectionLower = sections.map(s => s.toLowerCase()).join(' ');

Expand Down Expand Up @@ -83,7 +85,11 @@ function extractCheckboxes(result, content) {
* Extract documented features
*/
function extractFeatures(result, content) {
const featurePattern = /^[-*]\s+\*{0,2}(.+?)\*{0,2}(?:\s*[-–]\s*(.+))?$/gm;
// ReDoS fix: bound the \s+ run and the line-content quantifiers so the lazy
// (.+?) / optional trailing (.+) pair cannot backtrack polynomially. Using
// [^\n] is equivalent to . here (. never matches newline), and the bounds far
// exceed the 80-char feature cap applied below, so matches are unchanged.
const featurePattern = /^[-*]\s{1,100}\*{0,2}([^\n]{1,2000}?)\*{0,2}(?:\s{0,100}[-–]\s{0,100}([^\n]{1,2000}))?$/gm;
let match;

while ((match = featurePattern.exec(content)) !== null && result.features.length < 20) {
Expand Down
12 changes: 9 additions & 3 deletions lib/enhance/agent-patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,9 @@ const agentPatterns = {
// Look for hardcoded .claude/ references
const hasHardcoded = /\.claude\//.test(content);
// Exclude if using AI_STATE_DIR
const usesEnvVar = /AI_STATE_DIR|\$\{.*STATE.*\}/i.test(content);
// ReDoS fix: bound the .* runs to non-brace chars so they cannot cross }
// and cannot backtrack; matches the same ${...STATE...} expressions.
const usesEnvVar = /AI_STATE_DIR|\$\{[^}]*STATE[^}]*\}/i.test(content);

if (hasHardcoded && !usesEnvVar) {
return {
Expand Down Expand Up @@ -494,8 +496,12 @@ const agentPatterns = {

// Check if has code blocks or lists but no XML
const hasCodeBlocks = /```[\s\S]+?```/.test(content);
const hasLists = /^[-*]\s+.+$/m.test(content);
const hasXML = /<\w+>[\s\S]*?<\/\w+>/.test(content);
// ReDoS fix: bound the \s+ and line-content runs; line-anchored so this still
// detects any "- item" / "* item" list line as before.
const hasLists = /^[-*]\s{1,1000}[^\n]{1,2000}$/m.test(content);
// ReDoS fix: bound the unbounded [\s\S]*? so an unterminated <tag> cannot
// drive polynomial backtracking; 50k chars covers any realistic XML block.
const hasXML = /<\w+>[\s\S]{0,50000}?<\/\w+>/.test(content);
const sectionCount = (content.match(/^##\s+/gm) || []).length;

// Complex content without XML
Expand Down
26 changes: 19 additions & 7 deletions lib/enhance/auto-suppression.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,12 @@ const PATTERN_HEURISTICS = {
const contentLower = content.toLowerCase();

// Check if file is pattern documentation describing vague language detection
// ReDoS fix: the .* runs never matched across newlines (. excludes \n), so
// bounding them to [^\n]{0,N} keeps the same "within one line, in order"
// semantics while removing the polynomial multi-.* backtracking.
const isPatternDoc =
/pattern.*detect.*usually|example.*vague|fuzzy.*language.*like/i.test(content) ||
/vague.*terms.*like|"usually".*"sometimes"/i.test(content);
/pattern[^\n]{0,500}detect[^\n]{0,500}usually|example[^\n]{0,500}vague|fuzzy[^\n]{0,500}language[^\n]{0,500}like/i.test(content) ||
/vague[^\n]{0,500}terms[^\n]{0,500}like|"usually"[^\n]{0,500}"sometimes"/i.test(content);

if (isPatternDoc) {
return {
Expand Down Expand Up @@ -129,7 +132,10 @@ const PATTERN_HEURISTICS = {
const isOrchestrator =
fileNameLower.includes('orchestrator') ||
fileNameLower.includes('coordinator') ||
/Task\s*\(\s*\{[\s\S]*subagent_type/i.test(content);
// ReDoS fix: bound the unbounded [\s\S]* so a "Task({" with no following
// subagent_type cannot drive polynomial backtracking; 50k chars covers any
// realistic Task(...) call body.
/Task\s{0,100}\(\s{0,100}\{[\s\S]{0,50000}subagent_type/i.test(content);

if (isOrchestrator) {
return {
Expand All @@ -140,7 +146,9 @@ const PATTERN_HEURISTICS = {

// Check if workflow command that invokes agents
const isWorkflowCommand =
/spawn.*agent|invoke.*agent|Task\s*\(\s*\{/i.test(content) &&
// ReDoS fix: bound the within-line .* runs and \s* runs ([^\n] == . here)
// to keep the same matches without polynomial backtracking.
/spawn[^\n]{0,500}agent|invoke[^\n]{0,500}agent|Task\s{0,100}\(\s{0,100}\{/i.test(content) &&
fileNameLower.endsWith('.md');

if (isWorkflowCommand) {
Expand All @@ -159,9 +167,11 @@ const PATTERN_HEURISTICS = {
*/
missing_output_format: (finding, content, context) => {
// Check if content spawns subagents with their own output specs
// ReDoS fix: bound the within-line .* and \s* runs ([^\n] == . here) so the
// same membership matches hold without polynomial backtracking.
const spawnsSubagent =
/subagent_type|spawn.*agent|Task\s*\(\s*\{/i.test(content) ||
/enhance:.*-enhancer|enhance:.*-reporter/i.test(content);
/subagent_type|spawn[^\n]{0,500}agent|Task\s{0,100}\(\s{0,100}\{/i.test(content) ||
/enhance:[^\n]{0,500}-enhancer|enhance:[^\n]{0,500}-reporter/i.test(content);

if (spawnsSubagent) {
return {
Expand All @@ -180,7 +190,9 @@ const PATTERN_HEURISTICS = {
missing_constraints: (finding, content, context) => {
// Check for constraint section presence
const hasConstraintSection =
/##\s*What\s+.*MUST\s+NOT\s+Do/i.test(content) ||
// ReDoS fix: bound the within-line .* and \s runs ([^\n] == . here) so the
// "## What ... MUST NOT Do" heading still matches without backtracking.
/##\s{0,100}What\s{1,100}[^\n]{0,500}MUST\s{1,100}NOT\s{1,100}Do/i.test(content) ||
/##\s*Constraints/i.test(content) ||
/<constraints>/i.test(content) ||
/##\s*Critical\s+Constraints/i.test(content) ||
Expand Down
15 changes: 11 additions & 4 deletions lib/enhance/cross-file-analyzer.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,11 @@ const CRITICAL_PATTERNS = [
const SUBAGENT_PATTERN = /subagent_type\s*[=:]\s*["']([^"']+)["']/g;

/** Pre-compiled patterns for cleaning content */
const BAD_EXAMPLE_TAG_PATTERN = /<bad[_\- ]?example>[\s\S]*?<\/bad[_\- ]?example>/gi;
const BAD_EXAMPLE_CODE_PATTERN = /```[^\n]*bad[^\n]*\n[\s\S]*?```/gi;
// ReDoS fix: bound the lazy [\s\S]*? bodies so an unterminated <bad-example> or
// ``` fence cannot drive polynomial backtracking; 50k chars covers any realistic
// example block, so the stripped regions are unchanged for real content.
const BAD_EXAMPLE_TAG_PATTERN = /<bad[_\- ]?example>[\s\S]{0,50000}?<\/bad[_\- ]?example>/gi;
const BAD_EXAMPLE_CODE_PATTERN = /```[^\n]{0,500}bad[^\n]{0,500}\n[\s\S]{0,50000}?```/gi;
Comment on lines +117 to +121

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The original regular expressions for stripping bad examples do not have ReDoS vulnerabilities. Lazy matching over all characters ([\s\S]*?) is a linear scan and does not cause catastrophic backtracking, because there are no nested quantifiers or overlapping active groups.

By capping the match at 50,000 characters, any large <bad-example> block or code block (which can easily exceed 50KB when demonstrating large prompts or complex codebases) will fail to be stripped. This will lead to false positives during cross-file analysis.

We should revert these to their original safe patterns.

Suggested change
// ReDoS fix: bound the lazy [\s\S]*? bodies so an unterminated <bad-example> or
// ``` fence cannot drive polynomial backtracking; 50k chars covers any realistic
// example block, so the stripped regions are unchanged for real content.
const BAD_EXAMPLE_TAG_PATTERN = /<bad[_\- ]?example>[\s\S]{0,50000}?<\/bad[_\- ]?example>/gi;
const BAD_EXAMPLE_CODE_PATTERN = /```[^\n]{0,500}bad[^\n]{0,500}\n[\s\S]{0,50000}?```/gi;
const BAD_EXAMPLE_TAG_PATTERN = /<bad[_\- ]?example>[\s\S]*?<\/bad[_\- ]?example>/gi;
const BAD_EXAMPLE_CODE_PATTERN = /```[^\n]*bad[^\n]*\n[\s\S]*?```/gi;


// ============================================
// TOOL PATTERN CACHE
Expand Down Expand Up @@ -649,10 +652,14 @@ function analyzePromptConsistency(agents) {

// Extract action keywords
let action;
// ReDoS fix: bound the greedy prefix to non-newline chars. `line` is a single
// trimmed line (no newlines), so [^\n]{0,N} is equivalent to the prior `.*`:
// greedy match strips everything up to and including the LAST keyword plus its
// trailing whitespace, preserving the word-boundary semantics exactly.
if (isAlways) {
action = line.replace(/.*\bALWAYS\b\s*/i, '').substring(0, ACTION_COMPARISON_LENGTH);
action = line.replace(/[^\n]{0,2000}\bALWAYS\b\s{0,200}/i, '').substring(0, ACTION_COMPARISON_LENGTH);
} else {
action = line.replace(/.*\b(?:NEVER|DO NOT)\b\s*/i, '').substring(0, ACTION_COMPARISON_LENGTH);
action = line.replace(/[^\n]{0,2000}\b(?:NEVER|DO NOT)\b\s{0,200}/i, '').substring(0, ACTION_COMPARISON_LENGTH);
}

// Extract significant keywords from action
Expand Down
8 changes: 6 additions & 2 deletions lib/enhance/docs-patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ const docsPatterns = {
if (!content || typeof content !== 'string') return null;

// Find markdown links
const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
// ReDoS fix: bound the negated-class captures so the matcher is linear;
// bounds far exceed any realistic markdown link, so matches are unchanged.
const linkRegex = /\[([^\]]{1,2000})\]\(([^)]{1,4000})\)/g;
Comment on lines +27 to +29

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The original regular expression /\[([^\]]+)\]\(([^)]+)\)/g does not have a ReDoS vulnerability. Because the negated character classes [^\]]+ and [^)]+ are strictly bounded by the literal characters \] and \) respectively, there is no overlapping match state that can cause catastrophic backtracking.

By introducing arbitrary limits like {1,2000} and {1,4000}, this regex will fail to match valid markdown links that contain long URLs, such as those with extensive query parameters, tracking tokens, or inline base64 data URIs (which are very common in markdown image links).

We should revert this to the original safe pattern.

Suggested change
// ReDoS fix: bound the negated-class captures so the matcher is linear;
// bounds far exceed any realistic markdown link, so matches are unchanged.
const linkRegex = /\[([^\]]{1,2000})\]\(([^)]{1,4000})\)/g;
const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;

const brokenLinks = [];
let match;

Expand All @@ -40,7 +42,9 @@ const docsPatterns = {
if (linkTarget.startsWith('#')) {
const anchorId = linkTarget.slice(1).toLowerCase();
// Generate expected heading anchors from content
const headings = content.match(/^#{1,6}\s+(.+)$/gm) || [];
// ReDoS fix: bound the \s+ run; line-anchored (.+) cannot cross newlines
// so the same headings match as before.
const headings = content.match(/^#{1,6}\s{1,1000}(.+)$/gm) || [];
const anchors = headings.map(h => {
return h.replace(/^#{1,6}\s+/, '')
.toLowerCase()
Expand Down
Loading