Skip to content

Commit 9331fa8

Browse files
authored
refactor: optimize prepQuotedPrintable() with hash lookup and int-length tracking (#10344)
* refactor: optimize prepQuotedPrintable() with O(1) lookup, int-length tracking, and sprintf encoding * cs-fix
1 parent b6e9a4f commit 9331fa8

2 files changed

Lines changed: 131 additions & 98 deletions

File tree

system/Email/Email.php

Lines changed: 24 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ class Email
197197
*
198198
* @see http://www.ietf.org/rfc/rfc822.txt
199199
*
200-
* @var "\r\n"|"n"
200+
* @var "\n"|"\r\n"
201201
*/
202202
public $CRLF = "\r\n";
203203

@@ -1335,89 +1335,6 @@ protected function appendAttachments(&$body, $boundary, $multipart = null)
13351335
*/
13361336
protected function prepQuotedPrintable($str)
13371337
{
1338-
// ASCII code numbers for "safe" characters that can always be
1339-
// used literally, without encoding, as described in RFC 2049.
1340-
// http://www.ietf.org/rfc/rfc2049.txt
1341-
static $asciiSafeChars = [
1342-
// ' ( ) + , - . / : = ?
1343-
39,
1344-
40,
1345-
41,
1346-
43,
1347-
44,
1348-
45,
1349-
46,
1350-
47,
1351-
58,
1352-
61,
1353-
63,
1354-
// numbers
1355-
48,
1356-
49,
1357-
50,
1358-
51,
1359-
52,
1360-
53,
1361-
54,
1362-
55,
1363-
56,
1364-
57,
1365-
// upper-case letters
1366-
65,
1367-
66,
1368-
67,
1369-
68,
1370-
69,
1371-
70,
1372-
71,
1373-
72,
1374-
73,
1375-
74,
1376-
75,
1377-
76,
1378-
77,
1379-
78,
1380-
79,
1381-
80,
1382-
81,
1383-
82,
1384-
83,
1385-
84,
1386-
85,
1387-
86,
1388-
87,
1389-
88,
1390-
89,
1391-
90,
1392-
// lower-case letters
1393-
97,
1394-
98,
1395-
99,
1396-
100,
1397-
101,
1398-
102,
1399-
103,
1400-
104,
1401-
105,
1402-
106,
1403-
107,
1404-
108,
1405-
109,
1406-
110,
1407-
111,
1408-
112,
1409-
113,
1410-
114,
1411-
115,
1412-
116,
1413-
117,
1414-
118,
1415-
119,
1416-
120,
1417-
121,
1418-
122,
1419-
];
1420-
14211338
// We are intentionally wrapping so mail servers will encode characters
14221339
// properly and MUAs will behave, so {unwrap} must go!
14231340
$str = str_replace(['{unwrap}', '{/unwrap}'], '', $str);
@@ -1438,46 +1355,55 @@ protected function prepQuotedPrintable($str)
14381355
$str = str_replace(["\r\n", "\r"], "\n", $str);
14391356
}
14401357

1441-
$escape = '=';
1358+
static $asciiSafeChars;
1359+
if ($asciiSafeChars === null) {
1360+
$safeChars = [39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63];
1361+
$safeChars = array_merge($safeChars, range(48, 57), range(65, 90), range(97, 122));
1362+
$asciiSafeChars = array_fill_keys($safeChars, true);
1363+
}
1364+
14421365
$output = '';
14431366

14441367
foreach (explode("\n", $str) as $line) {
1445-
$length = static::strlen($line);
1446-
$temp = '';
1368+
$length = static::strlen($line);
1369+
$temp = '';
1370+
$tempLen = 0;
14471371

14481372
// Loop through each character in the line to add soft-wrap
14491373
// characters at the end of a line " =\r\n" and add the newly
14501374
// processed line(s) to the output (see comment on the $CRLF class property)
14511375
for ($i = 0; $i < $length; $i++) {
1452-
// Grab the next character
1453-
$char = $line[$i];
1454-
$ascii = ord($char);
1376+
$char = $line[$i];
1377+
$ascii = ord($char);
1378+
$charLen = 1;
14551379

14561380
// Convert spaces and tabs but only if it's the end of the line
14571381
if ($ascii === 32 || $ascii === 9) {
14581382
if ($i === ($length - 1)) {
1459-
$char = $escape . sprintf('%02s', dechex($ascii));
1383+
$char = sprintf('=%02X', $ascii);
1384+
$charLen = 3;
14601385
}
14611386
}
14621387
// DO NOT move this below the $asciiSafeChars check!
14631388
//
14641389
// = (equals) signs are allowed by RFC2049, but must be encoded
14651390
// as they are the encoding delimiter!
1466-
elseif ($ascii === 61) {
1467-
$char = $escape . strtoupper(sprintf('%02s', dechex($ascii))); // =3D
1468-
} elseif (! in_array($ascii, $asciiSafeChars, true)) {
1469-
$char = $escape . strtoupper(sprintf('%02s', dechex($ascii)));
1391+
elseif ($ascii === 61 || ! isset($asciiSafeChars[$ascii])) {
1392+
$char = sprintf('=%02X', $ascii);
1393+
$charLen = 3;
14701394
}
14711395

14721396
// If we're at the character limit, add the line to the output,
14731397
// reset our temp variable, and keep on chuggin'
1474-
if ((static::strlen($temp) + static::strlen($char)) >= 76) {
1475-
$output .= $temp . $escape . $this->CRLF;
1476-
$temp = '';
1398+
if (($tempLen + $charLen) >= 76) {
1399+
$output .= $temp . '=' . $this->CRLF;
1400+
$temp = '';
1401+
$tempLen = 0;
14771402
}
14781403

14791404
// Add the character to our temporary line
14801405
$temp .= $char;
1406+
$tempLen += $charLen;
14811407
}
14821408

14831409
// Add our completed line to the output

tests/system/Email/EmailTest.php

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,4 +302,111 @@ public function testGetHostnameFallsBackToGethostnameFunction(): void
302302

303303
$this->assertSame(gethostname(), $getHostname());
304304
}
305+
306+
#[DataProvider('providePrepQuotedPrintableWithLfCrlf')]
307+
public function testPrepQuotedPrintableWithLfCrlf(string $input, string $expected): void
308+
{
309+
$email = new Email();
310+
$email->CRLF = "\n";
311+
$prepQP = self::getPrivateMethodInvoker($email, 'prepQuotedPrintable');
312+
313+
$this->assertSame($expected, $prepQP($input));
314+
}
315+
316+
/**
317+
* @return iterable<string, array{string, string}>
318+
*/
319+
public static function providePrepQuotedPrintableWithLfCrlf(): iterable
320+
{
321+
return [
322+
'empty string' => ['', ''],
323+
'safe ascii only' => ['hello world', 'hello world'],
324+
'safe chars only' => ['abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789(),-./:?', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789(),-./:?'],
325+
'unsafe char encoded' => ["a\x01b", 'a=01b'],
326+
'trailing space encoded' => ["hello \nworld", "hello=20\nworld"],
327+
'trailing tab encoded' => ["hello\t\nworld", "hello=09\nworld"],
328+
'equals sign encoded as =3D' => ['a=b', 'a=3Db'],
329+
'multiple spaces reduced' => ['a b', 'a b'],
330+
'null bytes removed' => ["a\x00b", 'ab'],
331+
'unwrap tags removed' => ['{unwrap}secret{/unwrap}', 'secret'],
332+
'single line' => ['test', 'test'],
333+
'two lines' => ["line1\nline2", "line1\nline2"],
334+
'three lines trailing empty' => ["line1\nline2\n", "line1\nline2\n"],
335+
];
336+
}
337+
338+
public function testPrepQuotedPrintableWithCrlfNative(): void
339+
{
340+
$email = new Email();
341+
$email->CRLF = "\r\n";
342+
$prepQP = self::getPrivateMethodInvoker($email, 'prepQuotedPrintable');
343+
344+
$result = $prepQP('test');
345+
346+
$this->assertSame(quoted_printable_encode('test'), $result);
347+
}
348+
349+
public function testPrepQuotedPrintableSoftLineBreak(): void
350+
{
351+
$email = new Email();
352+
$email->CRLF = "\n";
353+
$prepQP = self::getPrivateMethodInvoker($email, 'prepQuotedPrintable');
354+
355+
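// A line is wrapped as soon as it would reach 76 characters, so only 75 of the 'a' chars fit before the "=" soft break; the remaining 'a' and both 'b' chars continue on the next line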
356+
// The input consists of safe characters only (no spaces or tabs), so nothing is encoded and the soft break is the only change
357+
$input = str_repeat('a', 76) . 'bb';
358+
$result = $prepQP($input);
359+
360+
$this->assertStringContainsString("=\n", $result, 'Soft line break must be present');
361+
$this->assertStringNotContainsString("\r\n", $result, 'Custom CRLF must not contain \\r');
362+
}
363+
364+
public function testPrepQuotedPrintableSoftBreakAfterEncodedChar(): void
365+
{
366+
$email = new Email();
367+
$email->CRLF = "\n";
368+
$prepQP = self::getPrivateMethodInvoker($email, 'prepQuotedPrintable');
369+
370+
// 74 safe chars + 1 encoded (=3D = 3 bytes) = 77 → must break before encoded
371+
$input = str_repeat('a', 74) . '=';
372+
$result = $prepQP($input);
373+
374+
$this->assertSame(str_repeat('a', 74) . "=\n=3D", $result);
375+
}
376+
377+
public function testPrepQuotedPrintableHardLineBreakNoInternalSpaceReduction(): void
378+
{
379+
$email = new Email();
380+
$email->CRLF = "\n";
381+
$prepQP = self::getPrivateMethodInvoker($email, 'prepQuotedPrintable');
382+
383+
// Spaces not at end of line must be left as-is
384+
$this->assertSame('a b', $prepQP('a b'));
385+
}
386+
387+
public function testPrepQuotedPrintableMixedContent(): void
388+
{
389+
$email = new Email();
390+
$email->CRLF = "\n";
391+
$prepQP = self::getPrivateMethodInvoker($email, 'prepQuotedPrintable');
392+
393+
$input = "Hello, World!\nline ends with tab\t\n=special chars: \x01\x02";
394+
$result = $prepQP($input);
395+
396+
$this->assertStringContainsString('Hello, World=21', $result);
397+
$this->assertStringContainsString('=09', $result);
398+
$this->assertStringContainsString('=3D', $result);
399+
$this->assertStringContainsString('=01', $result);
400+
$this->assertStringContainsString('=02', $result);
401+
}
402+
403+
public function testPrepQuotedPrintableUnwrapRemovesTagsOnly(): void
404+
{
405+
$email = new Email();
406+
$email->CRLF = "\n";
407+
$prepQP = self::getPrivateMethodInvoker($email, 'prepQuotedPrintable');
408+
409+
$this->assertSame('keep =7Bbraces=7D', $prepQP('keep {braces}'));
410+
$this->assertSame('keep (parentheses)', $prepQP('keep (parentheses)'));
411+
}
305412
}

0 commit comments

Comments
 (0)