diff --git a/changes.txt b/changes.txt index 6447476bb..6eae9ec80 100644 --- a/changes.txt +++ b/changes.txt @@ -2,7 +2,7 @@ Change Log ========== -**Changes in version 1.27.2.3** +**Changes in version 1.27.2.3** (2026-04-24) * Fixed issues: diff --git a/tests/resources/test_2608_expected_1.28 b/tests/resources/test_2608_expected_1.28 new file mode 100644 index 000000000..4fd75038c --- /dev/null +++ b/tests/resources/test_2608_expected_1.28 @@ -0,0 +1,12 @@ +The XRT summed image is shown in Figure 5. At the edge +of the field-of-view, we detect a source spatially coincident with +the Wolf 1561 star. As we consider this source unrelated to the +FRB, we use the online Swift-XRT data products generator (Evans +et al. 2007) (Evans et al. 2009) to derive upper limits in the 0.3- +10 keV range on the count rate of 0.001885 counts.s−1. Using +WebPIMMS9 (v4.11a) and assuming a weighted average 𝑁H = 5.12× +1020 cm−2 from the direction of the source estimated from the +NASA’s HEASARC 10 online tools (HI4PI Collaboration et al. +2016) and a power law model with a photon index = 2, this upper +limit translates to an energy flux of 6.6 × 10−14 erg cm−2 s−1 (8.3 × +10−14 erg cm−2 s−1 unabsorbed). diff --git a/tests/resources/test_3842_expected_1.28.txt b/tests/resources/test_3842_expected_1.28.txt new file mode 100644 index 000000000..e42c89896 --- /dev/null +++ b/tests/resources/test_3842_expected_1.28.txt @@ -0,0 +1,43 @@ +NIST SP 800-223 + +High-Performance Computing Security +February 2024 + + +iii +Table of Contents +1. Introduction ...................................................................................................................................1 +2. HPC System Reference Architecture and Main Components ............................................................2 +2.1.1. Components of the High-Performance Computing Zone ............................................................. 3 +2.1.2. Components of the Data Storage Zone ........................................................................................ 4 +2.1.3. Parallel File System ....................................................................................................................... 4 +2.1.4. Archival and Campaign Storage .................................................................................................... 5 +2.1.5. Burst Buffer .................................................................................................................................. 5 +2.1.6. Components of the Access Zone .................................................................................................. 6 +2.1.7. Components of the Management Zone ....................................................................................... 6 +2.1.8. General Architecture and Characteristics .................................................................................... 6 +2.1.9. Basic Services ................................................................................................................................ 7 +2.1.10. Configuration Management ....................................................................................................... 7 +2.1.11. HPC Scheduler and Workflow Management .............................................................................. 7 +2.1.12. HPC Software .............................................................................................................................. 8 +2.1.13. User Software ............................................................................................................................. 8 +2.1.14. Site-Provided Software and Vendor Software ........................................................................... 8 +2.1.15. Containerized Software in HPC .................................................................................................. 9 +3. HPC Threat Analysis...................................................................................................................... 10 +3.2.1. Access Zone Threats ................................................................................................................... 11 +3.2.2. Management Zone Threats ........................................................................................................ 11 +3.2.3. High-Performance Computing Zone Threats .............................................................................. 12 +3.2.4. Data Storage Zone Threats ......................................................................................................... 12 +4. HPC Security Posture, Challenges, and Recommendations ............................................................. 14 +5. Conclusions .................................................................................................................................. 19 +2.1. Main COMPONENNS..........cccccssccccssssccccssssecccssssecccsessseccessseeecsesseeceesseecsesseeesesseeecesaseecsesseeesessaeeesessaeeesD +3.1. Key HPC Security Characteristics and Use REquireMent............cccsscccessccessecesssecesseecsssecesseeestessstree LO +3.2. Threats to HPC FUNCTION ZONES.........cesccesscesscesscssscesecessssssssssscesscesscessssseeseesseascessssssessesssesssssessssees LO +3.3. Other Threats ........cccccsccssccsscssccssecssscssscssscsseesssesssssesscesscseesesseeecessccssssssssessssssssssesessssssssssssssssesesLO +4.1. HPC Access Control via Network S@gMeNtatiOn ...........ccccscccsssccessecessseceseccsssecessecessecesstecsssecesseessses LA +4.2. Compute Node Sanitization ............cccccssecsssecsessccsseccsseecsseecceseecssseesseecssssesssesessssessseesssssesssessssessses +LD +4.3. Data Integrity Protection ............cccccccccccccessssssssccecccessessssssseecccesssesssssseescesssesssssseeesessssssstsssesesssssssesLO +4.4. SECUFING CONTAINELSS ........eccesscccesssccccessseccceesscccessssecccesseeccesseeccessseeccessseccessssescessssesesssssescsssseseessLO +4.5. Achieving Security While Maintaining HPC Performance. ..........cc:cccsscccessscesssecessecesssecesstcessseeesreesss LZ +4.6. Challenges to HPC Security TOols..........c:ccccssccsssecceseecssseccssecessseccsseecssseecsseecssseecsssesssscssssessssssssssessse LD diff --git a/tests/test_font.py b/tests/test_font.py index 1b7fd4a95..b54ec2723 100644 --- a/tests/test_font.py +++ b/tests/test_font.py @@ -68,7 +68,10 @@ def test_2608(): f.write(text.encode('utf8')) path_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_2608_expected') path_expected_1_26 = os.path.normpath(f'{__file__}/../../tests/resources/test_2608_expected_1.26') - if pymupdf.mupdf_version_tuple >= (1, 27): + path_expected_1_28 = os.path.normpath(f'{__file__}/../../tests/resources/test_2608_expected_1.28') + if pymupdf.mupdf_version_tuple >= (1, 28): + path_expected2 = path_expected_1_28 + elif pymupdf.mupdf_version_tuple >= (1, 27): path_expected2 = path_expected else: path_expected2 = path_expected_1_26 diff --git a/tests/test_tesseract.py b/tests/test_tesseract.py index 4babde463..734bdae50 100644 --- a/tests/test_tesseract.py +++ b/tests/test_tesseract.py @@ -79,8 +79,11 @@ def test_3842(): return path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf') - path_text = os.path.normpath(f'{__file__}/../../tests/resources/test_3842_partial.txt') - text_expected = pathlib.Path(path_text).read_text() + if pymupdf.mupdf_version_tuple >= (1, 28): + path_text_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_3842_expected_1.28.txt') + else: + path_text_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_3842_partial.txt') + text_expected = pathlib.Path(path_text_expected).read_text() with pymupdf.open(path) as document: page = document[6] try: @@ -95,6 +98,8 @@ def test_3842(): assert 0, f'Unexpected exception text: {str(e)=}' else: text = page.get_text(textpage=partial_tp) + with open(os.path.normpath(f'{__file__}/../../tests/resources/test_3842_out'), 'w') as f: + f.write(text) print() print(text) print(f'text:\n{text!r}') diff --git a/tests/test_textextract.py b/tests/test_textextract.py index ab66f8aef..1be5d7662 100644 --- a/tests/test_textextract.py +++ b/tests/test_textextract.py @@ -356,8 +356,10 @@ def test_3594(): wt = pymupdf.TOOLS.mupdf_warnings() if pymupdf.mupdf_version_tuple < (1, 26, 8): assert not wt - else: + elif pymupdf.mupdf_version_tuple < (1, 28): assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 2 times...' + else: + assert wt == 'ActualText with no position. Text may be lost or mispositioned.\n... repeated 2 times...' def test_3687(): @@ -432,7 +434,12 @@ def test_4026(): blocks = page.get_text('blocks') for i, block in enumerate(blocks): print(f'block {i}: {block}') - assert len(blocks) == 5 + if pymupdf.mupdf_version_tuple >= (1, 28): + # 2026-05-01: Expect slightly better splitting of text into + # paragraphs. + assert len(blocks) == 8 + else: + assert len(blocks) == 5 def test_3725(): # This currently just shows the extracted text. We don't check it is as expected. @@ -905,7 +912,10 @@ def test_4546(): wt = pymupdf.TOOLS.mupdf_warnings() if pymupdf.mupdf_version_tuple >= (1, 26, 8): assert text == expected_mupdf_1_27_0 - assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 120 times...' + if pymupdf.mupdf_version_tuple >= (1, 28): + assert wt == 'ActualText with no position. Text may be lost or mispositioned.\n... repeated 120 times...' + else: + assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 120 times...' elif pymupdf.mupdf_version_tuple >= (1, 26, 1): assert text == expected_mupdf_1_26_1 assert not wt diff --git a/tests/test_threads.py b/tests/test_threads.py new file mode 100644 index 000000000..386fbc3d6 --- /dev/null +++ b/tests/test_threads.py @@ -0,0 +1,139 @@ +import os +import random +import queue +import threading + +import pymupdf + + +def log(text): + print(f'{__file__}: {threading.get_native_id()=} {text}', flush=1) + + +def threadfn(queue_to_threads, queue_from_threads): + def tlog(text): + log(f'### threadfn(): {text}') + try: + documents = list() + while 1: + action = queue_to_threads.get() + #tlog(f'{action=}') + if action == 'quit': + break + elif isinstance(action, tuple) and len(action) == 2 and action[0] == 'open': + path = action[1] + #tlog(f'Opening {path=}.') + document = pymupdf.open(path) + #tlog(f'Have opened {path=}.') + documents.append(document) + elif action == 'gettext': + if documents: + document_i = random.randrange(len(documents)) + document = documents[document_i] + page = document[random.randrange(len(document))] + text = page.get_text() + elif action == 'close': + if len(documents) >= 2: + document_i = random.randrange(len(documents)) + del documents[document_i] + else: + assert 0, f'Unrecognised {action=}.' + + #tlog(f'Sending to queue_from_threads: {threading.current_thread()=}.') + queue_from_threads.put(threading.current_thread()) + except Exception as e: + tlog(f'error: {e}') + queue_from_threads.put(e) + + +def test_threads_stress(): + + print() + paths = [ + os.path.normpath(f'{__file__}/../../tests/resources/test_3594.pdf'), + os.path.normpath(f'{__file__}/../../tests/resources/test_3789.pdf'), + ] + + threads = list() + + queue_to_threads = queue.Queue() + queue_from_threads = queue.Queue() + + def put(action): + #log(f'test_threads_stress(): Sending {action=}.') + queue_to_threads.put(action) + + class Stats: + pass + stats = Stats() + stats.num_opens = 0 + stats.num_gettexts = 0 + stats.num_threads_max = 0 + + def start_thread(): + thread = threading.Thread(target=threadfn, args=(queue_to_threads, queue_from_threads), daemon=1) + threads.append(thread) + thread.start() + stats.num_threads_max = max(stats.num_threads_max, len(threads)) + + def quit_thread(): + put('quit') + stopped_thread = queue_from_threads.get() + assert isinstance(stopped_thread, threading.Thread), f'A thread has failed: {stopped_thread}' + #log(f'{stopped_thread=}') + stopped_thread.join() + if 0: + log(f'threads ({len(threads)}):') + for thread in threads: + log(f' {thread=}') + log(f'{stopped_thread=}') + threads.remove(stopped_thread) + + def open_document(): + path = paths[random.randrange(len(paths))] + put(('open', path)) + stats.num_opens += 1 + + for i in range(10): + start_thread() + open_document() + + numits = 1000 + for i in range(numits): + op = random.randrange(100) + if 0: + log('') + log(f'{i+1}/{numits}') + log(f'{len(threads)=}.') + log(f'{stats.num_opens=}.') + log(f'{stats.num_gettexts=}.') + log(f'{op=}.') + if op < 10: + # Create new thread. + start_thread() + elif op < 15: + if len(threads) >= 2: + quit_thread() + elif op < 30: + # Open document in a thread. + open_document() + elif op == 40: + # Close document in a thread. + if threads: + put('close') + elif op < 100: + # get text. + put('gettext') + stats.num_gettexts +=1 + else: + assert 0, f'Unrecognised {op=}' + + log(f'End:') + log(f'{len(threads)=} {stats.num_opens=} {stats.num_gettexts=} {stats.num_threads_max=}.') + + for _ in range(len(threads)): + quit_thread() + + # Ignore any warnings, which can occur for some pages in the documents. + wt = pymupdf.TOOLS.mupdf_warnings() +