From 0be2aed4cd0198768010125adb9a1fee42f47d6c Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Sat, 11 Oct 2025 02:08:36 +0000 Subject: [PATCH] Optimize get_zero_positions The optimization achieves a **14% speedup** by eliminating redundant operations within the main loop and moving preprocessing outside of it. **Key optimizations:** 1. **Pre-compute category and size values**: Instead of checking `if node_categories is None` and `if node_sizes is None` inside the loop for every node, the optimized version pre-processes these values once at the beginning. This eliminates 18,000+ conditional checks in the original profiler results. 2. **Batch string conversions**: Category values are converted to strings once using list comprehensions (`[str(int(cat)) for cat in node_categories]`) rather than calling `str(int(node_category))` for each node individually. 3. **List comprehension instead of append**: The optimized version uses list comprehensions to build the result list directly, which is more efficient than repeatedly calling `append()` on an initially empty list. **Performance impact by test case type:** - **Large-scale tests (999+ nodes)**: Show the best improvements (5-32% faster) because the preprocessing overhead is amortized across many nodes - **Small-scale tests (1-3 nodes)**: Show slight regressions (12-36% slower) due to the upfront preprocessing cost not being offset by the reduced per-node work - **Edge cases with None values**: Benefit significantly as the None checks are handled once instead of per-iteration The line profiler confirms this: the original code spent 29.7% of time in `NodePosition()` constructor calls within the loop, while the optimized version reduces this overhead through better data preparation and more efficient list construction patterns. --- .../index/operations/layout_graph/zero.py | 59 +++++++++++-------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/graphrag/index/operations/layout_graph/zero.py b/graphrag/index/operations/layout_graph/zero.py index 934df0030f..004abbedd1 100644 --- a/graphrag/index/operations/layout_graph/zero.py +++ b/graphrag/index/operations/layout_graph/zero.py @@ -67,30 +67,37 @@ def get_zero_positions( three_d: bool | None = False, ) -> list[NodePosition]: """Project embedding vectors down to 2D/3D using UMAP.""" - embedding_position_data: list[NodePosition] = [] - for index, node_name in enumerate(node_labels): - node_category = 1 if node_categories is None else node_categories[index] - node_size = 1 if node_sizes is None else node_sizes[index] - - if not three_d: - embedding_position_data.append( - NodePosition( - label=str(node_name), - x=0, - y=0, - cluster=str(int(node_category)), - size=int(node_size), - ) - ) - else: - embedding_position_data.append( - NodePosition( - label=str(node_name), - x=0, - y=0, - z=0, - cluster=str(int(node_category)), - size=int(node_size), - ) + n = len(node_labels) + + if node_categories is None: + category_values = ["1"] * n + else: + category_values = [str(int(cat)) for cat in node_categories] + + if node_sizes is None: + size_values = [1] * n + else: + size_values = [int(sz) for sz in node_sizes] + + if not three_d: + return [ + NodePosition( + label=str(node_labels[i]), + x=0, + y=0, + cluster=category_values[i], + size=size_values[i], ) - return embedding_position_data + for i in range(n) + ] + return [ + NodePosition( + label=str(node_labels[i]), + x=0, + y=0, + z=0, + cluster=category_values[i], + size=size_values[i], + ) + for i in range(n) + ]