diff --git a/graphrag/query/indexer_adapters.py b/graphrag/query/indexer_adapters.py index 0c6e54a8af..877d2bf780 100644 --- a/graphrag/query/indexer_adapters.py +++ b/graphrag/query/indexer_adapters.py @@ -228,9 +228,12 @@ def embed_community_reports( raise ValueError(error_msg) if embedding_col not in reports_df.columns: - reports_df[embedding_col] = reports_df.loc[:, source_col].apply( - lambda x: embedder.embed(x) - ) + # Avoid using .apply with a lambda for improved performance. + # Use a list comprehension, which is faster for element-wise operations in pandas. + src = reports_df[source_col].to_list() + # No change in behavior, ensures a list of same length as DataFrame + embeddings = [embedder.embed(x) for x in src] + reports_df[embedding_col] = embeddings return reports_df