diff --git a/graphrag/index/operations/build_noun_graph/np_extractors/resource_loader.py b/graphrag/index/operations/build_noun_graph/np_extractors/resource_loader.py index ed6c5a8190..796e27adaa 100644 --- a/graphrag/index/operations/build_noun_graph/np_extractors/resource_loader.py +++ b/graphrag/index/operations/build_noun_graph/np_extractors/resource_loader.py @@ -3,13 +3,16 @@ """Util functions needed for nltk-based noun-phrase extractors (i.e. TextBlob).""" +from functools import lru_cache + import nltk +@lru_cache(maxsize=64) def download_if_not_exists(resource_name) -> bool: """Download nltk resources if they haven't been already.""" - # look under all possible categories - root_categories = [ + # Precompute all category/resource_name paths for efficiency + root_categories = ( "corpora", "tokenizers", "taggers", @@ -24,11 +27,11 @@ def download_if_not_exists(resource_name) -> bool: "mt", "sentiment", "similarity", - ] - for category in root_categories: + ) + resource_paths = [f"{category}/{resource_name}" for category in root_categories] + for path in resource_paths: try: - # if found, stop looking and avoid downloading - nltk.find(f"{category}/{resource_name}") + nltk.find(path) return True # noqa: TRY300 except LookupError: continue