diff --git a/.github/workflows/secret-scan.yml b/.github/workflows/secret-scan.yml new file mode 100644 index 0000000000..84f07fae9b --- /dev/null +++ b/.github/workflows/secret-scan.yml @@ -0,0 +1,37 @@ +# Runs scripts/ci/secret-detection.sh (detect-secrets) on pushes, pull requests, manual dispatch, and a weekly schedule. +name: Secret Scan + +on: + push: + pull_request: + workflow_dispatch: + schedule: + # Weekly: Sundays at 03:00 UTC. + - cron: '0 3 * * 0' + +# Least privilege: read-only token; nothing visible in this job needs write scopes. +permissions: + contents: read + +jobs: + secret-scan: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + lfs: true + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install detect-secrets + run: | + python -m pip install --upgrade pip + # Pinned to the "version" recorded in .secrets.baseline. + python -m pip install detect-secrets==1.5.0 + + - name: Run secret detection + run: ./scripts/ci/secret-detection.sh diff --git a/.gitignore b/.gitignore index 35704d511b..8a650955db 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,9 @@ nosetests.xml coverage.xml *,cover .pytest_cache +.pre-commit-cache/ +.uv-cache/ +.uvcache/ # Translations *.mo diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..b41b914af0 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +# Local hook: runs scripts/pre-commit-secret-check.sh on every commit (always_run, no filenames passed). +repos: + - repo: local + hooks: + - id: detect-secrets + name: Detect secrets + entry: ./scripts/pre-commit-secret-check.sh + language: python + additional_dependencies: + - detect-secrets==1.5.0 + pass_filenames: false + always_run: true diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 0000000000..f940fb5d27 --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,852 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "GitLabTokenDetector" + }, + { + "name": "HexHighEntropyString",
+ "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "IPPublicDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "OpenAIDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "PypiTokenDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TelegramBotTokenDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_baseline_file", + "filename": ".secrets.baseline" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + }, + { + "path": "detect_secrets.filters.regex.should_exclude_files", + "pattern": "^(plans/|tmp/)" + } + ], + "results": { + "README.md": [ + { + "type": "Secret Keyword", + "filename": "README.md", + "hashed_secret": "564e340cd48437d2dfe876ee154cc99dc4d0d137", + "is_verified": false, + "line_number": 115, + 
"is_secret": false + } + ], + "ai/prompts/PLAN.md": [ + { + "type": "Secret Keyword", + "filename": "ai/prompts/PLAN.md", + "hashed_secret": "e5e9fa1ba31ecd1ae84f75caaa474f3a663f05f4", + "is_verified": false, + "line_number": 141, + "is_secret": false + } + ], + "demos/ai/Introduction/Ask-HackerNews-Demo.ipynb": [ + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/Introduction/Ask-HackerNews-Demo.ipynb", + "hashed_secret": "9c3388110000d4008ad7ea6ffee36b4f235a0fb5", + "is_verified": false, + "line_number": 2372, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/Introduction/Ask-HackerNews-Demo.ipynb", + "hashed_secret": "f9c77cecc3284da1a2a31da79c4461e97533e810", + "is_verified": false, + "line_number": 2407, + "is_secret": false + } + ], + "demos/ai/Introduction/simple-power-of-umap.ipynb": [ + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/Introduction/simple-power-of-umap.ipynb", + "hashed_secret": "7e6268d3d8f12d6bc39017524362b10f9982f383", + "is_verified": false, + "line_number": 1564, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/Introduction/simple-power-of-umap.ipynb", + "hashed_secret": "d1a8bb8b7ae7bfdd73a0f546b1afe3cfd1e9340b", + "is_verified": false, + "line_number": 1571, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/Introduction/simple-power-of-umap.ipynb", + "hashed_secret": "8c7dd29142ea573688b77d75812ba4a05790f6d1", + "is_verified": false, + "line_number": 7141, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "demos/ai/Introduction/simple-power-of-umap.ipynb", + "hashed_secret": "80903ddedcf4ec0a2ee5911cefa7e1ad52419dcc", + "is_verified": false, + "line_number": 7532, + "is_secret": false + } + ], + "demos/ai/OSINT/Chavismo.ipynb": [ + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/OSINT/Chavismo.ipynb", + "hashed_secret": 
"4201305032390957dc14167329aa749f6cf732d8", + "is_verified": false, + "line_number": 1428, + "is_secret": false + } + ], + "demos/ai/OSINT/jack-donations.ipynb": [ + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/OSINT/jack-donations.ipynb", + "hashed_secret": "42d1a7197ef5c6309f69db102a3d679dc18c592e", + "is_verified": false, + "line_number": 491, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/OSINT/jack-donations.ipynb", + "hashed_secret": "3ac55df4c18d6371e53fd33772815221a7c5f76a", + "is_verified": false, + "line_number": 558, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/OSINT/jack-donations.ipynb", + "hashed_secret": "af7555388b98cc279d3a4353e9149dfe980c2ac9", + "is_verified": false, + "line_number": 594, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/OSINT/jack-donations.ipynb", + "hashed_secret": "2f910b0297a8ef20c24f4509cd039e405968c134", + "is_verified": false, + "line_number": 1323, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/OSINT/jack-donations.ipynb", + "hashed_secret": "f844717ee920487259d838c1dc057fde5b683fa5", + "is_verified": false, + "line_number": 2885, + "is_secret": false + } + ], + "demos/ai/cyber/cyber-redteam-umap-demo.ipynb": [ + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/cyber/cyber-redteam-umap-demo.ipynb", + "hashed_secret": "b039dd7fb7def798c46726c4911534f216b5317d", + "is_verified": false, + "line_number": 1490, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/cyber/cyber-redteam-umap-demo.ipynb", + "hashed_secret": "7c4776e81ae5ca8ad7b123873bb1d415619f833e", + "is_verified": false, + "line_number": 1674, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": "demos/ai/cyber/cyber-redteam-umap-demo.ipynb", + "hashed_secret": 
"726aa3d6fd041ec07593b706c7c619a3f4037a18", + "is_verified": false, + "line_number": 1792, + "is_secret": false + } + ], + "demos/demos_databases_apis/databricks_pyspark/graphistry-notebook-dashboard.html": [ + { + "type": "Base64 High Entropy String", + "filename": "demos/demos_databases_apis/databricks_pyspark/graphistry-notebook-dashboard.html", + "hashed_secret": "520a029833255babe2609d7c084d2d6d4ff4b285", + "is_verified": false, + "line_number": 16, + "is_secret": false + } + ], + "demos/demos_databases_apis/databricks_pyspark/graphistry-notebook-dashboard.py": [ + { + "type": "Secret Keyword", + "filename": "demos/demos_databases_apis/databricks_pyspark/graphistry-notebook-dashboard.py", + "hashed_secret": "554ec9059a3eedbfaa5d014bffec0d51716ad064", + "is_verified": false, + "line_number": 41, + "is_secret": false + } + ], + "demos/demos_databases_apis/gpu_rapids/part_iv_gpu_cuml.ipynb": [ + { + "type": "Hex High Entropy String", + "filename": "demos/demos_databases_apis/gpu_rapids/part_iv_gpu_cuml.ipynb", + "hashed_secret": "80d35521036242ee66ab8163b7bfb9ea7c098923", + "is_verified": false, + "line_number": 627, + "is_secret": false + } + ], + "demos/demos_databases_apis/neo4j/official/graphistry_bolt_tutorial_public.ipynb": [ + { + "type": "Hex High Entropy String", + "filename": "demos/demos_databases_apis/neo4j/official/graphistry_bolt_tutorial_public.ipynb", + "hashed_secret": "8dcf25ce272f25ec303bbff34a6fe2ca0501a0ed", + "is_verified": false, + "line_number": 167, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "demos/demos_databases_apis/neo4j/official/graphistry_bolt_tutorial_public.ipynb", + "hashed_secret": "32e2fd2b3eb53e36a0bb7ec6c722da33bd5eca58", + "is_verified": false, + "line_number": 169, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "demos/demos_databases_apis/neo4j/official/graphistry_bolt_tutorial_public.ipynb", + "hashed_secret": 
"7cfe75e6f3886d976b4edf799444225cfd38e7e2", + "is_verified": false, + "line_number": 483, + "is_secret": false + } + ], + "demos/demos_databases_apis/splunk/splunk_demo_public.ipynb": [ + { + "type": "Secret Keyword", + "filename": "demos/demos_databases_apis/splunk/splunk_demo_public.ipynb", + "hashed_secret": "08ba36ba977b5b41cce7ce1437fedf7cb9014f1a", + "is_verified": false, + "line_number": 46, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "demos/demos_databases_apis/splunk/splunk_demo_public.ipynb", + "hashed_secret": "7cfe75e6f3886d976b4edf799444225cfd38e7e2", + "is_verified": false, + "line_number": 882, + "is_secret": false + } + ], + "demos/demos_databases_apis/tigergraph/social_raw_REST_calls.ipynb": [ + { + "type": "Hex High Entropy String", + "filename": "demos/demos_databases_apis/tigergraph/social_raw_REST_calls.ipynb", + "hashed_secret": "7cfe75e6f3886d976b4edf799444225cfd38e7e2", + "is_verified": false, + "line_number": 584, + "is_secret": false + } + ], + "demos/demos_databases_apis/tigergraph/tigergraph_pygraphistry_bindings.ipynb": [ + { + "type": "Secret Keyword", + "filename": "demos/demos_databases_apis/tigergraph/tigergraph_pygraphistry_bindings.ipynb", + "hashed_secret": "373e3a56093ed3fa7cafad4f0059752d9f330e30", + "is_verified": false, + "line_number": 47, + "is_secret": false + } + ], + "demos/for_developers.ipynb": [ + { + "type": "Hex High Entropy String", + "filename": "demos/for_developers.ipynb", + "hashed_secret": "7cfe75e6f3886d976b4edf799444225cfd38e7e2", + "is_verified": false, + "line_number": 353, + "is_secret": false + } + ], + "demos/gfql/GPU_memory_consumption_tutorial.ipynb": [ + { + "type": "Base64 High Entropy String", + "filename": "demos/gfql/GPU_memory_consumption_tutorial.ipynb", + "hashed_secret": "49e526f4756a5a35534b6ab40ca110379d5332dd", + "is_verified": false, + "line_number": 767, + "is_secret": false + } + ], + "demos/gfql/gfql_remote.ipynb": [ + { + "type": "Secret Keyword", 
+ "filename": "demos/gfql/gfql_remote.ipynb", + "hashed_secret": "13c52a750914694e5471bdcafd43e1fd7b4c6068", + "is_verified": false, + "line_number": 65, + "is_secret": false + } + ], + "demos/gfql/python_remote.ipynb": [ + { + "type": "Secret Keyword", + "filename": "demos/gfql/python_remote.ipynb", + "hashed_secret": "13c52a750914694e5471bdcafd43e1fd7b4c6068", + "is_verified": false, + "line_number": 81, + "is_secret": false + } + ], + "demos/more_examples/graphistry_features/Workbooks.ipynb": [ + { + "type": "Hex High Entropy String", + "filename": "demos/more_examples/graphistry_features/Workbooks.ipynb", + "hashed_secret": "7cfe75e6f3886d976b4edf799444225cfd38e7e2", + "is_verified": false, + "line_number": 326, + "is_secret": false + } + ], + "demos/more_examples/graphistry_features/embed/simple-ssh-logs-rgcn-anomaly-detector.ipynb": [ + { + "type": "Hex High Entropy String", + "filename": "demos/more_examples/graphistry_features/embed/simple-ssh-logs-rgcn-anomaly-detector.ipynb", + "hashed_secret": "7cfe75e6f3886d976b4edf799444225cfd38e7e2", + "is_verified": false, + "line_number": 701, + "is_secret": false + } + ], + "demos/more_examples/simple/tutorial_csv_mini_app_icij_implants.ipynb": [ + { + "type": "Hex High Entropy String", + "filename": "demos/more_examples/simple/tutorial_csv_mini_app_icij_implants.ipynb", + "hashed_secret": "7cfe75e6f3886d976b4edf799444225cfd38e7e2", + "is_verified": false, + "line_number": 883, + "is_secret": false + } + ], + "demos/talks/infosec_jupyterthon2022/rgcn_login_anomaly_detection/advanced-identity-protection-40m.ipynb": [ + { + "type": "Base64 High Entropy String", + "filename": "demos/talks/infosec_jupyterthon2022/rgcn_login_anomaly_detection/advanced-identity-protection-40m.ipynb", + "hashed_secret": "669a4cfdf3cf20f3bad530eb6c5811829a24d4c8", + "is_verified": false, + "line_number": 4006, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": 
"demos/talks/infosec_jupyterthon2022/rgcn_login_anomaly_detection/advanced-identity-protection-40m.ipynb", + "hashed_secret": "5cdf3d640434671856cc6f162fa948cdcbf217d3", + "is_verified": false, + "line_number": 4450, + "is_secret": false + }, + { + "type": "Base64 High Entropy String", + "filename": "demos/talks/infosec_jupyterthon2022/rgcn_login_anomaly_detection/advanced-identity-protection-40m.ipynb", + "hashed_secret": "0092aa58220b91b89748359a4ca8af55313589d4", + "is_verified": false, + "line_number": 4498, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "demos/talks/infosec_jupyterthon2022/rgcn_login_anomaly_detection/advanced-identity-protection-40m.ipynb", + "hashed_secret": "7cfe75e6f3886d976b4edf799444225cfd38e7e2", + "is_verified": false, + "line_number": 4568, + "is_secret": false + } + ], + "demos/talks/infosec_jupyterthon2022/rgcn_login_anomaly_detection/intro-story.ipynb": [ + { + "type": "Hex High Entropy String", + "filename": "demos/talks/infosec_jupyterthon2022/rgcn_login_anomaly_detection/intro-story.ipynb", + "hashed_secret": "7cfe75e6f3886d976b4edf799444225cfd38e7e2", + "is_verified": false, + "line_number": 161, + "is_secret": false + } + ], + "demos/upload_csv_miniapp.ipynb": [ + { + "type": "Hex High Entropy String", + "filename": "demos/upload_csv_miniapp.ipynb", + "hashed_secret": "7cfe75e6f3886d976b4edf799444225cfd38e7e2", + "is_verified": false, + "line_number": 644, + "is_secret": false + } + ], + "docs/source/10min.rst": [ + { + "type": "Secret Keyword", + "filename": "docs/source/10min.rst", + "hashed_secret": "981eb7e146cab5b17b4c7f5f12af441d36d0cc36", + "is_verified": false, + "line_number": 63, + "is_secret": false + } + ], + "docs/source/cheatsheet.md": [ + { + "type": "Secret Keyword", + "filename": "docs/source/cheatsheet.md", + "hashed_secret": "3bed76eaf1f24ca8dd40308d7c6b2e47905ed885", + "is_verified": false, + "line_number": 77, + "is_secret": false + }, + { + "type": "Secret Keyword", + 
"filename": "docs/source/cheatsheet.md", + "hashed_secret": "46eda0d919e9ff536436e45a46524ad602151135", + "is_verified": false, + "line_number": 473, + "is_secret": false + } + ], + "docs/source/install/quick.rst": [ + { + "type": "Secret Keyword", + "filename": "docs/source/install/quick.rst", + "hashed_secret": "981eb7e146cab5b17b4c7f5f12af441d36d0cc36", + "is_verified": false, + "line_number": 62, + "is_secret": false + } + ], + "docs/source/server/concurrency.rst": [ + { + "type": "Secret Keyword", + "filename": "docs/source/server/concurrency.rst", + "hashed_secret": "1a91d62f7ca67399625a4368a6ab5d4a3baa6073", + "is_verified": false, + "line_number": 17, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "docs/source/server/concurrency.rst", + "hashed_secret": "db530a75f72661f267f2463a57055fdb0f69376f", + "is_verified": false, + "line_number": 33, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "docs/source/server/concurrency.rst", + "hashed_secret": "f36bbba7cd1624ce46da6a320f1d12d7f9f4d3e1", + "is_verified": false, + "line_number": 38, + "is_secret": false + } + ], + "docs/source/server/privacy.rst": [ + { + "type": "Secret Keyword", + "filename": "docs/source/server/privacy.rst", + "hashed_secret": "5eb942810a75ebc850972a89285d570d484c89c4", + "is_verified": false, + "line_number": 29, + "is_secret": false + } + ], + "docs/source/server/register.rst": [ + { + "type": "Secret Keyword", + "filename": "docs/source/server/register.rst", + "hashed_secret": "5eb942810a75ebc850972a89285d570d484c89c4", + "is_verified": false, + "line_number": 18, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "docs/source/server/register.rst", + "hashed_secret": "a46f3f62d7ba2f6a8ee2c1d1647d1744204ad450", + "is_verified": false, + "line_number": 175, + "is_secret": false + } + ], + "graphistry/PlotterBase.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/PlotterBase.py", + "hashed_secret": 
"91dfd9ddb4198affc5c194cd8ce6d338fde470e2", + "is_verified": false, + "line_number": 1887, + "is_secret": false + } + ], + "graphistry/client_session.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/client_session.py", + "hashed_secret": "da4e198da2aa490cfa6fab3bc4408cfa611facef", + "is_verified": false, + "line_number": 17, + "is_secret": false + } + ], + "graphistry/gremlin.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/gremlin.py", + "hashed_secret": "8b471eab0529eb92783d20fafd6ee067817be700", + "is_verified": false, + "line_number": 509, + "is_secret": false + } + ], + "graphistry/messages.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/messages.py", + "hashed_secret": "3f784094ab12aa41bb95e8e27d891fa57c966b97", + "is_verified": false, + "line_number": 3, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/messages.py", + "hashed_secret": "f222daa35b4810ef6e9621550a278aec6000be32", + "is_verified": false, + "line_number": 6, + "is_secret": false + } + ], + "graphistry/plugins/kusto.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/plugins/kusto.py", + "hashed_secret": "c303df00cd0a72b21c62900b758b06fc541664ce", + "is_verified": false, + "line_number": 57, + "is_secret": false + } + ], + "graphistry/pygraphistry.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/pygraphistry.py", + "hashed_secret": "1a91d62f7ca67399625a4368a6ab5d4a3baa6073", + "is_verified": false, + "line_number": 76, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/pygraphistry.py", + "hashed_secret": "37fa265330ad83eaa879efb1e2db6380896cf639", + "is_verified": false, + "line_number": 656, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/pygraphistry.py", + "hashed_secret": "91dfd9ddb4198affc5c194cd8ce6d338fde470e2", + "is_verified": false, + "line_number": 810, + "is_secret": false + }, + { + "type": "Secret 
Keyword", + "filename": "graphistry/pygraphistry.py", + "hashed_secret": "f0578f1e7174b1a41c4ea8c6e17f7a8a3b88c92a", + "is_verified": false, + "line_number": 1829, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/pygraphistry.py", + "hashed_secret": "8be52126a6fde450a7162a3651d589bb51e9579d", + "is_verified": false, + "line_number": 1833, + "is_secret": false + } + ], + "graphistry/tests/test_client_session.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_client_session.py", + "hashed_secret": "c4f437957afe434e75cd32b3515db03ef9e3a7af", + "is_verified": false, + "line_number": 51, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_client_session.py", + "hashed_secret": "ee369845b98b65e65abb99e72a3bec006a78d3e8", + "is_verified": false, + "line_number": 59, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_client_session.py", + "hashed_secret": "f0cab9d2f956f8044d800c15b8fc3a101bddd6b9", + "is_verified": false, + "line_number": 68, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_client_session.py", + "hashed_secret": "4ffe5395368d479466536b68c32cbb89750fa9b4", + "is_verified": false, + "line_number": 71, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_client_session.py", + "hashed_secret": "1073ab6cda4b991cd29f9e83a307f34004ae9327", + "is_verified": false, + "line_number": 99, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_client_session.py", + "hashed_secret": "87ba78e0f03afcef60657f342ec5567368fadd8c", + "is_verified": false, + "line_number": 100, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_client_session.py", + "hashed_secret": "3b88ea816c78ec104041a75e78f32ec804eaac39", + "is_verified": false, + "line_number": 101, + 
"is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_client_session.py", + "hashed_secret": "c45aceded785a712b9fb005fac0cce0a44bad123", + "is_verified": false, + "line_number": 230, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_client_session.py", + "hashed_secret": "6bcca3559640eb119c1fc48bd633a19c2f35e256", + "is_verified": false, + "line_number": 236, + "is_secret": false + } + ], + "graphistry/tests/test_kusto.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_kusto.py", + "hashed_secret": "00cafd126182e8a9e7c01bb2f0dfd00496be724f", + "is_verified": false, + "line_number": 139, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_kusto.py", + "hashed_secret": "c636e8e238fd7af97e2e500f8c6f0f4c0bedafb0", + "is_verified": false, + "line_number": 147, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_kusto.py", + "hashed_secret": "fe1bae27cb7c1fb823f496f286e78f1d2ae87734", + "is_verified": false, + "line_number": 259, + "is_secret": false + } + ], + "graphistry/tests/test_pygraphistry.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_pygraphistry.py", + "hashed_secret": "25a4a4c1152866fb8d015948ac819dc74a3e2391", + "is_verified": false, + "line_number": 45, + "is_secret": false + }, + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_pygraphistry.py", + "hashed_secret": "ecc4bb9d7cf399b5ca454e339a71865d9d4b0de7", + "is_verified": false, + "line_number": 59, + "is_secret": false + } + ], + "graphistry/tests/test_tigergraph.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/tests/test_tigergraph.py", + "hashed_secret": "2c96b5bc6f6b026fbb1e83565abf7e2c6402309f", + "is_verified": false, + "line_number": 20, + "is_secret": false + }, + { + "type": "Basic Auth Credentials", + "filename": 
"graphistry/tests/test_tigergraph.py", + "hashed_secret": "2c96b5bc6f6b026fbb1e83565abf7e2c6402309f", + "is_verified": false, + "line_number": 38, + "is_secret": false + } + ], + "graphistry/tigeristry.py": [ + { + "type": "Secret Keyword", + "filename": "graphistry/tigeristry.py", + "hashed_secret": "373e3a56093ed3fa7cafad4f0059752d9f330e30", + "is_verified": false, + "line_number": 51, + "is_secret": false + } + ], + "versioneer.py": [ + { + "type": "Hex High Entropy String", + "filename": "versioneer.py", + "hashed_secret": "ad99b40cfbae8800e55ab294290c0f0c5ce50471", + "is_verified": false, + "line_number": 107, + "is_secret": false + } + ] + }, + "generated_at": "2026-01-11T04:16:18Z" +} diff --git a/DEVELOP.md b/DEVELOP.md index 1934923343..7cbede7927 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -64,6 +64,30 @@ Automatically build via ReadTheDocs from inline definitions. To manually build, see `docs/`. +## Secret scanning + +We use `detect-secrets` with a pre-commit hook and a CI workflow. The scripts prefer `uv run python3` +when `uv` is available. +Default scan exclusions live in `scripts/ci/secret-excludes.sh`. + +```bash +uv pip install -e .[dev] +pre-commit install +./scripts/secrets.sh --update-baseline +./scripts/ci/secret-detection.sh +``` + +If you prefer explicit installs instead of the dev extras: + +```bash +uv pip install detect-secrets pre-commit +pre-commit install +./scripts/secrets.sh --update-baseline +./scripts/ci/secret-detection.sh +``` + +Commit `.secrets.baseline` after review when it changes. 
+ ## Ignore files You may need to add ignore rules: diff --git a/graphistry/compute/predicates/str.py b/graphistry/compute/predicates/str.py index 27d1015f99..20d2c91ca3 100644 --- a/graphistry/compute/predicates/str.py +++ b/graphistry/compute/predicates/str.py @@ -1,4 +1,5 @@ from typing import Any, Optional, Union +import re import pandas as pd @@ -12,6 +13,26 @@ def _cudf_mask_none(result: Any, mask: Any) -> Any: return result_pd +def _pandas_handle_na(result: pd.Series, source: pd.Series, na: Optional[bool]) -> pd.Series: + """Apply the ``na`` policy to ``result`` on the rows where ``source`` is missing. + + ``na=None`` sets those rows to ``None`` (object dtype); otherwise they are set to ``na``. + """ + mask = source.isna() + if not mask.any(): + return result + if na is None: + result = result.astype('object') + result[mask] = None + return result + result = result.copy() + result[mask] = na + if result.dtype == object: + # No copy= keyword: it only exists on pandas >= 2.0 and is deprecated in 3.0. + result = result.infer_objects() + return result + + class Contains(ASTPredicate): def __init__( self, @@ -58,13 +79,14 @@ def __call__(self, s: SeriesT) -> SeriesT: return result else: - return s.str.contains( + result = s.str.contains( self.pat, - self.case, - self.flags, - self.na, - self.regex + case=self.case, + flags=self.flags, + na=self.na, + regex=self.regex ) + return _pandas_handle_na(result, s, self.na) def _validate_fields(self) -> None: """Validate predicate fields.""" @@ -153,9 +175,7 @@ def __call__(self, s: SeriesT) -> SeriesT: if not is_cudf and self.case: # Use pandas native tuple support for case-sensitive result = s.str.startswith(self.pat) - if self.na is not None: - return result.fillna(self.na) - return result + return _pandas_handle_na(result, s, self.na) elif not is_cudf and not self.case: # pandas tuple with case-insensitive - need workaround if len(self.pat) == 0: @@ -169,9 +189,7 @@ def __call__(self, s: SeriesT) -> SeriesT: patterns_lower = tuple(p.lower() for p in self.pat) # Use pandas native tuple support on lowercased data result = s_lower.str.startswith(patterns_lower) - if self.na is not None: - return result.fillna(self.na) - return result + return
_pandas_handle_na(result, s, self.na) else: # cuDF - need manual OR logic (workaround for bug #20237) if len(self.pat) == 0: @@ -217,14 +235,7 @@ def __call__(self, s: SeriesT) -> SeriesT: else: return result else: - # pandas supports na parameter for case-sensitive str patterns - if not self.case: - if self.na is not None: - return result.fillna(self.na) - else: - return result - else: - return s.str.startswith(self.pat, self.na) + return _pandas_handle_na(result, s, self.na) def _validate_fields(self) -> None: """Validate predicate fields.""" @@ -319,9 +330,7 @@ def __call__(self, s: SeriesT) -> SeriesT: if not is_cudf and self.case: # Use pandas native tuple support for case-sensitive result = s.str.endswith(self.pat) - if self.na is not None: - return result.fillna(self.na) - return result + return _pandas_handle_na(result, s, self.na) elif not is_cudf and not self.case: # pandas tuple with case-insensitive - need workaround if len(self.pat) == 0: @@ -336,9 +345,7 @@ def __call__(self, s: SeriesT) -> SeriesT: patterns_lower = tuple(p.lower() for p in self.pat) # Use pandas native tuple support on lowercased data result = s_lower.str.endswith(patterns_lower) - if self.na is not None: - return result.fillna(self.na) - return result + return _pandas_handle_na(result, s, self.na) else: # cuDF - need manual OR logic (workaround for bug #20237) if len(self.pat) == 0: @@ -384,14 +391,7 @@ def __call__(self, s: SeriesT) -> SeriesT: else: return result else: - # pandas supports na parameter for case-sensitive str patterns - if not self.case: - if self.na is not None: - return result.fillna(self.na) - else: - return result - else: - return s.str.endswith(self.pat, self.na) + return _pandas_handle_na(result, s, self.na) def _validate_fields(self) -> None: """Validate predicate fields.""" @@ -493,7 +493,18 @@ def __call__(self, s: SeriesT) -> SeriesT: return result else: - return s.str.match(self.pat, self.case, self.flags, self.na) + if self.flags: + effective_flags = 
self.flags + if not self.case: + effective_flags |= re.IGNORECASE + pattern = re.compile(self.pat, effective_flags) + result = s.str.match(pattern, na=self.na) + else: + if not self.case: + result = s.str.match(self.pat, case=False, na=self.na) + else: + result = s.str.match(self.pat, na=self.na) + return _pandas_handle_na(result, s, self.na) def _validate_fields(self) -> None: """Validate predicate fields.""" @@ -582,7 +593,13 @@ def __call__(self, s: SeriesT) -> SeriesT: return result else: # pandas has native fullmatch support - return s.str.fullmatch(self.pat, self.case, self.flags, self.na) + result = s.str.fullmatch( + self.pat, + case=self.case, + flags=self.flags, + na=self.na + ) + return _pandas_handle_na(result, s, self.na) def _validate_fields(self) -> None: """Validate predicate fields.""" diff --git a/scripts/ci/secret-detection.py b/scripts/ci/secret-detection.py new file mode 100644 index 0000000000..eef47d2055 --- /dev/null +++ b/scripts/ci/secret-detection.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +import argparse +import os +import sys + +from detect_secrets.constants import VerifiedResult +from detect_secrets.core import baseline +from detect_secrets.core.potential_secret import PotentialSecret +from detect_secrets.core.scan import get_files_to_scan +from detect_secrets.core.secrets_collection import SecretsCollection +from detect_secrets.settings import default_settings, get_settings + +EXCLUDE_FILTER = "detect_secrets.filters.regex.should_exclude_file" +VERIFY_FILTER = "detect_secrets.filters.common.is_ignored_due_to_verification_policies" +BASELINE_FILTER = "detect_secrets.filters.common.is_baseline_file" + + +def configure_filters(exclude_files: str, only_verified: bool, baseline_path: str) -> None: + if exclude_files: + get_settings().filters[EXCLUDE_FILTER] = {"pattern": [exclude_files]} + + if baseline_path: + get_settings().filters[BASELINE_FILTER] = {"filename": baseline_path} + + min_level = VerifiedResult.VERIFIED_TRUE if 
only_verified else VerifiedResult.UNVERIFIED + get_settings().filters[VERIFY_FILTER] = {"min_level": min_level.value} + +def ensure_default_plugins() -> None: + if get_settings().plugins: + return + with default_settings() as settings: + pass + get_settings().set(settings) + + +def normalize_secrets(secrets: SecretsCollection) -> SecretsCollection: + normalized = SecretsCollection(root=secrets.root) + for filename, secret in secrets: + data = secret.json() + data["filename"] = filename + normalized[filename].add(PotentialSecret.load_secret_from_dict(data)) + return normalized + + +def load_baseline(baseline_path: str) -> SecretsCollection: + baseline_data = baseline.load_from_file(baseline_path) + baseline_data = baseline.upgrade(baseline_data) + baseline.configure_settings_from_baseline(baseline_data, filename=baseline_path) + + secrets = SecretsCollection() + for filename, entries in baseline_data.get("results", {}).items(): + for entry in entries: + data = dict(entry) + data["filename"] = filename + secrets[filename].add(PotentialSecret.load_secret_from_dict(data)) + return secrets + + +def scan_paths(paths, root: str) -> SecretsCollection: + secrets = SecretsCollection(root=root) + for filename in get_files_to_scan(*paths, should_scan_all_files=False, root=root): + secrets.scan_file(filename) + return secrets + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Secret detection wrapper (serial scan)") + subparsers = parser.add_subparsers(dest="command", required=True) + + create_cmd = subparsers.add_parser("create-baseline", help="Create or update baseline") + create_cmd.add_argument("--baseline", required=True, help="Baseline path") + create_cmd.add_argument("--exclude-files", default="", help="Regex for files to exclude") + create_cmd.add_argument("paths", nargs="*", help="Paths to scan (default: .)") + + scan_cmd = subparsers.add_parser("scan", help="Scan for new secrets") + scan_cmd.add_argument("--baseline", 
required=True, help="Baseline path")
+    scan_cmd.add_argument("--exclude-files", default="", help="Regex for files to exclude")
+    scan_cmd.add_argument("--only-verified", action="store_true", help="Only report verified secrets")
+    scan_cmd.add_argument("paths", nargs="*", help="Paths to scan (default: .)")
+
+    return parser.parse_args()
+
+
+def main() -> int:
+    """Run the selected subcommand and return the process exit status.
+
+    ``create-baseline`` scans the given paths (default ``.``) and writes the
+    findings to ``--baseline``. ``scan`` scans the given paths and compares
+    the findings with the existing baseline.
+
+    Returns:
+        0 when the baseline was written, or when the scan found nothing that
+        is missing from the baseline; 1 when the scan found secrets that are
+        not in the baseline; 2 when the baseline file does not exist.
+    """
+    args = parse_args()
+    root = os.getcwd()
+    paths = args.paths or ["."]
+
+    if args.command == "create-baseline":
+        ensure_default_plugins()
+        configure_filters(args.exclude_files, False, args.baseline)
+        secrets = scan_paths(paths, root=root)
+        baseline.save_to_file(normalize_secrets(secrets), args.baseline)
+        return 0
+
+    if not os.path.exists(args.baseline):
+        print(f"ERROR: baseline not found at {args.baseline}", file=sys.stderr)
+        return 2
+
+    # Order matters: load_baseline() configures the global settings from the
+    # baseline file, and ensure_default_plugins() only acts when no plugins
+    # are configured after that.
+    baseline_secrets = normalize_secrets(load_baseline(args.baseline))
+    ensure_default_plugins()
+    configure_filters(args.exclude_files, args.only_verified, args.baseline)
+    scanned = normalize_secrets(scan_paths(paths, root=root))
+    new_secrets = scanned - baseline_secrets
+
+    if new_secrets:
+        print(
+            "ERROR: new secrets detected. Run detect-secrets locally to review the findings.",
+            file=sys.stderr,
+        )
+        # List where each new finding lives so the failure is actionable from
+        # the log alone. Only the location and detector type are printed,
+        # never the secret value.
+        for filename, secret in new_secrets:
+            line_number = getattr(secret, "line_number", 0)
+            location = f"{filename}:{line_number}" if line_number else filename
+            print(f"  {location} [{secret.type}]", file=sys.stderr)
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/ci/secret-detection.sh b/scripts/ci/secret-detection.sh
new file mode 100755
index 0000000000..3d5f623079
--- /dev/null
+++ b/scripts/ci/secret-detection.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+# secret-detection.sh - Centralized secret detection using detect-secrets
+# Usage: ./scripts/ci/secret-detection.sh [--check-only]
+
+set -euo pipefail
+
+# Print an error to stderr and terminate the script with status 1.
+print_error() {
+    echo "ERROR: $1" >&2
+    exit 1
+}
+
+# Print a warning to stderr; execution continues.
+print_warning() {
+    echo "WARN: $1" >&2
+}
+
+# Print a success message to stdout.
+print_success() {
+    echo "OK: $1"
+}
+
+# Only the first argument is inspected; anything else selects the full scan.
+CHECK_ONLY=false
+if [ "${1:-}" == "--check-only" ]; then
+    CHECK_ONLY=true
+fi
+
+if [ ! -f "setup.cfg" ] && [ ! -f "pyproject.toml" ] && [ !
-f "setup.py" ]; then
+    print_error "Must run from project root (setup.cfg/pyproject.toml/setup.py not found)"
+fi
+
+# Pick up the shared exclude regex from the file next to this script when it
+# exists; if the variable is still unset or empty afterwards, fall back to the
+# default pattern below.
+EXCLUDES_FILE="$(dirname "$0")/secret-excludes.sh"
+if [ -f "$EXCLUDES_FILE" ]; then
+    # shellcheck source=scripts/ci/secret-excludes.sh
+    source "$EXCLUDES_FILE"
+fi
+SECRET_SCAN_EXCLUDE_REGEX="${SECRET_SCAN_EXCLUDE_REGEX:-^(plans/|tmp/)}"
+
+# PYTHON_CMD holds the argv prefix used to launch Python; it stays empty until
+# one of the probes below succeeds.
+PYTHON_CMD=()
+# Set to 1 once detect_secrets is found to be importable by PYTHON_CMD.
+DETECT_SECRETS_AVAILABLE=0
+
+# First choice: `uv run python3`, with the uv cache placed in .uv-cache under
+# the current directory unless UV_CACHE_DIR is already set. Skipped when
+# PRE_COMMIT=1.
+# NOTE(review): presumably pre-commit exports PRE_COMMIT=1 and supplies its
+# own environment with detect-secrets installed -- confirm.
+if [ "${PRE_COMMIT:-}" != "1" ] && command -v uv >/dev/null 2>&1; then
+    UV_CACHE_DIR="${UV_CACHE_DIR:-$(pwd)/.uv-cache}"
+    export UV_CACHE_DIR
+    mkdir -p "$UV_CACHE_DIR"
+    PYTHON_CMD=(uv run python3)
+fi
+
+# Fallbacks, tried only while no interpreter has been chosen yet.
+if [ ${#PYTHON_CMD[@]} -eq 0 ] && command -v python3 >/dev/null 2>&1; then
+    PYTHON_CMD=(python3)
+fi
+
+if [ ${#PYTHON_CMD[@]} -eq 0 ] && command -v python >/dev/null 2>&1; then
+    PYTHON_CMD=(python)
+fi
+
+if [ ${#PYTHON_CMD[@]} -eq 0 ]; then
+    print_error "python not found; needed to run detect-secrets"
+fi
+
+# Ask the chosen interpreter (program fed on stdin, all output discarded)
+# whether the detect_secrets package can be located. A failing probe is not an
+# error here; each mode below decides how to react to the flag.
+if "${PYTHON_CMD[@]}" - <<'PY' >/dev/null 2>&1
+import importlib.util
+import sys
+sys.exit(0 if importlib.util.find_spec("detect_secrets") else 1)
+PY
+then
+    DETECT_SECRETS_AVAILABLE=1
+fi
+
+# Both paths are relative to the current directory.
+BASELINE=".secrets.baseline"
+SCRIPT_PATH="scripts/ci/secret-detection.py"
+if [ ! -f "$SCRIPT_PATH" ]; then
+    print_error "Missing $SCRIPT_PATH"
+fi
+
+if [ "$CHECK_ONLY" == true ]; then
+    echo "Checking for secrets in staged files..."
+
+    # Collect staged files (added, copied or modified), NUL-delimited so that
+    # unusual file names survive, skipping the baseline itself and anything
+    # matching the exclude regex.
+    staged_files=()
+    while IFS= read -r -d '' file; do
+        case "$file" in
+            .secrets.baseline)
+                continue
+                ;;
+        esac
+        if [[ "$file" =~ $SECRET_SCAN_EXCLUDE_REGEX ]]; then
+            continue
+        fi
+        staged_files+=("$file")
+    done < <(git diff --cached --name-only -z --diff-filter=ACM)
+
+    if [ ${#staged_files[@]} -eq 0 ]; then
+        print_success "No files to check"
+        exit 0
+    fi
+
+    # Cheap project-specific pattern checks. These run even when
+    # detect-secrets is not installed. The patterns are given with -e and the
+    # file name follows "--" so that a staged file whose name starts with "-"
+    # is treated as a file, not as a grep option.
+    for file in "${staged_files[@]}"; do
+        # This script contains the patterns themselves, so it is skipped.
+        if [[ "$file" == "scripts/ci/secret-detection.sh" ]]; then
+            continue
+        fi
+        if grep -qE -e "(accountaccount|testtest|password123)" -- "$file" 2>/dev/null; then
+            print_error "Found hardcoded test password in $file - use placeholders like ''"
+        fi
+        if grep -qE -e "graphistry-(dev|test|staging)\.(grph\.xyz|graphistry\.com)" -- "$file" 2>/dev/null; then
+            print_error "Found internal dev server URL in $file - use hub.graphistry.com or localhost"
+        fi
+        # Warning only: this match does not stop the commit.
+        if grep -qE -e "(GRAPHISTRY_)(USERNAME|PASSWORD|TOKEN|KEY|SECRET)\s*=\s*['\"]?[A-Za-z0-9]+" -- "$file" 2>/dev/null; then
+            print_warning "Possible hardcoded credential in $file - use os.environ.get() with defaults"
+        fi
+    done
+
+    # Best effort in check-only mode: a missing detect-secrets is a warning
+    # and the check passes.
+    if [ "$DETECT_SECRETS_AVAILABLE" -ne 1 ]; then
+        print_warning "detect-secrets not found; install with: uv pip install detect-secrets (or pip install detect-secrets)"
+        exit 0
+    fi
+
+    if [ ! -f "$BASELINE" ]; then
+        print_warning "No $BASELINE found. Creating initial baseline..."
+        "${PYTHON_CMD[@]}" "$SCRIPT_PATH" create-baseline --baseline "$BASELINE" --exclude-files "$SECRET_SCAN_EXCLUDE_REGEX" .
+        print_success "Created $BASELINE - please review and commit"
+        exit 0
+    fi
+
+    # "--" ends option parsing so staged file names are always read as paths.
+    if ! "${PYTHON_CMD[@]}" "$SCRIPT_PATH" scan --baseline "$BASELINE" --exclude-files "$SECRET_SCAN_EXCLUDE_REGEX" -- "${staged_files[@]}"; then
+        print_error "New secrets detected. Use clear placeholders like 'sk-XXXX' or ''"
+    fi
+
+    print_success "No secrets detected"
+else
+    # Full-scan mode: detect-secrets is mandatory here.
+    if [ "$DETECT_SECRETS_AVAILABLE" -ne 1 ]; then
+        print_error "detect-secrets not found. Install with: pip install detect-secrets"
+    fi
+
+    if [ !
-f "$BASELINE" ]; then
+        print_warning "No $BASELINE found. Creating initial baseline..."
+        "${PYTHON_CMD[@]}" "$SCRIPT_PATH" create-baseline --baseline "$BASELINE" --exclude-files "$SECRET_SCAN_EXCLUDE_REGEX" .
+        print_success "Created $BASELINE - please review and commit"
+        exit 0
+    fi
+
+    echo "Running full secret detection scan..."
+
+    # First pass: scan the whole tree against the baseline. print_error exits,
+    # so a failure here ends the script.
+    "${PYTHON_CMD[@]}" "$SCRIPT_PATH" scan --baseline "$BASELINE" --exclude-files "$SECRET_SCAN_EXCLUDE_REGEX" . || {
+        print_error "New secrets detected. Remove them or update $BASELINE"
+    }
+
+    # Second pass: same scan restricted to verified findings.
+    # NOTE(review): this runs only after the first pass succeeded; confirm it
+    # can still report something the first pass did not.
+    "${PYTHON_CMD[@]}" "$SCRIPT_PATH" scan --baseline "$BASELINE" --only-verified --exclude-files "$SECRET_SCAN_EXCLUDE_REGEX" . || {
+        print_error "High-confidence secrets detected. These must be removed"
+    }
+
+    print_success "Secret detection passed - no new secrets found"
+fi
diff --git a/scripts/ci/secret-excludes.sh b/scripts/ci/secret-excludes.sh
new file mode 100644
index 0000000000..c6eb3b9644
--- /dev/null
+++ b/scripts/ci/secret-excludes.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Centralized exclude regex for secret scanning
+
+SECRET_SCAN_EXCLUDE_REGEX='(^|.*/)(plans/|tmp/|demos/demos_by_use_case/logs/)'
diff --git a/scripts/pre-commit-secret-check.sh b/scripts/pre-commit-secret-check.sh
new file mode 100755
index 0000000000..1ada10c00b
--- /dev/null
+++ b/scripts/pre-commit-secret-check.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# pre-commit-secret-check.sh - Wrapper for pre-commit hook
+
+set -euo pipefail
+
+exec "$(dirname "$0")/ci/secret-detection.sh" --check-only
diff --git a/scripts/secrets.sh b/scripts/secrets.sh
new file mode 100755
index 0000000000..8958c442f6
--- /dev/null
+++ b/scripts/secrets.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# secrets.sh - Run secret detection manually
+# Usage: ./scripts/secrets.sh [--update-baseline]
+
+set -euo pipefail
+
+if [ "${1:-}" == "--update-baseline" ]; then
+    EXCLUDES_FILE="$(dirname "$0")/ci/secret-excludes.sh"
+    if [ -f "$EXCLUDES_FILE" ]; then
+        # shellcheck source=scripts/ci/secret-excludes.sh
+        source
"$EXCLUDES_FILE" + fi + SECRET_SCAN_EXCLUDE_REGEX="${SECRET_SCAN_EXCLUDE_REGEX:-^(plans/|tmp/)}" + + PYTHON_CMD=() + + if command -v uv >/dev/null 2>&1; then + UV_CACHE_DIR="${UV_CACHE_DIR:-$(pwd)/.uv-cache}" + export UV_CACHE_DIR + mkdir -p "$UV_CACHE_DIR" + PYTHON_CMD=(uv run python3) + fi + + if [ ${#PYTHON_CMD[@]} -eq 0 ] && command -v python3 >/dev/null 2>&1; then + PYTHON_CMD=(python3) + fi + + if [ ${#PYTHON_CMD[@]} -eq 0 ] && command -v python >/dev/null 2>&1; then + PYTHON_CMD=(python) + fi + + if [ ${#PYTHON_CMD[@]} -eq 0 ]; then + echo "ERROR: python not found; needed to run detect-secrets" >&2 + exit 1 + fi + + if "${PYTHON_CMD[@]}" - <<'PY' >/dev/null 2>&1 +import importlib.util +import sys +sys.exit(0 if importlib.util.find_spec("detect_secrets") else 1) +PY + then + : + else + echo "ERROR: detect-secrets not found. Install with: pip install detect-secrets" >&2 + exit 1 + fi + + SCRIPT_PATH="$(dirname "$0")/ci/secret-detection.py" + if [ ! -f "$SCRIPT_PATH" ]; then + echo "ERROR: missing $SCRIPT_PATH" >&2 + exit 1 + fi + + echo "Updating secrets baseline..." + "${PYTHON_CMD[@]}" "$SCRIPT_PATH" create-baseline --baseline .secrets.baseline --exclude-files "$SECRET_SCAN_EXCLUDE_REGEX" . + echo "Baseline updated. Review changes and commit if appropriate." + exit 0 +fi + +exec "$(dirname "$0")/ci/secret-detection.sh" diff --git a/setup.py b/setup.py index 3c09cb95c5..80e36b7c4b 100755 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ def unique_flatten_dict(d): 'palettable >= 3.0', 'pandas', 'pyarrow >= 0.15.0', + 'pytz', 'requests', 'squarify', 'typing-extensions', @@ -44,7 +45,8 @@ def unique_flatten_dict(d): 'testai': [ 'numba>=0.57.1' # https://github.com/numba/numba/issues/8615 ], - 'build': ['build'] + 'build': ['build'], + 'devtools': ['detect-secrets', 'pre-commit'] } base_extras_light = {