Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
GITHUB_TOKEN=your_github_token
DISCORD_TOKEN=your_discord_token
GITHUB_WEBHOOK_SECRET=change_me_to_a_long_random_secret
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ Core boundaries:
Load config -> Ingest -> Score -> Plan -> Audit -> (Optional) Apply
```

### GitHub Webhook Ingestion

Gitcord supports a secure webhook ingestion layer for org-level GitHub events. Webhook payloads are verified with `X-Hub-Signature-256`, deduped with `X-GitHub-Delivery`, and stored as the same `ContributionEvent` records used by `/sync`.

This keeps `/sync` useful as a backfill/reconcile path while allowing future deployments to process GitHub events without scanning every repository on each sync. Deployments should terminate TLS before the webhook handler and keep `GITHUB_WEBHOOK_SECRET` out of source control.

### Key User Journeys

1. **Dry‑run review**
Expand Down
5 changes: 5 additions & 0 deletions config/example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ assignments:
repo_contributor_roles:
castro: "castro"

# Optional: GitHub org webhooks, verified with X-Hub-Signature-256.
# webhooks:
# enabled: false
# secret: "${GITHUB_WEBHOOK_SECRET}"

identity_mappings:
- github_user: "YOUR_GITHUB_USERNAME"
discord_user_id: "YOUR_DISCORD_USER_ID"
21 changes: 21 additions & 0 deletions src/ghdcbot/adapters/storage/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def init_schema(self) -> None:
source TEXT PRIMARY KEY,
cursor TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS webhook_deliveries (
delivery_id TEXT PRIMARY KEY,
event_name TEXT NOT NULL,
received_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS identity_links (
discord_user_id TEXT NOT NULL,
github_user TEXT NOT NULL,
Expand Down Expand Up @@ -317,6 +322,22 @@ def set_cursor(self, source: str, cursor: datetime) -> None:
(source, cursor_utc.isoformat()),
)

def record_webhook_delivery(self, delivery_id: str, event_name: str) -> bool:
# GitHub can retry the same delivery.
now = datetime.now(timezone.utc).isoformat()
with self._connect() as conn:
try:
conn.execute(
"""
INSERT INTO webhook_deliveries (delivery_id, event_name, received_at)
VALUES (?, ?, ?)
""",
(delivery_id, event_name, now),
)
except sqlite3.IntegrityError:
return False
return True
Comment on lines +325 to +339

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial | 💤 Low value

Consider using datetime.UTC alias.

Python 3.11+ (which this project requires) provides datetime.UTC as a cleaner alias for timezone.utc.

♻️ Optional modernization
     def record_webhook_delivery(self, delivery_id: str, event_name: str) -> bool:
         # GitHub can retry the same delivery.
-        now = datetime.now(timezone.utc).isoformat()
+        now = datetime.now(datetime.UTC).isoformat()
         with self._connect() as conn:
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def record_webhook_delivery(self, delivery_id: str, event_name: str) -> bool:
# GitHub can retry the same delivery.
now = datetime.now(timezone.utc).isoformat()
with self._connect() as conn:
try:
conn.execute(
"""
INSERT INTO webhook_deliveries (delivery_id, event_name, received_at)
VALUES (?, ?, ?)
""",
(delivery_id, event_name, now),
)
except sqlite3.IntegrityError:
return False
return True
def record_webhook_delivery(self, delivery_id: str, event_name: str) -> bool:
# GitHub can retry the same delivery.
now = datetime.now(datetime.UTC).isoformat()
with self._connect() as conn:
try:
conn.execute(
"""
INSERT INTO webhook_deliveries (delivery_id, event_name, received_at)
VALUES (?, ?, ?)
""",
(delivery_id, event_name, now),
)
except sqlite3.IntegrityError:
return False
return True
🧰 Tools
🪛 Ruff (0.15.12)

[warning] 327-327: Use datetime.UTC alias

Convert to datetime.UTC alias

(UP017)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/ghdcbot/adapters/storage/sqlite.py` around lines 325 - 339, In
record_webhook_delivery, replace the use of timezone.utc with the Python 3.11+
alias datetime.UTC when constructing the timestamp (now =
datetime.now(datetime.UTC).isoformat()) so the code uses the modern alias;
update the import usage if needed so datetime.UTC is referenced from the
datetime module and keep the rest of the function (connection, INSERT,
IntegrityError handling) unchanged.


def create_identity_claim(
self,
discord_user_id: str,
Expand Down
21 changes: 20 additions & 1 deletion src/ghdcbot/config/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from pydantic import BaseModel, Field, HttpUrl, field_validator
from pydantic import BaseModel, Field, HttpUrl, field_validator, model_validator

from ghdcbot.core.modes import RunMode

Expand Down Expand Up @@ -216,6 +216,24 @@ class SnapshotConfig(BaseModel):
branch: str | None = None


class WebhookConfig(BaseModel):
enabled: bool = False
secret: str = ""

@field_validator("secret")
@classmethod
def validate_secret(cls, value: str) -> str:
if value and len(value) < 16:
raise ValueError("webhooks.secret must be at least 16 characters")
return value

@model_validator(mode="after")
def validate_enabled_secret(self) -> "WebhookConfig":
if self.enabled and not self.secret:
raise ValueError("webhooks.secret is required when webhooks.enabled is true")
return self
Comment on lines +230 to +234

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial | 💤 Low value

Consider unquoting the return type annotation.

Since from __future__ import annotations is already imported (line 1), the return type can be unquoted for consistency.

♻️ Optional style improvement
     `@model_validator`(mode="after")
-    def validate_enabled_secret(self) -> "WebhookConfig":
+    def validate_enabled_secret(self) -> WebhookConfig:
         if self.enabled and not self.secret:
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
@model_validator(mode="after")
def validate_enabled_secret(self) -> "WebhookConfig":
if self.enabled and not self.secret:
raise ValueError("webhooks.secret is required when webhooks.enabled is true")
return self
`@model_validator`(mode="after")
def validate_enabled_secret(self) -> WebhookConfig:
if self.enabled and not self.secret:
raise ValueError("webhooks.secret is required when webhooks.enabled is true")
return self
🧰 Tools
🪛 Ruff (0.15.12)

[warning] 231-231: Remove quotes from type annotation

Remove quotes

(UP037)


[warning] 233-233: Avoid specifying long messages outside the exception class

(TRY003)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/ghdcbot/config/models.py` around lines 230 - 234, The
validate_enabled_secret model validator currently annotates its return type as a
quoted string "WebhookConfig"; since from __future__ import annotations is
present, change the annotation to an unquoted WebhookConfig on the
validate_enabled_secret method so the signature uses WebhookConfig (no quotes),
keeping the same behavior for the model_validator decorator and returning self.



class BotConfig(BaseModel):
runtime: RuntimeConfig
github: GitHubConfig
Expand All @@ -228,6 +246,7 @@ class BotConfig(BaseModel):
merge_role_rules: MergeRoleRulesConfig | None = None
# Optional: GitHub snapshot storage
snapshots: SnapshotConfig | None = None
webhooks: WebhookConfig | None = None
# Optional: repo name -> Discord role for "Contributor-X" (PR merged in repo X grants role)
repo_contributor_roles: dict[str, str] | None = None

Expand Down
4 changes: 4 additions & 0 deletions src/ghdcbot/core/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ def get_cursor(self, source: str) -> datetime | None:
def set_cursor(self, source: str, cursor: datetime) -> None:
"""Persist last sync cursor for a source."""

def record_webhook_delivery(self, delivery_id: str, event_name: str) -> bool:
# Return False when a delivery was already stored.
...


class ScoreStrategy(Protocol):
def compute_scores(
Expand Down
250 changes: 250 additions & 0 deletions src/ghdcbot/engine/webhooks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
from __future__ import annotations

import hashlib
import hmac
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Mapping

from ghdcbot.core.models import ContributionEvent


SIGNATURE_HEADER = "X-Hub-Signature-256"
EVENT_HEADER = "X-GitHub-Event"
DELIVERY_HEADER = "X-GitHub-Delivery"


class WebhookError(ValueError):
pass


@dataclass(frozen=True)
class GitHubDelivery:
event_name: str
delivery_id: str
events: list[ContributionEvent]


@dataclass(frozen=True)
class DeliveryIngestResult:
delivery_id: str
event_name: str
stored: int
duplicate: bool = False


def verify_github_signature(body: bytes, signature_header: str | None, secret: str) -> None:
# GitHub signs the raw request body, not the parsed JSON.
if not secret:
raise WebhookError("GitHub webhook secret is not configured")
if not signature_header:
raise WebhookError("Missing X-Hub-Signature-256 header")
if not signature_header.startswith("sha256="):
raise WebhookError("Unsupported GitHub webhook signature format")

expected = "sha256=" + hmac.new(
secret.encode("utf-8"),
body,
hashlib.sha256,
).hexdigest()
if not hmac.compare_digest(expected, signature_header):
raise WebhookError("Invalid GitHub webhook signature")


def parse_github_delivery(
headers: Mapping[str, str],
body: bytes,
payload: Mapping[str, Any],
secret: str,
) -> GitHubDelivery:
signature = _get_header(headers, SIGNATURE_HEADER)
event_name = _get_header(headers, EVENT_HEADER)
delivery_id = _get_header(headers, DELIVERY_HEADER)

if not event_name:
raise WebhookError("Missing X-GitHub-Event header")
if not delivery_id:
raise WebhookError("Missing X-GitHub-Delivery header")

verify_github_signature(body, signature, secret)
events = map_github_webhook_to_contributions(event_name, payload)
return GitHubDelivery(event_name=event_name, delivery_id=delivery_id, events=events)


def ingest_github_delivery(storage: Any, delivery: GitHubDelivery) -> DeliveryIngestResult:
record_delivery = getattr(storage, "record_webhook_delivery", None)
if not callable(record_delivery):
raise WebhookError("Storage adapter does not support webhook delivery dedupe")

if not record_delivery(delivery.delivery_id, delivery.event_name):
return DeliveryIngestResult(
delivery_id=delivery.delivery_id,
event_name=delivery.event_name,
stored=0,
duplicate=True,
)

stored = storage.record_contributions(delivery.events) if delivery.events else 0
Comment on lines +79 to +87

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

Make dedupe marking and contribution persistence atomic

Line 79 records the delivery as processed before Line 87 writes events. If record_contributions(...) fails (e.g., transient DB error), retries will be flagged duplicate and the events are lost permanently.

Suggested direction
 def ingest_github_delivery(storage: Any, delivery: GitHubDelivery) -> DeliveryIngestResult:
-    record_delivery = getattr(storage, "record_webhook_delivery", None)
-    if not callable(record_delivery):
-        raise WebhookError("Storage adapter does not support webhook delivery dedupe")
-
-    if not record_delivery(delivery.delivery_id, delivery.event_name):
-        return DeliveryIngestResult(
-            delivery_id=delivery.delivery_id,
-            event_name=delivery.event_name,
-            stored=0,
-            duplicate=True,
-        )
-
-    stored = storage.record_contributions(delivery.events) if delivery.events else 0
-    return DeliveryIngestResult(
-        delivery_id=delivery.delivery_id,
-        event_name=delivery.event_name,
-        stored=stored,
-    )
+    # Prefer a single storage call that performs:
+    # 1) insert delivery-id if new
+    # 2) persist contributions
+    # in one DB transaction.
+    ingest = getattr(storage, "ingest_webhook_delivery", None)
+    if not callable(ingest):
+        raise WebhookError("Storage adapter does not support atomic webhook ingest")
+    stored, duplicate = ingest(
+        delivery_id=delivery.delivery_id,
+        event_name=delivery.event_name,
+        events=delivery.events,
+    )
+    return DeliveryIngestResult(
+        delivery_id=delivery.delivery_id,
+        event_name=delivery.event_name,
+        stored=stored,
+        duplicate=duplicate,
+    )
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/ghdcbot/engine/webhooks.py` around lines 79 - 87, The code currently
calls record_delivery(delivery.delivery_id, delivery.event_name) before
persisting events (storage.record_contributions), which can mark retries as
duplicates if persistence fails; change the flow so dedupe marking and
contributions persistence are done atomically: either perform
storage.record_contributions first and only call record_delivery on successful
write, or (preferably) use a single transactional operation in your data layer
that inserts contributions and sets the delivery as processed together; update
the logic around record_delivery and storage.record_contributions (referenced
symbols: record_delivery, storage.record_contributions, DeliveryIngestResult,
delivery.delivery_id, delivery.event_name, delivery.events) to ensure failures
roll back and do not leave the delivery marked processed.

return DeliveryIngestResult(
delivery_id=delivery.delivery_id,
event_name=delivery.event_name,
stored=stored,
)


def map_github_webhook_to_contributions(
event_name: str,
payload: Mapping[str, Any],
) -> list[ContributionEvent]:
action = payload.get("action")
repository = payload.get("repository") or {}
repo = _repo_name(repository)
if not repo:
return []

if event_name == "pull_request":
pull_request = payload.get("pull_request") or {}
if action == "opened":
return [_pull_request_event("pr_opened", repo, pull_request)]
if action == "closed" and pull_request.get("merged") is True:
event = _pull_request_event("pr_merged", repo, pull_request, time_field="merged_at")
labels = _label_names(pull_request.get("labels") or [])
if labels:
event.payload["difficulty_labels"] = labels
return [event]
return []

if event_name == "pull_request_review" and action == "submitted":
review = payload.get("review") or {}
pull_request = payload.get("pull_request") or {}
return [_pull_request_review_event(repo, pull_request, review)]

if event_name == "issues" and action == "opened":
issue = payload.get("issue") or {}
return [_issue_event("issue_opened", repo, issue)]

if event_name == "issue_comment" and action == "created":
issue = payload.get("issue") or {}
comment = payload.get("comment") or {}
return [_comment_event(repo, issue, comment)]

return []


def _pull_request_event(
event_type: str,
repo: str,
pull_request: Mapping[str, Any],
*,
time_field: str = "created_at",
) -> ContributionEvent:
user = pull_request.get("user") or {}
pr_number = int(pull_request.get("number") or 0)
return ContributionEvent(
github_user=str(user.get("login") or ""),
event_type=event_type,
repo=repo,
created_at=_parse_github_time(pull_request.get(time_field) or pull_request.get("created_at")),
payload={
"pr_number": pr_number,
"title": pull_request.get("title"),
"url": pull_request.get("html_url"),
"merged": pull_request.get("merged") is True,
},
)


def _pull_request_review_event(
repo: str,
pull_request: Mapping[str, Any],
review: Mapping[str, Any],
) -> ContributionEvent:
reviewer = review.get("user") or {}
author = pull_request.get("user") or {}
return ContributionEvent(
github_user=str(reviewer.get("login") or ""),
event_type="pr_reviewed",
repo=repo,
created_at=_parse_github_time(review.get("submitted_at")),
payload={
"pr_number": int(pull_request.get("number") or 0),
"review_id": review.get("id"),
"state": review.get("state"),
"url": review.get("html_url"),
"pr_author": author.get("login"),
},
)


def _issue_event(event_type: str, repo: str, issue: Mapping[str, Any]) -> ContributionEvent:
user = issue.get("user") or {}
return ContributionEvent(
github_user=str(user.get("login") or ""),
event_type=event_type,
repo=repo,
created_at=_parse_github_time(issue.get("created_at")),
payload={
"issue_number": int(issue.get("number") or 0),
"title": issue.get("title"),
"url": issue.get("html_url"),
},
)


def _comment_event(
repo: str,
issue: Mapping[str, Any],
comment: Mapping[str, Any],
) -> ContributionEvent:
user = comment.get("user") or {}
target_type = "pull_request" if issue.get("pull_request") else "issue"
return ContributionEvent(
github_user=str(user.get("login") or ""),
event_type="comment",
repo=repo,
created_at=_parse_github_time(comment.get("created_at")),
payload={
"target_type": target_type,
"target_number": int(issue.get("number") or 0),
"comment_id": comment.get("id"),
"url": comment.get("html_url"),
},
)


def _repo_name(repository: Mapping[str, Any]) -> str:
full_name = repository.get("full_name")
if full_name:
return str(full_name)
owner = repository.get("owner") or {}
owner_login = owner.get("login")
name = repository.get("name")
if owner_login and name:
return f"{owner_login}/{name}"
if name:
return str(name)
return ""


def _label_names(labels: list[Any]) -> list[str]:
names = []
for label in labels:
if isinstance(label, Mapping) and label.get("name"):
names.append(str(label["name"]))
return names


def _parse_github_time(value: Any) -> datetime:
if not value:
return datetime.now(timezone.utc)
if isinstance(value, datetime):
return value if value.tzinfo else value.replace(tzinfo=timezone.utc)
parsed = datetime.fromisoformat(str(value).replace("Z", "+00:00"))
return parsed if parsed.tzinfo else parsed.replace(tzinfo=timezone.utc)


def _get_header(headers: Mapping[str, str], name: str) -> str | None:
for key, value in headers.items():
if key.lower() == name.lower():
return value
return None
21 changes: 21 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,24 @@ def test_role_mappings_required() -> None:
with pytest.raises(ValidationError) as excinfo:
BotConfig.model_validate(payload)
assert "role_mappings" in str(excinfo.value)


def test_webhooks_enabled_requires_secret() -> None:
payload = {
"runtime": {
"mode": "dry-run",
"log_level": "INFO",
"data_dir": "/tmp",
"github_adapter": "ghdcbot.adapters.github.rest:GitHubRestAdapter",
"discord_adapter": "ghdcbot.adapters.discord.api:DiscordApiAdapter",
"storage_adapter": "ghdcbot.adapters.storage.sqlite:SqliteStorage",
},
"github": {"org": "x", "token": "t", "api_base": "https://api.github.com"},
"discord": {"guild_id": "1", "token": "t"},
"scoring": {"period_days": 30, "weights": {"pr_merged": 5}},
"role_mappings": [{"discord_role": "Contributor", "min_score": 1}],
"webhooks": {"enabled": True, "secret": ""},
}
with pytest.raises(ValidationError) as excinfo:
BotConfig.model_validate(payload)
assert "webhooks.secret is required" in str(excinfo.value)
Loading
Loading