Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,24 @@ DATABASE_URL=postgres://user:password@localhost:5432/boost_dashboard
# Path to context repository (where markdown files are exported)
# DISCORD_CONTEXT_REPO_PATH=F:\boost\discord-cplusplus-together-context

# Optional: upload exported markdown via GitHub API (same pattern as BOOST_LIBRARY_TRACKER_REPO_*)
# DISCORD_MARKDOWN_REPO_OWNER=your-org
# DISCORD_MARKDOWN_REPO_NAME=your-discord-md-repo
# DISCORD_MARKDOWN_REPO_BRANCH=main

# Per-day exporter (run_discord_channel_export): optional override for script directory; default uses bundled offline_scripts/
# DISCORD_EXPORT_SCRIPT_DIR=
# DISCORD_CHANNEL_EXPORT_NAMES=Discussion - c-cpp-discussion
# DISCORD_EXPORT_TIMEZONE=America/New_York
# DISCORD_EXPORT_CHUNK_DAYS=1

# Pinecone (run_discord_exporter after markdown export)
# DISCORD_PINECONE_APP_TYPE=discord-together-cpp
# DISCORD_PINECONE_NAMESPACE=discord-cplusplus

# DiscordChatExporter version (Tyrrrz/DiscordChatExporter releases); zip download + git clone
# DISCORD_CHAT_EXPORTER_VERSION=2.47

# =============================================================================
# YouTube (cppa_youtube_script_tracker)
# =============================================================================
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ celerybeat.pid
*.swo
.cursor/

# DiscordChatExporter CLI (download to workspace/discord_activity_tracker/tools/)
# DiscordChatExporter CLI / source build (workspace/discord_activity_tracker/)
discord_activity_tracker/tools/
discord_activity_tracker/vendor/
# macOS
.DS_Store
._*
Expand Down
34 changes: 34 additions & 0 deletions config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,40 @@ def _slack_team_scope_from_env():
)
).resolve()

# Markdown upload (GitHub API) — optional; same pattern as BOOST_LIBRARY_TRACKER_REPO_*
# Owner/name default to "" (upload not configured).
DISCORD_MARKDOWN_REPO_OWNER = (
    env("DISCORD_MARKDOWN_REPO_OWNER", default="") or ""
).strip()
DISCORD_MARKDOWN_REPO_NAME = (
    env("DISCORD_MARKDOWN_REPO_NAME", default="") or ""
).strip()
# Strip first, then fall back: a whitespace-only value must not become an empty branch ref.
DISCORD_MARKDOWN_REPO_BRANCH = (
    env("DISCORD_MARKDOWN_REPO_BRANCH", default="main") or ""
).strip() or "main"

# Per-day export script (export_guild_by_day.py) — defaults match workspace/script
DISCORD_EXPORT_SCRIPT_DIR = (
    env("DISCORD_EXPORT_SCRIPT_DIR", default="") or ""
).strip()  # empty = package offline_scripts/
# An explicitly empty value is preserved here (no fallback to the default name).
DISCORD_CHANNEL_EXPORT_NAMES = (
    env("DISCORD_CHANNEL_EXPORT_NAMES", default="Discussion - c-cpp-discussion") or ""
).strip()
# Fallback is applied after stripping so blank/whitespace-only values use the default.
DISCORD_EXPORT_TIMEZONE = (
    env("DISCORD_EXPORT_TIMEZONE", default="America/New_York") or ""
).strip() or "America/New_York"
# Blank/whitespace-only values fall back to 1; non-numeric values still raise ValueError
# at import time so a misconfiguration is noticed immediately.
DISCORD_EXPORT_CHUNK_DAYS = int(
    (env("DISCORD_EXPORT_CHUNK_DAYS", default="1") or "").strip() or "1"
)

# Pinned DiscordChatExporter release (Tyrrrz/DiscordChatExporter); zip + git clone use this tag
DISCORD_CHAT_EXPORTER_VERSION = (
    env("DISCORD_CHAT_EXPORTER_VERSION", default="2.47") or ""
).strip() or "2.47"

# Pinecone (cppa_pinecone_sync) for Discord messages
DISCORD_PINECONE_APP_TYPE = (env("DISCORD_PINECONE_APP_TYPE", default="") or "").strip()
DISCORD_PINECONE_NAMESPACE = (
    env("DISCORD_PINECONE_NAMESPACE", default="") or ""
).strip()

# WG21 Paper Tracker Configuration
WG21_GITHUB_DISPATCH_ENABLED = env.bool("WG21_GITHUB_DISPATCH_ENABLED", default=False)
WG21_GITHUB_DISPATCH_REPO = (env("WG21_GITHUB_DISPATCH_REPO", default="") or "").strip()
Expand Down
8 changes: 8 additions & 0 deletions config/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,11 @@
CLANG_GITHUB_CONTEXT_REPO_OWNER = ""
CLANG_GITHUB_CONTEXT_REPO_NAME = ""
CLANG_GITHUB_CONTEXT_REPO_BRANCH = ""

# Discord: tests must not talk to a real markdown repo or Pinecone.
# With an empty owner/name, discord_markdown_repo_config() returns None,
# so push_discord_markdown_to_github() logs an error and returns False.
DISCORD_MARKDOWN_REPO_OWNER = ""
DISCORD_MARKDOWN_REPO_NAME = ""
DISCORD_PINECONE_APP_TYPE = ""
DISCORD_PINECONE_NAMESPACE = ""

# Fixed DiscordChatExporter version for tests (same value as the settings.py default).
DISCORD_CHAT_EXPORTER_VERSION = "2.47"
6 changes: 6 additions & 0 deletions discord_activity_tracker/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Built / downloaded DiscordChatExporter CLI (also listed in repo root .gitignore)
tools/
vendor/
# Local overrides (tokens, paths) — never commit secrets
*.local
.env.local
70 changes: 70 additions & 0 deletions discord_activity_tracker/github_publish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Upload Discord markdown exports to a GitHub repo (API upload, no local git required)."""

from __future__ import annotations

import logging
from pathlib import Path

from django.conf import settings

from github_ops import get_github_token, upload_folder_to_github

logger = logging.getLogger(__name__)

DEFAULT_BRANCH = "main"


def discord_markdown_repo_config() -> tuple[str, str, str] | None:
    """Return (owner, repo, branch) for Markdown upload, or None if not configured.

    Values are read from the Django settings ``DISCORD_MARKDOWN_REPO_OWNER``,
    ``DISCORD_MARKDOWN_REPO_NAME`` and ``DISCORD_MARKDOWN_REPO_BRANCH``; each is
    stripped of surrounding whitespace.

    Returns:
        ``(owner, repo, branch)`` when both owner and repo are non-empty,
        otherwise ``None``. ``branch`` falls back to ``DEFAULT_BRANCH`` when
        the setting is missing, empty, or whitespace-only.
    """
    owner = (getattr(settings, "DISCORD_MARKDOWN_REPO_OWNER", "") or "").strip()
    repo = (getattr(settings, "DISCORD_MARKDOWN_REPO_NAME", "") or "").strip()
    if not owner or not repo:
        return None

    branch_raw = (
        getattr(settings, "DISCORD_MARKDOWN_REPO_BRANCH", DEFAULT_BRANCH)
        or DEFAULT_BRANCH
    )
    # Apply the fallback *after* stripping: a whitespace-only setting would
    # otherwise become "" and the upload would target an invalid branch ref.
    branch = branch_raw.strip() or DEFAULT_BRANCH
    return owner, repo, branch


def push_discord_markdown_to_github(local_folder: Path) -> bool:
    """Upload all files under ``local_folder`` to the DISCORD_MARKDOWN_REPO_* repo.

    Args:
        local_folder: Directory containing the exported markdown to upload.

    Returns:
        True when the upload call reports success. False when the target repo
        is not configured, ``local_folder`` is not a directory, obtaining the
        token or uploading raises, or the upload result does not report
        success. Failures are logged; no exception is propagated from the
        token lookup or upload call.
    """
    cfg = discord_markdown_repo_config()
    if not cfg:
        logger.error(
            "DISCORD_MARKDOWN_REPO_OWNER / DISCORD_MARKDOWN_REPO_NAME not set; "
            "skipping upload."
        )
        return False
    owner, repo, branch = cfg
    if not local_folder.is_dir():
        logger.error("Markdown folder is not a directory: %s", local_folder)
        return False

    logger.info(
        "Uploading Discord markdown from %s to %s/%s@%s",
        local_folder,
        owner,
        repo,
        branch,
    )
    # This function is a bool success/failure API, so errors from the token
    # lookup (e.g. no write token available) or from the upload itself are
    # logged with their traceback and reported as False instead of escaping.
    try:
        token = get_github_token(use="write")
        result = upload_folder_to_github(
            local_folder=local_folder,
            owner=owner,
            repo=repo,
            commit_message="chore: update Discord archive markdown",
            branch=branch,
            token=token,
        )
    except Exception:
        logger.exception("Discord markdown upload raised an error")
        return False

    if result.get("success"):
        logger.info("Discord markdown upload complete")
        return True
    msg = result.get("message") or "Upload failed"
    logger.error("Discord markdown upload failed: %s", msg)
    return False
151 changes: 151 additions & 0 deletions discord_activity_tracker/management/commands/backfill_discord_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""Import historical DiscordChatExporter JSON (per-day / chunk layout) into the database."""

from __future__ import annotations

import asyncio
import logging
from pathlib import Path

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError

from core.utils.datetime_parsing import parse_iso_datetime
from discord_activity_tracker.sync.backfill_paths import (
iter_discussion_json_files,
json_path_in_date_window,
)
from discord_activity_tracker.sync.chat_exporter import parse_exported_json
from discord_activity_tracker.sync.importer import persist_exporter_channel_payloads
from discord_activity_tracker.workspace import get_discussion_export_dir

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Backfill Discord messages from DiscordChatExporter JSON files.

    Scans a root directory for JSON exports, optionally filters them by a
    date window and a file-count limit, and imports them one file at a time
    via ``persist_exporter_channel_payloads``. A file that fails to parse or
    import is logged and skipped; the remaining files are still processed.
    """

    help = (
        "Backfill Discord messages from DiscordChatExporter JSON files (e.g. "
        "workspace/discord_activity_tracker/Discussion - c-cpp-discussion/). "
        "Processes one file at a time. Use --dry-run to list files only."
    )

    def add_arguments(self, parser):
        """Register the command-line options for this command."""
        parser.add_argument(
            "--path",
            type=str,
            default=None,
            help=(
                "Root directory to scan (default: workspace/discord_activity_tracker/"
                "Discussion - c-cpp-discussion/)."
            ),
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="List files that would be imported; do not write to the database.",
        )
        parser.add_argument(
            "--limit",
            type=int,
            default=0,
            metavar="N",
            help="Process at most N JSON files (0 = no limit).",
        )
        parser.add_argument(
            "--guild-id",
            type=int,
            default=None,
            help="If set, skip JSON whose guild id does not match (default: DISCORD_SERVER_ID).",
        )
        parser.add_argument(
            "--since",
            "--from-date",
            "--start-time",
            type=str,
            default=None,
            dest="since",
            help="Only include files whose day/chunk range overlaps this date (YYYY-MM-DD or ISO). "
            "Aliases: --from-date, --start-time.",
        )
        parser.add_argument(
            "--until",
            "--to-date",
            "--end-time",
            type=str,
            default=None,
            dest="until",
            help="Only include files up to this date (same formats as --since). "
            "Aliases: --to-date, --end-time.",
        )

    def handle(self, *args, **options):
        """Run the backfill.

        Raises:
            CommandError: if --since/--until cannot be parsed, if since is
                later than until, or if the root path is not a directory.
        """
        dry_run = options["dry_run"]
        # Negative limits are treated like 0 (no limit).
        limit = max(0, int(options["limit"] or 0))
        path_arg = options["path"]
        root = (
            Path(path_arg).expanduser().resolve()
            if path_arg
            else get_discussion_export_dir()
        )

        try:
            since_dt = parse_iso_datetime(options.get("since"))
            until_dt = parse_iso_datetime(options.get("until"))
        except ValueError as e:
            raise CommandError(str(e)) from e
        if since_dt and until_dt and since_dt > until_dt:
            raise CommandError("since must be on or before until")

        # An explicit --guild-id wins; otherwise fall back to DISCORD_SERVER_ID.
        expected = options["guild_id"]
        if expected is None:
            # str() keeps this working if the setting is stored as an int.
            cfg_guild = str(getattr(settings, "DISCORD_SERVER_ID", "") or "").strip()
            if cfg_guild:
                try:
                    expected = int(cfg_guild)
                except ValueError:
                    # Keep going without a guild filter, but say so instead of
                    # silently ignoring the misconfigured setting.
                    logger.warning(
                        "DISCORD_SERVER_ID %r is not an integer; "
                        "no guild id filter will be applied",
                        cfg_guild,
                    )
                    expected = None

        if not root.is_dir():
            raise CommandError(f"Not a directory: {root}")

        paths = [
            p
            for p in iter_discussion_json_files(root)
            if json_path_in_date_window(p, since_dt, until_dt)
        ]
        if limit:
            paths = paths[:limit]

        total = len(paths)
        self.stdout.write(f"Found {total} JSON file(s) under {root}")
        if not paths:
            return

        if dry_run:
            for p in paths:
                self.stdout.write(f" {p.relative_to(root)}")
            self.stdout.write(self.style.WARNING("DRY RUN — no database writes"))
            return

        processed = 0
        failed = 0
        for i, json_path in enumerate(paths, 1):
            try:
                data = parse_exported_json(json_path)
                channel_data = {
                    "guild": data.get("guild", {}),
                    "channel": data.get("channel", {}),
                    "messages": data.get("messages", []),
                }
                ch = channel_data["channel"].get("name", "?")
                n = len(channel_data["messages"])
                self.stdout.write(
                    f" [{i}/{total}] {json_path.name} #{ch}: {n} msgs"
                )
                # One event loop per file: each file is imported independently.
                asyncio.run(
                    persist_exporter_channel_payloads(
                        [channel_data],
                        expected_guild_id=expected,
                    )
                )
                processed += 1
            except Exception as e:
                # Best-effort backfill: record the failure and move on to the next file.
                failed += 1
                logger.exception("Backfill failed for %s: %s", json_path, e)
                self.stdout.write(self.style.WARNING(f" Skip {json_path.name}: {e}"))

        if failed:
            # Make skipped files visible in the summary, not only in the log.
            self.stdout.write(
                self.style.WARNING(f"{failed} file(s) failed and were skipped")
            )
        self.stdout.write(
            self.style.SUCCESS(f"✓ Backfill finished ({processed} file(s))")
        )
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,21 @@


class Command(BaseCommand):
help = "Inspect reply links and exported markdown"
help = "Inspect reply links and raw markdown preview for Discord messages stored in the database."

def add_arguments(self, parser):
parser.add_argument(
"--message-id",
type=int,
help="Inspect a specific message by message_id (Discord message ID)",
metavar="ID",
help="Inspect a specific message by Discord snowflake message_id.",
)
parser.add_argument(
"--limit",
type=int,
default=5,
help="Number of reply messages to show (default: 5)",
metavar="N",
help="When listing replies, show at most N rows (default: 5).",
)

def handle(self, *args, **options):
Expand Down
Loading
Loading