diff --git a/src/sentry_app_hang_latch.c b/src/sentry_app_hang_latch.c index 96a1dd736..334e89f4f 100644 --- a/src/sentry_app_hang_latch.c +++ b/src/sentry_app_hang_latch.c @@ -61,6 +61,12 @@ sentry__app_hang_set_active(bool active) sentry__atomic_store(&g_app_hang_active, active ? 1 : 0); } +bool +sentry__app_hang_is_active(void) +{ + return sentry__atomic_fetch(&g_app_hang_active) != 0; +} + uint64_t sentry__app_hang_current_tid(void) { diff --git a/src/sentry_app_hang_latch.h b/src/sentry_app_hang_latch.h index d5d5af2e4..5d0123543 100644 --- a/src/sentry_app_hang_latch.h +++ b/src/sentry_app_hang_latch.h @@ -23,6 +23,9 @@ void sentry__app_hang_latch_reset(void); // when detection is not running. void sentry__app_hang_set_active(bool active); +// Whether app-hang detection is currently armed. +bool sentry__app_hang_is_active(void); + sentry_value_t sentry__app_hang_make_event( void **ips, size_t frame_count, uint64_t freeze_ms); diff --git a/src/sentry_app_hang_monitor.c b/src/sentry_app_hang_monitor.c index da045e00f..0b809f41a 100644 --- a/src/sentry_app_hang_monitor.c +++ b/src/sentry_app_hang_monitor.c @@ -54,7 +54,6 @@ sentry__app_hang_monitor_set_stackwalk_fn(sentry__app_hang_stackwalk_fn fn) #if SENTRY_HAS_THREAD_STACKWALK static bool g_running = false; -static volatile long g_stop = 0; static sentry_threadid_t g_thread; static sentry_mutex_t g_wait_mutex = SENTRY__MUTEX_INIT; static sentry_cond_t g_wait_cond; @@ -90,13 +89,13 @@ worker(void *arg) { (void)arg; uint64_t last_fired_hb = 0; - while (!sentry__atomic_fetch(&g_stop)) { + while (sentry__app_hang_is_active()) { sentry__mutex_lock(&g_wait_mutex); sentry__cond_wait_timeout( &g_wait_cond, &g_wait_mutex, SENTRY_APP_HANG_POLL_MS); sentry__mutex_unlock(&g_wait_mutex); - if (sentry__atomic_fetch(&g_stop)) { + if (!sentry__app_hang_is_active()) { break; } @@ -104,6 +103,10 @@ worker(void *arg) uint64_t now = sentry__monotonic_time(); if (sentry__app_hang_should_capture( latch.last_heartbeat_ms, now, g_timeout_ms, last_fired_hb)) { + // Bail if disarmed. Keeps duplicate reporting window minimal + if (!sentry__app_hang_is_active()) { + break; + } // Only mark this freeze as fired when an event was actually // captured. A transient stackwalk failure (0 frames) must not // suppress retries while the thread remains stuck. @@ -128,15 +131,17 @@ sentry__app_hang_monitor_start(const sentry_options_t *options) SENTRY_WARN("app-hang: `app_hang_timeout` is 0, hang detection is " "disabled"); } - sentry__atomic_store(&g_stop, 0); sentry__cond_init(&g_wait_cond); + // Arm before spawning: the worker uses is_active() as its run condition, so + // it must already be true when the new thread first evaluates the loop. + sentry__app_hang_set_active(true); if (sentry__thread_spawn(&g_thread, worker, NULL) != 0) { + sentry__app_hang_set_active(false); SENTRY_WARN("app-hang: failed to spawn watchdog thread"); return 1; } g_running = true; - sentry__app_hang_set_active(true); SENTRY_DEBUG("app-hang watchdog started"); return 0; } @@ -148,7 +153,6 @@ sentry__app_hang_monitor_stop(void) return; } sentry__app_hang_set_active(false); - sentry__atomic_store(&g_stop, 1); sentry__mutex_lock(&g_wait_mutex); sentry__cond_wake(&g_wait_cond); sentry__mutex_unlock(&g_wait_mutex); diff --git a/src/sentry_core.c b/src/sentry_core.c index 9b19cc9d1..6fdc4116d 100644 --- a/src/sentry_core.c +++ b/src/sentry_core.c @@ -3,6 +3,7 @@ #include #include +#include "sentry_app_hang_latch.h" #include "sentry_app_hang_monitor.h" #include "sentry_attachment.h" #include "sentry_backend.h" @@ -830,6 +831,9 @@ prepare_user_feedback(const sentry_options_t *options, void sentry_handle_exception(const sentry_ucontext_t *uctx) { + // Disarm the app-hang monitor to avoid duplicate capture. + sentry__app_hang_set_active(false); + SENTRY_WITH_OPTIONS (options) { if (options->backend && options->backend->except_func) { options->backend->except_func(options->backend, uctx); diff --git a/tests/unit/test_app_hang.c b/tests/unit/test_app_hang.c index 9198ae75a..9b5aea60b 100644 --- a/tests/unit/test_app_hang.c +++ b/tests/unit/test_app_hang.c @@ -125,6 +125,39 @@ SENTRY_TEST(app_hang_monitor_fires) sentry__app_hang_monitor_set_stackwalk_fn(NULL); } +SENTRY_TEST(app_hang_disarm_prevents_capture) +{ + // Mirrors app_hang_monitor_fires, but disarms after latching. This is the + // crash-handler path: once disarmed, the watchdog must not capture an + // app-hang even though the latched thread stops heart-beating (so a crash + // is never also reported as an app-hang). + g_app_hang_seen = 0; + g_app_hang_type[0] = '\0'; + sentry__app_hang_latch_reset(); + sentry__app_hang_monitor_set_stackwalk_fn(fake_stackwalk); + + sentry_options_t *options = sentry_options_new(); + sentry_options_set_dsn(options, "https://foo@sentry.invalid/42"); + sentry_options_set_before_send(options, capture_before_send, NULL); + sentry_options_set_enable_app_hang_tracking(options, 1); + sentry_options_set_app_hang_timeout(options, 50); + sentry_init(options); + + sentry_app_hang_heartbeat(); + // Simulate entering the crash handler: disable -> never heartbeat again. + sentry__app_hang_set_active(false); + + // Wait well past several timeout/poll cycles to be sure nothing fires. + for (int i = 0; i < 50; i++) { + sleep_ms(10); + } + + TEST_CHECK(sentry__atomic_fetch(&g_app_hang_seen) == 0); + + sentry_close(); + sentry__app_hang_monitor_set_stackwalk_fn(NULL); +} + static long g_real_seen; static long g_real_frames; static volatile long g_keep_spinning; diff --git a/tests/unit/tests.inc b/tests/unit/tests.inc index e7d78bd71..8adce0aad 100644 --- a/tests/unit/tests.inc +++ b/tests/unit/tests.inc @@ -1,3 +1,9 @@ +XX(app_hang_disarm_prevents_capture) +XX(app_hang_end_to_end) +XX(app_hang_latch) +XX(app_hang_make_event) +XX(app_hang_monitor_fires) +XX(app_hang_should_capture) XX(assert_sdk_name) XX(assert_sdk_user_agent) XX(assert_sdk_version) @@ -15,11 +21,6 @@ XX(attachments_add_remove) XX(attachments_bytes) XX(attachments_extend) XX(attachments_more_than_ten) -XX(app_hang_should_capture) -XX(app_hang_latch) -XX(app_hang_make_event) -XX(app_hang_monitor_fires) -XX(app_hang_end_to_end) XX(background_worker) XX(baggage_iter_basic) XX(baggage_iter_case_preserved)