]> Kevux Git Server - controller/commitdiff
Bugfix: Final clean up thread deadlocks on interrupt.
authorKevin Day <Kevin@kevux.org>
Tue, 29 Jul 2025 02:30:41 +0000 (21:30 -0500)
committerKevin Day <Kevin@kevux.org>
Tue, 29 Jul 2025 02:43:41 +0000 (21:43 -0500)
There is a sleep at the top of the while loop in the clean up thread.
This sleep does not receive interrupts when `PTHREAD_CANCEL_DEFERRED` is set as the cancel state on the thread.

Set the cancel state to `PTHREAD_CANCEL_ASYNCHRONOUS` during the execution of the loop.
This is safe because there are no special locks being held during this sleep at the top of the loop.

Restore the cancel state to `PTHREAD_CANCEL_DEFERRED` once past the sleep.

Add additional checks in the loops for `controller_thread_enabled_e` to potentially catch any situations where an immediate exit would be ideal.
Make sure all locks are unlocked before breaking out of any part of any loop.

Use a try lock when sending signals to child processes rather than a full lock to increase the chance of handling any interrupts.

sources/c/program/controller/main/thread/cleanup.c

index 77b6e56f07cf516f04d2c60a5f571e677f32601b..04f628b4ccbc0e87f3248f75f961206eef10af96 100644 (file)
@@ -24,8 +24,14 @@ extern "C" {
 
     while (main->thread.enabled == controller_thread_enabled_e) {
 
+      // Allow thread to be interrupted and auto-cancelled while sleeping.
+      f_thread_cancel_state_set(PTHREAD_CANCEL_ASYNCHRONOUS, 0);
+
       f_time_sleep_spec(delay, 0);
 
+      // Prevent thread from being interrupted and auto-cancelled.
+      f_thread_cancel_state_set(PTHREAD_CANCEL_DEFERRED, 0);
+
       if (main->thread.enabled != controller_thread_enabled_e) break;
 
       if (f_thread_lock_write_try(&main->thread.lock.instance) == F_okay) {
@@ -55,29 +61,33 @@ extern "C" {
           }
 
           // If instance is active or busy, then do not attempt to clean it.
-          if (instance->state == controller_instance_state_active_e || instance->state == controller_instance_state_busy_e) {
+          if (instance->state == controller_instance_state_active_e || instance->state == controller_instance_state_busy_e || main->thread.enabled != controller_thread_enabled_e) {
             f_thread_unlock(&instance->active);
             f_thread_unlock(&instance->lock);
 
-            continue;
+            if (main->thread.enabled == controller_thread_enabled_e) continue;
+
+            break;
           }
 
           // If instance has a PID file, then it is running in the background, only cleanup if the PID file no longer exists.
           if (instance->path_pids.used) {
             f_number_unsigned_t j = 0;
 
-            for (; j < instance->path_pids.used; ++j) {
+            for (; j < instance->path_pids.used && main->thread.enabled == controller_thread_enabled_e; ++j) {
 
               if (instance->path_pids.array[j].used && f_file_exists(instance->path_pids.array[j], F_true) == F_true) {
                 break;
               }
             } // for
 
-            if (j < instance->path_pids.used) {
+            if (j < instance->path_pids.used || main->thread.enabled != controller_thread_enabled_e) {
               f_thread_unlock(&instance->active);
               f_thread_unlock(&instance->lock);
 
-              continue;
+              if (main->thread.enabled == controller_thread_enabled_e) continue;
+
+              break;
             }
           }
 
@@ -88,24 +98,25 @@ extern "C" {
             status = f_thread_join(instance->id_thread, 0);
 
             if (F_status_is_error_not(status) || F_status_set_fine(status) == F_found_not) {
-              status = f_thread_lock_write(&instance->lock);
+              status = f_thread_lock_write_try(&instance->lock);
+
+              if (status == F_okay) {
+                instance->state = controller_instance_state_idle_e;
+                instance->id_thread = 0;
+
+                f_thread_mutex_lock(&instance->wait_lock);
+                f_thread_condition_signal_all(&instance->wait);
+                f_thread_mutex_unlock(&instance->wait_lock);
 
-              if (F_status_is_error(status)) {
+                f_thread_unlock(&instance->lock);
+              }
+              else if (F_status_is_error(status)) {
                 controller_print_error_lock_critical(&main->program.error, F_status_set_fine(status), F_false);
 
                 f_thread_unlock(&instance->active);
 
                 continue;
               }
-
-              instance->state = controller_instance_state_idle_e;
-              instance->id_thread = 0;
-
-              f_thread_mutex_lock(&instance->wait_lock);
-              f_thread_condition_signal_all(&instance->wait);
-              f_thread_mutex_unlock(&instance->wait_lock);
-
-              f_thread_unlock(&instance->lock);
             }
             else {
               f_thread_unlock(&instance->active);
@@ -120,10 +131,16 @@ extern "C" {
 
           // Shrink the childs array.
           if (instance->childs.used) {
-            for (; instance->childs.used; --instance->childs.used) {
+            for (; instance->childs.used && main->thread.enabled == controller_thread_enabled_e; --instance->childs.used) {
               if (instance->childs.array[instance->childs.used]) break;
             } // for
 
+             if (main->thread.enabled != controller_thread_enabled_e) {
+                f_thread_unlock(&instance->active);
+
+               break;
+             }
+
             if (instance->childs.used < instance->childs.size) {
               f_memory_array_resize(instance->childs.used, sizeof(pid_t), (void **) &instance->childs.array, &instance->childs.used, &instance->childs.size);
             }