Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions caddy/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ func (f *FrankenPHPApp) Start() error {
frankenphp.WithWorkerWatchMode(w.Watch),
frankenphp.WithWorkerMaxFailures(w.MaxConsecutiveFailures),
frankenphp.WithWorkerMaxThreads(w.MaxThreads),
frankenphp.WithWorkerTimeout(w.WorkerTimeout),
frankenphp.WithWorkerRequestOptions(w.requestOptions...),
)

Expand Down
59 changes: 59 additions & 0 deletions caddy/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package caddy

import (
"testing"
"time"

"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -35,6 +36,64 @@ func TestModuleWorkerDuplicateFilenamesFail(t *testing.T) {
require.Contains(t, err.Error(), "must not have duplicate filenames", "Error message should mention duplicate filenames")
}

func TestModuleWorkerTimeoutParses(t *testing.T) {
config := `
{
php {
worker {
file ../testdata/worker-with-env.php
num 1
worker_timeout 30s
}
}
}`

d := caddyfile.NewTestDispenser(config)
module := &FrankenPHPModule{}

require.NoError(t, module.UnmarshalCaddyfile(d))
require.Len(t, module.Workers, 1)
require.Equal(t, 30*time.Second, module.Workers[0].WorkerTimeout)
}

func TestModuleWorkerTimeoutDefaultsToZero(t *testing.T) {
config := `
{
php {
worker {
file ../testdata/worker-with-env.php
num 1
}
}
}`

d := caddyfile.NewTestDispenser(config)
module := &FrankenPHPModule{}

require.NoError(t, module.UnmarshalCaddyfile(d))
require.Len(t, module.Workers, 1)
require.Zero(t, module.Workers[0].WorkerTimeout)
}

func TestModuleWorkerTimeoutInvalidDurationFails(t *testing.T) {
config := `
{
php {
worker {
file ../testdata/worker-with-env.php
worker_timeout not-a-duration
}
}
}`

d := caddyfile.NewTestDispenser(config)
module := &FrankenPHPModule{}

err := module.UnmarshalCaddyfile(d)
require.Error(t, err)
require.Contains(t, err.Error(), "worker_timeout must be a valid duration")
}

func TestModuleWorkersWithDifferentFilenames(t *testing.T) {
// Create a test configuration with different worker filenames
configWithDifferentFilenames := `
Expand Down
19 changes: 18 additions & 1 deletion caddy/workerconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"path"
"path/filepath"
"strconv"
"time"

"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
Expand Down Expand Up @@ -41,6 +42,8 @@ type workerConfig struct {
MatchPath []string `json:"match_path,omitempty"`
// MaxConsecutiveFailures sets the maximum number of consecutive failures before panicking (defaults to 6, set to -1 to never panic)
MaxConsecutiveFailures int `json:"max_consecutive_failures,omitempty"`
// WorkerTimeout sets a hard per-request timeout (e.g. 30s). A worker request running longer is interrupted so the thread can be reclaimed. 0 (default) disables it.
WorkerTimeout time.Duration `json:"worker_timeout,omitempty"`

options []frankenphp.WorkerOption
requestOptions []frankenphp.RequestOption
Expand Down Expand Up @@ -145,8 +148,22 @@ func unmarshalWorker(d *caddyfile.Dispenser) (workerConfig, error) {
}

wc.MaxConsecutiveFailures = v
case "worker_timeout":
if !d.NextArg() {
return wc, d.ArgErr()
}

v, err := time.ParseDuration(d.Val())
if err != nil {
return wc, d.Errf("worker_timeout must be a valid duration (example: 30s): %v", err)
}
if v < 0 {
return wc, d.Err("worker_timeout must be >= 0")
}

wc.WorkerTimeout = v
default:
return wc, wrongSubDirectiveError("worker", "name, file, num, env, watch, match, max_consecutive_failures, max_threads", v)
return wc, wrongSubDirectiveError("worker", "name, file, num, env, watch, match, max_consecutive_failures, max_threads, worker_timeout", v)
}
}

Expand Down
1 change: 1 addition & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ You can also explicitly configure FrankenPHP using the [global option](https://c
watch <path> # Sets the path to watch for file changes. Can be specified more than once for multiple paths.
name <name> # Sets the name of the worker, used in logs and metrics. Default: absolute path of worker file
max_consecutive_failures <num> # Sets the maximum number of consecutive failures before the worker is considered unhealthy, -1 means the worker will always restart. Default: 6.
worker_timeout <duration> # (experimental) Hard per-request timeout (e.g. 30s). A request running longer is interrupted so the worker thread can be reclaimed. Default: 0 (disabled).
}
}
}
Expand Down
68 changes: 68 additions & 0 deletions docs/worker.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,74 @@ frankenphp {
}
```

### Request timeout (experimental)

By default, a worker thread blocked on a slow external call (a hung MySQL query, a
stuck HTTP client, a Redis call, a long `sleep()`) stays occupied until the call
returns on its own. The `worker_timeout` option sets a hard per-request timeout —
the worker-mode equivalent of PHP-FPM's `request_terminate_timeout` — after which
FrankenPHP interrupts the PHP thread so the request bails out and the worker is
reclaimed:

```caddyfile
frankenphp {
worker {
# ...
worker_timeout 30s
}
}
```

When the timeout elapses, the request is aborted with a fatal error whose message
is `Worker request timeout of N second(s) exceeded`. The worker script then
restarts cleanly and serves the next request — no special userland code is
required. Note that `max_execution_time` does **not** count time spent inside a
blocking call such as a database query, which is exactly the case `worker_timeout`
is designed to cover.

How it works (and its limits):

- A blocking syscall (a stuck database query, a hung Redis/Elasticsearch/HTTP
read, a black-holed `connect()`) cannot be aborted by PHP's timeout flag
alone, because PHP retries the interrupted read. On **Linux**, FrankenPHP
inspects what the worker thread is blocked on and shuts down the socket(s)
involved, so the read fails and the request unwinds. Only sockets are
aborted this way (a read blocked on a file or pipe is not). It recognises:
- `read`/`recvfrom`/`recvmsg` and a blocking `connect` — the descriptor is the
syscall's first argument;
- `poll`/`ppoll` — the descriptors are read out of the poll set (PHP's stream
layer, and thus most Redis/HTTP/DB clients built on it, always poll before
reading). This is what lets a stuck `SELECT SLEEP(30)` actually stop at the
timeout instead of running to completion;
- `epoll_wait`/`epoll_pwait` — the watched descriptors are enumerated from the
epoll instance (covers clients running their own event loop, such as
`curl_multi` and gRPC).

Every descriptor is confirmed to be a socket before it is shut down.
- A long `sleep()`/`usleep()` (no socket) is interrupted by a realtime signal on
**Linux and FreeBSD**.
- On **macOS** and **Windows**, and for a tight CPU loop inside a C extension that
swallows `EINTR`, only PHP's VM-interrupt flag is set: a CPU-bound overrun is
still caught at the next opcode boundary, but a blocking syscall already in
progress cannot be unblocked. A client blocked in a `select`-based loop (rare on
Linux, where `poll` is preferred) is likewise not aborted.
- The socket abort needs no extra privilege (all inspection is of the process
itself), but it relies on `/proc` and — for poll-based waits, the common case —
on [`process_vm_readv(2)`](https://man7.org/linux/man-pages/man2/process_vm_readv.2.html).
Docker's default seccomp profile allows this syscall on kernels ≥ 4.8
([moby#42083](https://github.com/moby/moby/pull/42083)); under an older or
stricter policy (gVisor, custom profiles) the call fails closed: FrankenPHP
logs a warning once, and a request blocked in a poll-based socket read can no
longer be aborted (sleeps and CPU-bound overruns still are).
- `worker_timeout` aborts the request hard, like `request_terminate_timeout`
does in PHP-FPM. The database server rolls back an open transaction when its
connection is shut down, and PHP's request shutdown still runs (sessions are
released as usual). But application-level sequences are not rolled back: an
e-mail already sent, a file already written or an external lock with a TTL
stay as they are. Set the timeout comfortably above your slowest legitimate
request.
- `worker_timeout` defaults to `0` (disabled).

## Superglobals behavior

[PHP superglobals](https://www.php.net/manual/language.variables.superglobals.php) (`$_SERVER`, `$_ENV`, `$_GET`...)
Expand Down
136 changes: 135 additions & 1 deletion frankenphp.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,35 @@ static void frankenphp_register_atfork(void) {
static void CALLBACK frankenphp_noop_apc(ULONG_PTR param) { (void)param; }
#endif

/* ===== Worker request timeout (per-request hard timeout) =====
 *
 * A blocking syscall (a stuck SELECT SLEEP(), a hung HTTP read, ...) cannot be
 * aborted by the VM-interrupt flag alone: PHP's network layer retries EINTR, so
 * the read just resumes, and a driver like mysqlnd even removes its socket from
 * EG(regular_list) so it can't be found by walking the resource list. To cut
 * such a request short the Go watchdog shuts down the fd the thread is blocked
 * on (found via /proc/<tid>/syscall); for sleep-style waits that have no fd,
 * the EINTR caused by the wake-up signal ends the wait instead. Once the thread
 * runs PHP again, this custom zend_interrupt_function raises a clear
 * "Worker request timeout" fatal.
 *
 * Per-thread state is indexed by thread_index and allocated once max_threads is
 * known (frankenphp_init_worker_timeout). */

/* Per-thread "timeout armed" flags: set by frankenphp_arm_worker_timeout,
 * cleared by the interrupt hook and by frankenphp_clear_worker_timeout.
 * NULL while the state is not allocated. */
static zend_atomic_bool *worker_timeout_pending = NULL;
/* Per-thread limit recorded when the timeout is armed; only used to format the
 * fatal error message. NULL while the state is not allocated. */
static double *worker_timeout_seconds = NULL;
/* Number of slots in the two arrays above; 0 while the state is not
 * allocated. Every index is bounds-checked against it. */
static int worker_timeout_max_threads = 0;
/* Saved to chain PHP's own interrupt handler (fibers, pcntl, ...). */
static void (*frankenphp_original_interrupt)(zend_execute_data *) = NULL;

/* Tells whether a worker timeout is currently armed for thread slot `idx`.
 * Returns false when the per-thread state has not been allocated or when
 * `idx` lies outside the allocated range, so it is safe to call at any time. */
static bool frankenphp_worker_timeout_is_pending(uintptr_t idx) {
  if (worker_timeout_pending == NULL) {
    return false;
  }

  if (idx >= (uintptr_t)worker_timeout_max_threads) {
    return false;
  }

  return zend_atomic_bool_load(&worker_timeout_pending[idx]);
}

#ifdef FRANKENPHP_HAS_KILL_SIGNAL
/* Deliberately empty signal handler: the delivery of the signal is itself what
 * unblocks an EINTR-abortable wait, so there is nothing to do in the handler
 * body. The socket abort that handles retried blocking reads is done from Go
 * (shutdown on the blocked fd), not here. `sig` is unused. */
static void frankenphp_kill_signal_handler(int sig) { (void)sig; }

static pthread_once_t kill_signal_handler_installed = PTHREAD_ONCE_INIT;
Expand Down Expand Up @@ -222,6 +249,104 @@ void frankenphp_release_thread_for_kill(force_kill_slot slot) {
#endif
}

/* Replacement for zend_interrupt_function.
 *
 * No timeout armed for the current thread (or the thread is not a worker):
 * defer to the handler that was installed before ours, if there was one.
 *
 * Timeout armed: consume the per-thread flag, discard any exception the
 * aborted I/O left behind (e.g. a mysqli connection error caused by the
 * socket shutdown) so that our message is the one that surfaces, then raise
 * an E_ERROR. The E_ERROR bails out of the request, so the previous handler
 * is intentionally not chained on this path. */
static void frankenphp_timeout_interrupt(zend_execute_data *execute_data) {
  const bool timed_out =
      is_worker_thread && frankenphp_worker_timeout_is_pending(thread_index);

  if (!timed_out) {
    if (frankenphp_original_interrupt != NULL) {
      frankenphp_original_interrupt(execute_data);
    }
    return;
  }

  /* One-shot: the flag must not survive into the next request. */
  zend_atomic_bool_store(&worker_timeout_pending[thread_index], false);

  if (EG(exception)) {
    zend_clear_exception();
  }

  /* Does not return: unwinds the request through a bailout. */
  zend_error_noreturn(E_ERROR,
                      "Worker request timeout of %g second(s) exceeded",
                      worker_timeout_seconds[thread_index]);
}

/* Puts frankenphp_timeout_interrupt in place of the current
 * zend_interrupt_function and remembers the previous handler for chaining.
 * Called on the main thread after SAPI startup.
 *
 * The installation is idempotent on purpose: php_main can run several times
 * in one process (Init/Shutdown cycles when embedding, and in the test suite)
 * and zend_interrupt_function survives a SAPI shutdown. Without the check we
 * would record our own hook as the "original" handler, and the chain call
 * would recurse forever the first time the hook fired without a pending
 * timeout. */
static void frankenphp_install_timeout_interrupt(void) {
  if (zend_interrupt_function != frankenphp_timeout_interrupt) {
    frankenphp_original_interrupt = zend_interrupt_function;
    zend_interrupt_function = frankenphp_timeout_interrupt;
  }
}

/* Allocate per-thread timeout state once max_threads is known. Called from Go
* alongside frankenphp_init_thread_metrics. */
void frankenphp_init_worker_timeout(int max_threads) {
worker_timeout_max_threads = max_threads;
worker_timeout_pending = calloc(max_threads, sizeof(zend_atomic_bool));
worker_timeout_seconds = calloc(max_threads, sizeof(double));
}

/* Release the per-thread timeout state allocated by
 * frankenphp_init_worker_timeout and return it to the unallocated state (NULL
 * arrays, zero length). Safe to call when nothing was allocated: free(NULL) is
 * a no-op.
 *
 * NOTE(review): nothing here synchronizes with a concurrent arm/clear call;
 * presumably the caller guarantees the watchdog and PHP threads are stopped
 * first - confirm. */
void frankenphp_destroy_worker_timeout(void) {
free(worker_timeout_pending);
/* Reset to NULL so the NULL guards in the arm/clear/is_pending helpers reject
 * any later call instead of touching freed memory. */
worker_timeout_pending = NULL;
free(worker_timeout_seconds);
worker_timeout_seconds = NULL;
/* A zero length makes every index fail the bounds check as well. */
worker_timeout_max_threads = 0;
}

/* Arms the worker timeout for a thread that has overrun its limit.
 *
 * thread_index_arg: slot of the target thread in the per-thread arrays.
 * slot:             kill slot of the target thread; a NULL vm_interrupt means
 *                   there is nothing to arm.
 * timeout_seconds:  the configured limit, kept only for the error message.
 *
 * Silently does nothing when the slot is empty, the index is out of range or
 * the per-thread state is not allocated.
 *
 * The limit is recorded first, then the per-thread flag is raised, and the VM
 * interrupt is set last, so the interrupt hook fires the moment the thread
 * next runs PHP. No wake-up happens here: the caller first shuts down the
 * blocked fd (so our message isn't pre-empted by the driver's own connection
 * error) and only then calls frankenphp_wake_worker_thread. */
void frankenphp_arm_worker_timeout(uintptr_t thread_index_arg,
                                   force_kill_slot slot,
                                   double timeout_seconds) {
  const bool state_ready =
      worker_timeout_pending != NULL && worker_timeout_seconds != NULL;
  const bool index_valid =
      thread_index_arg < (uintptr_t)worker_timeout_max_threads;

  if (slot.vm_interrupt != NULL && index_valid && state_ready) {
    worker_timeout_seconds[thread_index_arg] = timeout_seconds;
    zend_atomic_bool_store(&worker_timeout_pending[thread_index_arg], true);
    zend_atomic_bool_store(slot.vm_interrupt, true);
  }
}

/* Wake a thread parked in an EINTR-abortable wait (sleep, usleep) so it returns
 * and reaches the VM interrupt. Socket reads are handled by the fd shutdown done
 * before this call; this is the fallback for waits that have no fd. Safe on a
 * thread that has already gone away (zeroed slot).
 *
 * slot: kill slot of the target thread. Which fields are used depends on the
 * platform branch below; when neither branch is compiled in, the function does
 * nothing beyond the NULL check. */
void frankenphp_wake_worker_thread(force_kill_slot slot) {
/* A NULL vm_interrupt identifies a zeroed slot: nothing to wake. */
if (slot.vm_interrupt == NULL) {
return;
}
#ifdef FRANKENPHP_HAS_KILL_SIGNAL
/* Signal only while the handler is flagged as active.
 * NOTE(review): presumably this avoids delivering the signal with its default
 * disposition before the no-op handler is installed - confirm. */
if (zend_atomic_bool_load(&kill_signal_handler_active)) {
pthread_kill(slot.tid, FRANKENPHP_KILL_SIGNAL);
}
#elif defined(PHP_WIN32)
if (slot.thread_handle != NULL) {
/* Cancel synchronous I/O pending on the thread, then queue the no-op APC.
 * NOTE(review): the APC presumably exists to wake an alertable wait -
 * confirm. */
CancelSynchronousIo(slot.thread_handle);
QueueUserAPC((PAPCFUNC)frankenphp_noop_apc, slot.thread_handle, 0);
}
#endif
}

/* Resets the pending flag of thread slot `thread_index_arg`. Meant to run at
 * the start of a worker request: if the watchdog raced the completion of the
 * previous request, the flag may still be set, and clearing it here keeps that
 * stale timeout from aborting the new request. Does nothing when the state is
 * not allocated or the index is out of range. */
void frankenphp_clear_worker_timeout(uintptr_t thread_index_arg) {
  if (worker_timeout_pending != NULL &&
      thread_index_arg < (uintptr_t)worker_timeout_max_threads) {
    zend_atomic_bool_store(&worker_timeout_pending[thread_index_arg], false);
  }
}

void frankenphp_update_local_thread_context(bool is_worker) {
is_worker_thread = is_worker;

Expand Down Expand Up @@ -1269,6 +1394,12 @@ static void *php_thread(void *arg) {
* grace period can wake it from a busy PHP loop or blocking syscall. */
frankenphp_register_thread_for_kill(thread_index);

#ifdef __linux__
/* Publish the kernel thread id so the worker-timeout watchdog can locate the
* fd this thread blocks on (via /proc/<tid>/syscall) and shut it down. */
go_frankenphp_store_thread_tid(thread_index, (int)gettid());
#endif

bool thread_is_healthy = true;
bool has_attempted_shutdown = false;

Expand Down Expand Up @@ -1467,6 +1598,9 @@ static void *php_main(void *arg) {

frankenphp_sapi_module.startup(&frankenphp_sapi_module);

/* Hook the VM interrupt so worker_timeout can raise its own fatal. */
frankenphp_install_timeout_interrupt();

/* check if a default filter is set in php.ini and only filter if
* it is, this is deprecated and will be removed in PHP 9 */
char *default_filter;
Expand Down
6 changes: 6 additions & 0 deletions frankenphp.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ import (
"unsafe"
// debug on Linux
//_ "github.com/ianlancetaylor/cgosymbolizer"

"github.com/dunglas/frankenphp/internal/blockio"
)

type contextKeyStruct struct{}
Expand Down Expand Up @@ -269,6 +271,10 @@ func Init(options ...Option) error {
opt.logger = nil
}

// Let the worker-timeout watchdog report (once) when the platform denies
// the syscalls it needs to abort a blocked socket read.
blockio.SetLogger(globalLogger)

globalMu.Unlock()

if opt.metrics != nil {
Expand Down
Loading
Loading