2024-09-08 17:37:00 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
namespace App\Jobs;
|
|
|
|
|
|
|
|
|
|
use App\Models\Server;
|
2024-09-23 17:51:31 +00:00
|
|
|
use Carbon\Carbon;
|
2024-09-08 17:37:00 +00:00
|
|
|
use Illuminate\Bus\Queueable;
|
|
|
|
|
use Illuminate\Contracts\Queue\ShouldQueue;
|
|
|
|
|
use Illuminate\Foundation\Bus\Dispatchable;
|
|
|
|
|
use Illuminate\Queue\InteractsWithQueue;
|
|
|
|
|
use Illuminate\Queue\SerializesModels;
|
2026-05-22 15:31:38 +00:00
|
|
|
use Illuminate\Support\Facades\Log;
|
2024-09-08 17:37:00 +00:00
|
|
|
use Illuminate\Support\Facades\Process;
|
2024-09-17 13:54:22 +00:00
|
|
|
use Illuminate\Support\Facades\Storage;
|
2024-09-08 17:37:00 +00:00
|
|
|
|
|
|
|
|
class CleanupStaleMultiplexedConnections implements ShouldQueue
|
|
|
|
|
{
|
|
|
|
|
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
|
|
|
|
|
|
|
|
|
|
/**
 * Run every cleanup pass for multiplexed SSH connections.
 *
 * The passes run in a fixed order: stale mux files, mux files whose server
 * no longer exists, ssh processes sharing one ControlPath, orphaned ssh
 * processes and finally orphaned cloudflared proxies. Nothing is caught
 * here, so an exception thrown by one pass stops the remaining passes.
 */
public function handle(): void
{
    $this->cleanupStaleConnections();
    $this->cleanupNonExistentServerConnections();
    $this->cleanupDuplicateSshProcesses();
    $this->cleanupOrphanedSshProcesses();
    $this->cleanupOrphanedCloudflaredProcesses();
}
|
|
|
|
|
|
2026-05-22 16:01:53 +00:00
|
|
|
/**
 * Reset every group of ssh processes that share one ControlPath.
 *
 * When two background ssh masters point at the same ControlPath, OpenSSH's
 * control socket state can no longer be trusted: `ssh -O check` may report
 * one PID while the socket lifecycle belongs to another. Rather than trying
 * to pick an owner, the whole group is handed to resetDuplicateGroup().
 *
 * Only control paths located inside the application's ssh mux directory are
 * considered; every other process is ignored.
 */
private function cleanupDuplicateSshProcesses(): void
{
    $muxPrefix = storage_path('app/ssh/mux').'/';
    $byControlPath = [];

    foreach ($this->listProcesses() as $candidate) {
        $path = $this->extractControlPath($candidate['args']);
        $insideMuxDir = is_string($path) && str_starts_with($path, $muxPrefix);

        if ($insideMuxDir) {
            $byControlPath[$path][] = $candidate;
        }
    }

    // A control path referenced by a single process needs no reset.
    $shared = array_filter(
        $byControlPath,
        static fn (array $members): bool => count($members) >= 2,
    );

    foreach ($shared as $path => $members) {
        $this->resetDuplicateGroup($path, $members);
    }
}
|
|
|
|
|
|
2026-05-22 15:31:38 +00:00
|
|
|
/**
 * Reap backgrounded ssh processes whose ControlPath socket file is missing.
 *
 * A process that lost the ControlPath socket race is not a master, so
 * ControlPersist never reaps it and it leaks memory until the container
 * restarts. Such a process is recognised here by its control path having no
 * socket file on disk.
 *
 * Processes younger than `constants.ssh.mux_orphan_min_age` seconds are
 * skipped: a freshly forked master creates its socket shortly after it
 * starts, so a young process without a socket may simply still be
 * establishing the connection.
 */
private function cleanupOrphanedSshProcesses(): void
{
    $muxPrefix = storage_path('app/ssh/mux').'/';
    $minimumAge = (int) config('constants.ssh.mux_orphan_min_age');

    foreach ($this->listProcesses() as $candidate) {
        $socketPath = $this->extractControlPath($candidate['args']);

        // Only ever touch ssh processes pointing at Coolify's mux directory.
        if (! is_string($socketPath) || ! str_starts_with($socketPath, $muxPrefix)) {
            continue;
        }

        // Too young to judge: the socket may not have been created yet.
        if ($candidate['etimes'] < $minimumAge) {
            continue;
        }

        // A socket file exists at the control path, so this is not treated as an orphan.
        if (file_exists($socketPath)) {
            continue;
        }

        $this->reapOrphan('ssh', $candidate);
    }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reap orphaned `cloudflared access ssh` proxy processes.
 *
 * Each proxy is spawned as the SSH ProxyCommand transport for a Cloudflare
 * Tunnel server and is expected to die with its parent ssh. When that ssh
 * is killed or orphaned (e.g. a lost mux master), the cloudflared process
 * can leak and accumulate. A proxy whose parent PID is not a running ssh
 * process in the same snapshot is handed to reapOrphan().
 *
 * Processes younger than `constants.ssh.mux_orphan_min_age` seconds are
 * skipped so that a proxy whose parent ssh is still starting up, or a
 * transient `ssh -O check` proxy mid-exit, is never mistaken for an orphan.
 */
private function cleanupOrphanedCloudflaredProcesses(): void
{
    $minAge = (int) config('constants.ssh.mux_orphan_min_age');
    $processes = $this->listProcesses();

    $sshPids = [];
    foreach ($processes as $process) {
        // Match the ssh binary itself (`ssh ...`, `/usr/bin/ssh ...`) and the
        // `ssh: <path> [mux]` process title that extractControlPath() also
        // recognises for backgrounded masters. `cloudflared access ssh` is
        // excluded because its `ssh` is preceded by a space, not by the start
        // of the string or a slash.
        if (preg_match('#(?:^|/)ssh:?\s#', $process['args'])) {
            $sshPids[$process['pid']] = true;
        }
    }

    foreach ($processes as $process) {
        // `cloudflared access ssh`, never the `cloudflared tunnel` daemon.
        if (! str_contains($process['args'], 'cloudflared access ssh')) {
            continue;
        }

        // Orphaned when no live ssh process is its parent.
        if ($process['etimes'] >= $minAge && ! isset($sshPids[$process['ppid']])) {
            $this->reapOrphan('cloudflared', $process);
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reap a detected orphan process, or only report it in dry-run mode.
 *
 * When `constants.ssh.mux_orphan_reap_enabled` is falsy the orphan is only
 * logged, which lets operators verify what would be killed before enabling
 * reaping for real. Otherwise `kill <pid>` is run and the outcome is
 * logged: an info entry when the command exits with 0, a warning carrying
 * the exit code and stderr otherwise.
 *
 * @param  string  $kind  Label used in the log message (callers pass 'ssh' or 'cloudflared').
 * @param  array{pid: string, ppid: string, etimes: int, args: string}  $process
 */
private function reapOrphan(string $kind, array $process): void
{
    $context = [
        'pid' => $process['pid'],
        'etimes' => $process['etimes'],
        'command' => $process['args'],
    ];

    if (! config('constants.ssh.mux_orphan_reap_enabled')) {
        Log::info("Orphaned {$kind} process detected (dry-run, not killed)", $context);

        return;
    }

    $kill = Process::run('kill '.escapeshellarg($process['pid']));

    if ($kill->exitCode() !== 0) {
        // Do not claim success when kill failed, e.g. because the process
        // already exited between the snapshot and this call.
        Log::warning("Failed to kill orphaned {$kind} process", $context + [
            'exit_code' => $kill->exitCode(),
            'error' => trim($kill->errorOutput()),
        ]);

        return;
    }

    Log::info("Killed orphaned {$kind} process", $context);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Take a snapshot of the running processes via `ps`.
 *
 * Each output line is expected to hold three whitespace-separated integers
 * (pid, parent pid, elapsed seconds) followed by the full command line.
 * Lines that do not fit that shape are dropped. An empty list is returned
 * when `ps` exits with a non-zero status.
 *
 * @return list<array{pid: string, ppid: string, etimes: int, args: string}>
 */
private function listProcesses(): array
{
    $result = Process::run('ps -ww -eo pid=,ppid=,etimes=,args=');

    if ($result->exitCode() !== 0) {
        return [];
    }

    $snapshot = [];
    $rows = explode("\n", trim($result->output()));

    foreach ($rows as $row) {
        $parsed = preg_match('/^\s*(\d+)\s+(\d+)\s+(\d+)\s+(.*)$/', $row, $fields);

        if ($parsed) {
            [, $pid, $parentPid, $elapsed, $command] = $fields;

            // pid and ppid stay strings; only the elapsed time is converted to int.
            $snapshot[] = [
                'pid' => $pid,
                'ppid' => $parentPid,
                'etimes' => (int) $elapsed,
                'args' => $command,
            ];
        }
    }

    return $snapshot;
}
|
|
|
|
|
|
2026-05-22 16:01:53 +00:00
|
|
|
/**
 * Kill every process of a duplicate ControlPath group and remove the socket.
 *
 * When `constants.ssh.mux_orphan_reap_enabled` is falsy the group is only
 * logged (dry-run). Otherwise each process is sent `kill <pid>`, a warning
 * is logged for every kill that exits non-zero, and the socket file at the
 * control path is unlinked if it exists.
 *
 * @param  string  $controlPath  Control socket path shared by the group.
 * @param  list<array{pid: string, ppid: string, etimes: int, args: string}>  $processes
 */
private function resetDuplicateGroup(string $controlPath, array $processes): void
{
    $context = [
        'control_path' => $controlPath,
        'pids' => array_column($processes, 'pid'),
    ];

    if (! config('constants.ssh.mux_orphan_reap_enabled')) {
        Log::info('Duplicate ssh mux processes detected (dry-run, not killed)', $context);

        return;
    }

    foreach ($processes as $process) {
        $kill = Process::run('kill '.escapeshellarg($process['pid']));

        if ($kill->exitCode() !== 0) {
            // Surface the failure instead of silently ignoring it; the rest
            // of the group is still processed.
            Log::warning('Failed to kill duplicate ssh mux process', [
                'control_path' => $controlPath,
                'pid' => $process['pid'],
                'exit_code' => $kill->exitCode(),
                'error' => trim($kill->errorOutput()),
            ]);
        }
    }

    if (file_exists($controlPath)) {
        // Best effort: the socket may disappear between the check and the
        // unlink, so the warning is suppressed on purpose.
        @unlink($controlPath);
    }

    Log::info('Reset duplicate ssh mux processes', $context);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Extract the ssh ControlPath from a process command line.
 *
 * Two forms are recognised:
 *  - an `-o ControlPath=<path>` option, where the path may be double-quoted,
 *    single-quoted or a bare run of non-whitespace characters;
 *  - a process title of the exact form `ssh: <path> [mux]`.
 *
 * @param  string  $args  Command line as reported by `ps`.
 * @return string|null The control path, or null when neither form matches.
 */
private function extractControlPath(string $args): ?string
{
    $optionPattern = '/(?:^|\s)-o\s+ControlPath=(?:"([^"]+)"|\'([^\']+)\'|(\S+))/';

    // PREG_UNMATCHED_AS_NULL makes the alternatives that did not take part in
    // the match null, so `??` selects the one that matched. A truthiness test
    // (`?:`) would wrongly skip a captured path such as "0".
    if (preg_match($optionPattern, $args, $matches, PREG_UNMATCHED_AS_NULL)) {
        return $matches[1] ?? $matches[2] ?? $matches[3];
    }

    if (preg_match('/^ssh:\s+(\S+)\s+\[mux\]$/', $args, $matches)) {
        return $matches[1];
    }

    return null;
}
|
|
|
|
|
|
2024-09-17 13:54:22 +00:00
|
|
|
/**
 * Remove mux files whose connection is no longer usable.
 *
 * For every file on the `ssh-mux` disk:
 *  - no matching server            -> removed with reason `server_not_found`;
 *  - `ssh -O check` exits non-zero -> removed with reason `connection_check_failed`;
 *  - the timestamp read from the file plus `constants.ssh.mux_persist_time`
 *    seconds lies in the past      -> removed with reason `expired`.
 *
 * The server user and IP and the file name are shell-escaped because they
 * are interpolated into a command line.
 */
private function cleanupStaleConnections(): void
{
    $muxDir = storage_path('app/ssh/mux');

    foreach (Storage::disk('ssh-mux')->files() as $muxFile) {
        $serverUuid = $this->extractServerUuidFromMuxFile($muxFile);
        $server = Server::where('uuid', $serverUuid)->first();

        if (! $server) {
            $this->removeMultiplexFile($muxFile, 'server_not_found');

            continue;
        }

        $checkCommand = sprintf(
            'ssh -O check -o %s %s 2>/dev/null',
            escapeshellarg("ControlPath={$muxDir}/{$muxFile}"),
            escapeshellarg("{$server->user}@{$server->ip}"),
        );

        if (Process::run($checkCommand)->exitCode() !== 0) {
            $this->removeMultiplexFile($muxFile, 'connection_check_failed');

            continue;
        }

        // Cast guards against a null read result being passed to substr().
        $muxContent = (string) Storage::disk('ssh-mux')->get($muxFile);

        // NOTE(review): offset 37 presumably skips a fixed-length prefix that
        // precedes the timestamp — confirm against the code writing these files.
        $expirationTime = Carbon::parse(substr($muxContent, 37))
            ->addSeconds(config('constants.ssh.mux_persist_time'));

        if (Carbon::now()->isAfter($expirationTime)) {
            $this->removeMultiplexFile($muxFile, 'expired');
        }
    }
}
|
|
|
|
|
|
2024-09-17 13:54:22 +00:00
|
|
|
/**
 * Remove mux files that belong to servers which no longer exist.
 *
 * All server UUIDs are loaded once and each mux file's UUID is looked up
 * with a strict comparison; files without a match are removed with reason
 * `server_does_not_exist`.
 */
private function cleanupNonExistentServerConnections(): void
{
    $muxFiles = Storage::disk('ssh-mux')->files();

    // Normalise to strings so the strict lookup below cannot miss on type.
    $existingServerUuids = array_map(
        static fn ($uuid): string => (string) $uuid,
        Server::pluck('uuid')->toArray(),
    );

    foreach ($muxFiles as $muxFile) {
        $serverUuid = $this->extractServerUuidFromMuxFile($muxFile);

        // Strict: a loose comparison treats numeric-looking strings such as
        // "1e3" and "1000" as equal and could keep a file it should remove.
        if (! in_array($serverUuid, $existingServerUuids, true)) {
            $this->removeMultiplexFile($muxFile, 'server_does_not_exist');
        }
    }
}
|
2024-09-17 13:54:22 +00:00
|
|
|
|
|
|
|
|
/**
 * Derive the server UUID from a mux file name by dropping its first four
 * characters.
 *
 * NOTE(review): the four characters are presumably a fixed prefix such as
 * `mux_` — confirm against the code that names these files.
 *
 * @param  string  $muxFile  File name as listed on the `ssh-mux` disk.
 * @return string Everything after the fourth character; empty when the name has four characters or fewer.
 */
private function extractServerUuidFromMuxFile(string $muxFile): string
{
    return substr($muxFile, 4);
}
|
|
|
|
|
|
2026-05-22 15:31:38 +00:00
|
|
|
/**
 * Close and delete a stale mux socket file, or only report it in dry-run mode.
 *
 * When `constants.ssh.mux_orphan_reap_enabled` is falsy the file is only
 * logged, which lets operators verify what would be removed before enabling
 * removal for real. Otherwise `ssh -O exit` is run against the socket and
 * the file is deleted from the `ssh-mux` disk.
 *
 * @param  string  $muxFile  File name on the `ssh-mux` disk.
 * @param  string  $reason  Short machine-readable reason recorded in the log.
 */
private function removeMultiplexFile(string $muxFile, string $reason): void
{
    $context = [
        'file' => $muxFile,
        'reason' => $reason,
    ];

    if (! config('constants.ssh.mux_orphan_reap_enabled')) {
        Log::info('Stale mux file detected (dry-run, not removed)', $context);

        return;
    }

    // The file name comes from a directory listing, so escape it before it
    // reaches the shell.
    $muxSocket = storage_path("app/ssh/mux/{$muxFile}");
    $closeCommand = 'ssh -O exit -o '.escapeshellarg("ControlPath={$muxSocket}").' localhost 2>/dev/null';

    // The exit status is deliberately ignored; the file is deleted regardless.
    Process::run($closeCommand);
    Storage::disk('ssh-mux')->delete($muxFile);

    Log::info('Removed stale mux file', $context);
}
|
2024-09-08 17:37:00 +00:00
|
|
|
}
|