coolify/app/Jobs/CoolifyTask.php
Andras Bacsai b22e79caec feat(jobs): improve scheduled tasks with retry logic and queue cleanup
- Add retry configuration to CoolifyTask (3 tries, 600s timeout)
- Add retry configuration to ScheduledTaskJob (3 tries, configurable timeout)
- Add retry configuration to DatabaseBackupJob (2 tries)
- Implement exponential backoff for all jobs (30s, 60s, 120s intervals)
- Add failed() handlers with comprehensive error logging to scheduled-errors channel
- Add execution tracking: started_at, retry_count, duration (decimal), error_details
- Add configurable timeout field to scheduled tasks (60-3600s, default 300s)
- Update UI to include timeout configuration in task creation/editing forms
- Increase ScheduledJobManager lock expiration from 60s to 90s for high-load environments
- Implement safe queue cleanup with restart vs runtime modes
  - Restart mode: aggressive cleanup (marks all processing jobs as failed)
  - Runtime mode: conservative cleanup (only marks jobs >12h as failed, skips deployments)
- Add cleanup:redis --restart flag for system startup
- Integrate cleanup into Dev.php init() for development environment
- Increase scheduled-errors log retention from 7 to 14 days
- Create comprehensive test suite (unit and feature tests)
- Add TESTING_GUIDE.md with manual testing instructions

Fixes issues with jobs failing after single attempt and "attempted too many times" errors
2025-11-10 11:11:18 +01:00

95 lines
2.7 KiB
PHP
Executable File

<?php
namespace App\Jobs;
use App\Actions\CoolifyTask\RunRemoteProcess;
use App\Enums\ProcessStatus;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldBeEncrypted;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Log;
use Spatie\Activitylog\Models\Activity;
/**
 * Queued job that runs a remote process on behalf of an activity-log entry.
 *
 * The job is always placed on the "high" queue. The work itself is delegated
 * to RunRemoteProcess; this class only carries the arguments, the queue
 * retry/timeout settings and the permanent-failure handling.
 */
class CoolifyTask implements ShouldBeEncrypted, ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * The number of times the job may be attempted.
     */
    public $tries = 3;

    /**
     * The maximum number of unhandled exceptions to allow before failing.
     *
     * NOTE(review): with a value of 1 the first unhandled exception appears to
     * fail the job permanently, so $tries/backoff() would presumably only apply
     * when the job is released back onto the queue — confirm this is intended.
     */
    public $maxExceptions = 1;

    /**
     * The number of seconds the job can run before timing out.
     */
    public $timeout = 600;

    /**
     * Create a new job instance and route it to the "high" queue.
     *
     * @param  Activity  $activity  Activity-log entry describing the remote process; its
     *                              properties are updated if the job permanently fails.
     * @param  bool  $ignore_errors  Forwarded unchanged to RunRemoteProcess.
     * @param  mixed  $call_event_on_finish  Forwarded unchanged to RunRemoteProcess.
     * @param  mixed  $call_event_data  Forwarded unchanged to RunRemoteProcess.
     */
    public function __construct(
        public Activity $activity,
        public bool $ignore_errors,
        public $call_event_on_finish,
        public $call_event_data,
    ) {
        $this->onQueue('high');
    }

    /**
     * Execute the job.
     *
     * Resolves RunRemoteProcess from the container with this job's arguments
     * and invokes it.
     */
    public function handle(): void
    {
        $remote_process = resolve(RunRemoteProcess::class, [
            'activity' => $this->activity,
            'ignore_errors' => $this->ignore_errors,
            'call_event_on_finish' => $this->call_event_on_finish,
            'call_event_data' => $this->call_event_data,
        ]);

        $remote_process();
    }

    /**
     * Calculate the number of seconds to wait before retrying the job.
     *
     * @return array<int, int> Delay in seconds before each successive retry.
     */
    public function backoff(): array
    {
        return [30, 90, 180]; // 30s, 90s, 180s between retries
    }

    /**
     * Handle a job failure.
     *
     * Writes the failure to the "scheduled-errors" log channel, then marks the
     * activity as errored so the failure is recorded on the activity itself.
     *
     * @param  \Throwable|null  $exception  The exception that caused the failure, if any.
     */
    public function failed(?\Throwable $exception): void
    {
        $errorMessage = $exception?->getMessage();

        // The stored command is an arbitrary extra property: only preview scalar
        // values, and truncate by characters (not bytes) so a multi-byte UTF-8
        // character is never cut in half in the log context.
        $command = $this->activity->getExtraProperty('command');
        $commandPreview = is_scalar($command)
            ? mb_substr((string) $command, 0, 200, 'UTF-8')
            : '';

        Log::channel('scheduled-errors')->error('CoolifyTask permanently failed', [
            'job' => 'CoolifyTask',
            'activity_id' => $this->activity->id,
            'server_uuid' => $this->activity->getExtraProperty('server_uuid'),
            'command_preview' => $commandPreview,
            'error' => $errorMessage,
            'total_attempts' => $this->attempts(),
            'trace' => $exception?->getTraceAsString(),
        ]);

        // Update activity status to reflect permanent failure
        $this->activity->properties = $this->activity->properties->merge([
            'status' => ProcessStatus::ERROR->value,
            'error' => $errorMessage ?? 'Job permanently failed',
            'failed_at' => now()->toIso8601String(),
        ]);
        $this->activity->save();
    }
}