All checks were successful
Build and Release / Create Release (push) Successful in 0s
Build and Release / Integration Tests (PostgreSQL) (push) Successful in 3m7s
Build and Release / Lint (push) Successful in 5m21s
Build and Release / Unit Tests (push) Successful in 5m46s
Build and Release / Build Binaries (amd64, linux, linux-latest) (push) Successful in 3m44s
Build and Release / Build Binaries (amd64, darwin, linux-latest) (push) Successful in 4m4s
Build and Release / Build Binaries (arm64, darwin, linux-latest) (push) Successful in 3m23s
Build and Release / Build Binaries (arm64, linux, linux-latest) (push) Successful in 3m47s
Build and Release / Build Binaries (amd64, windows, windows-latest) (push) Successful in 8h6m28s
267 lines
7.4 KiB
Go
267 lines
7.4 KiB
Go
// Copyright 2026 MarketAlly. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package actions
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"code.gitcaddy.com/server/models/db"
|
|
"code.gitcaddy.com/server/modules/json"
|
|
"code.gitcaddy.com/server/modules/log"
|
|
"code.gitcaddy.com/server/modules/setting"
|
|
"code.gitcaddy.com/server/modules/timeutil"
|
|
)
|
|
|
|
// RunnerCapabilities represents the parsed capabilities from CapabilitiesJSON
|
|
type RunnerCapabilities struct {
|
|
OS string `json:"os"`
|
|
Arch string `json:"arch"`
|
|
Disk *DiskInfo `json:"disk"`
|
|
CPU *CPUInfo `json:"cpu"`
|
|
Bandwidth *BandwidthInfo `json:"bandwidth"`
|
|
}
|
|
|
|
// DiskInfo contains the disk usage section of a runner's capabilities.
//
// UsedPercent is treated as a value on a 0-100 scale: GetHealthStatus derives
// the free percentage as 100 - UsedPercent.
type DiskInfo struct {
	TotalBytes  int64   `json:"total_bytes"`
	FreeBytes   int64   `json:"free_bytes"`
	UsedBytes   int64   `json:"used_bytes"`
	UsedPercent float64 `json:"used_percent"`
}
|
|
|
|
// CPUInfo contains the CPU load section of a runner's capabilities.
// GetHealthStatus compares LoadPercent against the configured maximum.
type CPUInfo struct {
	NumCPU      int     `json:"num_cpu"`      // Number of logical CPUs
	LoadAvg1m   float64 `json:"load_avg_1m"`  // 1-minute load average
	LoadAvg5m   float64 `json:"load_avg_5m"`  // 5-minute load average
	LoadAvg15m  float64 `json:"load_avg_15m"` // 15-minute load average
	LoadPercent float64 `json:"load_percent"` // (load_avg_1m / num_cpu) * 100
}
|
|
|
|
// BandwidthInfo contains the network performance section of a runner's
// capabilities. GetHealthStatus uses LatencyMs for its latency check.
type BandwidthInfo struct {
	DownloadMbps float64   `json:"download_mbps"`
	LatencyMs    float64   `json:"latency_ms"`
	TestedAt     time.Time `json:"tested_at"`
}
|
|
|
|
// RunnerHealthStatus is the result of evaluating a runner's reported
// capabilities against the configured health thresholds.
//
// Healthy is the overall verdict; DiskHealthy, CPUHealthy and LatencyHealthy
// are the per-check verdicts. The measurement fields echo the values that
// were evaluated. Reason is a "; "-separated list of failed checks (or a note
// that nothing was reported) and is omitted from JSON when empty.
// NeedsCleanup signals that disk usage warrants a cleanup request.
type RunnerHealthStatus struct {
	Healthy         bool    `json:"healthy"`
	DiskHealthy     bool    `json:"disk_healthy"`
	CPUHealthy      bool    `json:"cpu_healthy"`
	LatencyHealthy  bool    `json:"latency_healthy"`
	DiskUsedPercent float64 `json:"disk_used_percent"`
	DiskFreeBytes   int64   `json:"disk_free_bytes"`
	CPULoadPercent  float64 `json:"cpu_load_percent"`
	LatencyMs       float64 `json:"latency_ms"`
	Reason          string  `json:"reason,omitempty"`
	NeedsCleanup    bool    `json:"needs_cleanup"`
}
|
|
|
|
// GetCapabilities parses and returns the runner's capabilities
|
|
func (r *ActionRunner) GetCapabilities() *RunnerCapabilities {
|
|
if r.CapabilitiesJSON == "" {
|
|
return nil
|
|
}
|
|
|
|
var caps RunnerCapabilities
|
|
if err := json.Unmarshal([]byte(r.CapabilitiesJSON), &caps); err != nil {
|
|
log.Error("Failed to parse runner %s capabilities: %v", r.Name, err)
|
|
return nil
|
|
}
|
|
return &caps
|
|
}
|
|
|
|
// GetHealthStatus returns detailed health status of the runner
|
|
func (r *ActionRunner) GetHealthStatus() *RunnerHealthStatus {
|
|
status := &RunnerHealthStatus{
|
|
Healthy: true,
|
|
DiskHealthy: true,
|
|
CPUHealthy: true,
|
|
LatencyHealthy: true,
|
|
}
|
|
|
|
caps := r.GetCapabilities()
|
|
if caps == nil {
|
|
// No capabilities reported, assume healthy but note it
|
|
status.Reason = "no capabilities reported"
|
|
return status
|
|
}
|
|
|
|
healthSettings := setting.Actions.RunnerHealthCheck
|
|
|
|
// Check disk health
|
|
if caps.Disk != nil {
|
|
status.DiskUsedPercent = caps.Disk.UsedPercent
|
|
status.DiskFreeBytes = caps.Disk.FreeBytes
|
|
|
|
freePercent := 100.0 - caps.Disk.UsedPercent
|
|
if freePercent < healthSettings.MinDiskPercent {
|
|
status.DiskHealthy = false
|
|
status.Healthy = false
|
|
status.Reason = "insufficient disk space"
|
|
status.NeedsCleanup = true
|
|
}
|
|
|
|
if caps.Disk.UsedPercent >= healthSettings.MaxDiskUsagePercent {
|
|
status.NeedsCleanup = true
|
|
}
|
|
}
|
|
|
|
// Check CPU health
|
|
if caps.CPU != nil {
|
|
status.CPULoadPercent = caps.CPU.LoadPercent
|
|
|
|
if caps.CPU.LoadPercent > healthSettings.MaxCPULoadPercent {
|
|
status.CPUHealthy = false
|
|
status.Healthy = false
|
|
if status.Reason != "" {
|
|
status.Reason += "; "
|
|
}
|
|
status.Reason += "CPU overloaded"
|
|
}
|
|
}
|
|
|
|
// Check latency health
|
|
if caps.Bandwidth != nil {
|
|
status.LatencyMs = caps.Bandwidth.LatencyMs
|
|
|
|
if caps.Bandwidth.LatencyMs > healthSettings.MaxLatencyMs {
|
|
status.LatencyHealthy = false
|
|
status.Healthy = false
|
|
if status.Reason != "" {
|
|
status.Reason += "; "
|
|
}
|
|
status.Reason += "high latency"
|
|
}
|
|
}
|
|
|
|
return status
|
|
}
|
|
|
|
// IsHealthy returns true if the runner is healthy enough for job assignment
|
|
func (r *ActionRunner) IsHealthy() bool {
|
|
if !setting.Actions.RunnerHealthCheck.Enabled {
|
|
return true
|
|
}
|
|
return r.GetHealthStatus().Healthy
|
|
}
|
|
|
|
// NeedsCleanup returns true if the runner should perform cleanup
|
|
func (r *ActionRunner) NeedsCleanup() bool {
|
|
status := r.GetHealthStatus()
|
|
return status.NeedsCleanup
|
|
}
|
|
|
|
// RunnerCleanupRequest tracks cleanup requests sent to runners
|
|
type RunnerCleanupRequest struct {
|
|
ID int64 `xorm:"pk autoincr"`
|
|
RunnerID int64 `xorm:"INDEX NOT NULL"`
|
|
RequestedAt timeutil.TimeStamp `xorm:"created INDEX"`
|
|
CompletedAt timeutil.TimeStamp `xorm:"INDEX"`
|
|
Success bool
|
|
BytesFreed int64
|
|
ErrorMsg string `xorm:"TEXT"`
|
|
}
|
|
|
|
func init() {
|
|
db.RegisterModel(new(RunnerCleanupRequest))
|
|
}
|
|
|
|
// TableName returns the table name for RunnerCleanupRequest
|
|
func (RunnerCleanupRequest) TableName() string {
|
|
return "runner_cleanup_request"
|
|
}
|
|
|
|
// CreateCleanupRequest creates a new cleanup request for a runner
|
|
func CreateCleanupRequest(ctx context.Context, runnerID int64) (*RunnerCleanupRequest, error) {
|
|
req := &RunnerCleanupRequest{
|
|
RunnerID: runnerID,
|
|
}
|
|
_, err := db.GetEngine(ctx).Insert(req)
|
|
return req, err
|
|
}
|
|
|
|
// GetLastCleanupRequest returns the last cleanup request for a runner
|
|
func GetLastCleanupRequest(ctx context.Context, runnerID int64) (*RunnerCleanupRequest, error) {
|
|
req := &RunnerCleanupRequest{}
|
|
has, err := db.GetEngine(ctx).Where("runner_id = ?", runnerID).
|
|
OrderBy("requested_at DESC").
|
|
Limit(1).
|
|
Get(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !has {
|
|
return nil, nil
|
|
}
|
|
return req, nil
|
|
}
|
|
|
|
// GetPendingCleanupRequest returns the pending (uncompleted) cleanup request for a runner
|
|
func GetPendingCleanupRequest(ctx context.Context, runnerID int64) (*RunnerCleanupRequest, error) {
|
|
req := &RunnerCleanupRequest{}
|
|
has, err := db.GetEngine(ctx).Where("runner_id = ? AND completed_at = 0", runnerID).
|
|
OrderBy("requested_at DESC").
|
|
Limit(1).
|
|
Get(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !has {
|
|
return nil, nil
|
|
}
|
|
return req, nil
|
|
}
|
|
|
|
// CanRequestCleanup checks if we can request cleanup (respects cooldown)
|
|
func CanRequestCleanup(ctx context.Context, runnerID int64) (bool, error) {
|
|
lastReq, err := GetLastCleanupRequest(ctx, runnerID)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if lastReq == nil {
|
|
return true, nil
|
|
}
|
|
|
|
cooldown := setting.Actions.RunnerHealthCheck.CleanupCooldown
|
|
if time.Since(lastReq.RequestedAt.AsTime()) < cooldown {
|
|
return false, nil
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
// CompleteCleanupRequest marks a cleanup request as completed
|
|
func CompleteCleanupRequest(ctx context.Context, id int64, success bool, bytesFreed int64, errorMsg string) error {
|
|
_, err := db.GetEngine(ctx).ID(id).Cols("completed_at", "success", "bytes_freed", "error_msg").Update(&RunnerCleanupRequest{
|
|
CompletedAt: timeutil.TimeStampNow(),
|
|
Success: success,
|
|
BytesFreed: bytesFreed,
|
|
ErrorMsg: errorMsg,
|
|
})
|
|
return err
|
|
}
|
|
|
|
// GetUnhealthyRunners returns all runners that are unhealthy
|
|
func GetUnhealthyRunners(ctx context.Context) ([]*ActionRunner, error) {
|
|
var runners []*ActionRunner
|
|
err := db.GetEngine(ctx).Where("deleted_unix = 0").Find(&runners)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var unhealthy []*ActionRunner
|
|
for _, r := range runners {
|
|
if !r.IsOnline() {
|
|
continue // Skip offline runners
|
|
}
|
|
if !r.IsHealthy() {
|
|
unhealthy = append(unhealthy, r)
|
|
}
|
|
}
|
|
return unhealthy, nil
|
|
}
|