2
0
Files
gitcaddy-server/models/actions/runner_health.go
logikonline af6d4addd4
All checks were successful
Build and Release / Create Release (push) Successful in 0s
Build and Release / Integration Tests (PostgreSQL) (push) Successful in 3m7s
Build and Release / Lint (push) Successful in 5m21s
Build and Release / Unit Tests (push) Successful in 5m46s
Build and Release / Build Binaries (amd64, linux, linux-latest) (push) Successful in 3m44s
Build and Release / Build Binaries (amd64, darwin, linux-latest) (push) Successful in 4m4s
Build and Release / Build Binaries (arm64, darwin, linux-latest) (push) Successful in 3m23s
Build and Release / Build Binaries (arm64, linux, linux-latest) (push) Successful in 3m47s
Build and Release / Build Binaries (amd64, windows, windows-latest) (push) Successful in 8h6m28s
code.caddy rename
2026-01-17 09:02:21 -05:00

267 lines
7.4 KiB
Go

// Copyright 2026 MarketAlly. All rights reserved.
// SPDX-License-Identifier: MIT
package actions
import (
"context"
"time"
"code.gitcaddy.com/server/models/db"
"code.gitcaddy.com/server/modules/json"
"code.gitcaddy.com/server/modules/log"
"code.gitcaddy.com/server/modules/setting"
"code.gitcaddy.com/server/modules/timeutil"
)
// RunnerCapabilities is the decoded form of a runner's CapabilitiesJSON
// payload. The Disk, CPU and Bandwidth sections are pointers, so a section
// that is absent from the JSON document stays nil after decoding.
type RunnerCapabilities struct {
	// OS holds the "os" key (presumably the runner's operating system).
	OS string `json:"os"`

	// Arch holds the "arch" key (presumably the runner's CPU architecture).
	Arch string `json:"arch"`

	// Disk is the disk usage section, or nil when not reported.
	Disk *DiskInfo `json:"disk"`

	// CPU is the CPU load section, or nil when not reported.
	CPU *CPUInfo `json:"cpu"`

	// Bandwidth is the network measurement section, or nil when not reported.
	Bandwidth *BandwidthInfo `json:"bandwidth"`
}
// DiskInfo is the disk usage section of a runner's capabilities report.
type DiskInfo struct {
	// TotalBytes is decoded from "total_bytes".
	TotalBytes int64 `json:"total_bytes"`

	// FreeBytes is decoded from "free_bytes".
	FreeBytes int64 `json:"free_bytes"`

	// UsedBytes is decoded from "used_bytes".
	UsedBytes int64 `json:"used_bytes"`

	// UsedPercent is decoded from "used_percent". GetHealthStatus derives
	// the free share as 100 - UsedPercent, so the value is on a 0-100 scale.
	UsedPercent float64 `json:"used_percent"`
}
// CPUInfo is the CPU load section of a runner's capabilities report.
type CPUInfo struct {
	// NumCPU is the number of logical CPUs.
	NumCPU int `json:"num_cpu"`

	// LoadAvg1m is the 1-minute load average.
	LoadAvg1m float64 `json:"load_avg_1m"`

	// LoadAvg5m is the 5-minute load average.
	LoadAvg5m float64 `json:"load_avg_5m"`

	// LoadAvg15m is the 15-minute load average.
	LoadAvg15m float64 `json:"load_avg_15m"`

	// LoadPercent is (load_avg_1m / num_cpu) * 100. It is the value
	// GetHealthStatus compares against the configured CPU load limit.
	LoadPercent float64 `json:"load_percent"`
}
// BandwidthInfo is the network performance section of a runner's
// capabilities report.
type BandwidthInfo struct {
	// DownloadMbps is decoded from "download_mbps".
	DownloadMbps float64 `json:"download_mbps"`

	// LatencyMs is decoded from "latency_ms". It is the value
	// GetHealthStatus compares against the configured latency limit.
	LatencyMs float64 `json:"latency_ms"`

	// TestedAt is decoded from "tested_at" (presumably the time the
	// measurement was taken).
	TestedAt time.Time `json:"tested_at"`
}
// RunnerHealthStatus is the outcome of evaluating a runner's reported
// capabilities against the configured health thresholds. It is produced by
// (*ActionRunner).GetHealthStatus.
type RunnerHealthStatus struct {
	// Healthy is false as soon as any individual check fails.
	Healthy bool `json:"healthy"`

	// DiskHealthy, CPUHealthy and LatencyHealthy carry the per-check results.
	DiskHealthy    bool `json:"disk_healthy"`
	CPUHealthy     bool `json:"cpu_healthy"`
	LatencyHealthy bool `json:"latency_healthy"`

	// The measurements below are copied from the capabilities report and
	// stay at zero when the corresponding section was not reported.
	DiskUsedPercent float64 `json:"disk_used_percent"`
	DiskFreeBytes   int64   `json:"disk_free_bytes"`
	CPULoadPercent  float64 `json:"cpu_load_percent"`
	LatencyMs       float64 `json:"latency_ms"`

	// Reason is a "; "-separated list of failed checks, or a note that no
	// capabilities were reported. It is omitted from JSON when empty.
	Reason string `json:"reason,omitempty"`

	// NeedsCleanup reports that disk usage crossed a cleanup threshold.
	NeedsCleanup bool `json:"needs_cleanup"`
}
// GetCapabilities decodes the runner's CapabilitiesJSON into a
// RunnerCapabilities value.
//
// It returns nil when CapabilitiesJSON is empty or cannot be unmarshalled. A
// decoding failure is also reported through log.Error, so callers only have
// to handle the nil result.
func (r *ActionRunner) GetCapabilities() *RunnerCapabilities {
	raw := r.CapabilitiesJSON
	if raw == "" {
		return nil
	}

	caps := new(RunnerCapabilities)
	err := json.Unmarshal([]byte(raw), caps)
	if err != nil {
		log.Error("Failed to parse runner %s capabilities: %v", r.Name, err)
		return nil
	}
	return caps
}
// GetHealthStatus evaluates the runner's reported capabilities against the
// thresholds in setting.Actions.RunnerHealthCheck and returns the result.
//
// The status starts out fully healthy. The disk, CPU and latency checks each
// run only when the matching section is present in the capabilities; a
// failing check clears Healthy and its per-check flag, and appends a short
// description to Reason (descriptions are joined with "; "). A runner with no
// usable capabilities is reported as healthy, with Reason noting that nothing
// was reported. The Enabled setting is not consulted here; see IsHealthy.
func (r *ActionRunner) GetHealthStatus() *RunnerHealthStatus {
	status := &RunnerHealthStatus{
		Healthy:        true,
		DiskHealthy:    true,
		CPUHealthy:     true,
		LatencyHealthy: true,
	}

	caps := r.GetCapabilities()
	if caps == nil {
		// Nothing to evaluate: assume healthy, but say why no checks ran.
		status.Reason = "no capabilities reported"
		return status
	}

	// markUnhealthy clears the overall flag and accumulates the reason text.
	markUnhealthy := func(why string) {
		status.Healthy = false
		if status.Reason != "" {
			status.Reason += "; "
		}
		status.Reason += why
	}

	cfg := setting.Actions.RunnerHealthCheck

	// Disk: unhealthy when the free share drops below the minimum; cleanup
	// is wanted then, and also once usage reaches the cleanup threshold.
	if disk := caps.Disk; disk != nil {
		status.DiskUsedPercent = disk.UsedPercent
		status.DiskFreeBytes = disk.FreeBytes

		lowOnSpace := 100.0-disk.UsedPercent < cfg.MinDiskPercent
		if lowOnSpace {
			status.DiskHealthy = false
			markUnhealthy("insufficient disk space")
		}
		status.NeedsCleanup = lowOnSpace || disk.UsedPercent >= cfg.MaxDiskUsagePercent
	}

	// CPU: unhealthy when the reported load percentage exceeds the limit.
	if cpu := caps.CPU; cpu != nil {
		status.CPULoadPercent = cpu.LoadPercent
		if cpu.LoadPercent > cfg.MaxCPULoadPercent {
			status.CPUHealthy = false
			markUnhealthy("CPU overloaded")
		}
	}

	// Latency: unhealthy when the measured latency exceeds the limit.
	if bw := caps.Bandwidth; bw != nil {
		status.LatencyMs = bw.LatencyMs
		if bw.LatencyMs > cfg.MaxLatencyMs {
			status.LatencyHealthy = false
			markUnhealthy("high latency")
		}
	}

	return status
}
// IsHealthy reports whether the runner is healthy enough to be assigned
// jobs. When runner health checking is disabled in the settings, every runner
// counts as healthy and the capabilities are not inspected.
func (r *ActionRunner) IsHealthy() bool {
	if setting.Actions.RunnerHealthCheck.Enabled {
		return r.GetHealthStatus().Healthy
	}
	// Health checking is switched off: never exclude a runner.
	return true
}
// NeedsCleanup reports whether the runner's current health status asks for a
// cleanup. It is a shorthand for GetHealthStatus().NeedsCleanup and, unlike
// IsHealthy, does not consult the Enabled setting.
func (r *ActionRunner) NeedsCleanup() bool {
	return r.GetHealthStatus().NeedsCleanup
}
// RunnerCleanupRequest is the database record of a cleanup request sent to a
// runner, together with the outcome once the request has been completed.
type RunnerCleanupRequest struct {
	// ID is the auto-increment primary key.
	ID int64 `xorm:"pk autoincr"`

	// RunnerID identifies the runner the request was created for.
	RunnerID int64 `xorm:"INDEX NOT NULL"`

	// RequestedAt carries xorm's "created" tag, so it is set on insert.
	RequestedAt timeutil.TimeStamp `xorm:"created INDEX"`

	// CompletedAt is set by CompleteCleanupRequest. Pending requests are
	// looked up with completed_at = 0.
	CompletedAt timeutil.TimeStamp `xorm:"INDEX"`

	// Success, BytesFreed and ErrorMsg hold the outcome recorded by
	// CompleteCleanupRequest.
	Success    bool
	BytesFreed int64
	ErrorMsg   string `xorm:"TEXT"`
}
// init registers RunnerCleanupRequest with the db package's model registry.
func init() {
	db.RegisterModel(&RunnerCleanupRequest{})
}
// TableName returns the name of the database table that stores
// RunnerCleanupRequest rows.
func (RunnerCleanupRequest) TableName() string {
	const name = "runner_cleanup_request"
	return name
}
// CreateCleanupRequest inserts a new cleanup request row for the given
// runner and returns the stored record.
//
// Only RunnerID is set explicitly; ID and RequestedAt are expected to be
// filled in by xorm on insert through the "pk autoincr" and "created" tags.
// Cooldown and pending-request checks are not performed here; callers should
// consult CanRequestCleanup first.
//
// On failure the returned request is nil, matching GetLastCleanupRequest and
// GetPendingCleanupRequest, so callers never see a record that was not
// actually persisted.
func CreateCleanupRequest(ctx context.Context, runnerID int64) (*RunnerCleanupRequest, error) {
	req := &RunnerCleanupRequest{
		RunnerID: runnerID,
	}
	if _, err := db.GetEngine(ctx).Insert(req); err != nil {
		return nil, err
	}
	return req, nil
}
// GetLastCleanupRequest returns the cleanup request with the latest
// requested_at for the given runner, whether or not it has completed.
//
// It returns (nil, nil) when the runner has no cleanup requests, and
// (nil, err) when the query fails.
func GetLastCleanupRequest(ctx context.Context, runnerID int64) (*RunnerCleanupRequest, error) {
	latest := new(RunnerCleanupRequest)
	found, err := db.GetEngine(ctx).
		Where("runner_id = ?", runnerID).
		OrderBy("requested_at DESC").
		Limit(1).
		Get(latest)

	switch {
	case err != nil:
		return nil, err
	case !found:
		return nil, nil
	default:
		return latest, nil
	}
}
// GetPendingCleanupRequest returns the latest cleanup request for the given
// runner that has not been completed yet, i.e. whose completed_at is still 0.
//
// It returns (nil, nil) when there is no pending request, and (nil, err) when
// the query fails.
func GetPendingCleanupRequest(ctx context.Context, runnerID int64) (*RunnerCleanupRequest, error) {
	var pending RunnerCleanupRequest

	sess := db.GetEngine(ctx).
		Where("runner_id = ? AND completed_at = 0", runnerID).
		OrderBy("requested_at DESC").
		Limit(1)

	found, err := sess.Get(&pending)
	if err != nil || !found {
		// err is nil in the not-found case, giving (nil, nil).
		return nil, err
	}
	return &pending, nil
}
// CanRequestCleanup reports whether a new cleanup request may be created for
// the given runner without violating the configured cleanup cooldown.
//
// A runner with no earlier request is always allowed. Otherwise the request
// is allowed once at least CleanupCooldown has elapsed since the latest
// request was created; its completion state is not considered. A lookup
// failure yields (false, err).
func CanRequestCleanup(ctx context.Context, runnerID int64) (bool, error) {
	last, err := GetLastCleanupRequest(ctx, runnerID)
	if err != nil {
		return false, err
	}
	if last == nil {
		// First request for this runner: there is nothing to cool down from.
		return true, nil
	}

	elapsed := time.Since(last.RequestedAt.AsTime())
	return elapsed >= setting.Actions.RunnerHealthCheck.CleanupCooldown, nil
}
// CompleteCleanupRequest marks a cleanup request as completed
func CompleteCleanupRequest(ctx context.Context, id int64, success bool, bytesFreed int64, errorMsg string) error {
_, err := db.GetEngine(ctx).ID(id).Cols("completed_at", "success", "bytes_freed", "error_msg").Update(&RunnerCleanupRequest{
CompletedAt: timeutil.TimeStampNow(),
Success: success,
BytesFreed: bytesFreed,
ErrorMsg: errorMsg,
})
return err
}
// GetUnhealthyRunners returns every non-deleted runner that is currently
// online but fails IsHealthy. Offline runners are never included.
//
// All matching runners are loaded and then filtered in memory. The result is
// nil when no runner qualifies.
//
// NOTE(review): the filter assumes the runner table has a deleted_unix
// column; confirm this against the ActionRunner model definition.
func GetUnhealthyRunners(ctx context.Context) ([]*ActionRunner, error) {
	var candidates []*ActionRunner
	if err := db.GetEngine(ctx).Where("deleted_unix = 0").Find(&candidates); err != nil {
		return nil, err
	}

	var unhealthy []*ActionRunner
	for _, runner := range candidates {
		// Only online runners are health-checked.
		if runner.IsOnline() && !runner.IsHealthy() {
			unhealthy = append(unhealthy, runner)
		}
	}
	return unhealthy, nil
}