This commit is contained in:
2025-07-20 04:08:08 -04:00
commit e0ca63ebdf
48 changed files with 7913 additions and 0 deletions

122
src/Batch/BatchJob.php Normal file
View File

@@ -0,0 +1,122 @@
<?php
declare(strict_types=1);
namespace SqrtSpace\SpaceTime\Batch;
use SqrtSpace\SpaceTime\Checkpoint\CheckpointManager;
/**
* Abstract batch job for queue processing
*/
abstract class BatchJob
{
protected array $options;
protected BatchProcessor $processor;
protected ?CheckpointManager $checkpoint = null;
public function __construct(array $options = [])
{
$this->options = array_merge($this->getDefaultOptions(), $options);
$this->processor = new BatchProcessor($this->options);
}
/**
* Get job ID for checkpointing
*/
public function getJobId(): string
{
return static::class . '_' . $this->getUniqueId();
}
/**
* Execute the batch job
*/
public function execute(): BatchResult
{
// Get items to process
$items = $this->getItems();
// Process items
$result = $this->processor->process(
$items,
[$this, 'processItem'],
$this->getJobId()
);
// Handle completion
if ($result->isComplete()) {
$this->onComplete($result);
} else {
$this->onError($result);
}
return $result;
}
/**
* Get items to process
*/
abstract protected function getItems(): iterable;
/**
* Process single item
*/
abstract public function processItem(array $batch): array;
/**
* Get unique identifier for this job instance
*/
abstract protected function getUniqueId(): string;
/**
* Called when job completes successfully
*/
protected function onComplete(BatchResult $result): void
{
// Override in subclass
}
/**
* Called when job has errors
*/
protected function onError(BatchResult $result): void
{
// Override in subclass
}
/**
* Get default options
*/
protected function getDefaultOptions(): array
{
return [
'batch_size' => null,
'checkpoint_enabled' => true,
'max_retries' => 3,
];
}
/**
* Resume job from checkpoint
*/
public function resume(): BatchResult
{
$checkpoint = new CheckpointManager($this->getJobId());
if (!$checkpoint->exists()) {
throw new \RuntimeException('No checkpoint found for job: ' . $this->getJobId());
}
return $this->execute();
}
/**
* Check if job can be resumed
*/
public function canResume(): bool
{
$checkpoint = new CheckpointManager($this->getJobId());
return $checkpoint->exists();
}
}

View File

@@ -0,0 +1,267 @@
<?php
declare(strict_types=1);
namespace SqrtSpace\SpaceTime\Batch;
use SqrtSpace\SpaceTime\SpaceTimeConfig;
use SqrtSpace\SpaceTime\Memory\MemoryPressureMonitor;
use SqrtSpace\SpaceTime\Memory\MemoryPressureLevel;
use SqrtSpace\SpaceTime\Checkpoint\CheckpointManager;
/**
* Process large datasets in memory-efficient batches
*/
class BatchProcessor
{
private MemoryPressureMonitor $memoryMonitor;
private ?CheckpointManager $checkpoint = null;
private array $options;
public function __construct(array $options = [])
{
$this->options = array_merge([
'batch_size' => null, // Auto-calculate if null
'memory_threshold' => 0.8, // 80% memory usage
'checkpoint_enabled' => true,
'progress_callback' => null,
'error_handler' => null,
'max_retries' => 3,
], $options);
$this->memoryMonitor = new MemoryPressureMonitor();
}
/**
* Process items in batches
*/
public function process(iterable $items, callable $processor, ?string $checkpointId = null): BatchResult
{
$result = new BatchResult();
// Setup checkpoint if enabled
if ($this->options['checkpoint_enabled'] && $checkpointId) {
$this->checkpoint = new CheckpointManager($checkpointId);
// Resume from checkpoint if exists
if ($this->checkpoint->exists()) {
$state = $this->checkpoint->load();
$result->restore($state);
}
}
// Calculate batch size
$batchSize = $this->calculateBatchSize($items);
// Process batches
$batch = [];
$batchNumber = 0;
foreach ($items as $key => $item) {
// Skip already processed items
if ($result->isProcessed($key)) {
continue;
}
$batch[$key] = $item;
// Process batch when full or memory pressure
if (count($batch) >= $batchSize || $this->shouldProcessBatch()) {
$this->processBatch($batch, $processor, $result, $batchNumber);
$batch = [];
$batchNumber++;
}
}
// Process remaining items
if (!empty($batch)) {
$this->processBatch($batch, $processor, $result, $batchNumber);
}
// Clean up checkpoint on success
if ($this->checkpoint && $result->isComplete()) {
$this->checkpoint->delete();
}
return $result;
}
/**
* Process items in parallel batches
*/
public function processParallel(iterable $items, callable $processor, int $workers = 4): BatchResult
{
if (!function_exists('pcntl_fork')) {
throw new \RuntimeException('Parallel processing requires pcntl extension');
}
$result = new BatchResult();
$chunks = $this->splitIntoChunks($items, $workers);
$pids = [];
foreach ($chunks as $i => $chunk) {
$pid = pcntl_fork();
if ($pid === -1) {
throw new \RuntimeException('Failed to fork process');
} elseif ($pid === 0) {
// Child process
$chunkResult = $this->process($chunk, $processor);
// Write result to shared memory or file
$this->saveChunkResult($i, $chunkResult);
exit(0);
} else {
// Parent process
$pids[$i] = $pid;
}
}
// Wait for all children
foreach ($pids as $i => $pid) {
pcntl_waitpid($pid, $status);
// Merge chunk result
$chunkResult = $this->loadChunkResult($i);
$result->merge($chunkResult);
}
return $result;
}
/**
* Process batch with error handling
*/
private function processBatch(array $batch, callable $processor, BatchResult $result, int $batchNumber): void
{
$retries = 0;
$success = false;
while (!$success && $retries < $this->options['max_retries']) {
try {
// Call progress callback
if ($this->options['progress_callback']) {
($this->options['progress_callback'])($batchNumber, count($batch), $result);
}
// Process batch
$batchResult = $processor($batch);
// Record results
foreach ($batch as $key => $item) {
$result->addSuccess($key, $batchResult[$key] ?? null);
}
$success = true;
} catch (\Exception $e) {
$retries++;
if ($retries >= $this->options['max_retries']) {
// Record failures
foreach ($batch as $key => $item) {
$result->addError($key, $e);
}
// Call error handler
if ($this->options['error_handler']) {
($this->options['error_handler'])($e, $batch);
}
} else {
// Wait before retry
sleep(pow(2, $retries)); // Exponential backoff
}
}
}
// Save checkpoint
if ($this->checkpoint && $this->checkpoint->shouldCheckpoint()) {
$this->checkpoint->save($result->getState());
}
}
/**
* Calculate optimal batch size
*/
private function calculateBatchSize(iterable $items): int
{
if ($this->options['batch_size'] !== null) {
return $this->options['batch_size'];
}
// Estimate based on available memory
$memoryInfo = $this->memoryMonitor->getMemoryInfo();
$availableMemory = $memoryInfo['available'];
// Estimate item size (sample first few items)
$sampleSize = 10;
$totalSize = 0;
$count = 0;
foreach ($items as $item) {
$totalSize += strlen(serialize($item));
$count++;
if ($count >= $sampleSize) {
break;
}
}
if ($count === 0) {
return 100; // Default
}
$avgItemSize = $totalSize / $count;
$targetMemoryUsage = $availableMemory * 0.5; // Use 50% of available memory
return max(10, min(10000, (int)($targetMemoryUsage / $avgItemSize)));
}
/**
* Check if batch should be processed due to memory pressure
*/
private function shouldProcessBatch(): bool
{
$level = $this->memoryMonitor->check();
return $level->isHigherThan(MemoryPressureLevel::MEDIUM);
}
/**
* Split items into chunks for parallel processing
*/
private function splitIntoChunks(iterable $items, int $numChunks): array
{
$chunks = array_fill(0, $numChunks, []);
$i = 0;
foreach ($items as $key => $item) {
$chunks[$i % $numChunks][$key] = $item;
$i++;
}
return $chunks;
}
/**
* Save chunk result (simplified - use shared memory in production)
*/
private function saveChunkResult(int $chunkId, BatchResult $result): void
{
$filename = sys_get_temp_dir() . "/batch_chunk_{$chunkId}.tmp";
file_put_contents($filename, serialize($result));
}
/**
* Load chunk result
*/
private function loadChunkResult(int $chunkId): BatchResult
{
$filename = sys_get_temp_dir() . "/batch_chunk_{$chunkId}.tmp";
$result = unserialize(file_get_contents($filename));
unlink($filename);
return $result;
}
}

206
src/Batch/BatchResult.php Normal file
View File

@@ -0,0 +1,206 @@
<?php
declare(strict_types=1);
namespace SqrtSpace\SpaceTime\Batch;
/**
* Result container for batch processing
*/
class BatchResult
{
private array $processed = [];
private array $errors = [];
private array $results = [];
private int $successCount = 0;
private int $errorCount = 0;
private float $startTime;
private float $endTime;
public function __construct()
{
$this->startTime = microtime(true);
}
/**
* Add successful result
*/
public function addSuccess(string|int $key, mixed $result = null): void
{
$this->processed[$key] = true;
$this->results[$key] = $result;
$this->successCount++;
}
/**
* Add error
*/
public function addError(string|int $key, \Throwable $error): void
{
$this->processed[$key] = true;
$this->errors[$key] = [
'message' => $error->getMessage(),
'code' => $error->getCode(),
'file' => $error->getFile(),
'line' => $error->getLine(),
];
$this->errorCount++;
}
/**
* Check if item was processed
*/
public function isProcessed(string|int $key): bool
{
return isset($this->processed[$key]);
}
/**
* Check if all items were successful
*/
public function isComplete(): bool
{
return $this->errorCount === 0;
}
/**
* Get total processed count
*/
public function getProcessedCount(): int
{
return $this->successCount + $this->errorCount;
}
/**
* Get success count
*/
public function getSuccessCount(): int
{
return $this->successCount;
}
/**
* Get error count
*/
public function getErrorCount(): int
{
return $this->errorCount;
}
/**
* Get all errors
*/
public function getErrors(): array
{
return $this->errors;
}
/**
* Get all results
*/
public function getResults(): array
{
return $this->results;
}
/**
* Get result for specific key
*/
public function getResult(string|int $key): mixed
{
return $this->results[$key] ?? null;
}
/**
* Get error for specific key
*/
public function getError(string|int $key): ?array
{
return $this->errors[$key] ?? null;
}
/**
* Get execution time
*/
public function getExecutionTime(): float
{
$endTime = $this->endTime ?? microtime(true);
return $endTime - $this->startTime;
}
/**
* Mark as finished
*/
public function finish(): void
{
$this->endTime = microtime(true);
}
/**
* Get state for checkpointing
*/
public function getState(): array
{
return [
'processed' => $this->processed,
'errors' => $this->errors,
'results' => $this->results,
'success_count' => $this->successCount,
'error_count' => $this->errorCount,
'start_time' => $this->startTime,
];
}
/**
* Restore from checkpoint state
*/
public function restore(array $state): void
{
$this->processed = $state['processed'] ?? [];
$this->errors = $state['errors'] ?? [];
$this->results = $state['results'] ?? [];
$this->successCount = $state['success_count'] ?? 0;
$this->errorCount = $state['error_count'] ?? 0;
$this->startTime = $state['start_time'] ?? microtime(true);
}
/**
* Merge another result
*/
public function merge(BatchResult $other): void
{
foreach ($other->processed as $key => $value) {
$this->processed[$key] = $value;
}
foreach ($other->results as $key => $result) {
$this->results[$key] = $result;
}
foreach ($other->errors as $key => $error) {
$this->errors[$key] = $error;
}
$this->successCount += $other->successCount;
$this->errorCount += $other->errorCount;
}
/**
* Get summary statistics
*/
public function getSummary(): array
{
return [
'total_processed' => $this->getProcessedCount(),
'success_count' => $this->successCount,
'error_count' => $this->errorCount,
'success_rate' => $this->getProcessedCount() > 0
? ($this->successCount / $this->getProcessedCount()) * 100
: 0,
'execution_time' => $this->getExecutionTime(),
'items_per_second' => $this->getExecutionTime() > 0
? $this->getProcessedCount() / $this->getExecutionTime()
: 0,
];
}
}