Initial
This commit is contained in:
122
src/Batch/BatchJob.php
Normal file
122
src/Batch/BatchJob.php
Normal file
@@ -0,0 +1,122 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace SqrtSpace\SpaceTime\Batch;
|
||||
|
||||
use SqrtSpace\SpaceTime\Checkpoint\CheckpointManager;
|
||||
|
||||
/**
|
||||
* Abstract batch job for queue processing
|
||||
*/
|
||||
abstract class BatchJob
|
||||
{
|
||||
protected array $options;
|
||||
protected BatchProcessor $processor;
|
||||
protected ?CheckpointManager $checkpoint = null;
|
||||
|
||||
public function __construct(array $options = [])
|
||||
{
|
||||
$this->options = array_merge($this->getDefaultOptions(), $options);
|
||||
$this->processor = new BatchProcessor($this->options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get job ID for checkpointing
|
||||
*/
|
||||
public function getJobId(): string
|
||||
{
|
||||
return static::class . '_' . $this->getUniqueId();
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the batch job
|
||||
*/
|
||||
public function execute(): BatchResult
|
||||
{
|
||||
// Get items to process
|
||||
$items = $this->getItems();
|
||||
|
||||
// Process items
|
||||
$result = $this->processor->process(
|
||||
$items,
|
||||
[$this, 'processItem'],
|
||||
$this->getJobId()
|
||||
);
|
||||
|
||||
// Handle completion
|
||||
if ($result->isComplete()) {
|
||||
$this->onComplete($result);
|
||||
} else {
|
||||
$this->onError($result);
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get items to process
|
||||
*/
|
||||
abstract protected function getItems(): iterable;
|
||||
|
||||
/**
|
||||
* Process single item
|
||||
*/
|
||||
abstract public function processItem(array $batch): array;
|
||||
|
||||
/**
|
||||
* Get unique identifier for this job instance
|
||||
*/
|
||||
abstract protected function getUniqueId(): string;
|
||||
|
||||
/**
|
||||
* Called when job completes successfully
|
||||
*/
|
||||
protected function onComplete(BatchResult $result): void
|
||||
{
|
||||
// Override in subclass
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when job has errors
|
||||
*/
|
||||
protected function onError(BatchResult $result): void
|
||||
{
|
||||
// Override in subclass
|
||||
}
|
||||
|
||||
/**
|
||||
* Get default options
|
||||
*/
|
||||
protected function getDefaultOptions(): array
|
||||
{
|
||||
return [
|
||||
'batch_size' => null,
|
||||
'checkpoint_enabled' => true,
|
||||
'max_retries' => 3,
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Resume job from checkpoint
|
||||
*/
|
||||
public function resume(): BatchResult
|
||||
{
|
||||
$checkpoint = new CheckpointManager($this->getJobId());
|
||||
|
||||
if (!$checkpoint->exists()) {
|
||||
throw new \RuntimeException('No checkpoint found for job: ' . $this->getJobId());
|
||||
}
|
||||
|
||||
return $this->execute();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if job can be resumed
|
||||
*/
|
||||
public function canResume(): bool
|
||||
{
|
||||
$checkpoint = new CheckpointManager($this->getJobId());
|
||||
return $checkpoint->exists();
|
||||
}
|
||||
}
|
||||
267
src/Batch/BatchProcessor.php
Normal file
267
src/Batch/BatchProcessor.php
Normal file
@@ -0,0 +1,267 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace SqrtSpace\SpaceTime\Batch;
|
||||
|
||||
use SqrtSpace\SpaceTime\SpaceTimeConfig;
|
||||
use SqrtSpace\SpaceTime\Memory\MemoryPressureMonitor;
|
||||
use SqrtSpace\SpaceTime\Memory\MemoryPressureLevel;
|
||||
use SqrtSpace\SpaceTime\Checkpoint\CheckpointManager;
|
||||
|
||||
/**
|
||||
* Process large datasets in memory-efficient batches
|
||||
*/
|
||||
class BatchProcessor
|
||||
{
|
||||
private MemoryPressureMonitor $memoryMonitor;
|
||||
private ?CheckpointManager $checkpoint = null;
|
||||
private array $options;
|
||||
|
||||
public function __construct(array $options = [])
|
||||
{
|
||||
$this->options = array_merge([
|
||||
'batch_size' => null, // Auto-calculate if null
|
||||
'memory_threshold' => 0.8, // 80% memory usage
|
||||
'checkpoint_enabled' => true,
|
||||
'progress_callback' => null,
|
||||
'error_handler' => null,
|
||||
'max_retries' => 3,
|
||||
], $options);
|
||||
|
||||
$this->memoryMonitor = new MemoryPressureMonitor();
|
||||
}
|
||||
|
||||
/**
|
||||
* Process items in batches
|
||||
*/
|
||||
public function process(iterable $items, callable $processor, ?string $checkpointId = null): BatchResult
|
||||
{
|
||||
$result = new BatchResult();
|
||||
|
||||
// Setup checkpoint if enabled
|
||||
if ($this->options['checkpoint_enabled'] && $checkpointId) {
|
||||
$this->checkpoint = new CheckpointManager($checkpointId);
|
||||
|
||||
// Resume from checkpoint if exists
|
||||
if ($this->checkpoint->exists()) {
|
||||
$state = $this->checkpoint->load();
|
||||
$result->restore($state);
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate batch size
|
||||
$batchSize = $this->calculateBatchSize($items);
|
||||
|
||||
// Process batches
|
||||
$batch = [];
|
||||
$batchNumber = 0;
|
||||
|
||||
foreach ($items as $key => $item) {
|
||||
// Skip already processed items
|
||||
if ($result->isProcessed($key)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$batch[$key] = $item;
|
||||
|
||||
// Process batch when full or memory pressure
|
||||
if (count($batch) >= $batchSize || $this->shouldProcessBatch()) {
|
||||
$this->processBatch($batch, $processor, $result, $batchNumber);
|
||||
$batch = [];
|
||||
$batchNumber++;
|
||||
}
|
||||
}
|
||||
|
||||
// Process remaining items
|
||||
if (!empty($batch)) {
|
||||
$this->processBatch($batch, $processor, $result, $batchNumber);
|
||||
}
|
||||
|
||||
// Clean up checkpoint on success
|
||||
if ($this->checkpoint && $result->isComplete()) {
|
||||
$this->checkpoint->delete();
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process items in parallel batches
|
||||
*/
|
||||
public function processParallel(iterable $items, callable $processor, int $workers = 4): BatchResult
|
||||
{
|
||||
if (!function_exists('pcntl_fork')) {
|
||||
throw new \RuntimeException('Parallel processing requires pcntl extension');
|
||||
}
|
||||
|
||||
$result = new BatchResult();
|
||||
$chunks = $this->splitIntoChunks($items, $workers);
|
||||
$pids = [];
|
||||
|
||||
foreach ($chunks as $i => $chunk) {
|
||||
$pid = pcntl_fork();
|
||||
|
||||
if ($pid === -1) {
|
||||
throw new \RuntimeException('Failed to fork process');
|
||||
} elseif ($pid === 0) {
|
||||
// Child process
|
||||
$chunkResult = $this->process($chunk, $processor);
|
||||
|
||||
// Write result to shared memory or file
|
||||
$this->saveChunkResult($i, $chunkResult);
|
||||
|
||||
exit(0);
|
||||
} else {
|
||||
// Parent process
|
||||
$pids[$i] = $pid;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for all children
|
||||
foreach ($pids as $i => $pid) {
|
||||
pcntl_waitpid($pid, $status);
|
||||
|
||||
// Merge chunk result
|
||||
$chunkResult = $this->loadChunkResult($i);
|
||||
$result->merge($chunkResult);
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process batch with error handling
|
||||
*/
|
||||
private function processBatch(array $batch, callable $processor, BatchResult $result, int $batchNumber): void
|
||||
{
|
||||
$retries = 0;
|
||||
$success = false;
|
||||
|
||||
while (!$success && $retries < $this->options['max_retries']) {
|
||||
try {
|
||||
// Call progress callback
|
||||
if ($this->options['progress_callback']) {
|
||||
($this->options['progress_callback'])($batchNumber, count($batch), $result);
|
||||
}
|
||||
|
||||
// Process batch
|
||||
$batchResult = $processor($batch);
|
||||
|
||||
// Record results
|
||||
foreach ($batch as $key => $item) {
|
||||
$result->addSuccess($key, $batchResult[$key] ?? null);
|
||||
}
|
||||
|
||||
$success = true;
|
||||
|
||||
} catch (\Exception $e) {
|
||||
$retries++;
|
||||
|
||||
if ($retries >= $this->options['max_retries']) {
|
||||
// Record failures
|
||||
foreach ($batch as $key => $item) {
|
||||
$result->addError($key, $e);
|
||||
}
|
||||
|
||||
// Call error handler
|
||||
if ($this->options['error_handler']) {
|
||||
($this->options['error_handler'])($e, $batch);
|
||||
}
|
||||
} else {
|
||||
// Wait before retry
|
||||
sleep(pow(2, $retries)); // Exponential backoff
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Save checkpoint
|
||||
if ($this->checkpoint && $this->checkpoint->shouldCheckpoint()) {
|
||||
$this->checkpoint->save($result->getState());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate optimal batch size
|
||||
*/
|
||||
private function calculateBatchSize(iterable $items): int
|
||||
{
|
||||
if ($this->options['batch_size'] !== null) {
|
||||
return $this->options['batch_size'];
|
||||
}
|
||||
|
||||
// Estimate based on available memory
|
||||
$memoryInfo = $this->memoryMonitor->getMemoryInfo();
|
||||
$availableMemory = $memoryInfo['available'];
|
||||
|
||||
// Estimate item size (sample first few items)
|
||||
$sampleSize = 10;
|
||||
$totalSize = 0;
|
||||
$count = 0;
|
||||
|
||||
foreach ($items as $item) {
|
||||
$totalSize += strlen(serialize($item));
|
||||
$count++;
|
||||
|
||||
if ($count >= $sampleSize) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ($count === 0) {
|
||||
return 100; // Default
|
||||
}
|
||||
|
||||
$avgItemSize = $totalSize / $count;
|
||||
$targetMemoryUsage = $availableMemory * 0.5; // Use 50% of available memory
|
||||
|
||||
return max(10, min(10000, (int)($targetMemoryUsage / $avgItemSize)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if batch should be processed due to memory pressure
|
||||
*/
|
||||
private function shouldProcessBatch(): bool
|
||||
{
|
||||
$level = $this->memoryMonitor->check();
|
||||
|
||||
return $level->isHigherThan(MemoryPressureLevel::MEDIUM);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split items into chunks for parallel processing
|
||||
*/
|
||||
private function splitIntoChunks(iterable $items, int $numChunks): array
|
||||
{
|
||||
$chunks = array_fill(0, $numChunks, []);
|
||||
$i = 0;
|
||||
|
||||
foreach ($items as $key => $item) {
|
||||
$chunks[$i % $numChunks][$key] = $item;
|
||||
$i++;
|
||||
}
|
||||
|
||||
return $chunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Save chunk result (simplified - use shared memory in production)
|
||||
*/
|
||||
private function saveChunkResult(int $chunkId, BatchResult $result): void
|
||||
{
|
||||
$filename = sys_get_temp_dir() . "/batch_chunk_{$chunkId}.tmp";
|
||||
file_put_contents($filename, serialize($result));
|
||||
}
|
||||
|
||||
/**
|
||||
* Load chunk result
|
||||
*/
|
||||
private function loadChunkResult(int $chunkId): BatchResult
|
||||
{
|
||||
$filename = sys_get_temp_dir() . "/batch_chunk_{$chunkId}.tmp";
|
||||
$result = unserialize(file_get_contents($filename));
|
||||
unlink($filename);
|
||||
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
206
src/Batch/BatchResult.php
Normal file
206
src/Batch/BatchResult.php
Normal file
@@ -0,0 +1,206 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace SqrtSpace\SpaceTime\Batch;
|
||||
|
||||
/**
|
||||
* Result container for batch processing
|
||||
*/
|
||||
class BatchResult
|
||||
{
|
||||
private array $processed = [];
|
||||
private array $errors = [];
|
||||
private array $results = [];
|
||||
private int $successCount = 0;
|
||||
private int $errorCount = 0;
|
||||
private float $startTime;
|
||||
private float $endTime;
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->startTime = microtime(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add successful result
|
||||
*/
|
||||
public function addSuccess(string|int $key, mixed $result = null): void
|
||||
{
|
||||
$this->processed[$key] = true;
|
||||
$this->results[$key] = $result;
|
||||
$this->successCount++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add error
|
||||
*/
|
||||
public function addError(string|int $key, \Throwable $error): void
|
||||
{
|
||||
$this->processed[$key] = true;
|
||||
$this->errors[$key] = [
|
||||
'message' => $error->getMessage(),
|
||||
'code' => $error->getCode(),
|
||||
'file' => $error->getFile(),
|
||||
'line' => $error->getLine(),
|
||||
];
|
||||
$this->errorCount++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if item was processed
|
||||
*/
|
||||
public function isProcessed(string|int $key): bool
|
||||
{
|
||||
return isset($this->processed[$key]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if all items were successful
|
||||
*/
|
||||
public function isComplete(): bool
|
||||
{
|
||||
return $this->errorCount === 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get total processed count
|
||||
*/
|
||||
public function getProcessedCount(): int
|
||||
{
|
||||
return $this->successCount + $this->errorCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get success count
|
||||
*/
|
||||
public function getSuccessCount(): int
|
||||
{
|
||||
return $this->successCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get error count
|
||||
*/
|
||||
public function getErrorCount(): int
|
||||
{
|
||||
return $this->errorCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all errors
|
||||
*/
|
||||
public function getErrors(): array
|
||||
{
|
||||
return $this->errors;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all results
|
||||
*/
|
||||
public function getResults(): array
|
||||
{
|
||||
return $this->results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get result for specific key
|
||||
*/
|
||||
public function getResult(string|int $key): mixed
|
||||
{
|
||||
return $this->results[$key] ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get error for specific key
|
||||
*/
|
||||
public function getError(string|int $key): ?array
|
||||
{
|
||||
return $this->errors[$key] ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get execution time
|
||||
*/
|
||||
public function getExecutionTime(): float
|
||||
{
|
||||
$endTime = $this->endTime ?? microtime(true);
|
||||
return $endTime - $this->startTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark as finished
|
||||
*/
|
||||
public function finish(): void
|
||||
{
|
||||
$this->endTime = microtime(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get state for checkpointing
|
||||
*/
|
||||
public function getState(): array
|
||||
{
|
||||
return [
|
||||
'processed' => $this->processed,
|
||||
'errors' => $this->errors,
|
||||
'results' => $this->results,
|
||||
'success_count' => $this->successCount,
|
||||
'error_count' => $this->errorCount,
|
||||
'start_time' => $this->startTime,
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Restore from checkpoint state
|
||||
*/
|
||||
public function restore(array $state): void
|
||||
{
|
||||
$this->processed = $state['processed'] ?? [];
|
||||
$this->errors = $state['errors'] ?? [];
|
||||
$this->results = $state['results'] ?? [];
|
||||
$this->successCount = $state['success_count'] ?? 0;
|
||||
$this->errorCount = $state['error_count'] ?? 0;
|
||||
$this->startTime = $state['start_time'] ?? microtime(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge another result
|
||||
*/
|
||||
public function merge(BatchResult $other): void
|
||||
{
|
||||
foreach ($other->processed as $key => $value) {
|
||||
$this->processed[$key] = $value;
|
||||
}
|
||||
|
||||
foreach ($other->results as $key => $result) {
|
||||
$this->results[$key] = $result;
|
||||
}
|
||||
|
||||
foreach ($other->errors as $key => $error) {
|
||||
$this->errors[$key] = $error;
|
||||
}
|
||||
|
||||
$this->successCount += $other->successCount;
|
||||
$this->errorCount += $other->errorCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get summary statistics
|
||||
*/
|
||||
public function getSummary(): array
|
||||
{
|
||||
return [
|
||||
'total_processed' => $this->getProcessedCount(),
|
||||
'success_count' => $this->successCount,
|
||||
'error_count' => $this->errorCount,
|
||||
'success_rate' => $this->getProcessedCount() > 0
|
||||
? ($this->successCount / $this->getProcessedCount()) * 100
|
||||
: 0,
|
||||
'execution_time' => $this->getExecutionTime(),
|
||||
'items_per_second' => $this->getExecutionTime() > 0
|
||||
? $this->getProcessedCount() / $this->getExecutionTime()
|
||||
: 0,
|
||||
];
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user