Initial push
This commit is contained in:
491
src/SqrtSpace.SpaceTime.Pipeline/SpaceTimePipeline.cs
Normal file
491
src/SqrtSpace.SpaceTime.Pipeline/SpaceTimePipeline.cs
Normal file
@@ -0,0 +1,491 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Threading;
|
||||
using System.Threading.Channels;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using SqrtSpace.SpaceTime.Core;
|
||||
|
||||
namespace SqrtSpace.SpaceTime.Pipeline;
|
||||
|
||||
/// <summary>
|
||||
/// Memory-efficient data pipeline with √n buffering
|
||||
/// </summary>
|
||||
public class SpaceTimePipeline<TInput, TOutput> : ISpaceTimePipeline<TInput, TOutput>
|
||||
{
|
||||
private readonly List<IPipelineStage> _stages;
|
||||
private readonly ILogger<SpaceTimePipeline<TInput, TOutput>> _logger;
|
||||
private readonly PipelineConfiguration _configuration;
|
||||
private readonly CancellationTokenSource _cancellationTokenSource;
|
||||
private readonly SemaphoreSlim _executionLock;
|
||||
private PipelineState _state;
|
||||
|
||||
public string Name { get; }
|
||||
public PipelineState State => _state;
|
||||
|
||||
public SpaceTimePipeline(
|
||||
string name,
|
||||
ILogger<SpaceTimePipeline<TInput, TOutput>> logger,
|
||||
PipelineConfiguration? configuration = null)
|
||||
{
|
||||
Name = name ?? throw new ArgumentNullException(nameof(name));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
_configuration = configuration ?? new PipelineConfiguration();
|
||||
_stages = new List<IPipelineStage>();
|
||||
_cancellationTokenSource = new CancellationTokenSource();
|
||||
_executionLock = new SemaphoreSlim(1, 1);
|
||||
_state = PipelineState.Created;
|
||||
}
|
||||
|
||||
public ISpaceTimePipeline<TInput, TOutput> AddStage<TStageInput, TStageOutput>(
|
||||
string stageName,
|
||||
Func<TStageInput, CancellationToken, Task<TStageOutput>> transform,
|
||||
StageConfiguration? configuration = null)
|
||||
{
|
||||
if (_state != PipelineState.Created)
|
||||
throw new InvalidOperationException("Cannot add stages after pipeline has started");
|
||||
|
||||
var stage = new TransformStage<TStageInput, TStageOutput>(
|
||||
stageName,
|
||||
transform,
|
||||
configuration ?? new StageConfiguration(),
|
||||
_logger);
|
||||
|
||||
_stages.Add(stage);
|
||||
return this;
|
||||
}
|
||||
|
||||
public ISpaceTimePipeline<TInput, TOutput> AddBatchStage<TStageInput, TStageOutput>(
|
||||
string stageName,
|
||||
Func<IReadOnlyList<TStageInput>, CancellationToken, Task<IEnumerable<TStageOutput>>> batchTransform,
|
||||
StageConfiguration? configuration = null)
|
||||
{
|
||||
if (_state != PipelineState.Created)
|
||||
throw new InvalidOperationException("Cannot add stages after pipeline has started");
|
||||
|
||||
var stage = new BatchTransformStage<TStageInput, TStageOutput>(
|
||||
stageName,
|
||||
batchTransform,
|
||||
configuration ?? new StageConfiguration(),
|
||||
_logger);
|
||||
|
||||
_stages.Add(stage);
|
||||
return this;
|
||||
}
|
||||
|
||||
public ISpaceTimePipeline<TInput, TOutput> AddFilterStage<T>(
|
||||
string stageName,
|
||||
Func<T, bool> predicate,
|
||||
StageConfiguration? configuration = null)
|
||||
{
|
||||
if (_state != PipelineState.Created)
|
||||
throw new InvalidOperationException("Cannot add stages after pipeline has started");
|
||||
|
||||
var stage = new FilterStage<T>(
|
||||
stageName,
|
||||
predicate,
|
||||
configuration ?? new StageConfiguration(),
|
||||
_logger);
|
||||
|
||||
_stages.Add(stage);
|
||||
return this;
|
||||
}
|
||||
|
||||
public ISpaceTimePipeline<TInput, TOutput> AddCheckpointStage<T>(
|
||||
string stageName,
|
||||
ICheckpointManager checkpointManager,
|
||||
StageConfiguration? configuration = null)
|
||||
{
|
||||
if (_state != PipelineState.Created)
|
||||
throw new InvalidOperationException("Cannot add stages after pipeline has started");
|
||||
|
||||
var stage = new CheckpointStage<T>(
|
||||
stageName,
|
||||
checkpointManager,
|
||||
configuration ?? new StageConfiguration(),
|
||||
_logger);
|
||||
|
||||
_stages.Add(stage);
|
||||
return this;
|
||||
}
|
||||
|
||||
public async Task<PipelineResult<TOutput>> ExecuteAsync(
|
||||
TInput input,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
return await ExecuteAsync(new[] { input }, cancellationToken);
|
||||
}
|
||||
|
||||
public async Task<PipelineResult<TOutput>> ExecuteAsync(
|
||||
IEnumerable<TInput> inputs,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
await _executionLock.WaitAsync(cancellationToken);
|
||||
try
|
||||
{
|
||||
_state = PipelineState.Running;
|
||||
var startTime = DateTime.UtcNow;
|
||||
var result = new PipelineResult<TOutput>();
|
||||
|
||||
// Link cancellation tokens
|
||||
using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(
|
||||
cancellationToken,
|
||||
_cancellationTokenSource.Token);
|
||||
|
||||
// Create execution context
|
||||
var context = new PipelineExecutionContext
|
||||
{
|
||||
PipelineName = Name,
|
||||
ExecutionId = Guid.NewGuid().ToString(),
|
||||
StartTime = startTime,
|
||||
Configuration = _configuration,
|
||||
CancellationToken = linkedCts.Token
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
// Build stage channels
|
||||
var channels = BuildStageChannels();
|
||||
|
||||
// Start stage processors
|
||||
var stageTasks = StartStageProcessors(channels, context);
|
||||
|
||||
// Feed inputs
|
||||
await FeedInputsAsync(inputs, channels.First().Writer, context);
|
||||
|
||||
// Wait for completion
|
||||
await Task.WhenAll(stageTasks);
|
||||
|
||||
// Collect outputs
|
||||
var outputs = new List<TOutput>();
|
||||
var outputChannel = channels.Last().Reader;
|
||||
|
||||
await foreach (var output in outputChannel.ReadAllAsync(linkedCts.Token))
|
||||
{
|
||||
outputs.Add((TOutput)(object)output);
|
||||
}
|
||||
|
||||
result.Outputs = outputs;
|
||||
result.Success = true;
|
||||
result.ProcessedCount = outputs.Count;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Pipeline execution failed");
|
||||
result.Success = false;
|
||||
result.Error = ex;
|
||||
_state = PipelineState.Failed;
|
||||
}
|
||||
|
||||
result.Duration = DateTime.UtcNow - startTime;
|
||||
_state = result.Success ? PipelineState.Completed : PipelineState.Failed;
|
||||
|
||||
return result;
|
||||
}
|
||||
finally
|
||||
{
|
||||
_executionLock.Release();
|
||||
}
|
||||
}
|
||||
|
||||
public async IAsyncEnumerable<TOutput> ExecuteStreamingAsync(
|
||||
IAsyncEnumerable<TInput> inputs,
|
||||
[EnumeratorCancellation] CancellationToken cancellationToken = default)
|
||||
{
|
||||
await _executionLock.WaitAsync(cancellationToken);
|
||||
try
|
||||
{
|
||||
_state = PipelineState.Running;
|
||||
|
||||
using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(
|
||||
cancellationToken,
|
||||
_cancellationTokenSource.Token);
|
||||
|
||||
var context = new PipelineExecutionContext
|
||||
{
|
||||
PipelineName = Name,
|
||||
ExecutionId = Guid.NewGuid().ToString(),
|
||||
StartTime = DateTime.UtcNow,
|
||||
Configuration = _configuration,
|
||||
CancellationToken = linkedCts.Token
|
||||
};
|
||||
|
||||
// Build channels
|
||||
var channels = BuildStageChannels();
|
||||
|
||||
// Start processors
|
||||
var stageTasks = StartStageProcessors(channels, context);
|
||||
|
||||
// Start input feeder
|
||||
var feederTask = Task.Run(async () =>
|
||||
{
|
||||
try
|
||||
{
|
||||
await foreach (var input in inputs.WithCancellation(linkedCts.Token))
|
||||
{
|
||||
await channels.First().Writer.WriteAsync(input, linkedCts.Token);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
channels.First().Writer.Complete();
|
||||
}
|
||||
}, linkedCts.Token);
|
||||
|
||||
// Stream outputs
|
||||
var outputChannel = channels.Last().Reader;
|
||||
await foreach (var output in outputChannel.ReadAllAsync(linkedCts.Token))
|
||||
{
|
||||
yield return (TOutput)(object)output;
|
||||
}
|
||||
|
||||
await Task.WhenAll(stageTasks.Concat(new[] { feederTask }));
|
||||
_state = PipelineState.Completed;
|
||||
}
|
||||
finally
|
||||
{
|
||||
_executionLock.Release();
|
||||
}
|
||||
}
|
||||
|
||||
public async Task<PipelineStatistics> GetStatisticsAsync()
|
||||
{
|
||||
var stats = new PipelineStatistics
|
||||
{
|
||||
PipelineName = Name,
|
||||
State = _state,
|
||||
StageCount = _stages.Count,
|
||||
StageStatistics = new List<StageStatistics>()
|
||||
};
|
||||
|
||||
foreach (var stage in _stages)
|
||||
{
|
||||
stats.StageStatistics.Add(await stage.GetStatisticsAsync());
|
||||
}
|
||||
|
||||
stats.TotalItemsProcessed = stats.StageStatistics.Sum(s => s.ItemsProcessed);
|
||||
stats.TotalErrors = stats.StageStatistics.Sum(s => s.Errors);
|
||||
stats.AverageLatency = stats.StageStatistics.Any()
|
||||
? TimeSpan.FromMilliseconds(stats.StageStatistics.Average(s => s.AverageLatency.TotalMilliseconds))
|
||||
: TimeSpan.Zero;
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
private List<Channel<object>> BuildStageChannels()
|
||||
{
|
||||
var channels = new List<Channel<object>>();
|
||||
|
||||
for (int i = 0; i <= _stages.Count; i++)
|
||||
{
|
||||
var bufferSize = i < _stages.Count
|
||||
? _stages[i].Configuration.BufferSize
|
||||
: _configuration.OutputBufferSize;
|
||||
|
||||
// Use √n buffering if not specified
|
||||
if (bufferSize == 0)
|
||||
{
|
||||
bufferSize = SpaceTimeCalculator.CalculateSqrtInterval(_configuration.ExpectedItemCount);
|
||||
}
|
||||
|
||||
var channel = Channel.CreateBounded<object>(new BoundedChannelOptions(bufferSize)
|
||||
{
|
||||
FullMode = BoundedChannelFullMode.Wait,
|
||||
SingleWriter = false,
|
||||
SingleReader = false
|
||||
});
|
||||
|
||||
channels.Add(channel);
|
||||
}
|
||||
|
||||
return channels;
|
||||
}
|
||||
|
||||
private List<Task> StartStageProcessors(
|
||||
List<Channel<object>> channels,
|
||||
PipelineExecutionContext context)
|
||||
{
|
||||
var tasks = new List<Task>();
|
||||
|
||||
for (int i = 0; i < _stages.Count; i++)
|
||||
{
|
||||
var stage = _stages[i];
|
||||
var inputChannel = channels[i];
|
||||
var outputChannel = channels[i + 1];
|
||||
|
||||
var task = Task.Run(async () =>
|
||||
{
|
||||
try
|
||||
{
|
||||
await stage.ProcessAsync(
|
||||
inputChannel.Reader,
|
||||
outputChannel.Writer,
|
||||
context);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Stage {StageName} failed", stage.Name);
|
||||
throw;
|
||||
}
|
||||
finally
|
||||
{
|
||||
outputChannel.Writer.Complete();
|
||||
}
|
||||
}, context.CancellationToken);
|
||||
|
||||
tasks.Add(task);
|
||||
}
|
||||
|
||||
return tasks;
|
||||
}
|
||||
|
||||
private async Task FeedInputsAsync<T>(
|
||||
IEnumerable<T> inputs,
|
||||
ChannelWriter<object> writer,
|
||||
PipelineExecutionContext context)
|
||||
{
|
||||
try
|
||||
{
|
||||
foreach (var input in inputs)
|
||||
{
|
||||
if (context.CancellationToken.IsCancellationRequested)
|
||||
break;
|
||||
|
||||
await writer.WriteAsync(input!, context.CancellationToken);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
writer.Complete();
|
||||
}
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_cancellationTokenSource?.Cancel();
|
||||
_cancellationTokenSource?.Dispose();
|
||||
_executionLock?.Dispose();
|
||||
|
||||
foreach (var stage in _stages.OfType<IDisposable>())
|
||||
{
|
||||
stage.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Interfaces and supporting classes
|
||||
public interface ISpaceTimePipeline<TInput, TOutput> : IDisposable
|
||||
{
|
||||
string Name { get; }
|
||||
PipelineState State { get; }
|
||||
|
||||
ISpaceTimePipeline<TInput, TOutput> AddStage<TStageInput, TStageOutput>(
|
||||
string stageName,
|
||||
Func<TStageInput, CancellationToken, Task<TStageOutput>> transform,
|
||||
StageConfiguration? configuration = null);
|
||||
|
||||
ISpaceTimePipeline<TInput, TOutput> AddBatchStage<TStageInput, TStageOutput>(
|
||||
string stageName,
|
||||
Func<IReadOnlyList<TStageInput>, CancellationToken, Task<IEnumerable<TStageOutput>>> batchTransform,
|
||||
StageConfiguration? configuration = null);
|
||||
|
||||
ISpaceTimePipeline<TInput, TOutput> AddFilterStage<T>(
|
||||
string stageName,
|
||||
Func<T, bool> predicate,
|
||||
StageConfiguration? configuration = null);
|
||||
|
||||
ISpaceTimePipeline<TInput, TOutput> AddCheckpointStage<T>(
|
||||
string stageName,
|
||||
ICheckpointManager checkpointManager,
|
||||
StageConfiguration? configuration = null);
|
||||
|
||||
Task<PipelineResult<TOutput>> ExecuteAsync(
|
||||
TInput input,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
Task<PipelineResult<TOutput>> ExecuteAsync(
|
||||
IEnumerable<TInput> inputs,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
IAsyncEnumerable<TOutput> ExecuteStreamingAsync(
|
||||
IAsyncEnumerable<TInput> inputs,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
Task<PipelineStatistics> GetStatisticsAsync();
|
||||
}
|
||||
|
||||
public enum PipelineState
|
||||
{
|
||||
Created,
|
||||
Running,
|
||||
Completed,
|
||||
Failed,
|
||||
Cancelled
|
||||
}
|
||||
|
||||
public class PipelineConfiguration
|
||||
{
|
||||
public int ExpectedItemCount { get; set; } = 10000;
|
||||
public int OutputBufferSize { get; set; } = 0; // 0 = auto (√n)
|
||||
public TimeSpan DefaultTimeout { get; set; } = TimeSpan.FromMinutes(30);
|
||||
public bool EnableCheckpointing { get; set; } = true;
|
||||
public bool EnableMetrics { get; set; } = true;
|
||||
}
|
||||
|
||||
public class StageConfiguration
|
||||
{
|
||||
public int BufferSize { get; set; } = 0; // 0 = auto (√n)
|
||||
public int MaxConcurrency { get; set; } = Environment.ProcessorCount;
|
||||
public TimeSpan Timeout { get; set; } = TimeSpan.FromMinutes(5);
|
||||
public bool EnableRetry { get; set; } = true;
|
||||
public int MaxRetries { get; set; } = 3;
|
||||
}
|
||||
|
||||
public class PipelineResult<T>
|
||||
{
|
||||
public bool Success { get; set; }
|
||||
public List<T> Outputs { get; set; } = new();
|
||||
public int ProcessedCount { get; set; }
|
||||
public TimeSpan Duration { get; set; }
|
||||
public Exception? Error { get; set; }
|
||||
}
|
||||
|
||||
public class PipelineStatistics
|
||||
{
|
||||
public string PipelineName { get; set; } = "";
|
||||
public PipelineState State { get; set; }
|
||||
public int StageCount { get; set; }
|
||||
public long TotalItemsProcessed { get; set; }
|
||||
public long TotalErrors { get; set; }
|
||||
public TimeSpan AverageLatency { get; set; }
|
||||
public List<StageStatistics> StageStatistics { get; set; } = new();
|
||||
}
|
||||
|
||||
public class StageStatistics
|
||||
{
|
||||
public string StageName { get; set; } = "";
|
||||
public long ItemsProcessed { get; set; }
|
||||
public long ItemsFiltered { get; set; }
|
||||
public long Errors { get; set; }
|
||||
public TimeSpan AverageLatency { get; set; }
|
||||
public long MemoryUsage { get; set; }
|
||||
}
|
||||
|
||||
internal interface IPipelineStage
|
||||
{
|
||||
string Name { get; }
|
||||
StageConfiguration Configuration { get; }
|
||||
Task ProcessAsync(ChannelReader<object> input, ChannelWriter<object> output, PipelineExecutionContext context);
|
||||
Task<StageStatistics> GetStatisticsAsync();
|
||||
}
|
||||
|
||||
internal class PipelineExecutionContext
|
||||
{
|
||||
public string PipelineName { get; set; } = "";
|
||||
public string ExecutionId { get; set; } = "";
|
||||
public DateTime StartTime { get; set; }
|
||||
public PipelineConfiguration Configuration { get; set; } = null!;
|
||||
public CancellationToken CancellationToken { get; set; }
|
||||
}
|
||||
Reference in New Issue
Block a user