Initial push

This commit is contained in:
2025-07-20 03:41:39 -04:00
commit d315f5d26e
118 changed files with 25819 additions and 0 deletions

View File

@@ -0,0 +1,158 @@
using Microsoft.AspNetCore.Mvc;
using SqrtSpace.SpaceTime.AspNetCore;
using SqrtSpace.SpaceTime.Core;
using SampleWebApi.Models;
using SampleWebApi.Services;
namespace SampleWebApi.Controllers;
[ApiController]
[Route("api/[controller]")]
public class AnalyticsController : ControllerBase
{
    private readonly IOrderAnalyticsService _analyticsService;
    private readonly ILogger<AnalyticsController> _logger;

    public AnalyticsController(IOrderAnalyticsService analyticsService, ILogger<AnalyticsController> logger)
    {
        _analyticsService = analyticsService;
        _logger = logger;
    }

    /// <summary>
    /// Calculate revenue by category using memory-efficient aggregation
    /// </summary>
    /// <remarks>
    /// This endpoint demonstrates using external grouping for large datasets.
    /// When processing millions of orders, it automatically uses disk-based
    /// aggregation to stay within memory limits.
    /// </remarks>
    /// <param name="startDate">Optional inclusive lower bound for the order date.</param>
    /// <param name="endDate">Optional inclusive upper bound for the order date.</param>
    [HttpGet("revenue-by-category")]
    public async Task<ActionResult<IEnumerable<CategoryRevenue>>> GetRevenueByCategory(
        [FromQuery] DateTime? startDate = null,
        [FromQuery] DateTime? endDate = null)
    {
        // Reject inverted ranges up front instead of passing them to the service
        // (its behavior for startDate > endDate is not defined here).
        if (startDate.HasValue && endDate.HasValue && startDate > endDate)
        {
            return BadRequest("startDate must not be later than endDate");
        }
        var result = await _analyticsService.GetRevenueByCategoryAsync(startDate, endDate);
        return Ok(result);
    }

    /// <summary>
    /// Get top customers using external sorting
    /// </summary>
    /// <remarks>
    /// This endpoint finds top customers by order value using external sorting.
    /// Even with millions of customers, it maintains O(√n) memory usage.
    /// </remarks>
    /// <param name="top">Number of customers to return (1–1000).</param>
    /// <param name="since">Optional cutoff; only orders after this date are considered.</param>
    [HttpGet("top-customers")]
    public async Task<ActionResult<IEnumerable<CustomerSummary>>> GetTopCustomers(
        [FromQuery] int top = 100,
        [FromQuery] DateTime? since = null)
    {
        // Validate both bounds: a non-positive count is as invalid as an oversized one.
        if (top < 1)
        {
            return BadRequest("top must be at least 1");
        }
        if (top > 1000)
        {
            return BadRequest("Cannot retrieve more than 1000 customers at once");
        }
        var customers = await _analyticsService.GetTopCustomersAsync(top, since);
        return Ok(customers);
    }

    /// <summary>
    /// Stream real-time order analytics
    /// </summary>
    /// <remarks>
    /// This endpoint streams analytics data in real-time using Server-Sent Events (SSE).
    /// It demonstrates memory-efficient streaming of continuous data.
    /// </remarks>
    [HttpGet("real-time/orders")]
    [SpaceTimeStreaming]
    public async Task StreamOrderAnalytics(CancellationToken cancellationToken)
    {
        Response.ContentType = "text/event-stream";
        Response.Headers.Append("Cache-Control", "no-cache");
        // Disable reverse-proxy (nginx) buffering so each event is flushed to the client.
        Response.Headers.Append("X-Accel-Buffering", "no");
        try
        {
            await foreach (var analytics in _analyticsService.StreamRealTimeAnalyticsAsync(cancellationToken))
            {
                var data = System.Text.Json.JsonSerializer.Serialize(analytics);
                await Response.WriteAsync($"data: {data}\n\n", cancellationToken);
                await Response.Body.FlushAsync(cancellationToken);
                // Small delay to simulate real-time updates
                await Task.Delay(1000, cancellationToken);
            }
        }
        catch (OperationCanceledException)
        {
            // A client disconnect (or server shutdown) cancels the token mid-stream.
            // That is the normal way an SSE connection ends — swallow it instead of
            // letting it surface as an unhandled exception.
        }
    }

    /// <summary>
    /// Generate complex report with checkpointing
    /// </summary>
    /// <remarks>
    /// This endpoint generates a complex report that may take a long time.
    /// It uses checkpointing to allow resuming if the operation is interrupted.
    /// The report includes multiple aggregations and can handle billions of records.
    /// Pass the same X-Report-Id header to resume an interrupted report.
    /// </remarks>
    [HttpPost("reports/generate")]
    [EnableCheckpoint(Strategy = CheckpointStrategy.SqrtN)]
    public async Task<ActionResult<ReportResult>> GenerateReport(
        [FromBody] ReportRequest request,
        [FromHeader(Name = "X-Report-Id")] string? reportId = null)
    {
        // A fresh id starts a new report; a repeated id allows checkpoint resume.
        reportId ??= Guid.NewGuid().ToString();

        var checkpoint = HttpContext.Features.Get<ICheckpointFeature>();
        ReportState? previousState = null;
        if (checkpoint != null)
        {
            previousState = await checkpoint.CheckpointManager.RestoreLatestCheckpointAsync<ReportState>();
            if (previousState != null)
            {
                _logger.LogInformation("Resuming report generation from checkpoint. Progress: {progress}%",
                    previousState.ProgressPercent);
            }
        }

        var result = await _analyticsService.GenerateComplexReportAsync(
            request,
            reportId,
            previousState,
            checkpoint?.CheckpointManager);
        return Ok(result);
    }

    /// <summary>
    /// Analyze order patterns using machine learning with batched processing
    /// </summary>
    /// <remarks>
    /// This endpoint demonstrates processing large datasets for ML analysis
    /// using √n batching to maintain memory efficiency while computing features.
    /// </remarks>
    [HttpPost("analyze-patterns")]
    public async Task<ActionResult<PatternAnalysisResult>> AnalyzeOrderPatterns(
        [FromBody] PatternAnalysisRequest request)
    {
        // Guard both ends of the allowed range.
        if (request.MaxOrdersToAnalyze < 1)
        {
            return BadRequest("MaxOrdersToAnalyze must be at least 1");
        }
        if (request.MaxOrdersToAnalyze > 1_000_000)
        {
            return BadRequest("Cannot analyze more than 1 million orders in a single request");
        }
        var result = await _analyticsService.AnalyzeOrderPatternsAsync(request);
        return Ok(result);
    }

    /// <summary>
    /// Get memory usage statistics for the analytics operations
    /// </summary>
    /// <remarks>
    /// This endpoint provides insights into how SpaceTime is managing memory
    /// for analytics operations, useful for monitoring and optimization.
    /// </remarks>
    [HttpGet("memory-stats")]
    public ActionResult<MemoryStatistics> GetMemoryStatistics()
    {
        var stats = _analyticsService.GetMemoryStatistics();
        return Ok(stats);
    }
}

View File

@@ -0,0 +1,166 @@
using Microsoft.AspNetCore.Mvc;
using SqrtSpace.SpaceTime.AspNetCore;
using SqrtSpace.SpaceTime.Core;
using SampleWebApi.Models;
using SampleWebApi.Services;
namespace SampleWebApi.Controllers;
[ApiController]
[Route("api/[controller]")]
public class ProductsController : ControllerBase
{
    private readonly IProductService _productService;
    private readonly ILogger<ProductsController> _logger;

    public ProductsController(IProductService productService, ILogger<ProductsController> logger)
    {
        _productService = productService;
        _logger = logger;
    }

    /// <summary>
    /// Get all products with memory-efficient paging
    /// </summary>
    /// <remarks>
    /// This endpoint demonstrates basic pagination to limit memory usage.
    /// For very large datasets, consider using the streaming endpoint instead.
    /// </remarks>
    /// <param name="page">1-based page number.</param>
    /// <param name="pageSize">Items per page (1–1000).</param>
    [HttpGet]
    public async Task<ActionResult<PagedResult<Product>>> GetProducts(
        [FromQuery] int page = 1,
        [FromQuery] int pageSize = 100)
    {
        // Non-positive values were previously accepted; pageSize = 0 in particular
        // breaks the TotalPages computation of PagedResult (division by zero).
        if (page < 1 || pageSize < 1)
        {
            return BadRequest("page and pageSize must be positive");
        }
        if (pageSize > 1000)
        {
            return BadRequest("Page size cannot exceed 1000 items");
        }
        var result = await _productService.GetProductsPagedAsync(page, pageSize);
        return Ok(result);
    }

    /// <summary>
    /// Stream products using √n batching for memory efficiency
    /// </summary>
    /// <remarks>
    /// This endpoint streams large datasets using √n-sized batches.
    /// It's ideal for processing millions of records without loading them all into memory.
    /// The response is streamed as newline-delimited JSON (NDJSON).
    /// </remarks>
    [HttpGet("stream")]
    [SpaceTimeStreaming(ChunkStrategy = ChunkStrategy.SqrtN)]
    public async IAsyncEnumerable<Product> StreamProducts(
        [FromQuery] string? category = null,
        [FromQuery] decimal? minPrice = null)
    {
        // Pure pass-through: the service yields lazily, so nothing is buffered here.
        await foreach (var product in _productService.StreamProductsAsync(category, minPrice))
        {
            yield return product;
        }
    }

    /// <summary>
    /// Search products with memory-aware filtering
    /// </summary>
    /// <remarks>
    /// This endpoint uses external sorting when the result set is large,
    /// automatically spilling to disk if memory pressure is detected.
    /// </remarks>
    [HttpGet("search")]
    public async Task<ActionResult<IEnumerable<Product>>> SearchProducts(
        [FromQuery] string query,
        [FromQuery] string? sortBy = "name",
        [FromQuery] bool descending = false)
    {
        if (string.IsNullOrWhiteSpace(query))
        {
            return BadRequest("Search query is required");
        }
        var results = await _productService.SearchProductsAsync(query, sortBy, descending);
        return Ok(results);
    }

    /// <summary>
    /// Bulk update product prices with checkpointing
    /// </summary>
    /// <remarks>
    /// This endpoint demonstrates checkpoint-enabled bulk operations.
    /// If the operation fails, it can be resumed from the last checkpoint.
    /// Pass the same operationId to resume a failed operation.
    /// </remarks>
    [HttpPost("bulk-update-prices")]
    [EnableCheckpoint(Strategy = CheckpointStrategy.Linear)]
    public async Task<ActionResult<BulkUpdateResult>> BulkUpdatePrices(
        [FromBody] BulkPriceUpdateRequest request,
        [FromHeader(Name = "X-Operation-Id")] string? operationId = null)
    {
        // A zero or negative multiplier would zero out or invert prices — reject it.
        if (request.PriceMultiplier <= 0)
        {
            return BadRequest("PriceMultiplier must be positive");
        }

        operationId ??= Guid.NewGuid().ToString();

        var checkpoint = HttpContext.Features.Get<ICheckpointFeature>();
        if (checkpoint != null)
        {
            // Try to restore from previous checkpoint
            var state = await checkpoint.CheckpointManager.RestoreLatestCheckpointAsync<BulkUpdateState>();
            if (state != null)
            {
                _logger.LogInformation("Resuming bulk update from checkpoint. Processed: {count}", state.ProcessedCount);
            }
        }

        var result = await _productService.BulkUpdatePricesAsync(
            request.CategoryFilter,
            request.PriceMultiplier,
            operationId,
            checkpoint?.CheckpointManager);
        return Ok(result);
    }

    /// <summary>
    /// Export products to CSV with memory streaming
    /// </summary>
    /// <remarks>
    /// This endpoint exports products to CSV format using streaming to minimize memory usage.
    /// Even millions of products can be exported without loading them all into memory.
    /// </remarks>
    [HttpGet("export/csv")]
    public async Task ExportToCsv([FromQuery] string? category = null)
    {
        Response.ContentType = "text/csv";
        // Date-stamped filename so repeated exports don't collide on the client side.
        Response.Headers.Append("Content-Disposition", $"attachment; filename=products_{DateTime.UtcNow:yyyyMMdd}.csv");
        await _productService.ExportToCsvAsync(Response.Body, category);
    }

    /// <summary>
    /// Get product price statistics using memory-efficient aggregation
    /// </summary>
    /// <remarks>
    /// This endpoint calculates statistics over large datasets using external aggregation
    /// when memory pressure is detected.
    /// </remarks>
    [HttpGet("statistics")]
    public async Task<ActionResult<ProductStatistics>> GetStatistics([FromQuery] string? category = null)
    {
        var stats = await _productService.GetStatisticsAsync(category);
        return Ok(stats);
    }
}
/// <summary>
/// Request body for the bulk-update-prices endpoint.
/// NOTE(review): a class with this exact name and shape is also declared in
/// SampleWebApi.Models. Within this file the local (Controllers-namespace) copy
/// wins name resolution over the `using SampleWebApi.Models;` import — consider
/// consolidating to the Models version to avoid two drifting definitions.
/// </summary>
public class BulkPriceUpdateRequest
{
    // Null means "update every category".
    public string? CategoryFilter { get; set; }
    public decimal PriceMultiplier { get; set; }
}
/// <summary>
/// Result of a bulk price update.
/// NOTE(review): SampleWebApi.Models also declares a BulkUpdateResult with a
/// superset of these properties — TODO confirm which one the service layer
/// actually returns, then remove the duplicate.
/// </summary>
public class BulkUpdateResult
{
    public string OperationId { get; set; } = "";
    public int TotalProducts { get; set; }
    public int UpdatedProducts { get; set; }
    public int FailedProducts { get; set; }
    public bool Completed { get; set; }
    // Id of the last checkpoint written, when checkpointing was active.
    public string? CheckpointId { get; set; }
}

View File

@@ -0,0 +1,140 @@
using SampleWebApi.Models;
namespace SampleWebApi.Data;
/// <summary>
/// Seeds the demo database with randomly generated customers, products, and
/// orders (with line items). Seeding is idempotent: it is skipped when any
/// products already exist.
/// </summary>
public static class DataSeeder
{
    private static readonly Random _random = new Random();
    private static readonly string[] _categories = { "Electronics", "Books", "Clothing", "Home & Garden", "Sports", "Toys", "Food & Beverage" };
    private static readonly string[] _productAdjectives = { "Premium", "Essential", "Professional", "Deluxe", "Standard", "Advanced", "Basic" };
    private static readonly string[] _productNouns = { "Widget", "Gadget", "Tool", "Device", "Kit", "Set", "Pack", "Bundle" };

    /// <summary>
    /// Populates the context with 1,000 customers, 10,000 products and 50,000 orders.
    /// Saves after each entity type so orders can reference persisted customers/products.
    /// </summary>
    public static async Task SeedAsync(SampleDbContext context)
    {
        // Check if data already exists
        if (context.Products.Any())
        {
            return;
        }

        // Create customers
        var customers = GenerateCustomers(1000);
        await context.Customers.AddRangeAsync(customers);
        await context.SaveChangesAsync();

        // Create products
        var products = GenerateProducts(10000);
        await context.Products.AddRangeAsync(products);
        await context.SaveChangesAsync();

        // Create orders with items
        var orders = GenerateOrders(customers, products, 50000);
        await context.Orders.AddRangeAsync(orders);
        await context.SaveChangesAsync();
    }

    /// <summary>Generates <paramref name="count"/> customers with synthetic ids/emails.</summary>
    private static List<Customer> GenerateCustomers(int count)
    {
        var customers = new List<Customer>();
        for (int i = 1; i <= count; i++)
        {
            customers.Add(new Customer
            {
                Id = $"CUST{i:D6}",
                Name = $"Customer {i}",
                Email = $"customer{i}@example.com",
                // Registered some time in the last two years.
                RegisteredAt = DateTime.UtcNow.AddDays(-_random.Next(1, 730))
            });
        }
        return customers;
    }

    /// <summary>Generates <paramref name="count"/> products with random category, name and price.</summary>
    private static List<Product> GenerateProducts(int count)
    {
        var products = new List<Product>();
        for (int i = 1; i <= count; i++)
        {
            var category = _categories[_random.Next(_categories.Length)];
            var adjective = _productAdjectives[_random.Next(_productAdjectives.Length)];
            var noun = _productNouns[_random.Next(_productNouns.Length)];
            products.Add(new Product
            {
                Id = i,
                Name = $"{adjective} {noun} {i}",
                Description = $"High-quality {adjective.ToLower()} {noun.ToLower()} for {category.ToLower()} enthusiasts",
                Category = category,
                Price = (decimal)(_random.NextDouble() * 990 + 10), // $10 to $1000
                StockQuantity = _random.Next(0, 1000),
                CreatedAt = DateTime.UtcNow.AddDays(-_random.Next(1, 365)),
                UpdatedAt = DateTime.UtcNow.AddDays(-_random.Next(0, 30))
            });
        }
        return products;
    }

    /// <summary>Generates <paramref name="count"/> orders, each with 1–9 distinct random products.</summary>
    private static List<Order> GenerateOrders(List<Customer> customers, List<Product> products, int count)
    {
        var orders = new List<Order>();
        for (int i = 1; i <= count; i++)
        {
            var customer = customers[_random.Next(customers.Count)];
            var orderDate = DateTime.UtcNow.AddDays(-_random.Next(0, 365));
            var itemCount = _random.Next(1, 10);
            var orderItems = new List<OrderItem>();
            decimal totalAmount = 0;

            // Previously this shuffled the ENTIRE product list with
            // `products.OrderBy(x => _random.Next()).Take(itemCount)` for every
            // single order — O(orders * products * log products) work just to pick
            // a handful of items. Sampling distinct indices is O(itemCount).
            var selectedProducts = PickRandomProducts(products, itemCount);

            foreach (var product in selectedProducts)
            {
                var quantity = _random.Next(1, 5);
                var itemTotal = product.Price * quantity;
                totalAmount += itemTotal;
                orderItems.Add(new OrderItem
                {
                    ProductId = product.Id,
                    Quantity = quantity,
                    UnitPrice = product.Price,
                    TotalPrice = itemTotal
                });
            }

            orders.Add(new Order
            {
                Id = i,
                CustomerId = customer.Id,
                OrderDate = orderDate,
                TotalAmount = totalAmount,
                Status = GetRandomOrderStatus(orderDate),
                Items = orderItems
            });
        }
        return orders;
    }

    /// <summary>
    /// Picks up to <paramref name="count"/> distinct random products
    /// (all of them if the list has fewer than <paramref name="count"/>).
    /// </summary>
    private static List<Product> PickRandomProducts(List<Product> products, int count)
    {
        var limit = Math.Min(count, products.Count);
        var indices = new HashSet<int>();
        while (indices.Count < limit)
        {
            indices.Add(_random.Next(products.Count));
        }
        return indices.Select(i => products[i]).ToList();
    }

    /// <summary>
    /// Picks a plausible status based on order age: newer orders tend toward
    /// Pending/Processing, older ones toward Delivered (with a 10% Cancelled rate).
    /// </summary>
    private static string GetRandomOrderStatus(DateTime orderDate)
    {
        var daysSinceOrder = (DateTime.UtcNow - orderDate).Days;
        if (daysSinceOrder < 1)
            return "Pending";
        else if (daysSinceOrder < 3)
            return _random.Next(2) == 0 ? "Processing" : "Pending";
        else if (daysSinceOrder < 7)
            return _random.Next(3) == 0 ? "Shipped" : "Processing";
        else
            return _random.Next(10) == 0 ? "Cancelled" : "Delivered";
    }
}

View File

@@ -0,0 +1,65 @@
using Microsoft.EntityFrameworkCore;
using SampleWebApi.Models;
namespace SampleWebApi.Data;
/// <summary>
/// EF Core context for the sample shop: products, customers, orders and order items.
/// </summary>
public class SampleDbContext : DbContext
{
    public SampleDbContext(DbContextOptions<SampleDbContext> options) : base(options)
    {
    }

    public DbSet<Product> Products { get; set; } = null!;
    public DbSet<Order> Orders { get; set; } = null!;
    public DbSet<OrderItem> OrderItems { get; set; } = null!;
    public DbSet<Customer> Customers { get; set; } = null!;

    /// <summary>Configures keys, required lengths, decimal precision, indexes and relationships.</summary>
    protected override void OnModelCreating(ModelBuilder modelBuilder)
    {
        // Products: indexed on the columns used for filtering (Category) and sorting (Price).
        modelBuilder.Entity<Product>(product =>
        {
            product.HasKey(x => x.Id);
            product.Property(x => x.Name).IsRequired().HasMaxLength(200);
            product.Property(x => x.Category).IsRequired().HasMaxLength(100);
            product.Property(x => x.Price).HasPrecision(10, 2);
            product.HasIndex(x => x.Category);
            product.HasIndex(x => x.Price);
        });

        // Orders: one-to-many with items; indexed for per-customer and per-date queries.
        modelBuilder.Entity<Order>(order =>
        {
            order.HasKey(x => x.Id);
            order.Property(x => x.CustomerId).IsRequired().HasMaxLength(50);
            order.Property(x => x.TotalAmount).HasPrecision(10, 2);
            order.HasIndex(x => x.CustomerId);
            order.HasIndex(x => x.OrderDate);
            order.HasMany(x => x.Items)
                .WithOne(item => item.Order)
                .HasForeignKey(item => item.OrderId);
        });

        // Order items: composite index supports lookups by order and product.
        modelBuilder.Entity<OrderItem>(item =>
        {
            item.HasKey(x => x.Id);
            item.Property(x => x.UnitPrice).HasPrecision(10, 2);
            item.Property(x => x.TotalPrice).HasPrecision(10, 2);
            item.HasIndex(x => new { x.OrderId, x.ProductId });
        });

        // Customers: string primary key, unique e-mail, one-to-many with orders.
        modelBuilder.Entity<Customer>(customer =>
        {
            customer.HasKey(x => x.Id);
            customer.Property(x => x.Id).HasMaxLength(50);
            customer.Property(x => x.Name).IsRequired().HasMaxLength(200);
            customer.Property(x => x.Email).IsRequired().HasMaxLength(200);
            customer.HasIndex(x => x.Email).IsUnique();
            customer.HasMany(x => x.Orders)
                .WithOne()
                .HasForeignKey(x => x.CustomerId);
        });
    }
}

View File

@@ -0,0 +1,111 @@
namespace SampleWebApi.Models;
/// <summary>
/// Outcome of a bulk product update operation.
/// NOTE(review): a smaller class with the same name is also declared alongside
/// ProductsController (SampleWebApi.Controllers namespace) — consider consolidating.
/// </summary>
public class BulkUpdateResult
{
    public string OperationId { get; set; } = "";
    // Product-level counters.
    public int TotalProducts { get; set; }
    public int UpdatedProducts { get; set; }
    public int FailedProducts { get; set; }
    public bool Completed { get; set; }
    // Id of the last checkpoint written, when checkpointing was active.
    public string? CheckpointId { get; set; }
    // Generic counters — these overlap with the product counters above;
    // TODO confirm which set the service layer actually populates.
    public int TotalProcessed { get; set; }
    public int SuccessCount { get; set; }
    public int FailureCount { get; set; }
    public TimeSpan Duration { get; set; }
    public List<string> Errors { get; set; } = new();
}
/// <summary>Parameters for the /api/analytics/reports/generate endpoint.</summary>
public class ReportRequest
{
    public DateTime StartDate { get; set; }
    public DateTime EndDate { get; set; }
    // Metric names, e.g. "revenue", "categories", "customers", "products".
    public List<string> MetricsToInclude { get; set; } = new();
    public bool IncludeDetailedBreakdown { get; set; }
}
/// <summary>Completed (or partial, when resumed from checkpoint) report payload.</summary>
public class ReportResult
{
    public string ReportId { get; set; } = "";
    public DateTime GeneratedAt { get; set; }
    // Named metric values; object-typed because metric shapes vary.
    public Dictionary<string, object> Metrics { get; set; } = new();
    public List<CategoryBreakdown> CategoryBreakdowns { get; set; } = new();
    public List<CustomerActivity> TopCustomers { get; set; } = new();
    public List<ProductPerformance> TopProducts { get; set; } = new();
    public bool Completed { get; set; }
    // 0–100. NOTE(review): ReportState.ProgressPercent is declared as int — confirm
    // whether fractional progress is intended here.
    public double ProgressPercent { get; set; }
    public long ProcessingTimeMs { get; set; }
    public long MemoryUsedMB { get; set; }
}
/// <summary>Per-category revenue line inside a generated report.</summary>
public class CategoryBreakdown
{
    public string Category { get; set; } = "";
    public decimal Revenue { get; set; }
    public int OrderCount { get; set; }
    public decimal AverageOrderValue { get; set; }
}
/// <summary>Per-customer activity line inside a generated report.</summary>
public class CustomerActivity
{
    public string CustomerId { get; set; } = "";
    public string CustomerName { get; set; } = "";
    public decimal TotalSpent { get; set; }
    public int OrderCount { get; set; }
}
/// <summary>Per-product sales line inside a generated report.</summary>
public class ProductPerformance
{
    public int ProductId { get; set; }
    public string ProductName { get; set; } = "";
    public decimal Revenue { get; set; }
    public int QuantitySold { get; set; }
}
/// <summary>Parameters for the /api/analytics/analyze-patterns endpoint.</summary>
public class PatternAnalysisRequest
{
    public string PatternType { get; set; } = "";
    public DateTime StartDate { get; set; }
    public DateTime EndDate { get; set; }
    // Free-form analysis parameters, keyed by name.
    public Dictionary<string, object> Parameters { get; set; } = new();
    // Hard cap enforced by the controller (max 1,000,000).
    public int MaxOrdersToAnalyze { get; set; } = 100000;
    public bool IncludeCustomerSegmentation { get; set; }
    public bool IncludeSeasonalAnalysis { get; set; }
}
/// <summary>A single detected pattern with its confidence score.</summary>
public class PatternResult
{
    public string Pattern { get; set; } = "";
    public double Confidence { get; set; }
    public Dictionary<string, object> Data { get; set; } = new();
}
/// <summary>
/// Snapshot of SpaceTime memory-management counters.
/// NOTE(review): property-for-property identical to MemoryStatistics in this
/// project — likely one of the two is redundant; consider consolidating.
/// </summary>
public class MemoryStats
{
    public long CurrentMemoryUsageMB { get; set; }
    public long PeakMemoryUsageMB { get; set; }
    public int ExternalSortOperations { get; set; }
    public int CheckpointsSaved { get; set; }
    public long DataSpilledToDiskMB { get; set; }
    // 0.0–1.0 fraction, presumably — TODO confirm against the producer.
    public double CacheHitRate { get; set; }
    public string CurrentMemoryPressure { get; set; } = "";
}
/// <summary>
/// Request body for bulk price updates.
/// NOTE(review): duplicated next to ProductsController — consider consolidating.
/// </summary>
public class BulkPriceUpdateRequest
{
    // Null means "update every category".
    public string? CategoryFilter { get; set; }
    public decimal PriceMultiplier { get; set; }
}
/// <summary>Hourly rollup of order activity.</summary>
public class OrderAggregate
{
    // Start of the hour bucket.
    public DateTime Hour { get; set; }
    public int OrderCount { get; set; }
    public decimal TotalRevenue { get; set; }
    public int UniqueCustomers { get; set; }
}
/// <summary>Configurable memory limits, bound from the "MemoryOptions" config section.</summary>
public class MemoryOptions
{
    public int MaxMemoryMB { get; set; } = 512;
    // Percentage of MaxMemoryMB at which warnings are raised.
    public int WarningThresholdPercent { get; set; } = 80;
}

View File

@@ -0,0 +1,149 @@
namespace SampleWebApi.Models;
/// <summary>Catalog product entity.</summary>
public class Product
{
    public int Id { get; set; }
    public string Name { get; set; } = "";
    public string Description { get; set; } = "";
    public string Category { get; set; } = "";
    public decimal Price { get; set; }
    public int StockQuantity { get; set; }
    public DateTime CreatedAt { get; set; }
    public DateTime UpdatedAt { get; set; }
}
/// <summary>Customer order entity; owns a collection of line items.</summary>
public class Order
{
    public int Id { get; set; }
    // FK to Customer.Id (string key, e.g. "CUST000001").
    public string CustomerId { get; set; } = "";
    public DateTime OrderDate { get; set; }
    public decimal TotalAmount { get; set; }
    // e.g. "Pending", "Processing", "Shipped", "Delivered", "Cancelled" (see DataSeeder).
    public string Status { get; set; } = "";
    public List<OrderItem> Items { get; set; } = new();
}
/// <summary>A single product line within an order.</summary>
public class OrderItem
{
    public int Id { get; set; }
    public int OrderId { get; set; }
    public int ProductId { get; set; }
    public int Quantity { get; set; }
    // Price per unit at order time; TotalPrice = UnitPrice * Quantity (see DataSeeder).
    public decimal UnitPrice { get; set; }
    public decimal TotalPrice { get; set; }
    // Navigation properties populated by EF Core.
    public Order Order { get; set; } = null!;
    public Product Product { get; set; } = null!;
}
/// <summary>Customer entity with a string primary key.</summary>
public class Customer
{
    public string Id { get; set; } = "";
    public string Name { get; set; } = "";
    public string Email { get; set; } = "";
    public DateTime RegisteredAt { get; set; }
    public List<Order> Orders { get; set; } = new();
}
/// <summary>
/// One page of results plus paging metadata.
/// </summary>
/// <typeparam name="T">Item type of the page.</typeparam>
public class PagedResult<T>
{
    public List<T> Items { get; set; } = new();
    /// <summary>1-based page number.</summary>
    public int Page { get; set; }
    public int PageSize { get; set; }
    public int TotalCount { get; set; }

    /// <summary>
    /// Total number of pages. Returns 0 when PageSize is not positive — the
    /// previous unguarded division produced Infinity and then int.MinValue via
    /// the unchecked double-to-int conversion.
    /// </summary>
    public int TotalPages => PageSize > 0 ? (int)Math.Ceiling(TotalCount / (double)PageSize) : 0;

    public bool HasNextPage => Page < TotalPages;
    public bool HasPreviousPage => Page > 1;
}
/// <summary>Aggregate price/category statistics over the product catalog.</summary>
public class ProductStatistics
{
    public int TotalProducts { get; set; }
    public decimal AveragePrice { get; set; }
    public decimal MinPrice { get; set; }
    public decimal MaxPrice { get; set; }
    public Dictionary<string, int> ProductsByCategory { get; set; } = new();
    public Dictionary<string, decimal> AveragePriceByCategory { get; set; } = new();
    public long ComputationTimeMs { get; set; }
    public string ComputationMethod { get; set; } = ""; // "InMemory" or "External"
}
/// <summary>Revenue rollup for a single product category.</summary>
public class CategoryRevenue
{
    public string Category { get; set; } = "";
    public decimal TotalRevenue { get; set; }
    public int OrderCount { get; set; }
    public decimal AverageOrderValue { get; set; }
}
/// <summary>Per-customer spending summary used by the top-customers endpoint.</summary>
public class CustomerSummary
{
    public string CustomerId { get; set; } = "";
    public string CustomerName { get; set; } = "";
    public int TotalOrders { get; set; }
    public decimal TotalSpent { get; set; }
    public decimal AverageOrderValue { get; set; }
    public DateTime FirstOrderDate { get; set; }
    public DateTime LastOrderDate { get; set; }
}
/// <summary>One event payload of the real-time analytics SSE stream.</summary>
public class RealTimeAnalytics
{
    public DateTime Timestamp { get; set; }
    public int OrdersLastHour { get; set; }
    public decimal RevenueLastHour { get; set; }
    public int ActiveCustomers { get; set; }
    // Product name (or id rendered as string) -> units sold in the last hour;
    // TODO confirm key semantics with the producing service.
    public Dictionary<string, int> TopProductsLastHour { get; set; } = new();
    public double OrdersPerMinute { get; set; }
}
/// <summary>Checkpointed progress of a bulk price-update operation.</summary>
public class BulkUpdateState
{
    public string OperationId { get; set; } = "";
    public int ProcessedCount { get; set; }
    public int UpdatedCount { get; set; }
    public int FailedCount { get; set; }
    public DateTime LastCheckpoint { get; set; }
}
/// <summary>Checkpointed progress of a long-running report generation.</summary>
public class ReportState
{
    public string ReportId { get; set; } = "";
    // NOTE(review): int here, but ReportResult.ProgressPercent is double —
    // confirm whether whole-percent granularity is intentional.
    public int ProgressPercent { get; set; }
    public Dictionary<string, object> PartialResults { get; set; } = new();
    public DateTime LastCheckpoint { get; set; }
}
/// <summary>Result payload of the analyze-patterns endpoint.</summary>
public class PatternAnalysisResult
{
    // Pattern name -> score; TODO confirm score semantics with the service.
    public Dictionary<string, double> OrderPatterns { get; set; } = new();
    public List<CustomerSegment> CustomerSegments { get; set; } = new();
    // Null when seasonal analysis was not requested.
    public SeasonalAnalysis? SeasonalAnalysis { get; set; }
    public long AnalysisTimeMs { get; set; }
    public long RecordsProcessed { get; set; }
    public long MemoryUsedMB { get; set; }
}
/// <summary>A named customer segment with its size and descriptive features.</summary>
public class CustomerSegment
{
    public string SegmentName { get; set; } = "";
    public int CustomerCount { get; set; }
    public Dictionary<string, double> Characteristics { get; set; } = new();
}
/// <summary>Monthly/weekly sales patterns and detected peak periods.</summary>
public class SeasonalAnalysis
{
    public Dictionary<string, double> MonthlySalesPattern { get; set; } = new();
    public Dictionary<string, double> WeeklySalesPattern { get; set; } = new();
    public List<string> PeakPeriods { get; set; } = new();
}
/// <summary>
/// Snapshot of SpaceTime memory-management counters, served by /api/analytics/memory-stats.
/// NOTE(review): identical shape to MemoryStats elsewhere in this project — consider
/// consolidating the two.
/// </summary>
public class MemoryStatistics
{
    public long CurrentMemoryUsageMB { get; set; }
    public long PeakMemoryUsageMB { get; set; }
    public int ExternalSortOperations { get; set; }
    public int CheckpointsSaved { get; set; }
    public long DataSpilledToDiskMB { get; set; }
    public double CacheHitRate { get; set; }
    public string CurrentMemoryPressure { get; set; } = "";
}

View File

@@ -0,0 +1,72 @@
using Microsoft.EntityFrameworkCore;
using SqrtSpace.SpaceTime.AspNetCore;
using SqrtSpace.SpaceTime.Core;
using SqrtSpace.SpaceTime.EntityFramework;
using SqrtSpace.SpaceTime.Linq;
using SampleWebApi.Data;
using SampleWebApi.Services;
var builder = WebApplication.CreateBuilder(args);

// Add services to the container
builder.Services.AddControllers();
builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerGen(c =>
{
    c.SwaggerDoc("v1", new() {
        Title = "SqrtSpace SpaceTime Sample API",
        Version = "v1",
        Description = "Demonstrates memory-efficient data processing using √n space-time tradeoffs"
    });
});

// Configure SpaceTime services with memory-aware settings
builder.Services.AddSpaceTime(options =>
{
    options.EnableCheckpointing = true;
    // Checkpoints and disk spill go to temp directories — contents do not
    // survive OS temp cleanup; fine for a demo, revisit for production.
    options.CheckpointDirectory = Path.Combine(Path.GetTempPath(), "spacetime-sample");
    options.CheckpointStrategy = CheckpointStrategy.SqrtN;
    options.DefaultChunkSize = 1000;
    options.StreamingBufferSize = 64 * 1024; // 64KB
    options.ExternalStorageDirectory = Path.Combine(Path.GetTempPath(), "spacetime-external");
});

// Add Entity Framework with in-memory database for demo
builder.Services.AddDbContext<SampleDbContext>(options =>
{
    options.UseInMemoryDatabase("SampleDb");
    // SpaceTime optimizations are available via EF integration
});

// Add application services
builder.Services.AddScoped<IProductService, ProductService>();
builder.Services.AddScoped<IOrderAnalyticsService, OrderAnalyticsService>();
// Background generator keeps inserting orders to simulate live traffic.
builder.Services.AddHostedService<DataGeneratorService>();

// Configure memory limits
builder.Services.Configure<SampleWebApi.Models.MemoryOptions>(builder.Configuration.GetSection("MemoryOptions"));

var app = builder.Build();

// Configure the HTTP request pipeline
if (app.Environment.IsDevelopment())
{
    app.UseSwagger();
    app.UseSwaggerUI();
}
app.UseHttpsRedirection();

// Enable SpaceTime middleware for automatic memory management
app.UseSpaceTime();
app.MapControllers();

// Ensure database is created and seeded
// (runs synchronously at startup; seeding is skipped if data already exists)
using (var scope = app.Services.CreateScope())
{
    var context = scope.ServiceProvider.GetRequiredService<SampleDbContext>();
    await DataSeeder.SeedAsync(context);
}

app.Run();

View File

@@ -0,0 +1,12 @@
{
"profiles": {
"SampleWebApi": {
"commandName": "Project",
"launchBrowser": true,
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
},
"applicationUrl": "https://localhost:50878;http://localhost:50881"
}
}
}

View File

@@ -0,0 +1,190 @@
# SqrtSpace SpaceTime Sample Web API
This sample demonstrates how to build a memory-efficient Web API using the SqrtSpace SpaceTime library. It showcases real-world scenarios where √n space-time tradeoffs can significantly improve application performance and scalability.
## Features Demonstrated
### 1. **Memory-Efficient Data Processing**
- Streaming large datasets without loading everything into memory
- Automatic batching using √n-sized chunks
- External sorting and aggregation for datasets that exceed memory limits
### 2. **Checkpoint-Enabled Operations**
- Resumable bulk operations that can recover from failures
- Progress tracking for long-running tasks
- Automatic state persistence at optimal intervals
### 3. **Real-World API Patterns**
#### Products Controller (`/api/products`)
- **Paginated queries** - Basic memory control through pagination
- **Streaming endpoints** - Stream millions of products using NDJSON format
- **Smart search** - Automatically switches to external sorting for large result sets
- **Bulk updates** - Checkpoint-enabled price updates that can resume after failures
- **CSV export** - Stream large exports without memory bloat
- **Statistics** - Calculate aggregates over large datasets efficiently
#### Analytics Controller (`/api/analytics`)
- **Revenue analysis** - External grouping for large-scale aggregations
- **Top customers** - Find top N using external sorting when needed
- **Real-time streaming** - Server-Sent Events for continuous analytics
- **Complex reports** - Multi-stage report generation with checkpointing
- **Pattern analysis** - ML-ready data processing with memory constraints
- **Memory monitoring** - Track how the system manages memory
### 4. **Automatic Memory Management**
- Adapts processing strategy based on data size
- Spills to disk when memory pressure is detected
- Provides memory usage statistics for monitoring
## Running the Sample
1. **Start the API:**
```bash
dotnet run
```
2. **Access Swagger UI:**
Navigate to `https://localhost:50878/swagger` to explore the API (the HTTPS port is set by `applicationUrl` in `Properties/launchSettings.json`; if yours differs, adjust the port in this and the following `curl` examples accordingly)
3. **Generate Test Data:**
The application automatically seeds the database with:
- 1,000 customers
- 10,000 products
- 50,000 orders
A background service continuously generates new orders to simulate real-time data.
## Key Scenarios to Try
### 1. Stream Large Dataset
```bash
# Stream all products (10,000+) without loading into memory
curl -N https://localhost:5001/api/products/stream
# The response is newline-delimited JSON (NDJSON)
```
### 2. Bulk Update with Checkpointing
```bash
# Start a bulk price update
curl -X POST https://localhost:5001/api/products/bulk-update-prices \
-H "Content-Type: application/json" \
-H "X-Operation-Id: price-update-123" \
-d '{"categoryFilter": "Electronics", "priceMultiplier": 1.1}'
# If it fails, resume with the same Operation ID
```
### 3. Generate Complex Report
```bash
# Generate a report with automatic checkpointing
curl -X POST https://localhost:5001/api/analytics/reports/generate \
-H "Content-Type: application/json" \
-d '{
"startDate": "2024-01-01",
"endDate": "2024-12-31",
"metricsToInclude": ["revenue", "categories", "customers", "products"],
"includeDetailedBreakdown": true
}'
```
### 4. Real-Time Analytics Stream
```bash
# Connect to real-time analytics stream
curl -N https://localhost:5001/api/analytics/real-time/orders
# Streams analytics data every second using Server-Sent Events
```
### 5. Export Large Dataset
```bash
# Export all products to CSV (streams the file)
curl https://localhost:5001/api/products/export/csv > products.csv
```
## Memory Efficiency Examples
### Small Dataset (In-Memory Processing)
When working with small datasets (<10,000 items), the API uses standard in-memory processing:
```csharp
// Standard LINQ operations
var results = await query
.Where(p => p.Category == "Books")
.OrderBy(p => p.Price)
.ToListAsync();
```
### Large Dataset (External Processing)
For large datasets (>10,000 items), the API automatically switches to external processing:
```csharp
// Automatic external sorting
if (count > 10000)
{
query = query.UseExternalSorting();
}
// Process in √n-sized batches
await foreach (var batch in query.BatchBySqrtNAsync())
{
// Process batch
}
```
## Configuration
The sample includes configurable memory limits:
```csharp
// appsettings.json
{
"MemoryOptions": {
"MaxMemoryMB": 512,
"WarningThresholdPercent": 80
}
}
```
## Monitoring
Check memory usage statistics:
```bash
curl https://localhost:5001/api/analytics/memory-stats
```
Response:
```json
{
"currentMemoryUsageMB": 245,
"peakMemoryUsageMB": 412,
"externalSortOperations": 3,
"checkpointsSaved": 15,
"dataSpilledToDiskMB": 89,
"cacheHitRate": 0.87,
"currentMemoryPressure": "Medium"
}
```
## Architecture Highlights
1. **Service Layer**: Encapsulates business logic and SpaceTime optimizations
2. **Entity Framework Integration**: Seamless integration with EF Core queries
3. **Middleware**: Automatic checkpoint and streaming support
4. **Background Services**: Continuous data generation for testing
5. **Memory Monitoring**: Real-time tracking of memory usage
## Best Practices Demonstrated
1. **Know Your Data Size**: Check count before choosing processing strategy
2. **Stream When Possible**: Use IAsyncEnumerable for large results
3. **Checkpoint Long Operations**: Enable recovery from failures
4. **Monitor Memory Usage**: Track and respond to memory pressure
5. **Use External Processing**: Let the library handle large datasets efficiently
## Next Steps
- Modify the memory limits and observe behavior changes
- Add your own endpoints using SpaceTime patterns
- Connect to a real database for production scenarios
- Implement caching with hot/cold storage tiers
- Add distributed processing with Redis coordination

View File

@@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="9.0.0" />
<PackageReference Include="Microsoft.EntityFrameworkCore.InMemory" Version="9.0.0" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.6.2" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\SqrtSpace.SpaceTime.Core\SqrtSpace.SpaceTime.Core.csproj" />
<ProjectReference Include="..\..\src\SqrtSpace.SpaceTime.AspNetCore\SqrtSpace.SpaceTime.AspNetCore.csproj" />
<ProjectReference Include="..\..\src\SqrtSpace.SpaceTime.Linq\SqrtSpace.SpaceTime.Linq.csproj" />
<ProjectReference Include="..\..\src\SqrtSpace.SpaceTime.EntityFramework\SqrtSpace.SpaceTime.EntityFramework.csproj" />
<ProjectReference Include="..\..\src\SqrtSpace.SpaceTime.Caching\SqrtSpace.SpaceTime.Caching.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,131 @@
using Microsoft.Extensions.Options;
using SampleWebApi.Data;
using SampleWebApi.Models;
namespace SampleWebApi.Services;
/// <summary>
/// Background service that continuously generates new orders to simulate real-time data
/// </summary>
public class DataGeneratorService : BackgroundService
{
    private readonly IServiceProvider _serviceProvider;
    private readonly ILogger<DataGeneratorService> _logger;
    private readonly Random _random = new();

    public DataGeneratorService(IServiceProvider serviceProvider, ILogger<DataGeneratorService> logger)
    {
        _serviceProvider = serviceProvider;
        _logger = logger;
    }

    /// <summary>
    /// Main loop: generates a small batch of orders, then sleeps 5-15 seconds.
    /// On unexpected errors it backs off for one minute before retrying.
    /// Host-initiated cancellation exits the loop quietly instead of being
    /// logged as an error (the previous generic catch also swallowed
    /// OperationCanceledException from Task.Delay at shutdown).
    /// </summary>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation("Data generator service started");
        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                await GenerateNewOrdersAsync(stoppingToken);
                // Wait between 5-15 seconds before generating the next batch
                var delay = _random.Next(5000, 15000);
                await Task.Delay(delay, stoppingToken);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Normal shutdown path — not an error.
                break;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error generating data");
                try
                {
                    await Task.Delay(TimeSpan.FromMinutes(1), stoppingToken); // back off on error
                }
                catch (OperationCanceledException)
                {
                    break;
                }
            }
        }
        _logger.LogInformation("Data generator service stopped");
    }

    /// <summary>
    /// Creates 1-5 random orders from random customers/products and persists
    /// them, decrementing product stock. No-ops (with a warning) when there
    /// are no customers or no products in stock.
    /// </summary>
    private async Task GenerateNewOrdersAsync(CancellationToken cancellationToken)
    {
        // BackgroundService is a singleton; resolve the scoped DbContext per batch.
        using var scope = _serviceProvider.CreateScope();
        var context = scope.ServiceProvider.GetRequiredService<SampleDbContext>();

        // Generate 1-5 new orders
        var orderCount = _random.Next(1, 6);

        // Pick random customers (OrderBy(Guid.NewGuid()) = random shuffle)
        var customers = context.Customers
            .OrderBy(c => Guid.NewGuid())
            .Take(orderCount)
            .ToList();
        if (!customers.Any())
        {
            _logger.LogWarning("No customers found for data generation");
            return;
        }

        var products = context.Products
            .Where(p => p.StockQuantity > 0)
            .OrderBy(p => Guid.NewGuid())
            .Take(orderCount * 5) // Get more products for variety
            .ToList();
        if (!products.Any())
        {
            _logger.LogWarning("No products in stock for data generation");
            return;
        }

        var newOrders = new List<Order>();
        foreach (var customer in customers)
        {
            var itemCount = _random.Next(1, 6);
            var orderItems = new List<OrderItem>();
            decimal totalAmount = 0;

            // Select random products for this order
            var orderProducts = products
                .OrderBy(p => Guid.NewGuid())
                .Take(itemCount)
                .ToList();

            foreach (var product in orderProducts)
            {
                // Never order more than is in stock; skip items that would be zero.
                var quantity = Math.Min(_random.Next(1, 4), product.StockQuantity);
                if (quantity == 0) continue;

                var itemTotal = product.Price * quantity;
                totalAmount += itemTotal;
                orderItems.Add(new OrderItem
                {
                    ProductId = product.Id,
                    Quantity = quantity,
                    UnitPrice = product.Price,
                    TotalPrice = itemTotal
                });

                // Update stock
                product.StockQuantity -= quantity;
            }

            if (orderItems.Any())
            {
                newOrders.Add(new Order
                {
                    CustomerId = customer.Id,
                    OrderDate = DateTime.UtcNow,
                    TotalAmount = totalAmount,
                    Status = "Pending",
                    Items = orderItems
                });
            }
        }

        if (newOrders.Any())
        {
            await context.Orders.AddRangeAsync(newOrders, cancellationToken);
            await context.SaveChangesAsync(cancellationToken);
            _logger.LogInformation("Generated {count} new orders", newOrders.Count);
        }
    }
}

View File

@@ -0,0 +1,473 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Options;
using SqrtSpace.SpaceTime.Core;
using SqrtSpace.SpaceTime.EntityFramework;
using SqrtSpace.SpaceTime.Linq;
using SampleWebApi.Data;
using SampleWebApi.Models;
using System.Diagnostics;
namespace SampleWebApi.Services;
/// <summary>
/// Analytics over orders: aggregation, streaming metrics, checkpointed
/// report generation and memory-usage diagnostics.
/// </summary>
public interface IOrderAnalyticsService
{
    /// <summary>Revenue aggregated per product category, optionally restricted to a date range.</summary>
    Task<IEnumerable<CategoryRevenue>> GetRevenueByCategoryAsync(DateTime? startDate, DateTime? endDate);
    /// <summary>Top <paramref name="top"/> customers by total spend, optionally since a given date.</summary>
    Task<IEnumerable<CustomerSummary>> GetTopCustomersAsync(int top, DateTime? since);
    /// <summary>Endless stream of last-hour analytics snapshots, emitted about once per second until cancelled.</summary>
    IAsyncEnumerable<RealTimeAnalytics> StreamRealTimeAnalyticsAsync(CancellationToken cancellationToken);
    /// <summary>Multi-step report; resumable via <paramref name="previousState"/> and <paramref name="checkpoint"/>.</summary>
    Task<ReportResult> GenerateComplexReportAsync(ReportRequest request, string reportId, ReportState? previousState, CheckpointManager? checkpoint);
    /// <summary>Pattern analysis (segmentation, seasonality) over the most recent orders.</summary>
    Task<PatternAnalysisResult> AnalyzeOrderPatternsAsync(PatternAnalysisRequest request);
    /// <summary>Current process memory statistics and derived memory-pressure level.</summary>
    MemoryStatistics GetMemoryStatistics();
}
/// <summary>
/// Default implementation of <see cref="IOrderAnalyticsService"/>.
/// Switches between in-memory EF aggregation (small datasets) and batched
/// "external" aggregation via BatchBySqrtNAsync (large datasets).
/// </summary>
public class OrderAnalyticsService : IOrderAnalyticsService
{
    private readonly SampleDbContext _context;
    private readonly ILogger<OrderAnalyticsService> _logger;
    private readonly MemoryOptions _memoryOptions;
    // Static so counters survive the scoped lifetime of this service.
    // NOTE(review): mutated from concurrent requests without synchronization —
    // acceptable only if these are best-effort diagnostics; confirm.
    private static readonly MemoryStatistics _memoryStats = new();

    public OrderAnalyticsService(
        SampleDbContext context,
        ILogger<OrderAnalyticsService> logger,
        IOptions<MemoryOptions> memoryOptions)
    {
        _context = context;
        _logger = logger;
        _memoryOptions = memoryOptions.Value;
    }

    /// <summary>
    /// Revenue per category. Above 50,000 order items the aggregation runs in
    /// √n-sized batches instead of a single EF GroupBy.
    /// </summary>
    /// <param name="startDate">Inclusive lower bound on the order date, or null for unbounded.</param>
    /// <param name="endDate">Inclusive upper bound on the order date, or null for unbounded.</param>
    public async Task<IEnumerable<CategoryRevenue>> GetRevenueByCategoryAsync(DateTime? startDate, DateTime? endDate)
    {
        var query = _context.OrderItems
            .Include(oi => oi.Product)
            .Include(oi => oi.Order)
            .AsQueryable();
        if (startDate.HasValue)
            query = query.Where(oi => oi.Order.OrderDate >= startDate.Value);
        if (endDate.HasValue)
            query = query.Where(oi => oi.Order.OrderDate <= endDate.Value);
        var itemCount = await query.CountAsync();
        _logger.LogInformation("Processing revenue for {count} order items", itemCount);
        // Use external grouping for large datasets
        if (itemCount > 50000)
        {
            _logger.LogInformation("Using external grouping for revenue calculation");
            _memoryStats.ExternalSortOperations++;
            // category -> (running revenue, running item count)
            var categoryRevenue = new Dictionary<string, (decimal revenue, int count)>();
            // Process in memory-efficient batches
            await foreach (var batch in query.BatchBySqrtNAsync())
            {
                foreach (var item in batch)
                {
                    var category = item.Product.Category;
                    if (!categoryRevenue.ContainsKey(category))
                    {
                        categoryRevenue[category] = (0, 0);
                    }
                    var current = categoryRevenue[category];
                    categoryRevenue[category] = (current.revenue + item.TotalPrice, current.count + 1);
                }
            }
            // NOTE(review): here OrderCount counts order *items* and
            // AverageOrderValue = revenue / item count, while the in-memory
            // branch below counts distinct orders and averages item prices.
            // The two branches return different semantics for the same inputs
            // depending only on dataset size — confirm which is intended.
            return categoryRevenue.Select(kvp => new CategoryRevenue
            {
                Category = kvp.Key,
                TotalRevenue = kvp.Value.revenue,
                OrderCount = kvp.Value.count,
                AverageOrderValue = kvp.Value.count > 0 ? kvp.Value.revenue / kvp.Value.count : 0
            }).OrderByDescending(c => c.TotalRevenue);
        }
        else
        {
            // Use in-memory grouping for smaller datasets
            var grouped = await query
                .GroupBy(oi => oi.Product.Category)
                .Select(g => new CategoryRevenue
                {
                    Category = g.Key,
                    TotalRevenue = g.Sum(oi => oi.TotalPrice),
                    OrderCount = g.Select(oi => oi.OrderId).Distinct().Count(),
                    AverageOrderValue = g.Average(oi => oi.TotalPrice)
                })
                .OrderByDescending(c => c.TotalRevenue)
                .ToListAsync();
            return grouped;
        }
    }

    /// <summary>
    /// Top <paramref name="top"/> customers by total spend. Above 100,000
    /// orders the per-customer aggregation runs in √n-sized batches.
    /// </summary>
    public async Task<IEnumerable<CustomerSummary>> GetTopCustomersAsync(int top, DateTime? since)
    {
        var query = _context.Orders.AsQueryable();
        if (since.HasValue)
            query = query.Where(o => o.OrderDate >= since.Value);
        var orderCount = await query.CountAsync();
        _logger.LogInformation("Finding top {top} customers from {count} orders", top, orderCount);
        // For large datasets, use external sorting
        if (orderCount > 100000)
        {
            _logger.LogInformation("Using external sorting for top customers");
            _memoryStats.ExternalSortOperations++;
            // customerId -> (total spend, order count, first order date, last order date)
            var customerData = new Dictionary<string, (decimal total, int count, DateTime first, DateTime last)>();
            // Aggregate customer data in batches
            await foreach (var batch in query.BatchBySqrtNAsync())
            {
                foreach (var order in batch)
                {
                    if (!customerData.ContainsKey(order.CustomerId))
                    {
                        customerData[order.CustomerId] = (0, 0, order.OrderDate, order.OrderDate);
                    }
                    var current = customerData[order.CustomerId];
                    customerData[order.CustomerId] = (
                        current.total + order.TotalAmount,
                        current.count + 1,
                        order.OrderDate < current.first ? order.OrderDate : current.first,
                        order.OrderDate > current.last ? order.OrderDate : current.last
                    );
                }
            }
            // Get customer details
            var customerIds = customerData.Keys.ToList();
            var customers = await _context.Customers
                .Where(c => customerIds.Contains(c.Id))
                .ToDictionaryAsync(c => c.Id, c => c.Name);
            // Sort and take top N
            return customerData
                .OrderByDescending(kvp => kvp.Value.total)
                .Take(top)
                .Select(kvp => new CustomerSummary
                {
                    CustomerId = kvp.Key,
                    CustomerName = customers.GetValueOrDefault(kvp.Key, "Unknown"),
                    TotalOrders = kvp.Value.count,
                    TotalSpent = kvp.Value.total,
                    // count is always >= 1 here (entry created on first order)
                    AverageOrderValue = kvp.Value.total / kvp.Value.count,
                    FirstOrderDate = kvp.Value.first,
                    LastOrderDate = kvp.Value.last
                });
        }
        else
        {
            // Use in-memory processing for smaller datasets
            var topCustomers = await query
                .GroupBy(o => o.CustomerId)
                .Select(g => new
                {
                    CustomerId = g.Key,
                    TotalSpent = g.Sum(o => o.TotalAmount),
                    OrderCount = g.Count(),
                    FirstOrder = g.Min(o => o.OrderDate),
                    LastOrder = g.Max(o => o.OrderDate)
                })
                .OrderByDescending(c => c.TotalSpent)
                .Take(top)
                .ToListAsync();
            var customerIds = topCustomers.Select(c => c.CustomerId).ToList();
            var customers = await _context.Customers
                .Where(c => customerIds.Contains(c.Id))
                .ToDictionaryAsync(c => c.Id, c => c.Name);
            return topCustomers.Select(c => new CustomerSummary
            {
                CustomerId = c.CustomerId,
                CustomerName = customers.GetValueOrDefault(c.CustomerId, "Unknown"),
                TotalOrders = c.OrderCount,
                TotalSpent = c.TotalSpent,
                AverageOrderValue = c.TotalSpent / c.OrderCount,
                FirstOrderDate = c.FirstOrder,
                LastOrderDate = c.LastOrder
            });
        }
    }

    /// <summary>
    /// Emits one last-hour analytics snapshot roughly every second until the
    /// token is cancelled. Each iteration re-queries the last hour of orders,
    /// so cost grows with order volume.
    /// </summary>
    public async IAsyncEnumerable<RealTimeAnalytics> StreamRealTimeAnalyticsAsync(
        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested)
        {
            var now = DateTime.UtcNow;
            var hourAgo = now.AddHours(-1);
            // Get orders from last hour
            var recentOrders = await _context.Orders
                .Where(o => o.OrderDate >= hourAgo)
                .Include(o => o.Items)
                .ThenInclude(oi => oi.Product)
                .ToListAsync(cancellationToken);
            // Calculate analytics
            var analytics = new RealTimeAnalytics
            {
                Timestamp = now,
                OrdersLastHour = recentOrders.Count,
                RevenueLastHour = recentOrders.Sum(o => o.TotalAmount),
                ActiveCustomers = recentOrders.Select(o => o.CustomerId).Distinct().Count(),
                OrdersPerMinute = recentOrders.Count / 60.0
            };
            // Get top products (by units sold in the last hour)
            analytics.TopProductsLastHour = recentOrders
                .SelectMany(o => o.Items)
                .GroupBy(oi => oi.Product.Name)
                .OrderByDescending(g => g.Sum(oi => oi.Quantity))
                .Take(5)
                .ToDictionary(g => g.Key, g => g.Sum(oi => oi.Quantity));
            yield return analytics;
            // Update memory stats
            var process = Process.GetCurrentProcess();
            _memoryStats.CurrentMemoryUsageMB = process.WorkingSet64 / (1024 * 1024);
            _memoryStats.PeakMemoryUsageMB = Math.Max(_memoryStats.PeakMemoryUsageMB, _memoryStats.CurrentMemoryUsageMB);
            await Task.Delay(1000, cancellationToken); // Wait before next update
        }
    }

    /// <summary>
    /// Builds a four-step report (revenue, category breakdown, top customers,
    /// product performance). Progress is tracked in 25% increments; a
    /// checkpoint may be written after each step and on failure so a later
    /// call can resume from <paramref name="previousState"/>, skipping
    /// already-completed steps.
    /// </summary>
    public async Task<ReportResult> GenerateComplexReportAsync(
        ReportRequest request,
        string reportId,
        ReportState? previousState,
        CheckpointManager? checkpoint)
    {
        var stopwatch = Stopwatch.StartNew();
        var state = previousState ?? new ReportState { ReportId = reportId };
        var result = new ReportResult
        {
            ReportId = reportId,
            GeneratedAt = DateTime.UtcNow,
            // Resume with any partial results from a previous run.
            Metrics = state.PartialResults
        };
        try
        {
            // Step 1: Calculate total revenue (0-25%)
            if (state.ProgressPercent < 25)
            {
                var revenue = await CalculateTotalRevenueAsync(request.StartDate, request.EndDate);
                result.Metrics["totalRevenue"] = revenue;
                state.ProgressPercent = 25;
                if (checkpoint?.ShouldCheckpoint() == true)
                {
                    state.PartialResults = result.Metrics;
                    await checkpoint.CreateCheckpointAsync(state);
                    _memoryStats.CheckpointsSaved++;
                }
            }
            // Step 2: Calculate category breakdown (25-50%)
            if (state.ProgressPercent < 50)
            {
                var categoryRevenue = await GetRevenueByCategoryAsync(request.StartDate, request.EndDate);
                result.Metrics["categoryBreakdown"] = categoryRevenue;
                state.ProgressPercent = 50;
                if (checkpoint?.ShouldCheckpoint() == true)
                {
                    state.PartialResults = result.Metrics;
                    await checkpoint.CreateCheckpointAsync(state);
                    _memoryStats.CheckpointsSaved++;
                }
            }
            // Step 3: Customer analytics (50-75%)
            if (state.ProgressPercent < 75)
            {
                var topCustomers = await GetTopCustomersAsync(100, request.StartDate);
                result.Metrics["topCustomers"] = topCustomers;
                state.ProgressPercent = 75;
                if (checkpoint?.ShouldCheckpoint() == true)
                {
                    state.PartialResults = result.Metrics;
                    await checkpoint.CreateCheckpointAsync(state);
                    _memoryStats.CheckpointsSaved++;
                }
            }
            // Step 4: Product performance (75-100%)
            if (state.ProgressPercent < 100)
            {
                var productStats = await CalculateProductPerformanceAsync(request.StartDate, request.EndDate);
                result.Metrics["productPerformance"] = productStats;
                state.ProgressPercent = 100;
            }
            result.Completed = true;
            result.ProgressPercent = 100;
            result.ProcessingTimeMs = stopwatch.ElapsedMilliseconds;
            result.MemoryUsedMB = _memoryStats.CurrentMemoryUsageMB;
            _logger.LogInformation("Report {reportId} completed in {time}ms", reportId, result.ProcessingTimeMs);
            return result;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error generating report {reportId}", reportId);
            // Save checkpoint on error
            if (checkpoint != null)
            {
                state.PartialResults = result.Metrics;
                await checkpoint.CreateCheckpointAsync(state);
            }
            throw;
        }
    }

    /// <summary>
    /// Analyzes ordering patterns over the most recent
    /// <c>request.MaxOrdersToAnalyze</c> orders: basic aggregates, optional
    /// spend-based customer segmentation and optional seasonal breakdowns.
    /// NOTE(review): the segment Average() calls throw InvalidOperationException
    /// when a segment is empty (e.g. no customer spent &gt; 1000) — confirm
    /// callers guarantee non-empty segments or guard these.
    /// </summary>
    public async Task<PatternAnalysisResult> AnalyzeOrderPatternsAsync(PatternAnalysisRequest request)
    {
        var stopwatch = Stopwatch.StartNew();
        var result = new PatternAnalysisResult();
        // Limit the analysis scope
        var orders = await _context.Orders
            .OrderByDescending(o => o.OrderDate)
            .Take(request.MaxOrdersToAnalyze)
            .Include(o => o.Items)
            .ToListAsync();
        result.RecordsProcessed = orders.Count;
        // Analyze order patterns
        result.OrderPatterns["averageOrderValue"] = orders.Average(o => (double)o.TotalAmount);
        result.OrderPatterns["ordersPerDay"] = orders
            .GroupBy(o => o.OrderDate.Date)
            .Average(g => g.Count());
        // Customer segmentation
        if (request.IncludeCustomerSegmentation)
        {
            var customerGroups = orders
                .GroupBy(o => o.CustomerId)
                .Select(g => new
                {
                    CustomerId = g.Key,
                    OrderCount = g.Count(),
                    TotalSpent = g.Sum(o => o.TotalAmount),
                    AverageOrder = g.Average(o => o.TotalAmount)
                })
                .ToList();
            // Simple segmentation based on spending
            result.CustomerSegments = new List<CustomerSegment>
            {
                new CustomerSegment
                {
                    SegmentName = "High Value",
                    CustomerCount = customerGroups.Count(c => c.TotalSpent > 1000),
                    Characteristics = new Dictionary<string, double>
                    {
                        ["averageOrderValue"] = customerGroups.Where(c => c.TotalSpent > 1000).Average(c => (double)c.AverageOrder),
                        ["ordersPerCustomer"] = customerGroups.Where(c => c.TotalSpent > 1000).Average(c => c.OrderCount)
                    }
                },
                new CustomerSegment
                {
                    SegmentName = "Regular",
                    CustomerCount = customerGroups.Count(c => c.TotalSpent >= 100 && c.TotalSpent <= 1000),
                    Characteristics = new Dictionary<string, double>
                    {
                        ["averageOrderValue"] = customerGroups.Where(c => c.TotalSpent >= 100 && c.TotalSpent <= 1000).Average(c => (double)c.AverageOrder),
                        ["ordersPerCustomer"] = customerGroups.Where(c => c.TotalSpent >= 100 && c.TotalSpent <= 1000).Average(c => c.OrderCount)
                    }
                }
            };
        }
        // Seasonal analysis
        if (request.IncludeSeasonalAnalysis)
        {
            result.SeasonalAnalysis = new SeasonalAnalysis
            {
                MonthlySalesPattern = orders
                    .GroupBy(o => o.OrderDate.Month)
                    .ToDictionary(g => g.Key.ToString(), g => (double)g.Sum(o => o.TotalAmount)),
                WeeklySalesPattern = orders
                    .GroupBy(o => o.OrderDate.DayOfWeek)
                    .ToDictionary(g => g.Key.ToString(), g => (double)g.Sum(o => o.TotalAmount)),
                PeakPeriods = orders
                    .GroupBy(o => o.OrderDate.Date)
                    .OrderByDescending(g => g.Sum(o => o.TotalAmount))
                    .Take(5)
                    .Select(g => g.Key.ToString("yyyy-MM-dd"))
                    .ToList()
            };
        }
        result.AnalysisTimeMs = stopwatch.ElapsedMilliseconds;
        result.MemoryUsedMB = _memoryStats.CurrentMemoryUsageMB;
        return result;
    }

    /// <summary>
    /// Refreshes the current working-set measurement and derives a coarse
    /// pressure level from the configured memory budget.
    /// NOTE(review): divides by _memoryOptions.MaxMemoryMB — a configured
    /// value of 0 would throw DivideByZeroException; confirm configuration
    /// validation guarantees a positive limit.
    /// </summary>
    public MemoryStatistics GetMemoryStatistics()
    {
        var process = Process.GetCurrentProcess();
        _memoryStats.CurrentMemoryUsageMB = process.WorkingSet64 / (1024 * 1024);
        // Determine memory pressure
        var usagePercent = (_memoryStats.CurrentMemoryUsageMB * 100) / _memoryOptions.MaxMemoryMB;
        _memoryStats.CurrentMemoryPressure = usagePercent switch
        {
            < 50 => "Low",
            < 80 => "Medium",
            _ => "High"
        };
        return _memoryStats;
    }

    // Sum of order totals within the inclusive [startDate, endDate] range.
    private async Task<decimal> CalculateTotalRevenueAsync(DateTime startDate, DateTime endDate)
    {
        var revenue = await _context.Orders
            .Where(o => o.OrderDate >= startDate && o.OrderDate <= endDate)
            .SumAsync(o => o.TotalAmount);
        return revenue;
    }

    // Top-50 products by revenue within the date range, as an anonymous-type list.
    private async Task<object> CalculateProductPerformanceAsync(DateTime startDate, DateTime endDate)
    {
        var query = _context.OrderItems
            .Include(oi => oi.Product)
            .Include(oi => oi.Order)
            .Where(oi => oi.Order.OrderDate >= startDate && oi.Order.OrderDate <= endDate);
        var productPerformance = await query
            .GroupBy(oi => new { oi.ProductId, oi.Product.Name })
            .Select(g => new
            {
                ProductId = g.Key.ProductId,
                ProductName = g.Key.Name,
                UnitsSold = g.Sum(oi => oi.Quantity),
                Revenue = g.Sum(oi => oi.TotalPrice),
                OrderCount = g.Select(oi => oi.OrderId).Distinct().Count()
            })
            .OrderByDescending(p => p.Revenue)
            .Take(50)
            .ToListAsync();
        return productPerformance;
    }
}

View File

@@ -0,0 +1,288 @@
using Microsoft.EntityFrameworkCore;
using SqrtSpace.SpaceTime.Core;
using SqrtSpace.SpaceTime.EntityFramework;
using SqrtSpace.SpaceTime.Linq;
using SampleWebApi.Data;
using SampleWebApi.Models;
using System.Text;
namespace SampleWebApi.Services;
/// <summary>
/// Product catalog operations: paging, streaming, search, checkpointed bulk
/// price updates, CSV export and statistics.
/// </summary>
public interface IProductService
{
    /// <summary>One page of products (1-based <paramref name="page"/>).</summary>
    Task<PagedResult<Product>> GetProductsPagedAsync(int page, int pageSize);
    /// <summary>Streams products, optionally filtered by category and minimum price.</summary>
    IAsyncEnumerable<Product> StreamProductsAsync(string? category, decimal? minPrice);
    /// <summary>Name/description substring search, sorted by "price", "category" or name (default).</summary>
    Task<IEnumerable<Product>> SearchProductsAsync(string query, string sortBy, bool descending);
    /// <summary>Multiplies prices by <paramref name="priceMultiplier"/>; resumable via <paramref name="checkpoint"/>.</summary>
    Task<BulkUpdateResult> BulkUpdatePricesAsync(string? categoryFilter, decimal priceMultiplier, string operationId, CheckpointManager? checkpoint);
    /// <summary>Writes products as CSV to <paramref name="outputStream"/>; the caller owns the stream.</summary>
    Task ExportToCsvAsync(Stream outputStream, string? category);
    /// <summary>Price/category statistics, optionally restricted to one category.</summary>
    Task<ProductStatistics> GetStatisticsAsync(string? category);
}
/// <summary>
/// Default implementation of <see cref="IProductService"/>. Large datasets
/// are processed in √n-sized batches; long-running bulk updates can be
/// resumed from checkpoints.
/// </summary>
public class ProductService : IProductService
{
    private readonly SampleDbContext _context;
    private readonly ILogger<ProductService> _logger;

    public ProductService(SampleDbContext context, ILogger<ProductService> logger)
    {
        _context = context;
        _logger = logger;
    }

    /// <summary>
    /// Returns one page of products (1-based page index). Rows are ordered by
    /// Id: Skip/Take without an OrderBy is nondeterministic in EF and can
    /// repeat or drop rows across pages.
    /// </summary>
    public async Task<PagedResult<Product>> GetProductsPagedAsync(int page, int pageSize)
    {
        var query = _context.Products.AsQueryable();
        var totalCount = await query.CountAsync();
        var items = await query
            .OrderBy(p => p.Id) // stable ordering => deterministic pages
            .Skip((page - 1) * pageSize)
            .Take(pageSize)
            .ToListAsync();
        return new PagedResult<Product>
        {
            Items = items,
            Page = page,
            PageSize = pageSize,
            TotalCount = totalCount
        };
    }

    /// <summary>
    /// Streams products one at a time, fetching them in memory-efficient
    /// √n-sized batches. Optional filters: exact category and minimum price.
    /// </summary>
    public async IAsyncEnumerable<Product> StreamProductsAsync(string? category, decimal? minPrice)
    {
        var query = _context.Products.AsQueryable();
        if (!string.IsNullOrEmpty(category))
        {
            query = query.Where(p => p.Category == category);
        }
        if (minPrice.HasValue)
        {
            query = query.Where(p => p.Price >= minPrice.Value);
        }
        // Use BatchBySqrtN to process in memory-efficient chunks
        await foreach (var batch in query.BatchBySqrtNAsync())
        {
            foreach (var product in batch)
            {
                yield return product;
            }
        }
    }

    /// <summary>
    /// Substring search over Name/Description, sorted by "price", "category"
    /// or name (default). Result sets above 10,000 rows use external sorting.
    /// </summary>
    public async Task<IEnumerable<Product>> SearchProductsAsync(string query, string sortBy, bool descending)
    {
        var searchQuery = _context.Products
            .Where(p => p.Name.Contains(query) || p.Description.Contains(query));
        // Count to determine if we need external sorting
        var count = await searchQuery.CountAsync();
        _logger.LogInformation("Search found {count} products for query '{query}'", count, query);
        // ToLowerInvariant: sortBy is an API keyword, not user-visible text,
        // so comparison must be culture-independent (avoids the Turkish-I bug).
        IQueryable<Product> sortedQuery = sortBy.ToLowerInvariant() switch
        {
            "price" => descending ? searchQuery.OrderByDescending(p => p.Price) : searchQuery.OrderBy(p => p.Price),
            "category" => descending ? searchQuery.OrderByDescending(p => p.Category) : searchQuery.OrderBy(p => p.Category),
            _ => descending ? searchQuery.OrderByDescending(p => p.Name) : searchQuery.OrderBy(p => p.Name)
        };
        // Use external sorting for large result sets
        if (count > 10000)
        {
            _logger.LogInformation("Using external sorting for {count} products", count);
            sortedQuery = sortedQuery.UseExternalSorting();
        }
        return await sortedQuery.ToListAsync();
    }

    /// <summary>
    /// Multiplies the price of every (optionally category-filtered) product
    /// by <paramref name="priceMultiplier"/>, saving after each √n-sized
    /// batch. Progress is checkpointed so an interrupted run can resume where
    /// it stopped; on failure the counters are rolled back so the failed
    /// batch is retried (not skipped) after a resume.
    /// </summary>
    public async Task<BulkUpdateResult> BulkUpdatePricesAsync(
        string? categoryFilter,
        decimal priceMultiplier,
        string operationId,
        CheckpointManager? checkpoint)
    {
        var state = new BulkUpdateState { OperationId = operationId };
        // Try to restore from checkpoint
        if (checkpoint != null)
        {
            var previousState = await checkpoint.RestoreLatestCheckpointAsync<BulkUpdateState>();
            if (previousState != null)
            {
                state = previousState;
                _logger.LogInformation("Resuming bulk update from checkpoint. Already processed: {count}",
                    state.ProcessedCount);
            }
        }
        var query = _context.Products.AsQueryable();
        if (!string.IsNullOrEmpty(categoryFilter))
        {
            query = query.Where(p => p.Category == categoryFilter);
        }
        var totalProducts = await query.CountAsync();
        // Stable ordering is required for Skip-based resume to be correct:
        // without OrderBy, EF row order is undefined between queries.
        var products = query.OrderBy(p => p.Id).Skip(state.ProcessedCount);
        // Process in batches using √n strategy
        await foreach (var batch in products.BatchBySqrtNAsync())
        {
            try
            {
                foreach (var product in batch)
                {
                    product.Price *= priceMultiplier;
                    product.UpdatedAt = DateTime.UtcNow;
                    state.ProcessedCount++;
                    state.UpdatedCount++;
                }
                await _context.SaveChangesAsync();
                // Save checkpoint only after a successful save.
                if (checkpoint?.ShouldCheckpoint() == true)
                {
                    state.LastCheckpoint = DateTime.UtcNow;
                    await checkpoint.CreateCheckpointAsync(state);
                    _logger.LogInformation("Checkpoint saved. Processed: {count}/{total}",
                        state.ProcessedCount, totalProducts);
                }
            }
            catch (Exception ex)
            {
                // SaveChangesAsync failed, so nothing in this batch persisted:
                // undo the optimistic counter increments and record the batch
                // as failed. (The previous arithmetic here,
                // batch.Count - (ProcessedCount % batch.Count), was incorrect.)
                state.ProcessedCount -= batch.Count;
                state.UpdatedCount -= batch.Count;
                state.FailedCount += batch.Count;
                _logger.LogError(ex, "Error updating batch. Processed so far: {count}", state.ProcessedCount);
                // Save checkpoint on error so a resume retries this batch.
                if (checkpoint != null)
                {
                    await checkpoint.CreateCheckpointAsync(state);
                }
                throw;
            }
        }
        return new BulkUpdateResult
        {
            OperationId = operationId,
            TotalProducts = totalProducts,
            UpdatedProducts = state.UpdatedCount,
            FailedProducts = state.FailedCount,
            Completed = true,
            CheckpointId = state.LastCheckpoint.ToString("O")
        };
    }

    /// <summary>
    /// Streams products to <paramref name="outputStream"/> as CSV in √n-sized
    /// batches. The stream is left open (the caller — e.g. the HTTP response —
    /// owns it), and numbers/dates are written with the invariant culture so
    /// the CSV is parseable regardless of server locale.
    /// </summary>
    public async Task ExportToCsvAsync(Stream outputStream, string? category)
    {
        // leaveOpen: disposing the writer must not close the caller's stream.
        using var writer = new StreamWriter(outputStream, Encoding.UTF8, bufferSize: 4096, leaveOpen: true);
        // Write header
        await writer.WriteLineAsync("Id,Name,Category,Price,StockQuantity,CreatedAt,UpdatedAt");
        var query = _context.Products.AsQueryable();
        if (!string.IsNullOrEmpty(category))
        {
            query = query.Where(p => p.Category == category);
        }
        // Stream products in batches to minimize memory usage
        await foreach (var batch in query.BatchBySqrtNAsync())
        {
            foreach (var product in batch)
            {
                // Invariant formatting: a comma decimal separator (e.g. de-DE)
                // would otherwise corrupt the CSV columns. Quoted fields have
                // embedded quotes doubled per RFC 4180.
                await writer.WriteLineAsync(FormattableString.Invariant(
                    $"{product.Id}," +
                    $"\"{product.Name.Replace("\"", "\"\"")}\"," +
                    $"\"{product.Category.Replace("\"", "\"\"")}\"," +
                    $"{product.Price}," +
                    $"{product.StockQuantity}," +
                    $"{product.CreatedAt:yyyy-MM-dd HH:mm:ss}," +
                    $"{product.UpdatedAt:yyyy-MM-dd HH:mm:ss}"));
            }
            await writer.FlushAsync();
        }
    }

    /// <summary>
    /// Price/category statistics. Above 100,000 rows the aggregates are
    /// computed incrementally over √n-sized batches ("External"); otherwise
    /// all rows are materialized and aggregated in memory ("InMemory").
    /// </summary>
    public async Task<ProductStatistics> GetStatisticsAsync(string? category)
    {
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        var query = _context.Products.AsQueryable();
        if (!string.IsNullOrEmpty(category))
        {
            query = query.Where(p => p.Category == category);
        }
        var totalCount = await query.CountAsync();
        var computationMethod = totalCount > 100000 ? "External" : "InMemory";
        ProductStatistics stats;
        if (computationMethod == "External")
        {
            _logger.LogInformation("Using external aggregation for {count} products", totalCount);
            // For large datasets, compute statistics in batches
            decimal totalPrice = 0;
            decimal minPrice = decimal.MaxValue;
            decimal maxPrice = decimal.MinValue;
            var categoryStats = new Dictionary<string, (int count, decimal totalPrice)>();
            await foreach (var batch in query.BatchBySqrtNAsync())
            {
                foreach (var product in batch)
                {
                    totalPrice += product.Price;
                    minPrice = Math.Min(minPrice, product.Price);
                    maxPrice = Math.Max(maxPrice, product.Price);
                    if (!categoryStats.ContainsKey(product.Category))
                    {
                        categoryStats[product.Category] = (0, 0);
                    }
                    var current = categoryStats[product.Category];
                    categoryStats[product.Category] = (current.count + 1, current.totalPrice + product.Price);
                }
            }
            stats = new ProductStatistics
            {
                TotalProducts = totalCount,
                AveragePrice = totalCount > 0 ? totalPrice / totalCount : 0,
                // Sentinel min/max mean "no products seen" — report 0 instead.
                MinPrice = minPrice == decimal.MaxValue ? 0 : minPrice,
                MaxPrice = maxPrice == decimal.MinValue ? 0 : maxPrice,
                ProductsByCategory = categoryStats.ToDictionary(k => k.Key, v => v.Value.count),
                AveragePriceByCategory = categoryStats.ToDictionary(
                    k => k.Key,
                    v => v.Value.count > 0 ? v.Value.totalPrice / v.Value.count : 0)
            };
        }
        else
        {
            // For smaller datasets, use in-memory aggregation
            var products = await query.ToListAsync();
            stats = new ProductStatistics
            {
                TotalProducts = products.Count,
                AveragePrice = products.Any() ? products.Average(p => p.Price) : 0,
                MinPrice = products.Any() ? products.Min(p => p.Price) : 0,
                MaxPrice = products.Any() ? products.Max(p => p.Price) : 0,
                ProductsByCategory = products.GroupBy(p => p.Category)
                    .ToDictionary(g => g.Key, g => g.Count()),
                AveragePriceByCategory = products.GroupBy(p => p.Category)
                    .ToDictionary(g => g.Key, g => g.Average(p => p.Price))
            };
        }
        stats.ComputationTimeMs = stopwatch.ElapsedMilliseconds;
        stats.ComputationMethod = computationMethod;
        return stats;
    }
}