Initial push

This commit is contained in:
2025-07-20 03:41:39 -04:00
commit d315f5d26e
118 changed files with 25819 additions and 0 deletions

View File

@@ -0,0 +1,131 @@
using Microsoft.Extensions.Options;
using SampleWebApi.Data;
using SampleWebApi.Models;
namespace SampleWebApi.Services;
/// <summary>
/// Background service that continuously generates new orders to simulate real-time data.
/// Runs until the host signals shutdown via the stopping token.
/// </summary>
public class DataGeneratorService : BackgroundService
{
    private readonly IServiceProvider _serviceProvider;
    private readonly ILogger<DataGeneratorService> _logger;
    private readonly Random _random = new();

    public DataGeneratorService(IServiceProvider serviceProvider, ILogger<DataGeneratorService> logger)
    {
        _serviceProvider = serviceProvider;
        _logger = logger;
    }

    /// <summary>
    /// Main loop: generate a batch of orders, wait 5-15 seconds, repeat.
    /// Unexpected errors trigger a one-minute back-off before retrying.
    /// </summary>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation("Data generator service started");
        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                await GenerateNewOrdersAsync(stoppingToken);
                // Wait between 5-15 seconds before generating the next batch.
                await Task.Delay(_random.Next(5000, 15000), stoppingToken);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Graceful shutdown: Task.Delay (and EF calls) throw on cancellation.
                // Previously this fell into the generic catch, logged a bogus error,
                // and then rethrew from the back-off delay. Exit quietly instead.
                break;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error generating data");
                try
                {
                    // Back off for one minute on error, still honoring shutdown.
                    await Task.Delay(TimeSpan.FromMinutes(1), stoppingToken);
                }
                catch (OperationCanceledException)
                {
                    break;
                }
            }
        }
        _logger.LogInformation("Data generator service stopped");
    }

    /// <summary>
    /// Creates 1-5 orders for random customers from random in-stock products,
    /// decrementing stock and persisting everything in one SaveChanges call.
    /// A fresh scope is created per invocation because DbContext is scoped.
    /// </summary>
    private async Task GenerateNewOrdersAsync(CancellationToken cancellationToken)
    {
        using var scope = _serviceProvider.CreateScope();
        var context = scope.ServiceProvider.GetRequiredService<SampleDbContext>();

        var orderCount = _random.Next(1, 6); // 1-5 orders per batch

        // OrderBy(Guid.NewGuid()) asks the provider for a random row order.
        var customers = context.Customers
            .OrderBy(c => Guid.NewGuid())
            .Take(orderCount)
            .ToList();
        if (!customers.Any())
        {
            _logger.LogWarning("No customers found for data generation");
            return;
        }

        var products = context.Products
            .Where(p => p.StockQuantity > 0)
            .OrderBy(p => Guid.NewGuid())
            .Take(orderCount * 5) // fetch extra products for variety
            .ToList();
        if (!products.Any())
        {
            _logger.LogWarning("No products in stock for data generation");
            return;
        }

        var newOrders = new List<Order>();
        foreach (var customer in customers)
        {
            var itemCount = _random.Next(1, 6);
            var orderItems = new List<OrderItem>();
            decimal totalAmount = 0;

            // Select random products for this order.
            var orderProducts = products
                .OrderBy(p => Guid.NewGuid())
                .Take(itemCount)
                .ToList();

            foreach (var product in orderProducts)
            {
                // Never sell more than the remaining stock; skip sold-out items.
                var quantity = Math.Min(_random.Next(1, 4), product.StockQuantity);
                if (quantity == 0) continue;

                var itemTotal = product.Price * quantity;
                totalAmount += itemTotal;
                orderItems.Add(new OrderItem
                {
                    ProductId = product.Id,
                    Quantity = quantity,
                    UnitPrice = product.Price,
                    TotalPrice = itemTotal
                });
                product.StockQuantity -= quantity; // reserve the stock
            }

            if (orderItems.Any())
            {
                newOrders.Add(new Order
                {
                    CustomerId = customer.Id,
                    OrderDate = DateTime.UtcNow,
                    TotalAmount = totalAmount,
                    Status = "Pending",
                    Items = orderItems
                });
            }
        }

        if (newOrders.Any())
        {
            await context.Orders.AddRangeAsync(newOrders, cancellationToken);
            await context.SaveChangesAsync(cancellationToken);
            _logger.LogInformation("Generated {count} new orders", newOrders.Count);
        }
    }
}

View File

@@ -0,0 +1,473 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Options;
using SqrtSpace.SpaceTime.Core;
using SqrtSpace.SpaceTime.EntityFramework;
using SqrtSpace.SpaceTime.Linq;
using SampleWebApi.Data;
using SampleWebApi.Models;
using System.Diagnostics;
namespace SampleWebApi.Services;
/// <summary>
/// Analytics over orders: revenue breakdowns, customer rankings, a real-time
/// metrics stream, resumable multi-step reports, and pattern analysis.
/// </summary>
public interface IOrderAnalyticsService
{
/// <summary>Revenue totals per product category, optionally bounded by the given dates.</summary>
Task<IEnumerable<CategoryRevenue>> GetRevenueByCategoryAsync(DateTime? startDate, DateTime? endDate);
/// <summary>Top <paramref name="top"/> customers by total spend, optionally only counting orders since <paramref name="since"/>.</summary>
Task<IEnumerable<CustomerSummary>> GetTopCustomersAsync(int top, DateTime? since);
/// <summary>Endless stream of last-hour analytics snapshots, one roughly per second, until cancelled.</summary>
IAsyncEnumerable<RealTimeAnalytics> StreamRealTimeAnalyticsAsync(CancellationToken cancellationToken);
/// <summary>Builds a multi-step report; <paramref name="previousState"/>/<paramref name="checkpoint"/> allow resuming a partially completed run.</summary>
Task<ReportResult> GenerateComplexReportAsync(ReportRequest request, string reportId, ReportState? previousState, CheckpointManager? checkpoint);
/// <summary>Analyzes recent orders for spending, segmentation, and seasonal patterns per the request flags.</summary>
Task<PatternAnalysisResult> AnalyzeOrderPatternsAsync(PatternAnalysisRequest request);
/// <summary>Current process memory usage plus counters accumulated by this service.</summary>
MemoryStatistics GetMemoryStatistics();
}
/// <summary>
/// Implements order analytics, switching between database-side aggregation for
/// small datasets and batched (√n) streaming aggregation for large ones.
/// </summary>
public class OrderAnalyticsService : IOrderAnalyticsService
{
    private readonly SampleDbContext _context;
    private readonly ILogger<OrderAnalyticsService> _logger;
    private readonly MemoryOptions _memoryOptions;

    // NOTE(review): static and mutated without synchronization, so these counters
    // are shared by every instance and may race under concurrent requests.
    // Acceptable for approximate diagnostics only — confirm that is intended.
    private static readonly MemoryStatistics _memoryStats = new();

    public OrderAnalyticsService(
        SampleDbContext context,
        ILogger<OrderAnalyticsService> logger,
        IOptions<MemoryOptions> memoryOptions)
    {
        _context = context;
        _logger = logger;
        _memoryOptions = memoryOptions.Value;
    }

    /// <summary>
    /// Computes revenue per product category, optionally restricted to
    /// [<paramref name="startDate"/>, <paramref name="endDate"/>]. Datasets above
    /// 50,000 items are aggregated in √n-sized batches to bound memory.
    /// </summary>
    public async Task<IEnumerable<CategoryRevenue>> GetRevenueByCategoryAsync(DateTime? startDate, DateTime? endDate)
    {
        var query = _context.OrderItems
            .Include(oi => oi.Product)
            .Include(oi => oi.Order)
            .AsQueryable();
        if (startDate.HasValue)
            query = query.Where(oi => oi.Order.OrderDate >= startDate.Value);
        if (endDate.HasValue)
            query = query.Where(oi => oi.Order.OrderDate <= endDate.Value);

        var itemCount = await query.CountAsync();
        _logger.LogInformation("Processing revenue for {count} order items", itemCount);

        if (itemCount > 50000)
        {
            _logger.LogInformation("Using external grouping for revenue calculation");
            _memoryStats.ExternalSortOperations++;

            var categoryRevenue = new Dictionary<string, (decimal revenue, int count)>();
            await foreach (var batch in query.BatchBySqrtNAsync())
            {
                foreach (var item in batch)
                {
                    // TryGetValue avoids the ContainsKey + indexer double lookup;
                    // a missing key yields the (0, 0) default tuple.
                    categoryRevenue.TryGetValue(item.Product.Category, out var current);
                    categoryRevenue[item.Product.Category] =
                        (current.revenue + item.TotalPrice, current.count + 1);
                }
            }

            // NOTE(review): "count" here is order *items*, while the in-memory
            // branch below reports distinct orders as OrderCount — the two paths
            // disagree. Confirm which semantics callers rely on.
            return categoryRevenue.Select(kvp => new CategoryRevenue
            {
                Category = kvp.Key,
                TotalRevenue = kvp.Value.revenue,
                OrderCount = kvp.Value.count,
                AverageOrderValue = kvp.Value.count > 0 ? kvp.Value.revenue / kvp.Value.count : 0
            }).OrderByDescending(c => c.TotalRevenue);
        }

        // Smaller datasets: let the database provider do the grouping.
        var grouped = await query
            .GroupBy(oi => oi.Product.Category)
            .Select(g => new CategoryRevenue
            {
                Category = g.Key,
                TotalRevenue = g.Sum(oi => oi.TotalPrice),
                OrderCount = g.Select(oi => oi.OrderId).Distinct().Count(),
                AverageOrderValue = g.Average(oi => oi.TotalPrice)
            })
            .OrderByDescending(c => c.TotalRevenue)
            .ToListAsync();
        return grouped;
    }

    /// <summary>
    /// Returns the <paramref name="top"/> customers ranked by total spend,
    /// optionally counting only orders placed since <paramref name="since"/>.
    /// Above 100,000 orders the aggregation streams in √n batches.
    /// </summary>
    public async Task<IEnumerable<CustomerSummary>> GetTopCustomersAsync(int top, DateTime? since)
    {
        var query = _context.Orders.AsQueryable();
        if (since.HasValue)
            query = query.Where(o => o.OrderDate >= since.Value);

        var orderCount = await query.CountAsync();
        _logger.LogInformation("Finding top {top} customers from {count} orders", top, orderCount);

        if (orderCount > 100000)
        {
            _logger.LogInformation("Using external sorting for top customers");
            _memoryStats.ExternalSortOperations++;

            var customerData = new Dictionary<string, (decimal total, int count, DateTime first, DateTime last)>();
            await foreach (var batch in query.BatchBySqrtNAsync())
            {
                foreach (var order in batch)
                {
                    // Single-lookup upsert of the running aggregate per customer.
                    if (customerData.TryGetValue(order.CustomerId, out var current))
                    {
                        customerData[order.CustomerId] = (
                            current.total + order.TotalAmount,
                            current.count + 1,
                            order.OrderDate < current.first ? order.OrderDate : current.first,
                            order.OrderDate > current.last ? order.OrderDate : current.last);
                    }
                    else
                    {
                        customerData[order.CustomerId] = (order.TotalAmount, 1, order.OrderDate, order.OrderDate);
                    }
                }
            }

            // Resolve display names for the aggregated customers.
            var customerIds = customerData.Keys.ToList();
            var customers = await _context.Customers
                .Where(c => customerIds.Contains(c.Id))
                .ToDictionaryAsync(c => c.Id, c => c.Name);

            // count >= 1 for every entry, so the division below is safe.
            return customerData
                .OrderByDescending(kvp => kvp.Value.total)
                .Take(top)
                .Select(kvp => new CustomerSummary
                {
                    CustomerId = kvp.Key,
                    CustomerName = customers.GetValueOrDefault(kvp.Key, "Unknown"),
                    TotalOrders = kvp.Value.count,
                    TotalSpent = kvp.Value.total,
                    AverageOrderValue = kvp.Value.total / kvp.Value.count,
                    FirstOrderDate = kvp.Value.first,
                    LastOrderDate = kvp.Value.last
                });
        }

        // Smaller datasets: aggregate in the database, then join names in memory.
        var topCustomers = await query
            .GroupBy(o => o.CustomerId)
            .Select(g => new
            {
                CustomerId = g.Key,
                TotalSpent = g.Sum(o => o.TotalAmount),
                OrderCount = g.Count(),
                FirstOrder = g.Min(o => o.OrderDate),
                LastOrder = g.Max(o => o.OrderDate)
            })
            .OrderByDescending(c => c.TotalSpent)
            .Take(top)
            .ToListAsync();

        var ids = topCustomers.Select(c => c.CustomerId).ToList();
        var names = await _context.Customers
            .Where(c => ids.Contains(c.Id))
            .ToDictionaryAsync(c => c.Id, c => c.Name);

        return topCustomers.Select(c => new CustomerSummary
        {
            CustomerId = c.CustomerId,
            CustomerName = names.GetValueOrDefault(c.CustomerId, "Unknown"),
            TotalOrders = c.OrderCount,
            TotalSpent = c.TotalSpent,
            AverageOrderValue = c.TotalSpent / c.OrderCount,
            FirstOrderDate = c.FirstOrder,
            LastOrderDate = c.LastOrder
        });
    }

    /// <summary>
    /// Streams a snapshot of the last hour's orders roughly once per second
    /// until <paramref name="cancellationToken"/> is cancelled.
    /// </summary>
    public async IAsyncEnumerable<RealTimeAnalytics> StreamRealTimeAnalyticsAsync(
        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested)
        {
            var now = DateTime.UtcNow;
            var hourAgo = now.AddHours(-1);

            var recentOrders = await _context.Orders
                .Where(o => o.OrderDate >= hourAgo)
                .Include(o => o.Items)
                .ThenInclude(oi => oi.Product)
                .ToListAsync(cancellationToken);

            var analytics = new RealTimeAnalytics
            {
                Timestamp = now,
                OrdersLastHour = recentOrders.Count,
                RevenueLastHour = recentOrders.Sum(o => o.TotalAmount),
                ActiveCustomers = recentOrders.Select(o => o.CustomerId).Distinct().Count(),
                OrdersPerMinute = recentOrders.Count / 60.0
            };

            // Top five products by units sold within the window.
            analytics.TopProductsLastHour = recentOrders
                .SelectMany(o => o.Items)
                .GroupBy(oi => oi.Product.Name)
                .OrderByDescending(g => g.Sum(oi => oi.Quantity))
                .Take(5)
                .ToDictionary(g => g.Key, g => g.Sum(oi => oi.Quantity));

            yield return analytics;

            // Refresh process-level memory stats alongside each snapshot.
            var process = Process.GetCurrentProcess();
            _memoryStats.CurrentMemoryUsageMB = process.WorkingSet64 / (1024 * 1024);
            _memoryStats.PeakMemoryUsageMB = Math.Max(_memoryStats.PeakMemoryUsageMB, _memoryStats.CurrentMemoryUsageMB);

            await Task.Delay(1000, cancellationToken); // wait before next update
        }
    }

    /// <summary>
    /// Builds a four-step report (total revenue, category breakdown, top
    /// customers, product performance), checkpointing partial results after each
    /// step so a failed run can resume from <paramref name="previousState"/>.
    /// </summary>
    public async Task<ReportResult> GenerateComplexReportAsync(
        ReportRequest request,
        string reportId,
        ReportState? previousState,
        CheckpointManager? checkpoint)
    {
        var stopwatch = Stopwatch.StartNew();
        var state = previousState ?? new ReportState { ReportId = reportId };
        var result = new ReportResult
        {
            ReportId = reportId,
            GeneratedAt = DateTime.UtcNow,
            Metrics = state.PartialResults
        };

        // Persists the current partial results when the manager says it is time.
        // Factored out: the original repeated this block verbatim after each step.
        async Task SaveCheckpointIfDueAsync()
        {
            if (checkpoint?.ShouldCheckpoint() == true)
            {
                state.PartialResults = result.Metrics;
                await checkpoint.CreateCheckpointAsync(state);
                _memoryStats.CheckpointsSaved++;
            }
        }

        try
        {
            // Step 1: total revenue (0-25%)
            if (state.ProgressPercent < 25)
            {
                result.Metrics["totalRevenue"] = await CalculateTotalRevenueAsync(request.StartDate, request.EndDate);
                state.ProgressPercent = 25;
                await SaveCheckpointIfDueAsync();
            }
            // Step 2: category breakdown (25-50%)
            if (state.ProgressPercent < 50)
            {
                result.Metrics["categoryBreakdown"] = await GetRevenueByCategoryAsync(request.StartDate, request.EndDate);
                state.ProgressPercent = 50;
                await SaveCheckpointIfDueAsync();
            }
            // Step 3: customer analytics (50-75%)
            if (state.ProgressPercent < 75)
            {
                result.Metrics["topCustomers"] = await GetTopCustomersAsync(100, request.StartDate);
                state.ProgressPercent = 75;
                await SaveCheckpointIfDueAsync();
            }
            // Step 4: product performance (75-100%)
            if (state.ProgressPercent < 100)
            {
                result.Metrics["productPerformance"] = await CalculateProductPerformanceAsync(request.StartDate, request.EndDate);
                state.ProgressPercent = 100;
            }

            result.Completed = true;
            result.ProgressPercent = 100;
            result.ProcessingTimeMs = stopwatch.ElapsedMilliseconds;
            result.MemoryUsedMB = _memoryStats.CurrentMemoryUsageMB;
            _logger.LogInformation("Report {reportId} completed in {time}ms", reportId, result.ProcessingTimeMs);
            return result;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error generating report {reportId}", reportId);
            // Always checkpoint on failure so the report can resume mid-way.
            if (checkpoint != null)
            {
                state.PartialResults = result.Metrics;
                await checkpoint.CreateCheckpointAsync(state);
            }
            throw;
        }
    }

    /// <summary>
    /// Analyzes up to <c>request.MaxOrdersToAnalyze</c> most recent orders for
    /// order-value patterns, optional customer segmentation and seasonality.
    /// Returns an empty result (RecordsProcessed = 0) when there are no orders.
    /// </summary>
    public async Task<PatternAnalysisResult> AnalyzeOrderPatternsAsync(PatternAnalysisRequest request)
    {
        var stopwatch = Stopwatch.StartNew();
        var result = new PatternAnalysisResult();

        // NOTE(review): Items are loaded but never read below — consider
        // dropping the Include if nothing downstream needs it.
        var orders = await _context.Orders
            .OrderByDescending(o => o.OrderDate)
            .Take(request.MaxOrdersToAnalyze)
            .Include(o => o.Items)
            .ToListAsync();
        result.RecordsProcessed = orders.Count;

        // Guard: Enumerable.Average throws InvalidOperationException on an empty
        // sequence, so bail out early when there is nothing to analyze.
        if (orders.Count == 0)
        {
            result.AnalysisTimeMs = stopwatch.ElapsedMilliseconds;
            result.MemoryUsedMB = _memoryStats.CurrentMemoryUsageMB;
            return result;
        }

        result.OrderPatterns["averageOrderValue"] = orders.Average(o => (double)o.TotalAmount);
        result.OrderPatterns["ordersPerDay"] = orders
            .GroupBy(o => o.OrderDate.Date)
            .Average(g => g.Count());

        if (request.IncludeCustomerSegmentation)
        {
            var customerGroups = orders
                .GroupBy(o => o.CustomerId)
                .Select(g => new
                {
                    CustomerId = g.Key,
                    OrderCount = g.Count(),
                    TotalSpent = g.Sum(o => o.TotalAmount),
                    AverageOrder = g.Average(o => o.TotalAmount)
                })
                .ToList();

            // Materialize each segment once instead of re-filtering per metric,
            // and guard the averages: an empty segment would throw.
            var highValue = customerGroups.Where(c => c.TotalSpent > 1000).ToList();
            var regular = customerGroups.Where(c => c.TotalSpent >= 100 && c.TotalSpent <= 1000).ToList();

            result.CustomerSegments = new List<CustomerSegment>
            {
                new CustomerSegment
                {
                    SegmentName = "High Value",
                    CustomerCount = highValue.Count,
                    Characteristics = new Dictionary<string, double>
                    {
                        ["averageOrderValue"] = highValue.Count > 0 ? highValue.Average(c => (double)c.AverageOrder) : 0,
                        ["ordersPerCustomer"] = highValue.Count > 0 ? highValue.Average(c => c.OrderCount) : 0
                    }
                },
                new CustomerSegment
                {
                    SegmentName = "Regular",
                    CustomerCount = regular.Count,
                    Characteristics = new Dictionary<string, double>
                    {
                        ["averageOrderValue"] = regular.Count > 0 ? regular.Average(c => (double)c.AverageOrder) : 0,
                        ["ordersPerCustomer"] = regular.Count > 0 ? regular.Average(c => c.OrderCount) : 0
                    }
                }
            };
        }

        if (request.IncludeSeasonalAnalysis)
        {
            result.SeasonalAnalysis = new SeasonalAnalysis
            {
                MonthlySalesPattern = orders
                    .GroupBy(o => o.OrderDate.Month)
                    .ToDictionary(g => g.Key.ToString(), g => (double)g.Sum(o => o.TotalAmount)),
                WeeklySalesPattern = orders
                    .GroupBy(o => o.OrderDate.DayOfWeek)
                    .ToDictionary(g => g.Key.ToString(), g => (double)g.Sum(o => o.TotalAmount)),
                PeakPeriods = orders
                    .GroupBy(o => o.OrderDate.Date)
                    .OrderByDescending(g => g.Sum(o => o.TotalAmount))
                    .Take(5)
                    .Select(g => g.Key.ToString("yyyy-MM-dd"))
                    .ToList()
            };
        }

        result.AnalysisTimeMs = stopwatch.ElapsedMilliseconds;
        result.MemoryUsedMB = _memoryStats.CurrentMemoryUsageMB;
        return result;
    }

    /// <summary>
    /// Refreshes and returns process memory statistics, classifying pressure as
    /// Low (&lt;50%), Medium (&lt;80%) or High relative to the configured limit.
    /// </summary>
    public MemoryStatistics GetMemoryStatistics()
    {
        var process = Process.GetCurrentProcess();
        _memoryStats.CurrentMemoryUsageMB = process.WorkingSet64 / (1024 * 1024);

        // Guard: an unset/zero MaxMemoryMB previously caused a divide-by-zero.
        var usagePercent = _memoryOptions.MaxMemoryMB > 0
            ? (_memoryStats.CurrentMemoryUsageMB * 100) / _memoryOptions.MaxMemoryMB
            : 0;
        _memoryStats.CurrentMemoryPressure = usagePercent switch
        {
            < 50 => "Low",
            < 80 => "Medium",
            _ => "High"
        };
        return _memoryStats;
    }

    /// <summary>Sum of TotalAmount over orders within [startDate, endDate], computed database-side.</summary>
    private async Task<decimal> CalculateTotalRevenueAsync(DateTime startDate, DateTime endDate)
    {
        var revenue = await _context.Orders
            .Where(o => o.OrderDate >= startDate && o.OrderDate <= endDate)
            .SumAsync(o => o.TotalAmount);
        return revenue;
    }

    /// <summary>Top 50 products by revenue within the date range (units, revenue, distinct orders).</summary>
    private async Task<object> CalculateProductPerformanceAsync(DateTime startDate, DateTime endDate)
    {
        var query = _context.OrderItems
            .Include(oi => oi.Product)
            .Include(oi => oi.Order)
            .Where(oi => oi.Order.OrderDate >= startDate && oi.Order.OrderDate <= endDate);

        var productPerformance = await query
            .GroupBy(oi => new { oi.ProductId, oi.Product.Name })
            .Select(g => new
            {
                ProductId = g.Key.ProductId,
                ProductName = g.Key.Name,
                UnitsSold = g.Sum(oi => oi.Quantity),
                Revenue = g.Sum(oi => oi.TotalPrice),
                OrderCount = g.Select(oi => oi.OrderId).Distinct().Count()
            })
            .OrderByDescending(p => p.Revenue)
            .Take(50)
            .ToListAsync();
        return productPerformance;
    }
}

View File

@@ -0,0 +1,288 @@
using Microsoft.EntityFrameworkCore;
using SqrtSpace.SpaceTime.Core;
using SqrtSpace.SpaceTime.EntityFramework;
using SqrtSpace.SpaceTime.Linq;
using SampleWebApi.Data;
using SampleWebApi.Models;
using System.Text;
namespace SampleWebApi.Services;
/// <summary>
/// Product catalog operations: paging, streaming, search, bulk price updates
/// with checkpoint/resume, CSV export, and aggregate statistics.
/// </summary>
public interface IProductService
{
/// <summary>One page of products (1-based <paramref name="page"/>) plus the total count.</summary>
Task<PagedResult<Product>> GetProductsPagedAsync(int page, int pageSize);
/// <summary>Streams products in memory-bounded batches, optionally filtered by category and minimum price.</summary>
IAsyncEnumerable<Product> StreamProductsAsync(string? category, decimal? minPrice);
/// <summary>Products whose name or description contains <paramref name="query"/>, sorted by "price", "category", or name.</summary>
Task<IEnumerable<Product>> SearchProductsAsync(string query, string sortBy, bool descending);
/// <summary>Multiplies prices by <paramref name="priceMultiplier"/> in batches; <paramref name="checkpoint"/> enables resuming an interrupted run.</summary>
Task<BulkUpdateResult> BulkUpdatePricesAsync(string? categoryFilter, decimal priceMultiplier, string operationId, CheckpointManager? checkpoint);
/// <summary>Writes the (optionally category-filtered) catalog to <paramref name="outputStream"/> as CSV.</summary>
Task ExportToCsvAsync(Stream outputStream, string? category);
/// <summary>Price/stock aggregates per category, computed in-memory or externally depending on catalog size.</summary>
Task<ProductStatistics> GetStatisticsAsync(string? category);
}
/// <summary>
/// Product catalog service. Large operations stream in √n-sized batches to
/// bound memory; bulk updates checkpoint progress so they can resume.
/// </summary>
public class ProductService : IProductService
{
    private readonly SampleDbContext _context;
    private readonly ILogger<ProductService> _logger;

    public ProductService(SampleDbContext context, ILogger<ProductService> logger)
    {
        _context = context;
        _logger = logger;
    }

    /// <summary>Returns one page of products (1-based page index) plus the total count.</summary>
    public async Task<PagedResult<Product>> GetProductsPagedAsync(int page, int pageSize)
    {
        // Guard: page/pageSize below 1 would produce a negative Skip/Take.
        if (page < 1) page = 1;
        if (pageSize < 1) pageSize = 1;

        var query = _context.Products.AsQueryable();
        var totalCount = await query.CountAsync();

        // Stable ordering: Skip/Take without OrderBy returns rows in an
        // unspecified order, so pages could overlap or miss rows.
        var items = await query
            .OrderBy(p => p.Id)
            .Skip((page - 1) * pageSize)
            .Take(pageSize)
            .ToListAsync();

        return new PagedResult<Product>
        {
            Items = items,
            Page = page,
            PageSize = pageSize,
            TotalCount = totalCount
        };
    }

    /// <summary>Streams products in memory-bounded batches, optionally filtered by category and minimum price.</summary>
    public async IAsyncEnumerable<Product> StreamProductsAsync(string? category, decimal? minPrice)
    {
        var query = _context.Products.AsQueryable();
        if (!string.IsNullOrEmpty(category))
        {
            query = query.Where(p => p.Category == category);
        }
        if (minPrice.HasValue)
        {
            query = query.Where(p => p.Price >= minPrice.Value);
        }

        // BatchBySqrtN processes the result in memory-efficient chunks.
        await foreach (var batch in query.BatchBySqrtNAsync())
        {
            foreach (var product in batch)
            {
                yield return product;
            }
        }
    }

    /// <summary>
    /// Searches name/description for <paramref name="query"/> and sorts by
    /// "price", "category", or name (default). Result sets above 10,000 rows
    /// use external sorting.
    /// </summary>
    public async Task<IEnumerable<Product>> SearchProductsAsync(string query, string sortBy, bool descending)
    {
        var searchQuery = _context.Products
            .Where(p => p.Name.Contains(query) || p.Description.Contains(query));

        var count = await searchQuery.CountAsync();
        _logger.LogInformation("Search found {count} products for query '{query}'", count, query);

        // ToLowerInvariant: ToLower() is culture-sensitive (e.g. Turkish 'I')
        // and could fail to match the literal keys below.
        IQueryable<Product> sortedQuery = sortBy.ToLowerInvariant() switch
        {
            "price" => descending ? searchQuery.OrderByDescending(p => p.Price) : searchQuery.OrderBy(p => p.Price),
            "category" => descending ? searchQuery.OrderByDescending(p => p.Category) : searchQuery.OrderBy(p => p.Category),
            _ => descending ? searchQuery.OrderByDescending(p => p.Name) : searchQuery.OrderBy(p => p.Name)
        };

        if (count > 10000)
        {
            _logger.LogInformation("Using external sorting for {count} products", count);
            sortedQuery = sortedQuery.UseExternalSorting();
        }
        return await sortedQuery.ToListAsync();
    }

    /// <summary>
    /// Multiplies product prices by <paramref name="priceMultiplier"/> in √n
    /// batches, saving after each batch. With a <paramref name="checkpoint"/>,
    /// an interrupted run resumes where the last persisted batch left off.
    /// </summary>
    public async Task<BulkUpdateResult> BulkUpdatePricesAsync(
        string? categoryFilter,
        decimal priceMultiplier,
        string operationId,
        CheckpointManager? checkpoint)
    {
        var state = new BulkUpdateState { OperationId = operationId };

        // Try to restore from a previous checkpoint.
        if (checkpoint != null)
        {
            var previousState = await checkpoint.RestoreLatestCheckpointAsync<BulkUpdateState>();
            if (previousState != null)
            {
                state = previousState;
                _logger.LogInformation("Resuming bulk update from checkpoint. Already processed: {count}",
                    state.ProcessedCount);
            }
        }

        var query = _context.Products.AsQueryable();
        if (!string.IsNullOrEmpty(categoryFilter))
        {
            query = query.Where(p => p.Category == categoryFilter);
        }

        var totalProducts = await query.CountAsync();

        // Stable ordering is required for Skip-based resume: without OrderBy the
        // database may return rows in a different order on each run, so a resume
        // could skip unprocessed rows or re-process finished ones.
        var products = query.OrderBy(p => p.Id).Skip(state.ProcessedCount);

        await foreach (var batch in products.BatchBySqrtNAsync())
        {
            try
            {
                foreach (var product in batch)
                {
                    product.Price *= priceMultiplier;
                    product.UpdatedAt = DateTime.UtcNow;
                    state.ProcessedCount++;
                    state.UpdatedCount++;
                }
                await _context.SaveChangesAsync();

                if (checkpoint?.ShouldCheckpoint() == true)
                {
                    state.LastCheckpoint = DateTime.UtcNow;
                    await checkpoint.CreateCheckpointAsync(state);
                    _logger.LogInformation("Checkpoint saved. Processed: {count}/{total}",
                        state.ProcessedCount, totalProducts);
                }
            }
            catch (Exception ex)
            {
                // The whole batch failed to persist: roll the optimistic counter
                // increments back so a checkpoint resume re-processes this batch,
                // and record the failures. (The original computed
                // batch.Count - (ProcessedCount % batch.Count), which both
                // miscounted and made resume skip the failed rows.)
                state.ProcessedCount -= batch.Count;
                state.UpdatedCount -= batch.Count;
                state.FailedCount += batch.Count;
                _logger.LogError(ex, "Error updating batch. Processed so far: {count}", state.ProcessedCount);

                if (checkpoint != null)
                {
                    await checkpoint.CreateCheckpointAsync(state);
                }
                throw;
            }
        }

        return new BulkUpdateResult
        {
            OperationId = operationId,
            TotalProducts = totalProducts,
            UpdatedProducts = state.UpdatedCount,
            FailedProducts = state.FailedCount,
            Completed = true,
            CheckpointId = state.LastCheckpoint.ToString("O")
        };
    }

    /// <summary>
    /// Writes the (optionally category-filtered) catalog to
    /// <paramref name="outputStream"/> as RFC 4180-style CSV, streaming in
    /// batches. The caller's stream is left open.
    /// </summary>
    public async Task ExportToCsvAsync(Stream outputStream, string? category)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // leaveOpen: true — the caller owns outputStream (e.g. an HTTP response
        // body); disposing the writer must not close the underlying stream.
        using var writer = new StreamWriter(outputStream, Encoding.UTF8, bufferSize: 4096, leaveOpen: true);
        await writer.WriteLineAsync("Id,Name,Category,Price,StockQuantity,CreatedAt,UpdatedAt");

        var query = _context.Products.AsQueryable();
        if (!string.IsNullOrEmpty(category))
        {
            query = query.Where(p => p.Category == category);
        }

        // RFC 4180 quoting: double any embedded quotes inside a quoted field.
        static string Quote(string s) => "\"" + s.Replace("\"", "\"\"") + "\"";

        await foreach (var batch in query.BatchBySqrtNAsync())
        {
            foreach (var product in batch)
            {
                await writer.WriteLineAsync(
                    $"{product.Id}," +
                    $"{Quote(product.Name)}," +
                    // Category was previously quoted but not escaped, so an
                    // embedded quote corrupted the row.
                    $"{Quote(product.Category)}," +
                    // Invariant culture: a ',' decimal separator or localized
                    // ':' time separator would break the column layout.
                    $"{product.Price.ToString(invariant)}," +
                    $"{product.StockQuantity}," +
                    $"{product.CreatedAt.ToString("yyyy-MM-dd HH:mm:ss", invariant)}," +
                    $"{product.UpdatedAt.ToString("yyyy-MM-dd HH:mm:ss", invariant)}");
            }
            await writer.FlushAsync();
        }
    }

    /// <summary>
    /// Computes catalog statistics (count, min/avg/max price, per-category
    /// breakdowns). Above 100,000 products the aggregation streams in batches.
    /// </summary>
    public async Task<ProductStatistics> GetStatisticsAsync(string? category)
    {
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        var query = _context.Products.AsQueryable();
        if (!string.IsNullOrEmpty(category))
        {
            query = query.Where(p => p.Category == category);
        }

        var totalCount = await query.CountAsync();
        var computationMethod = totalCount > 100000 ? "External" : "InMemory";
        ProductStatistics stats;

        if (computationMethod == "External")
        {
            _logger.LogInformation("Using external aggregation for {count} products", totalCount);

            decimal totalPrice = 0;
            decimal minPrice = decimal.MaxValue;
            decimal maxPrice = decimal.MinValue;
            var categoryStats = new Dictionary<string, (int count, decimal totalPrice)>();

            await foreach (var batch in query.BatchBySqrtNAsync())
            {
                foreach (var product in batch)
                {
                    totalPrice += product.Price;
                    minPrice = Math.Min(minPrice, product.Price);
                    maxPrice = Math.Max(maxPrice, product.Price);

                    // TryGetValue avoids the ContainsKey + indexer double lookup;
                    // a missing key yields the (0, 0) default tuple.
                    categoryStats.TryGetValue(product.Category, out var current);
                    categoryStats[product.Category] = (current.count + 1, current.totalPrice + product.Price);
                }
            }

            stats = new ProductStatistics
            {
                TotalProducts = totalCount,
                AveragePrice = totalCount > 0 ? totalPrice / totalCount : 0,
                // Sentinels survive only when no product was seen; report 0 then.
                MinPrice = minPrice == decimal.MaxValue ? 0 : minPrice,
                MaxPrice = maxPrice == decimal.MinValue ? 0 : maxPrice,
                ProductsByCategory = categoryStats.ToDictionary(k => k.Key, v => v.Value.count),
                AveragePriceByCategory = categoryStats.ToDictionary(
                    k => k.Key,
                    v => v.Value.count > 0 ? v.Value.totalPrice / v.Value.count : 0)
            };
        }
        else
        {
            // Smaller catalogs: load once and aggregate in memory.
            var products = await query.ToListAsync();
            stats = new ProductStatistics
            {
                TotalProducts = products.Count,
                AveragePrice = products.Any() ? products.Average(p => p.Price) : 0,
                MinPrice = products.Any() ? products.Min(p => p.Price) : 0,
                MaxPrice = products.Any() ? products.Max(p => p.Price) : 0,
                ProductsByCategory = products.GroupBy(p => p.Category)
                    .ToDictionary(g => g.Key, g => g.Count()),
                AveragePriceByCategory = products.GroupBy(p => p.Category)
                    .ToDictionary(g => g.Key, g => g.Average(p => p.Price))
            };
        }

        stats.ComputationTimeMs = stopwatch.ElapsedMilliseconds;
        stats.ComputationMethod = computationMethod;
        return stats;
    }
}