This commit is contained in:
2025-07-20 04:11:04 -04:00
commit 69b521b549
40 changed files with 7781 additions and 0 deletions

View File

@@ -0,0 +1,137 @@
"""
FastAPI application demonstrating SqrtSpace SpaceTime integration
"""
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import logging
from sqrtspace_spacetime import SpaceTimeConfig
from sqrtspace_spacetime.memory import MemoryPressureMonitor
from .config import settings
from .routers import products, analytics, ml, reports
from .services.cache_service import SpaceTimeCache
from .utils.memory import memory_monitor_middleware
# Configure logging for the whole demo app; routers reuse this module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global instances created at import time so both the lifespan handler and
# the route functions below can reference them directly.
cache = SpaceTimeCache()
# Watches process memory against the configured SpaceTime limit.
memory_monitor = MemoryPressureMonitor(settings.SPACETIME_MEMORY_LIMIT)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager.

    Configures SpaceTime defaults and wires shared services onto ``app.state``
    before the app serves requests, then cleans up the cache on shutdown.
    """
    # Startup
    logger.info("Starting FastAPI with SqrtSpace SpaceTime")

    # Configure SpaceTime process-wide defaults from application settings.
    SpaceTimeConfig.set_defaults(
        memory_limit=settings.SPACETIME_MEMORY_LIMIT,
        external_storage=settings.SPACETIME_EXTERNAL_STORAGE,
        chunk_strategy=settings.SPACETIME_CHUNK_STRATEGY,
        compression=settings.SPACETIME_COMPRESSION
    )

    # Expose the module-level singletons on app.state so middleware and
    # handlers can reach them without importing this module.
    app.state.cache = cache
    app.state.memory_monitor = memory_monitor

    yield  # application serves requests while suspended here

    # Shutdown
    logger.info("Shutting down...")
    cache.cleanup()  # release any storage the cache is holding
# Create FastAPI app
app = FastAPI(
    title="SqrtSpace SpaceTime FastAPI Demo",
    description="Memory-efficient API with √n space-time tradeoffs",
    version="1.0.0",
    lifespan=lifespan
)

# Add CORS middleware.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# disallowed by the CORS spec (browsers reject credentialed requests against a
# wildcard origin). Acceptable for a demo; pin concrete origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Add custom middleware — presumably tracks memory pressure per request;
# see utils.memory for the actual behavior.
app.middleware("http")(memory_monitor_middleware)

# Include routers (each mounted under its own URL prefix)
app.include_router(products.router, prefix="/products", tags=["products"])
app.include_router(analytics.router, prefix="/analytics", tags=["analytics"])
app.include_router(ml.router, prefix="/ml", tags=["machine-learning"])
app.include_router(reports.router, prefix="/reports", tags=["reports"])
@app.get("/")
async def root():
    """Landing endpoint: service banner, docs link, and current memory info."""
    banner = {
        "message": "SqrtSpace SpaceTime FastAPI Demo",
        "docs": "/docs",
        "memory_usage": memory_monitor.get_memory_info(),
    }
    return banner
@app.get("/health")
async def health_check():
    """Health check endpoint reporting memory pressure and cache statistics."""
    info = memory_monitor.get_memory_info()
    memory_report = {
        "usage_mb": info["used_mb"],
        "available_mb": info["available_mb"],
        "percentage": info["percentage"],
        "pressure": memory_monitor.check().value,
    }
    return {
        "status": "healthy",
        "memory": memory_report,
        "cache": cache.get_stats(),
    }
@app.get("/system/memory")
async def system_memory():
    """Detailed memory statistics.

    Returns a snapshot of process-level, SpaceTime-level, and system-wide
    memory numbers.
    """
    import psutil
    import os

    process = psutil.Process(os.getpid())
    # Snapshot each psutil source once: the original re-queried
    # process.memory_info() and psutil.virtual_memory() per field, which is
    # slower and can yield mutually inconsistent readings within one response.
    proc_mem = process.memory_info()
    vm = psutil.virtual_memory()

    return {
        "process": {
            "rss_mb": proc_mem.rss / 1024 / 1024,
            "vms_mb": proc_mem.vms / 1024 / 1024,
            # Note: blocks the event loop for the 0.1s sampling interval.
            "cpu_percent": process.cpu_percent(interval=0.1),
            "num_threads": process.num_threads()
        },
        "spacetime": {
            "memory_limit": settings.SPACETIME_MEMORY_LIMIT,
            "external_storage": settings.SPACETIME_EXTERNAL_STORAGE,
            "pressure_level": memory_monitor.check().value,
            "cache_stats": cache.get_stats()
        },
        "system": {
            "total_memory_mb": vm.total / 1024 / 1024,
            "available_memory_mb": vm.available / 1024 / 1024,
            "memory_percent": vm.percent,
            "swap_percent": psutil.swap_memory().percent
        }
    }
if __name__ == "__main__":
    # Dev entry point: serve on all interfaces with the bundled uvicorn server.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

View File

@@ -0,0 +1,260 @@
"""
Product endpoints demonstrating streaming and memory-efficient operations
"""
from fastapi import APIRouter, Query, Response, HTTPException, BackgroundTasks
from fastapi.responses import StreamingResponse
from typing import Optional, List
import json
import csv
import io
from datetime import datetime
from sqrtspace_spacetime import Stream, external_sort
from sqrtspace_spacetime.checkpoint import CheckpointManager
from ..models import Product, ProductUpdate, BulkUpdateRequest, ImportStatus
from ..services.product_service import ProductService
from ..database import get_db
# Shared, module-level collaborators for this router.
router = APIRouter()
# Data-access / business logic for products.
product_service = ProductService()
# Persists resumable job state for bulk operations (bulk update / CSV import).
checkpoint_manager = CheckpointManager()
@router.get("/")
async def list_products(
    skip: int = Query(0, ge=0),
    limit: int = Query(100, ge=1, le=1000),
    category: Optional[str] = None,
    min_price: Optional[float] = None,
    max_price: Optional[float] = None
):
    """Return a paginated product listing with optional filters applied."""
    active_filters = {}
    if category:
        active_filters['category'] = category
    # Price bounds are applied whenever explicitly provided (0.0 is valid).
    for key, value in (('min_price', min_price), ('max_price', max_price)):
        if value is not None:
            active_filters[key] = value
    return await product_service.get_products(skip, limit, active_filters)
@router.get("/stream")
async def stream_products(
    category: Optional[str] = None,
    format: str = Query("ndjson", regex="^(ndjson|json)$")
):
    """
    Stream all products as NDJSON or JSON array.
    Memory-efficient streaming for large datasets.
    """
    async def ndjson_body():
        # One JSON document per line.
        async for item in product_service.stream_products(category):
            yield json.dumps(item.dict()) + "\n"

    async def json_array_body():
        # Hand-assembled array so we never hold all products at once.
        yield "["
        emitted_any = False
        async for item in product_service.stream_products(category):
            if emitted_any:
                yield ","
            yield json.dumps(item.dict())
            emitted_any = True
        yield "]"

    if format == "ndjson":
        body, media = ndjson_body(), "application/x-ndjson"
    else:
        body, media = json_array_body(), "application/json"

    # X-Accel-Buffering: no — tell nginx-style proxies not to buffer the stream.
    return StreamingResponse(
        body,
        media_type=media,
        headers={"X-Accel-Buffering": "no"}
    )
@router.get("/export/csv")
async def export_csv(
    category: Optional[str] = None,
    columns: Optional[List[str]] = Query(None)
):
    """Export products as CSV with streaming.

    Streams the CSV body batch-by-batch so arbitrarily large catalogs can be
    exported without materializing them in memory.

    Args:
        category: optional category filter passed through to the service.
        columns: CSV columns to emit; defaults to the standard product fields.
    """
    if not columns:
        columns = ["id", "name", "sku", "category", "price", "stock", "created_at"]

    async def generate():
        # One StringIO is reused as a scratch buffer: write rows into it,
        # yield its contents, then truncate it back to empty.
        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=columns)

        # Write header
        writer.writeheader()
        output.seek(0)
        yield output.read()
        output.seek(0)
        output.truncate()

        # Stream products in batches
        batch_count = 0
        async for batch in product_service.stream_products_batched(category, batch_size=100):
            for product in batch:
                writer.writerow({col: getattr(product, col) for col in columns})
            output.seek(0)
            data = output.read()
            output.seek(0)
            output.truncate()
            yield data

            batch_count += 1
            if batch_count % 10 == 0:
                # Yield an empty chunk periodically to keep the connection alive
                yield ""

    filename = f"products_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    return StreamingResponse(
        generate(),
        media_type="text/csv",
        headers={
            # BUG FIX: the header previously contained a literal placeholder
            # instead of interpolating the computed (and otherwise unused)
            # `filename` variable — the download had no usable name.
            "Content-Disposition": f"attachment; filename={filename}",
            "X-Accel-Buffering": "no"
        }
    )
@router.get("/search")
async def search_products(
    q: str = Query(..., min_length=2),
    sort_by: str = Query("relevance", regex="^(relevance|price_asc|price_desc|name)$"),
    limit: int = Query(100, ge=1, le=1000)
):
    """Search products, falling back to a memory-efficient external sort
    when the result set grows large."""
    results = await product_service.search_products(q, sort_by, limit)

    if len(results) > 1000:
        # Map each accepted sort mode to its key function; descending orders
        # are expressed by negating the key.
        key_by_mode = {
            'price_asc': lambda row: row['price'],
            'price_desc': lambda row: -row['price'],
            'name': lambda row: row['name'],
            'relevance': lambda row: -row['relevance_score'],
        }
        results = external_sort(results, key_func=key_by_mode[sort_by])

    return {"results": results[:limit], "total": len(results)}
@router.post("/bulk-update")
async def bulk_update_prices(
    request: BulkUpdateRequest,
    background_tasks: BackgroundTasks
):
    """Kick off a checkpointed bulk price update.

    If a checkpoint already exists for the generated job id, report the
    in-progress job instead of launching a new one; otherwise the update runs
    as a background task and can be resumed if interrupted.
    """
    job_id = f"bulk_update_{datetime.now().timestamp()}"

    # Resume path: a previous run left a checkpoint behind.
    existing = checkpoint_manager.restore(job_id)
    if existing:
        return {
            "message": "Resuming previous job",
            "job_id": job_id,
            "progress": existing.get("progress", 0)
        }

    # Fresh run: hand the heavy lifting to a background task.
    background_tasks.add_task(
        product_service.bulk_update_prices,
        request,
        job_id
    )
    return {
        "message": "Bulk update started",
        "job_id": job_id,
        "status_url": f"/products/bulk-update/{job_id}/status"
    }
@router.get("/bulk-update/{job_id}/status")
async def bulk_update_status(job_id: str):
    """Report progress of a bulk update job from its persisted checkpoint."""
    snapshot = checkpoint_manager.restore(job_id)
    if not snapshot:
        raise HTTPException(status_code=404, detail="Job not found")

    # Assemble the report field-by-field, filling defaults for anything the
    # checkpoint has not recorded yet.
    report = {"job_id": job_id}
    for field, default in (
        ("status", "running"),
        ("progress", 0),
        ("total", 0),
        ("updated", 0),
        ("errors", []),
    ):
        report[field] = snapshot.get(field, default)
    return report
@router.post("/import/csv")
async def import_csv(
    file_url: str,
    background_tasks: BackgroundTasks
):
    """Start a background CSV import and return where to poll for its status."""
    import_id = f"import_{datetime.now().timestamp()}"
    background_tasks.add_task(product_service.import_from_csv, file_url, import_id)
    return {
        "message": "Import started",
        "import_id": import_id,
        "status_url": f"/products/import/{import_id}/status",
    }
@router.get("/import/{import_id}/status")
async def import_status(import_id: str):
    """Look up the status of a previously started import job."""
    job_state = await product_service.get_import_status(import_id)
    if not job_state:
        raise HTTPException(status_code=404, detail="Import job not found")
    return job_state
@router.get("/statistics")
async def product_statistics():
    """
    Get product statistics using memory-efficient aggregations.
    Uses external grouping for large datasets.
    """
    stats = await product_service.calculate_statistics()

    # Pass the aggregate fields straight through, preserving response order.
    payload = {
        key: stats[key]
        for key in (
            "total_products",
            "total_value",
            "by_category",
            "price_distribution",
            "stock_alerts",
        )
    }
    payload["processing_info"] = {
        "memory_used_mb": stats["memory_used_mb"],
        "external_operations": stats["external_operations"],
    }
    return payload