Initial
This commit is contained in:
504
examples/fastapi-app/README.md
Normal file
504
examples/fastapi-app/README.md
Normal file
@@ -0,0 +1,504 @@
|
||||
# SqrtSpace SpaceTime FastAPI Sample Application
|
||||
|
||||
This sample demonstrates how to build memory-efficient, high-performance APIs using FastAPI and SqrtSpace SpaceTime.
|
||||
|
||||
## Features Demonstrated
|
||||
|
||||
### 1. **Streaming Endpoints**
|
||||
- Server-Sent Events (SSE) for real-time data
|
||||
- Streaming file downloads without memory bloat
|
||||
- Chunked JSON responses for large datasets
|
||||
|
||||
### 2. **Background Tasks**
|
||||
- Memory-aware task processing
|
||||
- Checkpointed long-running operations
|
||||
- Progress tracking with resumable state
|
||||
|
||||
### 3. **Data Processing**
|
||||
- External sorting for large datasets
|
||||
- Memory-efficient aggregations
|
||||
- Streaming ETL pipelines
|
||||
|
||||
### 4. **Machine Learning Integration**
|
||||
- Batch prediction with memory limits
|
||||
- Model training with checkpoints
|
||||
- Feature extraction pipelines
|
||||
|
||||
## Installation
|
||||
|
||||
1. **Create virtual environment:**
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
2. **Install dependencies:**
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. **Configure environment:**
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
Edit `.env`:
|
||||
```
|
||||
SPACETIME_MEMORY_LIMIT=512MB
|
||||
SPACETIME_EXTERNAL_STORAGE=/tmp/spacetime
|
||||
SPACETIME_CHUNK_STRATEGY=sqrt_n
|
||||
SPACETIME_COMPRESSION=gzip
|
||||
DATABASE_URL=sqlite:///./app.db
|
||||
```
|
||||
|
||||
4. **Initialize database:**
|
||||
```bash
|
||||
python init_db.py
|
||||
```
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
fastapi-app/
|
||||
├── app/
|
||||
│ ├── __init__.py
|
||||
│ ├── main.py # FastAPI app
|
||||
│ ├── config.py # Configuration
|
||||
│ ├── models.py # Pydantic models
|
||||
│ ├── database.py # Database setup
|
||||
│ ├── routers/
|
||||
│ │ ├── products.py # Product endpoints
|
||||
│ │ ├── analytics.py # Analytics endpoints
|
||||
│ │ ├── ml.py # ML endpoints
|
||||
│ │ └── reports.py # Report generation
|
||||
│ ├── services/
|
||||
│ │ ├── product_service.py # Business logic
|
||||
│ │ ├── analytics_service.py # Analytics processing
|
||||
│ │ ├── ml_service.py # ML operations
|
||||
│ │ └── cache_service.py # SpaceTime caching
|
||||
│ ├── workers/
|
||||
│ │ ├── background_tasks.py # Task workers
|
||||
│ │ └── checkpointed_jobs.py # Resumable jobs
|
||||
│ └── utils/
|
||||
│ ├── streaming.py # Streaming helpers
|
||||
│ └── memory.py # Memory monitoring
|
||||
├── requirements.txt
|
||||
├── Dockerfile
|
||||
└── docker-compose.yml
|
||||
```
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### 1. Streaming Large Datasets
|
||||
|
||||
```python
|
||||
# app/routers/products.py
from typing import Optional

from fastapi import APIRouter, Response
from fastapi.responses import StreamingResponse
from sqrtspace_spacetime import Stream
import json

router = APIRouter()


# The router is mounted with prefix="/products" in app/main.py, so this
# handler is served at GET /products/stream.
@router.get("/stream")
async def stream_products(category: Optional[str] = None):
    """Stream products as newline-delimited JSON.

    Args:
        category: When given, only products in this category are streamed.

    Returns:
        A StreamingResponse that emits one JSON document per line.
    """

    async def generate():
        # NOTE(review): `db` (a database session) and `Product` (the model)
        # are not defined in this snippet; they are expected to come from the
        # application's database/models modules.
        query = db.query(Product)
        if category:
            query = query.filter(Product.category == category)

        # Use SpaceTime stream for memory efficiency
        stream = Stream.from_query(query, chunk_size=100)

        for product in stream:
            yield json.dumps(product.dict()) + "\n"

    return StreamingResponse(
        generate(),
        media_type="application/x-ndjson",
        # Ask nginx not to buffer the response so lines reach the client promptly
        headers={"X-Accel-Buffering": "no"}
    )
|
||||
```
|
||||
|
||||
### 2. Server-Sent Events for Real-Time Data
|
||||
|
||||
```python
|
||||
# app/routers/analytics.py
import asyncio
import json

from fastapi import APIRouter
from sse_starlette.sse import EventSourceResponse
from sqrtspace_spacetime.memory import MemoryPressureLevel, MemoryPressureMonitor

router = APIRouter()


# The router is mounted with prefix="/analytics" in app/main.py, so this
# handler is served at GET /analytics/realtime.
@router.get("/realtime")
async def realtime_analytics():
    """Stream real-time analytics using SSE.

    Emits one "update" event per second carrying the current statistics as a
    JSON string.
    """

    monitor = MemoryPressureMonitor("100MB")

    async def event_generator():
        # NOTE(review): `analytics_service` is not defined in this snippet; it
        # is expected to be the application's analytics service instance.
        while True:
            # Get current stats
            stats = await analytics_service.get_current_stats()

            # Compact the service cache whenever the monitor reports any pressure
            if monitor.check() != MemoryPressureLevel.NONE:
                await analytics_service.compact_cache()

            yield {
                "event": "update",
                "data": json.dumps(stats)
            }

            await asyncio.sleep(1)

    return EventSourceResponse(event_generator())
|
||||
```
|
||||
|
||||
### 3. Memory-Efficient CSV Export
|
||||
|
||||
```python
|
||||
# app/routers/reports.py
from datetime import date
import io

from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from sqrtspace_spacetime.file import CsvWriter

router = APIRouter()


# The router is mounted with prefix="/reports" in app/main.py, so this
# handler is served at GET /reports/export/csv.
@router.get("/export/csv")
async def export_csv(start_date: date, end_date: date):
    """Export large dataset as CSV with streaming.

    Args:
        start_date: First day of the reporting window.
        end_date: Last day of the reporting window.

    Returns:
        A StreamingResponse delivering the CSV as an attachment.
    """

    async def generate():
        # A single in-memory buffer is reused: each batch is written into it,
        # handed to the client, and then the buffer is emptied again.
        output = io.StringIO()
        writer = CsvWriter(output)

        def drain():
            """Return everything buffered so far and reset the buffer."""
            output.seek(0)
            data = output.read()
            output.seek(0)
            output.truncate()
            return data

        # Write headers
        writer.writerow(["Date", "Orders", "Revenue", "Customers"])

        # Stream data in chunks
        # NOTE(review): `analytics_service` is not defined in this snippet; it
        # is expected to be the application's analytics service instance.
        async for batch in analytics_service.get_daily_stats_batched(
            start_date, end_date, batch_size=100
        ):
            for row in batch:
                writer.writerow([
                    row.date,
                    row.order_count,
                    row.total_revenue,
                    row.unique_customers
                ])

            # Yield buffer content (the first chunk also carries the header row)
            yield drain()

    return StreamingResponse(
        generate(),
        media_type="text/csv",
        headers={
            "Content-Disposition": f"attachment; filename=report_{start_date}_{end_date}.csv"
        }
    )
|
||||
```
|
||||
|
||||
### 4. Checkpointed Background Tasks
|
||||
|
||||
```python
|
||||
# app/workers/checkpointed_jobs.py
|
||||
from sqrtspace_spacetime.checkpoint import CheckpointManager, auto_checkpoint
|
||||
from sqrtspace_spacetime.collections import SpaceTimeArray
|
||||
|
||||
class DataProcessor:
    """Processes a dataset in batches, yielding state snapshots for checkpointing."""

    def __init__(self):
        self.checkpoint_manager = CheckpointManager()

    @auto_checkpoint(total_iterations=10000)
    async def process_large_dataset(self, dataset_id: str):
        """Process dataset with automatic checkpointing.

        This is an async generator: it yields a state dict after every 100
        processed items and once more when all batches are consumed. The last
        yielded dict carries the complete results. (An async generator cannot
        ``return`` a value, so the final state is yielded instead.)

        Args:
            dataset_id: Identifier passed to ``self.get_data_batches``.

        Yields:
            dict with keys 'processed', 'results' and 'last_item_id'.
        """

        # Initialize or restore state
        results = SpaceTimeArray(threshold=1000)
        processed_count = 0
        last_item_id = None  # stays None when the dataset is empty

        # Get data in batches
        async for batch in self.get_data_batches(dataset_id):
            for item in batch:
                # Process item
                result = await self.process_item(item)
                results.append(result)
                processed_count += 1
                last_item_id = item.id

                # Yield state for checkpointing
                if processed_count % 100 == 0:
                    yield {
                        'processed': processed_count,
                        'results': results,
                        'last_item_id': last_item_id
                    }

        # Final state: replaces the invalid `return results`
        yield {
            'processed': processed_count,
            'results': results,
            'last_item_id': last_item_id
        }
|
||||
```
|
||||
|
||||
### 5. Machine Learning with Memory Constraints
|
||||
|
||||
```python
|
||||
# app/services/ml_service.py
|
||||
from sqrtspace_spacetime.ml import SpaceTimeOptimizer
|
||||
from sqrtspace_spacetime.streams import Stream
|
||||
import numpy as np
|
||||
|
||||
class MLService:
    """Model training and batch prediction under a fixed memory budget."""

    def __init__(self, model=None):
        """Create the service.

        Args:
            model: The model object handed to the optimizer and used for
                prediction. Optional so existing ``MLService()`` calls keep
                working; it must be set before training or predicting.
        """
        # train_model/batch_predict read self.model, so it has to exist
        self.model = model
        self.optimizer = SpaceTimeOptimizer(
            memory_limit="256MB",
            checkpoint_frequency=100
        )

    async def train_model(self, training_data_path: str):
        """Train model with memory-efficient data loading.

        Args:
            training_data_path: Path of the CSV file holding the training data.
        """

        # Process in mini-batches
        for epoch in range(10):
            # Open the stream once per epoch.
            # NOTE(review): a stream may be exhausted after one pass; re-creating
            # it here is safe either way — confirm against the Stream API.
            data_stream = Stream.from_csv(
                training_data_path,
                chunk_size=1000
            )

            for batch in data_stream.batch(32):
                X = np.array([item.features for item in batch])
                y = np.array([item.label for item in batch])

                # Train step with automatic checkpointing
                loss = self.optimizer.step(
                    self.model,
                    X, y,
                    epoch=epoch
                )

                if self.optimizer.should_checkpoint():
                    await self.save_checkpoint(epoch)

    async def batch_predict(self, input_data):
        """Memory-efficient batch prediction.

        Args:
            input_data: Iterable of inputs to run through the model.

        Returns:
            A SpaceTimeArray holding the predictions for every chunk.
        """
        # Imported here because the snippet's import block does not provide it
        from sqrtspace_spacetime.collections import SpaceTimeArray

        results = SpaceTimeArray(threshold=1000)

        # Process in chunks to avoid memory issues
        for chunk in Stream.from_iterable(input_data).chunk(100):
            predictions = self.model.predict(chunk)
            results.extend(predictions)

        return results
|
||||
```
|
||||
|
||||
### 6. Advanced Caching with SpaceTime
|
||||
|
||||
```python
|
||||
# app/services/cache_service.py
|
||||
from sqrtspace_spacetime.collections import SpaceTimeDict
|
||||
from sqrtspace_spacetime.memory import MemoryPressureMonitor
|
||||
import asyncio
|
||||
|
||||
class SpaceTimeCache:
    """Read-through cache that tracks hit, miss and eviction counts."""

    def __init__(self):
        self.hot_cache = SpaceTimeDict(threshold=1000)
        self.monitor = MemoryPressureMonitor("128MB")
        self.stats = {
            'hits': 0,
            'misses': 0,
            'evictions': 0
        }

    async def get(self, key: str):
        """Get with automatic tier management.

        Returns the cached value when present; otherwise loads it through
        ``self.load_from_db`` and caches it if the monitor allows the
        allocation, evicting cold entries first when it does not.
        """

        if key in self.hot_cache:
            self.stats['hits'] += 1
            return self.hot_cache[key]

        self.stats['misses'] += 1

        # Load from database
        value = await self.load_from_db(key)

        # Rough size estimate used for the allocation check
        size = len(str(value))

        if not self.monitor.can_allocate(size):
            # Count evictions as the difference in size around the cleanup;
            # measuring only afterwards would under-report them.
            before = len(self.hot_cache)
            self.cleanup()
            self.stats['evictions'] += before - len(self.hot_cache)

            # Still no room after evicting: serve the value uncached
            if not self.monitor.can_allocate(size):
                return value

        self.hot_cache[key] = value
        return value

    def get_stats(self):
        """Return a copy of the hit/miss/eviction counters."""
        return dict(self.stats)

    def cleanup(self):
        """Remove least recently used items"""
        # SpaceTimeDict handles LRU automatically
        self.hot_cache.evict_cold_items(0.5)
|
||||
```
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### Products API
|
||||
- `GET /products` - Paginated list
|
||||
- `GET /products/stream` - Stream all products (NDJSON)
|
||||
- `GET /products/search` - Memory-efficient search
|
||||
- `POST /products/bulk-update` - Checkpointed bulk updates
|
||||
- `GET /products/export/csv` - Streaming CSV export
|
||||
|
||||
### Analytics API
|
||||
- `GET /analytics/summary` - Current statistics
|
||||
- `GET /analytics/realtime` - SSE stream of live data
|
||||
- `GET /analytics/trends` - Historical trends
|
||||
- `POST /analytics/aggregate` - Custom aggregations
|
||||
|
||||
### ML API
|
||||
- `POST /ml/train` - Train model (async with progress)
|
||||
- `POST /ml/predict/batch` - Batch predictions
|
||||
- `GET /ml/models/{id}/status` - Training status
|
||||
- `POST /ml/features/extract` - Feature extraction pipeline
|
||||
|
||||
### Reports API
|
||||
- `POST /reports/generate` - Generate large report
|
||||
- `GET /reports/{id}/progress` - Check progress
|
||||
- `GET /reports/{id}/download` - Download completed report
|
||||
|
||||
## Running the Application
|
||||
|
||||
### Development
|
||||
```bash
|
||||
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
### Production
|
||||
```bash
|
||||
gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker \
|
||||
--bind 0.0.0.0:8000 \
|
||||
--timeout 300 \
|
||||
--max-requests 1000 \
|
||||
--max-requests-jitter 50
|
||||
```
|
||||
|
||||
### With Docker
|
||||
```bash
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
## Performance Configuration
|
||||
|
||||
### 1. Nginx Configuration
|
||||
```nginx
|
||||
location /products/stream {
|
||||
proxy_pass http://backend;
|
||||
proxy_buffering off;
|
||||
proxy_read_timeout 3600;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
}
|
||||
|
||||
location /analytics/realtime {
|
||||
proxy_pass http://backend;
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
proxy_read_timeout 86400;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Worker Configuration
|
||||
```python
|
||||
# app/config.py
|
||||
WORKER_CONFIG = {
|
||||
'memory_limit': os.getenv('WORKER_MEMORY_LIMIT', '512MB'),
|
||||
'checkpoint_interval': 100,
|
||||
'batch_size': 1000,
|
||||
'external_storage': '/tmp/spacetime-workers'
|
||||
}
|
||||
```
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Memory Usage Endpoint
|
||||
```python
|
||||
@router.get("/system/memory")
|
||||
async def memory_stats():
|
||||
"""Get current memory statistics"""
|
||||
|
||||
return {
|
||||
"current_usage_mb": memory_monitor.current_usage_mb,
|
||||
"peak_usage_mb": memory_monitor.peak_usage_mb,
|
||||
"available_mb": memory_monitor.available_mb,
|
||||
"pressure_level": memory_monitor.pressure_level,
|
||||
"cache_stats": cache_service.get_stats(),
|
||||
"external_files": len(os.listdir(EXTERNAL_STORAGE))
|
||||
}
|
||||
```
|
||||
|
||||
### Prometheus Metrics
|
||||
```python
|
||||
from prometheus_client import Counter, Histogram, Gauge
|
||||
|
||||
stream_requests = Counter('spacetime_stream_requests_total', 'Total streaming requests')
|
||||
memory_usage = Gauge('spacetime_memory_usage_bytes', 'Current memory usage')
|
||||
processing_time = Histogram('spacetime_processing_seconds', 'Processing time')
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
### Unit Tests
|
||||
```bash
|
||||
pytest tests/unit -v
|
||||
```
|
||||
|
||||
### Integration Tests
|
||||
```bash
|
||||
pytest tests/integration -v
|
||||
```
|
||||
|
||||
### Load Testing
|
||||
```bash
|
||||
locust -f tests/load/locustfile.py --host http://localhost:8000
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Always use streaming** for large responses
|
||||
2. **Configure memory limits** based on container size
|
||||
3. **Enable checkpointing** for long-running tasks
|
||||
4. **Monitor memory pressure** in production
|
||||
5. **Use external storage** on fast SSDs
|
||||
6. **Set appropriate timeouts** for streaming endpoints
|
||||
7. **Implement circuit breakers** for memory protection
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### High Memory Usage
|
||||
- Reduce chunk sizes
|
||||
- Enable more aggressive spillover
|
||||
- Check for memory leaks in custom code
|
||||
|
||||
### Slow Streaming
|
||||
- Ensure proxy buffering is disabled
|
||||
- Check network latency
|
||||
- Optimize chunk sizes
|
||||
|
||||
### Failed Checkpoints
|
||||
- Verify storage permissions
|
||||
- Check disk space
|
||||
- Monitor checkpoint frequency
|
||||
|
||||
## Learn More
|
||||
|
||||
- [SqrtSpace SpaceTime Docs](https://github.com/MarketAlly/Ubiquity)
|
||||
- [FastAPI Documentation](https://fastapi.tiangolo.com)
|
||||
- [Streaming Best Practices](https://example.com/streaming)
|
||||
137
examples/fastapi-app/app/main.py
Normal file
137
examples/fastapi-app/app/main.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
FastAPI application demonstrating SqrtSpace SpaceTime integration
|
||||
"""
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
import logging
|
||||
|
||||
from sqrtspace_spacetime import SpaceTimeConfig
|
||||
from sqrtspace_spacetime.memory import MemoryPressureMonitor
|
||||
|
||||
from .config import settings
|
||||
from .routers import products, analytics, ml, reports
|
||||
from .services.cache_service import SpaceTimeCache
|
||||
from .utils.memory import memory_monitor_middleware
|
||||
|
||||
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global instances
# Both are created at import time and shared by the lifespan handler and the
# endpoints defined in this module.
cache = SpaceTimeCache()
# The monitor is given the memory limit taken from the application settings
memory_monitor = MemoryPressureMonitor(settings.SPACETIME_MEMORY_LIMIT)
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager.

    Startup applies the SpaceTime defaults taken from ``settings`` and
    attaches the shared cache and memory monitor to ``app.state``.
    Shutdown calls ``cache.cleanup()``.

    Args:
        app: The FastAPI application being started.
    """
    # Startup
    logger.info("Starting FastAPI with SqrtSpace SpaceTime")

    # Configure SpaceTime
    SpaceTimeConfig.set_defaults(
        memory_limit=settings.SPACETIME_MEMORY_LIMIT,
        external_storage=settings.SPACETIME_EXTERNAL_STORAGE,
        chunk_strategy=settings.SPACETIME_CHUNK_STRATEGY,
        compression=settings.SPACETIME_COMPRESSION
    )

    # Initialize services
    app.state.cache = cache
    app.state.memory_monitor = memory_monitor

    try:
        yield
    finally:
        # Shutdown: kept in `finally` so the cache is cleaned up even when an
        # exception is raised into the context manager at the yield point.
        logger.info("Shutting down...")
        cache.cleanup()
|
||||
|
||||
|
||||
# Create FastAPI app
app = FastAPI(
    title="SqrtSpace SpaceTime FastAPI Demo",
    description="Memory-efficient API with √n space-time tradeoffs",
    version="1.0.0",
    lifespan=lifespan
)

# Add CORS middleware
# Credentials are disabled because a wildcard origin must not be combined with
# credentialed requests: that combination lets any site make authenticated
# cross-origin calls. To support cookies/auth headers, list explicit origins
# here and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Add custom middleware
app.middleware("http")(memory_monitor_middleware)

# Include routers
# Each router's paths are relative to the prefix given here.
app.include_router(products.router, prefix="/products", tags=["products"])
app.include_router(analytics.router, prefix="/analytics", tags=["analytics"])
app.include_router(ml.router, prefix="/ml", tags=["machine-learning"])
app.include_router(reports.router, prefix="/reports", tags=["reports"])
|
||||
|
||||
|
||||
@app.get("/")
async def root():
    """Landing endpoint: demo name, docs location and the monitor's memory info."""
    payload = {
        "message": "SqrtSpace SpaceTime FastAPI Demo",
        "docs": "/docs",
    }
    payload["memory_usage"] = memory_monitor.get_memory_info()
    return payload
|
||||
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Report service health together with memory and cache figures."""
    info = memory_monitor.get_memory_info()

    # Assemble the memory section first, then the overall response
    memory_section = {
        "usage_mb": info["used_mb"],
        "available_mb": info["available_mb"],
        "percentage": info["percentage"],
        "pressure": memory_monitor.check().value,
    }

    response = {"status": "healthy", "memory": memory_section}
    response["cache"] = cache.get_stats()
    return response
|
||||
|
||||
|
||||
@app.get("/system/memory")
async def system_memory():
    """Detailed memory statistics.

    Returns:
        dict with three sections: "process" (RSS/VMS in MB, CPU percent,
        thread count), "spacetime" (configured limit, storage path, pressure
        level, cache stats) and "system" (total/available memory in MB,
        memory and swap usage percent).
    """
    import asyncio
    import os

    import psutil

    process = psutil.Process(os.getpid())

    # cpu_percent(interval=0.1) blocks for the whole interval; run it in the
    # default executor so the event loop keeps serving other requests.
    loop = asyncio.get_running_loop()
    cpu_percent = await loop.run_in_executor(None, process.cpu_percent, 0.1)

    # Take one snapshot of each source so related figures are consistent
    process_memory = process.memory_info()
    virtual_memory = psutil.virtual_memory()

    return {
        "process": {
            "rss_mb": process_memory.rss / 1024 / 1024,
            "vms_mb": process_memory.vms / 1024 / 1024,
            "cpu_percent": cpu_percent,
            "num_threads": process.num_threads()
        },
        "spacetime": {
            "memory_limit": settings.SPACETIME_MEMORY_LIMIT,
            "external_storage": settings.SPACETIME_EXTERNAL_STORAGE,
            "pressure_level": memory_monitor.check().value,
            "cache_stats": cache.get_stats()
        },
        "system": {
            "total_memory_mb": virtual_memory.total / 1024 / 1024,
            "available_memory_mb": virtual_memory.available / 1024 / 1024,
            "memory_percent": virtual_memory.percent,
            "swap_percent": psutil.swap_memory().percent
        }
    }
|
||||
|
||||
|
||||
# Allow starting the server directly from this module.
# NOTE(review): the module uses relative imports, so this presumably has to be
# launched as a package module (e.g. `python -m app.main`) — confirm.
if __name__ == "__main__":
    import uvicorn
    # Listen on all interfaces, port 8000
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
260
examples/fastapi-app/app/routers/products.py
Normal file
260
examples/fastapi-app/app/routers/products.py
Normal file
@@ -0,0 +1,260 @@
|
||||
"""
|
||||
Product endpoints demonstrating streaming and memory-efficient operations
|
||||
"""
|
||||
from fastapi import APIRouter, Query, Response, HTTPException, BackgroundTasks
|
||||
from fastapi.responses import StreamingResponse
|
||||
from typing import Optional, List
|
||||
import json
|
||||
import csv
|
||||
import io
|
||||
from datetime import datetime
|
||||
|
||||
from sqrtspace_spacetime import Stream, external_sort
|
||||
from sqrtspace_spacetime.checkpoint import CheckpointManager
|
||||
|
||||
from ..models import Product, ProductUpdate, BulkUpdateRequest, ImportStatus
|
||||
from ..services.product_service import ProductService
|
||||
from ..database import get_db
|
||||
|
||||
# Router for the product endpoints defined in this module
router = APIRouter()
# Module-level singletons shared by all handlers below
product_service = ProductService()
# Used by the bulk-update endpoints to look up job checkpoints
checkpoint_manager = CheckpointManager()
|
||||
|
||||
|
||||
@router.get("/")
async def list_products(
    skip: int = Query(0, ge=0),
    limit: int = Query(100, ge=1, le=1000),
    category: Optional[str] = None,
    min_price: Optional[float] = None,
    max_price: Optional[float] = None
):
    """Return one page of products, optionally narrowed by category and price range."""
    filters = {}

    # An empty category string is treated the same as "no category"
    if category:
        filters['category'] = category

    # Price bounds are applied whenever they are supplied, including 0
    for name, bound in (('min_price', min_price), ('max_price', max_price)):
        if bound is not None:
            filters[name] = bound

    return await product_service.get_products(skip, limit, filters)
|
||||
|
||||
|
||||
@router.get("/stream")
async def stream_products(
    category: Optional[str] = None,
    format: str = Query("ndjson", regex="^(ndjson|json)$")
):
    """
    Stream every product, either one JSON document per line (ndjson)
    or as a single JSON array (json).
    Products are serialized one at a time as they arrive from the service.
    """

    async def ndjson_lines():
        async for item in product_service.stream_products(category):
            yield json.dumps(item.dict()) + "\n"

    async def json_array():
        yield "["
        emitted_any = False
        async for item in product_service.stream_products(category):
            # Separator goes before every element except the first
            if emitted_any:
                yield ","
            yield json.dumps(item.dict())
            emitted_any = True
        yield "]"

    # Pick the body generator and its media type, then build one response
    if format == "ndjson":
        body, media_type = ndjson_lines(), "application/x-ndjson"
    else:
        body, media_type = json_array(), "application/json"

    return StreamingResponse(
        body,
        media_type=media_type,
        headers={"X-Accel-Buffering": "no"}
    )
|
||||
|
||||
|
||||
@router.get("/export/csv")
async def export_csv(
    category: Optional[str] = None,
    columns: Optional[List[str]] = Query(None)
):
    """Export products as CSV with streaming.

    Args:
        category: When given, passed to the service to restrict the export.
        columns: Product attributes to export, in order. Defaults to
            id, name, sku, category, price, stock, created_at.

    Returns:
        A StreamingResponse delivering the CSV as an attachment.

    Raises:
        HTTPException: 400 when a requested column is empty, starts with an
            underscore, or is not a known product field.
    """
    default_columns = ["id", "name", "sku", "category", "price", "stock", "created_at"]

    if not columns:
        columns = default_columns

    # `columns` comes straight from the query string and is later passed to
    # getattr(); validate it up front so callers cannot read private/dunder
    # attributes and so a bad name fails with 400 instead of mid-stream.
    # Works with both pydantic v2 (`model_fields`) and v1 (`__fields__`);
    # when neither is available only the underscore rule applies.
    declared = getattr(Product, "model_fields", None) or getattr(Product, "__fields__", None)
    allowed = set(declared) | set(default_columns) if declared else None
    rejected = [
        col for col in columns
        if not col or col.startswith("_") or (allowed is not None and col not in allowed)
    ]
    if rejected:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid export column(s): {', '.join(rejected)}"
        )

    async def generate():
        # One in-memory buffer is reused for the header and for every batch
        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=columns)

        def drain():
            """Return everything buffered so far and reset the buffer."""
            output.seek(0)
            data = output.read()
            output.seek(0)
            output.truncate()
            return data

        # Write header
        writer.writeheader()
        yield drain()

        # Stream products in batches
        batch_count = 0
        async for batch in product_service.stream_products_batched(category, batch_size=100):
            for product in batch:
                writer.writerow({col: getattr(product, col) for col in columns})

            yield drain()

            batch_count += 1
            if batch_count % 10 == 0:
                # Yield empty string to keep connection alive
                # NOTE(review): a zero-length chunk may be dropped by the
                # server and have no effect — confirm it is still needed.
                yield ""

    filename = f"products_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"

    return StreamingResponse(
        generate(),
        media_type="text/csv",
        headers={
            "Content-Disposition": f"attachment; filename={filename}",
            "X-Accel-Buffering": "no"
        }
    )
|
||||
|
||||
|
||||
@router.get("/search")
async def search_products(
    q: str = Query(..., min_length=2),
    sort_by: str = Query("relevance", regex="^(relevance|price_asc|price_desc|name)$"),
    limit: int = Query(100, ge=1, le=1000)
):
    """
    Search products and return at most `limit` of them.
    Result sets of more than 1000 entries are re-sorted with external_sort.
    """
    matches = await product_service.search_products(q, sort_by, limit)

    # Use external sort if results are large
    if len(matches) > 1000:
        # One key function per supported ordering; descending orders negate the value
        key_funcs = {
            'price_asc': lambda entry: entry['price'],
            'price_desc': lambda entry: -entry['price'],
            'name': lambda entry: entry['name'],
            'relevance': lambda entry: -entry['relevance_score']
        }
        chosen_key = key_funcs[sort_by]
        matches = external_sort(matches, key_func=chosen_key)

    page = matches[:limit]
    return {"results": page, "total": len(matches)}
|
||||
|
||||
|
||||
@router.post("/bulk-update")
async def bulk_update_prices(
    request: BulkUpdateRequest,
    background_tasks: BackgroundTasks,
    job_id: Optional[str] = Query(None)
):
    """
    Bulk update product prices with checkpointing.
    Can be resumed if interrupted.

    Args:
        request: The bulk update to apply.
        background_tasks: FastAPI task queue the update is scheduled on.
        job_id: Id of a previously started job to resume. When omitted a new
            job is started. (A freshly generated id can never have a
            checkpoint, so resuming is only possible with an id from an
            earlier response.)

    Returns:
        dict with a message, the job id and the status URL; when resuming it
        also carries the progress recorded in the checkpoint.

    Raises:
        HTTPException: 404 when `job_id` is given but no checkpoint exists.
    """
    if job_id is not None:
        # Check for existing checkpoint
        checkpoint = checkpoint_manager.restore(job_id)
        if not checkpoint:
            raise HTTPException(status_code=404, detail="Job not found")

        # Re-schedule the worker under the same id so the job actually resumes.
        # NOTE(review): assumes product_service.bulk_update_prices continues
        # from the stored checkpoint for this job_id — confirm.
        background_tasks.add_task(
            product_service.bulk_update_prices,
            request,
            job_id
        )

        return {
            "message": "Resuming previous job",
            "job_id": job_id,
            "progress": checkpoint.get("progress", 0),
            "status_url": f"/products/bulk-update/{job_id}/status"
        }

    job_id = f"bulk_update_{datetime.now().timestamp()}"

    # Start background task
    background_tasks.add_task(
        product_service.bulk_update_prices,
        request,
        job_id
    )

    return {
        "message": "Bulk update started",
        "job_id": job_id,
        "status_url": f"/products/bulk-update/{job_id}/status"
    }
|
||||
|
||||
|
||||
@router.get("/bulk-update/{job_id}/status")
async def bulk_update_status(job_id: str):
    """Report the checkpointed state of a bulk update job, or 404 if none exists."""
    checkpoint = checkpoint_manager.restore(job_id)
    if not checkpoint:
        raise HTTPException(status_code=404, detail="Job not found")

    # Field name -> value reported when the checkpoint lacks that field
    fallbacks = (
        ("status", "running"),
        ("progress", 0),
        ("total", 0),
        ("updated", 0),
        ("errors", []),
    )

    report = {"job_id": job_id}
    for field, fallback in fallbacks:
        report[field] = checkpoint.get(field, fallback)
    return report
|
||||
|
||||
|
||||
@router.post("/import/csv")
async def import_csv(
    file_url: str,
    background_tasks: BackgroundTasks
):
    """Queue a background import of products from a CSV file and return its tracking info."""
    # NOTE(review): file_url is passed to the service unvalidated — confirm the
    # service restricts which locations it will fetch.
    started_at = datetime.now().timestamp()
    import_id = f"import_{started_at}"

    background_tasks.add_task(product_service.import_from_csv, file_url, import_id)

    response = {"message": "Import started", "import_id": import_id}
    response["status_url"] = f"/products/import/{import_id}/status"
    return response
|
||||
|
||||
|
||||
@router.get("/import/{import_id}/status")
async def import_status(import_id: str):
    """Return the service's status record for an import job, or 404 if it has none."""
    job_status = await product_service.get_import_status(import_id)

    if job_status:
        return job_status

    raise HTTPException(status_code=404, detail="Import job not found")
|
||||
|
||||
|
||||
@router.get("/statistics")
async def product_statistics():
    """
    Return the product statistics computed by the service, with the
    processing metrics grouped under "processing_info".
    """
    stats = await product_service.calculate_statistics()

    # Fields copied through unchanged, in response order
    passthrough = (
        "total_products",
        "total_value",
        "by_category",
        "price_distribution",
        "stock_alerts",
    )
    processing_fields = ("memory_used_mb", "external_operations")

    summary = {field: stats[field] for field in passthrough}
    summary["processing_info"] = {field: stats[field] for field in processing_fields}
    return summary
|
||||
Reference in New Issue
Block a user