Initial
This commit is contained in:
115
experiments/checkpointed_sorting/test_quick.py
Normal file
115
experiments/checkpointed_sorting/test_quick.py
Normal file
@@ -0,0 +1,115 @@
|
||||
"""
|
||||
Quick test to verify sorting experiment works with smaller parameters
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import tempfile
|
||||
import numpy as np
|
||||
import shutil
|
||||
from scipy import stats
|
||||
import sys
|
||||
|
||||
class SortingExperiment:
    """Compare different sorting algorithms with varying memory constraints.

    Holds a random float32 array of ``data_size`` elements and a private
    temp directory used as checkpoint storage for the external sort.
    Call :meth:`cleanup` when done to remove the temp directory.
    """

    def __init__(self, data_size: int):
        # Fixed dataset for the lifetime of the experiment; float32 so both
        # sort paths operate on (and return) the same dtype.
        self.data_size = data_size
        self.data = np.random.rand(data_size).astype(np.float32)
        self.temp_dir = tempfile.mkdtemp()

    def cleanup(self) -> None:
        """Remove the checkpoint directory (safe to call more than once)."""
        # ignore_errors makes cleanup idempotent: a second call (or a call
        # after manual removal) must not raise.
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def in_memory_sort(self) -> np.ndarray:
        """Standard in-memory sorting - O(n) space.

        Returns a sorted copy; ``self.data`` is left untouched.
        """
        return np.sort(self.data.copy())

    def checkpoint_sort(self, memory_limit: int) -> np.ndarray:
        """External merge sort with checkpointing - O(√n) space.

        Args:
            memory_limit: Rough element budget; a quarter of it is used as
                the per-chunk size (the rest is notionally reserved for
                merging). Values < 4 degrade to 1-element chunks rather
                than failing.

        Returns:
            A sorted float32 array equal (elementwise) to
            :meth:`in_memory_sort`'s result.
        """
        # Clamp to >= 1 so memory_limit < 4 cannot cause a division by zero
        # below; it just degenerates to one element per chunk.
        chunk_size = max(1, memory_limit // 4)  # Reserve memory for merging
        num_chunks = (self.data_size + chunk_size - 1) // chunk_size

        # Phase 1: Sort chunks and write to disk
        chunk_files = []
        for i in range(num_chunks):
            start = i * chunk_size
            end = min((i + 1) * chunk_size, self.data_size)

            # Sort chunk in memory
            chunk = np.sort(self.data[start:end])

            # Write to disk (checkpoint)
            filename = os.path.join(self.temp_dir, f'chunk_{i}.npy')
            np.save(filename, chunk)
            chunk_files.append(filename)

            # Clear chunk from memory
            del chunk

        # Phase 2: Simple merge (for quick test).
        # np.concatenate preserves the float32 dtype; the previous
        # list-extend approach silently promoted the result to float64,
        # making the return dtype disagree with in_memory_sort().
        merged = np.concatenate([np.load(f) for f in chunk_files])

        # Final sort (not truly external, but for quick test)
        result = np.sort(merged)

        # Cleanup chunk files
        for f in chunk_files:
            os.remove(f)

        return result
|
||||
|
||||
def run_quick_test():
    """Run a quick end-to-end check of both sort paths on small inputs.

    For each size: times the in-memory sort and the checkpointed sort over
    a few trials, verifies once per size that the two produce matching
    output, and prints mean ± std timings plus the slowdown factor.
    """
    print("=== Quick Sorting Test ===\n")

    # Small sizes for quick verification
    sizes = [100, 500, 1000]
    num_trials = 3

    for size in sizes:
        print(f"\nTesting with {size} elements ({num_trials} trials):")

        in_memory_times = []
        checkpoint_times = []

        for trial in range(num_trials):
            exp = SortingExperiment(size)

            # In-memory sort. perf_counter, not time.time: monotonic and
            # high-resolution, so short runs are timed meaningfully.
            start = time.perf_counter()
            result1 = exp.in_memory_sort()
            time1 = time.perf_counter() - start
            in_memory_times.append(time1)

            # Checkpointed sort with an O(sqrt(n)) element budget
            memory_limit = int(np.sqrt(size) * 4)
            start = time.perf_counter()
            result2 = exp.checkpoint_sort(memory_limit)
            time2 = time.perf_counter() - start
            checkpoint_times.append(time2)

            # Verify correctness (first trial only; results are deterministic
            # per experiment instance)
            if trial == 0:
                assert np.allclose(result1, result2), f"Results don't match for size {size}"
                print(f" ✓ Correctness verified")

            exp.cleanup()

        # Calculate statistics
        in_memory_mean = np.mean(in_memory_times)
        in_memory_std = np.std(in_memory_times)
        checkpoint_mean = np.mean(checkpoint_times)
        checkpoint_std = np.std(checkpoint_times)

        print(f" In-memory: {in_memory_mean:.6f}s ± {in_memory_std:.6f}s")
        print(f" Checkpoint: {checkpoint_mean:.6f}s ± {checkpoint_std:.6f}s")
        # Guard against a zero mean when the in-memory sort finishes below
        # timer resolution — the unguarded division raised ZeroDivisionError.
        if in_memory_mean > 0:
            print(f" Slowdown: {checkpoint_mean/in_memory_mean:.1f}x")
        else:
            print(" Slowdown: n/a (in-memory time below timer resolution)")
|
||||
|
||||
# Script entry point: run the quick verification only when executed
# directly, not when imported as a module.
if __name__ == "__main__":
    run_quick_test()
|
||||
Reference in New Issue
Block a user