Initial
This commit is contained in:
309
tests/test_memory_pressure.py
Normal file
309
tests/test_memory_pressure.py
Normal file
@@ -0,0 +1,309 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Memory pressure tests to verify √n behavior under constrained memory.
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import gc
|
||||
import os
|
||||
import psutil
|
||||
import resource
|
||||
import tempfile
|
||||
import shutil
|
||||
import random
|
||||
import time
|
||||
from sqrtspace_spacetime import (
|
||||
SpaceTimeArray, SpaceTimeDict, external_sort,
|
||||
external_groupby, SpaceTimeConfig
|
||||
)
|
||||
|
||||
|
||||
class TestMemoryPressure(unittest.TestCase):
    """Test √n memory behavior under real memory constraints."""

    def setUp(self):
        """Create a scratch spill directory and pin strict SpaceTime defaults."""
        # Handle to the current process; tests sample its RSS via psutil.
        self.process = psutil.Process()
        # Per-test scratch directory for cold/spilled data.
        self.temp_dir = tempfile.mkdtemp()

        # 50MB ceiling with √n chunking so spillover is forced early.
        memory_cap = 50 * 1024 * 1024
        SpaceTimeConfig.set_defaults(
            memory_limit=memory_cap,
            storage_path=self.temp_dir,
            chunk_strategy='sqrt_n',
            compression='gzip',
        )
    def tearDown(self):
        """Clean up test environment."""
        # ignore_errors: spill files may already have been removed by the
        # library itself — teardown must not fail on a missing path.
        shutil.rmtree(self.temp_dir, ignore_errors=True)
    def test_array_under_memory_pressure(self):
        """Test SpaceTimeArray behavior when memory is constrained.

        Appends ~100MB of 1KB dict payloads while the configured limit is
        50MB (see setUp), sampling process RSS every 1000 items, then
        verifies all items are accessible and the hot set stays near √n.
        """
        print("\n=== Testing SpaceTimeArray under memory pressure ===")

        # Create large objects that will force spillover
        large_object_size = 1024  # 1KB per object
        n_objects = 100_000  # Total: ~100MB if all in memory

        # threshold='auto' lets the library size the hot set itself —
        # presumably ~√n; the final assertion allows up to 2x of that.
        array = SpaceTimeArray(threshold='auto')

        # Track metrics
        spillovers = 0    # largest cold-item count observed so far
        max_memory = 0    # peak RSS across all samples, in MB
        start_time = time.time()

        # Add objects and monitor memory
        for i in range(n_objects):
            # Create a large object
            obj = {
                'id': i,
                'data': 'x' * large_object_size,
                'timestamp': time.time()
            }
            array.append(obj)

            # Monitor every 1000 items
            if i % 1000 == 0:
                # Collect first so the RSS sample reflects live objects only.
                gc.collect()
                current_memory = self.process.memory_info().rss / 1024 / 1024
                max_memory = max(max_memory, current_memory)

                if i > 0:
                    # NOTE(review): _hot_data/_cold_indices are private
                    # SpaceTimeArray internals — confirm they are stable.
                    hot_count = len(array._hot_data)
                    cold_count = len(array._cold_indices)
                    print(f" Items: {i:,} | Memory: {current_memory:.1f}MB | "
                          f"Hot: {hot_count} | Cold: {cold_count}")

                    # Check if spillover is happening
                    if cold_count > spillovers:
                        spillovers = cold_count

        elapsed = time.time() - start_time

        # Verify all data is accessible
        print("\nVerifying data accessibility...")
        sample_indices = random.sample(range(n_objects), min(100, n_objects))
        for idx in sample_indices:
            obj = array[idx]
            self.assertEqual(obj['id'], idx)
            self.assertEqual(len(obj['data']), large_object_size)

        # Calculate statistics
        theoretical_sqrt_n = int(n_objects ** 0.5)
        actual_hot_items = len(array._hot_data)

        print(f"\nResults:")
        print(f" Total items: {n_objects:,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Max memory used: {max_memory:.1f} MB")
        print(f" Theoretical √n: {theoretical_sqrt_n:,}")
        print(f" Actual hot items: {actual_hot_items:,}")
        print(f" Cold items: {len(array._cold_indices):,}")
        print(f" Memory efficiency: {n_objects / max_memory:.0f} items/MB")

        # Assertions
        self.assertEqual(len(array), n_objects)
        # max_memory is absolute process RSS, so the 150MB bound includes
        # interpreter baseline on top of the 50MB configured data limit.
        self.assertLess(max_memory, 150)
        self.assertGreater(spillovers, 0)  # Should have spilled to disk
        self.assertLessEqual(actual_hot_items, theoretical_sqrt_n * 2)  # Within 2x of √n
def test_dict_with_memory_limit(self):
|
||||
"""Test SpaceTimeDict with strict memory limit."""
|
||||
print("\n=== Testing SpaceTimeDict under memory pressure ===")
|
||||
|
||||
# Create dictionary with explicit threshold
|
||||
cache = SpaceTimeDict(threshold=1000) # Keep only 1000 items in memory
|
||||
|
||||
n_items = 50_000
|
||||
value_size = 500 # 500 bytes per value
|
||||
|
||||
# Track evictions
|
||||
evictions = 0
|
||||
start_time = time.time()
|
||||
|
||||
# Add items
|
||||
for i in range(n_items):
|
||||
key = f"key_{i:06d}"
|
||||
value = {
|
||||
'id': i,
|
||||
'data': 'v' * value_size,
|
||||
'accessed': 0
|
||||
}
|
||||
cache[key] = value
|
||||
|
||||
# Check for evictions
|
||||
if i % 1000 == 0 and i > 0:
|
||||
current_hot = len(cache._hot_data)
|
||||
current_cold = len(cache._cold_keys)
|
||||
if current_cold > evictions:
|
||||
evictions = current_cold
|
||||
print(f" Items: {i:,} | Hot: {current_hot} | Cold: {current_cold}")
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
# Test access patterns (LRU behavior)
|
||||
print("\nTesting LRU behavior...")
|
||||
# Access some old items
|
||||
for i in range(0, 100, 10):
|
||||
key = f"key_{i:06d}"
|
||||
value = cache[key]
|
||||
value['accessed'] += 1
|
||||
|
||||
# Add more items to trigger eviction
|
||||
for i in range(n_items, n_items + 1000):
|
||||
cache[f"key_{i:06d}"] = {'id': i, 'data': 'x' * value_size}
|
||||
|
||||
# Recent items should still be hot
|
||||
stats = cache.get_stats()
|
||||
|
||||
print(f"\nResults:")
|
||||
print(f" Total items: {len(cache):,}")
|
||||
print(f" Time taken: {elapsed:.2f} seconds")
|
||||
print(f" Hot items: {len(cache._hot_data)}")
|
||||
print(f" Cold items: {len(cache._cold_keys)}")
|
||||
print(f" Stats: {stats}")
|
||||
|
||||
# Verify all items accessible
|
||||
sample_keys = random.sample([f"key_{i:06d}" for i in range(n_items)], 100)
|
||||
for key in sample_keys:
|
||||
self.assertIn(key, cache)
|
||||
value = cache[key]
|
||||
self.assertIsNotNone(value)
|
||||
|
||||
    def test_algorithm_memory_scaling(self):
        """Test that algorithms scale with √n memory usage.

        Sorts datasets of size n, 4n, 9n, 16n via external_sort and checks
        that the measured RSS delta grows roughly like √n (within 3x).
        """
        print("\n=== Testing algorithm memory scaling ===")

        datasets = [10_000, 40_000, 90_000, 160_000]  # n, 4n, 9n, 16n
        results = []

        for n in datasets:
            print(f"\nTesting with n = {n:,}")

            # Generate data
            data = [random.randint(1, 1_000_000) for _ in range(n)]

            # Measure memory for sorting (RSS delta around the sort call).
            gc.collect()
            mem_before = self.process.memory_info().rss / 1024 / 1024

            sorted_data = external_sort(data)

            gc.collect()
            mem_after = self.process.memory_info().rss / 1024 / 1024
            mem_used = mem_after - mem_before

            # Verify correctness — spot-check ordering on a prefix only,
            # to keep the test fast.
            self.assertEqual(len(sorted_data), n)
            for i in range(min(1000, len(sorted_data) - 1)):
                self.assertLessEqual(sorted_data[i], sorted_data[i + 1])

            sqrt_n = int(n ** 0.5)
            results.append({
                'n': n,
                'sqrt_n': sqrt_n,
                'memory_used': mem_used,
                # The max() floor avoids division blow-up when the RSS delta
                # is near zero or negative after gc.
                'ratio': mem_used / max(sqrt_n * 8 / 1024 / 1024, 0.001)  # 8 bytes per int
            })

            print(f" √n = {sqrt_n:,}")
            print(f" Memory used: {mem_used:.2f} MB")
            print(f" Ratio to theoretical: {results[-1]['ratio']:.2f}x")

        # Verify √n scaling
        print("\nScaling Analysis:")
        print("n | √n | Memory (MB) | Ratio")
        print("---------|---------|-------------|-------")
        for r in results:
            print(f"{r['n']:8,} | {r['sqrt_n']:7,} | {r['memory_used']:11.2f} | {r['ratio']:6.2f}x")

        # Memory should scale roughly with √n
        # As n increases 4x, memory should increase ~2x
        for i in range(1, len(results)):
            n_ratio = results[i]['n'] / results[i-1]['n']
            # Floor at 0.1MB so a near-zero previous delta can't inflate the ratio.
            mem_ratio = results[i]['memory_used'] / max(results[i-1]['memory_used'], 0.1)
            expected_ratio = n_ratio ** 0.5

            print(f"\nn increased {n_ratio:.1f}x, memory increased {mem_ratio:.1f}x "
                  f"(expected ~{expected_ratio:.1f}x)")

            # Allow some variance due to overheads
            self.assertLess(mem_ratio, expected_ratio * 3,
                            f"Memory scaling worse than √n: {mem_ratio:.1f}x vs {expected_ratio:.1f}x")
    def test_concurrent_memory_pressure(self):
        """Test behavior under concurrent access with memory pressure.

        Four writer threads append 25k items each (with occasional random
        reads) while the main thread samples peak RSS; asserts no worker
        raised and every append landed in the array.
        """
        print("\n=== Testing concurrent access under memory pressure ===")

        import threading
        import queue

        array = SpaceTimeArray(threshold=500)
        errors = queue.Queue()  # thread-safe sink for worker exceptions
        n_threads = 4
        items_per_thread = 25_000

        def worker(thread_id, start_idx):
            # Append this thread's slice of items, reading back occasionally.
            try:
                for i in range(items_per_thread):
                    item = {
                        'thread': thread_id,
                        'index': start_idx + i,
                        'data': f"thread_{thread_id}_item_{i}" * 50
                    }
                    array.append(item)

                    # Occasionally read random items
                    if i % 100 == 0 and len(array) > 10:
                        idx = random.randint(0, len(array) - 1)
                        _ = array[idx]
            except Exception as e:
                # Report via the queue: exceptions raised in worker threads
                # would otherwise be swallowed and the test would pass.
                errors.put((thread_id, str(e)))

        # Start threads
        threads = []
        start_time = time.time()

        for i in range(n_threads):
            t = threading.Thread(
                target=worker,
                args=(i, i * items_per_thread)
            )
            threads.append(t)
            t.start()

        # Monitor memory while threads run
        max_memory = 0
        while any(t.is_alive() for t in threads):
            current_memory = self.process.memory_info().rss / 1024 / 1024
            max_memory = max(max_memory, current_memory)
            time.sleep(0.1)  # sample at ~10Hz to keep monitoring cheap

        # Wait for completion
        for t in threads:
            t.join()

        elapsed = time.time() - start_time

        # Check for errors
        error_list = []
        while not errors.empty():
            error_list.append(errors.get())

        print(f"\nResults:")
        print(f" Threads: {n_threads}")
        print(f" Total items: {n_threads * items_per_thread:,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Max memory: {max_memory:.1f} MB")
        print(f" Errors: {len(error_list)}")
        print(f" Final array size: {len(array):,}")

        # Assertions
        self.assertEqual(len(error_list), 0, f"Thread errors: {error_list}")
        self.assertEqual(len(array), n_threads * items_per_thread)
        self.assertLess(max_memory, 200)  # Should handle memory pressure
# Allow running this module directly via `python tests/test_memory_pressure.py`.
if __name__ == "__main__":
    unittest.main()
Reference in New Issue
Block a user