Initial
This commit is contained in:
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Ubiquity SpaceTime Test Suite
|
||||
234
tests/test_external_algorithms.py
Normal file
234
tests/test_external_algorithms.py
Normal file
@@ -0,0 +1,234 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tests for external algorithms with memory pressure.
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import random
|
||||
import gc
|
||||
import psutil
|
||||
import time
|
||||
from sqrtspace_spacetime import external_sort, external_groupby, SpaceTimeConfig
|
||||
|
||||
|
||||
class TestExternalAlgorithms(unittest.TestCase):
    """Test external algorithms under memory constraints."""

    def setUp(self):
        """Set up test environment: 100MB cap with sqrt-n chunking."""
        SpaceTimeConfig.set_defaults(
            memory_limit=100 * 1024 * 1024,  # 100MB limit
            chunk_strategy='sqrt_n'
        )
        self.process = psutil.Process()

    def _rss_mb(self):
        """Resident set size of this process, in megabytes."""
        return self.process.memory_info().rss / 1024 / 1024

    def test_external_sort_small(self):
        """Test external sort with small dataset."""
        values = [random.randint(1, 1000) for _ in range(1000)]
        result = external_sort(values)

        # Same length and non-decreasing order throughout.
        self.assertEqual(len(result), len(values))
        for pos in range(len(result) - 1):
            self.assertLessEqual(result[pos], result[pos + 1])

        # No element lost or invented.
        self.assertEqual(sorted(values), result)

    def test_external_sort_large_with_memory_tracking(self):
        """Test external sort with large dataset and memory tracking."""
        n = 1_000_000  # 1 million items

        print(f"\nGenerating {n:,} random integers...")
        data = [random.randint(1, 10_000_000) for _ in range(n)]

        # Baseline memory before sorting.
        gc.collect()
        memory_before = self._rss_mb()
        peak_memory = memory_before

        print("Sorting with external_sort...")
        start_time = time.time()

        # Sampling hook: records current RSS each time it is invoked.
        memory_samples = []

        def monitor_memory():
            current = self._rss_mb()
            memory_samples.append(current)
            return current

        sorted_data = external_sort(data)

        # Measure final state.
        gc.collect()
        memory_after = self._rss_mb()
        elapsed = time.time() - start_time

        # Spot-check ordering while sampling memory along the way.
        for i in range(0, len(sorted_data) - 1, 10000):
            self.assertLessEqual(sorted_data[i], sorted_data[i + 1])
            if i % 100000 == 0:
                peak_memory = max(peak_memory, monitor_memory())

        memory_increase = memory_after - memory_before
        theoretical_sqrt_n = int(n ** 0.5)

        print(f"\nExternal Sort Statistics:")
        print(f" Items sorted: {n:,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Memory before: {memory_before:.1f} MB")
        print(f" Memory after: {memory_after:.1f} MB")
        print(f" Peak memory: {peak_memory:.1f} MB")
        print(f" Memory increase: {memory_increase:.1f} MB")
        print(f" Theoretical √n: {theoretical_sqrt_n:,} items")
        print(f" Items per MB: {n / max(memory_increase, 0.1):,.0f}")

        # With 1M items, sqrt(n) = 1000, so memory should stay far below the
        # full dataset size.
        self.assertLess(memory_increase, 50, f"Memory increase {memory_increase:.1f} MB is too high")

        # Verify correctness on a random sample of adjacent pairs.
        sample_indices = random.sample(range(len(sorted_data) - 1), min(1000, len(sorted_data) - 1))
        for i in sample_indices:
            self.assertLessEqual(sorted_data[i], sorted_data[i + 1])

    def test_external_groupby_memory_efficiency(self):
        """Test external groupby with memory tracking."""
        n = 100_000

        # Build records with a bounded number of distinct categories.
        print(f"\nGenerating {n:,} items for groupby...")
        categories = [f"category_{i}" for i in range(100)]
        data = [
            {
                "id": i,
                "category": random.choice(categories),
                "value": random.randint(1, 1000),
                "data": f"data_{i}" * 10  # Make items larger
            }
            for i in range(n)
        ]

        gc.collect()
        memory_before = self._rss_mb()

        print("Grouping by category...")
        start_time = time.time()
        grouped = external_groupby(data, key_func=lambda x: x["category"])
        elapsed = time.time() - start_time

        gc.collect()
        memory_after = self._rss_mb()
        memory_increase = memory_after - memory_before

        print(f"\nExternal GroupBy Statistics:")
        print(f" Items grouped: {n:,}")
        print(f" Groups created: {len(grouped)}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Memory increase: {memory_increase:.1f} MB")
        print(f" Items per MB: {n / max(memory_increase, 0.1):,.0f}")

        # Every category must appear and no item may be lost.
        self.assertEqual(len(grouped), len(categories))
        total_items = sum(len(group) for group in grouped.values())
        self.assertEqual(total_items, n)

        # Check the first 10 items in each group landed in the right bucket.
        for category, items in grouped.items():
            for item in items[:10]:
                self.assertEqual(item["category"], category)

        # Memory should be reasonable.
        self.assertLess(memory_increase, 100, f"Memory increase {memory_increase:.1f} MB is too high")

    def test_stress_test_combined_operations(self):
        """Stress test with combined operations."""
        n = 50_000

        print(f"\nRunning stress test with {n:,} items...")

        # Generate complex records.
        data = []
        for i in range(n):
            data.append({
                "id": i,
                "group": f"group_{i % 50}",
                "value": random.randint(1, 1000),
                "score": random.random(),
                "text": f"This is item {i} with some text" * 5
            })

        gc.collect()
        initial_memory = self._rss_mb()

        # Operation 1: group by.
        print(" 1. Grouping data...")
        grouped = external_groupby(data, key_func=lambda x: x["group"])

        # Operation 2: sort each group by value (replaces the group in place).
        print(" 2. Sorting each group...")
        for group_key, group_items in grouped.items():
            grouped[group_key] = external_sort(group_items, key=lambda x: x["value"])

        # Operation 3: take the top 10 by value from each group.
        print(" 3. Extracting top items...")
        top_items = []
        for group_items in grouped.values():
            top_items.extend(group_items[-10:])

        # Operation 4: final descending sort by score.
        print(" 4. Final sort of top items...")
        final_sorted = external_sort(top_items, key=lambda x: x["score"], reverse=True)

        gc.collect()
        final_memory = self._rss_mb()
        total_memory_increase = final_memory - initial_memory

        print(f"\nStress Test Results:")
        print(f" Initial memory: {initial_memory:.1f} MB")
        print(f" Final memory: {final_memory:.1f} MB")
        print(f" Total increase: {total_memory_increase:.1f} MB")
        print(f" Groups processed: {len(grouped)}")
        print(f" Top items selected: {len(top_items)}")

        # Verify results.
        self.assertEqual(len(grouped), 50)  # 50 groups
        self.assertEqual(len(top_items), 50 * 10)  # Top 10 from each
        self.assertEqual(len(final_sorted), len(top_items))

        # Scores must be non-increasing after the reverse sort.
        for i in range(len(final_sorted) - 1):
            self.assertGreaterEqual(
                final_sorted[i]["score"],
                final_sorted[i + 1]["score"]
            )

        # Memory should still be reasonable after all operations.
        self.assertLess(
            total_memory_increase,
            150,
            f"Memory increase {total_memory_increase:.1f} MB is too high"
        )
|
||||
|
||||
|
||||
# Allow running this test module directly with `python test_external_algorithms.py`.
if __name__ == "__main__":
    unittest.main()
|
||||
309
tests/test_memory_pressure.py
Normal file
309
tests/test_memory_pressure.py
Normal file
@@ -0,0 +1,309 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Memory pressure tests to verify √n behavior under constrained memory.
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import gc
|
||||
import os
|
||||
import psutil
|
||||
import resource
|
||||
import tempfile
|
||||
import shutil
|
||||
import random
|
||||
import time
|
||||
from sqrtspace_spacetime import (
|
||||
SpaceTimeArray, SpaceTimeDict, external_sort,
|
||||
external_groupby, SpaceTimeConfig
|
||||
)
|
||||
|
||||
|
||||
class TestMemoryPressure(unittest.TestCase):
    """Test √n memory behavior under real memory constraints."""

    def setUp(self):
        """Set up test environment with strict memory limits."""
        self.temp_dir = tempfile.mkdtemp()
        self.process = psutil.Process()

        SpaceTimeConfig.set_defaults(
            storage_path=self.temp_dir,
            memory_limit=50 * 1024 * 1024,  # 50MB limit
            chunk_strategy='sqrt_n',
            compression='gzip'
        )

    def tearDown(self):
        """Clean up test environment."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def _rss_mb(self):
        """Resident set size of this process, in megabytes."""
        return self.process.memory_info().rss / 1024 / 1024

    def test_array_under_memory_pressure(self):
        """Test SpaceTimeArray behavior when memory is constrained."""
        print("\n=== Testing SpaceTimeArray under memory pressure ===")

        # Large objects so that full in-memory storage would exceed the limit.
        large_object_size = 1024  # 1KB per object
        n_objects = 100_000  # Total: ~100MB if all in memory

        array = SpaceTimeArray(threshold='auto')

        spillovers = 0
        max_memory = 0
        start_time = time.time()

        for i in range(n_objects):
            array.append({
                'id': i,
                'data': 'x' * large_object_size,
                'timestamp': time.time()
            })

            # Monitor every 1000 items.
            if i % 1000 == 0:
                gc.collect()
                current_memory = self._rss_mb()
                max_memory = max(max_memory, current_memory)

                if i > 0:
                    hot_count = len(array._hot_data)
                    cold_count = len(array._cold_indices)
                    print(f" Items: {i:,} | Memory: {current_memory:.1f}MB | "
                          f"Hot: {hot_count} | Cold: {cold_count}")

                    # Track the high-water mark of spilled (cold) items.
                    if cold_count > spillovers:
                        spillovers = cold_count

        elapsed = time.time() - start_time

        # Random sample of items must remain readable after spillover.
        print("\nVerifying data accessibility...")
        sample_indices = random.sample(range(n_objects), min(100, n_objects))
        for idx in sample_indices:
            obj = array[idx]
            self.assertEqual(obj['id'], idx)
            self.assertEqual(len(obj['data']), large_object_size)

        theoretical_sqrt_n = int(n_objects ** 0.5)
        actual_hot_items = len(array._hot_data)

        print(f"\nResults:")
        print(f" Total items: {n_objects:,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Max memory used: {max_memory:.1f} MB")
        print(f" Theoretical √n: {theoretical_sqrt_n:,}")
        print(f" Actual hot items: {actual_hot_items:,}")
        print(f" Cold items: {len(array._cold_indices):,}")
        print(f" Memory efficiency: {n_objects / max_memory:.0f} items/MB")

        self.assertEqual(len(array), n_objects)
        self.assertLess(max_memory, 150)  # Should use much less than 100MB
        self.assertGreater(spillovers, 0)  # Should have spilled to disk
        self.assertLessEqual(actual_hot_items, theoretical_sqrt_n * 2)  # Within 2x of √n

    def test_dict_with_memory_limit(self):
        """Test SpaceTimeDict with strict memory limit."""
        print("\n=== Testing SpaceTimeDict under memory pressure ===")

        cache = SpaceTimeDict(threshold=1000)  # Keep only 1000 items in memory

        n_items = 50_000
        value_size = 500  # 500 bytes per value

        evictions = 0
        start_time = time.time()

        for i in range(n_items):
            cache[f"key_{i:06d}"] = {
                'id': i,
                'data': 'v' * value_size,
                'accessed': 0
            }

            # Report whenever the cold-item count grows.
            if i % 1000 == 0 and i > 0:
                current_hot = len(cache._hot_data)
                current_cold = len(cache._cold_keys)
                if current_cold > evictions:
                    evictions = current_cold
                    print(f" Items: {i:,} | Hot: {current_hot} | Cold: {current_cold}")

        elapsed = time.time() - start_time

        print("\nTesting LRU behavior...")
        # Touch a few old keys so they count as recently used.
        for i in range(0, 100, 10):
            value = cache[f"key_{i:06d}"]
            value['accessed'] += 1

        # Add more items to trigger eviction.
        for i in range(n_items, n_items + 1000):
            cache[f"key_{i:06d}"] = {'id': i, 'data': 'x' * value_size}

        stats = cache.get_stats()

        print(f"\nResults:")
        print(f" Total items: {len(cache):,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Hot items: {len(cache._hot_data)}")
        print(f" Cold items: {len(cache._cold_keys)}")
        print(f" Stats: {stats}")

        # A sample of the original keys must still resolve to values.
        sample_keys = random.sample([f"key_{i:06d}" for i in range(n_items)], 100)
        for key in sample_keys:
            self.assertIn(key, cache)
            self.assertIsNotNone(cache[key])

    def test_algorithm_memory_scaling(self):
        """Test that algorithms scale with √n memory usage."""
        print("\n=== Testing algorithm memory scaling ===")

        datasets = [10_000, 40_000, 90_000, 160_000]  # n, 4n, 9n, 16n
        results = []

        for n in datasets:
            print(f"\nTesting with n = {n:,}")

            data = [random.randint(1, 1_000_000) for _ in range(n)]

            gc.collect()
            mem_before = self._rss_mb()

            sorted_data = external_sort(data)

            gc.collect()
            mem_after = self._rss_mb()
            mem_used = mem_after - mem_before

            # Verify correctness on a prefix of the result.
            self.assertEqual(len(sorted_data), n)
            for i in range(min(1000, len(sorted_data) - 1)):
                self.assertLessEqual(sorted_data[i], sorted_data[i + 1])

            sqrt_n = int(n ** 0.5)
            results.append({
                'n': n,
                'sqrt_n': sqrt_n,
                'memory_used': mem_used,
                'ratio': mem_used / max(sqrt_n * 8 / 1024 / 1024, 0.001)  # 8 bytes per int
            })

            print(f" √n = {sqrt_n:,}")
            print(f" Memory used: {mem_used:.2f} MB")
            print(f" Ratio to theoretical: {results[-1]['ratio']:.2f}x")

        print("\nScaling Analysis:")
        print("n | √n | Memory (MB) | Ratio")
        print("---------|---------|-------------|-------")
        for r in results:
            print(f"{r['n']:8,} | {r['sqrt_n']:7,} | {r['memory_used']:11.2f} | {r['ratio']:6.2f}x")

        # As n increases 4x, memory should increase roughly 2x (√n scaling).
        for i in range(1, len(results)):
            n_ratio = results[i]['n'] / results[i-1]['n']
            mem_ratio = results[i]['memory_used'] / max(results[i-1]['memory_used'], 0.1)
            expected_ratio = n_ratio ** 0.5

            print(f"\nn increased {n_ratio:.1f}x, memory increased {mem_ratio:.1f}x "
                  f"(expected ~{expected_ratio:.1f}x)")

            # Allow some variance due to overheads.
            self.assertLess(mem_ratio, expected_ratio * 3,
                            f"Memory scaling worse than √n: {mem_ratio:.1f}x vs {expected_ratio:.1f}x")

    def test_concurrent_memory_pressure(self):
        """Test behavior under concurrent access with memory pressure."""
        print("\n=== Testing concurrent access under memory pressure ===")

        import threading
        import queue

        array = SpaceTimeArray(threshold=500)
        errors = queue.Queue()
        n_threads = 4
        items_per_thread = 25_000

        def worker(thread_id, start_idx):
            # Appends its share of items; any exception is queued, not raised,
            # so the main thread can report all failures.
            try:
                for i in range(items_per_thread):
                    array.append({
                        'thread': thread_id,
                        'index': start_idx + i,
                        'data': f"thread_{thread_id}_item_{i}" * 50
                    })

                    # Occasionally read random items.
                    if i % 100 == 0 and len(array) > 10:
                        _ = array[random.randint(0, len(array) - 1)]
            except Exception as e:
                errors.put((thread_id, str(e)))

        threads = []
        start_time = time.time()
        for tid in range(n_threads):
            t = threading.Thread(
                target=worker,
                args=(tid, tid * items_per_thread)
            )
            threads.append(t)
            t.start()

        # Sample memory while the workers run.
        max_memory = 0
        while any(t.is_alive() for t in threads):
            max_memory = max(max_memory, self._rss_mb())
            time.sleep(0.1)

        for t in threads:
            t.join()

        elapsed = time.time() - start_time

        # Drain any errors queued by the workers.
        error_list = []
        while not errors.empty():
            error_list.append(errors.get())

        print(f"\nResults:")
        print(f" Threads: {n_threads}")
        print(f" Total items: {n_threads * items_per_thread:,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Max memory: {max_memory:.1f} MB")
        print(f" Errors: {len(error_list)}")
        print(f" Final array size: {len(array):,}")

        self.assertEqual(len(error_list), 0, f"Thread errors: {error_list}")
        self.assertEqual(len(array), n_threads * items_per_thread)
        self.assertLess(max_memory, 200)  # Should handle memory pressure
|
||||
|
||||
|
||||
# Allow running this test module directly with `python test_memory_pressure.py`.
if __name__ == "__main__":
    unittest.main()
|
||||
202
tests/test_spacetime_array.py
Normal file
202
tests/test_spacetime_array.py
Normal file
@@ -0,0 +1,202 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tests for SpaceTimeArray with memory pressure simulation.
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import tempfile
|
||||
import shutil
|
||||
import os
|
||||
import gc
|
||||
import psutil
|
||||
from sqrtspace_spacetime import SpaceTimeArray, SpaceTimeConfig
|
||||
|
||||
|
||||
class TestSpaceTimeArray(unittest.TestCase):
    """Test SpaceTimeArray functionality."""

    def setUp(self):
        """Set up test environment."""
        self.temp_dir = tempfile.mkdtemp()
        SpaceTimeConfig.set_defaults(
            storage_path=self.temp_dir,
            memory_limit=50 * 1024 * 1024,  # 50MB for testing
            chunk_strategy='sqrt_n'
        )

    def tearDown(self):
        """Clean up test environment."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_basic_operations(self):
        """Test basic array operations."""
        array = SpaceTimeArray(threshold=100)

        # Append and length.
        for i in range(50):
            array.append(f"item_{i}")

        self.assertEqual(len(array), 50)
        self.assertEqual(array[0], "item_0")
        self.assertEqual(array[49], "item_49")

        # Negative indexing.
        self.assertEqual(array[-1], "item_49")
        self.assertEqual(array[-50], "item_0")

        # Slicing.
        slice_result = array[10:20]
        self.assertEqual(len(slice_result), 10)
        self.assertEqual(slice_result[0], "item_10")

    def test_automatic_spillover(self):
        """Test automatic spillover to disk."""
        # A tiny threshold forces spillover quickly.
        array = SpaceTimeArray(threshold=10)

        for i in range(100):
            array.append(f"value_{i}")

        # Spillover must have happened, and hot storage must respect the cap.
        self.assertEqual(len(array), 100)
        self.assertGreater(len(array._cold_indices), 0)
        self.assertLessEqual(len(array._hot_data), array.threshold)

        # Every item must remain readable after spillover.
        for i in range(100):
            self.assertEqual(array[i], f"value_{i}")

    def test_memory_pressure_handling(self):
        """Test behavior under memory pressure."""
        array = SpaceTimeArray()  # auto threshold

        large_item = "x" * 10000  # 10KB string

        for i in range(1000):
            array.append(f"{large_item}_{i}")

            # Check memory usage periodically.
            if i % 100 == 0:
                memory_mb = psutil.Process().memory_info().rss / 1024 / 1024
                self.assertLess(memory_mb, 200, f"Memory usage too high at iteration {i}")

        # All items must still be accessible.
        self.assertEqual(len(array), 1000)
        self.assertTrue(array[0].endswith("_0"))
        self.assertTrue(array[999].endswith("_999"))

    def test_large_dataset_sqrt_n_memory(self):
        """Test √n memory usage with large dataset."""
        SpaceTimeConfig.set_defaults(chunk_strategy='sqrt_n')

        n = 10000  # Total items
        sqrt_n = int(n ** 0.5)  # Expected in-memory item count

        array = SpaceTimeArray()

        gc.collect()
        process = psutil.Process()
        initial_memory = process.memory_info().rss

        for i in range(n):
            array.append({"id": i, "data": f"item_{i}" * 10})

        gc.collect()
        final_memory = process.memory_info().rss
        memory_increase_mb = (final_memory - initial_memory) / 1024 / 1024

        # Hot set bounded by ~√n; the rest must be cold.
        self.assertEqual(len(array), n)
        self.assertLessEqual(len(array._hot_data), sqrt_n * 2)  # Allow some buffer
        self.assertGreater(len(array._cold_indices), n - sqrt_n * 2)

        # Rough estimate: each item ~100 bytes, so n items = ~1MB;
        # with sqrt_n only ~10KB should stay in memory.
        self.assertLess(memory_increase_mb, 10, f"Memory increase {memory_increase_mb}MB is too high")

        # Random access must still work against cold storage.
        import random
        for _ in range(100):
            idx = random.randint(0, n - 1)
            self.assertEqual(array[idx]["id"], idx)

    def test_persistence_across_sessions(self):
        """Test data persistence when array is recreated."""
        storage_path = os.path.join(self.temp_dir, "persist_test")

        # Create and populate the first array.
        array1 = SpaceTimeArray(threshold=10, storage_path=storage_path)
        for i in range(50):
            array1.append(f"persistent_{i}")

        # Force spillover before dropping the object.
        array1._check_and_spill()
        del array1

        # A new array over the same storage path must see the data.
        array2 = SpaceTimeArray(threshold=10, storage_path=storage_path)

        self.assertEqual(len(array2), 50)
        for i in range(50):
            self.assertEqual(array2[i], f"persistent_{i}")

    def test_concurrent_access(self):
        """Test thread-safe access to array."""
        import threading

        array = SpaceTimeArray(threshold=100)
        errors = []

        def writer(start, count):
            try:
                for i in range(start, start + count):
                    array.append(f"thread_{i}")
            except Exception as e:
                errors.append(e)

        def reader(count):
            try:
                for _ in range(count):
                    if len(array) > 0:
                        _ = array[0]  # Just access, don't verify
            except Exception as e:
                errors.append(e)

        # Five writers of 100 items each, plus three readers.
        threads = [threading.Thread(target=writer, args=(i * 100, 100)) for i in range(5)]
        threads += [threading.Thread(target=reader, args=(50,)) for _ in range(3)]

        for t in threads:
            t.start()
        for t in threads:
            t.join()

        self.assertEqual(len(errors), 0, f"Thread errors: {errors}")
        self.assertEqual(len(array), 500)
|
||||
|
||||
|
||||
# Allow running this test module directly with `python test_spacetime_array.py`.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user