This commit is contained in:
2025-07-20 04:11:04 -04:00
commit 69b521b549
40 changed files with 7781 additions and 0 deletions

1
tests/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Ubiquity SpaceTime Test Suite

View File

@@ -0,0 +1,234 @@
#!/usr/bin/env python3
"""
Tests for external algorithms with memory pressure.
"""
import unittest
import random
import gc
import psutil
import time
from sqrtspace_spacetime import external_sort, external_groupby, SpaceTimeConfig
class TestExternalAlgorithms(unittest.TestCase):
    """Test external algorithms under memory constraints.

    Exercises ``external_sort`` and ``external_groupby`` from
    ``sqrtspace_spacetime`` on datasets up to one million items, sampling
    this process's resident-set size (RSS) via psutil before and after
    each operation to assert the memory increase stays far below the
    full dataset size.
    """

    def setUp(self):
        """Set up test environment."""
        # Configure a process-wide memory budget and sqrt(n) chunking
        # for every test in this class.
        SpaceTimeConfig.set_defaults(
            memory_limit=100 * 1024 * 1024,  # 100MB limit
            chunk_strategy='sqrt_n'
        )
        # Handle used throughout to read RSS in MB.
        self.process = psutil.Process()

    def test_external_sort_small(self):
        """Test external sort with small dataset."""
        data = [random.randint(1, 1000) for _ in range(1000)]
        sorted_data = external_sort(data)
        # Verify sorting: same length, non-decreasing order.
        self.assertEqual(len(sorted_data), len(data))
        for i in range(len(sorted_data) - 1):
            self.assertLessEqual(sorted_data[i], sorted_data[i + 1])
        # Verify all elements present (result is a permutation of the input).
        self.assertEqual(sorted(data), sorted_data)

    def test_external_sort_large_with_memory_tracking(self):
        """Test external sort with large dataset and memory tracking."""
        n = 1_000_000  # 1 million items
        # Generate data
        print(f"\nGenerating {n:,} random integers...")
        data = [random.randint(1, 10_000_000) for _ in range(n)]
        # Track memory before sorting; collect first so the baseline is stable.
        gc.collect()
        memory_before = self.process.memory_info().rss / 1024 / 1024
        peak_memory = memory_before
        # Sort with memory tracking
        print("Sorting with external_sort...")
        start_time = time.time()
        # Create a custom monitoring function; each call records one RSS sample.
        memory_samples = []
        def monitor_memory():
            current = self.process.memory_info().rss / 1024 / 1024
            memory_samples.append(current)
            return current
        # Sort data
        # NOTE(review): monitor_memory is only invoked in the verification
        # loop below, so peak_memory reflects memory *after* the sort, not
        # while external_sort itself is running.
        sorted_data = external_sort(data)
        # Measure final state
        gc.collect()
        memory_after = self.process.memory_info().rss / 1024 / 1024
        elapsed = time.time() - start_time
        # Sample memory during verification: order is spot-checked on a
        # stride of 10,000 and RSS sampled every 100,000 items.
        for i in range(0, len(sorted_data) - 1, 10000):
            self.assertLessEqual(sorted_data[i], sorted_data[i + 1])
            if i % 100000 == 0:
                peak_memory = max(peak_memory, monitor_memory())
        # Calculate statistics
        memory_increase = memory_after - memory_before
        theoretical_sqrt_n = int(n ** 0.5)
        print(f"\nExternal Sort Statistics:")
        print(f" Items sorted: {n:,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Memory before: {memory_before:.1f} MB")
        print(f" Memory after: {memory_after:.1f} MB")
        print(f" Peak memory: {peak_memory:.1f} MB")
        print(f" Memory increase: {memory_increase:.1f} MB")
        print(f" Theoretical √n: {theoretical_sqrt_n:,} items")
        print(f" Items per MB: {n / max(memory_increase, 0.1):,.0f}")
        # Verify memory efficiency
        # With 1M items, sqrt(n) = 1000, so memory should be much less than full dataset
        self.assertLess(memory_increase, 50, f"Memory increase {memory_increase:.1f} MB is too high")
        # Verify correctness on sample (1000 random adjacent pairs).
        sample_indices = random.sample(range(len(sorted_data) - 1), min(1000, len(sorted_data) - 1))
        for i in sample_indices:
            self.assertLessEqual(sorted_data[i], sorted_data[i + 1])

    def test_external_groupby_memory_efficiency(self):
        """Test external groupby with memory tracking."""
        n = 100_000
        # Generate data with limited number of groups (100 categories).
        print(f"\nGenerating {n:,} items for groupby...")
        categories = [f"category_{i}" for i in range(100)]
        data = [
            {
                "id": i,
                "category": random.choice(categories),
                "value": random.randint(1, 1000),
                "data": f"data_{i}" * 10  # Make items larger
            }
            for i in range(n)
        ]
        # Track memory
        gc.collect()
        memory_before = self.process.memory_info().rss / 1024 / 1024
        # Group by category
        print("Grouping by category...")
        start_time = time.time()
        grouped = external_groupby(data, key_func=lambda x: x["category"])
        elapsed = time.time() - start_time
        # Measure memory
        gc.collect()
        memory_after = self.process.memory_info().rss / 1024 / 1024
        memory_increase = memory_after - memory_before
        print(f"\nExternal GroupBy Statistics:")
        print(f" Items grouped: {n:,}")
        print(f" Groups created: {len(grouped)}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Memory increase: {memory_increase:.1f} MB")
        print(f" Items per MB: {n / max(memory_increase, 0.1):,.0f}")
        # Verify correctness: one group per category, no items lost.
        self.assertEqual(len(grouped), len(categories))
        total_items = sum(len(group) for group in grouped.values())
        self.assertEqual(total_items, n)
        # Verify grouping
        for category, items in grouped.items():
            for item in items[:10]:  # Check first 10 items in each group
                self.assertEqual(item["category"], category)
        # Memory should be reasonable
        self.assertLess(memory_increase, 100, f"Memory increase {memory_increase:.1f} MB is too high")

    def test_stress_test_combined_operations(self):
        """Stress test with combined operations.

        Pipeline: group -> sort each group by value -> take top 10 per
        group -> final descending sort of the selected items by score.
        """
        n = 50_000
        print(f"\nRunning stress test with {n:,} items...")
        # Generate complex data (50 groups via i % 50).
        data = []
        for i in range(n):
            data.append({
                "id": i,
                "group": f"group_{i % 50}",
                "value": random.randint(1, 1000),
                "score": random.random(),
                "text": f"This is item {i} with some text" * 5
            })
        # Track initial memory
        gc.collect()
        initial_memory = self.process.memory_info().rss / 1024 / 1024
        # Operation 1: Group by
        print(" 1. Grouping data...")
        grouped = external_groupby(data, key_func=lambda x: x["group"])
        # Operation 2: Sort each group
        print(" 2. Sorting each group...")
        for group_key, group_items in grouped.items():
            # Sort by value
            sorted_items = external_sort(
                group_items,
                key=lambda x: x["value"]
            )
            grouped[group_key] = sorted_items
        # Operation 3: Extract top items from each group
        print(" 3. Extracting top items...")
        top_items = []
        for group_items in grouped.values():
            # Get top 10 by value (groups are sorted ascending, so take the tail).
            top_items.extend(group_items[-10:])
        # Operation 4: Final sort
        print(" 4. Final sort of top items...")
        final_sorted = external_sort(
            top_items,
            key=lambda x: x["score"],
            reverse=True
        )
        # Measure final memory
        gc.collect()
        final_memory = self.process.memory_info().rss / 1024 / 1024
        total_memory_increase = final_memory - initial_memory
        print(f"\nStress Test Results:")
        print(f" Initial memory: {initial_memory:.1f} MB")
        print(f" Final memory: {final_memory:.1f} MB")
        print(f" Total increase: {total_memory_increase:.1f} MB")
        print(f" Groups processed: {len(grouped)}")
        print(f" Top items selected: {len(top_items)}")
        # Verify results
        self.assertEqual(len(grouped), 50)  # 50 groups
        self.assertEqual(len(top_items), 50 * 10)  # Top 10 from each
        self.assertEqual(len(final_sorted), len(top_items))
        # Verify sorting (descending by score).
        for i in range(len(final_sorted) - 1):
            self.assertGreaterEqual(
                final_sorted[i]["score"],
                final_sorted[i + 1]["score"]
            )
        # Memory should still be reasonable after all operations
        self.assertLess(
            total_memory_increase,
            150,
            f"Memory increase {total_memory_increase:.1f} MB is too high"
        )
# Allow running this test module directly (outside a test runner).
if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,309 @@
#!/usr/bin/env python3
"""
Memory pressure tests to verify √n behavior under constrained memory.
"""
import unittest
import gc
import os
import psutil
import resource
import tempfile
import shutil
import random
import time
from sqrtspace_spacetime import (
SpaceTimeArray, SpaceTimeDict, external_sort,
external_groupby, SpaceTimeConfig
)
class TestMemoryPressure(unittest.TestCase):
    """Test √n memory behavior under real memory constraints.

    Each test constrains the library to a 50MB budget (with gzip
    compression and sqrt(n) chunking, spilling to a per-test temp dir)
    and then measures RSS growth while loading data into SpaceTimeArray,
    SpaceTimeDict, or the external algorithms.

    NOTE(review): several tests read private attributes of the library
    collections (``_hot_data``, ``_cold_indices``, ``_cold_keys``) --
    they will break if the library's internal layout changes.
    """

    def setUp(self):
        """Set up test environment."""
        self.temp_dir = tempfile.mkdtemp()
        self.process = psutil.Process()
        # Configure strict memory limits; spillover goes to the temp dir.
        SpaceTimeConfig.set_defaults(
            storage_path=self.temp_dir,
            memory_limit=50 * 1024 * 1024,  # 50MB limit
            chunk_strategy='sqrt_n',
            compression='gzip'
        )

    def tearDown(self):
        """Clean up test environment."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_array_under_memory_pressure(self):
        """Test SpaceTimeArray behavior when memory is constrained."""
        print("\n=== Testing SpaceTimeArray under memory pressure ===")
        # Create large objects that will force spillover
        large_object_size = 1024  # 1KB per object
        n_objects = 100_000  # Total: ~100MB if all in memory
        array = SpaceTimeArray(threshold='auto')
        # Track metrics
        spillovers = 0
        max_memory = 0
        start_time = time.time()
        # Add objects and monitor memory
        for i in range(n_objects):
            # Create a large object
            obj = {
                'id': i,
                'data': 'x' * large_object_size,
                'timestamp': time.time()
            }
            array.append(obj)
            # Monitor every 1000 items
            if i % 1000 == 0:
                gc.collect()
                current_memory = self.process.memory_info().rss / 1024 / 1024
                max_memory = max(max_memory, current_memory)
                if i > 0:
                    # Private internals: in-memory vs spilled item counts.
                    hot_count = len(array._hot_data)
                    cold_count = len(array._cold_indices)
                    print(f" Items: {i:,} | Memory: {current_memory:.1f}MB | "
                    f"Hot: {hot_count} | Cold: {cold_count}")
                    # Check if spillover is happening (tracks the max cold count seen).
                    if cold_count > spillovers:
                        spillovers = cold_count
        elapsed = time.time() - start_time
        # Verify all data is accessible
        print("\nVerifying data accessibility...")
        sample_indices = random.sample(range(n_objects), min(100, n_objects))
        for idx in sample_indices:
            obj = array[idx]
            self.assertEqual(obj['id'], idx)
            self.assertEqual(len(obj['data']), large_object_size)
        # Calculate statistics
        theoretical_sqrt_n = int(n_objects ** 0.5)
        actual_hot_items = len(array._hot_data)
        print(f"\nResults:")
        print(f" Total items: {n_objects:,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Max memory used: {max_memory:.1f} MB")
        print(f" Theoretical √n: {theoretical_sqrt_n:,}")
        print(f" Actual hot items: {actual_hot_items:,}")
        print(f" Cold items: {len(array._cold_indices):,}")
        print(f" Memory efficiency: {n_objects / max_memory:.0f} items/MB")
        # Assertions
        self.assertEqual(len(array), n_objects)
        self.assertLess(max_memory, 150)  # Should use much less than 100MB
        self.assertGreater(spillovers, 0)  # Should have spilled to disk
        self.assertLessEqual(actual_hot_items, theoretical_sqrt_n * 2)  # Within 2x of √n

    def test_dict_with_memory_limit(self):
        """Test SpaceTimeDict with strict memory limit."""
        print("\n=== Testing SpaceTimeDict under memory pressure ===")
        # Create dictionary with explicit threshold
        cache = SpaceTimeDict(threshold=1000)  # Keep only 1000 items in memory
        n_items = 50_000
        value_size = 500  # 500 bytes per value
        # Track evictions
        evictions = 0
        start_time = time.time()
        # Add items
        for i in range(n_items):
            key = f"key_{i:06d}"
            value = {
                'id': i,
                'data': 'v' * value_size,
                'accessed': 0
            }
            cache[key] = value
            # Check for evictions (max cold-key count seen so far).
            if i % 1000 == 0 and i > 0:
                current_hot = len(cache._hot_data)
                current_cold = len(cache._cold_keys)
                if current_cold > evictions:
                    evictions = current_cold
                print(f" Items: {i:,} | Hot: {current_hot} | Cold: {current_cold}")
        elapsed = time.time() - start_time
        # Test access patterns (LRU behavior)
        print("\nTesting LRU behavior...")
        # Access some old items
        # NOTE(review): mutating the fetched dict assumes the write persists
        # in the cache even for items served from cold storage -- confirm
        # against SpaceTimeDict's semantics.
        for i in range(0, 100, 10):
            key = f"key_{i:06d}"
            value = cache[key]
            value['accessed'] += 1
        # Add more items to trigger eviction
        for i in range(n_items, n_items + 1000):
            cache[f"key_{i:06d}"] = {'id': i, 'data': 'x' * value_size}
        # Recent items should still be hot
        stats = cache.get_stats()
        print(f"\nResults:")
        print(f" Total items: {len(cache):,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Hot items: {len(cache._hot_data)}")
        print(f" Cold items: {len(cache._cold_keys)}")
        print(f" Stats: {stats}")
        # Verify all items accessible (100 random original keys).
        sample_keys = random.sample([f"key_{i:06d}" for i in range(n_items)], 100)
        for key in sample_keys:
            self.assertIn(key, cache)
            value = cache[key]
            self.assertIsNotNone(value)

    def test_algorithm_memory_scaling(self):
        """Test that algorithms scale with √n memory usage."""
        print("\n=== Testing algorithm memory scaling ===")
        # Sizes chosen so √n doubles each step: √n = 100, 200, 300, 400.
        datasets = [10_000, 40_000, 90_000, 160_000]  # n, 4n, 9n, 16n
        results = []
        for n in datasets:
            print(f"\nTesting with n = {n:,}")
            # Generate data
            data = [random.randint(1, 1_000_000) for _ in range(n)]
            # Measure memory for sorting
            gc.collect()
            mem_before = self.process.memory_info().rss / 1024 / 1024
            sorted_data = external_sort(data)
            gc.collect()
            mem_after = self.process.memory_info().rss / 1024 / 1024
            mem_used = mem_after - mem_before
            # Verify correctness (first 1000 adjacent pairs).
            self.assertEqual(len(sorted_data), n)
            for i in range(min(1000, len(sorted_data) - 1)):
                self.assertLessEqual(sorted_data[i], sorted_data[i + 1])
            sqrt_n = int(n ** 0.5)
            results.append({
                'n': n,
                'sqrt_n': sqrt_n,
                'memory_used': mem_used,
                'ratio': mem_used / max(sqrt_n * 8 / 1024 / 1024, 0.001)  # 8 bytes per int
            })
            print(f" √n = {sqrt_n:,}")
            print(f" Memory used: {mem_used:.2f} MB")
            print(f" Ratio to theoretical: {results[-1]['ratio']:.2f}x")
        # Verify √n scaling
        print("\nScaling Analysis:")
        print("n | √n | Memory (MB) | Ratio")
        print("---------|---------|-------------|-------")
        for r in results:
            print(f"{r['n']:8,} | {r['sqrt_n']:7,} | {r['memory_used']:11.2f} | {r['ratio']:6.2f}x")
        # Memory should scale roughly with √n
        # As n increases 4x, memory should increase ~2x
        for i in range(1, len(results)):
            n_ratio = results[i]['n'] / results[i-1]['n']
            mem_ratio = results[i]['memory_used'] / max(results[i-1]['memory_used'], 0.1)
            expected_ratio = n_ratio ** 0.5
            print(f"\nn increased {n_ratio:.1f}x, memory increased {mem_ratio:.1f}x "
            f"(expected ~{expected_ratio:.1f}x)")
            # Allow some variance due to overheads
            self.assertLess(mem_ratio, expected_ratio * 3,
            f"Memory scaling worse than √n: {mem_ratio:.1f}x vs {expected_ratio:.1f}x")

    def test_concurrent_memory_pressure(self):
        """Test behavior under concurrent access with memory pressure."""
        print("\n=== Testing concurrent access under memory pressure ===")
        import threading
        import queue
        array = SpaceTimeArray(threshold=500)
        errors = queue.Queue()  # thread-safe collector for worker exceptions
        n_threads = 4
        items_per_thread = 25_000

        def worker(thread_id, start_idx):
            # Appends its slice of items; every 100th iteration also reads a
            # random element to mix reads with writes.
            try:
                for i in range(items_per_thread):
                    item = {
                        'thread': thread_id,
                        'index': start_idx + i,
                        'data': f"thread_{thread_id}_item_{i}" * 50
                    }
                    array.append(item)
                    # Occasionally read random items
                    if i % 100 == 0 and len(array) > 10:
                        idx = random.randint(0, len(array) - 1)
                        _ = array[idx]
            except Exception as e:
                errors.put((thread_id, str(e)))
        # Start threads
        threads = []
        start_time = time.time()
        for i in range(n_threads):
            t = threading.Thread(
                target=worker,
                args=(i, i * items_per_thread)
            )
            threads.append(t)
            t.start()
        # Monitor memory while threads run.
        # NOTE(review): if all workers finish before this loop starts,
        # max_memory stays 0 and the memory assertion passes trivially.
        max_memory = 0
        while any(t.is_alive() for t in threads):
            current_memory = self.process.memory_info().rss / 1024 / 1024
            max_memory = max(max_memory, current_memory)
            time.sleep(0.1)
        # Wait for completion
        for t in threads:
            t.join()
        elapsed = time.time() - start_time
        # Check for errors
        error_list = []
        while not errors.empty():
            error_list.append(errors.get())
        print(f"\nResults:")
        print(f" Threads: {n_threads}")
        print(f" Total items: {n_threads * items_per_thread:,}")
        print(f" Time taken: {elapsed:.2f} seconds")
        print(f" Max memory: {max_memory:.1f} MB")
        print(f" Errors: {len(error_list)}")
        print(f" Final array size: {len(array):,}")
        # Assertions
        self.assertEqual(len(error_list), 0, f"Thread errors: {error_list}")
        self.assertEqual(len(array), n_threads * items_per_thread)
        self.assertLess(max_memory, 200)  # Should handle memory pressure
# Allow running this test module directly (outside a test runner).
if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,202 @@
#!/usr/bin/env python3
"""
Tests for SpaceTimeArray with memory pressure simulation.
"""
import unittest
import tempfile
import shutil
import os
import gc
import psutil
from sqrtspace_spacetime import SpaceTimeArray, SpaceTimeConfig
class TestSpaceTimeArray(unittest.TestCase):
    """Test SpaceTimeArray functionality.

    Covers basic list-like operations, automatic spillover to disk,
    behavior under memory pressure, √n hot-set sizing, persistence of
    spilled data across instances, and thread-safe concurrent access.

    NOTE(review): some tests touch private internals (``_hot_data``,
    ``_cold_indices``, ``_check_and_spill``) and will break if the
    library's internal layout changes.
    """

    def setUp(self):
        """Set up test environment."""
        self.temp_dir = tempfile.mkdtemp()
        # Spillover storage goes to the per-test temp dir.
        SpaceTimeConfig.set_defaults(
            storage_path=self.temp_dir,
            memory_limit=50 * 1024 * 1024,  # 50MB for testing
            chunk_strategy='sqrt_n'
        )

    def tearDown(self):
        """Clean up test environment."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_basic_operations(self):
        """Test basic array operations."""
        array = SpaceTimeArray(threshold=100)
        # Test append
        for i in range(50):
            array.append(f"item_{i}")
        self.assertEqual(len(array), 50)
        self.assertEqual(array[0], "item_0")
        self.assertEqual(array[49], "item_49")
        # Test negative indexing
        self.assertEqual(array[-1], "item_49")
        self.assertEqual(array[-50], "item_0")
        # Test slice
        slice_result = array[10:20]
        self.assertEqual(len(slice_result), 10)
        self.assertEqual(slice_result[0], "item_10")

    def test_automatic_spillover(self):
        """Test automatic spillover to disk."""
        # Create array with small threshold so spillover triggers quickly.
        array = SpaceTimeArray(threshold=10)
        # Add more items than threshold
        for i in range(100):
            array.append(f"value_{i}")
        # Check that spillover happened: some items cold, hot set bounded.
        self.assertEqual(len(array), 100)
        self.assertGreater(len(array._cold_indices), 0)
        self.assertLessEqual(len(array._hot_data), array.threshold)
        # Verify all items are accessible (including spilled ones).
        for i in range(100):
            self.assertEqual(array[i], f"value_{i}")

    def test_memory_pressure_handling(self):
        """Test behavior under memory pressure."""
        # Create array with auto threshold
        array = SpaceTimeArray()
        # Generate large data items
        large_item = "x" * 10000  # 10KB string
        # Add items until memory pressure detected
        for i in range(1000):
            array.append(f"{large_item}_{i}")
            # Check memory usage periodically
            if i % 100 == 0:
                process = psutil.Process()
                memory_mb = process.memory_info().rss / 1024 / 1024
                # Ensure we're not using excessive memory
                self.assertLess(memory_mb, 200, f"Memory usage too high at iteration {i}")
        # Verify all items still accessible (check the id suffixes).
        self.assertEqual(len(array), 1000)
        self.assertTrue(array[0].endswith("_0"))
        self.assertTrue(array[999].endswith("_999"))

    def test_large_dataset_sqrt_n_memory(self):
        """Test √n memory usage with large dataset."""
        # Configure for sqrt_n strategy
        SpaceTimeConfig.set_defaults(chunk_strategy='sqrt_n')
        n = 10000  # Total items
        sqrt_n = int(n ** 0.5)  # Expected memory items (100 for n=10000)
        array = SpaceTimeArray()
        # Track initial memory
        gc.collect()
        process = psutil.Process()
        initial_memory = process.memory_info().rss
        # Add n items
        for i in range(n):
            array.append({"id": i, "data": f"item_{i}" * 10})
        # Force garbage collection
        gc.collect()
        # Check memory usage
        final_memory = process.memory_info().rss
        memory_increase_mb = (final_memory - initial_memory) / 1024 / 1024
        # Verify sqrt_n behavior: hot set within 2x of √n, rest spilled.
        self.assertEqual(len(array), n)
        self.assertLessEqual(len(array._hot_data), sqrt_n * 2)  # Allow some buffer
        self.assertGreater(len(array._cold_indices), n - sqrt_n * 2)
        # Memory should be much less than storing all items
        # Rough estimate: each item ~100 bytes, so n items = ~1MB
        # With sqrt_n, should use ~10KB in memory
        self.assertLess(memory_increase_mb, 10, f"Memory increase {memory_increase_mb}MB is too high")
        # Verify random access still works
        import random
        for _ in range(100):
            idx = random.randint(0, n - 1)
            self.assertEqual(array[idx]["id"], idx)

    def test_persistence_across_sessions(self):
        """Test data persistence when array is recreated."""
        storage_path = os.path.join(self.temp_dir, "persist_test")
        # Create and populate array
        array1 = SpaceTimeArray(threshold=10, storage_path=storage_path)
        for i in range(50):
            array1.append(f"persistent_{i}")
        # Force spillover
        # NOTE(review): relies on the private _check_and_spill() hook to
        # flush hot items to disk before the instance is dropped.
        array1._check_and_spill()
        del array1
        # Create new array with same storage path
        array2 = SpaceTimeArray(threshold=10, storage_path=storage_path)
        # Data should be accessible
        self.assertEqual(len(array2), 50)
        for i in range(50):
            self.assertEqual(array2[i], f"persistent_{i}")

    def test_concurrent_access(self):
        """Test thread-safe access to array."""
        import threading
        array = SpaceTimeArray(threshold=100)
        errors = []  # NOTE(review): plain list append is GIL-atomic in CPython

        def writer(start, count):
            # Appends `count` items; any exception is recorded, not raised.
            try:
                for i in range(start, start + count):
                    array.append(f"thread_{i}")
            except Exception as e:
                errors.append(e)

        def reader(count):
            # Repeatedly reads the first element while writers run.
            try:
                for _ in range(count):
                    if len(array) > 0:
                        _ = array[0]  # Just access, don't verify
            except Exception as e:
                errors.append(e)
        # Create threads: 5 writers (100 items each) and 3 readers.
        threads = []
        for i in range(5):
            t = threading.Thread(target=writer, args=(i * 100, 100))
            threads.append(t)
        for i in range(3):
            t = threading.Thread(target=reader, args=(50,))
            threads.append(t)
        # Run threads
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        # Check for errors
        self.assertEqual(len(errors), 0, f"Thread errors: {errors}")
        self.assertEqual(len(array), 500)
# Allow running this test module directly (outside a test runner).
if __name__ == "__main__":
    unittest.main()