This commit is contained in:
2025-07-20 03:56:21 -04:00
commit 59539f4daa
65 changed files with 6964 additions and 0 deletions

76
dashboard/README.md Normal file
View File

@@ -0,0 +1,76 @@
# Interactive Dashboard
A comprehensive Streamlit dashboard for exploring space-time tradeoffs in computing systems.
## Features
### 1. Overview Page
- Visualizes Williams' theoretical bound: TIME[t] ⊆ SPACE[√(t log t)]
- Shows the fundamental space-time tradeoff curve
- Compares theoretical vs practical bounds
### 2. Theoretical Explorer
- Interactive parameter adjustment
- Real-time visualization of space requirements for given time bounds
- Constant factor analysis
### 3. Experimental Results
- **Maze Solver**: BFS vs memory-limited algorithms
- **Sorting**: In-memory vs checkpointed sorting
- **Streaming**: Sliding window performance
- Summary of all experimental findings
### 4. Real-World Systems
- **Databases**: Query optimization and join algorithms
- **LLMs**: Memory optimization techniques
- **Distributed Computing**: MapReduce and shuffle optimization
### 5. Tradeoff Calculator
- Input your system parameters
- Get recommendations for optimal configurations
- Compare different strategies
### 6. Interactive Demos
- Sorting visualizer
- Cache hierarchy simulator
- Live demonstrations of space-time tradeoffs
## Running the Dashboard
### Option 1: Using the launcher script
```bash
cd dashboard
python run_dashboard.py
```
### Option 2: Direct streamlit command
```bash
cd dashboard
pip install -r requirements.txt
streamlit run app.py
```
The dashboard will open in your default browser at http://localhost:8501
## Technology Stack
- **Streamlit**: Interactive web framework
- **Plotly**: Advanced interactive visualizations
- **Pandas**: Data manipulation
- **NumPy**: Numerical computations
## Customization
The dashboard is fully customizable:
- Add new visualizations to `app.py`
- Modify color schemes in the CSS section
- Add new pages in the sidebar navigation
- Import real experimental data to replace simulated data
## Screenshots
The dashboard includes:
- Dark theme optimized for data visualization
- Responsive layout for different screen sizes
- Interactive controls for exploring parameters
- Real-time updates as you adjust settings

728
dashboard/app.py Normal file
View File

@@ -0,0 +1,728 @@
"""
Interactive Dashboard for Space-Time Tradeoffs
Visualizes Williams' theoretical result and practical manifestations
"""
import streamlit as st
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import json
from pathlib import Path
# Page configuration (Streamlit expects this before any other st.* call)
st.set_page_config(
    page_title="Space-Time Tradeoffs Dashboard",
    page_icon="📊",
    layout="wide",
)

# Custom CSS: trims the top padding and gives charts and metric cards
# dark backgrounds with a thin border.
_CUSTOM_CSS = """
<style>
.main {padding-top: 1rem;}
.stPlotlyChart {background-color: #0e1117;}
div[data-testid="metric-container"] {
background-color: #262730;
border: 1px solid #333;
padding: 5px 10px;
border-radius: 5px;
margin: 5px 0;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Title and introduction
_INTRO_MARKDOWN = """
This dashboard demonstrates **Ryan Williams' 2025 result**: TIME[t] ⊆ SPACE[√(t log t)]
Explore how this theoretical bound manifests in real computing systems.
"""
st.title("🔄 The Ubiquity of Space-Time Tradeoffs")
st.markdown(_INTRO_MARKDOWN)

# Sidebar navigation: the selected label drives the if/elif page dispatch below
_PAGES = [
    "Overview",
    "Theoretical Explorer",
    "Experimental Results",
    "Real-World Systems",
    "Tradeoff Calculator",
    "Interactive Demos",
]
page = st.sidebar.selectbox("Choose a visualization", _PAGES)
# Helper functions
def create_space_time_curve(n_points=100):
    """Sample the three reference space bounds over a log-spaced time axis.

    Args:
        n_points: Number of samples taken between t = 10**1 and t = 10**6.

    Returns:
        A 4-tuple ``(t, s_williams, s_naive, s_minimal)`` of arrays where
        ``s_williams = sqrt(t * log t)``, ``s_naive`` is ``t`` itself, and
        ``s_minimal = log t`` (natural logarithm).
    """
    times = np.logspace(1, 6, n_points)
    log_times = np.log(times)
    # The naive bound (space == time) is the time axis itself.
    return times, np.sqrt(times * log_times), times, log_times
def create_3d_tradeoff_surface():
    """Build a space/time mesh and a synthetic quality surface over it.

    Returns:
        A 3-tuple ``(S, T, Q)`` of 50x50 arrays. ``S`` and ``T`` are the
        meshgrid of two identical log-spaced axes covering 10**0..10**3, and
        ``Q`` is a logistic function of ``log(S) + log(T) - 4``.
    """
    # Both axes use the same sampling, so one array feeds both mesh inputs.
    axis = np.logspace(0, 3, 50)
    space_grid, time_grid = np.meshgrid(axis, axis)
    # Quality rises with the combined (log) budget of space and time.
    budget = np.log(space_grid) + np.log(time_grid) - 4
    quality = 1 / (1 + np.exp(-budget))
    return space_grid, time_grid, quality
# Page: Overview
if page == "Overview":
    st.header("Key Concepts")

    # One card per column: (metric label, value, delta), callout function, callout text
    concept_cards = [
        (("Theoretical Bound", "√(t log t)", "Space for time t"),
         st.info,
         "Any computation taking time t can be done with √(t log t) memory"),
        (("Practical Factor", "100-10,000×", "Constant overhead"),
         st.warning,
         "Real systems have I/O, cache hierarchies, coordination costs"),
        (("Ubiquity", "Everywhere", "In modern systems"),
         st.success,
         "Databases, ML, distributed systems all use these tradeoffs"),
    ]
    for column, (metric_args, callout, message) in zip(st.columns(3), concept_cards):
        with column:
            st.metric(*metric_args)
            callout(message)

    # Main visualization: the three reference bounds on log-log axes
    st.subheader("The Fundamental Tradeoff")
    time_axis, williams_space, naive_space, minimal_space = create_space_time_curve()

    # Traces are added in this order, which is also the legend order.
    curve_specs = [
        (naive_space, 'Naive (Space = Time)', dict(color='red', dash='dash')),
        (williams_space, "Williams' Bound: √(t log t)", dict(color='blue', width=3)),
        (minimal_space, 'Minimal Space: log(t)', dict(color='green', dash='dot')),
    ]
    fig = go.Figure()
    for space_values, label, line_style in curve_specs:
        fig.add_trace(go.Scatter(
            x=time_axis,
            y=space_values,
            mode='lines',
            name=label,
            line=line_style,
        ))

    fig.update_xaxes(type="log", title="Time (t)")
    fig.update_yaxes(type="log", title="Space (s)")
    fig.update_layout(
        title="Theoretical Space-Time Bounds",
        height=500,
        hovermode='x',
        template="plotly_dark",
    )
    st.plotly_chart(fig, use_container_width=True)
# Page: Theoretical Explorer
elif page == "Theoretical Explorer":
st.header("Interactive Theoretical Explorer")
col1, col2 = st.columns([1, 2])
with col1:
st.subheader("Parameters")
time_complexity = st.slider(
"Time Complexity (log scale)",
min_value=1.0,
max_value=6.0,
value=3.0,
step=0.1
)
show_practical = st.checkbox("Show practical bounds", value=True)
constant_factor = st.slider(
"Constant factor",
min_value=1,
max_value=1000,
value=100,
disabled=not show_practical
)
t_value = 10 ** time_complexity
s_theory = np.sqrt(t_value * np.log(t_value))
s_practical = s_theory * constant_factor if show_practical else s_theory
st.metric("Time (t)", f"{t_value:,.0f}")
st.metric("Space (theory)", f"{s_theory:,.0f}")
if show_practical:
st.metric("Space (practical)", f"{s_practical:,.0f}")
with col2:
# Create visualization
t_range = np.logspace(1, 6, 100)
s_range_theory = np.sqrt(t_range * np.log(t_range))
s_range_practical = s_range_theory * constant_factor
fig = go.Figure()
fig.add_trace(go.Scatter(
x=t_range, y=s_range_theory,
mode='lines',
name='Theoretical Bound',
line=dict(color='blue', width=2)
))
if show_practical:
fig.add_trace(go.Scatter(
x=t_range, y=s_range_practical,
mode='lines',
name=f'Practical ({constant_factor}× overhead)',
line=dict(color='orange', width=2)
))
# Add current point
fig.add_trace(go.Scatter(
x=[t_value], y=[s_theory],
mode='markers',
name='Current Selection',
marker=dict(size=15, color='red', symbol='star')
))
fig.update_xaxes(type="log", title="Time")
fig.update_yaxes(type="log", title="Space")
fig.update_layout(
title="Space Requirements for Time-Bounded Computation",
height=500,
template="plotly_dark"
)
st.plotly_chart(fig, use_container_width=True)
# Page: Experimental Results
elif page == "Experimental Results":
st.header("Experimental Validation")
tabs = st.tabs(["Maze Solver", "Sorting", "Streaming", "Summary"])
with tabs[0]:
st.subheader("Maze Solving Algorithms")
# Simulated data (in practice, load from experiment results)
maze_data = pd.DataFrame({
'Size': [20, 30, 40, 50],
'BFS_Time': [0.001, 0.003, 0.008, 0.015],
'BFS_Memory': [1600, 3600, 6400, 10000],
'Limited_Time': [0.01, 0.05, 0.15, 0.35],
'Limited_Memory': [80, 120, 160, 200]
})
fig = make_subplots(
rows=1, cols=2,
subplot_titles=("Time Complexity", "Memory Usage")
)
fig.add_trace(
go.Scatter(x=maze_data['Size'], y=maze_data['BFS_Time'],
name='BFS', mode='lines+markers'),
row=1, col=1
)
fig.add_trace(
go.Scatter(x=maze_data['Size'], y=maze_data['Limited_Time'],
name='Memory-Limited', mode='lines+markers'),
row=1, col=1
)
fig.add_trace(
go.Scatter(x=maze_data['Size'], y=maze_data['BFS_Memory'],
name='BFS', mode='lines+markers', showlegend=False),
row=1, col=2
)
fig.add_trace(
go.Scatter(x=maze_data['Size'], y=maze_data['Limited_Memory'],
name='Memory-Limited', mode='lines+markers', showlegend=False),
row=1, col=2
)
fig.update_xaxes(title_text="Maze Size", row=1, col=1)
fig.update_xaxes(title_text="Maze Size", row=1, col=2)
fig.update_yaxes(title_text="Time (s)", row=1, col=1)
fig.update_yaxes(title_text="Memory (cells)", row=1, col=2)
fig.update_layout(height=400, template="plotly_dark")
st.plotly_chart(fig, use_container_width=True)
st.info("Memory-limited DFS uses √n memory but requires ~n√n time due to recomputation")
with tabs[1]:
st.subheader("Sorting with Checkpoints")
sort_times = {
'Size': [1000, 5000, 10000, 20000],
'In_Memory': [0.00001, 0.0001, 0.0003, 0.0008],
'Checkpointed': [0.268, 2.5, 8.2, 25.3],
'Ratio': [26800, 25000, 27333, 31625]
}
df = pd.DataFrame(sort_times)
fig = px.bar(df, x='Size', y=['In_Memory', 'Checkpointed'],
title="Sorting Time: In-Memory vs Checkpointed",
labels={'value': 'Time (seconds)', 'variable': 'Method'},
log_y=True,
barmode='group',
template="plotly_dark")
st.plotly_chart(fig, use_container_width=True)
st.warning("Checkpointed sorting shows massive overhead (>1000×) due to disk I/O")
with tabs[2]:
st.subheader("Stream Processing")
stream_data = {
'Window_Size': [10, 50, 100, 500, 1000],
'Full_Storage_Time': [0.005, 0.025, 0.05, 0.25, 0.5],
'Sliding_Window_Time': [0.001, 0.001, 0.001, 0.002, 0.003],
'Memory_Ratio': [100, 100, 100, 100, 100]
}
df = pd.DataFrame(stream_data)
fig = go.Figure()
fig.add_trace(go.Scatter(
x=df['Window_Size'], y=df['Full_Storage_Time'],
mode='lines+markers',
name='Full Storage',
line=dict(color='red')
))
fig.add_trace(go.Scatter(
x=df['Window_Size'], y=df['Sliding_Window_Time'],
mode='lines+markers',
name='Sliding Window',
line=dict(color='green')
))
fig.update_xaxes(title="Window Size")
fig.update_yaxes(title="Time (seconds)", type="log")
fig.update_layout(
title="Stream Processing: Less Memory = Faster!",
template="plotly_dark",
height=400
)
st.plotly_chart(fig, use_container_width=True)
st.success("Sliding window (O(w) space) is faster due to cache locality!")
with tabs[3]:
st.subheader("Summary of Findings")
findings = pd.DataFrame({
'Experiment': ['Maze Solver', 'Sorting', 'Streaming'],
'Space Reduction': ['n → √n', 'n → √n', 'n → w'],
'Time Increase': ['√n×', '>1000×', '0.1× (faster!)'],
'Bottleneck': ['Recomputation', 'Disk I/O', 'Cache Locality']
})
st.table(findings)
# Page: Real-World Systems
elif page == "Real-World Systems":
st.header("Space-Time Tradeoffs in Production")
system = st.selectbox(
"Choose a system",
["Databases", "Large Language Models", "Distributed Computing"]
)
if system == "Databases":
st.subheader("Database Query Processing")
col1, col2 = st.columns(2)
with col1:
st.markdown("### Hash Join vs Nested Loop")
memory_limit = st.slider("work_mem (MB)", 1, 1024, 64)
table_size = st.slider("Table size (GB)", 1, 100, 10)
# Simulate query planner decision
if memory_limit > table_size * 10:
join_type = "Hash Join"
time_estimate = table_size * 0.1
memory_use = min(memory_limit, table_size * 50)
else:
join_type = "Nested Loop"
time_estimate = table_size ** 2 * 0.01
memory_use = 1
st.metric("Selected Algorithm", join_type)
st.metric("Estimated Time", f"{time_estimate:.1f} seconds")
st.metric("Memory Usage", f"{memory_use} MB")
with col2:
# Visualization
mem_range = np.logspace(0, 3, 100)
hash_time = np.ones_like(mem_range) * table_size * 0.1
nested_time = np.ones_like(mem_range) * table_size ** 2 * 0.01
# Hash join only works with enough memory
hash_time[mem_range < table_size * 10] = np.inf
fig = go.Figure()
fig.add_trace(go.Scatter(
x=mem_range, y=hash_time,
mode='lines',
name='Hash Join',
line=dict(color='blue')
))
fig.add_trace(go.Scatter(
x=mem_range, y=nested_time,
mode='lines',
name='Nested Loop',
line=dict(color='red')
))
fig.add_vline(x=memory_limit, line_dash="dash", line_color="green",
annotation_text="Current work_mem")
fig.update_xaxes(type="log", title="Memory Available (MB)")
fig.update_yaxes(type="log", title="Query Time (seconds)")
fig.update_layout(
title="Join Algorithm Selection",
template="plotly_dark",
height=400
)
st.plotly_chart(fig, use_container_width=True)
elif system == "Large Language Models":
st.subheader("LLM Memory Optimizations")
col1, col2 = st.columns([1, 2])
with col1:
model_size = st.selectbox("Model Size", ["7B", "13B", "70B", "175B"])
optimization = st.multiselect(
"Optimizations",
["Quantization (INT8)", "Flash Attention", "Multi-Query Attention"],
default=[]
)
# Calculate memory requirements
base_memory = {"7B": 28, "13B": 52, "70B": 280, "175B": 700}[model_size]
memory = base_memory
speedup = 1.0
if "Quantization (INT8)" in optimization:
memory /= 4
speedup *= 0.8
if "Flash Attention" in optimization:
memory *= 0.7
speedup *= 0.9
if "Multi-Query Attention" in optimization:
memory *= 0.6
speedup *= 0.95
st.metric("Memory Required", f"{memory:.0f} GB")
st.metric("Relative Speed", f"{speedup:.2f}×")
st.metric("Context Length", f"{int(100000 / (memory / base_memory))} tokens")
with col2:
# Create optimization impact chart
categories = ['Memory', 'Speed', 'Context Length', 'Quality']
fig = go.Figure()
# Baseline
fig.add_trace(go.Scatterpolar(
r=[100, 100, 100, 100],
theta=categories,
fill='toself',
name='Baseline',
line=dict(color='red')
))
# With optimizations
memory_score = (base_memory / memory) * 100
speed_score = speedup * 100
context_score = (memory_score) * 100 / 100
quality_score = 95 if optimization else 100
fig.add_trace(go.Scatterpolar(
r=[memory_score, speed_score, context_score, quality_score],
theta=categories,
fill='toself',
name='With Optimizations',
line=dict(color='green')
))
fig.update_layout(
polar=dict(
radialaxis=dict(
visible=True,
range=[0, 200]
)),
showlegend=True,
template="plotly_dark",
title="Optimization Impact"
)
st.plotly_chart(fig, use_container_width=True)
elif system == "Distributed Computing":
st.subheader("MapReduce Shuffle Memory")
# Interactive shuffle buffer sizing
cluster_size = st.slider("Cluster Size (nodes)", 10, 1000, 100)
data_size = st.slider("Data Size (TB)", 1, 100, 10)
# Calculate optimal buffer size
data_per_node = data_size * 1024 / cluster_size # GB per node
optimal_buffer = np.sqrt(data_per_node * 1024) # MB
col1, col2, col3 = st.columns(3)
with col1:
st.metric("Data per Node", f"{data_per_node:.1f} GB")
with col2:
st.metric("Optimal Buffer Size", f"{optimal_buffer:.0f} MB")
with col3:
st.metric("Buffer/Data Ratio", f"1:{int(data_per_node * 1024 / optimal_buffer)}")
# Visualization of shuffle performance
buffer_sizes = np.logspace(1, 4, 100)
# Performance model
io_time = data_per_node * 1024 / buffer_sizes * 10 # More I/O with small buffers
cpu_time = buffer_sizes / 100 # More CPU with large buffers
total_time = io_time + cpu_time
fig = go.Figure()
fig.add_trace(go.Scatter(
x=buffer_sizes, y=io_time,
mode='lines',
name='I/O Time',
line=dict(color='red')
))
fig.add_trace(go.Scatter(
x=buffer_sizes, y=cpu_time,
mode='lines',
name='CPU Time',
line=dict(color='blue')
))
fig.add_trace(go.Scatter(
x=buffer_sizes, y=total_time,
mode='lines',
name='Total Time',
line=dict(color='green', width=3)
))
fig.add_vline(x=optimal_buffer, line_dash="dash", line_color="white",
annotation_text="√n Optimal")
fig.update_xaxes(type="log", title="Shuffle Buffer Size (MB)")
fig.update_yaxes(type="log", title="Time (seconds)")
fig.update_layout(
title="Shuffle Performance vs Buffer Size",
template="plotly_dark",
height=400
)
st.plotly_chart(fig, use_container_width=True)
st.info("The optimal buffer size follows the √n pattern predicted by theory!")
# Page: Tradeoff Calculator
elif page == "Tradeoff Calculator":
st.header("Space-Time Tradeoff Calculator")
st.markdown("Calculate optimal configurations for your system")
col1, col2 = st.columns(2)
with col1:
st.subheader("System Parameters")
total_data = st.number_input("Total Data Size (GB)", min_value=1, value=100)
available_memory = st.number_input("Available Memory (GB)", min_value=1, value=16)
io_speed = st.slider("I/O Speed (MB/s)", 50, 5000, 500)
cpu_speed = st.slider("CPU Speed (GFLOPS)", 10, 1000, 100)
workload_type = st.selectbox(
"Workload Type",
["Batch Processing", "Stream Processing", "Interactive Query", "ML Training"]
)
with col2:
st.subheader("Recommendations")
# Calculate recommendations based on workload
memory_ratio = available_memory / total_data
if memory_ratio > 1:
st.success("✅ Everything fits in memory!")
strategy = "In-memory processing"
chunk_size = total_data
elif memory_ratio > 0.1:
st.info("📊 Use hybrid approach")
strategy = "Partial caching with smart eviction"
chunk_size = np.sqrt(total_data * available_memory)
else:
st.warning("⚠️ Heavy space constraints")
strategy = "Streaming with checkpoints"
chunk_size = available_memory / 10
st.metric("Recommended Strategy", strategy)
st.metric("Optimal Chunk Size", f"{chunk_size:.1f} GB")
# Time estimates
if workload_type == "Batch Processing":
time_memory = total_data / cpu_speed
time_disk = total_data / io_speed * 1000 + total_data / cpu_speed * 2
time_optimal = total_data / np.sqrt(available_memory) * 10
else:
time_memory = 1
time_disk = 100
time_optimal = 10
# Comparison chart
fig = go.Figure(data=[
go.Bar(name='All in Memory', x=['Time'], y=[time_memory]),
go.Bar(name='All on Disk', x=['Time'], y=[time_disk]),
go.Bar(name='Optimal √n', x=['Time'], y=[time_optimal])
])
fig.update_layout(
title="Processing Time Comparison",
yaxis_title="Time (seconds)",
template="plotly_dark",
height=300
)
st.plotly_chart(fig, use_container_width=True)
# Page: Interactive Demos
elif page == "Interactive Demos":
st.header("Interactive Demonstrations")
demo = st.selectbox(
"Choose a demo",
["Sorting Visualizer", "Cache Simulator", "Attention Mechanism"]
)
if demo == "Sorting Visualizer":
st.subheader("Watch Space-Time Tradeoffs in Action")
size = st.slider("Array Size", 10, 100, 50)
algorithm = st.radio("Algorithm", ["In-Memory Sort", "External Sort with √n Memory"])
if st.button("Run Sorting"):
# Simulate sorting
progress = st.progress(0)
status = st.empty()
if algorithm == "In-Memory Sort":
steps = size * np.log2(size)
for i in range(int(steps)):
progress.progress(i / steps)
status.text(f"Comparing elements... Step {i}/{int(steps)}")
st.success(f"Completed in {steps:.0f} operations using {size} memory units")
else:
chunks = int(np.sqrt(size))
total_steps = size * np.log2(size) * chunks
for i in range(int(total_steps)):
progress.progress(i / total_steps)
if i % size == 0:
status.text(f"Writing checkpoint {i//size}/{chunks}...")
else:
status.text(f"Processing... Step {i}/{int(total_steps)}")
st.warning(f"Completed in {total_steps:.0f} operations using {chunks} memory units")
elif demo == "Cache Simulator":
st.subheader("Memory Hierarchy Simulation")
# Create memory hierarchy visualization
levels = {
'L1 Cache': {'size': 32, 'latency': 1},
'L2 Cache': {'size': 256, 'latency': 10},
'L3 Cache': {'size': 8192, 'latency': 50},
'RAM': {'size': 32768, 'latency': 100},
'SSD': {'size': 512000, 'latency': 10000}
}
access_pattern = st.selectbox(
"Access Pattern",
["Sequential", "Random", "Strided"]
)
working_set = st.slider("Working Set Size (KB)", 1, 100000, 1000, step=10)
# Determine which level serves the request
for level, specs in levels.items():
if working_set <= specs['size']:
serving_level = level
latency = specs['latency']
break
col1, col2 = st.columns(2)
with col1:
st.metric("Data Served From", serving_level)
st.metric("Average Latency", f"{latency} ns")
st.metric("Throughput", f"{1000/latency:.1f} GB/s")
with col2:
# Visualization
fig = go.Figure()
sizes = [specs['size'] for specs in levels.values()]
latencies = [specs['latency'] for specs in levels.values()]
names = list(levels.keys())
fig.add_trace(go.Scatter(
x=sizes, y=latencies,
mode='markers+text',
text=names,
textposition="top center",
marker=dict(size=20)
))
fig.add_vline(x=working_set, line_dash="dash", line_color="red",
annotation_text="Working Set")
fig.update_xaxes(type="log", title="Capacity (KB)")
fig.update_yaxes(type="log", title="Latency (ns)")
fig.update_layout(
title="Memory Hierarchy",
template="plotly_dark",
height=400
)
st.plotly_chart(fig, use_container_width=True)
# Footer: horizontal rule followed by a centered attribution block
_FOOTER_HTML = """
<div style='text-align: center'>
<p>Created for the Ubiquity Project | Based on Ryan Williams' 2025 STOC paper</p>
<p>TIME[t] ⊆ SPACE[√(t log t)] - A fundamental limit of computation</p>
</div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)

View File

@@ -0,0 +1,4 @@
streamlit==1.29.0
plotly==5.18.0
pandas==2.1.4
numpy==1.26.2

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env python3
"""
Launch the Space-Time Tradeoffs Dashboard
"""
import subprocess
import sys
import os
def main():
    """Install requirements if Streamlit is missing, then launch the dashboard.

    Paths are resolved relative to this script's directory, so the launcher
    works from any working directory. Streamlit is started as a module of the
    current interpreter, which is the same interpreter pip installs into.

    Returns:
        The exit status of the Streamlit process, or 130 if the launcher was
        interrupted with Ctrl+C.

    Raises:
        subprocess.CalledProcessError: If installing the requirements fails.
    """
    dashboard_dir = os.path.dirname(os.path.abspath(__file__))

    # Check if streamlit is installed
    try:
        import streamlit  # noqa: F401  (imported only to test availability)
    except ImportError:
        print("Streamlit not found. Installing requirements...")
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            "-r", os.path.join(dashboard_dir, "requirements.txt"),
        ])

    # Launch the dashboard
    print("Launching Space-Time Tradeoffs Dashboard...")
    print("Opening in your default browser...")
    try:
        # Argument list (no shell) plus sys.executable avoids picking up a
        # different `streamlit` from PATH.
        return subprocess.call([
            sys.executable, "-m", "streamlit", "run",
            os.path.join(dashboard_dir, "app.py"),
        ])
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the server; exit without a traceback.
        return 130


if __name__ == "__main__":
    sys.exit(main())