# Sliding Window Pattern
The Sliding Window pattern maintains a fixed-size buffer of the most recent interactions, automatically discarding older entries as new ones arrive. This approach balances memory efficiency with contextual relevance, making it ideal for real-time applications with limited resources.
## Overview
The Sliding Window pattern keeps a rolling buffer of recent context, typically including:
- Last N interactions or conversations
- Recent system states and observations
- Time-bounded context (e.g., last 30 minutes)
- Fixed-size memory footprint regardless of agent lifetime
This pattern excels in scenarios where recent context is most relevant and you need predictable memory usage with consistent performance characteristics.
## Architecture
from collections import deque
from datetime import datetime, timedelta
class SlidingWindowMemory:
    """Rolling buffer of the most recent interactions.

    Keeps at most ``max_size`` entries (the deque's ``maxlen`` evicts the
    oldest automatically) and, when ``time_window_hours`` is given, also
    drops entries older than that window.  Time-based eviction only runs
    when a new interaction is added, not in the background.
    """

    def __init__(self, max_size=100, time_window_hours=None):
        self.max_size = max_size
        # None disables time-based eviction entirely.
        self.time_window = timedelta(hours=time_window_hours) if time_window_hours else None
        # Parallel deques: timestamps[i] always mirrors interactions[i].
        self.interactions = deque(maxlen=max_size)
        self.timestamps = deque(maxlen=max_size)

    def add_interaction(self, user_input, agent_response, context=None):
        """Record one user/agent exchange, then evict expired entries."""
        interaction = {
            'user_input': user_input,
            'agent_response': agent_response,
            'context': context or {},
            # NOTE(review): naive local time; consider an aware UTC datetime.
            'timestamp': datetime.now()
        }
        self.interactions.append(interaction)
        self.timestamps.append(interaction['timestamp'])
        self._cleanup_expired()

    def _cleanup_expired(self):
        """Drop entries older than the configured time window (if any)."""
        if not self.time_window:
            return
        cutoff_time = datetime.now() - self.time_window
        while self.timestamps and self.timestamps[0] < cutoff_time:
            self.timestamps.popleft()
            self.interactions.popleft()

    def get_context(self, limit=None):
        """Return stored interactions (oldest first), optionally the last *limit*.

        BUG FIX: the original used ``if limit:``, so ``limit=0`` fell
        through and returned the ENTIRE window; it now returns [].
        """
        items = list(self.interactions)
        if limit is None:
            return items
        return items[-limit:] if limit > 0 else []

## Implementation Considerations
### Window Sizing Strategies
#### Fixed-Size Window
class FixedSizeWindow:
    """Count-bounded window: a deque whose maxlen does the eviction."""

    def __init__(self, size):
        self.buffer = deque(maxlen=size)

    def add(self, item):
        # deque(maxlen=...) silently drops the oldest entry once full.
        self.buffer.append(item)  # Automatically evicts oldest

#### Time-Based Window
class TimeBasedWindow:
    """Duration-bounded window: entries expire after a fixed lifetime."""

    def __init__(self, duration_minutes):
        self.duration = timedelta(minutes=duration_minutes)
        self.items = []  # list of (timestamp, item), oldest first

    def add(self, item):
        self.items.append((datetime.now(), item))
        self._evict_expired()

    def _evict_expired(self):
        # Rebuild the list, keeping only entries younger than the cutoff.
        cutoff = datetime.now() - self.duration
        survivors = []
        for stamp, payload in self.items:
            if stamp > cutoff:
                survivors.append((stamp, payload))
        self.items = survivors

#### Adaptive Window
class AdaptiveWindow:
    """Importance-aware window: evicts the least important entry when full."""

    def __init__(self, base_size=50):
        self.base_size = base_size
        self.buffer = deque()
        # importance_scores[i] always belongs to buffer[i].
        self.importance_scores = deque()

    def add(self, item, importance=1.0):
        self.buffer.append(item)
        self.importance_scores.append(importance)
        self._adaptive_eviction()

    def _adaptive_eviction(self):
        # Keep important items longer: while over capacity, drop the entry
        # with the lowest importance (first occurrence on ties).
        while len(self.buffer) > self.base_size:
            victim, _ = min(enumerate(self.importance_scores), key=lambda pair: pair[1])
            del self.buffer[victim]
            del self.importance_scores[victim]

### Memory-Efficient Implementation
class OptimizedSlidingWindow:
    """Ring-buffer window with preallocated storage and O(1) insertion."""

    def __init__(self, max_size=100):
        self.max_size = max_size
        self.ring_buffer = [None] * max_size  # preallocated slots
        self.head = 0   # index of the next slot to write
        self.size = 0   # number of occupied slots (<= max_size)

    def add(self, interaction):
        # Overwrite the oldest slot once the buffer has wrapped around.
        self.ring_buffer[self.head] = interaction
        self.head = (self.head + 1) % self.max_size
        if self.size < self.max_size:
            self.size += 1

    def get_recent(self, count=None):
        """Return up to *count* stored entries, most recent first."""
        if count is None:
            count = self.size
        newest = self.head - 1  # slot written last
        recent = []
        for offset in range(min(count, self.size)):
            slot = (newest - offset) % self.max_size
            entry = self.ring_buffer[slot]
            if entry is not None:
                recent.append(entry)
        return recent

## Performance Characteristics
### Pros
- Constant Memory: Fixed memory footprint regardless of lifetime
- Fast Access: O(1) insertion and recent retrieval
- Predictable Performance: No degradation over time
- Low Latency: Minimal processing overhead
- Cache-Friendly: Good memory locality
### Cons
- Information Loss: Older interactions permanently lost
- Context Gaps: May lose important historical context
- Fixed Capacity: Cannot adapt to varying context needs
- Temporal Bias: Prioritizes recent over important information
### Performance Metrics
# Performance characteristics of the sliding-window pattern (k = window size).
STORAGE_USAGE = "O(k)"         # Where k is window size
INSERTION_TIME = "O(1)"        # Constant time insertion
RETRIEVAL_TIME = "O(k)"        # Linear in window size
MEMORY_FOOTPRINT = "Constant"  # Fixed regardless of lifetime

## When to Use
### Ideal Scenarios
- Resource-constrained environments (mobile, embedded systems)
- Real-time applications requiring consistent performance
- Conversational agents where recent context dominates
- High-throughput systems processing many concurrent sessions
- Stateless services needing bounded memory per session
### Not Recommended For
- Long-term learning requiring historical data access
- Complex reasoning needing extensive context
- Compliance requirements mandating complete records
- Research applications where data loss is problematic
## Implementation Examples
### Basic Conversation Memory
class ConversationMemory:
    """Keeps the last *context_turns* user/bot exchanges for prompting."""

    def __init__(self, context_turns=10):
        self.turns = deque(maxlen=context_turns)

    def add_turn(self, user_message, bot_response):
        """Append one exchange; the deque drops the oldest when full."""
        self.turns.append({
            'user': user_message,
            'bot': bot_response,
            'timestamp': datetime.now()
        })

    def get_context_string(self):
        """Render the window as alternating "User:"/"Bot:" lines."""
        return "\n".join(
            f"User: {turn['user']}\nBot: {turn['bot']}" for turn in self.turns
        )

### Multi-Modal Sliding Window
class MultiModalWindow:
    """Separate bounded windows per modality (text, images, actions)."""

    def __init__(self, text_size=50, image_size=10, action_size=20):
        self.text_buffer = deque(maxlen=text_size)
        self.image_buffer = deque(maxlen=image_size)
        # NOTE(review): nothing visible here ever appends to action_buffer;
        # confirm an add_action() is defined elsewhere or is intended.
        self.action_buffer = deque(maxlen=action_size)

    def add_text(self, text, metadata=None):
        """Record a text entry with optional metadata."""
        entry = {
            'content': text,
            'metadata': metadata,
            'timestamp': datetime.now()
        }
        self.text_buffer.append(entry)

    def add_image(self, image_path, description=None):
        """Record an image reference (path, not pixel data)."""
        entry = {
            'path': image_path,
            'description': description,
            'timestamp': datetime.now()
        }
        self.image_buffer.append(entry)

    def get_multimodal_context(self):
        """Snapshot all three windows as plain lists."""
        return {
            'text': list(self.text_buffer),
            'images': list(self.image_buffer),
            'actions': list(self.action_buffer)
        }

### Priority-Based Eviction
class PriorityWindow:
    """Bounded window that evicts the lowest-priority entry when full.

    BUG FIX: heap entries were (priority, timestamp, data); when two
    entries tied on both priority AND timestamp, heapq fell through to
    comparing ``data`` itself, raising TypeError for uncomparable
    payloads (e.g. dicts).  A monotonically increasing sequence number
    now breaks ties before the payload is ever compared.
    """

    def __init__(self, max_size=100):
        self.max_size = max_size
        self.items = []  # min-heap of (priority, timestamp, seq, data)
        self._seq = 0    # tie-breaker so heapq never compares payloads

    def add(self, data, priority=1.0):
        """Insert *data*; evict lowest-priority (then oldest) when over limit."""
        import heapq
        heapq.heappush(self.items, (priority, datetime.now(), self._seq, data))
        self._seq += 1
        # Evict lowest priority items if over limit.
        while len(self.items) > self.max_size:
            heapq.heappop(self.items)

    def get_recent_high_priority(self, count=10):
        """Return up to *count* payloads, highest priority (then newest) first."""
        ranked = sorted(self.items,
                        key=lambda entry: (entry[0], entry[1]),
                        reverse=True)
        return [entry[3] for entry in ranked[:count]]

## Best Practices
### Window Size Optimization
def optimize_window_size(conversation_lengths, target_coverage=0.8):
    """Determine optimal window size based on conversation patterns.

    Returns the length at the *target_coverage* percentile, i.e. a window
    big enough to hold that fraction of observed conversations in full.

    BUG FIX: ``target_coverage >= 1.0`` indexed one past the end of the
    sorted list (IndexError); the index is now clamped.  An empty sample
    raises ValueError instead of an obscure IndexError.
    """
    if not conversation_lengths:
        raise ValueError("conversation_lengths must not be empty")
    sorted_lengths = sorted(conversation_lengths)
    target_index = min(int(len(sorted_lengths) * target_coverage),
                       len(sorted_lengths) - 1)
    return sorted_lengths[target_index]

# Example usage
conversation_data = [5, 8, 12, 15, 20, 25, 30, 45, 60, 100]
optimal_size = optimize_window_size(conversation_data, 0.8)
print(f"Optimal window size: {optimal_size}")  # Covers 80% of conversations

### Context Compression
class CompressedSlidingWindow:
    """Sliding window that condenses its oldest third when nearly full.

    BUG FIX: the original called ``self._create_summary`` without ever
    defining it, so compression raised AttributeError at runtime; a
    minimal implementation is provided below (override it with real —
    e.g. LLM-based — summarization).
    """

    def __init__(self, max_size=100, compression_threshold=0.7):
        self.max_size = max_size
        self.buffer = deque(maxlen=max_size)
        # Fraction of capacity at which compression kicks in.
        self.compression_threshold = compression_threshold

    def add(self, interaction):
        # Compress older interactions when the buffer is nearly full.
        if len(self.buffer) > self.max_size * self.compression_threshold:
            self._compress_older_interactions()
        self.buffer.append(interaction)

    def _compress_older_interactions(self):
        # Summarize the oldest third of the buffer into a single entry.
        old_count = len(self.buffer) // 3
        old_interactions = [self.buffer.popleft() for _ in range(old_count)]
        summary = self._create_summary(old_interactions)
        self.buffer.appendleft(summary)

    def _create_summary(self, interactions):
        """Fold *interactions* into one placeholder summary record.

        Default implementation just wraps the originals; subclasses
        should replace it with genuine summarization.
        """
        return {
            'type': 'summary',
            'count': len(interactions),
            'interactions': interactions,
        }

### Monitoring and Metrics
class InstrumentedSlidingWindow:
    """SlidingWindowMemory wrapper that tracks usage metrics."""

    def __init__(self, max_size=100):
        self.memory = SlidingWindowMemory(max_size)
        self.metrics = {
            'total_interactions': 0,
            'evictions': 0,
            'average_retention_time': 0
        }

    def add_interaction(self, *args, **kwargs):
        """Forward to the wrapped memory, counting adds and evictions."""
        self.metrics['total_interactions'] += 1
        # A full window means this add will push the oldest entry out.
        if len(self.memory.interactions) == self.memory.max_size:
            self.metrics['evictions'] += 1
        self.memory.add_interaction(*args, **kwargs)
        self._update_metrics()

    def _update_metrics(self):
        # Average age (seconds) of the entries currently retained.
        if self.memory.interactions:
            ages = [
                (datetime.now() - entry['timestamp']).total_seconds()
                for entry in self.memory.interactions
            ]
            self.metrics['average_retention_time'] = sum(ages) / len(ages)

## Integration with Other Patterns
### Hybrid Memory Architecture
class HybridSlidingMemory:
    """Sliding window plus pinned 'important' interactions and summaries.

    BUG FIX: the original forwarded the whole interaction as a single
    positional argument to SlidingWindowMemory.add_interaction, which
    requires (user_input, agent_response, ...) and therefore raised
    TypeError on every call.  Interactions are assumed to be dicts with
    'user_input' / 'agent_response' / optional 'context' and 'id' keys
    (the shape used throughout this document) — TODO confirm with callers.
    """

    def __init__(self):
        self.sliding_window = SlidingWindowMemory(max_size=50)
        self.important_memory = {}  # Key interactions preserved
        self.summary_memory = []    # Compressed older context
        # NOTE(review): nothing visible here ever appends to summary_memory.

    def add_interaction(self, interaction, importance=None):
        """Store *interaction* in the rolling window; pin it when important."""
        self.sliding_window.add_interaction(
            interaction.get('user_input'),
            interaction.get('agent_response'),
            interaction.get('context')
        )
        # Preserve important interactions outside the eviction window.
        if importance and importance > 0.8:
            # Fall back to the object's identity when no 'id' was supplied.
            self.important_memory[interaction.get('id', id(interaction))] = interaction

    def get_full_context(self):
        """Combine recent, pinned, and summarized context."""
        return {
            'recent': self.sliding_window.get_context(),
            'important': list(self.important_memory.values()),
            'summary': self.summary_memory
        }

### Vector-Enhanced Sliding Window
class VectorSlidingWindow:
    """Sliding window paired with per-interaction embeddings for recall.

    NOTE(review): as written this sketch has several unresolved issues —
    * SlidingWindowMemory.add_interaction expects (user_input,
      agent_response, ...) but is called with a single interaction object;
    * interactions stored by SlidingWindowMemory are plain dicts, yet
      ``interaction.id`` attribute access is used here;
    * ``_generate_embedding`` and ``_cosine_similarity`` are never defined.
    Confirm the intended interaction schema before productionizing.
    """

    def __init__(self, window_size=100):
        self.sliding_window = SlidingWindowMemory(window_size)
        self.vector_index = {}  # interaction_id -> embedding

    def add_interaction(self, interaction):
        """Store the interaction and index an embedding of its user input."""
        self.sliding_window.add_interaction(interaction)
        # Generate and store an embedding keyed by the interaction id.
        embedding = self._generate_embedding(interaction['user_input'])
        self.vector_index[interaction.id] = embedding

    def retrieve_relevant(self, query, top_k=5):
        """Rank windowed interactions by cosine similarity to *query*."""
        query_embedding = self._generate_embedding(query)
        scored = []
        for candidate in self.sliding_window.get_context():
            if candidate.id in self.vector_index:
                score = self._cosine_similarity(
                    query_embedding,
                    self.vector_index[candidate.id]
                )
                scored.append((score, candidate))
        # Highest similarity first, then keep only the top-k.
        scored.sort(reverse=True)
        return [candidate for _, candidate in scored[:top_k]]

## Testing and Validation
### Unit Tests
import pytest
from datetime import datetime, timedelta
def test_sliding_window_size_limit():
    """Only the newest max_size interactions survive an overfill."""
    window = SlidingWindowMemory(max_size=3)
    # Deliberately add more interactions than max_size.
    for turn in range(5):
        window.add_interaction(f"input_{turn}", f"response_{turn}")
    # Only the last 3 remain, in order.
    assert len(window.interactions) == 3
    kept = window.get_context()
    assert kept[0]['user_input'] == 'input_2'   # Oldest kept
    assert kept[-1]['user_input'] == 'input_4'  # Most recent
def test_time_based_eviction():
    """Entries older than the time window are dropped on the next add."""
    memory = SlidingWindowMemory(max_size=100, time_window_hours=1)
    # Inject a stale interaction directly into the parallel deques.
    stale_stamp = datetime.now() - timedelta(hours=2)
    memory.interactions.append({
        'user_input': 'old',
        'agent_response': 'old_response',
        'timestamp': stale_stamp
    })
    memory.timestamps.append(stale_stamp)
    # Adding fresh data triggers cleanup of the expired entry.
    memory.add_interaction('new', 'new_response')
    assert len(memory.interactions) == 1
    assert memory.interactions[0]['user_input'] == 'new'
def test_context_retrieval():
    """get_context(limit=N) returns exactly the last N interactions."""
    memory = SlidingWindowMemory(max_size=10)
    for idx in range(5):
        memory.add_interaction(f"input_{idx}", f"response_{idx}")
    tail = memory.get_context(limit=3)
    assert len(tail) == 3
    assert tail[0]['user_input'] == 'input_2'

### Performance Benchmarks
def benchmark_sliding_window():
    """Micro-benchmark insertion and retrieval of SlidingWindowMemory.

    FIX: uses time.perf_counter() instead of time.time() — perf_counter
    is monotonic and has the highest available resolution, which
    time.time() does not guarantee for short benchmark intervals.
    """
    import time
    memory = SlidingWindowMemory(max_size=1000)
    # Benchmark insertion.
    start = time.perf_counter()
    for i in range(10000):
        memory.add_interaction(f"input_{i}", f"response_{i}")
    insertion_time = time.perf_counter() - start
    # Benchmark retrieval.
    start = time.perf_counter()
    for _ in range(1000):
        context = memory.get_context(limit=50)
    retrieval_time = time.perf_counter() - start
    print(f"Insertion: {insertion_time:.4f}s for 10k items")
    print(f"Retrieval: {retrieval_time:.4f}s for 1k retrievals")
    print(f"Memory usage: {len(memory.interactions)} items")

## Migration and Scaling
### Migration from Full History
def migrate_to_sliding_window(full_history_memory, window_size=100):
    """Migrate from full history to sliding window.

    Copies the newest *window_size* interactions from
    *full_history_memory* (which must expose ``get_recent(n)``) into a
    fresh SlidingWindowMemory; older history is intentionally dropped.

    FIX: uses .get('context') so records without a 'context' key migrate
    cleanly instead of raising KeyError.
    """
    sliding_memory = SlidingWindowMemory(max_size=window_size)
    # Take most recent interactions only.
    recent_interactions = full_history_memory.get_recent(window_size)
    for interaction in recent_interactions:
        sliding_memory.add_interaction(
            interaction['user_input'],
            interaction['agent_response'],
            interaction.get('context')
        )
    return sliding_memory

### Distributed Sliding Windows
class DistributedSlidingWindow:
    """Shards per-session sliding windows across several memories.

    BUG FIX: the original forwarded the whole interaction as a single
    positional argument to SlidingWindowMemory.add_interaction, which
    requires (user_input, agent_response, ...) and raised TypeError.
    Interactions are assumed to be dicts with 'user_input' /
    'agent_response' / optional 'context' keys — TODO confirm with callers.

    NOTE(review): shard choice uses built-in hash(), which is randomized
    per process for strings (PYTHONHASHSEED): session→shard mapping is
    stable within one process but NOT across processes or restarts.  Use
    a stable digest (e.g. hashlib) if cross-process consistency matters.
    """

    def __init__(self, shard_count=4, window_size=100):
        # max(1, ...) guards against deque(maxlen=0) when shard_count
        # exceeds window_size.
        per_shard = max(1, window_size // shard_count)
        self.shards = [
            SlidingWindowMemory(max_size=per_shard)
            for _ in range(shard_count)
        ]
        self.shard_count = shard_count

    def _shard_for(self, session_id):
        # Map a session onto one shard (see hash() caveat above).
        return hash(session_id) % self.shard_count

    def add_interaction(self, interaction, session_id):
        """Store *interaction* in the shard owning *session_id*."""
        self.shards[self._shard_for(session_id)].add_interaction(
            interaction.get('user_input'),
            interaction.get('agent_response'),
            interaction.get('context')
        )

    def get_context(self, session_id):
        """Return the window contents for *session_id*'s shard."""
        return self.shards[self._shard_for(session_id)].get_context()

## Related Patterns
- Vector Retrieval: Enhance sliding window with semantic search
- Full History: Alternative for complete information retention
- Graph Memory: Build graphs from sliding window interactions
- Hybrid Approaches: Combine sliding window with other patterns
## Next Steps
- Determine appropriate window size for your use case
- Choose between time-based or count-based eviction
- Implement monitoring for window effectiveness
- Consider hybrid approaches for enhanced capability
- Test performance under expected load patterns
- Plan for integration with existing memory systems
The Sliding Window pattern provides an excellent balance of efficiency and functionality, making it one of the most practical choices for production agent systems with constrained resources.