# streaming_display_manager.py 21.9 KB

# AIfeng/2025-07-07 15:25:48
# Streaming display management module - incremental updates and refresh strategies

import time
import threading
from typing import List, Dict, Optional, Callable, Any
from dataclasses import dataclass, field
from enum import Enum
from collections import deque, defaultdict
import logging
import asyncio
from concurrent.futures import ThreadPoolExecutor

class UpdateType(Enum):
    """Kinds of change that can be applied to displayed content."""

    # Append content to the display.
    APPEND = "append"
    # Replace part of the content.
    REPLACE_PARTIAL = "replace_partial"
    # Final replacement.
    REPLACE_FINAL = "replace_final"
    # Insert content.
    INSERT = "insert"
    # Delete content.
    DELETE = "delete"
    # Highlight content.
    HIGHLIGHT = "highlight"

class RefreshStrategy(Enum):
    """Strategies that decide when a queued update is flushed to the display."""

    # Refresh immediately.
    IMMEDIATE = "immediate"
    # Debounced refresh.
    DEBOUNCED = "debounced"
    # Batched refresh.
    BATCH = "batch"
    # Adaptive refresh.
    ADAPTIVE = "adaptive"

class DisplayPriority(Enum):
    """Display priority levels; a larger value means a higher priority."""

    LOW = 1
    NORMAL = 2
    HIGH = 3
    URGENT = 4

@dataclass
class DisplayUpdate:
    """A single requested change to a session's display.

    Instances order by priority (higher first) and then by timestamp
    (earlier first), so sorting a list of updates yields that order.
    """
    # Session the update belongs to.
    session_id: str
    # Identifier of the segment the update targets.
    segment_id: str
    # Text carried by the update.
    content: str
    # Which kind of change to apply.
    update_type: UpdateType
    # Priority used when ordering updates.
    priority: DisplayPriority
    # Creation time, used as the ordering tie-breaker.
    timestamp: float
    # Optional display position; None when not specified.
    position: Optional[int] = None
    # Free-form extra data attached to the update.
    metadata: Dict = field(default_factory=dict)

    def __lt__(self, other):
        """Return True when this update sorts before ``other``.

        Higher priority sorts first; equal priorities fall back to the
        earlier timestamp. Returns ``NotImplemented`` for operands that are
        not ``DisplayUpdate`` so Python can try the reflected comparison or
        raise ``TypeError`` instead of failing on a missing attribute.
        """
        if not isinstance(other, DisplayUpdate):
            return NotImplemented
        if self.priority.value != other.priority.value:
            # Higher priority comes first.
            return self.priority.value > other.priority.value
        # Earlier timestamp comes first.
        return self.timestamp < other.timestamp

@dataclass
class DisplaySegment:
    """One piece of displayed content together with its display state."""
    # Unique identifier of the segment.
    segment_id: str
    # Text shown for this segment.
    content: str
    # Display position; may be None on creation, in which case
    # DisplayBuffer.add_segment assigns one.
    position: Optional[int]
    # Confidence value associated with the content.
    confidence: float
    # Whether the content is final.
    is_final: bool
    # Timestamp of the most recent modification.
    last_updated: float
    # Whether the segment should be highlighted.
    highlight: bool = False
    # Free-form extra data attached to the segment.
    metadata: Dict = field(default_factory=dict)

class DisplayBuffer:
    """Size-bounded store of display segments.

    Segments are indexed by ``segment_id`` (``segments``) and by integer
    display position (``position_map``). Every public method runs under an
    internal re-entrant lock.
    """

    def __init__(self, max_size: int = 1000):
        """Create an empty buffer.

        Args:
            max_size: Maximum number of segments kept. When exceeded, the
                segments with the oldest ``last_updated`` are evicted.
        """
        self.max_size = max_size
        self.segments = {}  # segment_id -> DisplaySegment
        self.position_map = {}  # position -> segment_id
        self.next_position = 0
        self._lock = threading.RLock()

    def add_segment(self, segment: DisplaySegment) -> int:
        """Store ``segment`` and return the position it occupies.

        When ``segment.position`` is None, a position is chosen: a segment
        whose id is already stored keeps its current position, otherwise the
        next free sequential position is assigned. When an existing id is
        re-added at a different explicit position, its old position entry is
        removed so the segment is never listed twice.

        Args:
            segment: Segment to add; its ``position`` attribute is filled in
                when it was None.

        Returns:
            The position of the stored segment.
        """
        with self._lock:
            segment_id = segment.segment_id
            previous = self.segments.get(segment_id)
            # Only treat the previous position as "owned" if the position
            # index still points at this segment id.
            previous_is_mapped = (
                previous is not None
                and self.position_map.get(previous.position) == segment_id
            )

            if segment.position is None:
                if previous_is_mapped:
                    # Replacing an existing segment: stay in place.
                    segment.position = previous.position
                else:
                    segment.position = self.next_position
                    self.next_position += 1
            elif previous_is_mapped and previous.position != segment.position:
                # The segment moved: drop the stale position entry.
                del self.position_map[previous.position]

            self.segments[segment_id] = segment
            self.position_map[segment.position] = segment_id

            # Enforce the size bound.
            self._cleanup_if_needed()

            return segment.position

    def update_segment(self, segment_id: str, content: Optional[str] = None,
                       confidence: Optional[float] = None,
                       is_final: Optional[bool] = None,
                       highlight: Optional[bool] = None) -> bool:
        """Update selected fields of a stored segment.

        Arguments left as None keep the segment's current value. On success
        ``last_updated`` is set to the current time.

        Returns:
            True if the segment exists and was updated, False otherwise.
        """
        with self._lock:
            segment = self.segments.get(segment_id)
            if segment is None:
                return False

            if content is not None:
                segment.content = content
            if confidence is not None:
                segment.confidence = confidence
            if is_final is not None:
                segment.is_final = is_final
            if highlight is not None:
                segment.highlight = highlight

            segment.last_updated = time.time()
            return True

    def get_segment(self, segment_id: str) -> Optional[DisplaySegment]:
        """Return the segment with ``segment_id``, or None if absent."""
        with self._lock:
            return self.segments.get(segment_id)

    def get_segments_by_range(self, start_pos: int, end_pos: int) -> List[DisplaySegment]:
        """Return segments whose position lies in ``[start_pos, end_pos]``.

        The result is ordered by ascending position. Only positions that are
        actually mapped are visited, so a wide range costs no more than the
        number of stored positions.
        """
        with self._lock:
            positions = sorted(
                pos for pos in self.position_map
                if start_pos <= pos <= end_pos
            )
            return [
                self.segments[self.position_map[pos]]
                for pos in positions
                if self.position_map[pos] in self.segments
            ]

    def get_all_segments(self) -> List[DisplaySegment]:
        """Return all mapped segments ordered by ascending position."""
        with self._lock:
            return [
                self.segments[self.position_map[pos]]
                for pos in sorted(self.position_map)
                if self.position_map[pos] in self.segments
            ]

    def remove_segment(self, segment_id: str) -> bool:
        """Remove a segment by id.

        The position entry is removed only when it still refers to this
        segment, so a position that has since been taken over by another
        segment is left intact and a missing entry does not raise.

        Returns:
            True if the segment existed and was removed, False otherwise.
        """
        with self._lock:
            segment = self.segments.pop(segment_id, None)
            if segment is None:
                return False

            if self.position_map.get(segment.position) == segment_id:
                del self.position_map[segment.position]

            return True

    def _cleanup_if_needed(self):
        """Evict the least recently updated segments beyond ``max_size``.

        Called from ``add_segment`` while the buffer lock is held.
        """
        excess_count = len(self.segments) - self.max_size
        if excess_count <= 0:
            return

        # Oldest ``last_updated`` first; evict exactly the overflow.
        oldest_first = sorted(self.segments.values(), key=lambda s: s.last_updated)
        for segment in oldest_first[:excess_count]:
            self.remove_segment(segment.segment_id)

    def clear(self):
        """Remove every segment and reset position numbering to zero."""
        with self._lock:
            self.segments.clear()
            self.position_map.clear()
            self.next_position = 0

class StreamingDisplayManager:
    """Coordinate incremental display updates for streaming sessions.

    Each session owns a ``DisplayBuffer``. Callers submit changes through
    ``update_display``; depending on the chosen ``RefreshStrategy`` an update
    is either applied right away on the thread pool, or parked in
    ``pending_updates`` and flushed later as a batch (debounce timer fired,
    batch size reached, or batch timeout detected by the background thread).
    After each applied update or batch, the registered display callbacks
    receive the session's position-ordered segment list.
    """

    def __init__(self, config: Optional[Dict] = None):
        """Create the manager and start its background timeout thread.

        Args:
            config: Optional settings. Keys that are missing fall back to the
                values from ``_get_default_config``.
        """
        # Overlay caller settings on the defaults so a partial config works.
        self.config = {**self._get_default_config(), **(config or {})}

        # Per-session display buffers, created on first use.
        self.session_buffers = defaultdict(lambda: DisplayBuffer(
            self.config.get('max_buffer_size', 1000)
        ))

        # Updates accepted but not yet processed.
        self.update_queue = deque()
        self.pending_updates = defaultdict(list)  # session_id -> [updates]

        # Strategy -> function that schedules the update.
        self.refresh_strategies = {
            RefreshStrategy.IMMEDIATE: self._immediate_refresh,
            RefreshStrategy.DEBOUNCED: self._debounced_refresh,
            RefreshStrategy.BATCH: self._batch_refresh,
            RefreshStrategy.ADAPTIVE: self._adaptive_refresh
        }

        # Debounce timers.
        self.pending_refreshes = {}  # session_id -> Timer

        # Callbacks.
        self.display_callbacks = []  # called with (session_id, segments)
        self.error_callbacks = []    # called with (session_id, exception)

        # Performance counters.
        self.performance_stats = {
            'total_updates': 0,
            'successful_updates': 0,
            'failed_updates': 0,
            'average_update_time': 0.0,
            'queue_size': 0
        }

        # Worker pool that applies updates.
        self.executor = ThreadPoolExecutor(
            max_workers=self.config.get('max_workers', 4),
            thread_name_prefix='DisplayManager'
        )

        self.logger = logging.getLogger(__name__)
        self._lock = threading.RLock()
        self._running = True
        self._processing_thread = None

        # Start the background thread that flushes timed-out batches.
        self._start_processing_thread()

    def _get_default_config(self) -> Dict:
        """Return the default configuration dictionary."""
        return {
            'max_buffer_size': 1000,
            'debounce_delay': 0.2,
            'batch_size': 5,
            'batch_timeout': 1.0,
            'max_refresh_rate': 10,  # maximum refreshes per second
            'max_workers': 4,
            'enable_highlighting': True,
            'auto_scroll': True,
            'preserve_formatting': True
        }

    def register_display_callback(self, callback: Callable[[str, List[DisplaySegment]], None]):
        """Register a callback invoked with ``(session_id, segments)`` after updates."""
        self.display_callbacks.append(callback)

    def register_error_callback(self, callback: Callable[[str, Exception], None]):
        """Register a callback invoked with ``(session_id, exception)`` on errors."""
        self.error_callbacks.append(callback)

    def update_display(self, session_id: str, segment_id: str, content: str,
                       update_type: UpdateType, confidence: float = 0.0,
                       is_final: bool = False, priority: DisplayPriority = DisplayPriority.NORMAL,
                       strategy: RefreshStrategy = RefreshStrategy.DEBOUNCED,
                       metadata: Optional[Dict] = None) -> bool:
        """Queue a display update and schedule it with the given strategy.

        Args:
            session_id: Session whose display is updated.
            segment_id: Segment targeted by the update.
            content: New text for the segment.
            update_type: Kind of change to apply.
            confidence: Confidence for the content. A non-zero value is
                stored under ``metadata['confidence']`` (unless the caller's
                metadata already has that key), which is where the update
                handlers read it from. ``0.0`` is treated as "not given".
            is_final: When True, stored under ``metadata['is_final']`` in the
                same way.
            priority: Ordering priority inside a batch.
            strategy: Refresh strategy; unknown values fall back to debounce.
            metadata: Optional extra data; copied, never mutated.

        Returns:
            True if the update was accepted and scheduled, False if an
            exception occurred (error callbacks are notified).
        """
        update_info = None
        try:
            # Work on a copy: handlers may write into the metadata.
            update_metadata = dict(metadata) if metadata else {}
            if confidence:
                update_metadata.setdefault('confidence', confidence)
            if is_final:
                update_metadata.setdefault('is_final', True)

            update_info = DisplayUpdate(
                session_id=session_id,
                segment_id=segment_id,
                content=content,
                update_type=update_type,
                priority=priority,
                timestamp=time.time(),
                metadata=update_metadata
            )

            # Track the update until it has been processed.
            with self._lock:
                self.update_queue.append(update_info)
                self.performance_stats['total_updates'] += 1
                self.performance_stats['queue_size'] = len(self.update_queue)

            # Schedule according to the strategy.
            refresh_func = self.refresh_strategies.get(strategy, self._debounced_refresh)
            refresh_func(update_info)

            return True

        except Exception as e:
            self.logger.error(f"更新显示时出错: {e}")
            if update_info is not None:
                # Scheduling failed, so the update will never be processed.
                self._dequeue_processed([update_info])
            self._handle_error(session_id, e)
            return False

    def _determine_update_type(self, content: str, confidence: float, is_final: bool) -> UpdateType:
        """Pick an update type from finality and confidence.

        Final content maps to REPLACE_FINAL, confidence above 0.8 to
        REPLACE_PARTIAL, anything else to APPEND.
        """
        if is_final:
            return UpdateType.REPLACE_FINAL
        elif confidence > 0.8:
            return UpdateType.REPLACE_PARTIAL
        else:
            return UpdateType.APPEND

    def _immediate_refresh(self, update_info: DisplayUpdate):
        """Immediate strategy: apply the update on the thread pool now."""
        self.executor.submit(self._process_update, update_info)

    def _debounced_refresh(self, update_info: DisplayUpdate, delay: Optional[float] = None):
        """Debounce strategy: (re)start the session's flush timer.

        Args:
            update_info: Update to park until the timer fires.
            delay: Seconds to wait; None uses ``config['debounce_delay']``.
                An explicit ``0`` is honoured.
        """
        session_id = update_info.session_id
        if delay is None:
            delay = self.config.get('debounce_delay', 0.2)

        with self._lock:
            # Cancel the previous timer for this session, if any.
            previous_timer = self.pending_refreshes.get(session_id)
            if previous_timer is not None:
                previous_timer.cancel()

            timer = threading.Timer(
                delay,
                self._execute_pending_updates,
                args=[session_id]
            )
            self.pending_refreshes[session_id] = timer

            # Park the update before the timer can fire.
            self.pending_updates[session_id].append(update_info)

            timer.start()

    def _batch_refresh(self, update_info: DisplayUpdate):
        """Batch strategy: park the update and flush once the batch is full.

        Batches that never fill up are flushed by ``_check_batch_timeouts``.
        """
        session_id = update_info.session_id

        with self._lock:
            self.pending_updates[session_id].append(update_info)

            batch_size = self.config.get('batch_size', 5)
            if len(self.pending_updates[session_id]) >= batch_size:
                self._execute_pending_updates(session_id)

    def _adaptive_refresh(self, update_info: DisplayUpdate):
        """Adaptive strategy: choose by priority and current backlog.

        URGENT updates are applied immediately; with more than 10 updates
        outstanding the batch strategy is used; otherwise debounce.
        """
        if update_info.priority == DisplayPriority.URGENT:
            self._immediate_refresh(update_info)
        elif self.performance_stats['queue_size'] > 10:
            self._batch_refresh(update_info)
        else:
            self._debounced_refresh(update_info)

    def _execute_pending_updates(self, session_id: str):
        """Flush the parked updates of ``session_id`` as one batch.

        Any debounce timer still registered for the session is cancelled so
        it cannot fire later for an already-flushed batch. Nothing is
        submitted when there are no parked updates.
        """
        with self._lock:
            updates = self.pending_updates.pop(session_id, None)
            timer = self.pending_refreshes.pop(session_id, None)

        if timer is not None:
            # Harmless if this call is running on that timer's own thread.
            timer.cancel()

        if not updates:
            return

        self.executor.submit(self._process_batch_updates, session_id, updates)

    def _get_buffer(self, session_id: str) -> DisplayBuffer:
        """Return the session's buffer, creating it under the manager lock.

        Creation goes through the defaultdict factory; holding the lock keeps
        two worker threads from each creating a buffer for the same session.
        """
        with self._lock:
            return self.session_buffers[session_id]

    def _dequeue_processed(self, updates: List[DisplayUpdate]):
        """Drop ``updates`` from ``update_queue`` and refresh ``queue_size``."""
        with self._lock:
            for update_info in updates:
                try:
                    self.update_queue.remove(update_info)
                except ValueError:
                    # The update was never queued (e.g. processed directly).
                    pass
            self.performance_stats['queue_size'] = len(self.update_queue)

    def _apply_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """Dispatch one update to the handler for its update type.

        Update types without a handler are ignored.
        """
        handlers = {
            UpdateType.APPEND: self._handle_append_update,
            UpdateType.REPLACE_PARTIAL: self._handle_replace_partial_update,
            UpdateType.REPLACE_FINAL: self._handle_replace_final_update,
            UpdateType.INSERT: self._handle_insert_update,
            UpdateType.DELETE: self._handle_delete_update,
            UpdateType.HIGHLIGHT: self._handle_highlight_update,
        }
        handler = handlers.get(update_info.update_type)
        if handler is not None:
            handler(buffer, update_info)

    def _process_update(self, update_info: DisplayUpdate):
        """Apply a single update, notify display callbacks, record stats.

        Exceptions are logged, counted as a failed update and forwarded to
        the error callbacks; they do not propagate.
        """
        start_time = time.time()

        try:
            session_id = update_info.session_id
            buffer = self._get_buffer(session_id)

            self._apply_update(buffer, update_info)

            # Notify display callbacks with the full segment list.
            self._trigger_display_callbacks(session_id, buffer.get_all_segments())

            processing_time = time.time() - start_time
            self._update_performance_stats(processing_time, True)

        except Exception as e:
            self.logger.error(f"处理更新时出错: {e}")
            self._update_performance_stats(time.time() - start_time, False)
            self._handle_error(update_info.session_id, e)
        finally:
            self._dequeue_processed([update_info])

    def _process_batch_updates(self, session_id: str, updates: List[DisplayUpdate]):
        """Apply a batch of updates for one session, then notify once.

        The list is sorted in place using ``DisplayUpdate`` ordering before
        being applied. An exception aborts the rest of the batch, counts the
        whole batch as failed and is forwarded to the error callbacks.
        """
        start_time = time.time()

        try:
            buffer = self._get_buffer(session_id)

            # Order by priority, then timestamp.
            updates.sort()

            for update_info in updates:
                self._process_single_update_in_batch(buffer, update_info)

            # One notification for the whole batch.
            self._trigger_display_callbacks(session_id, buffer.get_all_segments())

            processing_time = time.time() - start_time
            self._update_performance_stats(processing_time, True, len(updates))

        except Exception as e:
            self.logger.error(f"批量处理更新时出错: {e}")
            self._update_performance_stats(time.time() - start_time, False, len(updates))
            self._handle_error(session_id, e)
        finally:
            self._dequeue_processed(updates)

    def _handle_append_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """Create a segment from the update and add it to the buffer.

        ``confidence`` and ``is_final`` are read from the update's metadata,
        defaulting to 0.0 and False.
        """
        segment = DisplaySegment(
            segment_id=update_info.segment_id,
            content=update_info.content,
            position=update_info.position,
            confidence=update_info.metadata.get('confidence', 0.0),
            is_final=update_info.metadata.get('is_final', False),
            last_updated=update_info.timestamp,
            metadata=update_info.metadata
        )

        buffer.add_segment(segment)

    def _handle_replace_partial_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """Replace a segment's content with a non-final revision.

        Falls back to creating the segment when it does not exist yet.
        """
        success = buffer.update_segment(
            update_info.segment_id,
            content=update_info.content,
            confidence=update_info.metadata.get('confidence'),
            is_final=False
        )

        if not success:
            # Segment does not exist yet: create it.
            self._handle_append_update(buffer, update_info)

    def _handle_replace_final_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """Replace a segment's content with its final revision.

        Marks the segment final, uses confidence 1.0 unless the metadata
        provides one, and sets the highlight flag from
        ``config['enable_highlighting']``. Falls back to creating a final
        segment when it does not exist yet.
        """
        success = buffer.update_segment(
            update_info.segment_id,
            content=update_info.content,
            confidence=update_info.metadata.get('confidence', 1.0),
            is_final=True,
            highlight=self.config.get('enable_highlighting', True)
        )

        if not success:
            # Segment does not exist yet: create it as final.
            update_info.metadata['is_final'] = True
            self._handle_append_update(buffer, update_info)

    def _handle_insert_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """Handle an insert update.

        A real insert would need to shift positions; this simplified version
        treats it as an append.
        """
        self._handle_append_update(buffer, update_info)

    def _handle_delete_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """Remove the targeted segment from the buffer."""
        buffer.remove_segment(update_info.segment_id)

    def _handle_highlight_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """Set the highlight flag on the targeted segment."""
        buffer.update_segment(
            update_info.segment_id,
            highlight=True
        )

    def _process_single_update_in_batch(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """Apply one update of a batch.

        Uses the same per-type handlers as single-update processing, so
        replace, insert and highlight updates behave identically whether
        they arrive alone or inside a batch.
        """
        self._apply_update(buffer, update_info)

    def _trigger_display_callbacks(self, session_id: str, segments: List[DisplaySegment]):
        """Invoke every display callback; a failing callback is logged and skipped."""
        # Iterate over a snapshot so registration during dispatch is safe.
        for callback in list(self.display_callbacks):
            try:
                callback(session_id, segments)
            except Exception as e:
                self.logger.error(f"显示回调执行出错: {e}")

    def _handle_error(self, session_id: str, error: Exception):
        """Invoke every error callback; a failing callback is logged and skipped."""
        for callback in list(self.error_callbacks):
            try:
                callback(session_id, error)
            except Exception as e:
                self.logger.error(f"错误回调执行出错: {e}")

    def _update_performance_stats(self, processing_time: float, success: bool, batch_size: int = 1):
        """Record the outcome of processing one update or one batch.

        Args:
            processing_time: Wall-clock seconds spent on the update/batch.
            success: Whether processing succeeded.
            batch_size: Number of updates covered by this call.

        ``average_update_time`` is the mean time per successfully processed
        update; failures only increment ``failed_updates`` and leave the
        average untouched.
        """
        with self._lock:
            stats = self.performance_stats

            if not success:
                stats['failed_updates'] += batch_size
                return

            if batch_size <= 0:
                # Nothing was processed; avoid dividing by zero.
                return

            previous_total = stats['successful_updates']
            new_total = previous_total + batch_size
            stats['average_update_time'] = (
                stats['average_update_time'] * previous_total + processing_time
            ) / new_total
            stats['successful_updates'] = new_total

    def _start_processing_thread(self):
        """Start the daemon thread that polls for timed-out batches."""
        def processing_worker():
            while self._running:
                try:
                    self._check_batch_timeouts()
                except Exception as e:
                    self.logger.error(f"处理线程出错: {e}")
                # Sleep outside the try so a recurring error cannot turn
                # the loop into a busy spin.
                time.sleep(0.1)  # 100 ms polling interval

        processing_thread = threading.Thread(target=processing_worker, daemon=True)
        self._processing_thread = processing_thread
        processing_thread.start()
        self.logger.info("显示管理处理线程已启动")

    def _check_batch_timeouts(self):
        """Flush sessions whose oldest parked update exceeds ``batch_timeout``."""
        current_time = time.time()
        batch_timeout = self.config.get('batch_timeout', 1.0)

        with self._lock:
            expired_sessions = [
                session_id
                for session_id, updates in self.pending_updates.items()
                if updates
                and current_time - min(u.timestamp for u in updates) > batch_timeout
            ]

            # Flushing mutates pending_updates, so do it after the scan.
            for session_id in expired_sessions:
                self._execute_pending_updates(session_id)

    def get_session_display(self, session_id: str) -> List[DisplaySegment]:
        """Return the session's segments ordered by position.

        Unknown sessions yield an empty list and do not create a buffer.
        """
        with self._lock:
            buffer = self.session_buffers.get(session_id)
        if buffer is None:
            return []
        return buffer.get_all_segments()

    def clear_session_display(self, session_id: str):
        """Clear the buffer of ``session_id`` if the session exists."""
        if session_id in self.session_buffers:
            self.session_buffers[session_id].clear()
            self.logger.info(f"已清空会话显示: {session_id}")

    def get_performance_stats(self) -> Dict:
        """Return a copy of the performance counters with current ``queue_size``."""
        with self._lock:
            stats = self.performance_stats.copy()
            stats['queue_size'] = len(self.update_queue)
            return stats

    def shutdown(self):
        """Stop the background thread, cancel timers and shut the pool down.

        Updates still parked in ``pending_updates`` are not flushed.
        """
        self._running = False

        # Cancel every pending debounce timer.
        with self._lock:
            for timer in self.pending_refreshes.values():
                timer.cancel()
            self.pending_refreshes.clear()

        # Let the polling thread finish its current iteration so it does not
        # submit work to an executor that is already shut down.
        worker = getattr(self, '_processing_thread', None)
        if worker is not None and worker is not threading.current_thread():
            worker.join(timeout=1.0)

        # Wait for in-flight work, then release the pool.
        self.executor.shutdown(wait=True)

        self.logger.info("流式显示管理器已关闭")