# streaming_display_manager.py
# AIfeng/2025-07-07 15:25:48
# Streaming display management module - incremental updates and refresh strategies

import time
import threading
from typing import List, Dict, Optional, Callable, Any
from dataclasses import dataclass, field
from enum import Enum
from collections import deque, defaultdict
import logging
import asyncio
from concurrent.futures import ThreadPoolExecutor

class UpdateType(Enum):
    """Kinds of change an update can request on the display."""

    # Append content to the display.
    APPEND = "append"
    # Replace part of the content (interim result).
    REPLACE_PARTIAL = "replace_partial"
    # Final replacement of the content.
    REPLACE_FINAL = "replace_final"
    # Insert content.
    INSERT = "insert"
    # Delete content.
    DELETE = "delete"
    # Highlight content.
    HIGHLIGHT = "highlight"

class RefreshStrategy(Enum):
    """Strategies that decide when a submitted update is applied."""

    # Refresh immediately.
    IMMEDIATE = "immediate"
    # Debounced refresh.
    DEBOUNCED = "debounced"
    # Batched refresh.
    BATCH = "batch"
    # Adaptive refresh.
    ADAPTIVE = "adaptive"

class DisplayPriority(Enum):
    """Display priority levels; a larger value means a higher priority."""

    LOW = 1
    NORMAL = 2
    HIGH = 3
    URGENT = 4

@dataclass
class DisplayUpdate:
    """Description of one requested display update.

    Instances define ``<`` so that sorting a list of updates puts higher
    priorities first and, within one priority, earlier timestamps first.
    """
    # Session the update belongs to.
    session_id: str
    # Segment the update targets.
    segment_id: str
    # Text carried by the update.
    content: str
    # Kind of change requested.
    update_type: UpdateType
    # Priority used when ordering updates.
    priority: DisplayPriority
    # Time value used to order updates of equal priority.
    timestamp: float
    # Optional explicit display position; None when not specified.
    position: Optional[int] = None
    # Free-form extra data attached to the update.
    metadata: Dict = field(default_factory=dict)

    def __lt__(self, other):
        """Return True when this update should sort before ``other``."""
        own_rank = self.priority.value
        other_rank = other.priority.value
        if own_rank == other_rank:
            # Same priority: the earlier update goes first.
            return self.timestamp < other.timestamp
        # Different priorities: the higher one goes first.
        return own_rank > other_rank

@dataclass
class DisplaySegment:
    """One segment of displayed content."""
    # Identifier of the segment.
    segment_id: str
    # Text shown for the segment.
    content: str
    # Position of the segment within the display.
    position: int
    # Confidence value associated with the content.
    confidence: float
    # Whether the content is final.
    is_final: bool
    # Time value of the most recent change to the segment.
    last_updated: float
    # Whether the segment is highlighted.
    highlight: bool = False
    # Free-form extra data attached to the segment.
    metadata: Dict = field(default_factory=dict)

class DisplayBuffer:
    """Thread-safe store of display segments.

    Two indexes are kept in step: ``segments`` maps a segment id to its
    segment object and ``position_map`` maps a display position to the id of
    the segment shown there.  Every public method holds ``self._lock``.
    """

    def __init__(self, max_size: int = 1000):
        """Create an empty buffer.

        Args:
            max_size: Maximum number of segments kept; when exceeded, the
                segments with the smallest ``last_updated`` are evicted.
        """
        self.max_size = max_size
        self.segments = {}  # segment_id -> DisplaySegment
        self.position_map = {}  # position -> segment_id
        self.next_position = 0
        self._lock = threading.RLock()

    def add_segment(self, segment: "DisplaySegment") -> int:
        """Store ``segment`` and return the position it occupies.

        When ``segment.position`` is None a position is assigned: the slot of
        an already stored segment with the same id if there is one, otherwise
        the next free position.  The assigned value is written back to
        ``segment.position``.

        Args:
            segment: Segment to store; replaces any stored segment that has
                the same ``segment_id``.

        Returns:
            The position of the segment.
        """
        with self._lock:
            previous = self.segments.get(segment.segment_id)
            if previous is not None:
                # Free the old slot first, otherwise position_map would keep
                # two entries for one id and the segment would show up twice.
                self._release_position(previous)
                if segment.position is None:
                    segment.position = previous.position

            if segment.position is None:
                segment.position = self.next_position

            # Keep auto-assigned positions ahead of every position handed
            # out so far, including explicitly supplied ones.
            self.next_position = max(self.next_position, segment.position + 1)

            self.segments[segment.segment_id] = segment
            self.position_map[segment.position] = segment.segment_id

            # Enforce the size limit.
            self._cleanup_if_needed()

            return segment.position

    def update_segment(self, segment_id: str, content: Optional[str] = None,
                       confidence: Optional[float] = None,
                       is_final: Optional[bool] = None,
                       highlight: Optional[bool] = None) -> bool:
        """Change selected fields of a stored segment.

        Arguments left as None are not touched.  ``last_updated`` is set to
        the current time whenever the segment exists.

        Returns:
            True if the segment exists, False otherwise.
        """
        with self._lock:
            segment = self.segments.get(segment_id)
            if segment is None:
                return False

            if content is not None:
                segment.content = content
            if confidence is not None:
                segment.confidence = confidence
            if is_final is not None:
                segment.is_final = is_final
            if highlight is not None:
                segment.highlight = highlight

            segment.last_updated = time.time()
            return True

    def get_segment(self, segment_id: str) -> Optional["DisplaySegment"]:
        """Return the segment stored under ``segment_id``, or None."""
        with self._lock:
            return self.segments.get(segment_id)

    def get_segments_by_range(self, start_pos: int, end_pos: int) -> List["DisplaySegment"]:
        """Return the segments at positions ``start_pos``..``end_pos`` (inclusive).

        The result is ordered by ascending position.
        """
        with self._lock:
            # Walk the occupied positions rather than the whole numeric
            # range, so a wide range over a sparse buffer stays cheap.
            wanted = sorted(
                pos for pos in self.position_map
                if start_pos <= pos <= end_pos
            )
            return self._segments_at(wanted)

    def get_all_segments(self) -> List["DisplaySegment"]:
        """Return every mapped segment, ordered by ascending position."""
        with self._lock:
            return self._segments_at(sorted(self.position_map))

    def remove_segment(self, segment_id: str) -> bool:
        """Remove a segment.

        Returns:
            True if the segment existed and was removed, False otherwise.
        """
        with self._lock:
            segment = self.segments.pop(segment_id, None)
            if segment is None:
                return False

            self._release_position(segment)
            return True

    def _segments_at(self, positions) -> List["DisplaySegment"]:
        """Resolve positions to segments, skipping ids without a segment."""
        found = []
        for pos in positions:
            segment = self.segments.get(self.position_map[pos])
            if segment is not None:
                found.append(segment)
        return found

    def _release_position(self, segment) -> None:
        """Drop the position entry of ``segment`` if it still owns it.

        The ownership check keeps the call from raising on a missing entry
        and from deleting a slot that now belongs to a different segment.
        """
        if self.position_map.get(segment.position) == segment.segment_id:
            del self.position_map[segment.position]

    def _cleanup_if_needed(self):
        """Evict the least recently updated segments beyond ``max_size``."""
        excess_count = len(self.segments) - self.max_size
        if excess_count <= 0:
            return

        # Oldest first, judged by last_updated.
        oldest_first = sorted(
            self.segments.values(),
            key=lambda s: s.last_updated
        )
        for stale in oldest_first[:excess_count]:
            self.remove_segment(stale.segment_id)

    def clear(self):
        """Remove all segments and reset position numbering to 0."""
        with self._lock:
            self.segments.clear()
            self.position_map.clear()
            self.next_position = 0

class StreamingDisplayManager:
    """Manage incremental display updates for streaming sessions.

    Each session owns a ``DisplayBuffer``.  Callers submit changes through
    ``update_display``; depending on the chosen ``RefreshStrategy`` an update
    is applied right away on the thread pool, or parked in
    ``pending_updates`` and applied later as a batch.  After updates have
    been applied, every registered display callback is called with the
    session id and the buffer's segments.
    """

    def __init__(self, config: Optional[Dict] = None):
        """Create the manager and start its background timeout checker.

        Args:
            config: Settings dict.  When omitted or empty, the dict returned
                by ``_get_default_config`` is used.
        """
        self.config = config or self._get_default_config()

        # Display buffers, one per session, created on first use.
        self.session_buffers = defaultdict(lambda: DisplayBuffer(
            self.config.get('max_buffer_size', 1000)
        ))

        # Updates that were submitted but not yet processed.
        self.update_queue = deque()
        self.pending_updates = defaultdict(list)  # session_id -> [updates]

        # Refresh strategy dispatch.
        self.refresh_strategies = {
            RefreshStrategy.IMMEDIATE: self._immediate_refresh,
            RefreshStrategy.DEBOUNCED: self._debounced_refresh,
            RefreshStrategy.BATCH: self._batch_refresh,
            RefreshStrategy.ADAPTIVE: self._adaptive_refresh
        }

        # Update type dispatch, shared by the single and the batch path so
        # both apply an update the same way.
        self._update_handlers = {
            UpdateType.APPEND: self._handle_append_update,
            UpdateType.REPLACE_PARTIAL: self._handle_replace_partial_update,
            UpdateType.REPLACE_FINAL: self._handle_replace_final_update,
            UpdateType.INSERT: self._handle_insert_update,
            UpdateType.DELETE: self._handle_delete_update,
            UpdateType.HIGHLIGHT: self._handle_highlight_update
        }

        # Debounce timers.
        self.pending_refreshes = {}  # session_id -> Timer

        # Callbacks.
        self.display_callbacks = []  # called after updates were applied
        self.error_callbacks = []    # called when an error occurred

        # Performance monitoring.
        self.performance_stats = {
            'total_updates': 0,
            'successful_updates': 0,
            'failed_updates': 0,
            'average_update_time': 0.0,
            'queue_size': 0
        }

        # Thread pool that applies updates.
        self.executor = ThreadPoolExecutor(
            max_workers=self.config.get('max_workers', 4),
            thread_name_prefix='DisplayManager'
        )

        self.logger = logging.getLogger(__name__)
        self._lock = threading.RLock()
        self._running = True
        self._processing_thread = None

        # Start the background thread that flushes timed-out batches.
        self._start_processing_thread()

    def _get_default_config(self) -> Dict:
        """Return the default configuration."""
        return {
            'max_buffer_size': 1000,
            'debounce_delay': 0.2,
            'batch_size': 5,
            'batch_timeout': 1.0,
            'max_refresh_rate': 10,  # maximum refreshes per second
            'max_workers': 4,
            'enable_highlighting': True,
            'auto_scroll': True,
            'preserve_formatting': True
        }

    def register_display_callback(self, callback: Callable[[str, List[DisplaySegment]], None]):
        """Register a callback called as ``callback(session_id, segments)``."""
        self.display_callbacks.append(callback)

    def register_error_callback(self, callback: Callable[[str, Exception], None]):
        """Register a callback called as ``callback(session_id, error)``."""
        self.error_callbacks.append(callback)

    def update_display(self, session_id: str, segment_id: str, content: str,
                      update_type: UpdateType, confidence: float = 0.0,
                      is_final: bool = False, priority: DisplayPriority = DisplayPriority.NORMAL,
                      strategy: RefreshStrategy = RefreshStrategy.DEBOUNCED,
                      metadata: Dict = None) -> bool:
        """Submit a display update.

        Args:
            session_id: Session whose display is updated.
            segment_id: Segment the update targets.
            content: New text.
            update_type: Kind of change to apply.
            confidence: Confidence of the content.  A non-zero value is
                stored under ``metadata['confidence']`` unless the metadata
                already has that key.
            is_final: When True, ``metadata['is_final']`` is set to True
                unless the metadata already has that key.
            priority: Priority used when ordering updates inside a batch.
            strategy: Refresh strategy; unknown values fall back to the
                debounced strategy.
            metadata: Extra data for the update.  The dict is copied, so the
                caller's object is never modified.

        Returns:
            True if the update was queued and handed to the strategy, False
            if an exception occurred (it is logged and passed to the error
            callbacks).
        """
        try:
            # Work on a copy: handlers may write into the metadata.
            update_metadata = dict(metadata) if metadata else {}
            # The handlers read confidence / is_final from the metadata, so
            # the explicit arguments have to be carried there.  Defaults are
            # not injected, which keeps the handlers' own fallbacks intact.
            if confidence:
                update_metadata.setdefault('confidence', confidence)
            if is_final:
                update_metadata.setdefault('is_final', True)

            update_info = DisplayUpdate(
                session_id=session_id,
                segment_id=segment_id,
                content=content,
                update_type=update_type,
                priority=priority,
                timestamp=time.time(),
                metadata=update_metadata
            )

            # Track the update as outstanding.
            with self._lock:
                self.update_queue.append(update_info)
                self.performance_stats['total_updates'] += 1
                self.performance_stats['queue_size'] = len(self.update_queue)

            # Hand over to the refresh strategy.
            refresh_func = self.refresh_strategies.get(strategy, self._debounced_refresh)
            refresh_func(update_info)

            return True

        except Exception as e:
            self.logger.error(f"更新显示时出错: {e}")
            self._handle_error(session_id, e)
            return False

    def _determine_update_type(self, content: str, confidence: float, is_final: bool) -> UpdateType:
        """Pick an update type from the finality flag and the confidence."""
        if is_final:
            return UpdateType.REPLACE_FINAL
        elif confidence > 0.8:
            return UpdateType.REPLACE_PARTIAL
        else:
            return UpdateType.APPEND

    def _immediate_refresh(self, update_info: DisplayUpdate):
        """Immediate strategy: apply the update on the thread pool now."""
        self.executor.submit(self._process_update, update_info)

    def _debounced_refresh(self, update_info: DisplayUpdate, delay: float = None):
        """Debounced strategy: (re)start the session's timer and park the update.

        Args:
            update_info: Update to park.
            delay: Seconds to wait; None uses ``config['debounce_delay']``.
        """
        session_id = update_info.session_id
        if delay is None:  # an explicit 0 is a valid delay
            delay = self.config.get('debounce_delay', 0.2)

        with self._lock:
            # Cancel the previous timer of this session.
            previous_timer = self.pending_refreshes.get(session_id)
            if previous_timer is not None:
                previous_timer.cancel()

            # Arm a new timer.
            timer = threading.Timer(
                delay,
                self._execute_pending_updates,
                args=[session_id]
            )
            self.pending_refreshes[session_id] = timer

            # Park the update.
            self.pending_updates[session_id].append(update_info)

            timer.start()

    def _batch_refresh(self, update_info: DisplayUpdate):
        """Batch strategy: park the update and flush once the batch is full.

        Batches that never fill up are flushed by ``_check_batch_timeouts``.
        """
        session_id = update_info.session_id

        with self._lock:
            self.pending_updates[session_id].append(update_info)

            batch_size = self.config.get('batch_size', 5)
            if len(self.pending_updates[session_id]) >= batch_size:
                self._execute_pending_updates(session_id)

    def _adaptive_refresh(self, update_info: DisplayUpdate):
        """Adaptive strategy: choose by priority and number of outstanding updates."""
        if update_info.priority == DisplayPriority.URGENT:
            self._immediate_refresh(update_info)
        elif self.performance_stats['queue_size'] > 10:
            self._batch_refresh(update_info)
        else:
            self._debounced_refresh(update_info)

    def _execute_pending_updates(self, session_id: str):
        """Flush the parked updates of a session to the thread pool."""
        with self._lock:
            # Whatever triggered the flush, a still-armed debounce timer for
            # this session has nothing left to do afterwards.
            timer = self.pending_refreshes.pop(session_id, None)
            if timer is not None:
                timer.cancel()

            updates = self.pending_updates.pop(session_id, None)

        if not updates:
            # Nothing parked: skip the empty batch and its callback round.
            return

        try:
            self.executor.submit(self._process_batch_updates, session_id, updates)
        except RuntimeError as e:
            # submit() raises RuntimeError once the executor is shut down;
            # report it instead of letting the calling thread die.
            self.logger.error(f"提交批量更新时出错: {e}")
            self._discard_from_queue(updates)
            self._handle_error(session_id, e)

    def _get_buffer(self, session_id: str) -> DisplayBuffer:
        """Return the session's buffer, creating it under the lock if needed."""
        with self._lock:
            return self.session_buffers[session_id]

    def _discard_from_queue(self, updates: List[DisplayUpdate]):
        """Remove handled updates from ``update_queue`` and refresh ``queue_size``."""
        with self._lock:
            for update_info in updates:
                try:
                    self.update_queue.remove(update_info)
                except ValueError:
                    # Not in the queue (never queued or already removed).
                    pass
            self.performance_stats['queue_size'] = len(self.update_queue)

    def _apply_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """Apply one update to ``buffer`` via the handler for its type."""
        handler = self._update_handlers.get(update_info.update_type)
        if handler is not None:
            handler(buffer, update_info)

    def _process_update(self, update_info: DisplayUpdate):
        """Apply a single update, notify display callbacks, record stats."""
        start_time = time.time()

        try:
            session_id = update_info.session_id
            buffer = self._get_buffer(session_id)

            self._apply_update(buffer, update_info)

            # Notify display callbacks.
            self._trigger_display_callbacks(session_id, buffer.get_all_segments())

            # Record stats.
            processing_time = time.time() - start_time
            self._update_performance_stats(processing_time, True)

        except Exception as e:
            self.logger.error(f"处理更新时出错: {e}")
            self._update_performance_stats(time.time() - start_time, False)
            self._handle_error(update_info.session_id, e)
        finally:
            # Handled either way: no longer outstanding.
            self._discard_from_queue([update_info])

    def _process_batch_updates(self, session_id: str, updates: List[DisplayUpdate]):
        """Apply a batch of updates, then notify display callbacks once."""
        start_time = time.time()

        try:
            buffer = self._get_buffer(session_id)

            # Order by priority, then timestamp (DisplayUpdate.__lt__).
            updates.sort()

            for update_info in updates:
                self._process_single_update_in_batch(buffer, update_info)

            # Notify display callbacks.
            self._trigger_display_callbacks(session_id, buffer.get_all_segments())

            # Record stats.
            processing_time = time.time() - start_time
            self._update_performance_stats(processing_time, True, len(updates))

        except Exception as e:
            self.logger.error(f"批量处理更新时出错: {e}")
            self._update_performance_stats(time.time() - start_time, False, len(updates))
            self._handle_error(session_id, e)
        finally:
            # Handled either way: no longer outstanding.
            self._discard_from_queue(updates)

    def _handle_append_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """APPEND: build a segment from the update and add it to the buffer."""
        segment = DisplaySegment(
            segment_id=update_info.segment_id,
            content=update_info.content,
            position=update_info.position,
            confidence=update_info.metadata.get('confidence', 0.0),
            is_final=update_info.metadata.get('is_final', False),
            last_updated=update_info.timestamp,
            metadata=update_info.metadata
        )

        buffer.add_segment(segment)

    def _handle_replace_partial_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """REPLACE_PARTIAL: overwrite the segment's content, marking it non-final."""
        success = buffer.update_segment(
            update_info.segment_id,
            content=update_info.content,
            confidence=update_info.metadata.get('confidence'),
            is_final=False
        )

        if not success:
            # The segment does not exist yet: create it.
            self._handle_append_update(buffer, update_info)

    def _handle_replace_final_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """REPLACE_FINAL: overwrite the segment's content and mark it final."""
        success = buffer.update_segment(
            update_info.segment_id,
            content=update_info.content,
            confidence=update_info.metadata.get('confidence', 1.0),
            is_final=True,
            highlight=self.config.get('enable_highlighting', True)
        )

        if not success:
            # The segment does not exist yet: create it as a final segment.
            update_info.metadata['is_final'] = True
            self._handle_append_update(buffer, update_info)

    def _handle_insert_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """INSERT: currently handled like APPEND.

        A real insert would have to re-number the positions of the following
        segments; that is not implemented.
        """
        self._handle_append_update(buffer, update_info)

    def _handle_delete_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """DELETE: remove the segment from the buffer."""
        buffer.remove_segment(update_info.segment_id)

    def _handle_highlight_update(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """HIGHLIGHT: turn on the segment's highlight flag."""
        buffer.update_segment(
            update_info.segment_id,
            highlight=True
        )

    def _process_single_update_in_batch(self, buffer: DisplayBuffer, update_info: DisplayUpdate):
        """Apply one update of a batch.

        Uses the same per-type handlers as ``_process_update`` so that an
        update has the same effect whichever strategy delivered it.
        """
        self._apply_update(buffer, update_info)

    def _trigger_display_callbacks(self, session_id: str, segments: List[DisplaySegment]):
        """Call every display callback; a failing callback is logged and skipped."""
        # Iterate over a snapshot so registering a callback meanwhile is safe.
        for callback in list(self.display_callbacks):
            try:
                callback(session_id, segments)
            except Exception as e:
                self.logger.error(f"显示回调执行出错: {e}")

    def _handle_error(self, session_id: str, error: Exception):
        """Call every error callback; a failing callback is logged and skipped."""
        for callback in list(self.error_callbacks):
            try:
                callback(session_id, error)
            except Exception as e:
                self.logger.error(f"错误回调执行出错: {e}")

    def _update_performance_stats(self, processing_time: float, success: bool, batch_size: int = 1):
        """Record the outcome of processing ``batch_size`` updates.

        ``average_update_time`` is the running mean of processing time per
        successfully processed update; failures only bump the failure count.

        Args:
            processing_time: Seconds spent on the whole batch.
            success: Whether processing succeeded.
            batch_size: Number of updates the call covered.
        """
        with self._lock:
            if not success:
                self.performance_stats['failed_updates'] += batch_size
                return

            self.performance_stats['successful_updates'] += batch_size
            total_successful = self.performance_stats['successful_updates']
            if batch_size > 0 and total_successful > 0:
                previous_count = total_successful - batch_size
                previous_avg = self.performance_stats['average_update_time']
                self.performance_stats['average_update_time'] = (
                    previous_avg * previous_count + processing_time
                ) / total_successful

    def _start_processing_thread(self):
        """Start the daemon thread that periodically flushes timed-out batches."""
        def processing_worker():
            while self._running:
                try:
                    self._check_batch_timeouts()
                except Exception as e:
                    self.logger.error(f"处理线程出错: {e}")
                # Sleep outside the try so a failing check cannot turn the
                # loop into a busy spin.  100 ms check interval.
                time.sleep(0.1)

        processing_thread = threading.Thread(target=processing_worker, daemon=True)
        self._processing_thread = processing_thread
        processing_thread.start()
        self.logger.info("显示管理处理线程已启动")

    def _check_batch_timeouts(self):
        """Flush every session whose oldest parked update exceeded ``batch_timeout``."""
        current_time = time.time()
        batch_timeout = self.config.get('batch_timeout', 1.0)

        with self._lock:
            # Collect first: flushing modifies pending_updates.
            expired_sessions = [
                session_id
                for session_id, updates in self.pending_updates.items()
                if updates
                and current_time - min(u.timestamp for u in updates) > batch_timeout
            ]

            for session_id in expired_sessions:
                self._execute_pending_updates(session_id)

    def get_session_display(self, session_id: str) -> List[DisplaySegment]:
        """Return the session's segments ordered by position.

        An unknown session yields an empty list and no buffer is created.
        """
        with self._lock:
            buffer = self.session_buffers.get(session_id)
        if buffer is None:
            return []
        return buffer.get_all_segments()

    def clear_session_display(self, session_id: str):
        """Empty the session's buffer, if the session has one."""
        with self._lock:
            buffer = self.session_buffers.get(session_id)
        if buffer is not None:
            buffer.clear()
            self.logger.info(f"已清空会话显示: {session_id}")

    def get_performance_stats(self) -> Dict:
        """Return a copy of the stats with a current ``queue_size``."""
        with self._lock:
            stats = self.performance_stats.copy()
            stats['queue_size'] = len(self.update_queue)
            return stats

    def shutdown(self):
        """Stop the background thread, cancel timers and close the thread pool.

        Updates still parked behind a cancelled timer are not applied.
        """
        self._running = False

        # Cancel all armed debounce timers.
        with self._lock:
            for timer in self.pending_refreshes.values():
                timer.cancel()
            self.pending_refreshes.clear()

        # Let the timeout checker finish before the pool closes, so it cannot
        # submit to a closed pool.  Never join the current thread.
        worker = self._processing_thread
        if worker is not None and worker is not threading.current_thread():
            worker.join(timeout=1.0)

        # Close the thread pool.
        self.executor.shutdown(wait=True)

        self.logger.info("流式显示管理器已关闭")