# llm.py
import time
import os
import time
import json
from basereal import BaseReal
from logger import logger
def llm_response(message, nerfreal: BaseReal):
    """Dispatch a chat message to the configured LLM backend.

    Reads ``model_type`` from the JSON config and routes the request to the
    matching handler. Any unexpected failure is logged and an apology line is
    pushed to the avatar instead of raising.

    Args:
        message: The user's chat message.
        nerfreal: Avatar session object; receives output via put_msg_txt.
    """
    t0 = time.perf_counter()

    # Choose the backend from config; "qwen" is the fallback default.
    cfg = _load_llm_config()
    backend = cfg.get("model_type", "qwen")
    logger.info(f"使用LLM模型: {backend}")

    dispatch = {
        "doubao": _handle_doubao_response,
        "qwen": _handle_qwen_response,
    }
    try:
        handler = dispatch.get(backend)
        if handler is None:
            logger.error(f"不支持的模型类型: {backend}")
            nerfreal.put_msg_txt("抱歉,当前模型配置有误,请检查配置文件。")
            return None
        return handler(message, nerfreal, t0)
    except Exception as e:
        # Top-level boundary: swallow, log, and tell the user gracefully.
        logger.error(f"LLM响应处理异常: {e}")
        nerfreal.put_msg_txt("抱歉,我现在无法回答您的问题,请稍后再试。")
def _load_llm_config():
"""加载LLM配置文件"""
config_path = "config/llm_config.json"
try:
with open(config_path, 'r', encoding='utf-8') as f:
return json.load(f)
except FileNotFoundError:
logger.warning(f"LLM配置文件 {config_path} 不存在,使用默认配置")
return {"model_type": "qwen"}
except json.JSONDecodeError as e:
logger.error(f"LLM配置文件格式错误: {e}")
return {"model_type": "qwen"}
def _handle_doubao_response(message, nerfreal, start_time):
    """Stream a reply from the Doubao model, pushing sentence chunks to the avatar.

    Streamed tokens are buffered; whenever a punctuation mark is seen and the
    buffer exceeds 10 characters it is flushed via ``nerfreal.put_msg_txt``.

    Args:
        message: The user's chat message.
        nerfreal: Avatar session object receiving the sentence chunks.
        start_time: perf_counter timestamp taken when the request began.

    Returns:
        The full response string, or None when the model is unavailable
        or the request fails.
    """
    try:
        from llm.Doubao import Doubao

        bot = Doubao()
        # Expose the model instance/name so the UI can display it.
        nerfreal.llm = bot
        nerfreal.llm_model_name = bot.model_name
        logger.info(f"豆包模型初始化时间: {time.perf_counter()-start_time:.3f}s")

        buffer = ""
        saw_first_token = False
        breakers = ",.!;:,。!?:;"

        def on_token(content):
            nonlocal buffer, saw_first_token
            if not saw_first_token:
                logger.info(f"豆包首个token时间: {time.perf_counter()-start_time:.3f}s")
                saw_first_token = True
            # Sentence splitting: cut at punctuation, flush long-enough chunks.
            tail = 0
            for idx, ch in enumerate(content):
                if ch in breakers:
                    buffer += content[tail:idx+1]
                    tail = idx + 1
                    if len(buffer) > 10:
                        logger.info(f"豆包分句输出: {buffer}")
                        nerfreal.put_msg_txt(buffer)
                        buffer = ""
            buffer += content[tail:]

        full_response = bot.chat_stream(message, callback=on_token)

        # Flush whatever remains after the stream ends.
        if buffer:
            logger.info(f"豆包最终输出: {buffer}")
            nerfreal.put_msg_txt(buffer)

        logger.info(f"豆包总响应时间: {time.perf_counter()-start_time:.3f}s")
        return full_response
    except ImportError:
        logger.error("豆包模块导入失败,请检查Doubao.py文件")
        nerfreal.put_msg_txt("抱歉,豆包模型暂时不可用。")
    except Exception as e:
        logger.error(f"豆包模型处理异常: {e}")
        nerfreal.put_msg_txt("抱歉,豆包模型处理出现问题。")
def _handle_qwen_response(message, nerfreal, start_time):
    """Stream a reply from Qwen via the DashScope OpenAI-compatible endpoint.

    Buffers streamed tokens and flushes sentence-sized chunks (cut at
    punctuation, >10 chars) to the avatar via ``nerfreal.put_msg_txt``.

    Args:
        message: The user's chat message.
        nerfreal: Avatar session object receiving the sentence chunks.
        start_time: perf_counter timestamp taken when the request began.
    """
    from openai import OpenAI

    client = OpenAI(
        api_key=os.getenv("DASHSCOPE_API_KEY"),
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )

    # Minimal wrapper so the UI can show which model is serving.
    class QwenWrapper:
        def __init__(self):
            self.model_name = "通义千问"

    nerfreal.llm = QwenWrapper()
    nerfreal.llm_model_name = "通义千问"
    logger.info(f"通义千问初始化时间: {time.perf_counter()-start_time:.3f}s")

    stream = client.chat.completions.create(
        model="qwen-plus",
        messages=[
            {'role': 'system', 'content': '你是小艺,是由艺云展陈开发的AI语音聊天机器人,回答风格精简。'},
            {'role': 'user', 'content': message},
        ],
        stream=True,
        stream_options={"include_usage": True}
    )

    buffer = ""
    saw_first_token = False
    breakers = ",.!;:,。!?:;"
    for chunk in stream:
        # The final usage-only chunk carries no choices — skip it.
        if not chunk.choices:
            continue
        if not saw_first_token:
            logger.info(f"通义千问首个token时间: {time.perf_counter()-start_time:.3f}s")
            saw_first_token = True
        delta = chunk.choices[0].delta.content
        if not delta:
            continue
        # Sentence splitting: cut at punctuation, flush long-enough chunks.
        tail = 0
        for idx, ch in enumerate(delta):
            if ch in breakers:
                buffer += delta[tail:idx+1]
                tail = idx + 1
                if len(buffer) > 10:
                    logger.info(f"通义千问分句输出: {buffer}")
                    nerfreal.put_msg_txt(buffer)
                    buffer = ""
        buffer += delta[tail:]

    logger.info(f"通义千问总响应时间: {time.perf_counter()-start_time:.3f}s")
    # Flush whatever remains after the stream ends.
    if buffer:
        nerfreal.put_msg_txt(buffer)