戒酒的李白

Private database operation tools completed.

1 """ 1 """
2 -专为 AI Agent 设计的舆情搜索工具集 (Tavily) 2 +专为 AI Agent 设计的本地舆情数据库查询工具集 (MediaCrawlerDB)
3 3
4 -版本: 1.5 4 +版本: 3.0
5 最后更新: 2025-08-22 5 最后更新: 2025-08-22
6 6
7 -此脚本将复杂的Tavily搜索功能分解为一系列目标明确、参数极少的独立工具,  
8 -专为AI Agent调用而设计。Agent只需根据任务意图选择合适的工具,  
9 -无需理解复杂的参数组合。所有工具默认搜索“新闻”(topic='news')。 7 +此脚本将复杂的本地MySQL数据库查询功能封装成一系列目标明确、参数清晰的独立工具,
  8 +专为AI Agent调用而设计。Agent只需根据任务意图(如搜索热点、全局搜索话题、
  9 +按时间范围分析、获取评论)选择合适的工具,无需编写复杂的SQL语句。
10 10
11 -新特性:  
12 -- 新增 `basic_search_news` 工具,用于执行标准、通用的新闻搜索。  
13 -- 每个搜索结果现在都包含 `published_date` (新闻发布日期)。 11 +V3.0 核心更新:
  12 +- 智能热度计算: `search_hot_content`不再需要`sort_by`参数,改为内部使用统一的加权热度算法,
  13 + 综合点赞、评论、分享、观看等数据计算热度分值,使结果更智能、更符合综合热度。
  14 +- 新增平台精搜工具: 新增 `search_topic_on_platform` 工具,作为特例,
  15 + 允许Agent在特定平台(B站、微博等七大平台)上对某一话题进行精确搜索,并支持时间筛选。
  16 +- 结构优化: 调整了数据结构与函数文档,以适应新功能。
14 17
15 主要工具: 18 主要工具:
16 -- basic_search_news: (新增) 执行标准、快速的通用新闻搜索。  
17 -- deep_search_news: 对主题进行最全面的深度分析。  
18 -- search_news_last_24_hours: 获取24小时内的最新动态。  
19 -- search_news_last_week: 获取过去一周的主要报道。  
20 -- search_images_for_news: 查找与新闻主题相关的图片。  
21 -- search_news_by_date: 在指定的历史日期范围内搜索。 19 +- search_hot_content: 查找指定时间范围内的综合热度最高的内容。
  20 +- search_topic_globally: 在整个数据库中全局搜索与特定话题相关的所有内容和评论。
  21 +- search_topic_by_date: 在指定的历史日期范围内搜索与特定话题相关的内容。
  22 +- get_comments_for_topic: 专门提取公众对于某一特定话题的评论数据。
  23 +- search_topic_on_platform: 在指定的单个社交媒体平台上搜索特定话题。
22 """ 24 """
23 25
24 import os 26 import os
25 -from typing import List, Dict, Any, Optional 27 +import json
  28 +import pymysql
  29 +import pymysql.cursors
  30 +from typing import List, Dict, Any, Optional, Literal
26 from dataclasses import dataclass, field 31 from dataclasses import dataclass, field
27 -  
28 -# 运行前请确保已安装Tavily库: pip install tavily-python  
29 -try:  
30 - from tavily import TavilyClient  
31 -except ImportError:  
32 - raise ImportError("Tavily库未安装,请运行 `pip install tavily-python` 进行安装。") 32 +from datetime import datetime, timedelta, date
33 33
34 # --- 1. 数据结构定义 --- 34 # --- 1. 数据结构定义 ---
35 35
@dataclass
class QueryResult:
    """One normalized record returned by any of the database query tools.

    Rows from the different platform tables are mapped onto this single shape
    so that callers do not have to know each table's column names.
    """

    # Platform prefix of the source table, e.g. 'bilibili' or 'weibo'.
    platform: str
    # Kind of record: 'video', 'note', 'content', 'comment' or 'news'.
    content_type: str
    # Title when the row has one, otherwise the body text.
    title_or_content: str
    author_nickname: Optional[str] = None
    url: Optional[str] = None
    publish_time: Optional[datetime] = None
    # Unified interaction counters keyed by metric name ('likes', 'comments', ...).
    engagement: Dict[str, int] = field(default_factory=dict)
    # Crawl keyword stored alongside the row, when the table has one.
    source_keyword: Optional[str] = None
    # Weighted hotness score; stays 0.0 for tools that do not compute it.
    hotness_score: float = 0.0
    # Name of the table the row was read from.
    source_table: str = ""
54 49
@dataclass
class DBResponse:
    """Envelope returned by every query tool.

    Carries the tool name and the parameters it was called with, the
    normalized results, and an error message when the call was rejected.
    """

    # Name of the tool method that produced this response.
    tool_name: str
    # Arguments the tool was invoked with, echoed back for logging.
    parameters: Dict[str, Any]
    results: List[QueryResult] = field(default_factory=list)
    # Number of results; set by the tool, not derived automatically.
    results_count: int = 0
    # Populated only when the tool could not run the request.
    error_message: Optional[str] = None
64 58
65 # --- 2. 核心客户端与专用工具集 --- 59 # --- 2. 核心客户端与专用工具集 ---
66 60
class MediaCrawlerDB:
    """Client bundling the opinion-database query tools exposed to an AI agent.

    Each public method is a self-contained tool that builds a parameterized
    SQL query, runs it through ``_execute_query`` and returns a ``DBResponse``.
    Connection settings are read from environment variables (see ``__init__``);
    a new connection is opened and closed for every query.
    """

    # Weights of the unified hotness score computed by search_hot_content.
    W_LIKE = 1.0
    W_COMMENT = 5.0
    W_SHARE = 10.0  # shares / reposts / favorites / coins: high-value interactions
    W_VIEW = 0.1
    W_DANMAKU = 0.5

    # Required db_config key -> environment variable that supplies it.
    _REQUIRED_ENV = {
        'host': 'DB_HOST',
        'user': 'DB_USER',
        'password': 'DB_PASSWORD',
        'db': 'DB_NAME',
    }

    # Unified metric name -> candidate column names, checked in this order.
    _ENGAGEMENT_COLUMNS = {
        'likes': ['liked_count', 'like_count', 'voteup_count', 'comment_like_count'],
        'comments': ['video_comment', 'comments_count', 'comment_count', 'total_replay_num', 'sub_comment_count'],
        'shares': ['video_share_count', 'shared_count', 'share_count', 'total_forwards'],
        'views': ['video_play_count', 'viewd_count'],
        'favorites': ['video_favorite_count', 'collected_count'],
        'coins': ['video_coin_count'],
        'danmaku': ['video_danmaku'],
    }

    # Per platform: the content table (with its time column) followed by the
    # comment table. 'time_type' says how the time column is stored:
    # 'sec' / 'ms' numeric epoch, 'sec_str' epoch seconds stored as text,
    # 'str' / 'date_str' date-like text.
    _PLATFORM_CONFIGS = {
        'bilibili': [
            {'table': 'bilibili_video', 'fields': ['title', 'desc', 'source_keyword'], 'type': 'video',
             'time_col': 'create_time', 'time_type': 'sec'},
            {'table': 'bilibili_video_comment', 'fields': ['content'], 'type': 'comment'},
        ],
        'douyin': [
            {'table': 'douyin_aweme', 'fields': ['title', 'desc', 'source_keyword'], 'type': 'video',
             'time_col': 'create_time', 'time_type': 'ms'},
            {'table': 'douyin_aweme_comment', 'fields': ['content'], 'type': 'comment'},
        ],
        'kuaishou': [
            {'table': 'kuaishou_video', 'fields': ['title', 'desc', 'source_keyword'], 'type': 'video',
             'time_col': 'create_time', 'time_type': 'ms'},
            {'table': 'kuaishou_video_comment', 'fields': ['content'], 'type': 'comment'},
        ],
        'weibo': [
            {'table': 'weibo_note', 'fields': ['content', 'source_keyword'], 'type': 'note',
             'time_col': 'create_date_time', 'time_type': 'str'},
            {'table': 'weibo_note_comment', 'fields': ['content'], 'type': 'comment'},
        ],
        'xhs': [
            {'table': 'xhs_note', 'fields': ['title', 'desc', 'tag_list', 'source_keyword'], 'type': 'note',
             'time_col': 'time', 'time_type': 'ms'},
            {'table': 'xhs_note_comment', 'fields': ['content'], 'type': 'comment'},
        ],
        'zhihu': [
            {'table': 'zhihu_content', 'fields': ['title', 'desc', 'content_text', 'source_keyword'], 'type': 'content',
             'time_col': 'created_time', 'time_type': 'sec_str'},
            {'table': 'zhihu_comment', 'fields': ['content'], 'type': 'comment'},
        ],
        'tieba': [
            {'table': 'tieba_note', 'fields': ['title', 'desc', 'source_keyword'], 'type': 'note',
             'time_col': 'publish_time', 'time_type': 'str'},
            {'table': 'tieba_comment', 'fields': ['content'], 'type': 'comment'},
        ],
    }

    # News table that belongs to no social platform.
    _NEWS_CONFIG = {'table': 'daily_news', 'fields': ['title'], 'type': 'news',
                    'time_col': 'crawl_date', 'time_type': 'date_str'}

    # Columns tried, in order, when a row's publish time is not pinned by config.
    _TIME_COLUMN_FALLBACKS = ('create_time', 'time', 'created_time', 'publish_time',
                              'crawl_date', 'create_date_time')

    # table name -> column list; class-level, so shared by all instances.
    _table_columns_cache = {}

    def __init__(self):
        """Read the connection settings from the environment.

        Required: DB_HOST, DB_USER, DB_PASSWORD, DB_NAME.
        Optional: DB_PORT (default 3306), DB_CHARSET (default utf8mb4).

        Raises:
            ValueError: if a required variable is unset or empty, or DB_PORT
                is not an integer.
        """
        settings = {key: os.getenv(env_name) for key, env_name in self._REQUIRED_ENV.items()}
        # Report the real environment variable names (the 'db' key is fed by
        # DB_NAME, not "DB_DB").
        missing = [self._REQUIRED_ENV[key] for key, value in settings.items() if not value]
        if missing:
            raise ValueError(f"数据库配置缺失! 请设置环境变量或在代码中提供: {', '.join(missing)}")
        self.db_config = {
            **settings,
            # `or` also covers DB_PORT being set to an empty string.
            'port': int(os.getenv("DB_PORT") or 3306),
            'charset': os.getenv("DB_CHARSET", "utf8mb4"),
            'cursorclass': pymysql.cursors.DictCursor,
        }

    def _execute_query(self, query: str, params: Optional[tuple] = None) -> List[Dict[str, Any]]:
        """Run one parameterized query on a fresh connection.

        Best effort by design: a ``pymysql.Error`` is printed and an empty list
        is returned instead of propagating, so one failing table does not
        abort a multi-table tool call.
        """
        conn = None
        try:
            conn = pymysql.connect(**self.db_config)
            with conn.cursor() as cursor:
                cursor.execute(query, params or ())
                return cursor.fetchall()
        except pymysql.Error as e:
            print(f"数据库查询时发生错误: {e}")
            return []
        finally:
            if conn:
                conn.close()

    @staticmethod
    def _to_datetime(ts: Any) -> Optional[datetime]:
        """Convert a stored timestamp into a ``datetime``, or None if impossible.

        Accepts datetime/date objects, numeric epochs (values above 1e12 are
        treated as milliseconds, otherwise seconds), digit-only strings, and
        ISO-like strings (anything after a '+' is dropped before parsing).
        """
        if not ts:
            return None
        try:
            if isinstance(ts, datetime):
                return ts
            if isinstance(ts, date):
                return datetime.combine(ts, datetime.min.time())
            if isinstance(ts, (int, float)) or str(ts).isdigit():
                val = float(ts)
                return datetime.fromtimestamp(val / 1000 if val > 1_000_000_000_000 else val)
            if isinstance(ts, str):
                return datetime.fromisoformat(ts.split('+')[0].strip())
        except (ValueError, TypeError, OverflowError, OSError):
            # fromtimestamp raises OverflowError/OSError for out-of-range
            # epochs; treat those like any other unparseable value.
            return None
        return None

    def _get_table_columns(self, table_name: str) -> List[str]:
        """Return the column names of ``table_name``, caching successful lookups.

        An empty result (missing table or failed query) is not cached, so a
        transient failure does not mark the table as column-less forever.
        """
        cached = self._table_columns_cache.get(table_name)
        if cached:
            return cached
        rows = self._execute_query(f"SHOW COLUMNS FROM `{table_name}`")
        columns = [row['Field'] for row in rows] if rows else []
        if columns:
            self._table_columns_cache[table_name] = columns
        return columns

    def _extract_engagement(self, row: Dict[str, Any]) -> Dict[str, int]:
        """Collect the interaction counters present in ``row`` under unified names.

        For each metric the first candidate column that exists with a non-None
        value wins; a value that cannot be converted to int is recorded as 0.
        """
        engagement = {}
        for metric, candidates in self._ENGAGEMENT_COLUMNS.items():
            for col in candidates:
                if row.get(col) is None:
                    continue
                try:
                    engagement[metric] = int(row[col])
                except (ValueError, TypeError):
                    engagement[metric] = 0
                break
        return engagement

    @staticmethod
    def _hotness_formula(terms) -> str:
        """Build the SQL expression for a weighted sum of counter columns.

        Each term is ``(column, weight)`` or ``(column, weight, cast_type)``;
        the cast type defaults to UNSIGNED. NULL counters count as 0.
        """
        parts = []
        for column, weight, *cast in terms:
            cast_type = cast[0] if cast else 'UNSIGNED'
            parts.append(f"COALESCE(CAST({column} AS {cast_type}), 0) * {weight}")
        return "(" + " + ".join(parts) + ")"

    @staticmethod
    def _parse_date_range(start_date: str, end_date: str):
        """Parse 'YYYY-MM-DD' bounds into a half-open ``(start, end)`` pair.

        The end is moved to the following midnight so the whole end day is
        included by a ``< end`` comparison. Raises ValueError on a bad format.
        """
        start_dt = datetime.strptime(start_date, '%Y-%m-%d')
        end_dt = datetime.strptime(end_date, '%Y-%m-%d') + timedelta(days=1)
        return start_dt, end_dt

    @staticmethod
    def _time_window_params(time_type: str, start_dt: datetime, end_dt: datetime) -> tuple:
        """Encode the window bounds in the representation the time column uses."""
        if time_type == 'sec':
            return (int(start_dt.timestamp()), int(end_dt.timestamp()))
        if time_type == 'ms':
            return (int(start_dt.timestamp() * 1000), int(end_dt.timestamp() * 1000))
        if time_type in ('str', 'date_str'):
            return (start_dt.strftime('%Y-%m-%d'), end_dt.strftime('%Y-%m-%d'))
        # 'sec_str': epoch seconds stored as text.
        return (str(int(start_dt.timestamp())), str(int(end_dt.timestamp())))

    @classmethod
    def _build_topic_query(cls, config: Dict[str, Any], search_term: str, limit: int,
                           start_dt: Optional[datetime] = None,
                           end_dt: Optional[datetime] = None):
        """Build the LIKE search for one table and return ``(sql, params)``.

        The OR-ed field matches are always wrapped in parentheses: AND binds
        tighter than OR in SQL, so without them an appended time filter would
        only constrain the last field.
        The time filter is added only when both bounds are given and the
        config names a time column.
        """
        topic_clause = " OR ".join(f"`{field_name}` LIKE %s" for field_name in config['fields'])
        query = f"SELECT * FROM `{config['table']}` WHERE ({topic_clause})"
        params = [search_term] * len(config['fields'])

        if start_dt and end_dt and 'time_col' in config:
            time_col, time_type = config['time_col'], config['time_type']
            # Text-stored epochs must be compared numerically, not lexically.
            column_expr = f"CAST(`{time_col}` AS UNSIGNED)" if time_type == 'sec_str' else f"`{time_col}`"
            query += f" AND ({column_expr} >= %s AND {column_expr} < %s)"
            params.extend(cls._time_window_params(time_type, start_dt, end_dt))

        query += " ORDER BY id DESC LIMIT %s"
        params.append(limit)
        return query, tuple(params)

    def _row_to_result(self, row: Dict[str, Any], config: Dict[str, Any],
                       time_col: Optional[str] = None) -> QueryResult:
        """Map one ``SELECT *`` row onto a QueryResult.

        ``time_col`` pins the column used for the publish time; when it is
        omitted or empty in the row, the common time columns are tried in turn.
        """
        content = row.get('title') or row.get('content') or row.get('desc') or row.get('content_text', '')
        raw_time = row.get(time_col) if time_col else None
        if not raw_time:
            raw_time = next((row[col] for col in self._TIME_COLUMN_FALLBACKS if row.get(col)), None)
        return QueryResult(
            platform=config['table'].split('_')[0],
            content_type=config['type'],
            title_or_content=content[:500] if content else '',
            author_nickname=row.get('nickname') or row.get('user_nickname') or row.get('user_name'),
            url=(row.get('video_url') or row.get('note_url') or row.get('content_url')
                 or row.get('url') or row.get('aweme_url')),
            publish_time=self._to_datetime(raw_time),
            engagement=self._extract_engagement(row),
            source_keyword=row.get('source_keyword'),
            source_table=config['table'],
        )

    def search_hot_content(
        self,
        time_period: Literal['24h', 'week', 'year'] = 'week',
        limit: int = 10
    ) -> DBResponse:
        """[Tool] Find hot content: the items with the highest weighted hotness.

        Runs one UNION ALL over the six content tables, scoring each row with
        the class weights, and keeps the top ``limit`` rows overall.

        Args:
            time_period: Look-back window. '24h' is one day and 'week' seven
                days; any other value, including 'year', means 365 days.
            limit: Maximum number of results to return.

        Returns:
            DBResponse whose results are ordered by descending hotness score.
        """
        params_for_log = {'time_period': time_period, 'limit': limit}
        print(f"--- TOOL: 查找热点内容 (params: {params_for_log}) ---")

        start_time = datetime.now() - timedelta(days={'24h': 1, 'week': 7}.get(time_period, 365))
        since_sec = str(int(start_time.timestamp()))
        since_ms = str(int(start_time.timestamp() * 1000))
        since_str = start_time.strftime('%Y-%m-%d %H:%M:%S')
        like, comment, share = self.W_LIKE, self.W_COMMENT, self.W_SHARE
        view, danmaku = self.W_VIEW, self.W_DANMAKU

        # One entry per content table: which columns feed the common SELECT
        # list, how the time column is filtered, and which counters are scored.
        sources = [
            {'table': 'bilibili_video', 'type': 'video',
             'title': 'title', 'author': 'nickname', 'url': 'video_url', 'ts': 'create_time',
             'time_filter': "`create_time` >= %s", 'since': since_sec,
             'terms': [('liked_count', like), ('video_comment', comment), ('video_share_count', share),
                       ('video_favorite_count', share), ('video_coin_count', share),
                       ('video_danmaku', danmaku), ('video_play_count', view, 'DECIMAL(20,2)')]},
            {'table': 'douyin_aweme', 'type': 'video',
             'title': 'title', 'author': 'nickname', 'url': 'aweme_url', 'ts': 'create_time',
             'time_filter': "`create_time` >= %s", 'since': since_ms,
             'terms': [('liked_count', like), ('comment_count', comment),
                       ('share_count', share), ('collected_count', share)]},
            {'table': 'weibo_note', 'type': 'note',
             'title': 'content', 'author': 'nickname', 'url': 'note_url', 'ts': 'create_date_time',
             'time_filter': "`create_date_time` >= %s", 'since': since_str,
             'terms': [('liked_count', like), ('comments_count', comment), ('shared_count', share)]},
            {'table': 'xhs_note', 'type': 'note',
             'title': 'title', 'author': 'nickname', 'url': 'note_url', 'ts': 'time',
             'time_filter': "`time` >= %s", 'since': since_ms,
             'terms': [('liked_count', like), ('comment_count', comment),
                       ('share_count', share), ('collected_count', share)]},
            {'table': 'kuaishou_video', 'type': 'video',
             'title': 'title', 'author': 'nickname', 'url': 'video_url', 'ts': 'create_time',
             'time_filter': "`create_time` >= %s", 'since': since_ms,
             'terms': [('liked_count', like), ('viewd_count', view, 'DECIMAL(20,2)')]},
            {'table': 'zhihu_content', 'type': 'content',
             'title': 'title', 'author': 'user_nickname', 'url': 'content_url', 'ts': 'created_time',
             'time_filter': "CAST(`created_time` AS UNSIGNED) >= %s", 'since': since_sec,
             'terms': [('voteup_count', like), ('comment_count', comment)]},
        ]

        selects, params = [], []
        for src in sources:
            table = src['table']
            selects.append(
                f"SELECT '{table.split('_')[0]}' as p, '{src['type']}' as t, {src['title']} as title, "
                f"{src['author']} as author, {src['url']} as url, {src['ts']} as ts, "
                f"{self._hotness_formula(src['terms'])} as hotness_score, source_keyword, "
                f"'{table}' as tbl FROM `{table}` WHERE {src['time_filter']}"
            )
            params.append(src['since'])

        final_query = f"({' ) UNION ALL ( '.join(selects)}) ORDER BY hotness_score DESC LIMIT %s"
        raw_results = self._execute_query(final_query, tuple(params) + (limit,))

        formatted_results = [
            QueryResult(
                platform=r['p'],
                content_type=r['t'],
                title_or_content=r['title'],
                author_nickname=r.get('author'),
                url=r['url'],
                publish_time=self._to_datetime(r['ts']),
                # The UNION selects no counter columns, so this is normally empty.
                engagement=self._extract_engagement(r),
                # The driver may hand back Decimal or NULL; keep the field a float.
                hotness_score=float(r.get('hotness_score') or 0.0),
                source_keyword=r.get('source_keyword'),
                source_table=r['tbl'],
            )
            for r in raw_results
        ]
        return DBResponse("search_hot_content", params_for_log,
                          results=formatted_results, results_count=len(formatted_results))

    def search_topic_globally(self, topic: str, limit_per_table: int = 5) -> DBResponse:
        """[Tool] Global topic search across content, comment and news tables.

        Args:
            topic: Keyword matched with ``LIKE '%topic%'`` against each table's
                searchable fields.
            limit_per_table: Maximum rows taken from each table (newest ids first).

        Returns:
            DBResponse aggregating the matches of every table.
        """
        params_for_log = {'topic': topic, 'limit_per_table': limit_per_table}
        print(f"--- TOOL: 全局话题搜索 (params: {params_for_log}) ---")

        search_term = f"%{topic}%"
        configs = [cfg for platform_cfgs in self._PLATFORM_CONFIGS.values() for cfg in platform_cfgs]
        configs.append(self._NEWS_CONFIG)

        all_results = []
        for config in configs:
            query, params = self._build_topic_query(config, search_term, limit_per_table)
            all_results.extend(self._row_to_result(row, config) for row in self._execute_query(query, params))
        return DBResponse("search_topic_globally", params_for_log,
                          results=all_results, results_count=len(all_results))

    def search_topic_by_date(self, topic: str, start_date: str, end_date: str, limit_per_table: int = 10) -> DBResponse:
        """[Tool] Search a topic within an explicit date range (content tables only).

        Args:
            topic: Keyword matched with ``LIKE '%topic%'``.
            start_date: First day of the range, 'YYYY-MM-DD'.
            end_date: Last day of the range (inclusive), 'YYYY-MM-DD'.
            limit_per_table: Maximum rows taken from each table.

        Returns:
            DBResponse with the matches, or with ``error_message`` set when a
            date does not parse.
        """
        params_for_log = {'topic': topic, 'start_date': start_date, 'end_date': end_date, 'limit_per_table': limit_per_table}
        print(f"--- TOOL: 按日期搜索话题 (params: {params_for_log}) ---")

        try:
            start_dt, end_dt = self._parse_date_range(start_date, end_date)
        except (ValueError, TypeError):
            return DBResponse("search_topic_by_date", params_for_log, error_message="日期格式错误,请使用 'YYYY-MM-DD' 格式。")

        search_term = f"%{topic}%"
        # Comment tables carry no configured time column and are skipped here.
        configs = [cfg for platform_cfgs in self._PLATFORM_CONFIGS.values()
                   for cfg in platform_cfgs if 'time_col' in cfg]
        configs.append(self._NEWS_CONFIG)

        all_results = []
        for config in configs:
            query, params = self._build_topic_query(config, search_term, limit_per_table, start_dt, end_dt)
            all_results.extend(
                self._row_to_result(row, config, config['time_col'])
                for row in self._execute_query(query, params)
            )
        return DBResponse("search_topic_by_date", params_for_log,
                          results=all_results, results_count=len(all_results))

    def get_comments_for_topic(self, topic: str, limit: int = 50) -> DBResponse:
        """[Tool] Fetch comments mentioning a topic from all comment tables.

        Args:
            topic: Keyword matched with ``LIKE '%topic%'`` on the comment text.
            limit: Maximum number of comments returned in total.

        Returns:
            DBResponse with the matching comments.
        """
        params_for_log = {'topic': topic, 'limit': limit}
        print(f"--- TOOL: 获取话题评论 (params: {params_for_log}) ---")

        def pick(columns, candidates, fallback_sql):
            """Return the first candidate column that exists, else ``fallback_sql``."""
            return next((f"`{name}`" for name in candidates if name in columns), fallback_sql)

        selects = []
        for platform_cfgs in self._PLATFORM_CONFIGS.values():
            for config in platform_cfgs:
                if config['type'] != 'comment':
                    continue
                table = config['table']
                cols = self._get_table_columns(table)
                if 'content' not in cols:
                    # Missing or unreadable table: leave it out so it cannot
                    # make the whole UNION fail.
                    continue
                author_expr = pick(cols, ('user_nickname', 'nickname'), "NULL")
                time_expr = pick(cols, ('publish_time', 'create_date_time', 'create_time'), "NULL")
                like_expr = pick(cols, ('comment_like_count', 'like_count'), "'0'")
                selects.append(
                    f"SELECT '{table.split('_')[0]}' as platform, `content`, {author_expr} as author, "
                    f"{time_expr} as ts, {like_expr} as likes, '{table}' as source_table "
                    f"FROM `{table}` WHERE `content` LIKE %s"
                )

        if not selects:
            return DBResponse("get_comments_for_topic", params_for_log)

        # NOTE(review): `ts` mixes each table's own time representation, so the
        # cross-table ordering is only approximate; confirm against the schema.
        final_query = f"({' ) UNION ALL ( '.join(selects)}) ORDER BY ts DESC LIMIT %s"
        # One LIKE parameter per SELECT actually built, then the limit.
        params = (f"%{topic}%",) * len(selects) + (limit,)
        raw_results = self._execute_query(final_query, params)

        formatted = [
            QueryResult(
                platform=r['platform'],
                content_type='comment',
                title_or_content=r['content'],
                author_nickname=r['author'],
                publish_time=self._to_datetime(r['ts']),
                engagement={'likes': int(r['likes']) if str(r['likes']).isdigit() else 0},
                source_table=r['source_table'],
            )
            for r in raw_results
        ]
        return DBResponse("get_comments_for_topic", params_for_log, results=formatted, results_count=len(formatted))

    def search_topic_on_platform(
        self,
        platform: Literal['bilibili', 'weibo', 'douyin', 'kuaishou', 'xhs', 'zhihu', 'tieba'],
        topic: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        limit: int = 20
    ) -> DBResponse:
        """[Tool] Search a topic on one platform's content and comment tables.

        Args:
            platform: One of the seven supported platform names.
            topic: Keyword matched with ``LIKE '%topic%'``.
            start_date: Optional first day, 'YYYY-MM-DD'.
            end_date: Optional last day (inclusive), 'YYYY-MM-DD'.
            limit: Maximum rows taken from each of the platform's tables.

        The date filter is applied only when both dates are given, and only to
        the content table; comment tables have no configured time column.

        Returns:
            DBResponse with the matches, or with ``error_message`` set for an
            unsupported platform or an unparseable date.
        """
        params_for_log = {'platform': platform, 'topic': topic, 'start_date': start_date, 'end_date': end_date, 'limit': limit}
        print(f"--- TOOL: 平台定向搜索 (params: {params_for_log}) ---")

        platform_configs = self._PLATFORM_CONFIGS.get(platform)
        if platform_configs is None:
            return DBResponse("search_topic_on_platform", params_for_log, error_message=f"不支持的平台: {platform}")

        start_dt, end_dt = None, None
        if start_date and end_date:
            try:
                start_dt, end_dt = self._parse_date_range(start_date, end_date)
            except (ValueError, TypeError):
                return DBResponse("search_topic_on_platform", params_for_log, error_message="日期格式错误,请使用 'YYYY-MM-DD' 格式。")

        search_term = f"%{topic}%"
        all_results = []
        for config in platform_configs:
            query, params = self._build_topic_query(config, search_term, limit, start_dt, end_dt)
            all_results.extend(
                self._row_to_result(row, config, config.get('time_col'))
                for row in self._execute_query(query, params)
            )
        return DBResponse("search_topic_on_platform", params_for_log,
                          results=all_results, results_count=len(all_results))
181 392
182 # --- 3. 测试与使用示例 --- 393 # --- 3. 测试与使用示例 ---
183 -  
def print_response_summary(response: DBResponse):
    """Print a short, human-readable summary of a tool response.

    Shows the error message when the tool failed; otherwise the call
    parameters, the result count and a one-line preview of the first five
    results.

    Args:
        response: Any object exposing the DBResponse attributes.
    """
    if response.error_message:
        print(f"工具 '{response.tool_name}' 执行出错: {response.error_message}")
        print("-" * 80)
        return

    params_str = ", ".join(f"{k}='{v}'" for k, v in response.parameters.items())
    print(f"查询: 工具='{response.tool_name}', 参数=[{params_str}]")
    print(f"找到 {response.results_count} 条相关记录。")

    if response.results:
        print("--- 前5条结果示例 ---")
        for i, res in enumerate(response.results[:5], start=1):
            # Only non-zero counters are worth showing.
            engagement_str = ", ".join(f"{k}: {v}" for k, v in res.engagement.items() if v)

            content_preview = res.title_or_content
            if content_preview:
                # Flatten newlines for every preview, not just truncated ones,
                # so each result stays on one line.
                flat = content_preview.replace('\n', ' ')
                content_preview = flat[:70] + '...' if len(flat) > 70 else flat

            # The score can be missing (None); treat that like "no score".
            score = res.hotness_score or 0
            hotness_str = f", hotness: {score:.2f}" if score > 0 else ""
            published = res.publish_time.strftime('%Y-%m-%d %H:%M') if res.publish_time else 'N/A'
            print(
                f"{i}. [{res.platform.upper()}/{res.content_type}] {content_preview}\n"
                f" by: {res.author_nickname}, at: {published}"
                f", src_kw: '{res.source_keyword or 'N/A'}'{hotness_str}"
                f", engagement: {{{engagement_str}}}"
            )
    print("-" * 80)
200 418
if __name__ == "__main__":
    # Smoke test against a live database; requires the DB_* environment
    # variables read by MediaCrawlerDB.__init__.
    try:
        db_agent_tools = MediaCrawlerDB()
        print("数据库工具初始化成功,开始执行测试场景...\n")

        scenarios = [
            # Scenario 1: hottest content of the past week.
            lambda: db_agent_tools.search_hot_content(time_period='week', limit=5),
            # Scenario 2: hottest content of the past 24 hours.
            lambda: db_agent_tools.search_hot_content(time_period='24h', limit=5),
            # Scenario 3: global search for one topic.
            lambda: db_agent_tools.search_topic_globally(topic="罗永浩", limit_per_table=2),
            # Scenario 4: topic search restricted to Bilibili.
            lambda: db_agent_tools.search_topic_on_platform(platform='bilibili', topic="论文", limit=5),
            # Scenario 5: topic search on Weibo limited to a single day.
            lambda: db_agent_tools.search_topic_on_platform(
                platform='weibo', topic="许凯", start_date='2025-08-22', end_date='2025-08-22', limit=5
            ),
        ]
        # Each scenario runs and is summarized before the next one starts.
        for run_scenario in scenarios:
            print_response_summary(run_scenario())

    except ValueError as e:
        print(f"初始化失败: {e}")
        print("请确保相关的数据库环境变量已正确设置, 或在代码中直接提供连接信息。")
    except Exception as e:
        print(f"测试过程中发生未知错误: {e}")