Showing
1 changed file
with
389 additions
and
180 deletions
| 1 | """ | 1 | """ |
| 2 | -专为 AI Agent 设计的舆情搜索工具集 (Tavily) | 2 | +专为 AI Agent 设计的本地舆情数据库查询工具集 (MediaCrawlerDB) |
| 3 | 3 | ||
| 4 | -版本: 1.5 | 4 | +版本: 3.0 |
| 5 | 最后更新: 2025-08-22 | 5 | 最后更新: 2025-08-22 |
| 6 | 6 | ||
| 7 | -此脚本将复杂的Tavily搜索功能分解为一系列目标明确、参数极少的独立工具, | ||
| 8 | -专为AI Agent调用而设计。Agent只需根据任务意图选择合适的工具, | ||
| 9 | -无需理解复杂的参数组合。所有工具默认搜索“新闻”(topic='news')。 | 7 | +此脚本将复杂的本地MySQL数据库查询功能封装成一系列目标明确、参数清晰的独立工具, |
| 8 | +专为AI Agent调用而设计。Agent只需根据任务意图(如搜索热点、全局搜索话题、 | ||
| 9 | +按时间范围分析、获取评论)选择合适的工具,无需编写复杂的SQL语句。 | ||
| 10 | 10 | ||
| 11 | -新特性: | ||
| 12 | -- 新增 `basic_search_news` 工具,用于执行标准、通用的新闻搜索。 | ||
| 13 | -- 每个搜索结果现在都包含 `published_date` (新闻发布日期)。 | 11 | +V3.0 核心更新: |
| 12 | +- 智能热度计算: `search_hot_content`不再需要`sort_by`参数,改为内部使用统一的加权热度算法, | ||
| 13 | + 综合点赞、评论、分享、观看等数据计算热度分值,使结果更智能、更符合综合热度。 | ||
| 14 | +- 新增平台精搜工具: 新增 `search_topic_on_platform` 工具,作为特例, | ||
| 15 | + 允许Agent在特定平台(B站、微博等七大平台)上对某一话题进行精确搜索,并支持时间筛选。 | ||
| 16 | +- 结构优化: 调整了数据结构与函数文档,以适应新功能。 | ||
| 14 | 17 | ||
| 15 | 主要工具: | 18 | 主要工具: |
| 16 | -- basic_search_news: (新增) 执行标准、快速的通用新闻搜索。 | ||
| 17 | -- deep_search_news: 对主题进行最全面的深度分析。 | ||
| 18 | -- search_news_last_24_hours: 获取24小时内的最新动态。 | ||
| 19 | -- search_news_last_week: 获取过去一周的主要报道。 | ||
| 20 | -- search_images_for_news: 查找与新闻主题相关的图片。 | ||
| 21 | -- search_news_by_date: 在指定的历史日期范围内搜索。 | 19 | +- search_hot_content: 查找指定时间范围内的综合热度最高的内容。 |
| 20 | +- search_topic_globally: 在整个数据库中全局搜索与特定话题相关的所有内容和评论。 | ||
| 21 | +- search_topic_by_date: 在指定的历史日期范围内搜索与特定话题相关的内容。 | ||
| 22 | +- get_comments_for_topic: 专门提取公众对于某一特定话题的评论数据。 | ||
| 23 | +- search_topic_on_platform: 在指定的单个社交媒体平台上搜索特定话题。 | ||
| 22 | """ | 24 | """ |
| 23 | 25 | ||
| 24 | import os | 26 | import os |
| 25 | -from typing import List, Dict, Any, Optional | 27 | +import json |
| 28 | +import pymysql | ||
| 29 | +import pymysql.cursors | ||
| 30 | +from typing import List, Dict, Any, Optional, Literal | ||
| 26 | from dataclasses import dataclass, field | 31 | from dataclasses import dataclass, field |
| 27 | - | ||
| 28 | -# 运行前请确保已安装Tavily库: pip install tavily-python | ||
| 29 | -try: | ||
| 30 | - from tavily import TavilyClient | ||
| 31 | -except ImportError: | ||
| 32 | - raise ImportError("Tavily库未安装,请运行 `pip install tavily-python` 进行安装。") | 32 | +from datetime import datetime, timedelta, date |
| 33 | 33 | ||
| 34 | # --- 1. 数据结构定义 --- | 34 | # --- 1. 数据结构定义 --- |
| 35 | 35 | ||
@dataclass
class QueryResult:
    """Unified record for a single row returned by any query tool.

    Fields are normalized across the seven platform schemas so an Agent can
    consume results from any table uniformly.
    """
    platform: str                              # e.g. 'bilibili', 'weibo' (first token of the table name)
    content_type: str                          # 'video' / 'note' / 'content' / 'comment' / 'news'
    title_or_content: str                      # title when present, otherwise body text (truncated to 500 chars)
    author_nickname: Optional[str] = None
    url: Optional[str] = None
    publish_time: Optional[datetime] = None
    engagement: Dict[str, int] = field(default_factory=dict)  # normalized interaction counters
    source_keyword: Optional[str] = None       # crawler keyword that originally captured the row
    hotness_score: float = 0.0                 # weighted hotness score (filled only by search_hot_content)
    source_table: str = ""                     # raw table the row came from


@dataclass
class DBResponse:
    """Envelope returned by every tool method."""
    tool_name: str                             # name of the tool that produced this response
    parameters: Dict[str, Any]                 # echo of the call parameters, for logging/tracing
    results: List[QueryResult] = field(default_factory=list)
    results_count: int = 0
    error_message: Optional[str] = None        # set instead of raising for Agent-facing input errors


# --- 2. Core client and dedicated tool set ---

class MediaCrawlerDB:
    """Client exposing sentiment-analysis query tools over the local
    MediaCrawler MySQL database. Each public method is an independent tool
    designed for direct invocation by an AI Agent."""

    # Hotness weights: contribution of one unit of each interaction type
    # to the combined hotness score.
    W_LIKE = 1.0
    W_COMMENT = 5.0
    W_SHARE = 10.0   # shares/forwards/favorites/coins: high-value interactions
    W_VIEW = 0.1
    W_DANMAKU = 0.5

    def __init__(self):
        """
        Initialize the client. Connection settings are read from environment
        variables:
        - DB_HOST, DB_USER, DB_PASSWORD, DB_NAME (required)
        - DB_PORT (optional, default 3306)
        - DB_CHARSET (optional, default utf8mb4)

        Raises:
            ValueError: if any required variable is missing.
        """
        self.db_config = {
            'host': os.getenv("DB_HOST"),
            'user': os.getenv("DB_USER"),
            'password': os.getenv("DB_PASSWORD"),
            'db': os.getenv("DB_NAME"),
            'port': int(os.getenv("DB_PORT", 3306)),
            'charset': os.getenv("DB_CHARSET", "utf8mb4"),
            'cursorclass': pymysql.cursors.DictCursor
        }
        required = ['host', 'user', 'password', 'db']
        if missing := [k for k in required if not self.db_config[k]]:
            raise ValueError(f"数据库配置缺失! 请设置环境变量或在代码中提供: {', '.join([f'DB_{k.upper()}' for k in missing])}")
        # FIX: was a class-level mutable dict shared by every instance;
        # a per-instance cache avoids cross-instance schema staleness.
        self._table_columns_cache: Dict[str, List[str]] = {}

    def _execute_query(self, query: str, params: Optional[tuple] = None) -> List[Dict[str, Any]]:
        """Open a connection, run one query, and return all rows as dicts.

        Errors are logged and swallowed (returns []) so one failing platform
        table does not abort a multi-table tool call.
        """
        conn = None
        try:
            conn = pymysql.connect(**self.db_config)
            with conn.cursor() as cursor:
                cursor.execute(query, params or ())
                return cursor.fetchall()
        except pymysql.Error as e:
            print(f"数据库查询时发生错误: {e}")
            return []
        finally:
            if conn:
                conn.close()

    @staticmethod
    def _to_datetime(ts: Any) -> Optional[datetime]:
        """Best-effort conversion of heterogeneous DB timestamps to datetime.

        Accepts datetime/date objects, second or millisecond epoch numbers
        (including numeric strings), and ISO-format strings. Returns None for
        anything falsy or unparseable (note: epoch 0 is falsy and maps to None).
        """
        if not ts:
            return None
        try:
            if isinstance(ts, datetime):
                return ts
            if isinstance(ts, date):
                return datetime.combine(ts, datetime.min.time())
            if isinstance(ts, (int, float)) or str(ts).isdigit():
                val = float(ts)
                # Heuristic: values above 1e12 are millisecond timestamps.
                return datetime.fromtimestamp(val / 1000 if val > 1_000_000_000_000 else val)
            if isinstance(ts, str):
                return datetime.fromisoformat(ts.split('+')[0].strip())
        except (ValueError, TypeError):
            return None
        return None  # FIX: explicit fall-through for unsupported types

    def _get_table_columns(self, table_name: str) -> List[str]:
        """Return (and cache per instance) the column names of a table."""
        if table_name in self._table_columns_cache:
            return self._table_columns_cache[table_name]
        results = self._execute_query(f"SHOW COLUMNS FROM `{table_name}`")
        columns = [row['Field'] for row in results] if results else []
        self._table_columns_cache[table_name] = columns
        return columns

    def _extract_engagement(self, row: Dict[str, Any]) -> Dict[str, int]:
        """Extract and normalize interaction metrics from a raw DB row.

        For each canonical key the first matching platform-specific column
        wins; unparseable values become 0, absent metrics are omitted.
        """
        engagement = {}
        mapping = {
            'likes': ['liked_count', 'like_count', 'voteup_count', 'comment_like_count'],
            'comments': ['video_comment', 'comments_count', 'comment_count', 'total_replay_num', 'sub_comment_count'],
            'shares': ['video_share_count', 'shared_count', 'share_count', 'total_forwards'],
            'views': ['video_play_count', 'viewd_count'],
            'favorites': ['video_favorite_count', 'collected_count'],
            'coins': ['video_coin_count'],
            'danmaku': ['video_danmaku'],
        }
        for key, potential_cols in mapping.items():
            for col in potential_cols:
                if col in row and row[col] is not None:
                    try:
                        engagement[key] = int(row[col])
                    except (ValueError, TypeError):
                        engagement[key] = 0
                    break
        return engagement

    def search_hot_content(
        self,
        time_period: Literal['24h', 'week', 'year'] = 'week',
        limit: int = 10
    ) -> DBResponse:
        """
        [Tool] Find hot content: return the highest-scoring content across all
        platforms within a recent time window, ranked by a unified weighted
        hotness formula (likes/comments/shares/views/danmaku).

        Args:
            time_period: One of '24h', 'week' or 'year'. Defaults to 'week'.
            limit: Maximum number of results to return. Defaults to 10.

        Returns:
            DBResponse: results sorted by combined hotness score, descending.
        """
        params_for_log = {'time_period': time_period, 'limit': limit}
        print(f"--- TOOL: 查找热点内容 (params: {params_for_log}) ---")

        now = datetime.now()
        # Any unrecognized period falls back to one year.
        start_time = now - timedelta(days={'24h': 1, 'week': 7}.get(time_period, 365))

        # Per-table SQL fragments computing the weighted hotness score.
        # Count columns are stored as text in some schemas, hence the CASTs.
        hotness_formulas = {
            'bilibili_video': f"(COALESCE(CAST(liked_count AS UNSIGNED), 0) * {self.W_LIKE} + COALESCE(CAST(video_comment AS UNSIGNED), 0) * {self.W_COMMENT} + COALESCE(CAST(video_share_count AS UNSIGNED), 0) * {self.W_SHARE} + COALESCE(CAST(video_favorite_count AS UNSIGNED), 0) * {self.W_SHARE} + COALESCE(CAST(video_coin_count AS UNSIGNED), 0) * {self.W_SHARE} + COALESCE(CAST(video_danmaku AS UNSIGNED), 0) * {self.W_DANMAKU} + COALESCE(CAST(video_play_count AS DECIMAL(20,2)), 0) * {self.W_VIEW})",
            'douyin_aweme': f"(COALESCE(CAST(liked_count AS UNSIGNED), 0) * {self.W_LIKE} + COALESCE(CAST(comment_count AS UNSIGNED), 0) * {self.W_COMMENT} + COALESCE(CAST(share_count AS UNSIGNED), 0) * {self.W_SHARE} + COALESCE(CAST(collected_count AS UNSIGNED), 0) * {self.W_SHARE})",
            'weibo_note': f"(COALESCE(CAST(liked_count AS UNSIGNED), 0) * {self.W_LIKE} + COALESCE(CAST(comments_count AS UNSIGNED), 0) * {self.W_COMMENT} + COALESCE(CAST(shared_count AS UNSIGNED), 0) * {self.W_SHARE})",
            'xhs_note': f"(COALESCE(CAST(liked_count AS UNSIGNED), 0) * {self.W_LIKE} + COALESCE(CAST(comment_count AS UNSIGNED), 0) * {self.W_COMMENT} + COALESCE(CAST(share_count AS UNSIGNED), 0) * {self.W_SHARE} + COALESCE(CAST(collected_count AS UNSIGNED), 0) * {self.W_SHARE})",
            'kuaishou_video': f"(COALESCE(CAST(liked_count AS UNSIGNED), 0) * {self.W_LIKE} + COALESCE(CAST(viewd_count AS DECIMAL(20,2)), 0) * {self.W_VIEW})",
            'zhihu_content': f"(COALESCE(CAST(voteup_count AS UNSIGNED), 0) * {self.W_LIKE} + COALESCE(CAST(comment_count AS UNSIGNED), 0) * {self.W_COMMENT})",
        }

        all_queries, params = [], []
        for table, formula in hotness_formulas.items():
            # Each table stores its publish time differently: datetime string,
            # epoch seconds, epoch milliseconds, or seconds stored as text.
            if table == 'weibo_note':
                time_filter_sql, time_filter_param = "`create_date_time` >= %s", start_time.strftime('%Y-%m-%d %H:%M:%S')
            elif table in ['kuaishou_video', 'xhs_note', 'douyin_aweme']:
                time_col = 'time' if table == 'xhs_note' else 'create_time'
                time_filter_sql, time_filter_param = f"`{time_col}` >= %s", str(int(start_time.timestamp() * 1000))
            elif table == 'zhihu_content':
                time_filter_sql, time_filter_param = "CAST(`created_time` AS UNSIGNED) >= %s", str(int(start_time.timestamp()))
            else:
                time_filter_sql, time_filter_param = "`create_time` >= %s", str(int(start_time.timestamp()))

            content_type = 'note' if table in ['weibo_note', 'xhs_note'] else 'content' if table == 'zhihu_content' else 'video'
            query_template = "SELECT '{platform}' as p, '{type}' as t, {title} as title, {author} as author, {url} as url, {ts} as ts, {formula} as hotness_score, source_keyword, '{tbl}' as tbl FROM `{tbl}` WHERE {time_filter}"

            # Default column mapping, overridden per table below.
            field_subs = {'platform': table.split('_')[0], 'type': content_type, 'title': 'title', 'author': 'nickname', 'url': 'video_url', 'ts': 'create_time', 'formula': formula, 'tbl': table, 'time_filter': time_filter_sql}
            if table == 'weibo_note':
                field_subs.update({'title': 'content', 'url': 'note_url', 'ts': 'create_date_time'})
            elif table == 'xhs_note':
                field_subs.update({'ts': 'time', 'url': 'note_url'})
            elif table == 'zhihu_content':
                field_subs.update({'author': 'user_nickname', 'url': 'content_url', 'ts': 'created_time'})
            elif table == 'douyin_aweme':
                field_subs.update({'url': 'aweme_url'})

            all_queries.append(query_template.format(**field_subs))
            params.append(time_filter_param)

        # Combine the per-table queries and rank globally by hotness.
        final_query = f"({' ) UNION ALL ( '.join(all_queries)}) ORDER BY hotness_score DESC LIMIT %s"
        raw_results = self._execute_query(final_query, tuple(params) + (limit,))

        formatted_results = [
            QueryResult(
                platform=r['p'], content_type=r['t'], title_or_content=r['title'],
                author_nickname=r.get('author'), url=r['url'],
                publish_time=self._to_datetime(r['ts']),
                engagement=self._extract_engagement(r),
                hotness_score=r.get('hotness_score', 0.0),
                source_keyword=r.get('source_keyword'), source_table=r['tbl']
            ) for r in raw_results
        ]
        return DBResponse("search_hot_content", params_for_log, results=formatted_results, results_count=len(formatted_results))

    def search_topic_globally(self, topic: str, limit_per_table: int = 5) -> DBResponse:
        """
        [Tool] Global topic search: scan content, comments, tags and source
        keywords across every platform table for the given topic.

        Args:
            topic: Keyword to search for.
            limit_per_table: Max records returned from each table. Defaults to 5.

        Returns:
            DBResponse: aggregated matches from all tables.
        """
        params_for_log = {'topic': topic, 'limit_per_table': limit_per_table}
        print(f"--- TOOL: 全局话题搜索 (params: {params_for_log}) ---")

        search_term, all_results = f"%{topic}%", []
        # Columns to LIKE-match per table, plus the content-type label.
        search_configs = {
            'bilibili_video': {'fields': ['title', 'desc', 'source_keyword'], 'type': 'video'},
            'bilibili_video_comment': {'fields': ['content'], 'type': 'comment'},
            'douyin_aweme': {'fields': ['title', 'desc', 'source_keyword'], 'type': 'video'},
            'douyin_aweme_comment': {'fields': ['content'], 'type': 'comment'},
            'kuaishou_video': {'fields': ['title', 'desc', 'source_keyword'], 'type': 'video'},
            'kuaishou_video_comment': {'fields': ['content'], 'type': 'comment'},
            'weibo_note': {'fields': ['content', 'source_keyword'], 'type': 'note'},
            'weibo_note_comment': {'fields': ['content'], 'type': 'comment'},
            'xhs_note': {'fields': ['title', 'desc', 'tag_list', 'source_keyword'], 'type': 'note'},
            'xhs_note_comment': {'fields': ['content'], 'type': 'comment'},
            'zhihu_content': {'fields': ['title', 'desc', 'content_text', 'source_keyword'], 'type': 'content'},
            'zhihu_comment': {'fields': ['content'], 'type': 'comment'},
            'tieba_note': {'fields': ['title', 'desc', 'source_keyword'], 'type': 'note'},
            'tieba_comment': {'fields': ['content'], 'type': 'comment'},
            'daily_news': {'fields': ['title'], 'type': 'news'},
        }

        for table, config in search_configs.items():
            # FIX: loop variable renamed from `field` (shadowed dataclasses.field).
            where_clause = " OR ".join([f"`{col}` LIKE %s" for col in config['fields']])
            query = f"SELECT * FROM `{table}` WHERE {where_clause} ORDER BY id DESC LIMIT %s"
            params = (search_term,) * len(config['fields']) + (limit_per_table,)
            raw_results = self._execute_query(query, params)
            for row in raw_results:
                content = (row.get('title') or row.get('content') or row.get('desc') or row.get('content_text', ''))
                time_key = row.get('create_time') or row.get('time') or row.get('created_time') or row.get('publish_time') or row.get('crawl_date')
                all_results.append(QueryResult(
                    platform=table.split('_')[0], content_type=config['type'],
                    title_or_content=content[:500] if content else '',
                    author_nickname=row.get('nickname') or row.get('user_nickname') or row.get('user_name'),
                    url=row.get('video_url') or row.get('note_url') or row.get('content_url') or row.get('url') or row.get('aweme_url'),
                    publish_time=self._to_datetime(time_key),
                    engagement=self._extract_engagement(row),
                    source_keyword=row.get('source_keyword'),
                    source_table=table
                ))
        return DBResponse("search_topic_globally", params_for_log, results=all_results, results_count=len(all_results))

    def search_topic_by_date(self, topic: str, start_date: str, end_date: str, limit_per_table: int = 10) -> DBResponse:
        """
        [Tool] Date-ranged topic search: search content related to a topic
        within an explicit historical period.

        Args:
            topic: Keyword to search for.
            start_date: Start date, format 'YYYY-MM-DD'.
            end_date: End date (inclusive), format 'YYYY-MM-DD'.
            limit_per_table: Max records returned from each table. Defaults to 10.

        Returns:
            DBResponse: aggregated matches within the date range, or an
            error_message when the dates are malformed.
        """
        params_for_log = {'topic': topic, 'start_date': start_date, 'end_date': end_date, 'limit_per_table': limit_per_table}
        print(f"--- TOOL: 按日期搜索话题 (params: {params_for_log}) ---")

        try:
            start_dt = datetime.strptime(start_date, '%Y-%m-%d')
            # End date is inclusive: filter with "< end + 1 day".
            end_dt = datetime.strptime(end_date, '%Y-%m-%d') + timedelta(days=1)
        except ValueError:
            return DBResponse("search_topic_by_date", params_for_log, error_message="日期格式错误,请使用 'YYYY-MM-DD' 格式。")

        search_term, all_results = f"%{topic}%", []
        # time_type: 'sec' = epoch seconds, 'ms' = epoch milliseconds,
        # 'str'/'date_str' = datetime/date strings, 'sec_str' = seconds stored as text.
        search_configs = {
            'bilibili_video': {'fields': ['title', 'desc', 'source_keyword'], 'type': 'video', 'time_col': 'create_time', 'time_type': 'sec'},
            'douyin_aweme': {'fields': ['title', 'desc', 'source_keyword'], 'type': 'video', 'time_col': 'create_time', 'time_type': 'ms'},
            'kuaishou_video': {'fields': ['title', 'desc', 'source_keyword'], 'type': 'video', 'time_col': 'create_time', 'time_type': 'ms'},
            'weibo_note': {'fields': ['content', 'source_keyword'], 'type': 'note', 'time_col': 'create_date_time', 'time_type': 'str'},
            'xhs_note': {'fields': ['title', 'desc', 'tag_list', 'source_keyword'], 'type': 'note', 'time_col': 'time', 'time_type': 'ms'},
            'zhihu_content': {'fields': ['title', 'desc', 'content_text', 'source_keyword'], 'type': 'content', 'time_col': 'created_time', 'time_type': 'sec_str'},
            'tieba_note': {'fields': ['title', 'desc', 'source_keyword'], 'type': 'note', 'time_col': 'publish_time', 'time_type': 'str'},
            'daily_news': {'fields': ['title'], 'type': 'news', 'time_col': 'crawl_date', 'time_type': 'date_str'},
        }

        for table, config in search_configs.items():
            topic_clause = " OR ".join([f"`{col}` LIKE %s" for col in config['fields']])
            time_col, time_type = config['time_col'], config['time_type']
            if time_type == 'sec':
                time_params = (int(start_dt.timestamp()), int(end_dt.timestamp()))
            elif time_type == 'ms':
                time_params = (int(start_dt.timestamp() * 1000), int(end_dt.timestamp() * 1000))
            elif time_type in ['str', 'date_str']:
                time_params = (start_dt.strftime('%Y-%m-%d'), end_dt.strftime('%Y-%m-%d'))
            else:  # 'sec_str'
                time_params = (str(int(start_dt.timestamp())), str(int(end_dt.timestamp())))
            time_clause = f"`{time_col}` >= %s AND `{time_col}` < %s"
            if table == 'zhihu_content':
                # created_time is stored as text; compare numerically.
                time_clause = f"CAST(`{time_col}` AS UNSIGNED) >= %s AND CAST(`{time_col}` AS UNSIGNED) < %s"
            query = f"SELECT * FROM `{table}` WHERE ({topic_clause}) AND ({time_clause}) ORDER BY id DESC LIMIT %s"
            params = (search_term,) * len(config['fields']) + time_params + (limit_per_table,)
            raw_results = self._execute_query(query, params)
            for row in raw_results:
                content = (row.get('title') or row.get('content') or row.get('desc') or row.get('content_text', ''))
                all_results.append(QueryResult(
                    platform=table.split('_')[0], content_type=config['type'],
                    title_or_content=content[:500] if content else '',
                    author_nickname=row.get('nickname') or row.get('user_nickname'),
                    url=row.get('video_url') or row.get('note_url') or row.get('content_url') or row.get('url') or row.get('aweme_url'),
                    publish_time=self._to_datetime(row.get(config['time_col'])),
                    engagement=self._extract_engagement(row),
                    source_keyword=row.get('source_keyword'),
                    source_table=table
                ))
        return DBResponse("search_topic_by_date", params_for_log, results=all_results, results_count=len(all_results))

    def get_comments_for_topic(self, topic: str, limit: int = 50) -> DBResponse:
        """
        [Tool] Fetch topic comments: search every platform's comment table and
        return public comments mentioning the topic, newest first.

        Args:
            topic: Keyword to search for.
            limit: Overall cap on the number of comments returned. Defaults to 50.

        Returns:
            DBResponse: matching comments across all platforms.
        """
        params_for_log = {'topic': topic, 'limit': limit}
        print(f"--- TOOL: 获取话题评论 (params: {params_for_log}) ---")

        search_term = f"%{topic}%"
        comment_tables = ['bilibili_video_comment', 'douyin_aweme_comment', 'kuaishou_video_comment', 'weibo_note_comment', 'xhs_note_comment', 'zhihu_comment', 'tieba_comment']

        all_queries = []
        for table in comment_tables:
            # Column names differ per platform; inspect the schema once (cached).
            cols = self._get_table_columns(table)
            author_col = 'user_nickname' if 'user_nickname' in cols else 'nickname'
            like_col = 'comment_like_count' if 'comment_like_count' in cols else 'like_count' if 'like_count' in cols else None
            time_col = 'publish_time' if 'publish_time' in cols else 'create_date_time' if 'create_date_time' in cols else 'create_time'
            like_select = f"`{like_col}` as likes" if like_col else "'0' as likes"

            query = (f"SELECT '{table.split('_')[0]}' as platform, `content`, `{author_col}` as author, "
                     f"`{time_col}` as ts, {like_select}, '{table}' as source_table "
                     f"FROM `{table}` WHERE `content` LIKE %s")
            all_queries.append(query)

        final_query = f"({' ) UNION ALL ( '.join(all_queries)}) ORDER BY ts DESC LIMIT %s"
        params = (search_term,) * len(comment_tables) + (limit,)
        raw_results = self._execute_query(final_query, params)

        formatted = [
            QueryResult(
                platform=r['platform'], content_type='comment',
                title_or_content=r['content'], author_nickname=r['author'],
                publish_time=self._to_datetime(r['ts']),
                engagement={'likes': int(r['likes']) if str(r['likes']).isdigit() else 0},
                source_table=r['source_table']
            ) for r in raw_results
        ]
        return DBResponse("get_comments_for_topic", params_for_log, results=formatted, results_count=len(formatted))

    def search_topic_on_platform(
        self,
        platform: Literal['bilibili', 'weibo', 'douyin', 'kuaishou', 'xhs', 'zhihu', 'tieba'],
        topic: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        limit: int = 20
    ) -> DBResponse:
        """
        [Tool] Platform-targeted search: search one specific social-media
        platform (both its content and comment tables) for a topic, with
        optional date filtering.

        Args:
            platform: One of the seven supported platforms.
            topic: Keyword to search for.
            start_date: Optional start date, 'YYYY-MM-DD'.
            end_date: Optional end date (inclusive), 'YYYY-MM-DD'.
            limit: Max rows returned from EACH table of the platform. Defaults to 20.

        Returns:
            DBResponse: matches found on that platform, or an error_message
            for an unsupported platform / malformed dates.
        """
        params_for_log = {'platform': platform, 'topic': topic, 'start_date': start_date, 'end_date': end_date, 'limit': limit}
        print(f"--- TOOL: 平台定向搜索 (params: {params_for_log}) ---")

        all_configs = {
            'bilibili': [
                {'table': 'bilibili_video', 'fields': ['title', 'desc', 'source_keyword'], 'type': 'video', 'time_col': 'create_time', 'time_type': 'sec'},
                {'table': 'bilibili_video_comment', 'fields': ['content'], 'type': 'comment'},
            ],
            'douyin': [
                {'table': 'douyin_aweme', 'fields': ['title', 'desc', 'source_keyword'], 'type': 'video', 'time_col': 'create_time', 'time_type': 'ms'},
                {'table': 'douyin_aweme_comment', 'fields': ['content'], 'type': 'comment'},
            ],
            'kuaishou': [
                {'table': 'kuaishou_video', 'fields': ['title', 'desc', 'source_keyword'], 'type': 'video', 'time_col': 'create_time', 'time_type': 'ms'},
                {'table': 'kuaishou_video_comment', 'fields': ['content'], 'type': 'comment'},
            ],
            'weibo': [
                {'table': 'weibo_note', 'fields': ['content', 'source_keyword'], 'type': 'note', 'time_col': 'create_date_time', 'time_type': 'str'},
                {'table': 'weibo_note_comment', 'fields': ['content'], 'type': 'comment'},
            ],
            'xhs': [
                {'table': 'xhs_note', 'fields': ['title', 'desc', 'tag_list', 'source_keyword'], 'type': 'note', 'time_col': 'time', 'time_type': 'ms'},
                {'table': 'xhs_note_comment', 'fields': ['content'], 'type': 'comment'},
            ],
            'zhihu': [
                {'table': 'zhihu_content', 'fields': ['title', 'desc', 'content_text', 'source_keyword'], 'type': 'content', 'time_col': 'created_time', 'time_type': 'sec_str'},
                {'table': 'zhihu_comment', 'fields': ['content'], 'type': 'comment'},
            ],
            'tieba': [
                {'table': 'tieba_note', 'fields': ['title', 'desc', 'source_keyword'], 'type': 'note', 'time_col': 'publish_time', 'time_type': 'str'},
                {'table': 'tieba_comment', 'fields': ['content'], 'type': 'comment'},
            ],
        }

        if platform not in all_configs:
            return DBResponse("search_topic_on_platform", params_for_log, error_message=f"不支持的平台: {platform}")

        search_term, all_results = f"%{topic}%", []
        platform_configs = all_configs[platform]

        if start_date and end_date:
            try:
                start_dt = datetime.strptime(start_date, '%Y-%m-%d')
                # End date is inclusive: filter with "< end + 1 day".
                end_dt = datetime.strptime(end_date, '%Y-%m-%d') + timedelta(days=1)
            except ValueError:
                return DBResponse("search_topic_on_platform", params_for_log, error_message="日期格式错误,请使用 'YYYY-MM-DD' 格式。")
        else:
            start_dt, end_dt = None, None

        for config in platform_configs:
            table = config['table']
            topic_clause = " OR ".join([f"`{col}` LIKE %s" for col in config['fields']])
            # FIX: topic_clause must be parenthesized; previously appending the
            # time filter produced "a LIKE %s OR b LIKE %s AND (time...)" where
            # AND bound tighter than OR, applying the date filter to only the
            # last LIKE term.
            query = f"SELECT * FROM `{table}` WHERE ({topic_clause})"
            params = [search_term] * len(config['fields'])

            # Comment tables carry no 'time_col'; date filtering is skipped for them.
            if start_dt and end_dt and 'time_col' in config:
                time_col, time_type = config['time_col'], config['time_type']
                if time_type == 'sec':
                    t_params = (int(start_dt.timestamp()), int(end_dt.timestamp()))
                elif time_type == 'ms':
                    t_params = (int(start_dt.timestamp() * 1000), int(end_dt.timestamp() * 1000))
                elif time_type in ['str', 'date_str']:
                    t_params = (start_dt.strftime('%Y-%m-%d'), end_dt.strftime('%Y-%m-%d'))
                else:  # 'sec_str'
                    t_params = (str(int(start_dt.timestamp())), str(int(end_dt.timestamp())))

                t_clause = f"`{time_col}` >= %s AND `{time_col}` < %s"
                if table == 'zhihu_content':
                    t_clause = f"CAST(`{time_col}` AS UNSIGNED) >= %s AND CAST(`{time_col}` AS UNSIGNED) < %s"

                query += f" AND ({t_clause})"
                params.extend(t_params)

            query += " ORDER BY id DESC LIMIT %s"  # FIX: was a placeholder-free f-string
            params.append(limit)

            raw_results = self._execute_query(query, tuple(params))
            for row in raw_results:
                content = (row.get('title') or row.get('content') or row.get('desc') or row.get('content_text', ''))
                time_col_name = config.get('time_col')
                time_key = row.get(time_col_name) if time_col_name else None
                all_results.append(QueryResult(
                    platform=platform, content_type=config['type'],
                    title_or_content=content[:500] if content else '',
                    author_nickname=row.get('nickname') or row.get('user_nickname'),
                    url=row.get('video_url') or row.get('note_url') or row.get('content_url') or row.get('url') or row.get('aweme_url'),
                    publish_time=self._to_datetime(time_key),
                    engagement=self._extract_engagement(row),
                    source_keyword=row.get('source_keyword'),
                    source_table=table
                ))

        return DBResponse("search_topic_on_platform", params_for_log, results=all_results, results_count=len(all_results))
| 181 | 392 | ||
| 182 | # --- 3. 测试与使用示例 --- | 393 | # --- 3. 测试与使用示例 --- |
| 183 | - | ||
| 184 | -def print_response_summary(response: TavilyResponse): | ||
| 185 | - """简化的打印函数,用于展示测试结果,现在会显示发布日期""" | ||
| 186 | - if not response or not response.query: | ||
def print_response_summary(response: DBResponse):
    """Print a human-readable summary of a DBResponse (used by the test scenarios below)."""
    if response.error_message:
        # Error path: report the failing tool and bail out early.
        print(f"工具 '{response.tool_name}' 执行出错: {response.error_message}")
        print("-" * 80)
        return

    param_items = [f"{key}='{value}'" for key, value in response.parameters.items()]
    print(f"查询: 工具='{response.tool_name}', 参数=[{', '.join(param_items)}]")
    print(f"找到 {response.results_count} 条相关记录。")

    if response.results:
        print("--- 前5条结果示例 ---")
        for index, record in enumerate(response.results[:5], start=1):
            # Only engagement counters with a non-empty value are shown.
            engagement_str = ", ".join(
                f"{name}: {count}" for name, count in record.engagement.items() if count
            )
            text = record.title_or_content
            if text and len(text) > 70:
                # Long content: collapse newlines and truncate to a 70-char preview.
                preview = text.replace('\n', ' ')[:70] + '...'
            else:
                preview = text
            if record.hotness_score > 0:
                hotness_str = f", hotness: {record.hotness_score:.2f}"
            else:
                hotness_str = ""
            print(
                f"{index}. [{record.platform.upper()}/{record.content_type}] {preview}\n"
                f"   by: {record.author_nickname}, at: {record.publish_time.strftime('%Y-%m-%d %H:%M') if record.publish_time else 'N/A'}"
                f", src_kw: '{record.source_keyword or 'N/A'}'{hotness_str}"
                f", engagement: {{{engagement_str}}}"
            )
    print("-" * 80)
| 200 | 418 | ||
if __name__ == "__main__":

    try:
        # Instantiate the DB toolset once; all scenarios below share it.
        tools = MediaCrawlerDB()
        print("数据库工具初始化成功,开始执行测试场景...\n")

        # Scenario 1: top weighted-hotness content from the past week (no sort_by needed).
        print_response_summary(tools.search_hot_content(time_period='week', limit=5))

        # Scenario 2: top weighted-hotness content from the past 24 hours.
        print_response_summary(tools.search_hot_content(time_period='24h', limit=5))

        # Scenario 3: global cross-table search for a topic.
        print_response_summary(tools.search_topic_globally(topic="罗永浩", limit_per_table=2))

        # Scenario 4: platform-restricted search on Bilibili.
        print_response_summary(tools.search_topic_on_platform(platform='bilibili', topic="论文", limit=5))

        # Scenario 5: platform-restricted search on Weibo within a single day.
        print_response_summary(
            tools.search_topic_on_platform(
                platform='weibo', topic="许凯", start_date='2025-08-22', end_date='2025-08-22', limit=5
            )
        )

    except ValueError as e:
        # Raised by MediaCrawlerDB when connection configuration is missing/invalid.
        print(f"初始化失败: {e}")
        print("请确保相关的数据库环境变量已正确设置, 或在代码中直接提供连接信息。")
    except Exception as e:
        # Demo-script boundary: surface anything unexpected instead of crashing silently.
        print(f"测试过程中发生未知错误: {e}")
-
Please register or login to post a comment