Doiiars

修复 fetch_news 无法正常获取新闻数据的问题

@@ -12,6 +12,7 @@ import json @@ -12,6 +12,7 @@ import json
12 from datetime import datetime, date 12 from datetime import datetime, date
13 from pathlib import Path 13 from pathlib import Path
14 from typing import List, Dict, Optional 14 from typing import List, Dict, Optional
  15 +from loguru import logger
15 16
16 # 添加项目根目录到路径 17 # 添加项目根目录到路径
17 project_root = Path(__file__).parent.parent 18 project_root = Path(__file__).parent.parent
@@ -38,8 +39,7 @@ SOURCE_NAMES = { @@ -38,8 +39,7 @@ SOURCE_NAMES = {
38 "wallstreetcn": "华尔街见闻", 39 "wallstreetcn": "华尔街见闻",
39 "thepaper": "澎湃新闻", 40 "thepaper": "澎湃新闻",
40 "cls-hot": "财联社", 41 "cls-hot": "财联社",
41 - "xueqiu": "雪球热榜",  
42 - "kuaishou": "快手热榜" 42 + "xueqiu": "雪球热榜"
43 } 43 }
44 44
45 class NewsCollector: 45 class NewsCollector:
@@ -72,15 +72,25 @@ class NewsCollector: @@ -72,15 +72,25 @@ class NewsCollector:
72 async def fetch_news(self, source: str) -> dict: 72 async def fetch_news(self, source: str) -> dict:
73 """从指定源获取最新新闻""" 73 """从指定源获取最新新闻"""
74 url = f"{BASE_URL}/api/s?id={source}&latest" 74 url = f"{BASE_URL}/api/s?id={source}&latest"
75 - headers = {"Accept": "application/json"} 75 + headers = {
  76 + "Accept": "application/json, text/plain, */*",
  77 + "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
  78 + "User-Agent": (
  79 + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
  80 + "AppleWebKit/537.36 (KHTML, like Gecko) "
  81 + "Chrome/124.0.0.0 Safari/537.36"
  82 + ),
  83 + "Referer": BASE_URL,
  84 + "Connection": "keep-alive",
  85 + }
76 86
77 try: 87 try:
78 - async with httpx.AsyncClient(timeout=30.0) as client: 88 + async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
79 response = await client.get(url, headers=headers) 89 response = await client.get(url, headers=headers)
80 response.raise_for_status() 90 response.raise_for_status()
81 91
82 # 解析JSON响应 92 # 解析JSON响应
83 - data = json.loads(response.text) 93 + data = response.json()
84 return { 94 return {
85 "source": source, 95 "source": source,
86 "status": "success", 96 "status": "success",
@@ -91,21 +101,21 @@ class NewsCollector: @@ -91,21 +101,21 @@ class NewsCollector:
91 return { 101 return {
92 "source": source, 102 "source": source,
93 "status": "timeout", 103 "status": "timeout",
94 - "error": "请求超时", 104 + "error": f"请求超时: {source}({url})",
95 "timestamp": datetime.now().isoformat() 105 "timestamp": datetime.now().isoformat()
96 } 106 }
97 except httpx.HTTPStatusError as e: 107 except httpx.HTTPStatusError as e:
98 return { 108 return {
99 "source": source, 109 "source": source,
100 "status": "http_error", 110 "status": "http_error",
101 - "error": f"HTTP错误: {e.response.status_code}", 111 + "error": f"HTTP错误: {source}({url}) - {e.response.status_code}",
102 "timestamp": datetime.now().isoformat() 112 "timestamp": datetime.now().isoformat()
103 } 113 }
104 except Exception as e: 114 except Exception as e:
105 return { 115 return {
106 "source": source, 116 "source": source,
107 "status": "error", 117 "status": "error",
108 - "error": f"未知错误: {str(e)}", 118 + "error": f"未知错误: {source}({url}) - {str(e)}",
109 "timestamp": datetime.now().isoformat() 119 "timestamp": datetime.now().isoformat()
110 } 120 }
111 121
@@ -114,13 +124,13 @@ class NewsCollector: @@ -114,13 +124,13 @@ class NewsCollector:
114 if sources is None: 124 if sources is None:
115 sources = list(SOURCE_NAMES.keys()) 125 sources = list(SOURCE_NAMES.keys())
116 126
117 - print(f"正在获取 {len(sources)} 个新闻源的最新内容...")  
118 - print("=" * 80) 127 + logger.info(f"正在获取 {len(sources)} 个新闻源的最新内容...")
  128 + logger.info("=" * 80)
119 129
120 results = [] 130 results = []
121 for source in sources: 131 for source in sources:
122 source_name = SOURCE_NAMES.get(source, source) 132 source_name = SOURCE_NAMES.get(source, source)
123 - print(f"正在获取 {source_name} 的新闻...") 133 + logger.info(f"正在获取 {source_name} 的新闻...")
124 result = await self.fetch_news(source) 134 result = await self.fetch_news(source)
125 results.append(result) 135 results.append(result)
126 136
@@ -128,11 +138,11 @@ class NewsCollector: @@ -128,11 +138,11 @@ class NewsCollector:
128 data = result["data"] 138 data = result["data"]
129 if 'items' in data and isinstance(data['items'], list): 139 if 'items' in data and isinstance(data['items'], list):
130 count = len(data['items']) 140 count = len(data['items'])
131 - print(f"✓ {source_name}: 获取成功,共 {count} 条新闻") 141 + logger.info(f"✓ {source_name}: 获取成功,共 {count} 条新闻")
132 else: 142 else:
133 - print(f"✓ {source_name}: 获取成功") 143 + logger.info(f"✓ {source_name}: 获取成功")
134 else: 144 else:
135 - print(f"✗ {source_name}: {result.get('error', '获取失败')}") 145 + logger.error(f"✗ {source_name}: {result.get('error', '获取失败')}")
136 146
137 # 避免请求过快 147 # 避免请求过快
138 await asyncio.sleep(0.5) 148 await asyncio.sleep(0.5)
@@ -151,18 +161,21 @@ class NewsCollector: @@ -151,18 +161,21 @@ class NewsCollector:
151 Returns: 161 Returns:
152 包含收集结果的字典 162 包含收集结果的字典
153 """ 163 """
154 - print(f"开始收集每日热点新闻...")  
155 - print(f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") 164 + collection_summary_message = ""
  165 + collection_summary_message += "\n开始收集每日热点新闻...\n"
  166 + collection_summary_message += f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
156 167
157 # 选择新闻源 168 # 选择新闻源
158 if sources is None: 169 if sources is None:
159 # 使用所有支持的新闻源 170 # 使用所有支持的新闻源
160 sources = list(SOURCE_NAMES.keys()) 171 sources = list(SOURCE_NAMES.keys())
161 172
162 - print(f"将从 {len(sources)} 个新闻源收集数据:") 173 + collection_summary_message += f"将从 {len(sources)} 个新闻源收集数据:\n"
163 for source in sources: 174 for source in sources:
164 source_name = SOURCE_NAMES.get(source, source) 175 source_name = SOURCE_NAMES.get(source, source)
165 - print(f" - {source_name}") 176 + collection_summary_message += f" - {source_name}\n"
  177 +
  178 + logger.info(collection_summary_message)
166 179
167 try: 180 try:
168 # 获取新闻数据 181 # 获取新闻数据
@@ -185,7 +198,7 @@ class NewsCollector: @@ -185,7 +198,7 @@ class NewsCollector:
185 return processed_data 198 return processed_data
186 199
187 except Exception as e: 200 except Exception as e:
188 - print(f"收集新闻失败: {e}") 201 + logger.exception(f"收集新闻失败: {e}")
189 return { 202 return {
190 'success': False, 203 'success': False,
191 'error': str(e), 204 'error': str(e),
@@ -255,35 +268,30 @@ class NewsCollector: @@ -255,35 +268,30 @@ class NewsCollector:
255 } 268 }
256 269
257 except Exception as e: 270 except Exception as e:
258 - print(f"处理新闻项失败: {e}") 271 + logger.exception(f"处理新闻项失败: {e}")
259 return None 272 return None
260 273
261 def _print_collection_summary(self, data: Dict): 274 def _print_collection_summary(self, data: Dict):
262 """打印收集摘要""" 275 """打印收集摘要"""
263 - print("\n" + "=" * 50)  
264 - print("新闻收集摘要")  
265 - print("=" * 50)  
266 -  
267 - print(f"总新闻源: {data['total_sources']}")  
268 - print(f"成功源数: {data['successful_sources']}")  
269 - print(f"总新闻数: {data['total_news']}")  
270 - 276 + collection_summary_message = ""
  277 + collection_summary_message += f"\n总新闻源: {data['total_sources']}\n"
  278 + collection_summary_message += f"成功源数: {data['successful_sources']}\n"
  279 + collection_summary_message += f"总新闻数: {data['total_news']}\n"
271 if 'saved_count' in data: 280 if 'saved_count' in data:
272 - print(f"已保存数: {data['saved_count']}")  
273 -  
274 - print("=" * 50) 281 + collection_summary_message += f"已保存数: {data['saved_count']}\n"
  282 + logger.info(collection_summary_message)
275 283
276 def get_today_news(self) -> List[Dict]: 284 def get_today_news(self) -> List[Dict]:
277 """获取今天的新闻""" 285 """获取今天的新闻"""
278 try: 286 try:
279 return self.db_manager.get_daily_news(date.today()) 287 return self.db_manager.get_daily_news(date.today())
280 except Exception as e: 288 except Exception as e:
281 - print(f"获取今日新闻失败: {e}") 289 + logger.exception(f"获取今日新闻失败: {e}")
282 return [] 290 return []
283 291
284 async def main(): 292 async def main():
285 """测试新闻收集器""" 293 """测试新闻收集器"""
286 - print("测试新闻收集器...") 294 + logger.info("测试新闻收集器...")
287 295
288 async with NewsCollector() as collector: 296 async with NewsCollector() as collector:
289 # 收集新闻 297 # 收集新闻
@@ -292,9 +300,9 @@ async def main(): @@ -292,9 +300,9 @@ async def main():
292 ) 300 )
293 301
294 if result['success']: 302 if result['success']:
295 - print(f"收集成功!共获取 {result['total_news']} 条新闻") 303 + logger.info(f"收集成功!共获取 {result['total_news']} 条新闻")
296 else: 304 else:
297 - print(f"收集失败: {result.get('error', '未知错误')}") 305 + logger.error(f"收集失败: {result.get('error', '未知错误')}")
298 306
299 if __name__ == "__main__": 307 if __name__ == "__main__":
300 asyncio.run(main()) 308 asyncio.run(main())
@@ -72,3 +72,4 @@ flake8>=6.0.0 @@ -72,3 +72,4 @@ flake8>=6.0.0
72 # ===== Web服务器 ===== 72 # ===== Web服务器 =====
73 fastapi==0.110.2 73 fastapi==0.110.2
74 uvicorn==0.29.0 74 uvicorn==0.29.0
  75 +loguru