models_sa.py
5.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""
MindSpider 数据库ORM模型(SQLAlchemy 2.x)
此模块定义 MindSpider 扩展表(与原 MediaCrawler 表解耦)的 ORM 模型。
数据模型定义位置:
- 本文件(MindSpider/schema/models_sa.py)
"""
from __future__ import annotations
from typing import Optional
from datetime import date
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
from sqlalchemy import Integer, String, Text, BigInteger, Date, Float, ForeignKey, Index, UniqueConstraint
from sqlalchemy.schema import ForeignKeyConstraint
from sqlalchemy.orm import relationship
# Public names of this module: the declarative base plus the four ORM models
# defined below. This list is what ``from <module> import *`` exposes.
__all__ = [
    "Base",
    "DailyNews",
    "DailyTopic",
    "TopicNewsRelation",
    "CrawlingTask",
]
class Base(DeclarativeBase):
    """Declarative base class that every ORM model in this module subclasses."""
class DailyNews(Base):
    """ORM model mapped to the ``daily_news`` table.

    Constraints and indexes declared in ``__table_args__``:

    * ``news_id`` alone is unique so that it can be the target of a foreign
      key (``topic_news_relation.news_id`` references it in this module).
    * The triple ``(news_id, source_platform, crawl_date)`` is unique too.
    * Secondary indexes exist on ``crawl_date``, ``source_platform`` and
      ``rank_position``.
    """

    __tablename__ = "daily_news"

    # Surrogate auto-increment integer primary key.
    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )

    # Required identification columns.
    news_id: Mapped[str] = mapped_column(String(128), nullable=False)
    source_platform: Mapped[str] = mapped_column(String(32), nullable=False)
    title: Mapped[str] = mapped_column(String(500), nullable=False)

    # Optional (nullable) descriptive columns.
    url: Mapped[Optional[str]] = mapped_column(String(512))
    description: Mapped[Optional[str]] = mapped_column(Text)
    extra_info: Mapped[Optional[str]] = mapped_column(Text)

    # Required date the row was crawled on, and an optional rank.
    crawl_date: Mapped[date] = mapped_column(Date, nullable=False)
    rank_position: Mapped[Optional[int]] = mapped_column(Integer)

    # Required big-integer bookkeeping columns.
    # NOTE(review): presumably epoch timestamps -- confirm the unit with writers.
    add_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)
    last_modify_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)

    __table_args__ = (
        # Single-column unique constraint added so foreign keys can reference
        # ``news_id``.
        UniqueConstraint("news_id", name="uq_daily_news_id_unique"),
        UniqueConstraint(
            "news_id",
            "source_platform",
            "crawl_date",
            name="uq_daily_news_unique",
        ),
        Index("idx_daily_news_date", "crawl_date"),
        Index("idx_daily_news_platform", "source_platform"),
        Index("idx_daily_news_rank", "rank_position"),
    )
class DailyTopic(Base):
    """ORM model mapped to the ``daily_topics`` table.

    Constraints and indexes declared in ``__table_args__``:

    * ``topic_id`` alone is unique so that it can be the target of foreign
      keys (``topic_news_relation.topic_id`` and ``crawling_tasks.topic_id``
      reference it in this module).
    * The pair ``(topic_id, extract_date)`` is unique too.
    * Secondary indexes exist on ``extract_date``, ``processing_status`` and
      ``relevance_score``, plus a composite index on
      ``(extract_date, processing_status)``.
    """

    __tablename__ = "daily_topics"

    # Surrogate auto-increment integer primary key.
    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )

    # Required identification columns.
    topic_id: Mapped[str] = mapped_column(String(64), nullable=False)
    topic_name: Mapped[str] = mapped_column(String(255), nullable=False)

    # Optional (nullable) free-text columns.
    topic_description: Mapped[Optional[str]] = mapped_column(Text)
    keywords: Mapped[Optional[str]] = mapped_column(Text)

    # Required extraction date and an optional floating-point score.
    extract_date: Mapped[date] = mapped_column(Date, nullable=False)
    relevance_score: Mapped[Optional[float]] = mapped_column(Float)

    # Nullable columns with client-side defaults (0 and "pending") that
    # SQLAlchemy fills in on INSERT when no value is supplied.
    news_count: Mapped[Optional[int]] = mapped_column(Integer, default=0)
    processing_status: Mapped[Optional[str]] = mapped_column(
        String(16),
        default="pending",
    )

    # Required big-integer bookkeeping columns.
    # NOTE(review): presumably epoch timestamps -- confirm the unit with writers.
    add_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)
    last_modify_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)

    __table_args__ = (
        # Single-column unique constraint added so foreign keys can reference
        # ``topic_id``.
        UniqueConstraint("topic_id", name="uq_daily_topics_id_unique"),
        UniqueConstraint(
            "topic_id",
            "extract_date",
            name="uq_daily_topics_unique",
        ),
        Index("idx_daily_topics_date", "extract_date"),
        Index("idx_daily_topics_status", "processing_status"),
        Index("idx_daily_topics_score", "relevance_score"),
        Index("idx_topic_date_status", "extract_date", "processing_status"),
    )
class TopicNewsRelation(Base):
    """ORM model mapped to the ``topic_news_relation`` table.

    Each row links a ``daily_topics.topic_id`` to a ``daily_news.news_id``.

    Constraints and indexes declared in ``__table_args__``:

    * The triple ``(topic_id, news_id, extract_date)`` is unique.
    * Secondary indexes exist on ``topic_id``, ``news_id`` and
      ``extract_date``.
    * Both foreign keys are declared with ``ON DELETE CASCADE``.
    """

    __tablename__ = "topic_news_relation"

    # Surrogate auto-increment integer primary key.
    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )

    # Required foreign-key columns; lengths match the referenced columns.
    topic_id: Mapped[str] = mapped_column(String(64), nullable=False)
    news_id: Mapped[str] = mapped_column(String(128), nullable=False)

    # Optional floating-point score attached to the link.
    relation_score: Mapped[Optional[float]] = mapped_column(Float)

    # Required extraction date.
    extract_date: Mapped[date] = mapped_column(Date, nullable=False)

    # Required big-integer bookkeeping column.
    # NOTE(review): presumably an epoch timestamp -- confirm the unit.
    add_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)

    __table_args__ = (
        UniqueConstraint(
            "topic_id",
            "news_id",
            "extract_date",
            name="uq_topic_news_unique",
        ),
        Index("idx_topic_news_topic", "topic_id"),
        Index("idx_topic_news_news", "news_id"),
        Index("idx_topic_news_date", "extract_date"),
        ForeignKeyConstraint(
            columns=["topic_id"],
            refcolumns=["daily_topics.topic_id"],
            ondelete="CASCADE",
        ),
        ForeignKeyConstraint(
            columns=["news_id"],
            refcolumns=["daily_news.news_id"],
            ondelete="CASCADE",
        ),
    )
class CrawlingTask(Base):
    """ORM model mapped to the ``crawling_tasks`` table.

    Constraints and indexes declared in ``__table_args__``:

    * ``task_id`` is unique.
    * Secondary indexes exist on ``topic_id``, ``platform``, ``task_status``
      and ``scheduled_date``, plus a composite index on
      ``(topic_id, platform, task_status)``.
    * ``topic_id`` is a foreign key to ``daily_topics.topic_id`` declared with
      ``ON DELETE CASCADE``.
    """

    __tablename__ = "crawling_tasks"

    # Surrogate auto-increment integer primary key.
    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )

    # Required identification columns.
    task_id: Mapped[str] = mapped_column(String(64), nullable=False)
    topic_id: Mapped[str] = mapped_column(String(64), nullable=False)
    platform: Mapped[str] = mapped_column(String(32), nullable=False)
    search_keywords: Mapped[str] = mapped_column(Text, nullable=False)

    # Nullable status column with a client-side default of "pending".
    task_status: Mapped[Optional[str]] = mapped_column(
        String(16),
        default="pending",
    )

    # Optional big-integer start/end markers.
    # NOTE(review): presumably epoch timestamps -- confirm the unit.
    start_time: Mapped[Optional[int]] = mapped_column(BigInteger)
    end_time: Mapped[Optional[int]] = mapped_column(BigInteger)

    # Nullable counters, each with a client-side default of 0.
    total_crawled: Mapped[Optional[int]] = mapped_column(Integer, default=0)
    success_count: Mapped[Optional[int]] = mapped_column(Integer, default=0)
    error_count: Mapped[Optional[int]] = mapped_column(Integer, default=0)

    # Optional (nullable) free-text columns.
    error_message: Mapped[Optional[str]] = mapped_column(Text)
    config_params: Mapped[Optional[str]] = mapped_column(Text)

    # Required scheduling date.
    scheduled_date: Mapped[date] = mapped_column(Date, nullable=False)

    # Required big-integer bookkeeping columns.
    # NOTE(review): presumably epoch timestamps -- confirm the unit with writers.
    add_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)
    last_modify_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)

    __table_args__ = (
        UniqueConstraint("task_id", name="uq_crawling_tasks_unique"),
        Index("idx_crawling_tasks_topic", "topic_id"),
        Index("idx_crawling_tasks_platform", "platform"),
        Index("idx_crawling_tasks_status", "task_status"),
        Index("idx_crawling_tasks_date", "scheduled_date"),
        Index(
            "idx_task_topic_platform",
            "topic_id",
            "platform",
            "task_status",
        ),
        ForeignKeyConstraint(
            columns=["topic_id"],
            refcolumns=["daily_topics.topic_id"],
            ondelete="CASCADE",
        ),
    )