戒酒的李白
... ... @@ -18,6 +18,8 @@
**微博舆情分析预测系统** 是一个用于监控、分析和预测社交媒体平台(如微博)上的公众舆情趋势的**社交网络舆情分析系统**。该系统利用深度学习、自然语言处理(NLP)和机器学习技术,从大量社交媒体数据中提取有价值的舆情信息,帮助政府、企业及其他组织及时了解公众态度、应对突发事件并优化决策。📈
<img src="https://starchart.cc/666ghj/Weibo_PublicOpinion_AnalysisSystem.svg" alt="Weibo Public Opinion Analysis System">
通过强大的数据采集与处理能力,微博舆情分析预测系统实现了实时数据收集、情感分析、话题分类和舆情预测等功能,确保用户能够在复杂多变的社交网络环境中获得准确、全面的舆情洞察。系统采用模块化设计,易于维护和扩展,旨在为用户提供一个高效、可靠的舆情分析工具,助力各类组织在信息化时代做出明智决策。
## ✨ 功能
... ...
... ... @@ -16,6 +16,8 @@
**Weibo Public Opinion Analysis and Prediction System** is a **social network public opinion analysis system** designed to monitor, analyze, and predict public opinion trends on social media platforms such as Weibo. This system leverages deep learning, natural language processing (NLP), and machine learning technologies to extract valuable public opinion information from vast amounts of social media data, helping governments, enterprises, and other organizations promptly understand public attitudes, respond to emergencies, and optimize decision-making. 📈
<img src="https://starchart.cc/666ghj/Weibo_PublicOpinion_AnalysisSystem.svg" alt="Weibo Public Opinion Analysis System">
Through powerful data collection and processing capabilities, the Weibo Public Opinion Analysis and Prediction System achieves real-time data collection, sentiment analysis, topic classification, and public opinion prediction, ensuring that users can obtain accurate and comprehensive insights into public opinion in the complex and changing social network environment. The system adopts a modular design that is easy to maintain and extend, and it aims to provide users with an efficient and reliable public opinion analysis tool that helps organizations make informed decisions in the information age.
## ✨ Features
... ...
from utils.getPublicData import *  # Data-access helpers (getAllArticleData, getAllCommentsData, getAllHotWords, ...)
from utils.mynlp import SnowNLP  # Sentiment scoring
# Kept after the wildcard import so that collections.Counter cannot be
# shadowed by a name the wildcard import happens to bring in.
from collections import Counter

# Load the article and comment rows once at import time. The functions in
# this module read these module-level lists instead of querying again on
# every call, so each data source is fetched exactly once.
articleList = getAllArticleData()
commentList = getAllCommentsData()
def getTypeList():
    """Return the distinct article types found in the cached article rows.

    The type is read from column 8 of every row in the module-level
    ``articleList`` rather than re-fetching all articles on each call.

    Returns:
        list: The unique type values. The order is unspecified because the
        values are de-duplicated through a set.
    """
    return list({article[8] for article in articleList})
def getArticleByType(type):
    """Return the cached article rows whose type equals ``type``.

    Args:
        type: The type value to match against column 8 of each article row.
            The name shadows the builtin ``type``; it is kept unchanged so
            callers that pass it by keyword keep working.

    Returns:
        list: The matching rows from the module-level ``articleList``, in
        their original order. Empty when nothing matches.
    """
    return [article for article in articleList if article[8] == type]
def getArticleLikeCount(type):
    """Build a histogram of like counts for the articles of one type.

    Args:
        type: Article type forwarded to ``getArticleByType``.

    Returns:
        tuple: ``(X, Y)`` where ``X`` holds the bucket labels
        (``'0-100'`` ... ``'50000-~'``) and ``Y`` holds the number of
        articles whose like count (column 1, converted with ``int``) falls
        in the matching half-open range ``[lower, upper)``. Values below
        zero match no bucket and are not counted.
    """
    # Lower bound of every bucket; the last bucket is open-ended.
    bounds = [0, 100, 1000, 5000, 15000, 30000, 50000]
    uppers = bounds[1:] + [float('inf')]

    # Labels are derived from the bounds so the two can never drift apart.
    X = [f"{lower}-{upper}" for lower, upper in zip(bounds, bounds[1:])]
    X.append(f"{bounds[-1]}-~")
    Y = [0] * len(bounds)

    for article in getArticleByType(type):
        likeCount = int(article[1])
        for i, (lower, upper) in enumerate(zip(bounds, uppers)):
            if lower <= likeCount < upper:
                Y[i] += 1
                break
    return X, Y
def getArticleCommentsLen(type):
    """Build a histogram of the comment figure stored for articles of one type.

    The earlier if/elif chain tested ``< 5000`` before ``< 1000``, which
    shifted every later bucket, and it never counted values between 10000
    and 15000. Driving the counting from one list of bounds removes both
    problems.

    Args:
        type: Article type forwarded to ``getArticleByType``.

    Returns:
        tuple: ``(X, Y)`` where ``X`` holds the bucket labels
        (``'0-100'`` ... ``'15000-~'``) and ``Y`` holds the number of
        articles whose column 2 value (converted with ``int``) falls in the
        matching half-open range ``[lower, upper)``. Values below zero match
        no bucket and are not counted.
    """
    # Lower bound of every bucket; the last bucket is open-ended.
    bounds = [0, 100, 500, 1000, 1500, 3000, 5000, 10000, 15000]
    uppers = bounds[1:] + [float('inf')]

    # Labels are derived from the bounds so the two can never drift apart.
    X = [f"{lower}-{upper}" for lower, upper in zip(bounds, bounds[1:])]
    X.append(f"{bounds[-1]}-~")
    Y = [0] * len(bounds)

    for article in getArticleByType(type):
        commentLen = int(article[2])
        for i, (lower, upper) in enumerate(zip(bounds, uppers)):
            if lower <= commentLen < upper:
                Y[i] += 1
                break
    return X, Y
def getArticleRepotsLen(type):
    """Build a histogram of repost counts for the articles of one type.

    The earlier if/elif chain skipped the 1000-2000 bucket, so every later
    count landed one slot too early and the final bucket was never filled.
    Driving the counting from one list of bounds keeps labels and counts
    aligned.

    Args:
        type: Article type forwarded to ``getArticleByType``.

    Returns:
        tuple: ``(X, Y)`` where ``X`` holds the bucket labels
        (``'0-100'`` ... ``'70000-~'``) and ``Y`` holds the number of
        articles whose repost count (column 3, converted with ``int``)
        falls in the matching half-open range ``[lower, upper)``. Values
        below zero match no bucket and are not counted.
    """
    # Lower bound of every bucket; the last bucket is open-ended.
    bounds = [0, 100, 300, 500, 1000, 2000, 3000, 4000, 5000,
              10000, 15000, 30000, 70000]
    uppers = bounds[1:] + [float('inf')]

    # Labels are derived from the bounds so the two can never drift apart.
    X = [f"{lower}-{upper}" for lower, upper in zip(bounds, bounds[1:])]
    X.append(f"{bounds[-1]}-~")
    Y = [0] * len(bounds)

    for article in getArticleByType(type):
        repostsCount = int(article[3])
        for i, (lower, upper) in enumerate(zip(bounds, uppers)):
            if lower <= repostsCount < upper:
                Y[i] += 1
                break
    return X, Y
def getIPByArticleRegion():
    """Count the cached articles per region.

    The region is read from column 4 of each row in the module-level
    ``articleList``. Rows whose region is the placeholder ``'无'`` are
    left out.

    Returns:
        list[dict]: One ``{'name': region, 'value': count}`` entry per
        region, in order of first appearance.
    """
    region_counts = Counter(
        article[4] for article in articleList if article[4] != '无'
    )
    return [{'name': region, 'value': count}
            for region, count in region_counts.items()]
def getIPByCommentsRegion():
    """Count the cached comments per region.

    The region is read from column 3 of each row in the module-level
    ``commentList``. Rows whose region is the placeholder ``'无'`` are
    left out.

    Returns:
        list[dict]: One ``{'name': region, 'value': count}`` entry per
        region, in order of first appearance.
    """
    region_counts = Counter(
        comment[3] for comment in commentList if comment[3] != '无'
    )
    return [{'name': region, 'value': count}
            for region, count in region_counts.items()]
def getCommentDataOne():
    """Build a 100-bucket histogram over column 2 of the cached comments.

    Buckets are 20 wide and cover ``[0, 2000)``. Each comment is counted in
    exactly one bucket; the earlier loop had no ``break`` and therefore
    counted a comment in every bucket above its value as well.

    NOTE(review): column 2 is presumably the comment's like count; confirm
    against the comment table schema.

    Returns:
        tuple: ``(X, Y)`` where ``X`` holds the labels ``'0-20'``,
        ``'20-40'``, ... ``'1980-2000'`` and ``Y`` the number of comments
        per bucket. Values below 0 or at/above 2000 are not counted.
    """
    rangeNum = 20
    bucketCount = 100

    X = [f"{rangeNum * i}-{rangeNum * (i + 1)}" for i in range(bucketCount)]
    Y = [0] * bucketCount

    for comment in commentList:
        # Floor division maps the value straight to its bucket, avoiding a
        # scan over all 100 ranges for every comment.
        index = int(comment[2]) // rangeNum
        if 0 <= index < bucketCount:
            Y[index] += 1
    return X, Y
def getCommentDataTwo():
    """Count the cached comments per gender value.

    The gender is read from column 6 of each row in the module-level
    ``commentList``.

    Returns:
        list[dict]: One ``{'name': gender, 'value': count}`` entry per
        distinct value, in order of first appearance.
    """
    gender_counts = Counter(comment[6] for comment in commentList)
    return [{'name': gender, 'value': count}
            for gender, count in gender_counts.items()]
def getYuQingCharDataOne():
    """Classify every hot word by sentiment and tally the three classes.

    Each hot word (column 0 of the rows returned by ``getAllHotWords``) is
    scored with ``SnowNLP(...).sentiments``. A score above 0.4 counts as
    '正面' (positive), a score below 0.2 as '负面' (negative), and anything
    in between as '中性' (neutral).

    Returns:
        tuple: ``(X, Y, biedata)`` where ``X`` is
        ``['正面', '中性', '负面']``, ``Y`` the matching counts, and
        ``biedata`` the same data as ``{'name': label, 'value': count}``
        dicts.
    """
    X = ['正面', '中性', '负面']
    counts = Counter()

    for word in getAllHotWords():
        emotionValue = SnowNLP(word[0]).sentiments
        if emotionValue > 0.4:
            counts['正面'] += 1
        elif emotionValue < 0.2:
            counts['负面'] += 1
        else:
            counts['中性'] += 1

    # A Counter yields 0 for labels that never occurred.
    Y = [counts[label] for label in X]
    biedata = [{'name': label, 'value': count} for label, count in zip(X, Y)]
    return X, Y, biedata
def getYuQingCharDataTwo():
    """Tally sentiment classes separately for cached comments and articles.

    Column 4 of each comment row and column 5 of each article row are
    scored with ``SnowNLP(...).sentiments``. A score above 0.4 counts as
    '正面' (positive), a score below 0.2 as '负面' (negative), and anything
    in between as '中性' (neutral).

    NOTE(review): columns 4 and 5 are presumably the comment text and the
    article text respectively; confirm against the table schemas.

    Returns:
        tuple: ``(biedata1, biedata2)``: the comment tallies and the
        article tallies, each a list of ``{'name': label, 'value': count}``
        dicts in the order '正面', '中性', '负面'.
    """
    X = ['正面', '中性', '负面']

    def tally(texts):
        # Score each text once and count how many fall into each class.
        counts = Counter()
        for text in texts:
            emotionValue = SnowNLP(text).sentiments
            if emotionValue > 0.4:
                counts['正面'] += 1
            elif emotionValue < 0.2:
                counts['负面'] += 1
            else:
                counts['中性'] += 1
        # A Counter yields 0 for labels that never occurred.
        return [{'name': label, 'value': counts[label]} for label in X]

    biedata1 = tally(comment[4] for comment in commentList)
    biedata2 = tally(article[5] for article in articleList)
    return biedata1, biedata2
def getYuQingCharDataThree():
    """Return the first ten hot words and their counts as parallel lists.

    The rows come from ``getAllHotWords`` and are taken in the order that
    function returns them; no sorting is done here.

    Returns:
        tuple: ``(x1Data, y1Data)`` where ``x1Data`` holds column 0 of each
        of the first ten rows and ``y1Data`` holds column 1 converted with
        ``int``. Both lists are shorter when fewer than ten rows exist.
    """
    topWords = getAllHotWords()[:10]
    x1Data = [word[0] for word in topWords]
    y1Data = [int(word[1]) for word in topWords]
    return x1Data, y1Data
\ No newline at end of file
... ...