Merge branch 'main' of https://github.com/666ghj/Weibo_PublicOpinion_AnalysisSystem
Showing 3 changed files with 105 additions and 155 deletions.
First changed file (Chinese README):

```diff
@@ -18,6 +18,8 @@
 
 **微博舆情分析预测系统** 是一个用于监控、分析和预测社交媒体平台(如微博)上的公众舆情趋势的**社交网络舆情分析系统**。该系统利用深度学习、自然语言处理(NLP)和机器学习技术,从大量社交媒体数据中提取有价值的舆情信息,帮助政府、企业及其他组织及时了解公众态度、应对突发事件并优化决策。📈
 
+<img src="https://starchart.cc/666ghj/Weibo_PublicOpinion_AnalysisSystem.svg" alt="Weibo Public Opinion Analysis System">
+
 通过强大的数据采集与处理能力,微博舆情分析预测系统实现了实时数据收集、情感分析、话题分类和舆情预测等功能,确保用户能够在复杂多变的社交网络环境中获得准确、全面的舆情洞察。系统采用模块化设计,易于维护和扩展,旨在为用户提供一个高效、可靠的舆情分析工具,助力各类组织在信息化时代做出明智决策。
 
 ## ✨ 功能
```
Second changed file (English README):

```diff
@@ -16,6 +16,8 @@
 
 **Weibo Public Opinion Analysis and Prediction System** is a **social network public opinion analysis system** designed to monitor, analyze, and predict public opinion trends on social media platforms such as Weibo. This system leverages deep learning, natural language processing (NLP), and machine learning technologies to extract valuable public opinion information from vast amounts of social media data, helping governments, enterprises, and other organizations promptly understand public attitudes, respond to emergencies, and optimize decision-making. 📈
 
+<img src="https://starchart.cc/666ghj/Weibo_PublicOpinion_AnalysisSystem.svg" alt="Weibo Public Opinion Analysis System">
+
 Through powerful data collection and processing capabilities, the Weibo Public Opinion Analysis and Prediction System achieves real-time data collection, sentiment analysis, topic classification, and public opinion prediction, ensuring that users can obtain accurate and comprehensive insights into public opinion in the complex and changing social network environment. The system adopts a modular design, making it easy to maintain and expand, aiming to provide users with an efficient and reliable public opinion analysis tool, assisting various organizations in making informed decisions in the information age.
 
 ## ✨ Features
```
Third changed file (a Python analysis-utilities module):

```diff
-from utils.getPublicData import *
-from utils.mynlp import SnowNLP
-articleList = getAllArticleData()
-commentList = getAllCommentsData()
+from utils.getPublicData import *  # Import utility functions for data retrieval
+from utils.mynlp import SnowNLP  # Import SnowNLP for sentiment analysis
+from collections import Counter  # Import Counter for counting occurrences
+
+articleList = getAllArticleData()  # Retrieve all article data
+commentList = getAllCommentsData()  # Retrieve all comment data
 
```
```diff
 def getTypeList():
-    return list(set([x[8] for x in getAllArticleData()]))
+    # Return a list of unique article types
+    return list(set([x[8] for x in articleList]))
 
```
```diff
 def getArticleByType(type):
-    articles = []
-    for i in articleList:
-        if i[8] == type:
-            articles.append(i)
-    return articles
+    # Return a list of articles that match the specified type
+    return [article for article in articleList if article[8] == type]
 
```
```diff
 def getArticleLikeCount(type):
+    # Categorize articles by the number of likes they have
     articles = getArticleByType(type)
-    X = ['0-100','100-1000','1000-5000','5000-15000','15000-30000','30000-50000','50000-~']
-    Y = [0 for x in range(len(X))]
+    intervals = [(0, 100), (100, 1000), (1000, 5000), (5000, 15000),
+                 (15000, 30000), (30000, 50000), (50000, float('inf'))]
+    X = ['0-100','100-1000','1000-5000','5000-15000','15000-30000',
+         '30000-50000','50000-~']
+    Y = [0] * len(intervals)
     for article in articles:
         likeCount = int(article[1])
-        if likeCount < 100:
-            Y[0] += 1
-        elif likeCount < 1000:
-            Y[1] += 1
-        elif likeCount < 5000:
-            Y[2] += 1
-        elif likeCount < 15000:
-            Y[3] += 1
-        elif likeCount < 30000:
-            Y[4] += 1
-        elif likeCount < 50000:
-            Y[5] += 1
-        elif likeCount >= 50000:
-            Y[6] += 1
-    return X,Y
+        for i, (lower, upper) in enumerate(intervals):
+            if lower <= likeCount < upper:
+                Y[i] += 1
+                break
+    return X, Y
 
```
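The table-driven bucketing introduced in this commit replaces long `elif` chains with a list of `(lower, upper)` tuples, which is easier to audit for gaps and mis-ordered bounds (the old `getArticleCommentsLen`, for example, tested `< 5000` before `< 1000`, leaving some buckets unreachable). A minimal standalone sketch of the pattern; the function name and sample data are illustrative, not from the repository:

```python
from typing import List, Tuple

def bucket_counts(values: List[int],
                  intervals: List[Tuple[float, float]]) -> List[int]:
    # Count how many values fall into each half-open interval [lower, upper).
    counts = [0] * len(intervals)
    for v in values:
        for i, (lower, upper) in enumerate(intervals):
            if lower <= v < upper:
                counts[i] += 1
                break  # each value lands in at most one bucket
    return counts

intervals = [(0, 100), (100, 1000), (1000, float('inf'))]
print(bucket_counts([5, 50, 500, 5000], intervals))  # → [2, 1, 1]
```

Using `float('inf')` as the final upper bound keeps the comparison uniform, so the last bucket needs no special `>=` branch.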
```diff
 def getArticleCommentsLen(type):
+    # Categorize articles by the length of comments they have
     articles = getArticleByType(type)
-    X = ['0-100','100-500','500-1000','1000-1500','1500-3000','3000-5000','5000-10000','10000-15000','15000-~']
-    Y = [0 for x in range(len(X))]
+    intervals = [(0, 100), (100, 500), (500, 1000), (1000, 1500),
+                 (1500, 3000), (3000, 5000), (5000, 10000),
+                 (10000, 15000), (15000, float('inf'))]
+    X = ['0-100','100-500','500-1000','1000-1500','1500-3000',
+         '3000-5000','5000-10000','10000-15000','15000-~']
+    Y = [0] * len(intervals)
     for article in articles:
         commentLen = int(article[2])
-        if commentLen < 100:
-            Y[0] += 1
-        elif commentLen < 500:
-            Y[1] += 1
-        elif commentLen < 5000:
-            Y[2] += 1
-        elif commentLen < 1000:
-            Y[3] += 1
-        elif commentLen < 1500:
-            Y[4] += 1
-        elif commentLen < 3000:
-            Y[5] += 1
-        elif commentLen < 5000:
-            Y[6] += 1
-        elif commentLen < 10000:
-            Y[7] += 1
-        elif commentLen >= 15000:
-            Y[8] += 1
-    return X,Y
+        for i, (lower, upper) in enumerate(intervals):
+            if lower <= commentLen < upper:
+                Y[i] += 1
+                break
+    return X, Y
 
```
```diff
 def getArticleRepotsLen(type):
+    # Categorize articles by the number of reposts
     articles = getArticleByType(type)
-    X = ['0-100','100-300','300-500','500-1000','1000-2000','2000-3000','3000-4000','4000-5000','5000-10000','10000-15000','15000-30000','30000-70000','70000-~']
-    Y = [0 for x in range(len(X))]
+    intervals = [(0, 100), (100, 300), (300, 500), (500, 1000),
+                 (1000, 2000), (2000, 3000), (3000, 4000),
+                 (4000, 5000), (5000, 10000), (10000, 15000),
+                 (15000, 30000), (30000, 70000), (70000, float('inf'))]
+    X = ['0-100','100-300','300-500','500-1000','1000-2000','2000-3000',
+         '3000-4000','4000-5000','5000-10000','10000-15000','15000-30000',
+         '30000-70000','70000-~']
+    Y = [0] * len(intervals)
     for article in articles:
         repostsCount = int(article[3])
-        if repostsCount < 100:
-            Y[0] += 1
-        elif repostsCount < 300:
-            Y[1] += 1
-        elif repostsCount < 500:
-            Y[2] += 1
-        elif repostsCount < 1000:
-            Y[3] += 1
-        elif repostsCount < 3000:
-            Y[4] += 1
-        elif repostsCount < 4000:
-            Y[5] += 1
-        elif repostsCount < 5000:
-            Y[6] += 1
-        elif repostsCount < 10000:
-            Y[7] += 1
-        elif repostsCount < 15000:
-            Y[8] += 1
-        elif repostsCount < 30000:
-            Y[9] += 1
-        elif repostsCount < 70000:
-            Y[10] += 1
-        elif repostsCount >= 70000:
-            Y[11] += 1
-    return X,Y
+        for i, (lower, upper) in enumerate(intervals):
+            if lower <= repostsCount < upper:
+                Y[i] += 1
+                break
+    return X, Y
 
```
```diff
 def getIPByArticleRegion():
-    articleRegionDic = {}
-    for i in articleList:
-        if i[4] != '无':
-            if i[4] in articleRegionDic.keys():
-                articleRegionDic[i[4]] += 1
-            else:
-                articleRegionDic[i[4]] = 1
-    resultData = []
-    for key,value in articleRegionDic.items():
-        resultData.append({
-            'name':key,
-            'value':value
-        })
+    # Count articles by their regions, excluding '无'
+    regions = [article[4] for article in articleList if article[4] != '无']
+    region_counts = Counter(regions)
+    resultData = [{'name': key, 'value': value} for key, value in region_counts.items()]
     return resultData
 
```
```diff
 def getIPByCommentsRegion():
-    commentRegionDic = {}
-    for i in commentList:
-        if i[3] != '无':
-            if i[3] in commentRegionDic.keys():
-                commentRegionDic[i[3]] += 1
-            else:
-                commentRegionDic[i[3]] = 1
-    resultData = []
-    for key,value in commentRegionDic.items():
-        resultData.append({
-            'name':key,
-            'value':value
-        })
+    # Count comments by their regions, excluding '无'
+    regions = [comment[3] for comment in commentList if comment[3] != '无']
+    region_counts = Counter(regions)
+    resultData = [{'name': key, 'value': value} for key, value in region_counts.items()]
     return resultData
 
```
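Both region functions collapse to the same filter-then-`Counter` pattern: pick one column, drop the "no region" placeholder, count, and emit the `{'name': ..., 'value': ...}` dict shape that ECharts-style pie charts consume. A generic sketch under those assumptions; the helper name and sample rows are invented for illustration:

```python
from collections import Counter

def region_pie_data(rows, index, missing='无'):
    # Build [{'name': ..., 'value': ...}] pie data from one column of the
    # rows, skipping the placeholder that marks "no region".
    regions = [row[index] for row in rows if row[index] != missing]
    return [{'name': k, 'value': v} for k, v in Counter(regions).items()]

rows = [('a', '北京'), ('b', '上海'), ('c', '北京'), ('d', '无')]
print(region_pie_data(rows, 1))
# → [{'name': '北京', 'value': 2}, {'name': '上海', 'value': 1}]
```

`Counter` preserves first-encounter order (it is a `dict` subclass), so the pie slices come out in the order regions first appear in the data.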
```diff
 def getCommentDataOne():
-    X = []
+    # Categorize comments based on some numerical value, possibly length or count
     rangeNum = 20
-    for item in range(100):
-        X.append(str(rangeNum * item) + '-' + str(rangeNum * (item + 1)))
-    Y = [0 for x in range(len(X))]
+    intervals = [(rangeNum * i, rangeNum * (i + 1)) for i in range(100)]
+    X = [f"{lower}-{upper}" for lower, upper in intervals]
+    Y = [0] * len(intervals)
     for comment in commentList:
-        for item in range(100):
-            if int(comment[2]) < rangeNum * (item + 1):
-                Y[item] += 1
-                break
-    return X,Y
+        comment_value = int(comment[2])
+        for i, (lower, upper) in enumerate(intervals):
+            if lower <= comment_value < upper:
+                Y[i] += 1
+                break
+    return X, Y
 
```
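Unlike the hand-written bucket lists elsewhere in the file, `getCommentDataOne` generates its 100 fixed-width buckets and labels from one comprehension. The same idea in isolation (function name hypothetical):

```python
def fixed_width_intervals(width: int, count: int):
    # Generate `count` half-open intervals of equal width plus their labels.
    intervals = [(width * i, width * (i + 1)) for i in range(count)]
    labels = [f"{lo}-{hi}" for lo, hi in intervals]
    return intervals, labels

intervals, labels = fixed_width_intervals(20, 3)
print(intervals)  # → [(0, 20), (20, 40), (40, 60)]
print(labels)     # → ['0-20', '20-40', '40-60']
```

Note that values at or above `width * count` fall into no bucket and are silently dropped, in both the old and the new version of the function.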
```diff
 def getCommentDataTwo():
-    genderDic = {}
-    for i in commentList:
-        if i[6] in genderDic.keys():
-            genderDic[i[6]] += 1
-        else:
-            genderDic[i[6]] = 1
-    resultData = [{
-        'name':x[0],
-        'value':x[1]
-    } for x in genderDic.items()]
+    # Count comments by gender
+    genders = [comment[6] for comment in commentList]
+    gender_counts = Counter(genders)
+    resultData = [{'name': key, 'value': value} for key, value in gender_counts.items()]
     return resultData
 
```
```diff
 def getYuQingCharDataOne():
+    # Analyze sentiment of hot words
     hotWordList = getAllHotWords()
-    X = ['正面','中性','负面']
-    Y = [0,0,0]
+    sentiments = []
     for word in hotWordList:
         emotionValue = SnowNLP(word[0]).sentiments
         if emotionValue > 0.4:
-            Y[0] += 1
+            sentiments.append('正面')
         elif emotionValue < 0.2:
-            Y[2] += 1
+            sentiments.append('负面')
         else:
-            Y[1] += 1
-    biedata = [{
-        'name':x,
-        'value':Y[index]
-    } for index,x in enumerate(X)]
-    return X,Y,biedata
+            sentiments.append('中性')
+    counts = Counter(sentiments)
+    X = ['正面','中性','负面']
+    Y = [counts.get(sentiment, 0) for sentiment in X]
+    biedata = [{'name': x, 'value': y} for x, y in zip(X, Y)]
+    return X, Y, biedata
 
```
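The sentiment branches above reduce to one small pure function over SnowNLP's 0-1 polarity score, using the thresholds from the diff (positive above 0.4, negative below 0.2, neutral otherwise). A sketch that takes plain scores so it runs without SnowNLP installed; the sample scores are invented:

```python
from collections import Counter

LABELS = ['正面', '中性', '负面']  # positive, neutral, negative

def label_sentiment(score: float) -> str:
    # Map a 0-1 polarity score to a label using the diff's thresholds.
    if score > 0.4:
        return '正面'
    elif score < 0.2:
        return '负面'
    return '中性'

scores = [0.9, 0.3, 0.05, 0.5]
counts = Counter(label_sentiment(s) for s in scores)
pie = [{'name': x, 'value': counts.get(x, 0)} for x in LABELS]
print(pie)
# → [{'name': '正面', 'value': 2}, {'name': '中性', 'value': 1}, {'name': '负面', 'value': 1}]
```

Centralizing the thresholds this way would also let the two near-identical loops in `getYuQingCharDataOne` and `getYuQingCharDataTwo` share one classifier.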
```diff
 def getYuQingCharDataTwo():
-    X = ['正面', '中性', '负面']
-    biedata1 = [{
-        'name':x,
-        'value':0
-    } for x in X]
-    biedata2 = [{
-        'name': x,
-        'value': 0
-    } for x in X]
-
+    # Analyze sentiment of comments and articles
+    comment_sentiments = []
     for comment in commentList:
         emotionValue = SnowNLP(comment[4]).sentiments
         if emotionValue > 0.4:
-            biedata1[0]['value'] += 1
+            comment_sentiments.append('正面')
         elif emotionValue < 0.2:
-            biedata1[2]['value'] += 1
+            comment_sentiments.append('负面')
         else:
-            biedata1[1]['value'] += 1
-    for artile in articleList:
-        emotionValue = SnowNLP(artile[5]).sentiments
+            comment_sentiments.append('中性')
+    comment_counts = Counter(comment_sentiments)
+
+    article_sentiments = []
+    for article in articleList:
+        emotionValue = SnowNLP(article[5]).sentiments
         if emotionValue > 0.4:
-            biedata2[0]['value'] += 1
+            article_sentiments.append('正面')
         elif emotionValue < 0.2:
-            biedata2[2]['value'] += 1
+            article_sentiments.append('负面')
         else:
-            biedata2[1]['value'] += 1
-    return biedata1,biedata2
+            article_sentiments.append('中性')
+    article_counts = Counter(article_sentiments)
+
+    X = ['正面', '中性', '负面']
+    biedata1 = [{'name': x, 'value': comment_counts.get(x, 0)} for x in X]
+    biedata2 = [{'name': x, 'value': article_counts.get(x, 0)} for x in X]
+    return biedata1, biedata2
 
```
```diff
 def getYuQingCharDataThree():
+    # Retrieve top 10 hot words and their counts
     hotWordList = getAllHotWords()
-    x1Data = []
-    y1Data = []
-    for i in hotWordList[:10]:
-        x1Data.append(i[0])
-        y1Data.append(int(i[1]))
-    return x1Data,y1Data
-
+    x1Data = [word[0] for word in hotWordList[:10]]
+    y1Data = [int(word[1]) for word in hotWordList[:10]]
+    return x1Data, y1Data
```
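The final function splits the top-N `(word, count)` pairs into two parallel lists for charting. The same split can be done in isolation with two comprehensions over one slice; the sample data below is invented:

```python
hot_words = [('AI', '120'), ('经济', '95'), ('教育', '60')]

# Split the top-N (word, count) pairs into parallel lists for charting.
top = hot_words[:2]
x_data = [w for w, _ in top]
y_data = [int(c) for _, c in top]
print(x_data, y_data)  # → ['AI', '经济'] [120, 95]
```

Slicing first (`hot_words[:2]`) keeps both comprehensions over the same subset, so the two lists always stay aligned.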