戒酒的李白
Committed by GitHub

Merge pull request #15 from zhaisang111/main

Optimized the getEchartsData.py script, improving code efficiency and…
from collections import Counter  # Frequency counting for region/gender/sentiment tallies

from utils.getPublicData import *  # Data-retrieval helpers (getAllArticleData, getAllCommentsData, ...)
from utils.mynlp import SnowNLP  # Sentiment scoring

# Both datasets are loaded once, when this module is imported; every function
# below works on these snapshots rather than re-querying the data source.
articleList = getAllArticleData()  # Retrieve all article data
commentList = getAllCommentsData()  # Retrieve all comment data
5 7
def getTypeList():
    """Return the distinct article types found in ``articleList``.

    The type is read from column 8 of each article row. Values are returned
    in order of first appearance, so the result is stable from run to run
    (a plain ``set`` of strings is not, because of hash randomisation).

    Note: this reads the module-level ``articleList`` snapshot, so articles
    added after the module was imported are not reflected.

    Returns:
        list: Unique ``article[8]`` values, without duplicates.
    """
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(article[8] for article in articleList))
8 11
def getArticleByType(type):
    """Return every article row whose type column equals ``type``.

    Args:
        type: Value compared (with ``==``) against ``article[8]``. The name
            shadows the builtin but is kept for existing callers.

    Returns:
        list: Matching rows from ``articleList`` in their original order;
        empty when nothing matches.
    """
    return [article for article in articleList if article[8] == type]
15 15
def getArticleLikeCount(type):
    """Bucket the articles of one type by like count.

    Args:
        type: Article type, forwarded to ``getArticleByType``.

    Returns:
        tuple: ``(X, Y)`` where ``X`` is the list of bucket labels and
        ``Y[i]`` is the number of articles whose ``int(article[1])`` falls
        into bucket ``X[i]``. Lower edges are inclusive, upper edges
        exclusive; the last bucket is open-ended.

    Raises:
        ValueError: If ``article[1]`` cannot be converted with ``int()``.
    """
    from bisect import bisect_right

    # Upper edge of every bucket except the open-ended last one.
    upper_edges = [100, 1000, 5000, 15000, 30000, 50000]
    X = ['0-100', '100-1000', '1000-5000', '5000-15000', '15000-30000',
         '30000-50000', '50000-~']
    Y = [0] * len(X)
    for article in getArticleByType(type):
        likeCount = int(article[1])
        # bisect_right returns how many edges are <= likeCount, which is the
        # bucket index. Anything below the first edge (including a negative
        # value) lands in bucket 0, so every article is counted exactly once.
        Y[bisect_right(upper_edges, likeCount)] += 1
    return X, Y
37 31
def getArticleCommentsLen(type):
    """Bucket the articles of one type by the value in column 2.

    The original code names that value ``commentLen``; presumably it is the
    article's comment count (confirm against the data source).

    Args:
        type: Article type, forwarded to ``getArticleByType``.

    Returns:
        tuple: ``(X, Y)`` where ``X`` is the list of bucket labels and
        ``Y[i]`` is the number of articles whose ``int(article[2])`` falls
        into bucket ``X[i]``. Lower edges are inclusive, upper edges
        exclusive; the last bucket is open-ended.

    Raises:
        ValueError: If ``article[2]`` cannot be converted with ``int()``.
    """
    from bisect import bisect_right

    # Upper edge of every bucket except the open-ended last one. Keeping the
    # edges in one sorted list avoids the mis-ordered / missing comparisons
    # that a hand-written elif chain invites.
    upper_edges = [100, 500, 1000, 1500, 3000, 5000, 10000, 15000]
    X = ['0-100', '100-500', '500-1000', '1000-1500', '1500-3000',
         '3000-5000', '5000-10000', '10000-15000', '15000-~']
    Y = [0] * len(X)
    for article in getArticleByType(type):
        commentLen = int(article[2])
        # Number of edges <= commentLen is the bucket index; values below the
        # first edge land in bucket 0, so every article is counted once.
        Y[bisect_right(upper_edges, commentLen)] += 1
    return X, Y
63 48
def getArticleRepotsLen(type):
    """Bucket the articles of one type by repost count.

    Args:
        type: Article type, forwarded to ``getArticleByType``.

    Returns:
        tuple: ``(X, Y)`` where ``X`` is the list of bucket labels and
        ``Y[i]`` is the number of articles whose ``int(article[3])`` falls
        into bucket ``X[i]``. Lower edges are inclusive, upper edges
        exclusive; the last bucket is open-ended.

    Raises:
        ValueError: If ``article[3]`` cannot be converted with ``int()``.
    """
    from bisect import bisect_right

    # Upper edge of every bucket except the open-ended last one; there must be
    # exactly one fewer edge than labels so counts line up with X.
    upper_edges = [100, 300, 500, 1000, 2000, 3000, 4000, 5000, 10000,
                   15000, 30000, 70000]
    X = ['0-100', '100-300', '300-500', '500-1000', '1000-2000', '2000-3000',
         '3000-4000', '4000-5000', '5000-10000', '10000-15000', '15000-30000',
         '30000-70000', '70000-~']
    Y = [0] * len(X)
    for article in getArticleByType(type):
        repostsCount = int(article[3])
        # Number of edges <= repostsCount is the bucket index; values below
        # the first edge land in bucket 0, so every article is counted once.
        Y[bisect_right(upper_edges, repostsCount)] += 1
    return X, Y
95 67
def getIPByArticleRegion():
    """Count articles per region, ignoring rows whose region is '无'.

    The region is read from column 4 of each row in ``articleList``.

    Returns:
        list[dict]: One ``{'name': region, 'value': count}`` entry per
        distinct region, in order of first appearance.
    """
    region_counts = Counter(
        article[4] for article in articleList if article[4] != '无'
    )
    return [{'name': region, 'value': count}
            for region, count in region_counts.items()]
111 74
def getIPByCommentsRegion():
    """Count comments per region, ignoring rows whose region is '无'.

    The region is read from column 3 of each row in ``commentList``.

    Returns:
        list[dict]: One ``{'name': region, 'value': count}`` entry per
        distinct region, in order of first appearance.
    """
    region_counts = Counter(
        comment[3] for comment in commentList if comment[3] != '无'
    )
    return [{'name': region, 'value': count}
            for region, count in region_counts.items()]
127 81
def getCommentDataOne():
    """Build a 100-bucket histogram of ``int(comment[2])`` over all comments.

    Buckets are 20 wide: ``'0-20'``, ``'20-40'``, ... ``'1980-2000'``, with
    inclusive lower and exclusive upper edges. What column 2 represents is
    not visible here (NOTE(review): possibly a like count — confirm).

    Values outside ``0 <= value < 2000`` have no bucket and are skipped, so
    ``sum(Y)`` can be smaller than ``len(commentList)``.

    Returns:
        tuple: ``(X, Y)`` with the bucket labels and the matching counts.

    Raises:
        ValueError: If ``comment[2]`` cannot be converted with ``int()``.
    """
    rangeNum = 20
    bucket_count = 100
    X = [f"{rangeNum * i}-{rangeNum * (i + 1)}" for i in range(bucket_count)]
    Y = [0] * bucket_count
    for comment in commentList:
        # Floor division gives the bucket directly instead of scanning all
        # 100 intervals; negative values yield a negative index and are
        # rejected by the range check below.
        index = int(comment[2]) // rangeNum
        if 0 <= index < bucket_count:
            Y[index] += 1
    return X, Y
140 95
def getCommentDataTwo():
    """Count comments per distinct value of column 6.

    The pre-refactor code called this value the commenter's gender
    (``genderDic``); the column layout itself is defined elsewhere.

    Returns:
        list[dict]: One ``{'name': value, 'value': count}`` entry per
        distinct ``comment[6]``, in order of first appearance.
    """
    gender_counts = Counter(comment[6] for comment in commentList)
    return [{'name': gender, 'value': count}
            for gender, count in gender_counts.items()]
153 102
def getYuQingCharDataOne():
    """Classify every hot word by sentiment and tally the three classes.

    Each entry of ``getAllHotWords()`` is scored with
    ``SnowNLP(word[0]).sentiments``: a score above 0.4 is '正面' (positive),
    below 0.2 is '负面' (negative), anything else is '中性' (neutral).

    Returns:
        tuple: ``(X, Y, biedata)`` where ``X`` is
        ``['正面', '中性', '负面']``, ``Y`` holds the count for each label in
        the same order, and ``biedata`` is the same data as a list of
        ``{'name': label, 'value': count}`` dicts.
    """
    X = ['正面', '中性', '负面']
    counts = Counter()
    for word in getAllHotWords():
        emotionValue = SnowNLP(word[0]).sentiments
        if emotionValue > 0.4:
            counts['正面'] += 1
        elif emotionValue < 0.2:
            counts['负面'] += 1
        else:
            counts['中性'] += 1
    # Counter returns 0 for labels that never occurred, so all three classes
    # are always present in the output.
    Y = [counts[label] for label in X]
    biedata = [{'name': label, 'value': count} for label, count in zip(X, Y)]
    return X, Y, biedata
171 120
def getYuQingCharDataTwo():
    """Tally sentiment classes for all comments and for all articles.

    Comment text is taken from ``comment[4]`` and article text from
    ``article[5]``. Each text is scored with ``SnowNLP(text).sentiments``:
    above 0.4 is '正面' (positive), below 0.2 is '负面' (negative), anything
    else is '中性' (neutral).

    Returns:
        tuple: ``(biedata1, biedata2)`` — the comment tally and the article
        tally. Each is a list of three ``{'name': label, 'value': count}``
        dicts in the order '正面', '中性', '负面'; a label with no hits
        has value 0.
    """
    X = ['正面', '中性', '负面']

    def tally(texts):
        # Classify each text and return the per-label counts in X order.
        counts = Counter()
        for text in texts:
            emotionValue = SnowNLP(text).sentiments
            if emotionValue > 0.4:
                counts['正面'] += 1
            elif emotionValue < 0.2:
                counts['负面'] += 1
            else:
                counts['中性'] += 1
        return [{'name': label, 'value': counts[label]} for label in X]

    biedata1 = tally(comment[4] for comment in commentList)
    biedata2 = tally(article[5] for article in articleList)
    return biedata1, biedata2
200 149
def getYuQingCharDataThree():
    """Return the words and counts of the first ten hot-word entries.

    These are the "top 10" only if ``getAllHotWords()`` already returns its
    entries ranked; no sorting happens here.

    Returns:
        tuple: ``(x1Data, y1Data)`` where ``x1Data`` holds ``word[0]`` and
        ``y1Data`` holds ``int(word[1])`` for each of the first ten entries
        (fewer if the list is shorter).

    Raises:
        ValueError: If ``word[1]`` cannot be converted with ``int()``.
    """
    top_words = getAllHotWords()[:10]  # slice once, reuse for both lists
    x1Data = [word[0] for word in top_words]
    y1Data = [int(word[1]) for word in top_words]
    return x1Data, y1Data
  155 + return x1Data, y1Data