juanboy

【getEchartsData.py】对数据库获取到的数据按要求处理的函数

  1 +from utils.getPublicData import *
  # Module-level snapshots: both loaders run once, when this module is imported,
  # and the functions below that reference articleList / commentList read these
  # lists instead of calling the loaders again.
  2 +articleList = getAllArticleData()
  3 +commentList = getAllCommentsData()
  4 +
def getTypeList():
    """Return the distinct article types among the crawled articles.

    Reads column 8 of every row returned by a fresh ``getAllArticleData()``
    call (not the module-level ``articleList`` snapshot) and de-duplicates
    the values through a set, so the order of the returned list is
    unspecified.
    """
    distinctTypes = {row[8] for row in getAllArticleData()}
    return list(distinctTypes)
  7 +
def getArticleByType(type):
    """Return the rows of ``articleList`` whose type column equals *type*.

    Args:
        type: Value compared with ``==`` against column 8 of each row.

    Returns:
        A new list holding the matching rows, in ``articleList`` order.
    """
    return [row for row in articleList if row[8] == type]
  14 +
def getArticleCharLikeCount(type):
    """Build the like-count distribution for articles of one type.

    Args:
        type: Article type, forwarded to ``getArticleByType``.

    Returns:
        A tuple ``(labels, counts)``. ``labels`` are the range names and
        ``counts[i]`` is the number of articles whose like count
        (``int(article[1])``) falls in the range named by ``labels[i]``.
    """
    labels = ['0-100', '100-1000', '1000-5000', '5000-15000', '15000-30000', '30000-50000', '50000-~']
    # Exclusive upper bound of each bucket except the open-ended last one;
    # upperBounds[i] belongs to labels[i].
    upperBounds = (100, 1000, 5000, 15000, 30000, 50000)
    counts = [0] * len(labels)
    for row in getArticleByType(type):
        likes = int(row[1])
        for slot, bound in enumerate(upperBounds):
            if likes < bound:
                counts[slot] += 1
                break
        else:
            # No bound exceeded the value: it belongs to '50000-~'.
            counts[-1] += 1
    return labels, counts
  36 +
def getArticleCharCommentsLen(type):
    """Build the comment-count distribution for articles of one type.

    Args:
        type: Article type, forwarded to ``getArticleByType``.

    Returns:
        A tuple ``(xData, yData)``. ``xData`` is the list of range labels and
        ``yData[i]`` is the number of articles whose comment count
        (``int(article[2])``) falls in the half-open range named by
        ``xData[i]``; the last bucket is open-ended.
    """
    articles = getArticleByType(type)
    xData = ['0-100', '100-500', '500-1000', '1000-1500', '1500-3000', '3000-5000', '5000-10000', '10000-15000', '15000-~']
    # Exclusive upper bound of each bucket except the open-ended last one.
    # upperBounds[i] pairs with xData[i], so every value is counted exactly
    # once, under the label that actually describes it.
    upperBounds = (100, 500, 1000, 1500, 3000, 5000, 10000, 15000)
    yData = [0] * len(xData)
    for article in articles:
        commentLen = int(article[2])
        for index, bound in enumerate(upperBounds):
            if commentLen < bound:
                yData[index] += 1
                break
        else:
            # At or above the last bound: the '15000-~' bucket.
            yData[-1] += 1
    return xData, yData
  62 +
def getArticleCharRepotsLen(type):
    """Build the repost-count distribution for articles of one type.

    Args:
        type: Article type, forwarded to ``getArticleByType``.

    Returns:
        A tuple ``(xData, yData)``. ``xData`` is the list of range labels and
        ``yData[i]`` is the number of articles whose repost count
        (``int(article[3])``) falls in the half-open range named by
        ``xData[i]``; the last bucket is open-ended.
    """
    articles = getArticleByType(type)
    xData = ['0-100', '100-300', '300-500', '500-1000', '1000-2000', '2000-3000', '3000-4000', '4000-5000', '5000-10000', '10000-15000', '15000-30000', '30000-70000', '70000-~']
    # Exclusive upper bound of each bucket except the open-ended last one.
    # upperBounds[i] pairs with xData[i]; the 2000 boundary is included so
    # that no bucket is shifted against its label and '70000-~' is reachable.
    upperBounds = (100, 300, 500, 1000, 2000, 3000, 4000, 5000, 10000, 15000, 30000, 70000)
    yData = [0] * len(xData)
    for article in articles:
        repostsCount = int(article[3])
        for index, bound in enumerate(upperBounds):
            if repostsCount < bound:
                yData[index] += 1
                break
        else:
            # At or above the last bound: the '70000-~' bucket.
            yData[-1] += 1
    return xData, yData
  94 +
def getIPCharByArticleRegion():
    """Count the articles in ``articleList`` per publishing region.

    Rows whose region column (index 4) holds the placeholder '无' are
    skipped.

    Returns:
        A list of ``{'name': region, 'value': count}`` dicts, ordered by the
        first appearance of each region in ``articleList``.
    """
    tally = {}
    for row in articleList:
        region = row[4]
        if region == '无':
            continue
        tally[region] = tally.get(region, 0) + 1
    return [{'name': region, 'value': total} for region, total in tally.items()]
  110 +
def getIPCharByCommentsRegion():
    """Count the comments in ``commentList`` per region.

    Rows whose region column (index 3) holds the placeholder '无' are
    skipped.

    Returns:
        A list of ``{'name': region, 'value': count}`` dicts, ordered by the
        first appearance of each region in ``commentList``.
    """
    regionCounts = {}
    for comment in commentList:
        region = comment[3]
        if region != '无':
            regionCounts[region] = regionCounts.get(region, 0) + 1

    chartRows = []
    for region in regionCounts:
        chartRows.append({'name': region, 'value': regionCounts[region]})
    return chartRows
  126 +
def getCommentCharDataOne():
    """Build the like-count distribution of the comments in ``commentList``.

    The like count is ``int(comment[2])``. It is sorted into 100 buckets of
    width 20 ('0-20', '20-40', ..., '1980-2000'). Negative values fall into
    the first bucket; values of 2000 or more match no bucket and are not
    counted.

    Returns:
        A tuple ``(labels, counts)`` of two lists with 100 entries each.
    """
    rangeNum = 20
    bucketTotal = 100
    labels = [f"{rangeNum * step}-{rangeNum * (step + 1)}" for step in range(bucketTotal)]
    counts = [0] * bucketTotal
    for comment in commentList:
        likes = int(comment[2])
        # The first bucket whose upper bound exceeds `likes` is likes // 20;
        # anything below zero is clamped into bucket 0.
        slot = max(likes // rangeNum, 0)
        if slot < bucketTotal:
            counts[slot] += 1
    return labels, counts
  139 +
def getCommentCharDataTwo():
    """Count the comments in ``commentList`` per gender value.

    The gender is read from column 6 of each row; every row is counted,
    whatever the value.

    Returns:
        A list of ``{'name': gender, 'value': count}`` dicts, ordered by the
        first appearance of each gender value in ``commentList``.
    """
    perGender = {}
    for row in commentList:
        gender = row[6]
        perGender[gender] = perGender.get(gender, 0) + 1
    return [{'name': gender, 'value': total} for gender, total in perGender.items()]
  154 +