__init__.py
1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from . import normal
from . import seg
from . import tag
from . import sentiment
from .sim import bm25
from .summary import textrank
from .summary import words_merge
class SnowNLP(object):
    """Unified interface for Chinese NLP over a single document.

    Wraps the package's segmentation, POS tagging, sentiment
    classification, pinyin/simplification normalization, BM25
    similarity, and TextRank summarization behind one object.
    """

    def __init__(self, doc):
        # Keep the raw text and build the BM25 index once, so that
        # tf / idf / sim all reuse the same index.
        self.doc = doc
        self.bm25 = bm25.BM25(doc)

    @property
    def words(self):
        """Segmented words of the document (recomputed on each access)."""
        return seg.seg(self.doc)

    @property
    def sentences(self):
        """The document split into sentences."""
        return normal.get_sentences(self.doc)

    @property
    def han(self):
        """The document converted to simplified Chinese (hans)."""
        return normal.zh2hans(self.doc)

    @property
    def pinyin(self):
        """Pinyin transcription of the document."""
        return normal.get_pinyin(self.doc)

    @property
    def sentiments(self):
        """Sentiment score from the classifier (presumably a
        probability-like value — see sentiment.classify)."""
        return sentiment.classify(self.doc)

    @property
    def tags(self):
        """Pairs of (word, pos_tag) for the segmented document.

        Returns the result of zip(), i.e. an iterator under Python 3.
        """
        words = self.words
        tags = tag.tag(words)
        return zip(words, tags)

    @property
    def tf(self):
        """Per-document term frequencies from the BM25 index."""
        return self.bm25.f

    @property
    def idf(self):
        """Inverse document frequencies from the BM25 index."""
        return self.bm25.idf

    def sim(self, doc):
        """BM25 similarity scores of *doc* against the indexed document."""
        return self.bm25.simall(doc)

    def _seg_sentences(self):
        """Segment each sentence and drop stopwords.

        Returns (sents, docs) where docs[i] is the filtered word list
        of sents[i].  Shared preprocessing for summary() and keywords().
        """
        sents = self.sentences
        docs = []
        for sent in sents:
            words = seg.seg(sent)
            words = normal.filter_stop(words)
            docs.append(words)
        return sents, docs

    def summary(self, limit=5):
        """Return up to *limit* key sentences ranked by TextRank."""
        sents, docs = self._seg_sentences()
        rank = textrank.TextRank(docs)
        rank.solve()
        # Map the ranked indices back to the original sentence text.
        return [sents[index] for index in rank.top_index(limit)]

    def keywords(self, limit=5, merge=False):
        """Return up to *limit* keywords ranked by keyword TextRank.

        When *merge* is True, adjacent keywords are merged back into
        phrases using the original document text.
        """
        _, docs = self._seg_sentences()
        rank = textrank.KeywordTextRank(docs)
        rank.solve()
        ret = list(rank.top_index(limit))
        if merge:
            wm = words_merge.SimpleMerge(self.doc, ret)
            return wm.merge()
        return ret