# Pair each element of `rate` with the element of `tfidf` at the same position.
# NOTE(review): presumably `rate` holds the numeric labels and `tfidf` the
# matching TF-IDF feature vectors -- confirm against the code that builds them.
zipped = rate.zip(tfidf)

# LabeledPoint takes (label, features): the first zipped element is used as
# the label and the second as the feature vector.
data = zipped.map(lambda line: LabeledPoint(line[0], line[1]))

# Split with weights 0.6 / 0.4; the fixed seed keeps the split repeatable.
training, test = data.randomSplit([0.6, 0.4], seed=0)
# Fit a Naive Bayes model on the training split; the second argument (1.0)
# is the smoothing parameter passed to NaiveBayes.train.
NBmodel = NaiveBayes.train(training, 1.0)

# For every test point, keep (predicted label, true label).
predictionAndLabel = test.map(
    lambda point: (NBmodel.predict(point.features), point.label)
)

# Accuracy = matching pairs / all test points. The leading 1.0 forces float
# division even where `/` would otherwise be integer division.
accuracy = (
    1.0
    * predictionAndLabel.filter(lambda pair: pair[0] == pair[1]).count()
    / test.count()
)
1 2 3 4 5 | yourDocument=input("输入待分类的评论:") yourwords="/".join(jieba.cut_for_search(yourDocument)).split("/") yourtf = hashingTF.transform(yourwords) yourtfidf=idfModel.transform(yourtf) print('NaiveBayes Model Predict:',NBmodel.predict(yourtfidf),' |
# Flatten the per-record token collections in `words` into one RDD of tokens.
text = words.flatMap(lambda tokens: tokens)

# Classic word count: emit (token, 1), sum the ones per token, then order the
# (token, count) pairs by count, largest first.
wordCounts = (
    text.map(lambda token: (token, 1))
    .reduceByKey(lambda left, right: left + right)
    .sortBy(lambda pair: pair[1], ascending=False)
)

# Fetch the ten most frequent tokens.
wordCounts.take(10)
def filterStopWords(line):
    """Return the tokens of ``line`` that are not stop words.

    The previous implementation called ``line.remove(i)`` while iterating
    over ``line``. Removing an element shifts the rest of the list left, so
    the iterator skipped the token right after each removal and consecutive
    stop words were left in place. Building a new list avoids that and also
    leaves the input untouched.

    Args:
        line: Iterable of tokens for one record.

    Returns:
        A new list with every token found in the module-level ``stopwords``
        collection removed; the order of the remaining tokens is preserved.
    """
    return [token for token in line if token not in stopwords]


# Strip stop words from every record of `words`.
words = words.map(filterStopWords)
欢迎光临 电子技术论坛_中国专业的电子工程师学习交流社区-中电网技术论坛 (http://bbs.eccn.com/) | Powered by Discuz! 7.0.0 |