# Split every record on commas so each element becomes a list of fields.
# NOTE(review): `weather` is defined outside this view; presumably an RDD of
# comma-separated text lines -- confirm against the loading cell.
weatherParse = weather.map(lambda line: line.split(","))
# Inspection cells: in the notebook each expression below was its own
# one-line cell, so its value is displayed when the cell is evaluated.

# First parsed record (a list of fields).
weatherParse.first()

# Field 0 of the first record.
weatherParse.first()[0]

# Field 2 of the first record.
weatherParse.first()[2]
# x[0] is the station
# x[3] is the precipitation value
# Emit (station, (precipitation, 1)); the trailing 1 is a per-record count.
# int() raises ValueError if a precipitation field is not an integer string.
weatherPrecpCountByKey = weatherPrecp.map(lambda x: (x[0], (int(x[3]), 1)))
weatherPrecpCountByKey.first()
# Merge the (precipitation, count) values of each key component-wise, leaving
# one (station, (total precipitation, number of records)) pair per station.
weatherPrecpAddByKey = weatherPrecpCountByKey.reduceByKey(
    lambda v1, v2: (v1[0] + v2[0], v1[1] + v2[1])
)
weatherPrecpAddByKey.first()
# Turn (station, (total, count)) into (station, total / count).
# float() forces true division; without it Python 2 would truncate the
# int / int quotient.
weatherAverages = weatherPrecpAddByKey.map(
    lambda k: (k[0], k[1][0] / float(k[1][1]))
)
weatherAverages.first()
# Print ten (station, average) pairs.
# NOTE: top() uses the natural ordering of the elements, and tuples compare
# element by element, so this ranks by station identifier (pair[0]) -- not by
# average precipitation.
for pair in weatherAverages.top(10):
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Station %s had average precipitations of %f" % (pair[0], pair[1]))
# Collect the ten highest averages and their stations, in descending order
# of average, as two parallel lists.
precTop10 = []
stationsTop10 = []
# Swap each pair to (average, station) so top() ranks by the average.
# Indexing replaces the tuple-parameter lambda `lambda (x,y)`, which is a
# syntax error on Python 3.
for pair in weatherAverages.map(lambda kv: (kv[1], kv[0])).top(10):
    precTop10.append(pair[0])
    stationsTop10.append(pair[1])
    print("Station %s had average precipitations of %f" % (pair[1], pair[0]))
1 2 3 4 5 6 7 8 9 10 11 12 13 | %matplotlib inline import numpy as np import matplotlib.pyplot as plt N = 10 index = np.arange(N) bar_width = 0.5 plt.bar(index, precTop10, bar_width, color='b') plt.xlabel('Stations') plt.ylabel('Precipitations') plt.title('10 stations with the highest average precipitation') plt.xticks(index + bar_width, stationsTop10, rotation=90) plt.show() |
欢迎光临 电子技术论坛_中国专业的电子工程师学习交流社区-中电网技术论坛 (http://bbs.eccn.com/) | Powered by Discuz! 7.0.0 |