1 2 3 4 5 | $ hadoop dfs -ls / drwxrwxrwx - hue supergroup 0 2011-12-10 06:56 /tmp drwxr-xr-x - hue supergroup 0 2011-12-08 05:20 /user drwxr-xr-x - mapred supergroup 0 2011-12-08 10:06 /var $ |
1 2 3 4 5 6 7 8 9 | $ dmesg > kerndata $ hadoop dfs -mkdir /test $ hadoop dfs -ls /test $ hadoop dfs -copyFromLocal kerndata /test/mydata $ hadoop dfs -cat /test/mydata Linux version 2.6.18-274-7.1.el5 (mockbuild@builder10.centos.org)... ... e1000: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX $ |
#!/usr/bin/env python
"""Hadoop Streaming mapper for a word count.

Reads text from standard input and writes one ``word<TAB>1`` record to
standard output for every whitespace-separated token.
"""
import sys

for raw_line in sys.stdin:
    # Trim the line, then break it on runs of whitespace.
    for token in raw_line.strip().split():
        # One record per occurrence; the reducer adds up the 1s.
        print('%s\t1' % token)
#!/usr/bin/env python
"""Hadoop Streaming reducer for a word count.

Reads ``word<TAB>count`` records from standard input and writes one
``word<TAB>total`` record per run of consecutive identical words.  The
input is expected to arrive grouped by word (Hadoop sorts the map output
by key before it reaches the reducer).
"""
import sys


def _parse_pairs(lines):
    """Yield ``(word, count)`` for every well-formed input line.

    Lines without a tab separator or with a non-integer count (including
    blank lines) are skipped instead of aborting the whole reduce task.
    """
    for line in lines:
        line = line.strip()
        try:
            word, count = line.split('\t', 1)
            count = int(count)
        except ValueError:
            # Malformed record: ignore it and keep going.
            continue
        yield word, count


def _sum_runs(pairs):
    """Yield ``(word, total)`` for each run of consecutive equal words."""
    last_word = None
    last_count = 0
    for cur_word, count in pairs:
        if last_word == cur_word:
            last_count += count
        else:
            if last_word is not None:
                yield last_word, last_count
            last_word = cur_word
            last_count = count
    # Flush the final run -- but only if at least one record was seen, so
    # an empty input produces no output rather than a bogus "None\t0".
    if last_word is not None:
        yield last_word, last_count


def main(stdin=None, stdout=None):
    """Run the reducer.

    stdin / stdout default to the process streams; they can be replaced
    by any line-iterable / object with ``write`` (useful for testing).
    """
    if stdin is None:
        stdin = sys.stdin
    if stdout is None:
        stdout = sys.stdout
    for word, total in _sum_runs(_parse_pairs(stdin)):
        # write() instead of the print statement keeps the script valid
        # under both Python 2 and Python 3.
        stdout.write('%s\t%s\n' % (word, total))


if __name__ == '__main__':
    main()
1 2 3 4 5 6 7 8 9 10 11 12 13 | $ hadoop jar /usr/lib/hadoop-0.20/contrib/streaming/hadoop-streaming-0.20.2-cdh3u2.jar \ -file pymap.py -mapper pymap.py -file pyreduce.py -reducer pyreduce.py \ -input /test/mydata -output /test/output ... $ hadoop dfs -cat /test/output/part-00000 ... write 3 write-combining 2 wrong. 1 your 2 zone: 2 zonelists. 1 $ |
1 2 3 4 5 6 7 8 | $ pig -x local grunt> log = LOAD '/var/log/messages'; grunt> logkern = FILTER log BY $0 MATCHES '.*kernel:.*'; grunt> logkernterm = FILTER logkern BY $0 MATCHES '.*terminating.*'; grunt> dump logkernterm; ... (Dec 8 11:08:48 localhost kernel: Kernel log daemon terminating.) grunt> |
-- Count /var/log/messages entries per logging source.
--
-- Every line is split on single spaces and the first five fields are named.
-- A Pig statement runs until its terminating ';', so it can simply be
-- wrapped across lines; no backslash continuation is needed.
-- NOTE(review): syslog presumably pads single-digit days with an extra
-- space, which would shift the fields by one with a ' ' delimiter -- confirm
-- against the actual log format.
log = LOAD '/var/log/messages' USING PigStorage(' ')
      AS (month:chararray, day:int, time:chararray,
          host:chararray, source:chararray);

-- Collect the records into one bag per distinct source value.
sources = GROUP log BY source;

-- Emit (source, number of records in that source's bag).
counts = FOREACH sources GENERATE group, COUNT(log);

dump counts;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | $ pig -x local logsources.pig ... (init:,1) (gconfd,12) (kernel:,505) (syslogd,2) (dhclient:,91) (localhost,1168) (gpm[2139]:,2) (gpm[2168]:,2) (NetworkManager:,292) (avahi-daemon[3345]:,37) (avahi-daemon[3362]:,44) (nm-system-settings:,8) $ |
欢迎光临 电子技术论坛_中国专业的电子工程师学习交流社区-中电网技术论坛 (http://bbs.eccn.com/) | Powered by Discuz! 7.0.0 |