// Read the JSON response from the HTTP endpoint named by `url`.
// `url` is not assigned in this snippet; it must be set beforehand.
jsonResultTweets = read(http(url));

// Evaluate the variable on its own so the fetched data is echoed.
jsonResultTweets;
// Project each tweet onto the fields of interest, then group by location.
// NOTE(review): Jaql `group by` is normally followed by an `into` clause
// describing the per-group output -- confirm the intended aggregate.
governmentTopicDiscussionByLocation =
  jsonResultTweets
    -> transform {
         location: $.location,
         user_id: $.from_user_id_str,
         date_created: $.created_at,
         comment: $.text
       }
    -> group by key = $.location;
// Write the grouped result to a delimited file.
// `list_of_comma_seperated_json_fields` is a placeholder: replace it with the
// comma-separated field names that make up the output columns.
governmentTopicDiscussionByLocation
  -> write(del("/user/governmentTopics/governmentTopic_1Tweets.del",
               schema = schema { list_of_comma_seperated_json_fields }));
// Endpoint that filters the Twitter stream on the tracked keyword.
url = "https://stream.twitter.com/1.1/statuses/filter.json?track=governmentTopic";

// Fetch the tweets and echo them for inspection.
jsonResultTweets = read(http(url));
jsonResultTweets;

// Keep only the fields of interest from each tweet, then group by location.
// NOTE(review): Jaql `group by` is normally followed by an `into` clause
// describing the per-group output -- confirm the intended aggregate.
governmentTopicDiscussionByLocation =
  jsonResultTweets
    -> transform {
         location: $.location,
         user_id: $.from_user_id_str,
         user_name: $.user.name,
         user_location: $.user.location,
         date_created: $.created_at,
         comment: $.text
       }
    -> group by key = $.location;

// Persist the result as a delimited file whose columns follow the schema order.
governmentTopicDiscussionByLocation
  -> write(del("/user/governmentTopics/governmentTopic_1Tweets.del",
               schema = schema {
                 location, user_id, user_name, user_location,
                 date_created, comment
               }));
libcurl4-gnutls-dev libcurl4-nss-dev libcurl4-openssl-dev r-base r-base-dev r-cran-rjson
# Install the Twitter client (twitteR), OAuth (ROAuth) and curl (RCurl)
# packages. Package names must be quoted with plain ASCII double quotes;
# typographic quotes are a syntax error in R.
install.packages(c("twitteR", "ROAuth", "RCurl"))
# Load the packages providing the Twitter client, OAuth support and curl
# bindings. The workspace is deliberately not cleared here: wiping every
# object with rm(list = ls()) would silently destroy the caller's session state.
library(twitteR)
library(ROAuth)
library(RCurl)
# Download a CA certificate bundle into the working directory. It is fetched
# over HTTPS so the trust store itself cannot be altered in transit.
download.file(url = "https://curl.se/ca/cacert.pem", destfile = "cacert.pem")

# Twitter OAuth endpoints.
requestURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"

# Application credentials. The right-hand sides are placeholders that are not
# defined in this file; replace them with the values issued by Twitter.
consumerKey <- myConsumerKeyFromTwitter
consumerSecret <- myConsumerSeccretFromTwitter

# OAuth credential object built from the application keys and endpoints.
# NOTE(review): no handshake is performed on myCred in this file; a call such
# as myCred$handshake(cainfo = "cacert.pem") is presumably required before it
# is passed as `oauth` to a streaming call -- confirm.
myCred <- OAuthFactory$new(
  consumerKey = consumerKey,
  consumerSecret = consumerSecret,
  requestURL = requestURL,
  accessURL = accessURL,
  authURL = authURL
)

# User access token and secret (placeholders, see above).
accessToken <- myAccessTokenFromTwitter
accessSecret <- myAccessSecretFromTwitter

# Register all four credentials with twitteR for subsequent API calls.
setup_twitter_oauth(consumerKey, consumerSecret, accessToken, accessSecret)
# Search for tweets carrying the hashtag that were posted since the last pull.
# `last_date_pulled` is not defined in this file.
# NOTE(review): presumably a "YYYY-MM-DD" string set by the caller -- confirm.
found_tweets <- searchTwitter("#keyWord", since = last_date_pulled)

# searchTwitter() returns a list of status objects; convert it to a data frame
# so that later steps can read the tweet bodies from the `text` column.
# An empty result is mapped to an empty data frame instead of failing.
govt_sentiment_data <- if (length(found_tweets) > 0) {
  twListToDF(found_tweets)
} else {
  data.frame(text = character(0), stringsAsFactors = FALSE)
}
# Capture tweets matching the hashtag from the streaming API for one hour
# (timeout is in seconds). filterStream() comes from the streamR package, which
# this file never loads, so it is called through its namespace.
# When a file name is given the tweets are appended to that file rather than
# returned, so they are parsed back into a data frame afterwards.
streamR::filterStream(
  file.name = "tweets_rstats.json",
  track = "#keyWord",
  timeout = 3600,
  oauth = myCred
)
govt_sentiment_data <- streamR::parseTweets("tweets_rstats.json")
# Normalise a character vector of tweet bodies for sentiment analysis.
#
# Each step operates on the output of the previous one. The order matters:
# punctuation is stripped before links, so a URL has already collapsed into a
# single "http..." word by the time the link pattern runs.
#
# txt: character vector of raw tweet texts.
# Returns a character vector of the same length with the noise removed.
clean_tweet_text <- function(txt) {
  # remove retweet entities
  txt <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", txt)
  # remove @mentions
  txt <- gsub("@\\w+", "", txt)
  # remove punctuation
  txt <- gsub("[[:punct:]]", "", txt)
  # remove numbers
  txt <- gsub("[[:digit:]]", "", txt)
  # remove html links
  txt <- gsub("http\\w+", "", txt)
  # collapse runs of blanks to a single space so adjacent words stay separated
  txt <- gsub("[ \t]{2,}", " ", txt)
  # trim leading and trailing whitespace
  txt <- gsub("^\\s+|\\s+$", "", txt)
  # drop every character outside the allowed set
  gsub("[^0-9a-zA-Z ,./?><:;'~`!@#&*']", "", txt)
}

govt_sentiment_data_txt <- clean_tweet_text(govt_sentiment_data$text)
# Store the cleaned tweet text in HDFS.
# NOTE(review): hdfs.file/hdfs.write/hdfs.close are not provided by any package
# loaded in this file (presumably rhdfs, after hdfs.init()) -- confirm.
hdfsFile <- hdfs.file("/tmp/govt_sentiment_data.txt", "w")
tryCatch(
  hdfs.write(govt_sentiment_data_txt, hdfsFile),
  # Close the handle even when the write fails, so it is never leaked.
  finally = hdfs.close(hdfsFile)
)

# Keep a local copy as well, one tweet per line. Base write() cannot serialise
# a data frame or list, so the cleaned character vector is written instead.
# NOTE(review): assumes the local copy should hold the cleaned text, matching
# the HDFS file of the same name -- confirm.
writeLines(govt_sentiment_data_txt, "govt_sentiment_data.txt")
欢迎光临 电子技术论坛_中国专业的电子工程师学习交流社区-中电网技术论坛 (http://bbs.eccn.com/) | Powered by Discuz! 7.0.0 |