1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | private static void getFeeds(String newsFeedUrlLink){ File f = new File(“newsFeeds.txt”); boolean ok = false; try { URL feedUrl = new URL(newsFeedUrlLink); SyndFeedInput input = new SyndFeedInput(); InputSource source = new InputSource(feedUrl.openStream()); SyndFeed feed = input.build(source); for (Iterator i = feed.getEntries().iterator(); i.hasNext();) { SyndEntry entry = (SyndEntry) i.next(); writeToFile(f,entry); } ok = true; } catch (Exception ex) { ex.printStackTrace(); System.out.println("ERROR: "+ex.getMessage()); } if (!ok) { System.out.println(); System.out.println("FeedReader reads and prints any RSS/Atom feed type."); System.out.println("The first parameter must be the URL of the feed to read."); System.out.println(); } } private static void writeToFile(File f, SyndEntry entry) throws IOException { FileWriter fw = new FileWriter(f.getName(),true); BufferedWriter bw = new BufferedWriter(fw); bw.write(entry.getTitle()+”\n”); bw.close(); } |
# Read the lines of the local newsFeeds.txt file into a character vector.
mydata <- readLines("newsFeeds.txt")

# Open the existing HDFS file for reading, fetch its contents and
# unserialize them into `df`, then close the handle.
myfile <- hdfs.file("/tmp/govt_sentiment_data.txt", "r")
dfserialized <- hdfs.read(myfile)
df <- unserialize(dfserialized)
hdfs.close(myfile)

# Local-filesystem variant, kept for reference (R comments start with '#', not '//'):
# write(mydata, file = "/tmp/govt_sentiment_data.txt", append = TRUE)

# NOTE(review): if these hdfs.* functions come from the rhdfs package, its
# hdfs.write is documented as hdfs.write(object, con, hsync = FALSE) and takes
# an open connection from hdfs.file(); confirm that `file =` / `append =` are
# accepted by the library version in use.
hdfs.write(mydata, file = "/tmp/govt_sentiment_data.txt", append = TRUE)

# Load the HDFS file back for the analysis that follows.
government_sentiment_data <- read.hdfs("/tmp/govt_sentiment_data.txt")
# Import the `opinions` table of the govt_policy_app MySQL database into HDFS
# under /tmp, appending to data already present there. Remaining options
# are read from dbCredentials.txt.
sqoop import --options-file dbCredentials.txt \
  --connect jdbc:mysql://217.8.156.117/govt_policy_app \
  --table opinions \
  --target-dir /tmp \
  --append
欢迎光临 电子技术论坛_中国专业的电子工程师学习交流社区-中电网技术论坛 (http://bbs.eccn.com/) | Powered by Discuz! 7.0.0 |