CSDN Blog快速备份程序-备份你自己的Blog
以下程序需要htmlparser.jar。你可以直接從
http://umn.dl.sourceforge.net/sourceforge/htmlparser/htmlparser1_5_20040728.zip
下載,http://htmlparser.sourceforge.net是htmlparser的主頁。
//copy from here.
/*******************************************************************************
 * $Header$
 * $Revision$
 * $Date$
 *
 *==============================================================================
 *
 * Copyright (c) 2001-2004 XXX Technologies, Ltd.
 * All rights reserved.
 *
 * Created on 2004-12-3
 *******************************************************************************/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.lexer.Page;
import org.htmlparser.tags.Div;
import org.htmlparser.util.ParserException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/**
 *
 * @author 晏斐 (mailto:mr_yanfei&yahoo.com)
 */
/*
 * 修改歷史
 * $Log$
 */
public final class BlogBackupTool {
??????? private static final String RSS_URL = "http://blog.csdn.net/mr_yanfei/Rss.aspx";
??????? private static final String SAVE_PATH = "d://temp";
??????? private static final String CHANNEL = "channel";
??????? private static final String CHANNEL_ITEM = "item";
??????? private static final String ITEM_TITLE = "title";
??????? private static final String ITEM_LINK = "link";
???????
??????? private static final boolean FILTER = true;
???????
??????? class Blog {
??????????????? private String fTitle;
??????????????? private String fLink;
???????????????
??????????????? public Blog(String title, String link) {
??????????????????????? fTitle = title;
??????????????????????? fLink = link;
??????????????? }
???????????????
??????????????? public String getTitle() {
??????????????????????? return fTitle;
??????????????? }
???????????????
??????????????? public String getLink() {
??????????????????????? return fLink;
??????????????? }
??????? }
???????
??????? private Blog[] getBlogs(String rssUrl) {
??????? DocumentBuilderFactory factory =
??????????? DocumentBuilderFactory.newInstance();
??????? factory.setNamespaceAware(true);
???????
??????? List result = new ArrayList();
??????? try {
??????????????? URL url = new URL(rssUrl);
??????????? DocumentBuilder builder = factory.newDocumentBuilder();
??????????? Document document = builder.parse(url.openStream());
???????????
??????????? Element channel = document.getDocumentElement();
???????????
??????????? channel = (Element)document.getElementsByTagName(CHANNEL).item(0);
??????????? if(CHANNEL.equals(channel.getLocalName())) {
???????????????
??????????????? NodeList nodes = channel.getChildNodes();
??????????????? for(int i = 0; i < nodes.getLength(); i ++) {
??????????????????? org.w3c.dom.Node item = nodes.item(i);
??????????????????? if (CHANNEL_ITEM.equals(item.getLocalName())) {
??????????????????????? String title = getChildNodeText(item, ITEM_TITLE);
??????????????????????? String link = getChildNodeText(item, ITEM_LINK);
??????????????????????? result.add(new Blog(title, link));
??????????????????? }
??????????????? }
??????????? }
??????? } catch (Exception ex){
??????????????? ex.printStackTrace();
??????? }??????
??????? return (Blog[])result.toArray(new Blog[result.size()]);
??????? }
???????
???????
??????? private String getChildNodeText(org.w3c.dom.Node item, String nodeName) {
???????????????
??????????????? NodeList nodes = item.getChildNodes();
??????????????? for(int i = 0; i < nodes.getLength(); i++) {
??????????????????????? org.w3c.dom.Node node = nodes.item(i);
??????????????????????? if (nodeName.equals(node.getLocalName())) {
??????????????????????????????? return node.getFirstChild().getNodeValue();
??????????????????????? }
??????????????? }
??????????????? return null;
??????? }
???????
??????? private String validFilename(String name) {
??????????????? String result = name.replace(':', '_');
??????????????? result = result.replace('/', '_');
??????????????? result = result.replace('//', '_');
??????????????? result = result.replace('?', '?');
??????????????? result = result.replace('*', '_');
??????????????? result = result.replace('<', '_');
??????????????? result = result.replace('>', '_');
??????????????? result = result.replace('|', '_');
??????????????? result = result.replace('"', '_');
??????????????? return result;
??????? }
???????
??????? private void saveBlogs(Blog[] blogs) throws Exception{
???????????????
??????????????? String title, link;
??????????????? for (int i = 0; i < blogs.length; i++) {
??????????????????????? title = blogs[i].getTitle();
??????????????????????? link = blogs[i].getLink();
???????????????????????
??????????????????????? System.out.println("Get Blog " + title);
??????????????????????? System.out.println("URL : " + link);
???????
??????????????????????? if (FILTER) {
??????????????????????????????? Parser parser = null;
??????????????????????????????? try {
??????????????????????????????????????? parser = new Parser(link);
??????????????????????????????? } catch (ParserException ex) {
??????????????????????????????????????? continue;
??????????????????????????????? }
??????????????????????????????? Page page = parser.getLexer().getPage();
??????????????????????????????? String pageUrl = page.getUrl();
???????????????????????????????
??????????????????????????????? Node[] bases = parser.extractAllNodesThatAre(Div.class);
??????????????????????????????? for (int j = 0; j < bases.length; j++) {
??????????????????????????????????????? String attr = ((Div)bases[j]).getAttribute("class");
???????????????????????????????????????
??????????????????????????????????????? if (attr == null)
??????????????????????????????????????????????? attr = "";
???????????????????????????????????????
??????????????????????????????????????? if (attr.equals("post")) {
??????????????????????????????????????????????? String content = ((Div)bases[j]).getChildrenHTML();
??????????????????????????????????????????????? saveBlogToFile(title + ".html", content);
??????????????????????????????????????????????? break;
??????????????????????????????????????? }
??????????????????????????????? }
??????????????????????????????? parser.reset();
??????????????????????? }
??????????????????????? else {
??????????????????????????????? StringBuffer buffer = getHtmlFromURL(link);
??????????????????????????????? saveBlogToFile(title + ".html", buffer.toString());
??????????????????????? }
??????????????? }
??????? }
???????
??????? private StringBuffer getHtmlFromURL(String url) {
??????? StringBuffer buffer = new StringBuffer();
??????????????? try {
??????????? URL pageUrl = new URL(url);
???????
??????????? BufferedReader in = new BufferedReader(new InputStreamReader(pageUrl.openStream()));
??????????? String str;
??????????? while ((str = in.readLine()) != null) {
??????????????? buffer.append(str);
??????????? }
??????????? in.close();
??????? } catch (MalformedURLException e) {
??????????? e.printStackTrace();
??????? } catch (IOException e) {
??????????? e.printStackTrace();
??????? }
??????? return buffer;
??????? }
???????
??????? private void saveBlogToFile(String filename, String content) {
??????????????? try {
??????????????????????? filename = validFilename(filename);
??????????????????????? File file = new File(SAVE_PATH, filename);
??????????????????? OutputStream out = new FileOutputStream(file);
??????????????? OutputStreamWriter writer = new OutputStreamWriter(out);
??????????????? writer.write(content);
??????????????? writer.close();
??????????????? } catch (IOException ex) {
???????????????????????
??????????????? }
??????? }
???????
??????? public static void main(String[] args) throws Exception{
??????????????? BlogBackupTool reader = new BlogBackupTool();
??????????????? Blog[] blogs = reader.getBlogs(RSS_URL);
???????????????
??????????????? reader.saveBlogs(blogs);
??????????????? String msg = MessageFormat.format("Totle {0} blogs saved.", new String[]{Integer.toString(blogs.length)});
??????????????? System.out.println(msg);
??????? }
}
//end
總結
以上是生活随笔為你收集整理的CSDN Blog快速备份程序-备份你自己的Blog的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: javascript --- 作用域和
- 下一篇: es6 --- 模块