// 《小程序---利用jsoup解析CSDN博客信息》
package com.fenghuo.html;
import java.io.IOException;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Small example program that fetches a CSDN blog's list view with jsoup and
 * prints the link and title of every article entry found on the page.
 */
public class AnalyzeHtml {

    /** Site root, prepended to site-relative article links. */
    private static final String CSDN_ROOT = "http://blog.csdn.net";

    /** Blog whose article list is fetched. */
    private static final String BLOG_URL = "http://blog.csdn.net/w695050167";

    /** Timeout handed to the jsoup connection, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 500;

    /**
     * Browser-like User-Agent sent with the request. The original author notes
     * that CSDN does not allow access from Java programs, so the request
     * identifies itself as a browser.
     */
    private static final String USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 5.0; Windows XP; DigExt)";

    /**
     * Fetches the blog's list view and prints, for each matching title element,
     * the article URL followed by the article title (one per line).
     *
     * @param args ignored
     * @throws IOException if the page cannot be fetched (including timeouts)
     */
    public static void main(String[] args) throws IOException {
        String url = BLOG_URL + "?viewmode=list";

        Document doc = Jsoup.connect(url)
                .timeout(TIMEOUT_MILLIS)
                .header("User-Agent", USER_AGENT)
                .get();

        for (Element title : doc.select(".link_title")) {
            // Only handle entries made of the title element plus exactly one
            // descendant (getAllElements() includes the element itself).
            if (title.getAllElements().size() != 2) {
                continue;
            }
            // first() returns null when that single descendant is not an
            // anchor carrying an href; skip such entries instead of crashing.
            Element anchor = title.select("a[href]").first();
            if (anchor == null) {
                continue;
            }
            System.out.println(resolveLink(anchor));
            System.out.println(title.text());
        }
    }

    /**
     * Builds the printable URL for an article anchor: absolute hrefs are
     * returned unchanged, anything else is prefixed with the site root.
     */
    private static String resolveLink(Element anchor) {
        String href = anchor.attr("href");
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        return CSDN_ROOT + href;
    }
}