疯狂Java


您现在的位置: 疯狂软件 >> 新闻资讯 >> 正文

Java HTML页面抓取实例


 

  import java.io.BufferedReader;

  import java.io.IOException;

  import java.io.InputStreamReader;

  import java.io.UnsupportedEncodingException;

  import java.net.HttpURLConnection;

  import java.net.MalformedURLException;

  import java.net.URL;

  /**
   * Minimal example of downloading an HTML page over HTTP and printing it.
   *
   * <p>The response body is decoded as UTF-8 and its lines are joined with a
   * single space, so the result contains no line breaks.
   */
  public class Url {

      /** Page fetched when no URL is supplied. */
      private static final String DEFAULT_URL = "http://www.baidu.com";

      /** Charset used to decode the response body. */
      private static final String RESPONSE_CHARSET = "UTF-8";

      /** Upper bound for establishing the connection, so a dead host cannot block forever. */
      private static final int CONNECT_TIMEOUT_MILLIS = 10000;

      /** Upper bound for each blocking read, so a stalled server cannot block forever. */
      private static final int READ_TIMEOUT_MILLIS = 10000;

      /**
       * Fetches a page and prints its content to standard output.
       *
       * @param args optional; if a first argument is present it is used as the
       *             URL to fetch, otherwise the default page is fetched
       * @throws Exception if the URL is malformed or the page cannot be read
       */
      public static void main(String[] args) throws Exception {
          String html = (args != null && args.length > 0)
                  ? getURLContent(args[0])
                  : getURLContent();
          System.out.println(html);
      }

      /**
       * Fetches the content of the default page.
       *
       * @return the response body with its lines joined by single spaces
       * @throws MalformedURLException        if the default URL cannot be parsed
       * @throws IOException                  if connecting or reading fails
       * @throws UnsupportedEncodingException if the response charset is not supported
       */
      private static String getURLContent() throws MalformedURLException, IOException, UnsupportedEncodingException {
          return getURLContent(DEFAULT_URL);
      }

      /**
       * Fetches the content of the given HTTP(S) page.
       *
       * @param url absolute {@code http} or {@code https} URL of the page
       * @return the response body with its lines joined by single spaces
       * @throws MalformedURLException    if {@code url} cannot be parsed
       * @throws IllegalArgumentException if {@code url} is not an HTTP(S) URL
       * @throws IOException              if connecting or reading fails, including timeouts
       */
      public static String getURLContent(String url) throws IOException {
          java.net.URLConnection raw = new URL(url).openConnection();
          if (!(raw instanceof HttpURLConnection)) {
              throw new IllegalArgumentException("Not an HTTP(S) URL: " + url);
          }
          HttpURLConnection con = (HttpURLConnection) raw;

          // Redirects are not followed for this connection: a 3xx response is read
          // as-is. Only the per-instance flag is set, so the JVM-wide default that
          // other connections rely on is left untouched.
          con.setInstanceFollowRedirects(false);
          con.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
          con.setReadTimeout(READ_TIMEOUT_MILLIS);

          try {
              con.connect();
              // try-with-resources closes the reader (and the underlying stream)
              // even when reading fails part-way through.
              try (BufferedReader reader = new BufferedReader(
                      new InputStreamReader(con.getInputStream(), RESPONSE_CHARSET))) {
                  return joinLines(reader);
              }
          } finally {
              con.disconnect();
          }
      }

      /**
       * Reads every remaining line from {@code reader} and concatenates them,
       * appending one space after each line (including the last).
       *
       * <p>The reader is not closed; that is the caller's responsibility.
       *
       * @param reader source of lines
       * @return the joined text, or an empty string if there are no lines
       * @throws IOException if reading fails
       */
      static String joinLines(BufferedReader reader) throws IOException {
          StringBuilder joined = new StringBuilder();
          String line;
          while ((line = reader.readLine()) != null) {
              joined.append(line).append(' ');
          }
          return joined.toString();
      }
  }