当前位置: 代码迷 >> J2SE >> 正则高手帮看看那里出现有关问题CPU 100%
  详细解决方案

正则高手帮看看那里出现有关问题CPU 100%

热度:387   发布时间:2016-04-24 01:08:24.0
正则高手帮看看那里出现问题CPU 100%
Java code
/** * <p>Title: </p> * * <p>Description: </p> * * <p>Copyright: Copyright (c) 2012</p> * * <p>Company: </p> * * @author not attributable * @version 1.0 */import java.io.*;import java.net.*;import java.util.*;import java.util.regex.*;import java.util.zip.*;public class test {    //返回所有组的正则    public static ArrayDeque<String[]> regexAllGroups(String original, String regex) {        int total = 0;        String[] ary = null;        ArrayDeque Q = new ArrayDeque();        if (original == null || regex == null) {            return Q;        }        Pattern p = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);        Matcher m = p.matcher(original);        while (m != null && m.find()) { //该正则在这里m.find卡死。。            total = m.groupCount();            if (total < 1) {                continue;            }            ary = new String[total];            for (int i = 1; i <= total; i++) {                ary[i - 1] = new String(m.group(i));            }            Q.add(ary);        }        m = null;        p = null;        return Q;    }        //获取网页源码    public static String getUrlHtml(String strURL) {        String body = null;        String contentEncoding = null;        URL _URL = null;        InputStream IN = null;        HttpURLConnection CONNECTION = null;        try {            _URL = new URL(strURL);            CONNECTION = (HttpURLConnection) _URL.openConnection();            CONNECTION.setConnectTimeout(3000);            CONNECTION.setReadTimeout(3000);            CONNECTION.setRequestProperty("Accept-Encoding", "gzip,deflate");            CONNECTION.setRequestProperty("Accept", "*/*");            CONNECTION.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)");            CONNECTION.setRequestProperty("Connection", "close");            CONNECTION.setRequestMethod("GET");            CONNECTION.setFollowRedirects(true);            CONNECTION.setUseCaches(false);            CONNECTION.setInstanceFollowRedirects(true);            /*判断是不是GZIP/DEFLATE压缩格式的网页*/            int type = 0;            contentEncoding = CONNECTION.getContentEncoding();            if (contentEncoding != null) {                contentEncoding = contentEncoding.toLowerCase();                if (contentEncoding.indexOf("gzip") != -1) {                    type = 1;                }                if (contentEncoding.indexOf("deflate") != -1) {                    type = 2;                }            }            switch (type) {            case 1:                IN = new GZIPInputStream(CONNECTION.getInputStream());                break;            case 2:                IN = new InflaterInputStream(CONNECTION.getInputStream());                break;            default:                IN = CONNECTION.getInputStream();                break;            }            byte[] b = null;            if (IN != null && (b = inputStreamToByte(IN)) != null) {                body = new String(b, "utf-8");                IN.close();            }            CONNECTION.disconnect();            b = null;        } catch (Exception e) {            try {                if (IN != null) {                    IN.close();                }                if (CONNECTION != null) {                    CONNECTION.disconnect();                }            } catch (Exception ex) {            }            body = null;        }        IN = null;        _URL = null;        CONNECTION = null;        return body;    }            public static byte[] inputStreamToByte(InputStream in) {       if (in == null) {           return null;       }       int ch;       byte[] b = null;       ByteArrayOutputStream stream = new ByteArrayOutputStream();       try {           while ((ch = in.read()) != -1) {               stream.write(ch);           }           b = stream.toByteArray();           stream.reset();           stream.close();           in.close();       } catch (Exception e) {           e.printStackTrace();       }       in = null;       stream = null;       return b;   }    public static void main(String[] args) {        //正则表达式        String regex = "<table cellpadding=\"0\" cellspacing=\"0\" class=\"result\" id=\"\\d+\".*><tr><td class=f><h3 class=\"t\"><a.*href=\"([\\s\\S]*?)\".*target=\"_blank\">([\\s\\S]*?)</a>([\\s\\S]*?)<br>";        //获取该网页地址的html源代码        String html = getUrlHtml("http://www.baidu.com/s?wd=%D2%F8%C1%AA%B4%F3%B0%AE%BF%A8&pn=0&rn=10&usm=1");        //分析结果,在这里出现CPU资源100%        ArrayDeque<String[]> Q = regexAllGroups(html, regex);            }}
  相关解决方案