当前位置: 代码迷 >> Java Web开发 >> java网页抓取后部署web项目 一直报路径异常
  详细解决方案

java网页抓取后部署web项目 一直报路径异常

热度:1681   发布时间:2013-02-25 21:15:17.0
java网页抓取后部署web项目 一直报路径错误
开始时间:2012-07-06 09:25:00 共需查询:716条 HTTP/1.1 200 OK
java.net.MalformedURLException: unknown protocol: c
at java.net.URL.<init>(Unknown Source)
at java.net.URL.<init>(Unknown Source)
at java.net.URL.<init>(Unknown Source)
at org.cyberneko.HTMLScanner.setInputSource(HTMLScanner.java:860)
at org.cyberneko.HTMLConfiguration.setInputSource(HTMLConfiguration.java:478)
at org.cyberneko.HTMLConfiguration.parse(HTMLConfiguration.java:451)
at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
at org.apache.xerces.parsers.DOMParser.parse(Unknown Source)
at com.rensanning.M1Job.getHiddenValue(M1Job.java:215)
at com.rensanning.M1Job.execute(M1Job.java:78)
at org.quartz.core.JobRunShell.run(JobRunShell.java:202)
at org.quartz.simpl.SimpleThreadPool$WorkerThread.run(SimpleThreadPool.java:529)

public class BaseServelet extends HttpServlet {
public void init() throws ServletException {  
super.init();
M1Job.ROOT_PATH = getServletContext().getRealPath("/");
System.out.println(M1Job.ROOT_PATH);
System.out.println(M1Job.ROOT_PATH+"html");
File file = new File(M1Job.ROOT_PATH+"html");
if (!file.exists()) {
file.mkdirs();
}
__startJob();  
}
private void __startJob() {
try {
SchedulerFactory sf = new StdSchedulerFactory();
Scheduler sched = sf.getScheduler();
//每天7点钟扫描一次
String sconf1m = "0 25 9 * * ?";
M1Job m1job = new M1Job();
JobDetail job1m = new JobDetail("job1m", "group1m", m1job.getClass());
CronTrigger trigger1m = new CronTrigger("trigger1m", "group1m", "job1m", "group1m", sconf1m);
sched.addJob(job1m, true);
sched.scheduleJob(trigger1m);
sched.start();
} catch (Exception e) {
e.printStackTrace();
}
}
}
public class M1Job implements Job {
public static String ROOT_PATH = "";
private static final String HTML_TACK_HTML = "html\\tack";
private static final String HTML_DETAIL_HTML = "html\\detail";
private static String url1 = "http://wwwapps.ups.com/WebTracking/track?HTMLVersion=5.0&loc=zh_CN&Requester=UPSHome&WBPM_lid=homepage%2Fct1_pnl_trk&trackNums=#TRACK_NUM#&track.x=%E8%BF%BD%E8%B8%AA"; 
private static String url2 = "http://wwwapps.ups.com/WebTracking/detail"; 
public final void execute(JobExecutionContext context)
throws JobExecutionException {
System.out.println("开始时间:"+getCurrentTime());
UpsDao upsdao = new UpsDao();
TdmxDao tddao = new TdmxDao();  
List<Tdmx> listtd = tddao.searchTdmx();  
for(int i=0; i<listtd.size();i++){  
Tdmx td = listtd.get(i);  
System.out.println("共需查询:"+listtd.size()+"条");
String url1new = url1.replace("#TRACK_NUM#", td.getBl1().trim()); //URL替换的运单参数
String jbno = td.getJbno(); //工作编号  
//如果upstrac表有相对记录,先执行删除
if(upsdao.DeleteUpstrace(jbno)>0)
{
upsdao.DeleteUpstrace(jbno);
}  
try {  
//抓取追踪信息页面HTML  
getHtml(url1new, ROOT_PATH+HTML_TACK_HTML, null);  
//获取 抓取运输进程页面HTML时 需要的参数  
Map<String, String> data = getHiddenValue(ROOT_PATH+HTML_TACK_HTML);  
if (data.get("trackNums") == null) {
continue;
}  
//抓取运输进程页面HTML  
getHtml(url2, ROOT_PATH+HTML_DETAIL_HTML, data);  
  相关解决方案