htmlunit及xpath使用
下面的例子使用 HtmlUnit 和 XPath 获取某网页 HTML 中的数据,可用于爬虫抓取数据等。
package com.whoistester.test.report.module;
import java.util.List;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
public class Project {
String projectDescription;
String projectKey;
String buildNumber;
float rulesCompliance;
int blocker;
int critical;
int major;
int minor;
int info;
float unitTestsCoverage;
float lineCoverage;
float branchCoverage;
float unitTestSucess;
int testFailures;
int errors;
int tests;
String seconds;
float packageTangleIndex;
int cycles;
float methodComplexity;
float classComplexity;
float fileComplexity;
String host;
String url;
public Project(String projectKey, String projectDescription,String host)
{
this.projectKey = projectKey;
this.projectDescription = projectDescription;
this.host = host;
getData();
}
private void getData()
{
this.url = this.host+this.projectKey;
HtmlPage page = null;
WebClient webclient =null ;
try {
webclient = new WebClient();
page = webclient.getPage(this.url);
}
catch(Exception e)
{
}
if(null == page || webclient == null) return;
this.buildNumber = getElementStringText(page,”//div[@id=’snapshot_title’]/h4″);
this.rulesCompliance = getElementFloatText(page, “//span[@id=’m_violations_density’]”);
this.blocker = getElementIntText(page,”//span[@id=’m_blocker_violations’]”);
this.critical = getElementIntText(page,”//span[@id=’m_critical_violations’]”);
this.major = getElementIntText(page,”//span[@id=’m_major_violations’]”);
this.minor = getElementIntText(page,”//span[@id=’m_minor_violations’]”);
this.info = getElementIntText(page,”//span[@id=’m_info_violations’]”);
this.unitTestsCoverage = getElementFloatText(page, “//span[@id=’m_coverage’]”);
this.lineCoverage = getElementFloatText(page, “//span[@id=’m_line_coverage’]”);
this.branchCoverage = getElementFloatText(page, “//span[@id=’m_branch_coverage’]”);
this.tests = getElementIntText(page,”//span[@id=’m_tests’]”);
this.unitTestSucess = getElementFloatText(page, “//span[@id=’m_test_success_density’]”);
this.testFailures = getElementIntText(page,”//span[@id=’m_test_failures’]”);
this.errors = getElementIntText(page,”//span[@id=’m_test_errors’]”);
this.seconds = getElementStringText(page,”//span[@id=’m_test_execution_time’]”);
this.packageTangleIndex = getElementFloatText(page,”//span[@id=’m_package_tangle_index’]”);
this.cycles = getElementIntText(page,”//span[@id=’m_package_cycles’]”);
this.methodComplexity = getElementFloatText(page,”//span[@id=’m_function_complexity’]”);
this.classComplexity = getElementFloatText(page,”//span[@id=’m_class_complexity’]”);
this.fileComplexity = getElementFloatText(page,”//span[@id=’m_file_complexity’]”);
}
private float getElementFloatText(final HtmlPage page , final String xpath)
{
List temp ;
HtmlElement element;
temp = page.getByXPath(xpath);
if(temp.size()>0)
{
element = (HtmlElement) temp.get(0);
return Float.valueOf(element.getTextContent().replace(“%”, “”));
}
return -1;
}
private int getElementIntText(final HtmlPage page , final String xpath)
{
List temp ;
HtmlElement element;
temp = page.getByXPath(xpath);
if(temp.size()>0)
{
element = (HtmlElement) temp.get(0);
return Integer.valueOf(element.getTextContent().replace(“,”, “”));
}
return -1;
}
private String getElementStringText(final HtmlPage page , final String xpath)
{
List temp ;
HtmlElement element;
temp = page.getByXPath(xpath);
if(temp.size()>0)
{
element = (HtmlElement) temp.get(0);
return element.getTextContent();
}
return “”;
}
public static void main(String [] args)
{
//https://dev.eclipse.org/sonar/dashboard/index/
Project a = new Project(“10616″,”test”,”https://dev.eclipse.org/sonar/dashboard/index/”);
System.out.println(a.blocker+” “+a.critical + ” “+a.major);
}
}
此篇文章已被阅读3029 次