问题描述
我正在尝试制作一个可从网格/列表中在线解析图片的android应用程序,但是我遇到了一些运行时错误..它的说法是我为我的家犬解析错误。 有人知道我在哪里犯错误吗? 我知道为什么数组会超出范围,但我不知道如何解决它!
我正在尝试解析该网站数据..但是在我的这些部分出现运行时错误
DogActivity.class
private class RetrieveDogsTask extends AsyncTask<String, Void, Void> {
@Override
protected Void doInBackground(String... urls) {
for (String url : urls) {
Parser parser = new Parser(url, DogsActivity.this);
Breed.Name breedName = breed.getName();
if (breedName == Breed.Name.HERDING_DOG_BREED) {
dogs.add(parser.parseProfile(new Dog(url, breedName)));
} else {
dogs.addAll(parser.parseDogsPage(breedName, DogsActivity.this));
}
}
return null;
}
解析器类
public class Parser {
Document doc;
Context context;
Elements dogRows;
public Parser(String url, Context context) {
this.context = context;
try {
doc = Jsoup.connect(url).get();
} catch (IOException e) {
Log.e("Page", "Wrong URL or network problems", e);
}
}
public ArrayList<Dog> parseDogsPage(Breed.Name breedName, Context context) {
ArrayList<Dog> dogs = new ArrayList<>();
try {
Element dogContainer;
if (breedName == Breed.Name.FAMILY_DOG_BREED) {
dogContainer = doc.getElementsByClass("familybreed").get(0);
} else {
dogContainer = doc.getElementsByClass("toybreed").get(0);
}
Log.i("Page", "A page has been parsed successfully");
dogRows = dogContainer.getElementsByTag("a");
for (Element dogRow : dogRows) {
String dogName, dogURL;
Dog dog;
dogURL = dogRow.getElementsByTag("a").get(0).absUrl("href");
String dogThumbnailURL = dogRow.
getElementsByTag("img").get(0).absUrl("src");
if (breedName == Breed.Name.FAMILY_DOG_BREED) {
dogName = dogRow.getElementsByTag("span").get(0).text();
dog = new Dog(dogName, dogURL, dogThumbnailURL, breedName);
} else {
dogName = dogRow.getElementsByTag("strong").get(0).text();
Element details = dogContainer.getElementsByClass("details").get(0);
Elements children = details.children();
if (breedName == Breed.Name.TOY_DOG_BREED || breedName == Breed.Name.HOUND_DOG_BREED) {
String origin = children.get(1).text();
String lifespan = children.get(3).text();
dog= new Dog(dogName, origin , lifespan, dogURL, dogThumbnailURL, breedName);
} else {
//for herding
String sizetype = children.get(1).text();
dog = new Dog(dogName, sizetype, dogThumbnailURL, dogURL, breedName);
}
}
dogs.add(dog);
}
} catch (Exception e) {
Log.e("Breed activity", "Wrong parsing for " + breedName, e);
}
return dogs;
}
public Dog parseProfile(Dog dog) {
if (!dog.isDetailDataReady()) {
//coaches already read the data in the coaches page
try {
Element dogContainer = doc.getElementById("dogscontainer");
Element bioContainer = dogContainer.getElementById("biocontainer");
Element bioDetails = bioContainer.getElementById("biodetails");
dog.setOtherNames(bioDetails.getElementsByTag("h1").text());
ArrayList<Dog.Detail> dogDetails = new ArrayList<>();
Elements rows = bioDetails.getElementsByTag("tr");
for (Element row : rows) {
Elements tds = row.getElementsByTag("td");
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED ||
dog.getBreed() == Breed.Name.TERRIER_DOG_BREED ||
dog.getBreed() == Breed.Name.HERDING_DOG_BREED) {
//coaches, manager and legends use th and td
Elements ths = row.getElementsByTag("th");
dogDetails.add(new Dog.Detail(ths.get(0).text(), tds.get(0).text()));
} else {
//dogs use two tds
dogDetails.add(new Dog.Detail(tds.get(0).text(), tds.get(1).text()));
}
}
dog.setDetails(dogDetails);
Element articleText = dogContainer.getElementsByClass("dogarticletext").get(0);
Elements paragraphs = articleText.getElementsByTag("p");
String text = "";
for (Element p : paragraphs) {
text = text + "\n\n\n" + p.text();
}
dog.setArticleText(dog.getArticleText() + text);
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED ||
dog.getBreed() == Breed.Name.TERRIER_DOG_BREED ||
dog.getBreed() == Breed.Name.HERDING_DOG_BREED) {
//get main image url
dog.setMainImageURL(bioContainer.getElementsByTag("img").get(0).absUrl("src"));
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED) {
dog.setThumbnailURL(dog.getMainImageURL());
//only need first name
dog.setName(dog.getOtherNames().split(" ")[1]);
}
} else {
dog.setMainImageURL(bioContainer.getElementsByClass("mainImage").get(0).absUrl("src"));
}
} catch (Exception e) {
Log.e("Profile activity", "Wrong parsing for " + dog.getUrl(), e);
}
if (dog.getBreed() == Breed.Name.WORKING_DOG_BREED) {
dog.setBasicDataReady(true);
}
dog.setDetailDataReady(true);
}
return dog;
}
}
RetrieveDogTask:
private class RetrieveDogsTask extends AsyncTask<String, Void, Void> {
@Override
protected Void doInBackground(String... urls) {
for (String url : urls) {
Parser parser = new Parser(url, DogsActivity.this);
Breed.Name breedName = breed.getName();
if (breedName == Breed.Name.HERDING_DOG_BREED) {
dogs.add(parser.parseProfile(new Dog(url, breedName)));
} else {
dogs.addAll(parser.parseDogsPage(breedName, DogsActivity.this));
}
}
return null;
Logcat:
Wrong parsing for FAMILY_DOG_BREED
java.lang.IndexOutOfBoundsException: Invalid index 0, size is 0
at java.util.ArrayList.throwIndexOutOfBoundsException(ArrayList.java:255)
at java.util.ArrayList.get(ArrayList.java:308)
at org.jsoup.select.Elements.get(Elements.java:544)
at com.example.shannon.popular.Parser.parseDogsPage(Parser.java:35)
at com.example.shannon.popular.DogsActivity$RetrieveDogsTask.doInBackground(DogsActivity.java:140)
at com.example.shannon.popular.DogsActivity$RetrieveDogsTask.doInBackground(DogsActivity.java:131)
at android.os.AsyncTask$2.call(AsyncTask.java:288)
at java.util.concurrent.FutureTask.run(FutureTask.java:237)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1112)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:587)
at java.lang.Thread.run(Thread.java:818)
品种类别:
public class Breed implements Serializable {
private Name name;
private String url;
Breed(Name name, String url) {
this.name = name;
this.url = url;
}
public Name getName() {
return name;
}
public String getNameString(Context context) {
String nameString = "";
switch (name) {
case FAMILY_DOG_BREED:
nameString = context.getString(R.string.family_breed);
break;
case TOY_DOG_BREED:
nameString = context.getString(R.string.toy_breed);
break;
case HOUND_DOG_BREED:
nameString = context.getString(R.string.hound_breed);
break;
case TERRIER_DOG_BREED:
nameString = context.getString(R.string.terrier_breed);
break;
case WORKING_DOG_BREED:
nameString = context.getString(R.string.working_breed);
break;
case HERDING_DOG_BREED:
nameString = context.getString(R.string.herding_breed);
break;
}
return nameString;
}
public String getURL() {
return url;
}
public enum Name {FAMILY_DOG_BREED, TOY_DOG_BREED, HOUND_DOG_BREED, TERRIER_DOG_BREED, WORKING_DOG_BREED, HERDING_DOG_BREED}
}
1楼
您可能对格式错误的HTML文档使用了严格的XML解析器。
我只是尝试对要解析的URL进行XML验证,但由于<link>
元素从未关闭(在严格的XML中,它应以</link>
标记结尾,但在该页面中丢失),因此它失败了。
这对于HTML页面非常常见,因为当今的浏览器倾向于自动更正此类错误。
由于您使用严格的XML解析器,因此解析器很可能会失败。
我建议切换到其他解析器。 我会使用PULL解析器(例如 )-这种技术允许使用较低级别的控件进行解析,这意味着您可以轻松地忽略HTML中不需要的内容-如这些链接元素,或者任何其他。
因此,您可以执行以下操作:
XmlPullParser parser = XmlPullParserFactory.newInstance().newPullParser();
parser.setInput(new BufferedReader(
new InputStreamReader(
new URL("http://.....").openConnection().getInputStream()
)
)
);
while(XmlPullParser.END_DOCUMENT != parser.next()){
if(XmlPullParser.START_TAG == parser.getEventType()){
String tagName = parser.getName();
if(parser.getAttributeCount() > 0 {
// parse attributes, if needed
}
if(parser.nextToken() == XmlPullParser.TEXT){
String tagValue = parser.getText()
}
// etc.
}
}