Broken Link Finder / Checker using Java

by Vinoth 2009-08-05 18:25:28

Broken link finder / checker using Java
Compile and run the code below to find the broken links on a web page

import java.net.HttpURLConnection;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.Collection;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import java.util.LinkedHashSet;
import javax.swing.text.MutableAttributeSet;
/**
 * Command-line tool that downloads one web page, collects the links found in
 * it by {@code HTMLCallback}, and prints for each link whether it is reachable.
 *
 * <p>Usage: {@code java BrokenLinkFinder <page-url>}
 */
public class BrokenLinkFinder{

    /** Maximum number of {@code Location} hops followed manually before a link is reported dead. */
    private static final int MAX_REDIRECTS = 10;

    /** Connect and read timeout, in milliseconds, so one stalled host cannot hang the whole run. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Checks whether a link answers with HTTP 200, following {@code Location}
     * headers for at most {@link #MAX_REDIRECTS} hops.
     *
     * @param link absolute URL to probe
     * @return {@code true} if the final response code is 200; {@code false} for
     *         any other status, a non-HTTP(S) scheme, a malformed URL, an I/O
     *         failure, or a redirect chain longer than the limit
     */
    public static boolean isLive(String link) {
        return isLive(link, MAX_REDIRECTS);
    }

    /** Probes {@code link}, with {@code redirectsLeft} bounding the remaining redirect hops. */
    private static boolean isLive(String link, int redirectsLeft) {
        HttpURLConnection urlConnection = null;
        try {
            URL url = new URL(link);
            String protocol = url.getProtocol();
            // Only HTTP(S) connections can be cast to HttpURLConnection; reject
            // mailto:, ftp:, file: and the like explicitly instead of relying on
            // a ClassCastException.
            if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
                return false;
            }
            urlConnection = (HttpURLConnection) url.openConnection();
            urlConnection.setRequestMethod("GET");
            urlConnection.setConnectTimeout(TIMEOUT_MILLIS);
            urlConnection.setReadTimeout(TIMEOUT_MILLIS);
            urlConnection.connect();

            String redirectLink = urlConnection.getHeaderField("Location");
            if (redirectLink != null) {
                // Location may be relative, so resolve it against the current URL.
                // Compare as strings: URL.equals(String) is always false.
                String target = new URL(url, redirectLink).toExternalForm();
                if (!target.equals(url.toExternalForm())) {
                    if (redirectsLeft <= 0) {
                        return false; // redirect loop or excessively long chain
                    }
                    return isLive(target, redirectsLeft - 1);
                }
            }
            return urlConnection.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (Exception e) {
            // Boundary of the probe: any failure means the link is not usable.
            return false;
        } finally {
            if (urlConnection != null) {
                urlConnection.disconnect();
            }
        }
    }

    /**
     * Entry point: parses the page named by {@code arg[0]} and prints one
     * {@code "<link> status <true|false>"} line per collected link.
     *
     * @param arg command-line arguments; {@code arg[0]} is the page URL
     */
    public static void main(String arg[]){
        if (arg.length < 1) {
            System.err.println("Usage: java BrokenLinkFinder <page-url>");
            return;
        }

        Collection<?> results;
        try {
            URL url = new URL(arg[0]);
            HTMLCallback callback = new HTMLCallback(url);
            // NOTE(review): the page is decoded with the platform default charset.
            Reader reader = new InputStreamReader(url.openStream());
            try {
                HTMLEditorKit.Parser parser = new ParserDelegator();
                parser.parse(reader, callback, true);
            } finally {
                reader.close(); // released even when parsing fails
            }
            results = callback.getResults();
        } catch (Exception e) {
            System.out.println("Requested Webpage Not available");
            return;
        }

        // Plain iteration; the collected set does not need to be drained.
        for (Object item : results) {
            String islink = String.valueOf(item);
            System.out.println(islink + " status " + isLive(islink));
        }
    }
}






/**
 * Parser callback that collects the link targets of a page: the {@code href}
 * of every {@code <a>} start tag and the {@code src} of every {@code <img>}
 * tag.
 *
 * <p>Each reference is resolved against the base URL and stored as an absolute
 * URL string. Duplicates are dropped and first-seen order is kept. References
 * that cannot be turned into a URL are stored verbatim.
 */
class HTMLCallback extends HTMLEditorKit.ParserCallback{
    private final URL base;

    // Strings rather than URL objects: java.net.URL documents that its
    // equals/hashCode may resolve host names, which is unwanted in a hash set.
    private final Collection<String> results = new LinkedHashSet<String>();

    /**
     * @param base URL of the page being parsed, used to resolve relative references
     */
    public HTMLCallback(URL base) { this.base = base; }

    /** Records the {@code href} of anchor tags (tags with separate opening and closing parts). */
    @Override
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet atts, int pos){
        if (HTML.Tag.A.equals(tag)) {
            record(atts.getAttribute(HTML.Attribute.HREF));
        }
    }

    /** Records the {@code src} of image tags (simple tags without a closing tag). */
    @Override
    public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet atts, int pos){
        if (HTML.Tag.IMG.equals(tag)) {
            record(atts.getAttribute(HTML.Attribute.SRC));
        }
    }

    /**
     * Returns the links collected so far.
     *
     * @return the live, mutable collection of link strings in first-seen order
     */
    public Collection<String> getResults(){
        return results;
    }

    /** Resolves one attribute value against the base URL and stores it; {@code null} is ignored. */
    private void record(Object reference) {
        if (reference == null) {
            return;
        }
        String text = reference.toString();
        try {
            results.add(new URL(base, text).toExternalForm());
        } catch (java.net.MalformedURLException e) {
            // Keep the raw reference so the caller can still report it as a broken link.
            results.add(text);
        }
    }
}

Tagged in:

2669
like
0
dislike
0
mail
flag

You must LOGIN to add comments