//------------
// Introduction to Programming Using Java: An Object-Oriented Approach
//	Arnow/Weiss
//------------

import java.io.*;
import java.util.*;
import java.net.*;

/**
 * Crawls the pages of one web site and reports the bad links found on them.
 *
 * <p>Starting from one URL, the surveyor follows every HTTP(S) link whose text
 * contains a given "site string", collecting the pages that could be fetched.
 */
public class WebSurveyor {

    /** Existing web pages whose URLs contain the site string and that are
     *  reachable from the initial URL given to the constructor. */
    private final Set wpList;

    /** URLs, good or bad, that have already been surveyed. */
    private final Set urlList;

    /**
     * Initializes a WebSurveyor capable of giving the set of WebPages reachable
     * from {@code startingUrl}, all on the site defined by {@code siteString}.
     * If {@code startingUrl} does not contain {@code siteString}, nothing is
     * fetched and the set of pages stays empty.
     *
     * @param startingUrl URL of the page the survey starts from
     * @param siteString  substring a URL must contain to count as "on the site"
     * @throws IOException if a surveyed page cannot be read
     */
    public WebSurveyor(String startingUrl, String siteString) throws IOException {
        wpList = new Set();
        urlList = new Set();
        if (startingUrl.indexOf(siteString) == -1) {
            return;
        }
        WebPage wp = new WebPage(startingUrl);
        wpList.addElement(wp);
        urlList.addElement(startingUrl);
        survey(wp, siteString);
    }

    /**
     * Examines the web pages of the site defined by {@code siteString} that are
     * reachable from {@code wp}, recursing into every good page not seen before.
     */
    private void survey(WebPage wp, String siteString) throws IOException {
        Set links = wp.getLinks();
        Enumeration<Object> e = links.elements();
        while (e.hasMoreElements()) {
            String url = (String) e.nextElement();
            if (url.indexOf(siteString) != -1 && !urlList.contains(url)) {
                WebPage wp2 = new WebPage(url);
                // Record the URL before probing it so a bad page is never retried.
                urlList.addElement(url);
                if (!wp2.isBad()) {
                    wpList.addElement(wp2);
                    survey(wp2, siteString);
                }
            }
        }
    }

    /** Returns the set of {@link WebPage} objects collected by the survey. */
    Set getPages() {
        return wpList;
    }

    /** Prints the bad links of one page to standard output, one per line. */
    private static void showBad(WebPage wp, Set badset) {
        System.out.println("Bad pages of " + wp.getURL() + ":");
        Enumeration<Object> ebad = badset.elements();
        while (ebad.hasMoreElements()) {
            System.out.println(ebad.nextElement());
        }
    }

    /**
     * Surveys the site and prints the bad links of every page that has any.
     *
     * @param args {@code args[0]} is the starting URL, {@code args[1]} the site string
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            System.err.println("Usage: java WebSurveyor <startingUrl> <siteString>");
            return;
        }
        String startingUrl = args[0];
        String siteString = args[1];
        WebSurveyor ws = new WebSurveyor(startingUrl, siteString);
        Set pset = ws.getPages();
        Enumeration<Object> e = pset.elements();
        while (e.hasMoreElements()) {
            WebPage wp = (WebPage) e.nextElement();
            Set badset = wp.getBadLinks();
            if (!badset.isEmpty()) {
                showBad(wp, badset);
            }
        }
    }
}

/** A web page identified by its URL; knows how to list its links. */
class WebPage {

    private final String url;

    public WebPage(String url) {
        this.url = url;
    }

    /** Returns true if this page itself is a bad link (see {@link #isBad(String)}). */
    public boolean isBad() throws IOException {
        return isBad(this.url);
    }

    /** Returns a set of all the HTTP(S) HREFs in this web page, as complete URLs. */
    public Set getLinks() throws IOException {
        Set links = new Set();
        HttpReader hr = new HttpReader(url);
        String link = hr.readLine();
        while (link != null) {
            links.addElement(link);
            link = hr.readLine();
        }
        return links;
    }

    /** Returns a set of all the HTTP(S) HREFs in this web page that are bad links. */
    public Set getBadLinks() throws IOException {
        Set bad = new Set();
        // getLinks() has already removed duplicates, so each distinct link is
        // probed only once.
        Enumeration<Object> e = getLinks().elements();
        while (e.hasMoreElements()) {
            String link = (String) e.nextElement();
            if (isBad(link)) {
                bad.addElement(link);
            }
        }
        return bad;
    }

    public String getURL() {
        return url;
    }

    /**
     * Decides whether {@code url} is a bad link. A link is bad when it cannot be
     * opened at all, or when the first line of its content looks like an error
     * page (mentions 404, 403, "not found" or "forbidden"). An empty document is
     * not considered bad.
     */
    private boolean isBad(String url) throws IOException {
        WebReader wr;
        try {
            wr = new WebReader(url);
        } catch (IOException e) {
            // Deliberate: failing to open the target is exactly what "bad link"
            // means here, so report it instead of aborting the whole survey.
            return true;
        }
        try {
            String first = wr.readLine();
            if (first == null) {
                return false;
            }
            // Locale.ROOT keeps the comparison stable in locales with special
            // case mappings for 'i'.
            String s = first.toUpperCase(Locale.ROOT);
            return s.indexOf("404") != -1
                || s.indexOf("403") != -1
                || s.indexOf("NOT FOUND") != -1
                || s.indexOf("FORBIDDEN") != -1;
        } finally {
            wr.close();
        }
    }

    /** Prints every link of the page named by {@code a[0]}. */
    public static void main(String[] a) throws Exception {
        if (a.length < 1) {
            System.err.println("Usage: java WebPage <url>");
            return;
        }
        WebPage wp = new WebPage(a[0]);
        Set links = wp.getLinks();
        Enumeration<Object> e = links.elements();
        while (e.hasMoreElements()) {
            System.out.println(wp.getURL() + "has this link:" + e.nextElement());
        }
    }
}

/** Reads the content behind a URL one line at a time. */
class WebReader {

    private final BufferedReader br;
    private boolean closed;

    /**
     * Opens the resource named by {@code url}.
     *
     * @throws IOException if the URL is malformed or the resource cannot be opened
     */
    public WebReader(String url) throws IOException {
        URL u = new URL(url);
        URLConnection uC = u.openConnection();
        // getInputStream() always yields a stream; getContent() may return any
        // object depending on the content type, so it must not be cast.
        br = new BufferedReader(new InputStreamReader(uC.getInputStream()));
    }

    /**
     * Returns the next line, or null at end of input. The underlying stream is
     * closed as soon as the end is reached; later calls keep returning null.
     */
    public String readLine() throws IOException {
        if (closed) {
            return null;
        }
        String line = br.readLine();
        if (line == null) {
            close();
        }
        return line;
    }

    /** Releases the underlying stream; safe to call more than once. */
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            br.close();
        }
    }
}

/** Reads the links of a page, keeping only those that are HTTP or HTTPS URLs. */
class HttpReader {

    private final LinkReader lr;

    public HttpReader(String urlString) throws IOException {
        lr = new LinkReader(urlString);
    }

    /** Returns the next HTTP(S) HREF in complete URL form, or null when none is left. */
    public String readLine() throws IOException {
        String line = lr.readLine();
        while (line != null && !isHttpUrl(line)) {
            line = lr.readLine();
        }
        return line;
    }

    /** True if {@code link} starts with the http: or https: scheme, in any letter case. */
    private static boolean isHttpUrl(String link) {
        return link.regionMatches(true, 0, "http:", 0, 5)
            || link.regionMatches(true, 0, "https:", 0, 6);
    }
}

/** Reads the HREFs of a page and turns relative ones into complete URLs. */
class LinkReader {

    private final HrefReader hr;            // The HrefReader to read HREFs from
    private final URL url;                  // The URL of this page
    private final String host;              // The host portion of this URL
    private final String resource;          // The resource portion of this URL
    private final String protocol;          // The protocol portion of this URL
    private final String directory;         // The resource's directory, ending in "/"
    private final String parentDirectory;   // The directory above it, ending in "/"
    private final int port;                 // The port number of this URL, or -1

    public LinkReader(String urlString) throws IOException {
        hr = new HrefReader(urlString);
        url = new URL(urlString);
        host = url.getHost() == null ? "" : url.getHost();
        port = url.getPort();
        protocol = url.getProtocol();
        resource = url.getFile();

        // Use the path only: getFile() includes the query string, and a '/'
        // inside a query must not be mistaken for a directory separator.
        String path = url.getPath() == null ? "" : url.getPath();
        int lastSlash = path.lastIndexOf('/');
        String dir = lastSlash == -1 ? "" : path.substring(0, lastSlash);
        int parentSlash = dir.lastIndexOf('/');
        String parent = parentSlash == -1 ? "" : dir.substring(0, parentSlash);
        // Both always end in "/" and are never null, so a page at the site root
        // (or with no path at all) resolves relative links against "/".
        directory = dir + "/";
        parentDirectory = parent + "/";
    }

    /**
     * Returns the next HREF of the page as a complete URL, or null when the page
     * has no more HREFs. Links that already carry a scheme are returned as they
     * are (a bare "scheme://host" gets a trailing "/"); other links are resolved
     * against this page's host and directory.
     */
    public String readLine() throws IOException {
        String link = hr.readLine();
        if (link == null) {
            return null;
        }
        int schemeEnd = link.indexOf("://");
        if (schemeEnd != -1) {
            boolean hasPath = link.indexOf('/', schemeEnd + 3) != -1;
            return hasPath ? link : link + "/";
        }
        // "mailto:someone" and the like: a colon with no dot before it is a scheme.
        int colon = link.indexOf(':');
        if (colon != -1 && link.substring(0, colon).indexOf('.') == -1) {
            return link;
        }
        String base = protocol + "://" + authority();
        if (link.equals("..")) {
            return base + parentDirectory;
        }
        if (link.startsWith("../")) {
            return base + parentDirectory + link.substring(3);
        }
        if (link.startsWith("/")) {
            return base + link;
        }
        return base + directory + link;
    }

    /** Host of this page, followed by ":port" when the URL names a port explicitly. */
    private String authority() {
        return port == -1 ? host : host + ":" + port;
    }
}

/** Reads the tags of a page and extracts the value of each HREF attribute. */
class HrefReader {

    private static final String HREF = "HREF";

    private final TagReader tr; // The TagReader to read tags from

    public HrefReader(String url) throws IOException {
        tr = new TagReader(url);
    }

    /** Returns the index of HREF, href, HrEf and so on in s, or -1. */
    private int hrefIndex(String s) {
        return s.toUpperCase(Locale.ROOT).indexOf(HREF);
    }

    /** Returns the part of s before the first x (all of s if x is absent), trimmed. */
    private String trimFrom(String s, String x) {
        int k = s.indexOf(x);
        if (k != -1) {
            return s.substring(0, k).trim();
        }
        return s.trim();
    }

    /** Returns the part of s after the first x (all of s if x is absent), trimmed. */
    private String trimUpThrough(String s, String x) {
        int k = s.indexOf(x);
        if (k != -1) {
            return s.substring(k + x.length()).trim();
        }
        return s.trim();
    }

    /**
     * Returns the value of the next HREF attribute on the page, without quotes
     * and without any "#fragment" or "?query" part, or null when no tag with an
     * HREF is left.
     */
    public String readLine() throws IOException {
        String tag = tr.readLine();
        while (tag != null && hrefIndex(tag) == -1) {
            tag = tr.readLine();
        }
        if (tag == null) {
            return null;
        }
        // Drop everything up to and including the attribute name, then the '='.
        tag = tag.substring(hrefIndex(tag) + HREF.length()).trim();
        tag = trimUpThrough(tag, "=");
        tag = trimFrom(tag, " ");
        if (tag.indexOf("\"") != -1) {
            tag = trimUpThrough(tag, "\"");
            tag = trimFrom(tag, "\"");
        }
        if (tag.indexOf("'") != -1) {
            tag = trimUpThrough(tag, "'");
            tag = trimFrom(tag, "'");
        }
        tag = trimFrom(tag, "#");
        return trimFrom(tag, "?");
    }
}

/** Reads a page and returns the text between each pair of angle brackets. */
class TagReader {

    private String line;            // The unprocessed part of the most recent line
                                    // read from wr
    private final WebReader wr;     // The WebReader to read lines from

    public TagReader(String url) throws IOException {
        wr = new WebReader(url);
        line = null;
    }

    /**
     * Returns the content of the next tag (the text between '<' and the next
     * '>'), or null at end of input. A tag may span several lines; the line
     * break is replaced by a single space. A tag still unterminated at end of
     * input is discarded.
     */
    public String readLine() throws IOException {
        if (line == null) {
            line = wr.readLine();
        }
        while (line != null && line.indexOf('<') == -1) {
            line = wr.readLine();
        }
        if (line == null) {
            return null;
        }
        // Text before the '<' (including any stray '>') is not part of a tag.
        line = line.substring(line.indexOf('<'));
        while (line.indexOf('>') == -1) {
            String nextLine = wr.readLine();
            if (nextLine == null) {
                line = null;
                return null;
            }
            line = line + " " + nextLine;
        }
        int tagEnd = line.indexOf('>');
        String tag = line.substring(1, tagEnd);
        line = line.substring(tagEnd + 1);
        return tag;
    }
}

/**
 * A simple set of objects that remembers insertion order. Membership is decided
 * with {@code equals}.
 */
class Set {

    private final Vector<Object> vector;

    public Set() {
        vector = new Vector<>();
    }

    /** Creates an empty set with room for {@code n} elements. */
    public Set(int n) {
        vector = new Vector<>(n);
    }

    public boolean isEmpty() {
        return vector.isEmpty();
    }

    public int size() {
        return vector.size();
    }

    public boolean contains(Object o) {
        return vector.contains(o);
    }

    /** Adds {@code o} unless an equal element is already present. */
    public void addElement(Object o) {
        if (!contains(o)) {
            vector.addElement(o);
        }
    }

    /** Returns a new Set holding the same elements (the elements are not copied). */
    @Override
    public Object clone() {
        Set destSet = new Set();
        Enumeration<Object> items = vector.elements();
        while (items.hasMoreElements()) {
            destSet.addElement(items.nextElement());
        }
        return destSet;
    }

    /** Returns a new Set with the elements of {@code s} followed by those of this set. */
    public Set union(Set s) {
        Set unionSet = (Set) s.clone();
        Enumeration<Object> items = vector.elements();
        while (items.hasMoreElements()) {
            unionSet.addElement(items.nextElement());
        }
        return unionSet;
    }

    /** Returns a new Set with the elements of this set that are also in {@code s}. */
    public Set intersection(Set s) {
        Set interSet = new Set();
        Enumeration<Object> items = this.vector.elements();
        while (items.hasMoreElements()) {
            Object elem = items.nextElement();
            if (s.contains(elem)) {
                interSet.addElement(elem);
            }
        }
        return interSet;
    }

    public Enumeration<Object> elements() {
        return vector.elements();
    }

    /** Prints every element to {@code ps}, each followed by two spaces. */
    public void print(PrintStream ps) {
        Enumeration<Object> items = vector.elements();
        while (items.hasMoreElements()) {
            ps.print(items.nextElement().toString());
            ps.print("  ");
        }
    }
}