//System.printString(path);
//System.printString("\n");
- if (isDocument(path)) {
- lq = (LocalQuery)(todoList.pop());
- depth = lq.getDepth();
- continue;
- }
-
Socket s = new Socket();
if(s.connect(hostname, 80) == -1) {
- lq = (LocalQuery)(todoList.pop());
- depth = lq.getDepth();
- continue;
- }
-
-// System.out.println("AAA");
- requestQuery(hostname, path, s);
-// System.out.println("BBB");
- readResponse(lq, s);
-
-// System.out.println("CCC");
- if ((title = grabTitle(lq)) != null) {
- toprocess = processPage(lq);
+ //lq = (LocalQuery)(todoList.pop());
+ //depth = lq.getDepth();
+ //continue;
+ return;
}
-// System.out.println("DDD");
+ if(requestQuery(hostname, path, s) == 0) {
+ readResponse(lq, s);
+ if ((title = grabTitle(lq)) != null) {
+ toprocess = processPage(lq);
+ }
+ } else {
+ ;
+ }
s.close();
done(toprocess);
lq = (LocalQuery)(todoList.pop());
}
public void done(Queue toprocess) {
+ /*
if ((title != null) && (title.length() > 0)) {
processedList();
}
-
+ */
int searchCnt = 0;
while(!toprocess.isEmpty()) {
LocalQuery q = (LocalQuery)toprocess.pop();
while (iter.hasNext() == true) {
str = ((String)(iter.next()));
- //System.printString(str + "\n");
}
}
else
return false;
}
-
- public static void requestQuery(String hostname, String path, Socket sock) {
- StringBuffer req = new StringBuffer("GET ");
- req.append("/");
- req.append(path);
- req.append(" HTTP/1.0\r\nHost: ");
- req.append(hostname);
- req.append("\r\n\r\n");
- sock.write(req.toString().getBytes());
- }
+
+ /**
+  * Writes an HTTP/1.0 GET request for {@code path} on {@code hostname} to {@code sock}.
+  *
+  * @param hostname value placed in the Host header
+  * @param path     resource path; it is appended after a leading "/"
+  * @param sock     socket the request bytes are written to
+  * @return -1 when {@code sock.write} returns -1, otherwise 0
+  */
+ public static int requestQuery(String hostname, String path, Socket sock) {
+   // Request line first, then the Host header, then the terminating blank line.
+   StringBuffer request = new StringBuffer("GET ");
+   request.append("/");
+   request.append(path);
+   request.append(" HTTP/1.0\r\nHost: ");
+   request.append(hostname);
+   request.append("\r\n\r\n");
+
+   // Propagate a failed write to the caller instead of ignoring it.
+   if (sock.write(request.toString().getBytes()) == -1) {
+     return -1;
+   }
+   return 0;
+ }
public static void readResponse(LocalQuery lq, Socket sock) {
// state 0 - nothing
return str;
}
- public static Queue processPage(LocalQuery lq) {
- int index = 0;
- String href = new String("href=\"");
- String searchstr = lq.response.toString();
- int depth;
- boolean cont = true;
- Queue toprocess;
-
- depth = lq.getDepth() + 1;
-
- toprocess = new Queue();
- while(cont) {
- int mindex = searchstr.indexOf(href,index);
- if (mindex != -1) {
- int endquote = searchstr.indexOf('"', mindex+href.length());
- if (endquote != -1) {
- String match = searchstr.subString(mindex+href.length(), endquote);
- String match2 = lq.makewebcanonical(match);
-
- String hostname;
- String path;
-
- hostname = new String(lq.getHostName(match));
- path = new String(lq.getPathName(match));
-
- if (match2 != null) {
- LocalQuery gq = new LocalQuery(hostname, path, depth);
- toprocess.push(gq);
- }
- index = endquote;
- } else cont = false;
- } else cont = false;
+ /**
+  * Scans the response stored in {@code lq} for {@code href="..."} attributes and
+  * creates a follow-up query for every link whose canonical form is not null.
+  *
+  * @param lq query whose {@code response} is searched; every query created here
+  *           gets the depth of {@code lq} plus one
+  * @return queue holding the created LocalQuery objects, pushed in the order
+  *         the links appear in the response
+  */
+ public static Queue processPage(LocalQuery lq) {
+   String href = new String("href=\"");
+   String page = lq.response.toString();
+   int childDepth = lq.getDepth() + 1;
+   Queue found = new Queue();
+
+   int cursor = 0;
+   while (true) {
+     // Stop as soon as no further href=" marker exists.
+     int hrefAt = page.indexOf(href, cursor);
+     if (hrefAt == -1) {
+       break;
+     }
+     // Stop as well when the attribute value is never closed by a quote.
+     int valueStart = hrefAt + href.length();
+     int closingQuote = page.indexOf('"', valueStart);
+     if (closingQuote == -1) {
+       break;
+     }
+
+     String link = page.subString(valueStart, closingQuote);
+     String canonical = lq.makewebcanonical(link);
+     // Host and path are taken from the raw link, before the canonical form is checked.
+     String linkHost = new String(lq.getHostName(link));
+     String linkPath = new String(lq.getPathName(link));
+     if (canonical != null) {
+       found.push(new LocalQuery(linkHost, linkPath, childDepth));
+     }
+     // Resume the search at the closing quote of the link just handled.
+     cursor = closingQuote;
+   }
+   return found;
}
- return toprocess;
- }
}
int ldepth;
atomic {
- System.out.println("trans 2");
max = this.maxDepth;
maxSearch = this.maxSearchDepth;
ldepth=this.depth;
}
-
+
if (ldepth < max) {
/* local variables */
- String hostname;
- String path;
- String title;
-
+ String hostname=null;
+ String path=null;
+ String title=null;
+
atomic {
- System.out.println("trans 3");
- hostname = new String(GlobalString.toLocalCharArray(getHostName()));
- path = new String(GlobalString.toLocalCharArray(getPath()));
-
- GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
- gsb.append("/");
- gsb.append(path);
- workingURL = global new GlobalString(gsb.toGlobalString());
- gTitle = null;
+ hostname = new String(GlobalString.toLocalCharArray(getHostName()));
+ path = new String(GlobalString.toLocalCharArray(getPath()));
+ System.out.println("hostname= " + hostname + " path= " + path);
+ GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
+ gsb.append("/");
+ gsb.append(path);
+ workingURL = global new GlobalString(gsb.toGlobalString());
+ gTitle = null;
}
LocalQuery lq = new LocalQuery(hostname, path, ldepth);
+ /*
if (isDocument(path)) {
- return;
+ return;
}
-
+ */
+
Socket s = new Socket();
if(s.connect(hostname, 80) == -1) {
- return;
+ return;
}
-
+
+ if(requestQuery(hostname, path, s) == 0) {
+ readResponse(lq, s);
+ if ((title = grabTitle(lq)) != null) {
+ atomic {
+ //commits everything...either works or fails
+ gTitle = global new GlobalString(title);
+ processPage(lq);
+ dequeueTask();
+ }
+ }
+ } else {
+ atomic {
+ dequeueTask();
+ }
+ }
+
+ /*
+ if(requestQuery(hostname, path, s) == -1) {
+ atomic {
+ dequeueTask();
+ }
+ } else {
+ readResponse(lq, s);
+ if ((title = grabTitle(lq)) != null) {
+ atomic {
+ //commits everything...either works or fails
+ gTitle = global new GlobalString(title);
+ processPage(lq);
+ dequeueTask();
+ }
+ }
+ }
+ */
+ /*
requestQuery(hostname, path, s);
readResponse(lq, s);
-
if ((title = grabTitle(lq)) != null) {
- atomic {
- System.out.println("trans 4");
- //commits everything...either works or fails
- gTitle = global new GlobalString(title);
- processPage(lq);
- dequeueTask();
- }
+ atomic {
+ //commits everything...either works or fails
+ gTitle = global new GlobalString(title);
+ processPage(lq);
+ dequeueTask();
+ }
}
+ */
s.close();
} else {
atomic {
- System.out.println("trans 5");
- dequeueTask();
+ dequeueTask();
}
}
}
public GlobalString getPathName(GlobalString page) {
GlobalString http = global new GlobalString("http://");
GlobalString https = global new GlobalString("https://");
- int beginindex;
- int nextindex;
+ int beginindex=0;
+ int nextindex=0;
if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) {
GlobalString path = getPath();
beginindex = page.indexOf(http) + http.length();
}
nextindex = page.indexOf('/',beginindex+1);
-
if ((beginindex == -1) || (nextindex == -1))
return global new GlobalString("index.html");
return page.subString(nextindex+1, page.length());
public static boolean isDocument(String str) {
int index = str.lastindexOf('.');
-
if (index != -1) {
if ((str.subString(index+1)).equals("pdf")) return true;
else if ((str.subString(index+1)).equals("ps")) return true;
return false;
}
+ /*
public void output() {
String str;
Iterator iter = results_list.iterator();
System.out.println("Size = " + results_list.size());
}
+ */
public static String grabTitle(LocalQuery lq) {
String sBrace = new String("<");
if (mindex != -1) {
title = searchstr.subString(mindex, endquote);
if (Character.isWhitespace(title.charAt(0))){
- mindex=0;
- while (Character.isWhitespace(title.charAt(mindex++)));
- mindex--;
- if (mindex >= title.length()) return null;
- title = new String(title.subString(mindex));
+ mindex=0;
+ while (Character.isWhitespace(title.charAt(mindex++)));
+ mindex--;
+ if (mindex >= title.length()) return null;
+ title = new String(title.subString(mindex));
}
-
+
if (Character.isWhitespace(title.charAt(title.length()-1))) {
- endquote=title.length()-1;
- while (Character.isWhitespace(title.charAt(endquote--)));
- endquote += 2;
- if (mindex >= endquote) return null;
- title = new String(title.subString(0, endquote));
+ endquote=title.length()-1;
+ while (Character.isWhitespace(title.charAt(endquote--)));
+ endquote += 2;
+ if (mindex >= endquote) {
+ return null;
+ }
+ title = new String(title.subString(0, endquote));
}
-
+
if (isErrorPage(title)) {
- return null;
+ return null;
}
}
-
+
return title;
}
return false;
}
- public static void requestQuery(String hostname, String path, Socket sock) {
+ public static int requestQuery(String hostname, String path, Socket sock) {
StringBuffer req = new StringBuffer("GET ");
req.append("/");
req.append(path);
req.append(" HTTP/1.0\r\nHost: ");
req.append(hostname);
req.append("\r\n\r\n");
- sock.write(req.toString().getBytes());
+ if(sock.write(req.toString().getBytes()) == -1) {
+ return -1; // sock.write returned -1: report to the caller that the request could not be sent
+ } else {
+ return 0; // sock.write did not report -1: the caller may go on to read the response
+ }
}
public static void readResponse(LocalQuery lq, Socket sock) {
GlobalString token = null;
int mindex = 0;
int endquote = 0;
-
+
while (endquote != -1) {
endquote = gTitle.indexOf(' ', mindex);
-
+
if (endquote != -1) {
- token = gTitle.subString(mindex, endquote);
- mindex = endquote + 1;
- if (filter(token)) {
- continue;
- }
- token = refine(token);
+ token = gTitle.subString(mindex, endquote);
+ mindex = endquote + 1;
+ if (filter(token)) {
+ continue;
+ }
+ token = refine(token);
} else {
- token = gTitle.subString(mindex);
- token = refine(token);
+ token = gTitle.subString(mindex);
+ token = refine(token);
}
-
+
GlobalQueue q = (GlobalQueue)results.get(token);
if (q == null) {
- q = global new GlobalQueue();
+ q = global new GlobalQueue();
}
q.push(workingURL);
results.put(token, q);
return str;
}
-
+
public void processPage(LocalQuery lq) {
+ //System.out.println("Inside processPage");
+ /*
if ((gTitle != null) && (gTitle.length() > 0)) {
processedList();
}
+ */
int index = 0;
String href = new String("href=\"");
while(true) {
int mindex = searchstr.indexOf(href,index);
if (mindex != -1) {
- int endquote = searchstr.indexOf('"', mindex+href.length());
- if (endquote != -1) {
- String match = searchstr.subString(mindex+href.length(), endquote);
- String match2 = lq.makewebcanonical(match);
-
- GlobalString ghostname;
- GlobalString gpath;
-
- ghostname = global new GlobalString(lq.getHostName(match));
- gpath = global new GlobalString(lq.getPathName(match));
-
- GlobalStringBuffer gsb = global new GlobalStringBuffer(ghostname);
- gsb.append("/");
- gsb.append(gpath);
+ int endquote = searchstr.indexOf('"', mindex+href.length());
+ if (endquote != -1) {
+ String match = searchstr.subString(mindex+href.length(), endquote);
+ String match2 = lq.makewebcanonical(match);
+ //System.out.println("match= " + match + " match2= " + match2);
- if (match2 != null) {
- if (!visitedList.containsKey(gsb.toGlobalString()) && (searchCnt < maxSearchDepth)) {
- GlobalString str = global new GlobalString("1");
- visitedList.put(gsb.toGlobalString(), str);
- results_list.add(gsb.toGlobalString());
- searchCnt++;
- QueryTask gq = global new QueryTask(visitedList, maxDepth, maxSearchDepth, results, results_list, ghostname, gpath, lq.getDepth()+1);
- enqueueTask(gq);
- }
- }
- index = endquote;
- } else
- break;
- } else
- break;
+ GlobalString ghostname;
+ GlobalString gpath;
+
+ ghostname = global new GlobalString(lq.getHostName(match));
+ gpath = global new GlobalString(lq.getPathName(match));
+
+ GlobalStringBuffer gsb = global new GlobalStringBuffer(ghostname);
+ gsb.append("/");
+ gsb.append(gpath);
+ //System.out.println("match2=" + match2 + lq.getHostName(match)+"/"+lq.getPathName(match));
+
+ if (match2 != null) {
+ if (!visitedList.containsKey(gsb.toGlobalString()) && (searchCnt < maxSearchDepth)) {
+ //System.out.println("I am here");
+ GlobalString str = global new GlobalString("1");
+ visitedList.put(gsb.toGlobalString(), str);
+ //results_list.add(gsb.toGlobalString());
+ searchCnt++;
+ QueryTask gq = global new QueryTask(visitedList, maxDepth, maxSearchDepth, results, results_list, ghostname, gpath, lq.getDepth()+1);
+ enqueueTask(gq);
+ }
+ }
+ index = endquote;
+ } else {
+ //System.out.println("mindex= " + mindex + " index= " + index + " endquote= " + endquote + " href.length()= " + href.length());
+ break;
+ }
+ } else {
+ //System.out.println("mindex= " + mindex + " index= " + index);
+ break;
+ }
}
+ //System.out.println("End of processPage");
+ //System.out.println("\n");
}
}