else if ((str.subString(index+1)).equals("ppt")) return true;
else if ((str.subString(index+1)).equals("pptx")) return true;
else if ((str.subString(index+1)).equals("jpg")) return true;
+ else if ((str.subString(index+1)).equals("mp3")) return true;
+ else if ((str.subString(index+1)).equals("wmv")) return true;
+ else if ((str.subString(index+1)).equals("doc")) return true;
+ else if ((str.subString(index+1)).equals("docx")) return true;
+ else if ((str.subString(index+1)).equals("mov")) return true;
+ else if ((str.subString(index+1)).equals("flv")) return true;
else return false;
}
return false;
}
// Completion callback: optionally runs processList(), then drains the
// `toprocess` stack, moving each not-yet-seen query onto `todoList`.
//
// This span is a diff hunk, so both versions of the method are interleaved:
//   '-' lines (old): processList() ran whenever gTitle was non-null, and
//       workingURL was then recorded in doneList with the value "true".
//   '+' lines (new): processList() runs only when gTitle is non-null and
//       non-empty, and an entry keyed by gsb.toGlobalString() with the value
//       "1" is stored in doneList right after the query is pushed to todoList.
//
// The `obj` parameter is not referenced in the visible lines.
// NOTE(review): `gsb` is never assigned in the visible lines; presumably it is
// built from `q` in lines elided from this hunk -- confirm against the full file.
public void done(Object obj) {
- if (gTitle != null)
+ if ((gTitle != null) && (gTitle.length() > 0)) {
processList();
-
- GlobalString str = global new GlobalString("true");
-
- doneList.put(workingURL, str);
+ }
// Pop queries until `toprocess` is empty; only keys missing from doneList are queued.
while(!toprocess.isEmpty()) {
GlobalQuery q = (GlobalQuery)toprocess.pop();
if (!doneList.containsKey(gsb.toGlobalString())) {
todoList.push(q);
+
+ GlobalString str = global new GlobalString("1");
+ doneList.put(gsb.toGlobalString(), str);
}
}
}
else if (str.equals("but")) return true;
else if (str.equals("to")) return true;
else if (str.equals("The")) return true;
- else if (str.equals(".")) return true;
- else if (str.equals("-")) return true;
- else if (str.equals("=")) return true;
- else if (str.equals("_")) return true;
- else if (str.equals(":")) return true;
- else if (str.equals(";")) return true;
- else if (str.equals("\'")) return true;
- else if (str.equals("\"")) return true;
- else if (str.equals("|")) return true;
- else if (str.equals("@")) return true;
- else if (str.equals("&")) return true;
- else if (str.equals(" ")) return true;
+ else if (str.length() == 1) {
+ if (str.charAt(0) == '.') return true;
+ else if (str.charAt(0) == '.') return true;
+ else if (str.charAt(0) == '-') return true;
+ else if (str.charAt(0) == '=') return true;
+ else if (str.charAt(0) == '_') return true;
+ else if (str.charAt(0) == ':') return true;
+ else if (str.charAt(0) == ';') return true;
+ else if (str.charAt(0) == '\'') return true;
+ else if (str.charAt(0) == '\"') return true;
+ else if (str.charAt(0) == '|') return true;
+ else if (str.charAt(0) == '@') return true;
+ else if (str.charAt(0) == '&') return true;
+ else if (str.charAt(0) == ' ') return true;
+ }
else return false;
}
public class Spider {
public static void main(String[] args) {
int NUM_THREADS = 3;
- int maxDepth = 3;
+ int maxDepth = 4;
int i, j;
Work[] works;
QueryTask[] qt;
NUM_THREADS = Integer.parseInt(args[0]);
- if (args.length == 3) {
- maxDepth = Integer.parseInt(args[2]);
- }
+// if (args.length == 3) {
+// maxDepth = Integer.parseInt(args[2]);
+// }
GlobalString firstmachine;
+ GlobalString firstpage;
int mid[] = new int[NUM_THREADS];
// mid[0] = (128<<24)|(195<<16)|(180<<8)|21;
atomic {
firstmachine = global new GlobalString(args[1]);
+ if (args.length == 3) {
+ firstpage = global new GlobalString(args[2]);
+ }
+ else
+ firstpage = global new GlobalString("");;
works = global new Work[NUM_THREADS];
qt = global new QueryTask[NUM_THREADS];
currentWorkList = global new GlobalQuery[NUM_THREADS];
- GlobalQuery firstquery = global new GlobalQuery(firstmachine);
+ GlobalQuery firstquery = global new GlobalQuery(firstmachine, firstpage);
Queue todoList = global new Queue();
DistributedHashMap doneList = global new DistributedHashMap(500, 500, 0.75f);