only print out line numbers in debug mode
[IRC.git] / QueryTask.java
1 public class QueryTask extends Task {
2         int maxDepth;
3         Queue toprocess;
4         DistributedHashMap results;
5         GlobalString workingURL;
6
7   public QueryTask(Queue todoList, DistributedHashMap doneList, int maxDepth, DistributedHashMap results) {
8     this.todoList = todoList;
9                 this.doneList = doneList;
10                 this.maxDepth = maxDepth;
11                 this.results = results;
12   }
13
14   public void execute() {
15                 int depth;
16                 int max;
17                 
18                 atomic {
19                         depth = ((GlobalQuery)myWork).getDepth();
20       max = this.maxDepth;
21                 }
22
23                 if (depth < max) {
24                         /* global variables */
25                         GlobalQuery gq;
26
27                         /* local variables */
28                         LocalQuery lq;
29                         String hostname;
30                         String path;
31
32                         atomic {
33                                 gq = (GlobalQuery)myWork;
34                                 hostname = new String(GlobalString.toLocalCharArray(gq.getHostName()));
35                                 path = new String(GlobalString.toLocalCharArray(gq.getPath()));
36
37                                 GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
38                                 gsb.append("/");
39                                 gsb.append(path);
40                                 workingURL = global new GlobalString(gsb.toGlobalString());
41                         }
42                         lq = new LocalQuery(hostname, path, depth);
43
44                         System.printString(lq.getDepth()+" ");
45                         System.printString("Processing - Hostname : ");
46                         System.printString(hostname);
47                         System.printString(", Path : ");
48                         System.printString(path);
49                         System.printString("\n");
50
51                         Socket s = new Socket(hostname, 80);
52     
53                         requestQuery(hostname, path, s);
54                         readResponse(lq, s);
55
56                         atomic {
57                                 processList(lq, workingURL, results);
58                         }
59
60                         atomic {
61                                 toprocess = processPage(lq);
62                         }
63
64                         s.close();
65                 }
66   }
67
68         public void done(Object obj) {
69                 GlobalString str = global new GlobalString("true");
70                 doneList.put(workingURL, str);
71
72                 while(!toprocess.isEmpty()) {
73                         GlobalQuery q = (GlobalQuery)toprocess.pop();
74
75                         GlobalString hostname = global new GlobalString(q.getHostName());
76                         GlobalString path = global new GlobalString(q.getPath());
77
78                         GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
79                         gsb.append("/");
80                         gsb.append(path);
81
82                         if (!doneList.containsKey(gsb.toGlobalString())) {
83                                 todoList.push(q);
84                         }
85                 }
86         }
87
88         public static void requestQuery(String hostname, String path, Socket sock) {
89     StringBuffer req = new StringBuffer("GET "); 
90     req.append("/");
91                 req.append(path);
92     req.append(" HTTP/1.1\r\nHost:");
93     req.append(hostname);
94     req.append("\r\n\r\n");
95     sock.write(req.toString().getBytes());
96   }
97
98         public static void readResponse(LocalQuery lq, Socket sock) {
99         //    state 0 - nothing
100         //    state 1 - \r
101         //    state 2 - \r\n
102         //    state 3 - \r\n\r
103         //    state 4 - \r\n\r\n
104     int state=0;
105     while(true) {
106       if (state<4) {
107         if (state==0) {
108           byte[] b=new byte[1];
109           int numchars=sock.read(b);
110           if ((numchars==1)) {
111             if (b[0]=='\r') {
112               state++;
113             }
114           } else
115                                                 return;
116         } else if (state==1) {
117           byte[] b=new byte[1];
118           int numchars=sock.read(b);
119           if (numchars==1) {
120             if (b[0]=='\n')
121               state++;
122             else
123               state=0;
124           } else return;
125         } else if (state==2) {
126           byte[] b=new byte[1];
127           int numchars=sock.read(b);
128           if (numchars==1) {
129             if (b[0]=='\r')
130               state++;
131             else
132               state=0;
133           } else return;
134         } else if (state==3) {
135           byte[] b=new byte[1];
136           int numchars=sock.read(b);
137           if (numchars==1) {
138             if (b[0]=='\n')
139               state++;
140             else
141               state=0;
142           } else return;
143         }
144       } else {
145                                 byte[] buffer=new byte[1024];
146         int numchars=sock.read(buffer);
147         if (numchars==0)
148           return;
149         else {
150           String curr=(new String(buffer)).subString(0,numchars);
151                                         lq.response.append(curr);
152         }
153       }
154     }
155   }
156
157         public static void processList(LocalQuery lq, GlobalString url, DistributedHashMap results) {
158                 String sTitle = new String("<title>");  
159                 String eTitle = new String("</title>");
160                 String searchstr = lq.response.toString();
161                 LinkedList ll;
162
163                 int sIndex = searchstr.indexOf(sTitle);
164                 if (sIndex != -1) {
165                         int eIndex = searchstr.indexOf(eTitle, sIndex+sTitle.length());
166                         String title = new String(searchstr.subString(sIndex+sTitle.length(), eIndex));
167                         ll = tokenize(title);
168
169                         Queue q;
170                         while (!ll.isEmpty()) {
171                                 GlobalString word = global new GlobalString(ll.pop().toString());
172 //                              q = (Queue)(results.get(word));
173
174 //                              if (q == null) {
175                                 if (!results.containsKey(word)) {
176                                         q = global new Queue();
177                                 }
178                                 else {
179                                         q = (Queue)(results.get(word));
180                                 }
181                                 q.push(url);
182                                 results.put(word, q);
183
184                                 System.out.println("Key : ["+word.toLocalString()+"],["+q.size()+"]");
185 /*
186                                 for (int i = 0; i < q.size(); i++) {
187                                         Object obj = q.elements[i];
188                                         GlobalString str = global new GlobalString((GlobalString)obj);
189                                         System.out.println("\t["+i+"] : "+str.toLocalString());
190                                 }*/
191                         }
192                 }
193         }
194
195         public static LinkedList tokenize(String str) {
196                 LinkedList ll;
197                 int sIndex = 0;
198                 int eIndex = 0;
199                 String token;
200
201                 ll = new LinkedList();
202                 
203                 // and, or, of, at, but, '.', ',', ':' ';', '"', ' ', '-', '='
204                 while (true) {
205                         eIndex = str.indexOf(' ', sIndex);
206                         if (eIndex == -1) {
207                                 token = str.subString(sIndex);
208                                 ll.add(token);
209                                 break;
210                         }
211                         else {
212                                 token = str.subString(sIndex, eIndex);
213                                 ll.add(token);
214                                 sIndex = eIndex+1;
215                         }
216                 }
217                 
218                 return ll;
219         }
220         
221   public static Queue processPage(LocalQuery lq) {
222     int index = 0;
223         String href = new String("href=\"");
224         String searchstr = lq.response.toString();
225                 int depth;
226         boolean cont = true;
227                 Queue toprocess;
228
229                 depth = lq.getDepth() + 1;
230
231                 toprocess = global new Queue();
232
233                 while(cont) {
234                         int mindex = searchstr.indexOf(href,index);
235                         if (mindex != -1) {     
236                                 int endquote = searchstr.indexOf('"', mindex+href.length());
237                 if (endquote != -1) {
238                       String match = searchstr.subString(mindex+href.length(), endquote);
239                                         String match2 = lq.makewebcanonical(match);
240         
241                                         GlobalString ghostname;
242                                         GlobalString gpath;
243
244                                         ghostname = global new GlobalString(lq.getHostName(match));
245                                         gpath = global new GlobalString(lq.getPathName(match));
246
247                       if (match2 != null) {
248                                                         GlobalQuery gq = global new GlobalQuery(ghostname, gpath, depth);
249                                                         toprocess.push(gq);
250                                         }
251                                         index = endquote;
252         } else cont = false;
253       } else cont = false;
254     }
255                 return toprocess;
256   }
257 }