From 82723a7024db62a08100bc29393bf4dec9960bf6 Mon Sep 17 00:00:00 2001 From: adash Date: Sat, 7 Nov 2009 00:07:18 +0000 Subject: [PATCH] bug fixes --- .../SpamFilter/DistributedHashMap.java | 2 + .../Distributed/SpamFilter/FilterResult.java | 16 ++-- .../SpamFilter/FilterStatistic.java | 7 ++ .../Distributed/SpamFilter/GString.java | 33 ++++++++ .../Distributed/SpamFilter/HashEntry.java | 2 +- .../Distributed/SpamFilter/Mail.java | 27 ++++++- .../SpamFilter/SignatureComputer.java | 16 ++-- .../Distributed/SpamFilter/SpamFilter.java | 80 ++++++++++--------- .../Distributed/SpamFilter/makefile | 6 +- 9 files changed, 130 insertions(+), 59 deletions(-) diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/DistributedHashMap.java b/Robust/src/Benchmarks/Distributed/SpamFilter/DistributedHashMap.java index 93ffe3a9..8ea2010a 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/DistributedHashMap.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/DistributedHashMap.java @@ -70,6 +70,7 @@ public class DistributedHashMap { Object getKey(Object key) { int hashcode=key.hashCode(); int index1=hash1(hashcode, table.length); + DistributedHashEntry dhe=table[index1]; if (dhe==null) return null; @@ -132,6 +133,7 @@ public class DistributedHashMap { dhe.array=he; dhe.count++; + return null; } } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/FilterResult.java b/Robust/src/Benchmarks/Distributed/SpamFilter/FilterResult.java index 2eab89cc..196006be 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/FilterResult.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/FilterResult.java @@ -21,28 +21,28 @@ public class FilterResult { // ----------------------------------------------------------------------------- public FilterResult(double result) { - SPAM_THRESHOLD=500; - ABSOLUTE_SPAM=1000; + SPAM_THRESHOLD=50; + ABSOLUTE_SPAM=100; ABSOLUTE_HAM=0; NO_RESULT=-1; //this.result = result; } public FilterResult() { - SPAM_THRESHOLD=500; - ABSOLUTE_SPAM=1000; + SPAM_THRESHOLD=50; + ABSOLUTE_SPAM=100; ABSOLUTE_HAM=0; NO_RESULT=-1; } public boolean getResult(int[] confidenceVals) { - int[] res = new int[3]; + int[] res = new int[3]; //3 equals spam, ham and unknown for(int i=0; i= 0 && confidenceVals[i] < 500) + if(confidenceVals[i] >= 0 && confidenceVals[i] < SPAM_THRESHOLD) res[1]+=1; //ham - if(confidenceVals[i] > SPAM_THRESHOLD) + if(confidenceVals[i] >= SPAM_THRESHOLD) res[2]+=1;//spam } int maxVotes=0; @@ -60,7 +60,7 @@ public class FilterResult { if(max==2) return true; - System.out.println("Err: getResult() Control shouldn't come here\n"); + System.out.println("Err: getResult() Control shouldn't come here, max= " + max); return false; } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/FilterStatistic.java b/Robust/src/Benchmarks/Distributed/SpamFilter/FilterStatistic.java index e4fbac54..2e326a5d 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/FilterStatistic.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/FilterStatistic.java @@ -56,4 +56,11 @@ public class FilterStatistic { public void increaseUnknown() { setUnknown(getUnknown() + 1); } + + public String toString() { + String str = "Filterstats_spam_"+spam; + str += "_ham_" +ham; + str += "_unknown_"+unknown; + return str; + } } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/GString.java b/Robust/src/Benchmarks/Distributed/SpamFilter/GString.java index 7ac34249..89726a3d 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/GString.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/GString.java @@ -131,4 +131,37 @@ public class GString { else return o.toString(); } + + public String toLocalString() { + return new String(toLocalCharArray(this)); + } + + public static char[] toLocalCharArray(GString str) { + char[] c; + int length; + length = str.length(); + c = new char[length]; + for (int i = 0; i < length; i++) { + c[i] = str.value[i+str.offset]; + } + return c; + } + + public int hashCode() { + String s = this.toLocalString(); + return s.hashCode(); + } + + public boolean equals(Object o) { + if(o == null) + return false; + if(!(o instanceof GString)) + return false; + GString gs = (GString)o; + String s1 = gs.toLocalString(); + String s2 = this.toLocalString(); + if(s2.equals(s1)) + return true; + return false; + } } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java b/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java index 0a1d69b5..2021a691 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java @@ -15,7 +15,7 @@ public class HashEntry { result = engine.hashCode(); result ^= signature.hashCode(); //result ^= stats.hashCode(); - System.out.println("result= " + result); + //System.out.println("HashEntry: hashCode= " + result); return result; } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java b/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java index 905d728b..de6b748f 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java @@ -27,11 +27,16 @@ public class Mail { public Mail(String fileName) // read a mail from file { + //System.out.println("fileName= " + fileName); + FileInputStream fileinput = new FileInputStream(fileName); String line; - + boolean chk = false; + while((line = fileinput.readLine()) != null) { + chk = true; + Vector splittedLine = line.split(); if(((String)(splittedLine.elementAt(0))).equals("Spam:")) { @@ -60,12 +65,18 @@ public class Mail { } } // parsed messageID, To, from, cc, Title + if(!chk) + System.out.println("no line read"); + + body = new String(); while((line = fileinput.readLine()) != null) { body += line; } + + fileinput.close(); } // ------------------------------------------------------- @@ -264,7 +275,17 @@ public class Mail { return body; } - + /* TODO add this to process entire email + public Vector returnEmail() { + Vector myemail = new Vector(); + + myemail.addElement(getCommonPart()); + myemail.addElement(getURLs()); + myemail.addElement(getSplittedBody()); + return myemail; + } + */ + public Vector getURLs() { Vector returnStrings = new Vector(); @@ -356,7 +377,7 @@ public class Mail { //Preprocess emails //Vector partsOfMailStrings = mail.createMailStringsWithURL(); Vector partsOfMailStrings = getCommonPart(); - partsOfMailStrings.addElement(getBodyString()); + //partsOfMailStrings.addElement(getBodyString()); //Compute signatures SignatureComputer sigComp = new SignatureComputer(); diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/SignatureComputer.java b/Robust/src/Benchmarks/Distributed/SpamFilter/SignatureComputer.java index 1986d080..b4d2c77d 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/SignatureComputer.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/SignatureComputer.java @@ -85,15 +85,18 @@ public class SignatureComputer { for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) { int engineNo = enginesToUseForCheck[engineIndex]; String sig = null; + /* EphemeralSignature calculator */ if(engineNo==4) { sig = computeSignature(engineNo,mail); - } + } + /* if(engineNo==8) { sig = computeSignature(engineNo,mail); - } + } if(engineNo!=4 || engineNo!=8) { - System.out.println("Couldn't find the signature engine\n"); + System.out.println("Err: Couldn't find the signature engine: " + engineNo); } + */ if (sig != null) { String hash = engineNo + ":" + sig; @@ -108,7 +111,7 @@ public class SignatureComputer { /** * @param engineNo - * @param cleaned + * @param email * @return */ private String computeSignature(int engineNo, String mail) { @@ -118,14 +121,11 @@ public class SignatureComputer { //return new String { this.sig4.computeSignature(mail) }; } - /* if(engineNo==8) { //String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML); //return this.sig8.computeSignature(cleanedButKeepHTML); - return this.sig8.computeSignature(mail); + //return this.sig8.computeSignature(mail); } - */ - return null; } } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java b/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java index 14de5ed1..db7ab448 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java @@ -43,24 +43,14 @@ public class SpamFilter extends Thread { Random rand = new Random(thid); Random myrand = new Random(0); - /* - if(id==0) { - //Randomly set Spam vals for each email - for(int i=0; i