From 94901a4dd220a36f71bc786f718409bddc2d2aa3 Mon Sep 17 00:00:00 2001 From: adash Date: Sat, 31 Oct 2009 02:19:18 +0000 Subject: [PATCH] more changes to check emails --- .../Distributed/SpamFilter/HashEntry.java | 19 +++- .../Distributed/SpamFilter/HashStat.java | 6 ++ .../SpamFilter/SignatureComputer.java | 58 +++------- .../Distributed/SpamFilter/SpamFilter.java | 102 ++++++++++++++++-- 4 files changed, 133 insertions(+), 52 deletions(-) diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java b/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java index 31d60aad..c3014f58 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java @@ -14,10 +14,23 @@ public class HashEntry { // this will not work well if some of the strings are equal. result = engine.hashCode(); result ^= signature.hashCode(); - result ^= stats.hashCode(); + //result ^= stats.hashCode(); + System.out.println("result= " + result); return result; } + public void setengine(String engine) { + this.engine=engine; + } + + public void setstats(HashStat stats) { + this.stats=stats; + } + + public void setsig(String signature) { + this.signature=signature; + } + public String getEngine() { return engine; } @@ -38,8 +51,8 @@ public class HashEntry { return false; if(!(he.getSignature().equals(Signature))) return false; - if(!(he.getStats().equals(stats))) - return false; + //if(!(he.getStats().equals(stats))) + // return false; return true; } } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java b/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java index 68b3ebf5..e475087a 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java @@ -9,6 +9,12 @@ public class HashStat { } } + public void setuser(int id, int spam, int ham, int unknown) { + userstat[id].setSpam(spam); + userstat[id].setHam(ham); + 
userstat[id].setUnknown(unknown); + } + public int getuser(int id) { return userid[id]; } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/SignatureComputer.java b/Robust/src/Benchmarks/Distributed/SpamFilter/SignatureComputer.java index 07611172..e0ffd26a 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/SignatureComputer.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/SignatureComputer.java @@ -70,12 +70,12 @@ public class SignatureComputer { return "4"; } - public Vector computeSigs(StringBuffer[] Mails) { - if (Mails == null) return null; + public Vector computeSigs(StringBuffer[] EmailParts) { + if (EmailParts == null) return null; - Vector printableSigs = new Vector(); - for (int mailIndex = 0; mailIndex < Mails.length; mailIndex++) { - StringBuffer mail = Mails[mailIndex]; + Vector printableSigs = new Vector(); // vector of strings + for (int mailIndex = 0; mailIndex < EmailParts.length; mailIndex++) { + StringBuffer mail = EmailParts[mailIndex]; if (mail == null) continue; @@ -84,36 +84,29 @@ public class SignatureComputer { */ for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) { int engineNo = enginesToUseForCheck[engineIndex]; - String[] sig = null; + String sig = null; switch (engineNo) { case 4: - sig = computeSignature(engineNo,curPart.getCleaned()); + sig = computeSignature(engineNo,mail.toString()); break; case 8: - sig = computeSignature(engineNo,curPart.getBody()); + sig = computeSignature(engineNo,mail.toString()); break; default: - /* - * for nilsimsa and sha1 wich are no longer supported by - * the server and might be removed someday - */ - sig = computeSignature(engineNo,curPart.getCleaned()); + System.out.println("Couldn't find the signature engine\n"); + //sig = computeSignature(engineNo,curPart.getCleaned()); break; }//switch engineNo if (sig != null && sig.length > 0) { - for (int curSigIndex = 0; curSigIndex < sig.length; curSigIndex++) { - String hash = engineNo + ":" + 
sig[curSigIndex]; - curPart.addHash(hash); - printableSigs.add(hash); - } - + String hash = engineNo + ":" + sig[curSigIndex]; + printableSigs.add(hash); } else { /* we didn't produce a signature for the mail. */ } }//engine - }//mails + }//each emails part return printableSigs; }//computeSigs @@ -127,32 +120,11 @@ public class SignatureComputer { case 4: return new String[] { this.sig4.computeSignature(mail) }; case 8: - String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML); + //TODO device and equivalent for this + //String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML); return this.sig8.computeSignature(cleanedButKeepHTML); default: return null; } } - - public static String[] getCommonSupportedEngines(int serverSupportedEngines) { - Vector commonSupported = new Vector(); - int engineMask = 1; - int engineIndex = 1; - while (engineIndex < 32) { - boolean serverSupported = (serverSupportedEngines & engineMask) > 0; - boolean clientSupported = isSigSupported(engineIndex); - if (serverSupported && clientSupported) { - commonSupported.add(String.valueOf(engineIndex)); - } - //switch to next - engineMask <<= 1; //shift one to left - engineIndex++; - } - if (commonSupported.size() == 0) { - return null; - } - String[] result = new String[commonSupported.size()]; - commonSupported.toArray(result); - return result; - } } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java b/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java index 7a9b62b4..e134e042 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java @@ -12,7 +12,7 @@ public class SpamFilter extends Thread { } - public SpamFilter(int numiter, int numemail,int threadid) { + public SpamFilter(int numiter, int numemail,int id) { this.numiter=numiter; this.numemail=numemail; this.id = id; @@ -20,10 +20,12 @@ public class 
SpamFilter extends Thread { public void run() { int niter; - int nemails + int nemails; + int thid; atomic { niter=numiter; nemails=numemails; + thid = id; } Random rand = new Random(0); @@ -32,7 +34,7 @@ public class SpamFilter extends Thread { for(int j=0; j enginesToSend = new Vector(); + Vector sigsToSend = new Vector(); + + for (RazorMail mail : razorMails) { + for (int partNr = 0; partNr < mail.getPartSize(); partNr++) { + Part part = mail.getPart(partNr); + if (part.skipMe()) { + continue; + } + + for (Iterator hashIter = part.getHashIterator(); hashIter.hasNext();) { + String curHash = (String)hashIter.next(); + String[] engineHashSplit = curHash.split(":"); + String engine = engineHashSplit[0]; + String signature = engineHashSplit[1]; + enginesToSend.add(engine); + sigsToSend.add(signature); + } + } + } + + if (sigsToSend.size() == 0) { // nothing to send + return; + } + + String[] enginesToSendArr = new String[enginesToSend.size()]; + enginesToSend.toArray(enginesToSendArr); + String[] sigsToSendArr = new String[sigsToSend.size()]; + sigsToSend.toArray(sigsToSendArr); + + // ----- now connect to server and ask query ----- + int[] confidenceVals = null; + RazorCommunicationEngine checkEngine = getCheckEngine(); + try { + checkEngine.connect(); + confidenceVals = checkEngine.askForSpam(sigsToSendArr,enginesToSendArr); + checkEngine.disconnect(); + } finally { + checkEngines.add(checkEngine); + } + + if (confidenceVals == null) { + System.err.println("check got no answer from server. error."); + return; // error + } + + if (confidenceVals.length != sigsToSendArr.length) { + throw new IllegalStateException("We got not enough answers from server. 
expected: " + sigsToSendArr.length + " received: " + confidenceVals.length); + } + + // ----- now dispatch the answers to the mail objects ----- + int answerIndex = 0; + for (RazorMail mail : razorMails) { + for (int partNr = 0; partNr < mail.getPartSize(); partNr++) { + Part part = mail.getPart(partNr); + if (part.skipMe()) { + continue; + } + + for (Iterator hashIter = part.getHashIterator(); hashIter.hasNext();) { + String curHash = hashIter.next(); + part.setResponse(curHash,String.valueOf(confidenceVals[answerIndex++])); + } + } + } + // --> after this loop the mail is able to determine if it is spam or not + } + } + } } -- 2.34.1