* A FilterResult encapsulates the result of a filter made by checking a mail.
**/
public class FilterResult {
- /**
- * This value is used if type is ERROR or UNKNOWN.
- */
- public double NO_RESULT;
-
- /**
- * A result value greater or equal this value indicates that the filter has
- * decided on spam.
- */
- public double SPAM_THRESHOLD;
- public double ABSOLUTE_SPAM;
- public double ABSOLUTE_HAM;
-
- //TODO decide a good way of deciding
- public double result; // the result, a value between 0 (ham) and 1 (spam), negative values for "error", "unknown" etc.
-
- //public HashMap<String,String> properties = new HashMap<String,String>(); // additional properties of the filter (mainly for statistics)
-
- // -----------------------------------------------------------------------------
-
- public FilterResult(double result) {
- SPAM_THRESHOLD=0.5;
- ABSOLUTE_SPAM=1.0;
- ABSOLUTE_HAM=0.0;
- NO_RESULT=-1;
- this.result = result;
- }
+ /**
+ * This value is used if type is ERROR or UNKNOWN.
+ */
+ public double NO_RESULT;
+
+ /**
+ * A result value greater or equal this value indicates that the filter has
+ * decided on spam.
+ */
+ public int SPAM_THRESHOLD;
+ public int ABSOLUTE_SPAM;
+ public int ABSOLUTE_HAM;
+
+ // the result, a value between 0 (ham) and 1000 (spam),
+ // negative values for "error", "unknown" etc.
+ // (must stay declared: the constructors, getResult() and isSpam() all use it)
+ public double result;
+
+ // -----------------------------------------------------------------------------
+
+ /**
+ * Creates a result holding the given score on the 0..1000 scale.
+ */
+ public FilterResult(double result) {
+ SPAM_THRESHOLD=500;
+ ABSOLUTE_SPAM=1000;
+ ABSOLUTE_HAM=0;
+ NO_RESULT=-1;
+ this.result = result;
+ }
- public double getResult() {
- return result;
- }
+ /**
+ * Creates a result that carries no score yet: result starts at NO_RESULT
+ * rather than at 0, which would read as ABSOLUTE_HAM.
+ */
+ public FilterResult() {
+ SPAM_THRESHOLD=500;
+ ABSOLUTE_SPAM=1000;
+ ABSOLUTE_HAM=0;
+ NO_RESULT=-1;
+ this.result = NO_RESULT;
+ }
+
+ /**
+ * Returns the stored score.
+ */
+ public double getResult() {
+ return result;
+ }
+
+ /**
+ * Returns true if the stored score reaches SPAM_THRESHOLD.
+ */
+ public boolean isSpam() {
+ return result >= SPAM_THRESHOLD;
+ }
+
+ /**
+ * Majority vote over per-signature confidence values.
+ * Each value votes "unknown" (negative), "ham" (0 up to but excluding
+ * SPAM_THRESHOLD) or "spam" (SPAM_THRESHOLD and above, same boundary as
+ * isSpam()). On a tie the earlier category wins (unknown, then ham, then spam).
+ *
+ * @param confidenceVals one confidence value per checked signature; may be null or empty
+ * @return true only if "spam" collected strictly more votes than both other categories
+ */
+ public boolean getResult(int[] confidenceVals) {
+ if(confidenceVals == null) {
+ return false;
+ }
+ int[] res = new int[3];
+ for(int i=0; i<confidenceVals.length; i++) {
+ if(confidenceVals[i] < 0) {
+ res[0]+=1; //unknown
+ } else if(confidenceVals[i] < SPAM_THRESHOLD) {
+ res[1]+=1; //ham
+ } else {
+ res[2]+=1; //spam
+ }
+ }
+ int maxVotes=0;
+ int max=0; // with no votes at all the verdict stays "unknown"
+ for(int i=0; i<3;i++) {
+ if(res[i] > maxVotes) {
+ maxVotes = res[i];
+ max = i;
+ }
+ }
- public boolean isSpam() {
- return result >= SPAM_THRESHOLD;
- }
+ return max == 2;
+ }
- /*
- public void addProperty(String key, String value) {
- properties.put(key,value);
- }
+ /*
+ public void addProperty(String key, String value) {
+ properties.put(key,value);
+ }
- public String getProperty(String key) {
- return properties.get(key);
- }
+ public String getProperty(String key) {
+ return properties.get(key);
+ }
- public HashMap<String,String> getProperties() {
- return properties;
- }
- */
+ public HashMap<String,String> getProperties() {
+ return properties;
+ }
+ */
}
public class FilterStatistic {
- int unknown;
- int spam;
- int ham;
+ int unknown; // counter raised by increaseUnknown()
+ int spam; // counter raised by increaseSpam()
+ int ham; // counter raised by increaseHam()
- // -------------------------------------------------------
-
- public FilterStatistic() {
- this(0,0,0);
- }
+ // -------------------------------------------------------
- public FilterStatistic(int spam, int ham, int unknown) {
- this.spam = spam;
- this.ham = ham;
- this.unknown = unknown;
- }
+ public FilterStatistic() { // starts with all three counters at zero
+ this(0,0,0);
+ }
- public int getChecked() {
- //TODO Change this formula
- return getSpam() + getHam() + getUnknown();
- }
+ public FilterStatistic(int spam, int ham, int unknown) { // starts with the given counter values
+ this.spam = spam;
+ this.ham = ham;
+ this.unknown = unknown;
+ }
- public int getHam() {
- return ham;
- }
+ public int getChecked() { // total of the three counters
+ return getSpam() + getHam() + getUnknown();
+ }
- public int getSpam() {
- return spam;
- }
+ public int getHam() {
+ return ham;
+ }
- public String getName() {
- return name;
- }
+ public int getSpam() {
+ return spam;
+ }
- public void setHam(int i) {
- ham = i;
- }
+ public String getName() { // NOTE(review): no declaration of 'name' is visible in this class - confirm it exists
+ return name;
+ }
- public void setSpam(int i) {
- spam = i;
- }
+ public void setHam(int i) {
+ ham = i;
+ }
- public int getUnknown() {
- return unknown;
- }
+ public void setSpam(int i) {
+ spam = i;
+ }
- public void setUnknown(int u) {
- unknown = u;
- }
+ public int getUnknown() {
+ return unknown;
+ }
- public void increaseSpam() {
- setSpam(getSpam() + 1);
- }
+ public void setUnknown(int u) {
+ unknown = u;
+ }
- public void increaseHam() {
- setHam(getHam() + 1);
- }
+ public void increaseSpam() { // spam counter + 1, routed through the accessor pair
+ setSpam(getSpam() + 1);
+ }
- public void increaseUnknown() {
- setUnknown(getUnknown() + 1);
- }
+ public void increaseHam() { // ham counter + 1, routed through the accessor pair
+ setHam(getHam() + 1);
+ }
- public void setName(String name) {
- this.name = name;
- }
+ public void increaseUnknown() { // unknown counter + 1, routed through the accessor pair
+ setUnknown(getUnknown() + 1);
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
}
// return false;
return true;
}
+
+ /**
+ * Adds up getChecked() of the per-user statistic for every user id that
+ * stats.getUsers() reports, and returns that sum.
+ */
+ public int askForSpam() {
+ int total = 0;
+ Vector knownUsers = stats.getUsers();
+ int idx = 0;
+ while(idx < knownUsers.size()) {
+ int uid = (int) (knownUsers.elementAt(idx));
+ total = total + stats.userstat[uid].getChecked();
+ idx++;
+ }
+ return total;
+ }
}
public class HashStat {
int[] userid;
FilterStatistic[] userstat;
+ Vector listofusers;
// Builds the per-user tables. Every slot is allocated up front because
// setuser() and getunknowncount() dereference userstat[id] directly, and
// the user list is created so it is never null when read.
public HashStat() {
userid = new int[8]; //max users for our system=8
userstat = new FilterStatistic[8];
+ for(int i=0; i<userstat.length; i++) {
+ userstat[i] = new FilterStatistic();
+ }
+ listofusers = new Vector();
}
public void setuser(int id, int spam, int ham, int unknown) {
+ userid[id] = 1;
userstat[id].setSpam(spam);
userstat[id].setHam(ham);
userstat[id].setUnknown(unknown);
// Returns getUnknown() of the statistic stored in slot 'userid'.
// The slot is dereferenced without a bounds or null check.
public int getunknowncount(int userid) {
return userstat[userid].getUnknown();
}
+
+ /**
+ * Returns the ids of all slots marked as used (userid[i] == 1).
+ * The list is rebuilt from scratch on every call, so repeated calls
+ * never accumulate duplicate ids; the fresh list also replaces listofusers.
+ */
+ public Vector getUsers() {
+ Vector users = new Vector();
+ for(int i=0; i<userid.length; i++) {
+ if(userid[i] == 1) {
+ users.addElement(i);
+ }
+ }
+ listofusers = users;
+ return users;
+ }
+
+ /**
+ * Counts the slots marked as used (userid[i] == 1).
+ * Pure query: it no longer appends to listofusers while counting.
+ */
+ public int numUsers() {
+ int count=0;
+ for(int i=0; i<userid.length; i++) {
+ if(userid[i] == 1) {
+ count++;
+ }
+ }
+ return count;
+ }
}
String messageID; // cached message ID for reuse (takes a lot of memory and is used all over the place)
//same as hashcode of a class
+ boolean isSpam;
public Mail() {
messageID=null;
return returnStrings;
}
+ /** Stores the given spam flag on this mail. */
+ public void setIsSpam(boolean spam) {
+ this.isSpam = spam;
+ }
+
+ /** Returns the spam flag last stored on this mail. */
+ public boolean getIsSpam() {
+ return this.isSpam;
+ }
+
public static void main(String[] args)
{
Mail mail = new Mail("./emails/email1");
System.out.println(b);
}
}
-
}
public class SignatureComputer {
- public EphemeralSignature sig4; //signature engines
- public WhiplashSignature sig8; //signature engines
+ public EphemeralSignature sig4; //signature engines
+ public WhiplashSignature sig8; //signature engines
- int[] enginesToUseForCheck;
+ int[] enginesToUseForCheck;
- public SignatureComputer() {
- sig4 = new EphemeralSignature(); //default values
- sig8 = new WhiplashSignature();
- createEnginesToCheck();
- }
+ public SignatureComputer() {
+ sig4 = new EphemeralSignature(); //default values
+ sig8 = new WhiplashSignature();
+ createEnginesToCheck();
+ }
- /**
- * constructor to be used when some parsing has already taken place with the
- * server-provides value <code>randomNumberSeed</code>.
- *
- * @param randomNumberSeed
- * a non-negative number used for seeding the random number generator
- * before starting to hash values.
- * @param separator
- * how the mail-text should be splitted into lines. (== what chars
- * separate 2 lines)
- */
- public SignatureComputer(int randomNumberSeed, String separator) {
- sig4 = new EphemeralSignature(randomNumberSeed,separator);
- sig8 = new WhiplashSignature();
- createEnginesToCheck();
- }
+ /**
+ * constructor to be used when some parsing has already taken place with the
+ * server-provided value <code>randomNumberSeed</code>.
+ *
+ * @param randomNumberSeed
+ * a non-negative number used for seeding the random number generator
+ * before starting to hash values.
+ * @param separator
+ * how the mail-text should be split into lines. (== what chars
+ * separate 2 lines)
+ */
+ public SignatureComputer(int randomNumberSeed, String separator) {
+ sig4 = new EphemeralSignature(randomNumberSeed,separator);
+ sig8 = new WhiplashSignature();
+ createEnginesToCheck();
+ }
- /**
- * the constructor to be used most of the time. you can hand over the
- * seed-string exactly as it is provided by the razor-server.
- *
- * @param seedAndSeparator
- * a string containing the seed value for the RNG and a separator list
- * (separated by ' <b>- </b>'). default value is
- * <code>"7542-10"</code> which means server-seed 7542 and only one
- * separator 10 (which is ascii '\n').
- */
- public SignatureComputer(String seedAndSeparator) {
- sig4 = new EphemeralSignature(seedAndSeparator);
- sig8 = new WhiplashSignature();
- createEnginesToCheck();
- }
+ /**
+ * the constructor to be used most of the time. you can hand over the
+ * seed-string exactly as it is provided by the razor-server.
+ *
+ * @param seedAndSeparator
+ * a string containing the seed value for the RNG and a separator list
+ * (separated by ' <b>- </b>'). default value is
+ * <code>"7542-10"</code> which means server-seed 7542 and only one
+ * separator 10 (which is ascii '\n').
+ */
+ public SignatureComputer(String seedAndSeparator) {
+ sig4 = new EphemeralSignature(seedAndSeparator);
+ sig8 = new WhiplashSignature();
+ createEnginesToCheck();
+ }
- /**
- *
- */
- public void createEnginesToCheck() {
- enginesToUseForCheck = new int[2];
- enginesToUseForCheck[0] = 4; //Ephemeral engine
- enginesToUseForCheck[1] = 8;//Whiplash engine
- }
+ /**
+ * Fills enginesToUseForCheck with the two supported engine ids (4 and 8).
+ */
+ public void createEnginesToCheck() {
+ enginesToUseForCheck = new int[2];
+ enginesToUseForCheck[0] = 4; //Ephemeral engine
+ enginesToUseForCheck[1] = 8;//Whiplash engine
+ }
- public boolean isSigSupported(int sig) {
- boolean found = false;
- for (int i = 0; i < enginesToUseForCheck.length && !found; i++) {
- if (enginesToUseForCheck[i] == sig) {
- found = true;
- }
- }
- return found;
- }
+ public boolean isSigSupported(int sig) {
+ boolean found = false;
+ for (int i = 0; i < enginesToUseForCheck.length && !found; i++) {
+ if (enginesToUseForCheck[i] == sig) {
+ found = true;
+ }
+ }
+ return found;
+ }
- public boolean isSigSupported(String sig) {
- return (sig != null && isSigSupported(Integer.parseInt(sig)));
- }
+ public boolean isSigSupported(String sig) {
+ return (sig != null && isSigSupported(Integer.parseInt(sig)));
+ }
- public String getDefaultEngine() {
- return "4";
- }
+ public String getDefaultEngine() {
+ return "4";
+ }
- public Vector computeSigs(StringBuffer[] EmailParts) {
- if (EmailParts == null) return null;
+ /**
+ * Computes one printable signature per mail part and engine.
+ *
+ * @param EmailParts
+ * vector of strings, one per mail part; null entries are skipped
+ * @return vector of strings of the form <code>engineNo:signature</code>,
+ * or <code>null</code> if <code>EmailParts</code> is null
+ */
+ public Vector computeSigs(Vector EmailParts) {
+ if (EmailParts == null) return null;
- Vector printableSigs = new Vector(); // vector of strings
- for (int mailIndex = 0; mailIndex < EmailParts.length; mailIndex++) {
- StringBuffer mail = EmailParts[mailIndex];
+ Vector printableSigs = new Vector(); // vector of strings
+ for (int mailIndex = 0; mailIndex < EmailParts.size(); mailIndex++) {
+ // raw Vector hands back Object, so the element has to be cast
+ String mail = (String)(EmailParts.elementAt(mailIndex));
- if (mail == null) continue;
+ if (mail == null) continue;
- /*
- * Compute Sig for bodyparts that are cleaned.
- */
- for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) {
- int engineNo = enginesToUseForCheck[engineIndex];
- String sig = null;
+ /*
+ * Compute Sig for bodyparts that are cleaned.
+ */
+ for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) {
+ int engineNo = enginesToUseForCheck[engineIndex];
+ String sig = null;
- switch (engineNo) {
- case 4:
- sig = computeSignature(engineNo,mail.toString());
- break;
- case 8:
- sig = computeSignature(engineNo,mail.toString());
- break;
- default:
- System.out.println("Couldn't find the signature engine\n");
- //sig = computeSignature(engineNo,curPart.getCleaned());
- break;
- }//switch engineNo
+ switch (engineNo) {
+ case 4:
+ sig = computeSignature(engineNo,mail);
+ break;
+ case 8:
+ sig = computeSignature(engineNo,mail);
+ break;
+ default:
+ System.out.println("Couldn't find the signature engine\n");
+ //sig = computeSignature(engineNo,curPart.getCleaned());
+ break;
+ }//switch engineNo
- if (sig != null && sig.length > 0) {
- String hash = engineNo + ":" + sig[curSigIndex];
- printableSigs.add(hash);
- } else {
- /* we didn't produce a signature for the mail. */
- }
- }//engine
- }//each emails part
- return printableSigs;
- }//computeSigs
+ // sig is a single String now, not an array of signatures
+ if (sig != null && sig.length() > 0) {
+ String hash = engineNo + ":" + sig;
+ printableSigs.add(hash);
+ } else {
+ /* we didn't produce a signature for the mail. */
+ }
+ }//engine
+ }//each emails part
+ return printableSigs;
+ }//computeSigs
- /**
- * @param engineNo
- * @param cleaned
- * @return
- */
- private String[] computeSignature(int engineNo, String mail) {
- switch (engineNo) {
- case 4:
- return new String[] { this.sig4.computeSignature(mail) };
- case 8:
- //TODO device and equivalent for this
- //String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML);
- return this.sig8.computeSignature(cleanedButKeepHTML);
- default:
- return null;
- }
- }
+ /**
+ * Computes the signature of one mail part with the selected engine.
+ *
+ * @param engineNo
+ * engine id: 4 (Ephemeral) or 8 (Whiplash)
+ * @param mail
+ * text of the mail part to sign
+ * @return the signature, or <code>null</code> for any other engine id
+ */
+ private String computeSignature(int engineNo, String mail) {
+ switch (engineNo) {
+ case 4:
+ return this.sig4.computeSignature(mail);
+ case 8:
+ //TODO devise an equivalent for this
+ //String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML);
+ // NOTE(review): until that preprocessing exists the part is signed as given; this
+ // also assumes WhiplashSignature.computeSignature returns a String - confirm.
+ return this.sig8.computeSignature(mail);
+ default:
+ return null;
+ }
+ }
}
for(int i=0; i<niter; i++) {
for(int j=0; j<nemails; j++) {
int pickemail = rand.nextInt(100);
- //String email = getEmail(pickemail);
- checkMail(email, thid);
+ //Mail email = getEmail(pickemail);
+ boolean filterAnswer = checkMail(email, thid);
+ boolean userAnswer = email.getIsSpam();
+ if(filterAnswer != userAnswer) {
+ sendFeedBack(email);
+ }
}
}
}
}
/**
- * Returns signatures to the Spam filter
+ * Returns result to the Spam filter: computes the signatures of the mail,
+ * looks them up through check() and returns the majority verdict that
+ * FilterResult.getResult(int[]) derives from the confidence values.
+ *
+ * @return true if the mail is judged to be spam
**/
- public FilterResult[] checkMail(Mail mail, int userid) {
+ public boolean checkMail(Mail mail, int userid) {
//Preprocess emails
- //String[] partsOfMailStrings = createMailStrings();
- //RazorMail[] razorMails =
+ //Vector partsOfMailStrings = createMailStrings(mail);
+ // NOTE(review): partsOfMailStrings is used below, but its only visible
+ // declaration is the commented-out line above - confirm where it comes from.
+
//Compute signatures
SignatureComputer sigComp = new SignatureComputer();
Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings
-
+
//check with global data structure
- check(signatures, userid);
+ int[] confidenceVals = check(signatures,userid);
//---- create and return results --------
- FilterResult[] filterResults = new FilterResult[mailStrings.length];
+ FilterResult filterResult = new FilterResult();
+ // vote over the confidence values; the no-arg getResult() returns a double score instead
+ boolean spam = filterResult.getResult(confidenceVals);
- return filterResults;
+ return spam;
}
- public void check(Vector emailParts, int userid) {
- for(int i=0; i<emailParts.size(); i++) {
+ /**
+ * Looks up every signature string of a mail in the global hash map and
+ * collects one confidence value per signature.
+ * A signature that is not in the map yet is inserted and reported as -1,
+ * which FilterResult.getResult(int[]) counts as "unknown"; a known
+ * signature reports the value of HashEntry.askForSpam().
+ *
+ * @param emailParts vector of signature strings; may be null
+ * @param userid id of the user who checks the mail
+ * @return one confidence value per signature (empty if emailParts is null)
+ */
+ public int[] check(Vector emailParts, int userid) {
+ if(emailParts == null) {
+ return new int[0];
+ }
+ int numparts = emailParts.size();
+ int[] confidenceVals = new int[numparts];
+ for(int i=0; i<numparts; i++) {
String part = (String)(emailParts.elementAt(i));
char tmpengine = part.charAt(0);
String engine = new String(tmpengine);
myhe.setsig(signature);
//find object in distributedhashMap: if no object then add object
//else read object
- HashEntry tmphe;
- if((tmphe=(HashEntry)mydhmap.get(myhe))== null) {
+ HashEntry tmphe= (HashEntry)(mydhmap.get(myhe));
+ if(tmphe == null) {
//add new object
myhe.stats = new HashStat();
- myhe.stats.setuser(userid, 0, 0, 1);
+ myhe.stats.setuser(userid, 0, 0, -1);
+ // values are read back as HashEntry above, so the entry itself is stored
+ mydhmap.put(myhe, myhe);
+ // first sighting of this signature: report "unknown", not the default 0 (ham)
+ confidenceVals[i] = -1;
} else {
- //else if read object
- Vector<String> enginesToSend = new Vector<String>();
- Vector<String> sigsToSend = new Vector<String>();
-
- for (RazorMail mail : razorMails) {
- for (int partNr = 0; partNr < mail.getPartSize(); partNr++) {
- Part part = mail.getPart(partNr);
- if (part.skipMe()) {
- continue;
- }
-
- for (Iterator<String> hashIter = part.getHashIterator(); hashIter.hasNext();) {
- String curHash = (String)hashIter.next();
- String[] engineHashSplit = curHash.split(":");
- String engine = engineHashSplit[0];
- String signature = engineHashSplit[1];
- enginesToSend.add(engine);
- sigsToSend.add(signature);
- }
- }
- }
-
- if (sigsToSend.size() == 0) { // nothing to send
- return;
- }
-
- String[] enginesToSendArr = new String[enginesToSend.size()];
- enginesToSend.toArray(enginesToSendArr);
- String[] sigsToSendArr = new String[sigsToSend.size()];
- sigsToSend.toArray(sigsToSendArr);
-
- // ----- now connect to server and ask query -----
- int[] confidenceVals = null;
- RazorCommunicationEngine checkEngine = getCheckEngine();
- try {
- checkEngine.connect();
- confidenceVals = checkEngine.askForSpam(sigsToSendArr,enginesToSendArr);
- checkEngine.disconnect();
- } finally {
- checkEngines.add(checkEngine);
- }
-
- if (confidenceVals == null) {
- System.err.println("check got no answer from server. error.");
- return; // error
- }
-
- if (confidenceVals.length != sigsToSendArr.length) {
- throw new IllegalStateException("We got not enough answers from server. expected: " + sigsToSendArr.length + " received: " + confidenceVals.length);
- }
-
- // ----- now dispatch the answers to the mail objects -----
- int answerIndex = 0;
- for (RazorMail mail : razorMails) {
- for (int partNr = 0; partNr < mail.getPartSize(); partNr++) {
- Part part = mail.getPart(partNr);
- if (part.skipMe()) {
- continue;
- }
-
- for (Iterator<String> hashIter = part.getHashIterator(); hashIter.hasNext();) {
- String curHash = hashIter.next();
- part.setResponse(curHash,String.valueOf(confidenceVals[answerIndex++]));
- }
- }
- }
- // --> after this loop the mail is able to determine if it is spam or not
+ // ----- now connect to global data structure and ask query -----
+ // askForSpam() takes no arguments
+ confidenceVals[i] = tmphe.askForSpam();
}
}
+
+ // --> the mail client is able to determine if it is spam or not
+ return confidenceVals;
}
}