From bb8c39b6d5c4f4deddcd8960a950570137a94396 Mon Sep 17 00:00:00 2001 From: adash <adash> Date: Thu, 5 Nov 2009 21:52:09 +0000 Subject: [PATCH] added Global string and other changes for compilation --- .../SpamFilter/EphemeralSignature.java | 4 +- .../Distributed/SpamFilter/GString.java | 134 ++++++++++++++++++ .../Distributed/SpamFilter/HashEntry.java | 12 +- .../Distributed/SpamFilter/HashStat.java | 8 +- .../Distributed/SpamFilter/Mail.java | 12 +- .../Distributed/SpamFilter/SpamFilter.java | 92 +++++++++--- .../Distributed/SpamFilter/emails/gen.c | 10 ++ .../Distributed/SpamFilter/makefile | 7 +- 8 files changed, 238 insertions(+), 41 deletions(-) create mode 100644 Robust/src/Benchmarks/Distributed/SpamFilter/GString.java diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/EphemeralSignature.java b/Robust/src/Benchmarks/Distributed/SpamFilter/EphemeralSignature.java index f938aac9..52616681 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/EphemeralSignature.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/EphemeralSignature.java @@ -1,7 +1,7 @@ public class EphemeralSignature { - private int serverSeed; - private String serverSeparator; + int serverSeed; + String serverSeparator; Random rand; public EphemeralSignature() { diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/GString.java b/Robust/src/Benchmarks/Distributed/SpamFilter/GString.java new file mode 100644 index 00000000..7ac34249 --- /dev/null +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/GString.java @@ -0,0 +1,134 @@ +public class GString { + char value[]; + int count; + int offset; + + public GString() { + } + + public GString(char c) { + char[] str = global new char[1]; + str[0] = c; + GString(str); + } + + public GString(String str) { + value = global new char[str.count]; + for(int i =0; i< str.count;i++) { + value[i] = str.value[i+str.offset]; + } + count = str.count; + offset = 0; + } + + public GString(GString gstr) { + this.value = gstr.value; + this.count = 
gstr.count; + this.offset = gstr.offset; + } + + public GString(StringBuffer gsb) { + value = global new char[gsb.length()]; + count = gsb.length(); + offset = 0; + for (int i = 0; i < count; i++) + value[i] = gsb.value[i]; + } + + public GString(char str[]) { + char charstr[]=new char[str.length]; + for(int i=0; i<str.length; i++) + charstr[i]=str[i]; + this.value=charstr; + this.count=str.length; + this.offset=0; + } + + public static char[] toLocalCharArray(GString str) { + char[] c; + int length; + + length = str.length(); + + c = new char[length]; + + for (int i = 0; i < length; i++) { + c[i] = str.value[i+str.offset]; + } + return c; + } + + public String toLocalString() { + return new String(toLocalCharArray(this)); + } + + public int length() { + return count; + } + + public int indexOf(int ch, int fromIndex) { + for (int i = fromIndex; i < count; i++) + if (this.charAt(i) == ch) + return i; + return -1; + } + + public int lastindexOf(int ch) { + return this.lastindexOf(ch, count - 1); + } + + public int lastindexOf(int ch, int fromIndex) { + for (int i = fromIndex; i >= 0; i--) /* >= 0 so that index 0 is searched too */ + if (this.charAt(i) == ch) + return i; + return -1; + } + + public char charAt(int i) { + return value[i+offset]; + } + + public int indexOf(String str) { + return this.indexOf(str, 0); + } + + public int indexOf(String str, int fromIndex) { + if (fromIndex < 0) + fromIndex = 0; + for (int i = fromIndex; i <= (count-str.count); i++) + if (regionMatches(i, str, 0, str.count)) + return i; + return -1; + } + + public boolean regionMatches(int toffset, String other, int ooffset, int len) { + if (toffset < 0 || ooffset < 0 || (toffset+len) > count || (ooffset+len) > other.count) + return false; + + for (int i = 0; i < len; i++) { + if (other.value[i+other.offset+ooffset] != this.value[i+this.offset+toffset]) + return false; + } + return true; + } + + public String subString(int beginIndex, int endIndex) { + return substring(beginIndex, endIndex); + } + + public String substring(int 
beginIndex, int endIndex) { + String str; + str = global new String(); + str.value = this.value; + str.count = endIndex-beginIndex; + str.offset = this.offset + beginIndex; + return str; + } + + public static String valueOf(Object o) { + if (o==null) + return "null"; + else + return o.toString(); + } +} diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java b/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java index 6b49e9c8..0a1d69b5 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java @@ -1,6 +1,6 @@ public class HashEntry { - String engine; - String signature; + GString engine; + GString signature; HashStat stats; public HashEntry() { } @@ -19,7 +19,7 @@ public class HashEntry { return result; } - public void setengine(String engine) { + public void setengine(GString engine) { this.engine=engine; } @@ -27,15 +27,15 @@ public class HashEntry { this.stats=stats; } - public void setsig(String signature) { + public void setsig(GString signature) { this.signature=signature; } - public String getEngine() { + public GString getEngine() { return engine; } - public String getSignature() { + public GString getSignature() { return signature; } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java b/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java index d8d2b58b..afbe9cfe 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java @@ -3,10 +3,10 @@ public class HashStat { FilterStatistic[] userstat; int[] listofusers; public HashStat() { - userid = new int[8]; //max users for our system=8 - userstat = new FilterStatistic[8]; + userid = global new int[8]; //max users for our system=8 + userstat = global new FilterStatistic[8]; for(int i=0; i<8; i++) { - userstat[i] = new FilterStatistic(); + userstat[i] = global new FilterStatistic(); } } @@ -47,7 +47,7 @@ 
public class HashStat { public int[] getUsers() { int nusers = numUsers(); - listofusers = new int[nusers]; + listofusers = global new int[nusers]; int j=0; for(int i=0; i<8; i++) { if(userid[i] == 1) { diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java b/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java index 41cb3cda..905d728b 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java @@ -13,6 +13,7 @@ public class Mail { String body; String noURLBody; String sourceCode; + String spam; boolean hasAttachement; String encoding; //rich text, plain, html @@ -32,7 +33,11 @@ public class Mail { while((line = fileinput.readLine()) != null) { Vector splittedLine = line.split(); - if(((String)(splittedLine.elementAt(0))).equals("Header:")) // message id + if(((String)(splittedLine.elementAt(0))).equals("Spam:")) + { + spam = (String)splittedLine.elementAt(1); + } + else if(((String)(splittedLine.elementAt(0))).equals("Header:")) // message id { header = (String)splittedLine.elementAt(1); } @@ -173,7 +178,6 @@ public class Mail { return sourceCode; } - // TODO: String? Is this a boolean, a number, or can be both? 
public void setHasAttachement(boolean hasAttachement) { this.hasAttachement = hasAttachement; } @@ -340,7 +344,9 @@ public class Mail { } public boolean getIsSpam() { - return isSpam; + if(spam != null && spam.equals("yes")) /* spam is only assigned when a "Spam:" line was read */ + return true; + return false; } /** diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java b/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java index 60d5c50a..14de5ed1 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java @@ -40,14 +40,30 @@ public class SpamFilter extends Thread { thid = id; } - Random rand = new Random(0); + Random rand = new Random(thid); + Random myrand = new Random(0); + + /* + if(id==0) { + //Randomly set Spam vals for each email + for(int i=0; i<nemails; i++) { + Mail email = new Mail("emails/email"+i); + int spamval = rand.nextInt(100); + if(spamval<60) { //assume 60% are spam and rest are ham + email.setIsSpam(false); + } else { + email.setIsSpam(true); + } + } + } + */ for(int i=0; i<niter; i++) { for(int j=0; j<nemails; j++) { int pickemail = rand.nextInt(100); Mail email = new Mail("emails/email"+pickemail); - //Mail email = getEmail(pickemail); Vector signatures = email.checkMail(thid); + //check with global data structure int[] confidenceVals=null; atomic { @@ -58,6 +74,15 @@ public class SpamFilter extends Thread { FilterResult filterResult = new FilterResult(); boolean filterAnswer = filterResult.getResult(confidenceVals); + //---- get user's take on email and send feedback ------ + /* + int spamval = rand.nextInt(100); + if(spamval<60) { //assume 60% are spam and rest are ham + email.setIsSpam(false); + } else { + email.setIsSpam(true); + } + */ boolean userAnswer = email.getIsSpam(); if(filterAnswer != userAnswer) { atomic { @@ -84,18 +109,6 @@ public class SpamFilter extends Thread { SpamFilter.parseCmdLine(args, sf); int nthreads = sf.nthreads; - Random rand = new Random(8); - //Randomly set Spam vals for 
each email - for(int i=0; i<sf.numemail; i++) { - Mail email = new Mail("./emails/email"+i); - int spamval = rand.nextInt(100); - if(spamval<60) { //assume 60% are spam and rest are ham - email.setIsSpam(false); - } else { - email.setIsSpam(true); - } - } - //Create Global data structure DistributedHashMap dhmap; SpamFilter[] spf; @@ -196,9 +209,17 @@ public class SpamFilter extends Thread { for(int i=0; i<numparts; i++) { String part = (String)(signatures.elementAt(i)); char tmpengine = part.charAt(0); - String engine = global new String(tmpengine); - String signature = global new String(part.substring(2)); - //String signature = part.substring(2); //a:b index(a)=0, index(:)=1, index(b)=2 + GString engine=null; + if(tmpengine == '4') { //Ephemeral Signature calculator + String tmpstr = new String("4"); + engine = global new GString(tmpstr); + } + if(tmpengine == '8') { //Whiplash Signature calculator + String tmpstr = new String("8"); + engine = global new GString(tmpstr); + } + String str = new String(part.substring(2));//a:b index of a =0, index of : =1, index of b =2 + GString signature = global new GString(str); HashEntry myhe = global new HashEntry(); myhe.setengine(engine); myhe.setsig(signature); @@ -215,14 +236,22 @@ public class SpamFilter extends Thread { // ----- now connect to global data structure and ask for spam ----- HashEntry tmphe = (HashEntry)(mydhmap.getKey(myhe)); FilterStatistic fs = (FilterStatistic) (mydhmap.get(myhe)); //get the value from hash + confidenceVals[i] = fs.getChecked(); } } // --> the mail client is able to determine if it is spam or not + // --- According to the "any"-logic (in Core#check_logic) in original Razor --- + // If any answer is spam, the entire email is spam. 
return confidenceVals; } + /** + * This method sends feedback from the user to a distributed + * spam database and trains the spam database to check future + * emails and detect spam + **/ public void sendFeedBack(Mail mail, boolean isSpam, int id) { Vector partsOfMailStrings = mail.getCommonPart(); partsOfMailStrings.addElement(mail.getBodyString()); @@ -232,21 +261,38 @@ public class SpamFilter extends Thread { for(int i=0;i<signatures.size();i++) { String part = (String)(signatures.elementAt(i)); - char tmpengine = part.charAt(0); - String engine = global new String(tmpengine); - String signature = global new String(part.substring(2)); - //String signature = part.substring(2); //a:b index(a)=0, index(:)=1, index(b)=2 + // + // Signature is of form a:b + // where a = string representing a signature engine + // either "4" or "8" + // b = string representing signature + // + char tmpengine = part.charAt(0); // + GString engine=null; /* explicit init, as in checkMail: neither branch below may assign it */ + if(tmpengine == '4') { + String tmpstr = new String("4"); + engine = global new GString(tmpstr); + } + if(tmpengine == '8') { + String tmpstr = new String("8"); + engine = global new GString(tmpstr); + } + String tmpsig = new String(part.substring(2)); + GString signature = global new GString(tmpsig); HashEntry myhe = global new HashEntry(); myhe.setengine(engine); myhe.setsig(signature); - // ----- now connect to global data structure and upate spam count ----- + // ----- now connect to global data structure and update stats ----- HashEntry tmphe = (HashEntry)(mydhmap.getKey(myhe)); if(tmphe.stats.userid[id] != 1) { tmphe.stats.setuserid(id); } - FilterStatistic fs = (FilterStatistic) (mydhmap.get(myhe)); //get the value from hash + //---- get value from distributed hash and update spam count + FilterStatistic fs = (FilterStatistic) (mydhmap.get(myhe)); + + //TODO: Allow users to give incorrect feedback //Increment spam or ham value if(isSpam) { diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/emails/gen.c 
b/Robust/src/Benchmarks/Distributed/SpamFilter/emails/gen.c index 5491f7de..195fb38f 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/emails/gen.c +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/emails/gen.c @@ -95,6 +95,16 @@ void generateEmails(int num_email,char** wl,int word_num,char** ul,int url_num,c sprintf(fileNameBuffer,"%s%d",fileName,i+1); newFile = fopen(fileNameBuffer,"w"); + // write spam or no spam + // 60% of email is spam and rest is ham + char yes[] = "yes"; + char no[] = "no"; + int tmprandindex = rand() % 100; /* percentage draw, so the 60% split holds for any num_email */ + if(tmprandindex<60) + fprintf(newFile,"Spam: %s\n",yes); + else + fprintf(newFile,"Spam: %s\n",no); + // write header fprintf(newFile,"Header: %d\n",i+1); diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/makefile b/Robust/src/Benchmarks/Distributed/SpamFilter/makefile index 8491ee74..170248b7 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/makefile +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/makefile @@ -8,16 +8,17 @@ SRC=${MAINCLASS}.java \ SignatureComputer.java \ FilterStatistic.java \ EphemeralSignature.java \ + GString.java \ WhiplashSignature.java FLAGS1=-dsm -optimize -mainclass ${MAINCLASS} FLAGS2=-dsm -dsmcaching -optimize -mainclass ${MAINCLASS} -FLAGS3=-dsm -dsmcaching -prefetch -optimize -mainclass ${MAINCLASS} +FLAGS3=-dsm -dsmcaching -rangeprefetch -optimize -mainclass ${MAINCLASS} default: ../../../buildscript ${FLAGS1} -o ${MAINCLASS}NPNC ${SRC} -# ../../../buildscript ${FLAGS2} -o ${MAINCLASS}NPC ${SRC} -# ../../../buildscript ${FLAGS3} -o ${MAINCLASS}RangeN ${SRC} + ../../../buildscript ${FLAGS2} -o ${MAINCLASS}NPC ${SRC} + ../../../buildscript ${FLAGS3} -o ${MAINCLASS}RangeN ${SRC} clean: rm -rf tmpbuilddirectory -- 2.34.1