From: jjenista <jjenista> Date: Thu, 26 Feb 2009 23:26:37 +0000 (+0000) Subject: tagger benchmark X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9056bcd62aa87343b295e6cfe534217cedfb5b21;p=IRC.git tagger benchmark --- diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/Action.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Action.java new file mode 100644 index 00000000..5b8654b2 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Action.java @@ -0,0 +1,513 @@ +/** + * Action class + * Represents an action performed in response to a token + * Instance of command pattern + * + * @author Daniel Jackson + * @version 0, 07/06/01 + */ + +//package tagger; +//import java.util.*; + +public /*abstract*/ class Action { + /** + * requires: iter is an iterator that just yielded this + * ensures: performs action for token, and may remove itself from iter + * default behaviour is equivalent to perform + */ + public void perform (Token token, Iterator iter) { + perform (token); + } + + public void perform (Token token) { + ; + } +} + +public class ParagraphAction extends Action { + boolean first_para; + + Generator generator; + StringBox current_para_style; + Numbering numbering; + public ParagraphAction( Generator g, + StringBox cps, + Numbering n) { + generator = g; + current_para_style = cps; + numbering = n; + first_para = true; + } + + public void perform (Token t, Iterator iter) { + if (t.type != Token.PARASTYLECOMMAND()) { + if (!first_para) generator.linefeed (); + generator.new_para (current_para_style.string); + String numstr = numbering.get_numbering_string (current_para_style.string); + if (numstr.length() != 0) { + // display numbering as evidence of progress + System.out.println (numstr); + generator.plaintext (numstr); + } + + iter.remove (); + first_para = false; + } + } +} + +public class PlaintextAction extends Action { + Generator generator; + public PlaintextAction( Generator g ) { + generator = g; + } + + public void 
perform(Token t) { + generator.plaintext (t.arg); + } +} + +public class NewlineAction extends Action { + Generator generator; + public NewlineAction( Generator g ) { + generator = g; + } + + public void perform(Token t) { + generator.new_line (); + } +} + +public class ApostropheAction extends Action { + Generator generator; + PropertyMap char_map; + public ApostropheAction( Generator g, + PropertyMap cm) { + generator = g; + char_map = cm; + } + + public void perform (Token t) { + StandardEngine.put_special_char (generator, + char_map, + StandardEngine.apostrophe_char_name(), + t.line); + } +} + +public class PrimeAction extends Action { + Generator generator; + PropertyMap char_map; + public PrimeAction( Generator g, + PropertyMap cm) { + generator = g; + char_map = cm; + } + + public void perform (Token t) { + StandardEngine.put_special_char (generator, + char_map, + StandardEngine.prime_char_name(), + t.line); + } +} + +public class OpenSingleQuoteAction extends Action { + Generator generator; + PropertyMap char_map; + public OpenSingleQuoteAction( Generator g, + PropertyMap cm) { + generator = g; + char_map = cm; + } + + public void perform (Token t) { + StandardEngine.put_special_char (generator, + char_map, + StandardEngine.opensinglequote_char_name(), + t.line); + } +} + +public class CloseSingleQuoteAction extends Action { + Generator generator; + PropertyMap char_map; + public CloseSingleQuoteAction( Generator g, + PropertyMap cm) { + generator = g; + char_map = cm; + } + + public void perform (Token t) { + StandardEngine.put_special_char (generator, + char_map, + StandardEngine.closesinglequote_char_name(), + t.line); + } +} + +public class OpenDoubleQuoteAction extends Action { + Generator generator; + PropertyMap char_map; + public OpenDoubleQuoteAction( Generator g, + PropertyMap cm) { + generator = g; + char_map = cm; + } + + public void perform (Token t) { + StandardEngine.put_special_char (generator, + char_map, + 
StandardEngine.opendoublequote_char_name(), + t.line); + } +} + +public class CloseDoubleQuoteAction extends Action { + Generator generator; + PropertyMap char_map; + public CloseDoubleQuoteAction( Generator g, + PropertyMap cm) { + generator = g; + char_map = cm; + } + + public void perform (Token t) { + StandardEngine.put_special_char (generator, + char_map, + StandardEngine.closedoublequote_char_name(), + t.line); + } +} + +public class HyphenAction extends Action { + Generator generator; + PropertyMap char_map; + public HyphenAction( Generator g, + PropertyMap cm ) { + generator = g; + char_map = cm; + } + + public void perform (Token t) { + int len = t.arg.length (); + if (len == 1) + StandardEngine.put_special_char (generator, + char_map, + StandardEngine.hyphen_char_name(), + t.line); + else if (len == 2) + StandardEngine.put_special_char (generator, + char_map, + StandardEngine.endash_char_name(), + t.line); + else if (len == 3) + StandardEngine.put_special_char (generator, + char_map, + StandardEngine.emdash_char_name(), + t.line); + else + System.out.println (t.line + ": Too many hyphens: " + t.arg); + } +} + +public class DotsAction extends Action { + Generator generator; + PropertyMap char_map; + public DotsAction( Generator g, + PropertyMap cm ) { + generator = g; + char_map = cm; + } + + public void perform (Token t) { + int len = t.arg.length (); + if (len == 1) + generator.plaintext ("."); + else if (len == 2) + StandardEngine.put_special_char (generator, + char_map, + StandardEngine.twodotleader_char_name(), + t.line); + else if (len == 3) + StandardEngine.put_special_char (generator, + char_map, + StandardEngine.ellipsis_char_name(), + t.line); + else + System.out.println (t.line + ": Too many dots: " + t.arg); + } +} + +public class LoadCharMapCommandAction extends Action { + Generator generator; + PropertyMap char_map; + Numbering numbering; + + public LoadCharMapCommandAction( Generator g, + PropertyMap cm, + Numbering n ) { + generator = g; + 
char_map = cm; + numbering = n; + } + + public void perform (Token t) { + // open file with given name and load char map from it + String file_name = t.arg; + File f = new File (file_name); + FileInputStream s = new FileInputStream (f); + PropertyParser p = new PropertyParser (s); + char_map.incorporate (p); + } +} + +public class LoadStyleSheetCommandAction extends Action { + Generator generator; + PropertyMap style_map; + Numbering numbering; + + public LoadStyleSheetCommandAction( Generator g, + PropertyMap sm, + Numbering n ) { + generator = g; + style_map = sm; + numbering = n; + } + + public void perform (Token t) { + // open file with given name and load char map from it + String file_name = t.arg; + File f = new File (file_name); + FileInputStream s = new FileInputStream (f); + PropertyParser p = new PropertyParser (s); + style_map.incorporate (p); + numbering.incorporate (); + } +} + +public class UnsuppressAction extends Action { + Generator generator; + + public UnsuppressAction( Generator g ) { + generator = g; + } + + public void perform (Token t, Iterator i) { + generator.suppress_off (); + i.remove (); + } +} + +public class PreambleCommandAction extends Action { + Generator generator; + Action unsuppress_action; + StandardEngine engine; + + public PreambleCommandAction( Generator g, + Action ua, + StandardEngine se ) { + generator = g; + unsuppress_action = ua; + engine = se; + } + + public void perform (Token t) { + generator.suppress_on (); + engine.register_by_type (unsuppress_action, Token.PARABREAK()); + } +} + +public class ParaBreakAction extends Action { + Action paragraph_action; + StringBox current_para_style; + PropertyMap style_map; + + public ParaBreakAction( Action pa, + StringBox cps, + PropertyMap sm ) { + paragraph_action = pa; + current_para_style = cps; + style_map = sm; + } + + public void perform (Token t) { + String next_style = style_map.get_property (current_para_style.string, + StandardEngine.next_style_prop_name()); + if 
(next_style == null) { + System.out.println (t.line + ": No next style property given for style: " + current_para_style.string); + return; + } + current_para_style.set (next_style); + StandardEngine.register_for_all (paragraph_action); + } +} + +public class ParaStyleCommandAction extends Action { + StringBox current_para_style; + + public ParaStyleCommandAction( StringBox cps ) { + current_para_style = cps; + } + + public void perform (Token t) { + current_para_style.set (t.arg); + } +} + +public class CharCommandAction extends Action { + Generator generator; + PropertyMap char_map; + + public CharCommandAction( Generator g, + PropertyMap cm ) { + generator = g; + char_map = cm; + } + + public void perform (Token t) { + String index = char_map.get_property (t.arg, + StandardEngine.index_prop_name()); + if (index == null) { + System.out.println (t.line + ": No index property given for character: " + t.arg); + return; + } + String font = char_map.get_property (t.arg, + StandardEngine.font_prop_name()); + // if no font is listed, generate special character in standard font + if (font == null) + generator.special_char (index); + else + generator.special_char (font, index); + } +} + +public class UnderscoreAction extends Action { + Generator generator; + boolean italic_mode_on; + + public UnderscoreAction( Generator g ) { + generator = g; + italic_mode_on = false; + } + + public void perform (Token t) { + if (italic_mode_on) { + italic_mode_on = false; + generator.pop_format (); + } + else { + italic_mode_on = true; + generator.push_format (Generator.ITALICS()); + } + } +} + +public class PushItalicsAction extends Action { + Generator generator; + + public PushItalicsAction( Generator g ) { + generator = g; + } + + public void perform (Token t, Iterator iter) { + Assert.assert_ (t.type == Token.ALPHABETIC()); + generator.push_format (Generator.ITALICS()); + } +} + +public class PopItalicsAction extends Action { + Generator generator; + + public PopItalicsAction( 
Generator g ) { + generator = g; + } + + public void perform (Token t, Iterator iter) { + Assert.assert_ (t.type == Token.ALPHABETIC()); + generator.pop_format (); + } +} + +public class DollarAction extends Action { + Action push_italics_action; + Action pop_italics_action; + boolean math_mode_on; + + public DollarAction( Action pushia, Action popia ) { + push_italics_action = pushia; + pop_italics_action = popia; + math_mode_on = false; + } + + public void perform (Token t) { + if (math_mode_on) { + math_mode_on = false; + StandardEngine.unregister_by_type (push_italics_action, Token.ALPHABETIC()); + StandardEngine.unregister_by_type (pop_italics_action, Token.ALPHABETIC()); + } + else { + math_mode_on = true; + StandardEngine.register_by_type_back (pop_italics_action, Token.ALPHABETIC()); + StandardEngine.register_by_type_front (push_italics_action, Token.ALPHABETIC()); + } + } +} + +public class FormatCommandAction extends Action { + Generator generator; + + public FormatCommandAction( Generator g ) { + generator = g; + } + + public void perform (Token t) { + if (t.arg.equals (StandardEngine.ROMAN_COMMANDNAME())) + generator.push_format (Generator.ROMAN()); + else if (t.arg.equals (StandardEngine.BOLD_COMMANDNAME())) + generator.push_format (Generator.BOLD()); + else if (t.arg.equals (StandardEngine.ITALICS_COMMANDNAME())) + generator.push_format (Generator.ITALICS()); + else if (t.arg.equals (StandardEngine.SUBSCRIPT_COMMANDNAME())) + generator.push_format (Generator.SUBSCRIPT()); + else if (t.arg.equals (StandardEngine.SUPERSCRIPT_COMMANDNAME())) + generator.push_format (Generator.SUPERSCRIPT()); + } +} + +public class PopFormatCommandAction extends Action { + Generator generator; + + public PopFormatCommandAction( Generator g ) { + generator = g; + } + + public void perform (Token t) { + generator.pop_format (); + } +} + +public class OtherAction extends Action { + Generator generator; + + public OtherAction( Generator g ) { + generator = g; + } + + public 
void perform (Token t) { + generator.plaintext (t.arg); + } +} + +public class EndOfStreamAction extends Action { + Generator generator; + + public EndOfStreamAction( Generator g ) { + generator = g; + } + + public void perform (Token t) { + System.out.println ("... done"); + } +} diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/Assert.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Assert.java new file mode 100644 index 00000000..4a901639 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Assert.java @@ -0,0 +1,29 @@ +/** + * Assert class + * Provides assertion checking + * + * @author Daniel Jackson + * @version 0, 07/03/01 + */ + +//package tagger; +//import java.io.*; + +public class Assert { + //static PrintStream error_stream = Tagger.error_stream; + + public static void assert_ (boolean cond) { + if (!cond) { + //error_stream.println ("Assertion failure"); + System.out.println ("Assertion failure"); + System.exit(-1); + // print stack trace + } + } + + public static void unreachable () { + //error_stream.println ("Assertion failure"); + System.out.println ("Assertion failure"); + System.exit(-1); + } +} diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/Counter.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Counter.java new file mode 100644 index 00000000..c07d0a0b --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Counter.java @@ -0,0 +1,122 @@ +/** + * Counter class + * + * @author Daniel Jackson + * @version 0, 07/03/01 + */ + +//package tagger; +//import java.io.*; + +public class Counter { + private int count; + private int initial; + private int type; + + static int NO_SUCH_TYPE() { return -1; } + static int ARABIC () { return 0; } + static int ROMAN_UPPER () { return 1; } + static int ROMAN_LOWER () { return 2; } + static int ALPHA_UPPER () { return 3; } + static int ALPHA_LOWER () { return 4; } + + // eventually recognize counter_type and set initial count and output format + // takes style and stream for error 
reporting + /* + * requires: count_prop and style are non null + * + */ + public Counter (String count_prop, String style) { + Assert.assert_ (count_prop != null); + Assert.assert_ (style != null); + type = get_type (count_prop); + + //switch (type) { + if( type == NO_SUCH_TYPE() ) { + type = ARABIC(); + initial = 0; + + } else if( type == ALPHA_LOWER() || + type == ALPHA_UPPER() ) { + if (count_prop.length () != 1) { + System.out.println ("Bad counter type for style " + style + ": " + count_prop); + initial = 0; + } else { + initial = count_prop.toLowerCase().charAt (0) - 'a'; + } + + } else if( type == ARABIC() ) { + initial = Integer.parseInt (count_prop) - 1; + + } else if( type == ROMAN_LOWER() || + type == ROMAN_UPPER() ) { + // not yet implemented + initial = 0; + type = ARABIC(); + + } else { + Assert.unreachable (); + } + + count = initial; + } + + /** + * ensures: increments counter + * returns true iff successful, false otherwise (eg, because alphabetic counter went past 'z') + */ + public boolean increment () { + if ((type == ALPHA_UPPER() || type == ALPHA_LOWER()) && count == 26) + return false; + count++; + return true; + } + + public void reset () { + count = initial; + } + + public String unparse () { + + //switch (type) { + + if( type == ALPHA_LOWER() ) { + char c = (char) ('a' + count - 1); + return new Character (c).toString(); + } + + if( type == ALPHA_UPPER() ) { + char c = (char) ('A' + count - 1); + return new Character (c).toString(); + } + + if( type == ARABIC() ) { + return String.valueOf (count); + } + + Assert.unreachable (); + return "DUMMY"; + } + + /** + * + * ensures: returns counter type of counter given in the string counter_type + * as an int, being equal to one of the values of the constants declared in the Counter class. + * returns Counter.NO_SUCH_TYPE if the string is not well formed. 
+ */ + public static int get_type (String counter_type) { + if (counter_type.length() == 0) return NO_SUCH_TYPE(); + char c = counter_type.charAt (0); + if (c >= 'a' && c <= 'z') + return ALPHA_LOWER(); + if (c >= 'A' && c <= 'Z') + return ALPHA_UPPER(); + if (c == 'i' || c == 'v' || c == 'x' ||c == 'l' || c == 'c' || c == 'm') + return ROMAN_LOWER(); + if (c == 'I' || c == 'V' || c == 'X' ||c == 'L' || c == 'C' || c == 'M') + return ROMAN_LOWER(); + if (c >= '0' && c <= '9') + return ARABIC(); + return NO_SUCH_TYPE(); + } +} diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/Engine.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Engine.java new file mode 100644 index 00000000..5c3d52c9 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Engine.java @@ -0,0 +1,78 @@ +/** + * Engine class + * Maps token types to actions + * + * @author Daniel Jackson + * @version 0, 07/06/01 + */ + +//package tagger; +//import java.util.*; + +public class Engine { + /** + * There are some very tricky concurrent modification issues with this class. + * Can't execute a register or unregister method during an execution of consume_token + * if the register or unregister affects the same list of actions associated with the token. + * This means that during a consume_token for some type, can't register or unregister for + * that type, or for the all types. + * Note that a version of the perform method of action takes an iterator argument to + * allow an action to remove itself. 
+ */ + + // array of Action lists indexed on token type + private LinkedList [] actions; + + // actions performed for all token types + private LinkedList default_actions; + + public Engine () { + actions = new LinkedList [Token.MAXTOKEN() + 1]; + for (int i = 0; i < actions.length; i++) + actions[i] = new LinkedList (); + default_actions = new LinkedList (); + } + + public void register_by_type (Action action, int type) { + register_by_type_front (action, type); + } + + public void register_for_all (Action action) { + default_actions.addFirst (action); + } + + public void unregister_for_all (Action action) { + default_actions.remove (action); + } + + public void register_by_type_front (Action action, int type) { + Assert.assert_ (type >= 0); + Assert.assert_ (type <= Token.MAXTOKEN()); + actions[type].addFirst (action); + } + + public void register_by_type_back (Action action, int type) { + Assert.assert_ (type >= 0); + Assert.assert_ (type <= Token.MAXTOKEN()); + actions[type].addLast (action); + } + + public void unregister_by_type (Action action, int type) { + Assert.assert_ (type >= 0); + Assert.assert_ (type <= Token.MAXTOKEN()); + actions[type].remove (action); + } + + public void consume_token (Token token) { + perform_actions (default_actions, token); + perform_actions (actions[token.type], token); + } + + public static void perform_actions (LinkedList actions, Token token) { + Iterator i = actions.iterator (); + while (i.hasNext ()) { + Action a = (Action) i.next (); + a.perform (token, i); + } + } +} diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/Generator.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Generator.java new file mode 100644 index 00000000..80a72d38 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Generator.java @@ -0,0 +1,43 @@ +/** + * Generator interface + * Generic backend tagged text generator + * + * @author Daniel Jackson + * @version 0, 07/08/01 + */ + +//package tagger; +//import java.io.*; +//import 
java.util.*; + +public /*interface*/ class Generator { + + // formats to pass to push_format + public static int ROMAN () { return 0; } + public static int ITALICS () { return 1; } + public static int BOLD () { return 2; } + public static int SUBSCRIPT () { return 3; } + public static int SUPERSCRIPT() { return 4; } + + // prints new line to output + void linefeed (){} + + void new_para (String style){} + + // inserts code for new line + void new_line (){} + + void special_char (String font, String index){} + + // for dashes, ellipses, etc + void special_char (String index){} + + void plaintext (String text){} + void push_format (int format){} + void pop_format (){} + + // turn output suppression on and off + void suppress_on (){} + void suppress_off (){} + +} diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/Numbering.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Numbering.java new file mode 100644 index 00000000..b46c1cb4 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Numbering.java @@ -0,0 +1,196 @@ +/** + * Numbering class + * Provides special navigations for numbering + * + * @author Daniel Jackson + * @version 0, 07/03/01 + */ + +//package tagger; +//import java.io.*; +//import java.util.*; + +public class Numbering { + private PropertyMap style_map; + + public static String PARENT_PROPNAME () { return "parent"; } + public static String CHILD_PROPNAME () { return "child"; } + public static String ROOT_PROPNAME () { return "root"; } + public static String COUNTER_PROPNAME () { return "counter"; } + public static String SEPARATOR_PROPNAME() { return "separator";} + public static String LEADER_PROPNAME () { return "leader"; } + public static String TRAILER_PROPNAME () { return "trailer"; } + + /* + * The graph structure of the numbering relations is represented using + * properties in the paragraph style property map. 
+ * Each style is mapped to its root -- the ancestor with no parent in the + * numbering relationship -- and to its parent and child. + * The child and root properties are added; the parent property is given + * in the style sheet file. + * + * If a style is numbered, its ancestors must be also. + * This property is not currently checked. + */ + + /* + * Representation invariant + * + * Definition: A style is numbered if it has a counter property. + * A numbered style has a root property. + * A root style has itself as root and has no parent. + * There is a bidirectional parent/child chain from a style to its root + * + * Checking that style sheet is well formed? + */ + + + // maps paragraph style names to counters + // styles that are not numbered are not mapped + private HashMap counter_map; // String -> Counter + + /** + * ensures: constructs a Numbering + * not well formed until incorporate called + */ + public Numbering (PropertyMap style_map) { + this.style_map = style_map; + counter_map = new HashMap (); + } + + /** + * ensures: constructs a Numbering + * modifies: property lists in style_map + */ + /* + public Numbering (PropertyMap style_map) { + this.style_map = style_map; + add_extra_properties (style_map); + initialize_counters (style_map); + } + */ + + /** + * ensures: constructs a Numbering using current entries in style_map + * modifies: property lists in style_map + */ + public void incorporate () { + add_extra_properties (); + initialize_counters (); + } + + /* + * requires: all ancestor and descendant styles of style are numbered iff style is numbered + * ensures: returns the numbering string for a new paragraph whose style name is _style_ + * + * format of numbering string is: + * <mytrailer><rootcounter><rootseparator>...<counter><separator>...<mycounter><mytrailer> + */ + public String get_numbering_string (String style) { + // return empty string if style is not numbered + if (!style_has_property (style, COUNTER_PROPNAME())) return ""; + + // 
initialize numbering string to leader + String leader = style_map.get_property (style, LEADER_PROPNAME()); + String numbering = leader == null ? "" : leader; + + // append numbering for each style from root to this style + // each followed by its separator + String s = style_map.get_property (style, ROOT_PROPNAME()); + Assert.assert_ (s != null); + while (! s.equals (style)) { + numbering += ((Counter) counter_map.get(s)).unparse (); + String separator = style_map.get_property (s, SEPARATOR_PROPNAME()); + numbering += separator == null ? "" : separator; + s = style_map.get_property (s, CHILD_PROPNAME()); + } + + // increment numbering for this style and append its string + Counter c = (Counter) counter_map.get (s); + boolean success = c.increment (); + if (!success) + System.out.println ("Counter overrun for style: " + style); + numbering += c.unparse (); + + // append trailer + String trailer = style_map.get_property (s, TRAILER_PROPNAME()); + numbering += trailer == null ? "" : trailer; + + // reset counters for all descendant styles + s = style_map.get_property (s, CHILD_PROPNAME()); + while (s != null) { + c = (Counter) counter_map.get (s); + c.reset (); + s = style_map.get_property (s, CHILD_PROPNAME()); + } + return numbering; + } + + private void add_extra_properties () { + add_child_property (); + add_root_property (); + } + + // for each style with a counter property, insert into counter_map + private void initialize_counters () { + HashSet styles = style_map.get_items (); + Iterator iter = (Iterator) styles.iterator (); + while (iter.hasNext ()) { + String style = (String) iter.next (); + if (style_has_property (style, COUNTER_PROPNAME())) { + // get counter type (arabic, roman, etc) + String count_prop = style_map.get_property (style, COUNTER_PROPNAME()); + int count_type = Counter.get_type (count_prop); + if (count_type == Counter.NO_SUCH_TYPE()) { + System.out.println ("Bad counter type for style " + style + ": " + count_prop); + // and insert into 
counter_map anyway to preserve rep invariant + // so must check counter type when counter is created and default if bad + } + counter_map.put (style, new Counter (count_prop, style)); + } + } + } + + // add to each style that is a parent of another style a child property to it + private void add_child_property () { + HashSet styles = style_map.get_items (); + Iterator iter = (Iterator) styles.iterator (); + while (iter.hasNext ()) { + String style = (String) iter.next (); + String pstyle = (String) style_map.get_property (style, PARENT_PROPNAME()); + // if parent exists, add child property to it + if (pstyle != null) { + LinkedList props = style_map.get_property_list (pstyle); + props.add (new Property (CHILD_PROPNAME(), style)); + } + } + } + + // add root property to each numbered style + private void add_root_property () { + HashSet styles = style_map.get_items (); + Iterator iter = (Iterator) styles.iterator (); + while (iter.hasNext ()) { + String style = (String) iter.next (); + if (!style_has_property (style, PARENT_PROPNAME())) { + // if no parent, then it's a root, so add root property for it and all descendants + String root = style; + while (style != null) { + LinkedList props = style_map.get_property_list (style); + props.add (new Property (ROOT_PROPNAME(), root)); + style = style_map.get_property (style, CHILD_PROPNAME()); + } + } + } + } + + // ensures: returns true iff style has property prop_name + private boolean style_has_property (String style, String prop_name) { + String p = (String) style_map.get_property (style, prop_name); + return p != null; + } + + public String toString () { + return "UNIMPLEMENTED"; + } +} diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/Property.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Property.java new file mode 100644 index 00000000..7bb87e6c --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Property.java @@ -0,0 +1,25 @@ +/** + * Property class + * <p> + * + * @author Daniel Jackson + * 
@version 0, 07/02/01 + */ + +//package tagger; +//import java.io.*; +//import java.util.*; + +public class Property { + public String property; + public String value; + + public Property (String p, String v) { + property = p; + value = v; + } + + public String toString () { + return "<" + property + ":" + value + ">"; + } +} diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/PropertyMap.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/PropertyMap.java new file mode 100644 index 00000000..da94478e --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/PropertyMap.java @@ -0,0 +1,88 @@ +/** + * PropertyMap class + * Maps identifiers to property lists + * Used for stylesheets, character maps, etc + * + * @author Daniel Jackson + * @version 0, 07/03/01 + */ + +//package tagger; +//import java.io.*; +//import java.util.*; + +public class PropertyMap { + private HashMap map; // String -> LinkedList [Property] + private HashSet keys; + + /** + * ensures: constructs an empty property map + */ + public PropertyMap () { + map = new HashMap (); + keys = new HashSet(); + } + + /** + * ensures: constructs a property map using the parser <code>p</code>. + */ + public PropertyMap (PropertyParser p) { + map = new HashMap (); + keys = new HashSet(); + + while (p.has_more_properties ()) { + LinkedList props = p.get_property_list (); + Property prop = (Property) props.removeFirst (); + map.put (prop.value, props); + keys.add(prop.value); + } + } + + /** + * ensures: incorporates properties using the parser <code>p</code>. + */ + public void incorporate (PropertyParser p) { + + while (p.has_more_properties ()) { + LinkedList props = p.get_property_list (); + Property prop = (Property) props.removeFirst (); + map.put (prop.value, props); + keys.add(prop.value); + } + + } + + /** + * @return the property list for item <code>item</code>. Returns null if no such item. 
+ */ + public LinkedList get_property_list (String item) { + return (LinkedList) map.get (item); + } + + /** + * @return the value of property <code>prop</code> for item <code>item</code> + * or null if it does not exist + */ + public String get_property (String item, String prop) { + LinkedList props = (LinkedList) map.get (item); + if (props == null) return null; + Iterator iter = props.iterator (); + while (iter.hasNext ()) { + Property p = (Property) iter.next (); + if (p.property.equals (prop)) + return p.value; + } + return null; + } + + /** + * @return the set of items with property lists in the map + */ + public HashSet get_items () { + return keys; //map.keySet (); + } + + public String toString () { + return map.toString (); + } +} diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/PropertyParser.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/PropertyParser.java new file mode 100644 index 00000000..f6738d74 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/PropertyParser.java @@ -0,0 +1,102 @@ +/** + * PropertyParser class + * Parses property files + * <p> + * <code>int</code>. 
+ * + * @author Daniel Jackson + * @version 0, 07/02/01 + */ + +//package tagger; +//import java.io.*; +//import java.util.*; + +public class PropertyParser { + private FileInputStream reader; + private String token; + private int next_char; + + public PropertyParser (FileInputStream r) { + reader = r; + next_char = reader.read (); + consume_comments (); + } + + private void consume_comments () { + // consume lines that don't start with < + while (next_char != '<' && !is_eos (next_char)) { + if (!is_eol (next_char)) + reader.readLine (); + consume_char (); + } + } + + private void consume_char () { + token += (char) next_char; + next_char = reader.read (); + } + + private void error (String msg) { + // correct to number from 1, not zero + //t line_number = reader.getLineNumber() + 1; + System.out.println (msg); + } + + public boolean has_more_properties () { + return (!is_eos (next_char)); + } + + /** + * requires: next_char contains next character in reader <p> + * ensures: returns list of properties until end of line or stream <p> + * according to the following syntax: + * property list is sequence of properties followed by eol of eos + * property is left-angle, property-name, colon, value, right-angle + * property-name is alphanumeric string, but value is any char sequence + * skips lines that do not start with < + * reports syntax errors on this.error_reporter + * Syntax + * @return list of properties until end of line or stream. 
+   * Notes: chose LinkedList because it provides removeFirst, to support common
+   * case in which first property is removed (eg, because it's a style name)
+   */
+  public LinkedList get_property_list () {
+    LinkedList properties = new LinkedList ();
+    // keep reading properties until the line (or the whole stream) runs out
+    while (!(is_eol (next_char) || is_eos (next_char))) {
+      properties.add (get_property ());
+    }
+    // step past the character that stopped the loop, then hand over to
+    // consume_comments (presumably skips comment lines; defined elsewhere)
+    consume_char ();
+    consume_comments ();
+    return properties;
+  }
+
+  /**
+   * Reads one property written as <name:value>.
+   * requires: next_char is the first character of the property
+   * ensures: calls error for every delimiter ('<', ':' or '>') that is not
+   *   found where expected, and returns a Property built from the two
+   *   strings collected in token (consume_char is expected to append to it)
+   */
+  private Property get_property () {
+    // opening delimiter
+    if (next_char != '<') {
+      error ("Found " + next_char + " when expecting <");
+    }
+    consume_char ();
+
+    // name: the alphanumeric run in front of the colon
+    token = "";
+    while (is_alphanumeric (next_char)) {
+      consume_char ();
+    }
+    String prop_name = token;
+    if (next_char != ':') {
+      error ("Found " + next_char + " following " + token + " when expecting :");
+    }
+    consume_char ();
+
+    // value: everything up to the closing delimiter, never past the line end
+    token = "";
+    while (!(next_char == '>' || is_eol (next_char) || is_eos (next_char))) {
+      consume_char ();
+    }
+    String prop_value = token;
+    if (next_char != '>') {
+      error ("Found " + next_char + " following " + token + " when expecting >");
+    }
+    consume_char ();
+    return new Property (prop_name, prop_value);
+  }
+
+  // character classification; c is a character code, or -1 for end of stream
+  static boolean is_eol (int c) {
+    return c == '\n';
+  }
+  static boolean is_eos (int c) {
+    return c == -1;
+  }
+  static boolean is_alphabetic (int c) {
+    return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
+  }
+  static boolean is_numeric (int c) {
+    return c >= '0' && c <= '9';
+  }
+  static boolean is_alphanumeric (int c) {
+    return is_numeric (c) || is_alphabetic (c);
+  }
+}
diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/QuarkGenerator.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/QuarkGenerator.java
new file mode 100644
index 00000000..981d68a4
--- /dev/null
+++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/QuarkGenerator.java
@@ -0,0 +1,100 @@
+/**
+ * QuarkGenerator class
+ * Backend tagged text generator for QuarkXpress
+ *
+ * @author Daniel Jackson
+ * @version 0, 07/08/01
+ */
+
+//package tagger;
+//import java.io.*;
+//import java.util.*;
+
+public class QuarkGenerator extends Generator {
+  FileOutputStream output_stream;
+  LinkedList format_stack;
+  private boolean on;
+
+  /**
+   * Creates a generator that writes to s, with output enabled and no
+   * formats open.
+   */
+  public QuarkGenerator (FileOutputStream s) {
+    output_stream = s;
+    // stack holds strings used to terminate formats
+    format_stack = new LinkedList ();
+    on = true;
+  }
+
+  // while suppressed, print and linefeed write nothing
+  public void suppress_on () {
+    on = false;
+  }
+
+  public void suppress_off () {
+    on = true;
+  }
+
+  // writes s to the output stream unless output is suppressed
+  private void print (String s) {
+    if (!on) return;
+    output_stream.write (s.getBytes());
+  }
+
+  // writes a single newline unless output is suppressed
+  public void linefeed () {
+    if (!on) return;
+    output_stream.write ('\n');
+  }
+
+  // print "@style:"
+  public void new_para (String style) {
+    String tag = "@" + style + ":";
+    print (tag);
+  }
+
+  // print "<\n>"
+  public void new_line () {
+    print ("<\\n>");
+  }
+
+  // print the character with the given index, switching to font around it
+  public void special_char (String font, String index) {
+    String tag = "<f\"" + font + "\"><\\#" + index + "><f$>";
+    print (tag);
+  }
+
+  // print the character with the given index in the current font
+  public void special_char (String index) {
+    String tag = "<\\#" + index + ">";
+    print (tag);
+  }
+
+  public void plaintext (String text) {
+    print (text);
+  }
+
+  /**
+   * Opens a format: prints its tag and records the same tag on format_stack
+   * so that pop_format can print it again to end the format.
+   * ROMAN and ITALICS both use the "<I>" tag.
+   * Any other format value ends in Assert.unreachable.
+   */
+  public void push_format (int format) {
+    String tag = null;
+    if (format == Generator.ROMAN() || format == Generator.ITALICS()) {
+      tag = "<I>";
+    }
+    else if (format == Generator.BOLD()) {
+      tag = "<B>";
+    }
+    else if (format == Generator.SUBSCRIPT()) {
+      tag = "<->";
+    }
+    else if (format == Generator.SUPERSCRIPT()) {
+      tag = "<+>";
+    }
+
+    if (tag == null) {
+      Assert.unreachable ();
+      return;
+    }
+    print (tag);
+    format_stack.push (tag);
+  }
+
+  /**
+   * Ends the most recently opened format by printing the tag that
+   * push_format stored for it.
+   */
+  public void pop_format () {
+    // for now, handle too many pops without report
+    if (!format_stack.isEmpty ()) {
+      print ((String) format_stack.pop ());
+    }
+  }
+}
diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/SourceParser.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/SourceParser.java
new file mode 100644
index 00000000..5fe3717c
--- /dev/null
+++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/SourceParser.java
@@ -0,0 +1,285 @@
+/**
+ * SourceParser class
+ * <p>
+ * <code>int</code>.
+ *
+ * @author Daniel Jackson
+ * @version 0, 07/02/01
+ */
+
+//package tagger;
+//import java.io.*;
+//import java.util.*;
+
+public class SourceParser {
+  // names of the commands that get_token recognizes by itself
+  public static String loadcharmapcommand_name () { return "loadchars"; }
+  public static String loadstylesheetcommand_name() { return "loadstyles";}
+  public static String preamblecommand_name () { return "preamble"; }
+  public static String refcommand_name () { return "ref"; }
+  public static String tagcommand_name () { return "tag"; }
+  public static String citecommand_name () { return "cite"; }
+  public static String separatorcommand_name () { return "sep"; }
+
+  private FileInputStream reader;
+
+  // holds set of strings recognized as paragraph styles
+  private HashSet parastyles;
+
+  // holds the previous value of next_char
+  private int last_char;
+  // one character of lookahead; -1 once the reader is exhausted
+  private int next_char;
+  private boolean within_single_quotes;
+  private boolean within_double_quotes;
+  private boolean at_start_of_line;
+  // text consumed so far for the token being built
+  private String token;
+
+  /**
+   * Creates a parser over reader and reads the first character, so that
+   * next_char is valid before the first call to get_token.
+   *
+   * @param reader stream holding the source text
+   * @param parastyles set of command names treated as paragraph styles
+   */
+  public SourceParser (FileInputStream reader, HashSet parastyles) {
+    this.reader = reader;
+    this.parastyles = parastyles;
+    next_char = reader.read ();
+    last_char = -1;
+    at_start_of_line = true;
+  }
+
+  // true until the lookahead character is the end-of-stream marker
+  public boolean has_more_tokens () {
+    return (next_char != -1);
+  }
+
+  // appends the lookahead character to token, remembers it in last_char,
+  // and reads the next lookahead character
+  private void consume_char () {
+    token += (char) next_char;
+    last_char = next_char;
+    next_char = reader.read ();
+  }
+
+  /**
+   * Consumes a curly-bracketed argument and returns its text without the
+   * curlies.
+   * requires: next_char is the open curly
+   * ensures: stops at the first close curly, line end or end of stream.
+   *   An empty argument ({}) yields "". The terminator is consumed only
+   *   when it is the close curly, so an unterminated argument leaves the
+   *   line end (or end of stream) for get_token to handle, and the loop
+   *   cannot spin forever once the reader is exhausted.
+   */
+  private String consume_arg () {
+    consume_char (); // consume open curly
+    token = "";
+    while (!is_close_curly (next_char)
+           && !is_eol (next_char)
+           && !is_eos (next_char))
+      consume_char ();
+    String arg = token;
+    if (is_close_curly (next_char))
+      consume_char (); // consume close curly
+    return arg;
+  }
+
+  /**
+   * requires: next_char contains next character in reader <p>
+   * ensures: returns next token according to one of these productions: <p>
+   * <blockquote><pre>
+   * char-sequence = alphanumeric+
+   * whitespace ::= (space | tab)+
+   * command ::= slash alphanum* [star]
+   * paragraph-break ::= <blank line>
+   * line-break ::= slash slash
+   * hyphen-sequence ::= hyphen+
+   * dot-sequence ::= dot+
+   * underscore ::= underscore
+   * </pre></blockquote>
+   * quote characters, disambiguated by context:
+   * open-single-quote: when not preceded by alphanumeric
+   * close-single-quote: when not followed by alphanumeric and preceded by
+   * open-single-quote
+   * open-double-quote: when not preceded by open-double-quote
+   * close-double-quote: when preceded by open-double-quote
+   * apostrophe: between alphanumerics, or when followed by numeric
+   * prime: after alphanumeric, when not followed by alphanumeric,
+   * and not preceded by open-single-quote
+   * @return the next token.
+   * explicitly returns end of stream token.
+   */
+  public Token get_token () {
+    token = new String ();
+    if (is_eos (next_char))
+      return new Token (Token.ENDOFSTREAM(), 0);
+
+    // blank lines and comment lines are only recognized at the start of a line
+    if (at_start_of_line) {
+      if (is_eol (next_char)) {
+        consume_char ();
+        within_single_quotes = false;
+        within_double_quotes = false;
+        return new Token (Token.PARABREAK(), 0);
+      }
+      else if (is_hash (next_char)) {
+        // the rest of the line is the comment text; consume_char then
+        // replaces the hash in next_char with the character read after it
+        String line = reader.readLine ();
+        consume_char ();
+        return new Token (Token.COMMENT(), line, 0);
+      }
+      else
+        at_start_of_line = false;
+    }
+
+    // a single line end counts as whitespace, two in a row as a paragraph break
+    if (is_eol (next_char)) {
+      consume_char ();
+      at_start_of_line = true;
+      if (is_eol (next_char)) {
+        consume_char ();
+        within_single_quotes = false;
+        within_double_quotes = false;
+        return new Token (Token.PARABREAK(), 0);
+      }
+      // check this
+      return new Token (Token.WHITESPACE(), " ", 0);
+    }
+
+    // commands, line breaks and escaped characters all start with a slash
+    if (is_slash (next_char)) {
+      consume_char ();
+      token = "";
+      if (is_slash (next_char)) {
+        consume_char ();
+        return new Token (Token.LINEBREAK(), 0);
+      }
+      if (!is_alphabetic (next_char)) {
+        // next character assumed prefixed with slash to avoid special treatment
+        // eg, \< for <, \$ for $
+        String escaped = new Character ((char) next_char).toString ();
+        // consume the escaped character so that the next call does not
+        // tokenize it a second time (which would give it exactly the special
+        // treatment the escape is meant to avoid); line ends and end of
+        // stream are left in place for the regular handling above
+        if (!is_eol (next_char) && !is_eos (next_char))
+          consume_char ();
+        return new Token (Token.OTHER(), escaped, 0);
+      }
+      while (is_alphanumeric (next_char)) consume_char ();
+      String command_name = token;
+      // an optional star after the name is consumed but not part of the name
+      if (is_star (next_char)) consume_char ();
+      if (command_name.equals (preamblecommand_name())) {
+        return new Token (Token.PREAMBLECOMMAND(), 0);
+      }
+      if (command_name.equals (separatorcommand_name())) {
+        // consume whitespace until next token
+        while (is_whitespace (next_char)) consume_char ();
+        return new Token (Token.SEPARATORCOMMAND(), 0);
+      }
+      if (is_less_than (next_char)) {
+        consume_char ();
+        return new Token (Token.FORMATCOMMAND(), command_name, 0);
+      }
+      if (is_open_curly (next_char)) {
+        String arg = consume_arg ();
+        if (command_name.equals (loadcharmapcommand_name())) {
+          return new Token (Token.LOADCHARMAPCOMMAND(), arg, 0);
+        }
+        if (command_name.equals (loadstylesheetcommand_name())) {
+          return new Token (Token.LOADSTYLESHEETCOMMAND(), arg, 0);
+        }
+        if (command_name.equals (refcommand_name())) {
+          return new Token (Token.REFCOMMAND(), arg, 0);
+        }
+        if (command_name.equals (tagcommand_name())) {
+          return new Token (Token.TAGCOMMAND(), arg, 0);
+        }
+        if (command_name.equals (citecommand_name())) {
+          return new Token (Token.CITECOMMAND(), arg, 0);
+        }
+        // any other command with an argument falls through to the checks below
+      }
+      if (parastyles.contains (command_name)) {
+        while (is_whitespace (next_char)) consume_char ();
+        // paragraph style command consumes the first linebreak following it also
+        if (is_eol (next_char)) consume_char ();
+        return new Token (Token.PARASTYLECOMMAND(), command_name, 0);
+      }
+      else
+        // temporary
+        return new Token (Token.CHARCOMMAND(), command_name, 0);
+    }
+
+    if (is_alphabetic (next_char)) {
+      consume_char ();
+      while (is_alphabetic (next_char)) consume_char ();
+      return new Token (Token.ALPHABETIC(), token, 0);
+    }
+    if (is_numeric (next_char)) {
+      consume_char ();
+      while (is_numeric (next_char)) consume_char ();
+      return new Token (Token.NUMERIC(), token, 0);
+    }
+    if (is_whitespace (next_char)) {
+      consume_char ();
+      while (is_whitespace (next_char)) consume_char ();
+      if (is_eol (next_char)) {
+        consume_char ();
+        // the line end has been consumed, so the parser is now at the start
+        // of a line; without this a blank line or a comment following
+        // trailing whitespace would not be recognized
+        at_start_of_line = true;
+        // check this
+        return new Token (Token.WHITESPACE(), " ", 0);
+      }
+      return new Token (Token.WHITESPACE(), token, 0);
+    }
+    if (is_hyphen (next_char)) {
+      consume_char ();
+      while (is_hyphen (next_char)) consume_char ();
+      return new Token (Token.HYPHENS(), token, 0);
+    }
+    if (is_dot (next_char)) {
+      consume_char ();
+      while (is_dot (next_char)) consume_char ();
+      return new Token (Token.DOTS(), token, 0);
+    }
+    if (is_underscore (next_char)) {
+      consume_char ();
+      return new Token (Token.UNDERSCORE(), 0);
+    }
+    if (is_dollar (next_char)) {
+      consume_char ();
+      return new Token (Token.DOLLAR(), 0);
+    }
+    if (is_greater_than (next_char)) {
+      consume_char ();
+      return new Token (Token.POPFORMATCOMMAND(), 0);
+    }
+    if (is_single_quote (next_char)) {
+      // remember what preceded the quote, then consume the quote so that
+      // next_char is the character that follows it; testing next_char
+      // before consuming would test the quote itself, and the apostrophe
+      // case between alphanumerics could never be taken
+      int before_quote = last_char;
+      consume_char ();
+      if (is_alphanumeric (before_quote)) {
+        if (is_alphanumeric (next_char)) {
+          return new Token (Token.APOSTROPHE(), 0);
+        }
+        else if (within_single_quotes) {
+          within_single_quotes = false;
+          return new Token (Token.CLOSESINGLEQUOTE(), 0);
+        }
+        else {
+          return new Token (Token.PRIME(), 0);
+        }
+      }
+      if (is_numeric (next_char)) {
+        return new Token (Token.APOSTROPHE(), 0);
+      }
+      else {
+        within_single_quotes = true;
+        return new Token (Token.OPENSINGLEQUOTE(), 0);
+      }
+    }
+    if (is_double_quote (next_char)) {
+      consume_char ();
+      if (within_double_quotes) {
+        within_double_quotes = false;
+        return new Token (Token.CLOSEDOUBLEQUOTE(), 0);
+      }
+      else {
+        within_double_quotes = true;
+        return new Token (Token.OPENDOUBLEQUOTE(), 0);
+      }
+    }
+    // anything not matched above becomes a one-character OTHER token
+    consume_char ();
+    return new Token (Token.OTHER(), token, 0);
+  }
+
+  // character classification; c is a character code, or -1 for end of stream
+  static boolean is_eol (int c) {return c == '\n';}
+  static boolean is_eos (int c) {return c == -1;}
+  static boolean is_star (int c) {return c == '*';}
+  static boolean is_hash (int c) {return c == '#';}
+  static boolean is_dot (int c) {return c == '.';}
+  static boolean is_slash (int c) {return c == '\\';}
+  static boolean is_hyphen (int c) {return c == '-';}
+  static boolean is_underscore (int c) {return c == '_';}
+  static boolean is_dollar (int c) {return c == '$';}
+  static boolean is_single_quote (int c) {return c == '\'';}
+  static boolean is_double_quote (int c) {return c == '\"';}
+  static boolean is_open_curly (int c) {return c == '{';}
+  static boolean is_close_curly (int c) {return c == '}';}
+  static boolean is_less_than (int c) {return c == '<';}
+  static boolean is_greater_than (int c) {return c == '>';}
+
+  // should perhaps use Character.isLetter? not sure, because that allows Unicode chars for
+  // other languages that are outside the a-Z range.
+  static boolean is_alphabetic (int c) {
+    return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
+  }
+  static boolean is_numeric (int c) {return c >= '0' && c <= '9';}
+  static boolean is_alphanumeric (int c) {
+    return is_numeric (c) || is_alphabetic (c);
+  }
+  static boolean is_whitespace (int c) {return c == ' ' || c == '\t';}
+}
diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/StandardEngine.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/StandardEngine.java
new file mode 100644
index 00000000..89a303fe
--- /dev/null
+++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/StandardEngine.java
@@ -0,0 +1,203 @@
+/**
+ * StandardEngine class
+ * Standard registration of actions
+ * Implemented as a subclass of Engine for no good reason
+ *
+ * @author Daniel Jackson
+ * @version 0, 07/08/01
+ */
+
+//package tagger;
+//import java.io.*;
+//import java.util.*;
+
+
+// a hack to work around lack of proper closures in Java
+// can't assign to local variable within actions
+class StringBox {
+  String string;
+  StringBox (String s) {string = s;}
+  void set (String s) {string = s;}
+}
+
+
+public class StandardEngine extends Engine {
+
+  //static Engine STANDARD;
+
+  // reserved words for property files
+
+  // character table
+  static public String index_prop_name () { return "index"; }
+  static public String font_prop_name () { return "font"; }
+
+  // names under which special characters are looked up in the character map;
+  // apostrophe and close-single-quote share the "quoteright" entry
+  static public String apostrophe_char_name () { return "quoteright"; }
+  static public String prime_char_name () { return "prime"; }
+  static public String opensinglequote_char_name () { return "quoteleft"; }
+  static public String closesinglequote_char_name() { return "quoteright"; }
+  static public String opendoublequote_char_name () { return "quotedblleft"; }
+  static public String closedoublequote_char_name() { return "quotedblright"; }
+  static public String hyphen_char_name () { return "hyphen"; }
+  static public String endash_char_name () { return "endash"; }
+  static public String emdash_char_name () { return "emdash"; }
+  static public String period_char_name () { return "period"; }
+  static public String twodotleader_char_name () { return "twodotleader"; }
+  static public String ellipsis_char_name () { return "ellipsis"; }
+
+  // names of the format commands
+  static public String ROMAN_COMMANDNAME () { return "roman"; }
+  static public String BOLD_COMMANDNAME () { return "bold"; }
+  static public String ITALICS_COMMANDNAME () { return "italic"; }
+  static public String SUBSCRIPT_COMMANDNAME () { return "sub"; }
+  static public String SUPERSCRIPT_COMMANDNAME () { return "sup"; }
+
+  // style sheet
+  static public String next_style_prop_name () { return "next"; }
+  static public String default_style_name () { return "body"; }
+
+
+  /**
+   * Registers one action per token type.
+   *
+   * @param generator backend handed to the actions that produce output
+   * @param style_map style sheet properties, shared with the numbering and
+   *   the style related actions
+   * @param index_stream not used by this constructor
+   */
+  public StandardEngine (
+    final Generator generator,
+    final PropertyMap style_map,
+    final FileOutputStream index_stream
+  ) {
+    Engine();
+
+    final PropertyMap char_map = new PropertyMap ();
+    final Numbering numbering = new Numbering (style_map);
+
+    final StringBox current_para_style = new StringBox (default_style_name());
+
+    // special action for start of paragraph
+    // created once, but dynamically inserted and removed
+    // so that it's performed once at the start of each paragraph
+    final Action paragraph_action = new ParagraphAction ( generator,
+                                                          current_para_style,
+                                                          numbering );
+
+    register_by_type (new PlaintextAction (generator),
+                      Token.ALPHABETIC());
+
+    register_by_type (new PlaintextAction (generator),
+                      Token.NUMERIC());
+
+    register_by_type (new PlaintextAction (generator),
+                      Token.WHITESPACE());
+
+    register_by_type (new NewlineAction (generator),
+                      Token.LINEBREAK());
+
+    register_by_type (new ApostropheAction (generator, char_map),
+                      Token.APOSTROPHE());
+
+    register_by_type (new PrimeAction (generator, char_map),
+                      Token.PRIME());
+
+    register_by_type (new OpenSingleQuoteAction (generator, char_map),
+                      Token.OPENSINGLEQUOTE());
+
+    register_by_type (new CloseSingleQuoteAction (generator, char_map),
+                      Token.CLOSESINGLEQUOTE());
+
+    register_by_type (new OpenDoubleQuoteAction (generator, char_map),
+                      Token.OPENDOUBLEQUOTE());
+
+    register_by_type (new CloseDoubleQuoteAction (generator, char_map),
+                      Token.CLOSEDOUBLEQUOTE());
+
+    register_by_type (new HyphenAction (generator, char_map),
+                      Token.HYPHENS());
+
+    register_by_type (new DotsAction (generator, char_map),
+                      Token.DOTS());
+
+    register_by_type (new LoadCharMapCommandAction (generator,
+                                                    char_map,
+                                                    numbering),
+                      Token.LOADCHARMAPCOMMAND());
+
+    register_by_type (new LoadStyleSheetCommandAction (generator,
+                                                       style_map,
+                                                       numbering),
+                      Token.LOADSTYLESHEETCOMMAND());
+
+    final Action unsuppress_action = new UnsuppressAction (generator);
+
+    // preamble command switches on output suppression
+    // registers action to turn suppression off with paragraph break command
+    register_by_type (new PreambleCommandAction (generator,
+                                                 unsuppress_action,
+                                                 this ),
+                      Token.PREAMBLECOMMAND());
+
+    register_by_type (new ParaBreakAction (paragraph_action,
+                                           current_para_style,
+                                           style_map),
+                      Token.PARABREAK());
+
+    register_by_type (new ParaStyleCommandAction (current_para_style),
+                      Token.PARASTYLECOMMAND());
+
+    register_by_type (new CharCommandAction (generator,
+                                             char_map),
+                      Token.CHARCOMMAND());
+
+    register_by_type (new UnderscoreAction (generator) {},
+                      Token.UNDERSCORE());
+
+    // used to italicize alphabetic tokens in math mode
+    final Action push_italics_action = new PushItalicsAction (generator);
+    final Action pop_italics_action = new PopItalicsAction (generator);
+
+    register_by_type (new DollarAction (push_italics_action,
+                                        pop_italics_action),
+                      Token.DOLLAR());
+
+    register_by_type (new FormatCommandAction (generator),
+                      Token.FORMATCOMMAND());
+
+    register_by_type (new PopFormatCommandAction (generator),
+                      Token.POPFORMATCOMMAND());
+
+    register_by_type (new OtherAction (generator),
+                      Token.OTHER());
+
+    register_by_type (new EndOfStreamAction (generator),
+                      Token.ENDOFSTREAM());
+
+    //STANDARD = this;
+  }
+
+  /* no actions for these token types:
+       COMMENT
+       SEPARATORCOMMAND
+  */
+
+  /*
+  not yet coded:
+
+  public static final int REFCOMMAND = 32;
+  public static final int TAGCOMMAND = 33;
+  public static final int CITECOMMAND = 34;
+  */
+
+
+  /* general form of action registration is this:
+     register_by_type (new Action () {
+       public void perform (Token t) {
+         // put code to be executed for token type here
+       }},
+       Token.TYPENAME);
+  */
+
+  /**
+   * Looks up the index property of char_name in char_map and passes it to
+   * generator.special_char; when there is no such entry, prints an
+   * "Unresolved character" message prefixed with line instead.
+   */
+  static public void put_special_char (Generator generator,
+                                       PropertyMap char_map,
+                                       String char_name,
+                                       int line) {
+    String index = char_map.get_property (char_name, index_prop_name());
+    if (index == null) {
+      System.out.println (line + ": Unresolved character: " + char_name);
+    }
+    else
+      generator.special_char (index);
+  }
+}
diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/Tagger.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Tagger.java
new file mode 100644
index 00000000..ed66fed9
--- /dev/null
+++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Tagger.java
@@ -0,0 +1,70 @@
+/**
+ * Tagger class
+ * Main class of Tagger application
+ *
+ * @author Daniel Jackson
+ * @version 0, 07/02/01
+ */
+
+
+//package tagger;
+//import java.io.*;
+//import java.util.*;
+
+public class Tagger {
+
+  // holds mapping of token types to actions
+  //Engine engine;
+
+  /**
+   * The main method of the Tagger application.
+   * @param args The command line arguments, described in usage method
+   */
+  public static void main (String[] args) {
+    check_usage (args);
+    // check_usage only prints the usage message; without a base name there
+    // is nothing to process, and args[0] below would be out of bounds
+    if (args.length == 0) return;
+
+    String base_name = args[0];
+    String source_file_name = base_name + ".txt";
+    String output_file_name = base_name + ".tag.txt";
+    String index_file_name = base_name + ".index.txt";
+    FileInputStream input_stream;
+    FileOutputStream output_stream;
+    FileOutputStream index_stream;
+
+    input_stream = new FileInputStream(source_file_name);
+    output_stream = new FileOutputStream(output_file_name);
+    index_stream = new FileOutputStream(index_file_name);
+
+    // for now, hardwire to Quark
+    Generator generator = new QuarkGenerator (output_stream);
+
+    PropertyMap style_map = new PropertyMap ();
+    Engine engine = new StandardEngine (generator, style_map, index_stream);
+
+    consume_source (engine, style_map, input_stream);
+
+    // release every stream opened above, not only the tagged output
+    input_stream.close ();
+    output_stream.close ();
+    index_stream.close ();
+  }
+
+  /**
+   * Feeds every token of the source to the engine, followed by the end of
+   * stream token.
+   *
+   * @param engine receives each token through consume_token
+   * @param style_map its items are given to the parser as the set of
+   *   paragraph style names
+   * @param source_reader stream holding the source text
+   */
+  public static void consume_source (Engine engine,
+                                     PropertyMap style_map,
+                                     FileInputStream source_reader) {
+    HashSet para_styles = style_map.get_items ();
+    SourceParser p = new SourceParser (source_reader, para_styles);
+    Token token;
+    while (p.has_more_tokens ()) {
+      token = p.get_token ();
+      engine.consume_token (token);
+    }
+    // consume end of stream token explicitly
+    // depends on get_token returning ENDOFSTREAM token when no more tokens
+    token = p.get_token ();
+    engine.consume_token (token);
+  }
+
+  // prints the usage message when no argument was given; does not stop the
+  // program, so the caller has to check for itself before using args
+  static void check_usage (String args []) {
+    if (args.length == 0) {
+      System.out.println ("one argument required, should be name of source file, excluding .txt extension");
+    }
+  }
+}
diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/Token.java b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Token.java
new file mode 100644
index 00000000..fc99c39e
--- /dev/null
+++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/Token.java
@@ -0,0 +1,71 @@
+/**
+ * Token class
+ * Represents tokens generated by lexer
+ * <p>
+ *
+ * @author Daniel Jackson
+ * @version 0, 07/06/01
+ */
+
+//package tagger;
+//import java.io.*;
+//import java.util.*;
+
+public class Token {
+  // text carried by the token; may be null
+  public String arg;
+  // line number supplied by whoever created the token
+  public int line;
+  // one of the type codes below
+  public int type;
+
+  // text
+  public static final int COMMENT () { return 0; }
+  public static final int WHITESPACE () { return 1; }
+  public static final int ALPHABETIC () { return 2; }
+  public static final int NUMERIC () { return 3; }
+
+  // breaks
+  public static final int PARABREAK () { return 4; }
+  public static final int LINEBREAK () { return 5; }
+
+  // punctuation that maps to special characters
+  public static final int APOSTROPHE () { return 10; }
+  public static final int PRIME () { return 11; }
+  public static final int OPENSINGLEQUOTE () { return 12; }
+  public static final int CLOSESINGLEQUOTE () { return 13; }
+  public static final int OPENDOUBLEQUOTE () { return 14; }
+  public static final int CLOSEDOUBLEQUOTE () { return 15; }
+  public static final int HYPHENS () { return 16; }
+  public static final int DOTS () { return 17; }
+
+  // commands
+  public static final int PARASTYLECOMMAND () { return 20; }
+  public static final int FORMATCOMMAND () { return 21; }
+  public static final int POPFORMATCOMMAND () { return 22; }
+  public static final int REFCOMMAND () { return 23; }
+  public static final int TAGCOMMAND () { return 24; }
+  public static final int CITECOMMAND () { return 25; }
+  public static final int CHARCOMMAND () { return 26; }
+  public static final int LOADCHARMAPCOMMAND () { return 27; }
+  public static final int LOADSTYLESHEETCOMMAND() { return 28; }
+  public static final int PREAMBLECOMMAND () { return 29; }
+  public static final int SEPARATORCOMMAND () { return 30; }
+
+  // everything else
+  public static final int UNDERSCORE () { return 31; }
+  public static final int DOLLAR () { return 32; }
+  public static final int OTHER () { return 33; }
+  public static final int ENDOFSTREAM () { return 34; }
+
+  // largest type code in use; ENDOFSTREAM is the last one defined above
+  public static final int MAXTOKEN () { return ENDOFSTREAM (); }
+
+  /**
+   * Creates a token that carries text.
+   *
+   * @param type one of the type codes of this class
+   * @param arg text of the token
+   * @param line line number recorded with the token
+   */
+  public Token (int type, String arg, int line) {
+    this.type = type;
+    this.line = line;
+    this.arg = arg;
+  }
+
+  /**
+   * Creates a token without text; arg is null.
+   *
+   * @param type one of the type codes of this class
+   * @param line line number recorded with the token
+   */
+  public Token (int type, int line) {
+    this.type = type;
+    this.line = line;
+    this.arg = null;
+  }
+
+  // temporary implementation: just the text, which is null for tokens
+  // created without one
+  public String toString () {
+    return arg;
+  }
+}
diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/charmap.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/charmap.txt
new file mode 100644
index 00000000..cc4ab0e0
--- /dev/null
+++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/charmap.txt
@@ -0,0 +1,31 @@
+# basic characters
+<char:linebreak><index:22>
+
+# dots
+<char:cdot><index:22>
+
+# quotes
+<char:quote><index:22>
+<char:quoteleft><index:22>
+<char:quoteright><index:22>
+<char:quotedblleft><index:22>
+<char:quotedblright><index:22>
+
+#dashes
+<char:hyphen><index:22>
+<char:endash><index:22>
+<char:emdash><index:22>
+
+# math symbols
+<char:oplus><index:22>
+<char:langle><index:22>
+<char:rangle><index:22>
+<char:textarrow><index:22>
+<char:hat><index:22>
+<char:fatsemi><index:22>
+<char:forall><index:22>
+<char:fatdot><index:22>
+<char:fatsemi><index:22>
+<char:implies><index:22>
+<char:exists><index:22>
+<char:and><index:22>
diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/lucmathext-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/lucmathext-charmap.txt
new file mode 100644
index 00000000..623b3040
--- /dev/null
+++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/lucmathext-charmap.txt
@@ -0,0 +1,130 @@
+# character map for Lucida New Math Extended font
+
+<char:parenleftbig><font:LucidNewMatExtT><index:161>
+<char:parenrightbig><font:LucidNewMatExtT><index:162>
+<char:bracketleftbig><font:LucidNewMatExtT><index:163>
+<char:bracketrightbig><font:LucidNewMatExtT><index:164>
+<char:floorleftbig><font:LucidNewMatExtT><index:165>
+<char:floorrightbig><font:LucidNewMatExtT><index:166>
+<char:ceilingleftbig><font:LucidNewMatExtT><index:167>
+<char:ceilingrightbig><font:LucidNewMatExtT><index:168>
+<char:braceleftbig><font:LucidNewMatExtT><index:169>
+<char:bracerightbig><font:LucidNewMatExtT><index:170>
+<char:angbracketleftbig><font:LucidNewMatExtT><index:173>
+<char:angbracketrightbig><font:LucidNewMatExtT><index:174> +<char:vextendsingle><font:LucidNewMatExtT><index:175> +<char:vextenddouble><font:LucidNewMatExtT><index:176> +<char:slashbig><font:LucidNewMatExtT><index:177> +<char:backslashbig><font:LucidNewMatExtT><index:178> +<char:parenleftBig><font:LucidNewMatExtT><index:179> +<char:parenrightBig><font:LucidNewMatExtT><index:180> +<char:parenleftbigg><font:LucidNewMatExtT><index:181> +<char:parenrightbigg><font:LucidNewMatExtT><index:182> +<char:bracketleftbigg><font:LucidNewMatExtT><index:183> +<char:bracketrightbigg><font:LucidNewMatExtT><index:184> +<char:floorleftbigg><font:LucidNewMatExtT><index:185> +<char:floorrightbigg><font:LucidNewMatExtT><index:186> +<char:ceilingleftbigg><font:LucidNewMatExtT><index:187> +<char:ceilingrightbigg><font:LucidNewMatExtT><index:188> +<char:braceleftbigg><font:LucidNewMatExtT><index:189> +<char:bracerightbigg><font:LucidNewMatExtT><index:190> +<char:angbracketleftbigg><font:LucidNewMatExtT><index:28> +<char:angbracketrightbigg><font:LucidNewMatExtT><index:29> +<char:slashbigg><font:LucidNewMatExtT><index:193> +<char:backslashbigg><font:LucidNewMatExtT><index:194> +<char:parenleftBigg><font:LucidNewMatExtT><index:195> +<char:parenrightBigg><font:LucidNewMatExtT><index:33> +<char:bracketleftBigg><font:LucidNewMatExtT><index:34> +<char:bracketrightBigg><font:LucidNewMatExtT><index:35> +<char:floorleftBigg><font:LucidNewMatExtT><index:36> +<char:floorrightBigg><font:LucidNewMatExtT><index:37> +<char:ceilingleftBigg><font:LucidNewMatExtT><index:38> +<char:ceilingrightBigg><font:LucidNewMatExtT><index:39> +<char:braceleftBigg><font:LucidNewMatExtT><index:40> +<char:bracerightBigg><font:LucidNewMatExtT><index:41> +<char:angbracketleftBigg><font:LucidNewMatExtT><index:42> +<char:angbracketrightBigg><font:LucidNewMatExtT><index:43> +<char:slashBigg><font:LucidNewMatExtT><index:44> +<char:backslashBigg><font:LucidNewMatExtT><index:45> +<char:slashBig><font:LucidNewMatExtT><index:46> 
+<char:backslashBig><font:LucidNewMatExtT><index:47> +<char:parenlefttp><font:LucidNewMatExtT><index:48> +<char:parenrighttp><font:LucidNewMatExtT><index:49> +<char:bracketlefttp><font:LucidNewMatExtT><index:50> +<char:bracketrighttp><font:LucidNewMatExtT><index:51> +<char:bracketleftbt><font:LucidNewMatExtT><index:52> +<char:bracketrightbt><font:LucidNewMatExtT><index:53> +<char:bracketleftex><font:LucidNewMatExtT><index:54> +<char:bracketrightex><font:LucidNewMatExtT><index:55> +<char:bracelefttp><font:LucidNewMatExtT><index:56> +<char:bracerighttp><font:LucidNewMatExtT><index:57> +<char:braceleftbt><font:LucidNewMatExtT><index:58> +<char:bracerightbt><font:LucidNewMatExtT><index:59> +<char:braceleftmid><font:LucidNewMatExtT><index:60> +<char:bracerightmid><font:LucidNewMatExtT><index:61> +<char:braceex><font:LucidNewMatExtT><index:62> +<char:arrowvertex><font:LucidNewMatExtT><index:63> +<char:parenleftbt><font:LucidNewMatExtT><index:64> +<char:parenrightbt><font:LucidNewMatExtT><index:65> +<char:parenleftex><font:LucidNewMatExtT><index:66> +<char:parenrightex><font:LucidNewMatExtT><index:67> +<char:angbracketleftBig><font:LucidNewMatExtT><index:68> +<char:angbracketrightBig><font:LucidNewMatExtT><index:69> +<char:unionsqtext><font:LucidNewMatExtT><index:70> +<char:unionsqdisplay><font:LucidNewMatExtT><index:71> +<char:contintegraltext><font:LucidNewMatExtT><index:72> +<char:contintegraldisplay><font:LucidNewMatExtT><index:73> +<char:circledottext><font:LucidNewMatExtT><index:74> +<char:circledotdisplay><font:LucidNewMatExtT><index:75> +<char:circleplustext><font:LucidNewMatExtT><index:76> +<char:circleplusdisplay><font:LucidNewMatExtT><index:77> +<char:circlemultiplytext><font:LucidNewMatExtT><index:78> +<char:circlemultiplydisplay><font:LucidNewMatExtT><index:79> +<char:summationtext><font:LucidNewMatExtT><index:80> +<char:producttext><font:LucidNewMatExtT><index:81> +<char:integraltext><font:LucidNewMatExtT><index:82> 
+<char:uniontext><font:LucidNewMatExtT><index:83> +<char:intersectiontext><font:LucidNewMatExtT><index:84> +<char:unionmultitext><font:LucidNewMatExtT><index:85> +<char:logicalandtext><font:LucidNewMatExtT><index:86> +<char:logicalortext><font:LucidNewMatExtT><index:87> +<char:summationdisplay><font:LucidNewMatExtT><index:88> +<char:productdisplay><font:LucidNewMatExtT><index:89> +<char:integraldisplay><font:LucidNewMatExtT><index:90> +<char:uniondisplay><font:LucidNewMatExtT><index:91> +<char:intersectiondisplay><font:LucidNewMatExtT><index:92> +<char:unionmultidisplay><font:LucidNewMatExtT><index:93> +<char:logicalanddisplay><font:LucidNewMatExtT><index:94> +<char:logicalordisplay><font:LucidNewMatExtT><index:95> +<char:coproducttext><font:LucidNewMatExtT><index:96> +<char:coproductdisplay><font:LucidNewMatExtT><index:97> +<char:hatwide><font:LucidNewMatExtT><index:98> +<char:hatwider><font:LucidNewMatExtT><index:99> +<char:hatwidest><font:LucidNewMatExtT><index:100> +<char:tildewide><font:LucidNewMatExtT><index:101> +<char:tildewider><font:LucidNewMatExtT><index:102> +<char:tildewidest><font:LucidNewMatExtT><index:103> +<char:bracketleftBig><font:LucidNewMatExtT><index:104> +<char:bracketrightBig><font:LucidNewMatExtT><index:105> +<char:floorleftBig><font:LucidNewMatExtT><index:106> +<char:floorrightBig><font:LucidNewMatExtT><index:107> +<char:ceilingleftBig><font:LucidNewMatExtT><index:108> +<char:ceilingrightBig><font:LucidNewMatExtT><index:109> +<char:braceleftBig><font:LucidNewMatExtT><index:110> +<char:bracerightBig><font:LucidNewMatExtT><index:111> +<char:radicalbig><font:LucidNewMatExtT><index:112> +<char:radicalBig><font:LucidNewMatExtT><index:113> +<char:radicalbigg><font:LucidNewMatExtT><index:114> +<char:radicalBigg><font:LucidNewMatExtT><index:115> +<char:radicalbt><font:LucidNewMatExtT><index:116> +<char:radicalvertex><font:LucidNewMatExtT><index:117> +<char:radicaltp><font:LucidNewMatExtT><index:118> 
+<char:arrowvertexdbl><font:LucidNewMatExtT><index:119> +<char:arrowtp><font:LucidNewMatExtT><index:120> +<char:arrowbt><font:LucidNewMatExtT><index:121> +<char:bracehtipdownleft><font:LucidNewMatExtT><index:122> +<char:bracehtipdownright><font:LucidNewMatExtT><index:123> +<char:bracehtipupleft><font:LucidNewMatExtT><index:124> +<char:bracehtipupright><font:LucidNewMatExtT><index:125> +<char:arrowdbltp><font:LucidNewMatExtT><index:126> +<char:arrowdblbt><font:LucidNewMatExtT><index:196> diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/lucmathit-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/lucmathit-charmap.txt new file mode 100644 index 00000000..612d09c4 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/lucmathit-charmap.txt @@ -0,0 +1,68 @@ +# character map for Lucida Math Italic font + +<char:Gamma><font:LucidNewMatItaT><index:161> +<char:Delta><font:LucidNewMatItaT><index:162> +<char:Theta><font:LucidNewMatItaT><index:163> +<char:Lambda><font:LucidNewMatItaT><index:164> +<char:Xi><font:LucidNewMatItaT><index:165> +<char:Pi><font:LucidNewMatItaT><index:166> +<char:Sigma><font:LucidNewMatItaT><index:167> +<char:Upsilon><font:LucidNewMatItaT><index:7> +<char:Phi><font:LucidNewMatItaT><index:169> +<char:Psi><font:LucidNewMatItaT><index:170> +<char:Omega><font:LucidNewMatItaT><index:173> +<char:alpha><font:LucidNewMatItaT><index:174> +<char:beta><font:LucidNewMatItaT><index:175> +<char:gamma><font:LucidNewMatItaT><index:176> +<char:delta><font:LucidNewMatItaT><index:177> +<char:epsilon1><font:LucidNewMatItaT><index:178> +<char:zeta><font:LucidNewMatItaT><index:179> +<char:eta><font:LucidNewMatItaT><index:180> +<char:theta><font:LucidNewMatItaT><index:181> +<char:iota><font:LucidNewMatItaT><index:182> +<char:kappa><font:LucidNewMatItaT><index:183> +<char:lambda><font:LucidNewMatItaT><index:184> +<char:mu><font:LucidNewMatItaT><index:185> +<char:nu><font:LucidNewMatItaT><index:186> +<char:xi><font:LucidNewMatItaT><index:187> 
+<char:pi><font:LucidNewMatItaT><index:188> +<char:rho><font:LucidNewMatItaT><index:189> +<char:sigma><font:LucidNewMatItaT><index:190> +<char:tau><font:LucidNewMatItaT><index:191> +<char:upsilon><font:LucidNewMatItaT><index:192> +<char:phi><font:LucidNewMatItaT><index:193> +<char:chi><font:LucidNewMatItaT><index:194> +<char:psi><font:LucidNewMatItaT><index:195> +<char:tie><font:LucidNewMatItaT><index:196> +<char:omega><font:LucidNewMatItaT><index:33> +<char:epsilon><font:LucidNewMatItaT><index:34> +<char:theta1><font:LucidNewMatItaT><index:35> +<char:pi1><font:LucidNewMatItaT><index:36> +<char:rho1><font:LucidNewMatItaT><index:37> +<char:sigma1><font:LucidNewMatItaT><index:38> +<char:phi1><font:LucidNewMatItaT><index:39> +<char:arrowlefttophalf><font:LucidNewMatItaT><index:40> +<char:arrowleftbothalf><font:LucidNewMatItaT><index:41> +<char:arrowrighttophalf><font:LucidNewMatItaT><index:42> +<char:arrowrightbothalf><font:LucidNewMatItaT><index:43> +<char:arrowhookleft><font:LucidNewMatItaT><index:44> +<char:arrowhookright><font:LucidNewMatItaT><index:45> +<char:triangleright><font:LucidNewMatItaT><index:46> +<char:triangleleft><font:LucidNewMatItaT><index:47> +<char:period><font:LucidNewMatItaT><index:58> +<char:comma><font:LucidNewMatItaT><index:59> +<char:less><font:LucidNewMatItaT><index:60> +<char:slash><font:LucidNewMatItaT><index:61> +<char:greater><font:LucidNewMatItaT><index:62> +<char:star><font:LucidNewMatItaT><index:63> +<char:partialdiff><font:LucidNewMatItaT><index:64> +<char:flat><font:LucidNewMatItaT><index:91> +<char:natural><font:LucidNewMatItaT><index:92> +<char:sharp><font:LucidNewMatItaT><index:93> +<char:slurbelow><font:LucidNewMatItaT><index:94> +<char:slurabove><font:LucidNewMatItaT><index:95> +<char:lscript><font:LucidNewMatItaT><index:96> +<char:dotlessi><font:LucidNewMatItaT><index:123> +<char:dotlessj><font:LucidNewMatItaT><index:124> +<char:weierstrass><font:LucidNewMatItaT><index:125> +<char:vector><font:LucidNewMatItaT><index:126> diff 
--git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/lucmathsym-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/lucmathsym-charmap.txt new file mode 100644 index 00000000..bdde61d2 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/lucmathsym-charmap.txt @@ -0,0 +1,130 @@ +# mathematical characters for Lucida New Math Symbol font + +<char:minus><font:LucidNewMatSymT><index:161> +<char:periodcentered><font:LucidNewMatSymT><index:162> +<char:multiply><font:LucidNewMatSymT><index:163> +<char:asteriskmath><font:LucidNewMatSymT><index:164> +<char:divide><font:LucidNewMatSymT><index:165> +<char:diamondmath><font:LucidNewMatSymT><index:166> +<char:plusminus><font:LucidNewMatSymT><index:167> +<char:minusplus><font:LucidNewMatSymT><index:168> +<char:circleplus><font:LucidNewMatSymT><index:169> +<char:circleminus><font:LucidNewMatSymT><index:170> +<char:circlemultiply><font:LucidNewMatSymT><index:173> +<char:circledivide><font:LucidNewMatSymT><index:174> +<char:circledot><font:LucidNewMatSymT><index:175> +<char:circlecopyrt><font:LucidNewMatSymT><index:176> +<char:openbullet><font:LucidNewMatSymT><index:177> +<char:bullet><font:LucidNewMatSymT><index:178> +<char:equivasymptotic><font:LucidNewMatSymT><index:179> +<char:equivalence><font:LucidNewMatSymT><index:180> +<char:reflexsubset><font:LucidNewMatSymT><index:181> +<char:reflexsuperset><font:LucidNewMatSymT><index:182> +<char:lessequal><font:LucidNewMatSymT><index:183> +<char:greaterequal><font:LucidNewMatSymT><index:184> +<char:precedesequal><font:LucidNewMatSymT><index:185> +<char:followsequal><font:LucidNewMatSymT><index:186> +<char:similar><font:LucidNewMatSymT><index:187> +<char:approxequal><font:LucidNewMatSymT><index:188> +<char:propersubset><font:LucidNewMatSymT><index:189> +<char:propersuperset><font:LucidNewMatSymT><index:190> +<char:lessmuch><font:LucidNewMatSymT><index:191> +<char:greatermuch><font:LucidNewMatSymT><index:192> +<char:precedes><font:LucidNewMatSymT><index:193> 
+<char:follows><font:LucidNewMatSymT><index:194> +<char:arrowleft><font:LucidNewMatSymT><index:195> +<char:spade><font:LucidNewMatSymT><index:196> +<char:arrowright><font:LucidNewMatSymT><index:33> +<char:arrowup><font:LucidNewMatSymT><index:34> +<char:arrowdown><font:LucidNewMatSymT><index:35> +<char:arrowboth><font:LucidNewMatSymT><index:36> +<char:arrownortheast><font:LucidNewMatSymT><index:37> +<char:arrowsoutheast><font:LucidNewMatSymT><index:38> +<char:similarequal><font:LucidNewMatSymT><index:39> +<char:arrowdblleft><font:LucidNewMatSymT><index:40> +<char:arrowdblright><font:LucidNewMatSymT><index:41> +<char:arrowdblup><font:LucidNewMatSymT><index:42> +<char:arrowdbldown><font:LucidNewMatSymT><index:43> +<char:arrowdblboth><font:LucidNewMatSymT><index:44> +<char:arrownorthwest><font:LucidNewMatSymT><index:45> +<char:arrowsouthwest><font:LucidNewMatSymT><index:46> +<char:proportional><font:LucidNewMatSymT><index:47> +<char:prime><font:LucidNewMatSymT><index:48> +<char:infinity><font:LucidNewMatSymT><index:49> +<char:element><font:LucidNewMatSymT><index:50> +<char:owner><font:LucidNewMatSymT><index:51> +<char:triangle><font:LucidNewMatSymT><index:52> +<char:triangleinv><font:LucidNewMatSymT><index:53> +<char:negationslash><font:LucidNewMatSymT><index:54> +<char:mapsto><font:LucidNewMatSymT><index:55> +<char:universal><font:LucidNewMatSymT><index:56> +<char:existential><font:LucidNewMatSymT><index:57> +<char:logicalnot><font:LucidNewMatSymT><index:58> +<char:emptyset><font:LucidNewMatSymT><index:59> +<char:Rfractur><font:LucidNewMatSymT><index:60> +<char:Ifractur><font:LucidNewMatSymT><index:61> +<char:latticetop><font:LucidNewMatSymT><index:62> +<char:perpendicular><font:LucidNewMatSymT><index:63> +<char:aleph><font:LucidNewMatSymT><index:64> +<char:scriptA><font:LucidNewMatSymT><index:65> +<char:scriptB><font:LucidNewMatSymT><index:66> +<char:scriptC><font:LucidNewMatSymT><index:67> +<char:scriptD><font:LucidNewMatSymT><index:68> 
+<char:scriptE><font:LucidNewMatSymT><index:69> +<char:scriptF><font:LucidNewMatSymT><index:70> +<char:scriptG><font:LucidNewMatSymT><index:71> +<char:scriptH><font:LucidNewMatSymT><index:72> +<char:scriptI><font:LucidNewMatSymT><index:73> +<char:scriptJ><font:LucidNewMatSymT><index:74> +<char:scriptK><font:LucidNewMatSymT><index:75> +<char:scriptL><font:LucidNewMatSymT><index:76> +<char:scriptM><font:LucidNewMatSymT><index:77> +<char:scriptN><font:LucidNewMatSymT><index:78> +<char:scriptO><font:LucidNewMatSymT><index:79> +<char:scriptP><font:LucidNewMatSymT><index:80> +<char:scriptQ><font:LucidNewMatSymT><index:81> +<char:scriptR><font:LucidNewMatSymT><index:82> +<char:scriptS><font:LucidNewMatSymT><index:83> +<char:scriptT><font:LucidNewMatSymT><index:84> +<char:scriptU><font:LucidNewMatSymT><index:85> +<char:scriptV><font:LucidNewMatSymT><index:86> +<char:scriptW><font:LucidNewMatSymT><index:87> +<char:scriptX><font:LucidNewMatSymT><index:88> +<char:scriptY><font:LucidNewMatSymT><index:89> +<char:scriptZ><font:LucidNewMatSymT><index:90> +<char:union><font:LucidNewMatSymT><index:91> +<char:intersection><font:LucidNewMatSymT><index:92> +<char:unionmulti><font:LucidNewMatSymT><index:93> +<char:logicaland><font:LucidNewMatSymT><index:94> +<char:logicalor><font:LucidNewMatSymT><index:95> +<char:turnstileleft><font:LucidNewMatSymT><index:96> +<char:turnstileright><font:LucidNewMatSymT><index:97> +<char:floorleft><font:LucidNewMatSymT><index:98> +<char:floorright><font:LucidNewMatSymT><index:99> +<char:ceilingleft><font:LucidNewMatSymT><index:100> +<char:ceilingright><font:LucidNewMatSymT><index:101> +<char:braceleft><font:LucidNewMatSymT><index:102> +<char:braceright><font:LucidNewMatSymT><index:103> +<char:angbracketleft><font:LucidNewMatSymT><index:104> +<char:angbracketright><font:LucidNewMatSymT><index:105> +<char:bar><font:LucidNewMatSymT><index:106> +<char:bardbl><font:LucidNewMatSymT><index:107> +<char:arrowbothv><font:LucidNewMatSymT><index:108> 
+<char:arrowdblbothv><font:LucidNewMatSymT><index:109> +<char:backslash><font:LucidNewMatSymT><index:110> +<char:wreathproduct><font:LucidNewMatSymT><index:111> +<char:radical><font:LucidNewMatSymT><index:112> +<char:coproduct><font:LucidNewMatSymT><index:113> +<char:nabla><font:LucidNewMatSymT><index:114> +<char:integral><font:LucidNewMatSymT><index:115> +<char:unionsq><font:LucidNewMatSymT><index:116> +<char:intersectionsq><font:LucidNewMatSymT><index:117> +<char:subsetsqequal><font:LucidNewMatSymT><index:118> +<char:supersetsqequal><font:LucidNewMatSymT><index:119> +<char:section><font:LucidNewMatSymT><index:120> +<char:dagger><font:LucidNewMatSymT><index:121> +<char:daggerdbl><font:LucidNewMatSymT><index:122> +<char:paragraph><font:LucidNewMatSymT><index:123> +<char:club><font:LucidNewMatSymT><index:124> +<char:diamond><font:LucidNewMatSymT><index:125> +<char:heart><font:LucidNewMatSymT><index:126> diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/makefile b/Robust/src/Benchmarks/mlp/tagger/mlp-java/makefile new file mode 100644 index 00000000..831977fd --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/makefile @@ -0,0 +1,33 @@ +MAIN_CLASS=Tagger + +PROGRAM=test +SOURCE_FILES=*.java + +BUILDSCRIPT=~/research/Robust/src/buildscript +BSFLAGS= -debug -nooptimize -mainclass $(MAIN_CLASS) #-justanalyze -ownership -ownallocdepth 1 -ownwritedots final -enable-assertions + +all: run + +run: $(PROGRAM).bin + $(PROGRAM).bin test.txt + +view: PNGs + eog *.png & + +PNGs: DOTs + d2p *COMPLETE*.dot + +DOTs: $(PROGRAM).bin + +$(PROGRAM).bin: $(SOURCE_FILES) + $(BUILDSCRIPT) $(BSFLAGS) -o $(PROGRAM) $(SOURCE_FILES) + +clean: + rm -f $(PROGRAM).bin + rm -fr tmpbuilddirectory + rm -f *~ + rm -f *.dot + rm -f *.png + rm -f aliases.txt + rm -f test.index.txt + rm -f test.tag.txt diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/standard-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/standard-charmap.txt new file mode 100644 index 00000000..9e6a44ff 
--- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/standard-charmap.txt @@ -0,0 +1,220 @@ +# character map for standard font + +<char:space><font:><index:32> +<char:exclam><font:><index:33> +<char:quotedbl><font:><index:34> +<char:numbersign><font:><index:35> +<char:dollar><font:><index:36> +<char:percent><font:><index:37> +<char:ampersand><font:><index:38> +<char:quotesingle><font:><index:39> +<char:parenleft><font:><index:40> +<char:parenright><font:><index:41> +<char:asterisk><font:><index:42> +<char:plus><font:><index:43> +<char:comma><font:><index:44> +<char:hyphen><font:><index:45> +<char:period><font:><index:46> +<char:slash><font:><index:47> +<char:zero><font:><index:48> +<char:one><font:><index:49> +<char:two><font:><index:50> +<char:three><font:><index:51> +<char:four><font:><index:52> +<char:five><font:><index:53> +<char:six><font:><index:54> +<char:seven><font:><index:55> +<char:eight><font:><index:56> +<char:nine><font:><index:57> +<char:colon><font:><index:58> +<char:semicolon><font:><index:59> +<char:less><font:><index:60> +<char:equal><font:><index:61> +<char:greater><font:><index:62> +<char:question><font:><index:63> +<char:at><font:><index:64> +<char:A><font:><index:65> +<char:B><font:><index:66> +<char:C><font:><index:67> +<char:D><font:><index:68> +<char:E><font:><index:69> +<char:F><font:><index:70> +<char:G><font:><index:71> +<char:H><font:><index:72> +<char:I><font:><index:73> +<char:J><font:><index:74> +<char:K><font:><index:75> +<char:L><font:><index:76> +<char:M><font:><index:77> +<char:N><font:><index:78> +<char:O><font:><index:79> +<char:P><font:><index:80> +<char:Q><font:><index:81> +<char:R><font:><index:82> +<char:S><font:><index:83> +<char:T><font:><index:84> +<char:U><font:><index:85> +<char:V><font:><index:86> +<char:W><font:><index:87> +<char:X><font:><index:88> +<char:Y><font:><index:89> +<char:Z><font:><index:90> +<char:bracketleft><font:><index:91> +<char:backslash><font:><index:92> 
+<char:bracketright><font:><index:93> +<char:asciicircum><font:><index:94> +<char:underscore><font:><index:95> +<char:grave><font:><index:96> +<char:a><font:><index:97> +<char:b><font:><index:98> +<char:c><font:><index:99> +<char:d><font:><index:100> +<char:e><font:><index:101> +<char:f><font:><index:102> +<char:g><font:><index:103> +<char:h><font:><index:104> +<char:i><font:><index:105> +<char:j><font:><index:106> +<char:k><font:><index:107> +<char:l><font:><index:108> +<char:m><font:><index:109> +<char:n><font:><index:110> +<char:o><font:><index:111> +<char:p><font:><index:112> +<char:q><font:><index:113> +<char:r><font:><index:114> +<char:s><font:><index:115> +<char:t><font:><index:116> +<char:u><font:><index:117> +<char:v><font:><index:118> +<char:w><font:><index:119> +<char:x><font:><index:120> +<char:y><font:><index:121> +<char:z><font:><index:122> +<char:braceleft><font:><index:123> +<char:bar><font:><index:124> +<char:braceright><font:><index:125> +<char:asciitilde><font:><index:126> +<char:euro><font:><index:128> +<char:quotesinglbase><font:><index:130> +<char:florin><font:><index:131> +<char:quotedblbase><font:><index:132> +<char:ellipsis><font:><index:133> +<char:dagger><font:><index:134> +<char:daggerdbl><font:><index:135> +<char:circumflex><font:><index:136> +<char:perthousand><font:><index:137> +<char:Scaron><font:><index:138> +<char:guilsinglleft><font:><index:139> +<char:OE><font:><index:140> +<char:Zcaron><font:><index:142> +<char:quoteleft><font:><index:145> +<char:quoteright><font:><index:146> +<char:quotedblleft><font:><index:147> +<char:quotedblright><font:><index:148> +<char:bullet><font:><index:149> +<char:endash><font:><index:150> +<char:emdash><font:><index:151> +<char:tilde><font:><index:152> +<char:trademark><font:><index:153> +<char:scaron><font:><index:154> +<char:guilsinglright><font:><index:155> +<char:oe><font:><index:156> +<char:zcaron><font:><index:158> +<char:Ydieresis><font:><index:159> +<char:nbspace><font:><index:160> 
+<char:exclamdown><font:><index:161> +<char:cent><font:><index:162> +<char:sterling><font:><index:163> +<char:currency><font:><index:164> +<char:yen><font:><index:165> +<char:brokenbar><font:><index:166> +<char:section><font:><index:167> +<char:dieresis><font:><index:168> +<char:copyright><font:><index:169> +<char:ordfeminine><font:><index:170> +<char:guillemotleft><font:><index:171> +<char:logicalnot><font:><index:172> +<char:sfthyphen><font:><index:173> +<char:registered><font:><index:174> +<char:macron><font:><index:175> +<char:degree><font:><index:176> +<char:plusminus><font:><index:177> +<char:twosuperior><font:><index:178> +<char:threesuperior><font:><index:179> +<char:acute><font:><index:180> +<char:mu><font:><index:181> +<char:paragraph><font:><index:182> +<char:periodcentered><font:><index:183> +<char:cedilla><font:><index:184> +<char:onesuperior><font:><index:185> +<char:ordmasculine><font:><index:186> +<char:guillemotright><font:><index:187> +<char:onequarter><font:><index:188> +<char:onehalf><font:><index:189> +<char:threequarters><font:><index:190> +<char:questiondown><font:><index:191> +<char:Agrave><font:><index:192> +<char:Aacute><font:><index:193> +<char:Acircumflex><font:><index:194> +<char:Atilde><font:><index:195> +<char:Adieresis><font:><index:196> +<char:Aring><font:><index:197> +<char:AE><font:><index:198> +<char:Ccedilla><font:><index:199> +<char:Egrave><font:><index:200> +<char:Eacute><font:><index:201> +<char:Ecircumflex><font:><index:202> +<char:Edieresis><font:><index:203> +<char:Igrave><font:><index:204> +<char:Iacute><font:><index:205> +<char:Icircumflex><font:><index:206> +<char:Idieresis><font:><index:207> +<char:Eth><font:><index:208> +<char:Ntilde><font:><index:209> +<char:Ograve><font:><index:210> +<char:Oacute><font:><index:211> +<char:Ocircumflex><font:><index:212> +<char:Otilde><font:><index:213> +<char:Odieresis><font:><index:214> +<char:multiply><font:><index:215> +<char:Oslash><font:><index:216> 
+<char:Ugrave><font:><index:217> +<char:Uacute><font:><index:218> +<char:Ucircumflex><font:><index:219> +<char:Udieresis><font:><index:220> +<char:Yacute><font:><index:221> +<char:Thorn><font:><index:222> +<char:germandbls><font:><index:223> +<char:agrave><font:><index:224> +<char:aacute><font:><index:225> +<char:acircumflex><font:><index:226> +<char:atilde><font:><index:227> +<char:adieresis><font:><index:228> +<char:aring><font:><index:229> +<char:ae><font:><index:230> +<char:ccedilla><font:><index:231> +<char:egrave><font:><index:232> +<char:eacute><font:><index:233> +<char:ecircumflex><font:><index:234> +<char:edieresis><font:><index:235> +<char:igrave><font:><index:236> +<char:iacute><font:><index:237> +<char:icircumflex><font:><index:238> +<char:idieresis><font:><index:239> +<char:eth><font:><index:240> +<char:ntilde><font:><index:241> +<char:ograve><font:><index:242> +<char:oacute><font:><index:243> +<char:ocircumflex><font:><index:244> +<char:otilde><font:><index:245> +<char:odieresis><font:><index:246> +<char:divide><font:><index:247> +<char:oslash><font:><index:248> +<char:ugrave><font:><index:249> +<char:uacute><font:><index:250> +<char:ucircumflex><font:><index:251> +<char:udieresis><font:><index:252> +<char:yacute><font:><index:253> +<char:thorn><font:><index:254> +<char:ydieresis><font:><index:255> diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/styles.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/styles.txt new file mode 100644 index 00000000..55aaf72c --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/styles.txt @@ -0,0 +1,11 @@ +<style:title><next:author> +<style:author><next:section> +<style:section><next:noindent><counter:1><separator:.><trailer: > +<style:opening><next:noindent> +<style:noindent><next:body> +<style:body><next:body> +<style:subsection><next:noindent><parent:section><counter:1><separator:.><trailer: > +<style:subsubsection><next:noindent><parent:subsection><counter:a><separator:.><trailer: > 
+<style:geekmath><next:noindent> +<style:point><next:noindent><counter:A><leader:\alpha > +<style:ref><next:ref> diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/symbol-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/symbol-charmap.txt new file mode 100644 index 00000000..4481c174 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/symbol-charmap.txt @@ -0,0 +1,195 @@ +# character map for Symbol font + +<char:Symbol><font:Symbol><index:for> +<char:space><font:Symbol><index:32> +<char:exclam><font:Symbol><index:33> +<char:universal><font:Symbol><index:34> +<char:numbersign><font:Symbol><index:35> +<char:existential><font:Symbol><index:36> +<char:percent><font:Symbol><index:37> +<char:ampersand><font:Symbol><index:38> +<char:suchthat><font:Symbol><index:39> +<char:parenleft><font:Symbol><index:40> +<char:parenright><font:Symbol><index:41> +<char:asteriskmath><font:Symbol><index:42> +<char:plus><font:Symbol><index:43> +<char:comma><font:Symbol><index:44> +<char:minus><font:Symbol><index:45> +<char:period><font:Symbol><index:46> +<char:slash><font:Symbol><index:47> +<char:zero><font:Symbol><index:48> +<char:one><font:Symbol><index:49> +<char:two><font:Symbol><index:50> +<char:three><font:Symbol><index:51> +<char:four><font:Symbol><index:52> +<char:five><font:Symbol><index:53> +<char:six><font:Symbol><index:54> +<char:seven><font:Symbol><index:55> +<char:eight><font:Symbol><index:56> +<char:nine><font:Symbol><index:57> +<char:colon><font:Symbol><index:58> +<char:semicolon><font:Symbol><index:59> +<char:less><font:Symbol><index:60> +<char:equal><font:Symbol><index:61> +<char:greater><font:Symbol><index:62> +<char:question><font:Symbol><index:63> +<char:congruent><font:Symbol><index:64> +<char:Alpha><font:Symbol><index:65> +<char:Beta><font:Symbol><index:66> +<char:Chi><font:Symbol><index:67> +<char:Delta><font:Symbol><index:68> +<char:Epsilon><font:Symbol><index:69> +<char:Phi><font:Symbol><index:70> +<char:Gamma><font:Symbol><index:71> 
+<char:Eta><font:Symbol><index:72> +<char:Iota><font:Symbol><index:73> +<char:theta1><font:Symbol><index:74> +<char:Kappa><font:Symbol><index:75> +<char:Lambda><font:Symbol><index:76> +<char:Mu><font:Symbol><index:77> +<char:Nu><font:Symbol><index:78> +<char:Omicron><font:Symbol><index:79> +<char:Pi><font:Symbol><index:80> +<char:Theta><font:Symbol><index:81> +<char:Rho><font:Symbol><index:82> +<char:Sigma><font:Symbol><index:83> +<char:Tau><font:Symbol><index:84> +<char:Upsilon><font:Symbol><index:85> +<char:sigma1><font:Symbol><index:86> +<char:Omega><font:Symbol><index:87> +<char:Xi><font:Symbol><index:88> +<char:Psi><font:Symbol><index:89> +<char:Zeta><font:Symbol><index:90> +<char:bracketleft><font:Symbol><index:91> +<char:therefore><font:Symbol><index:92> +<char:bracketright><font:Symbol><index:93> +<char:perpendicular><font:Symbol><index:94> +<char:underscore><font:Symbol><index:95> +<char:radicalex><font:Symbol><index:96> +<char:alpha><font:Symbol><index:97> +<char:beta><font:Symbol><index:98> +<char:chi><font:Symbol><index:99> +<char:delta><font:Symbol><index:100> +<char:epsilon><font:Symbol><index:101> +<char:phi><font:Symbol><index:102> +<char:gamma><font:Symbol><index:103> +<char:eta><font:Symbol><index:104> +<char:iota><font:Symbol><index:105> +<char:phi1><font:Symbol><index:106> +<char:kappa><font:Symbol><index:107> +<char:lambda><font:Symbol><index:108> +<char:mu><font:Symbol><index:109> +<char:nu><font:Symbol><index:110> +<char:omicron><font:Symbol><index:111> +<char:pi><font:Symbol><index:112> +<char:theta><font:Symbol><index:113> +<char:rho><font:Symbol><index:114> +<char:sigma><font:Symbol><index:115> +<char:tau><font:Symbol><index:116> +<char:upsilon><font:Symbol><index:117> +<char:omega1><font:Symbol><index:118> +<char:omega><font:Symbol><index:119> +<char:xi><font:Symbol><index:120> +<char:psi><font:Symbol><index:121> +<char:zeta><font:Symbol><index:122> +<char:braceleft><font:Symbol><index:123> +<char:bar><font:Symbol><index:124> 
+<char:braceright><font:Symbol><index:125> +<char:similar><font:Symbol><index:126> +<char:Euro><font:Symbol><index:160> +<char:Upsilon1><font:Symbol><index:161> +<char:minute><font:Symbol><index:162> +<char:lessequal><font:Symbol><index:163> +<char:fraction><font:Symbol><index:164> +<char:infinity><font:Symbol><index:165> +<char:florin><font:Symbol><index:166> +<char:club><font:Symbol><index:167> +<char:diamond><font:Symbol><index:168> +<char:heart><font:Symbol><index:169> +<char:spade><font:Symbol><index:170> +<char:arrowboth><font:Symbol><index:171> +<char:arrowleft><font:Symbol><index:172> +<char:arrowup><font:Symbol><index:173> +<char:arrowright><font:Symbol><index:174> +<char:arrowdown><font:Symbol><index:175> +<char:degree><font:Symbol><index:176> +<char:plusminus><font:Symbol><index:177> +<char:second><font:Symbol><index:178> +<char:greaterequal><font:Symbol><index:179> +<char:multiply><font:Symbol><index:180> +<char:proportional><font:Symbol><index:181> +<char:partialdiff><font:Symbol><index:182> +<char:bullet><font:Symbol><index:183> +<char:divide><font:Symbol><index:184> +<char:notequal><font:Symbol><index:185> +<char:equivalence><font:Symbol><index:186> +<char:approxequal><font:Symbol><index:187> + +# seems to be a quarter fraction +# <char:ellipsis><font:Symbol><index:188> + +<char:arrowvertex><font:Symbol><index:189> +<char:arrowhorizex><font:Symbol><index:190> +<char:carriagereturn><font:Symbol><index:191> +<char:aleph><font:Symbol><index:192> +<char:Ifraktur><font:Symbol><index:193> +<char:Rfraktur><font:Symbol><index:194> +<char:weierstrass><font:Symbol><index:195> +<char:circlemultiply><font:Symbol><index:196> +<char:circleplus><font:Symbol><index:197> +<char:emptyset><font:Symbol><index:198> +<char:intersection><font:Symbol><index:199> +<char:union><font:Symbol><index:200> +<char:propersuperset><font:Symbol><index:201> +<char:reflexsuperset><font:Symbol><index:202> +<char:notsubset><font:Symbol><index:203> 
+<char:propersubset><font:Symbol><index:204> +<char:reflexsubset><font:Symbol><index:205> +<char:element><font:Symbol><index:206> +<char:notelement><font:Symbol><index:207> +<char:angle><font:Symbol><index:208> +<char:gradient><font:Symbol><index:209> +<char:registerserif><font:Symbol><index:210> +<char:copyrightserif><font:Symbol><index:211> +<char:trademarkserif><font:Symbol><index:212> +<char:product><font:Symbol><index:213> +<char:radical><font:Symbol><index:214> +<char:dotmath><font:Symbol><index:215> +<char:logicalnot><font:Symbol><index:216> +<char:logicaland><font:Symbol><index:217> +<char:logicalor><font:Symbol><index:218> +<char:arrowdblboth><font:Symbol><index:219> +<char:arrowdblleft><font:Symbol><index:220> +<char:arrowdblup><font:Symbol><index:221> +<char:arrowdblright><font:Symbol><index:222> +<char:arrowdbldown><font:Symbol><index:223> +<char:lozenge><font:Symbol><index:224> +<char:angleleft><font:Symbol><index:225> +<char:registersans><font:Symbol><index:226> +<char:copyrightsans><font:Symbol><index:227> +<char:trademarksans><font:Symbol><index:228> +<char:summation><font:Symbol><index:229> +<char:parenlefttp><font:Symbol><index:230> +<char:parenleftex><font:Symbol><index:231> +<char:parenleftbt><font:Symbol><index:232> +<char:bracketlefttp><font:Symbol><index:233> +<char:bracketleftex><font:Symbol><index:234> +<char:bracketleftbt><font:Symbol><index:235> +<char:bracelefttp><font:Symbol><index:236> +<char:braceleftmid><font:Symbol><index:237> +<char:braceleftbt><font:Symbol><index:238> +<char:braceex><font:Symbol><index:239> +<char:angleright><font:Symbol><index:241> +<char:integral><font:Symbol><index:242> +<char:integraltp><font:Symbol><index:243> +<char:integralex><font:Symbol><index:244> +<char:integralbt><font:Symbol><index:245> +<char:parenrighttp><font:Symbol><index:246> +<char:parenrightex><font:Symbol><index:247> +<char:parenrightbt><font:Symbol><index:248> +<char:bracketrighttp><font:Symbol><index:249> 
+<char:bracketrightex><font:Symbol><index:250> +<char:bracketrightbt><font:Symbol><index:251> +<char:bracerighttp><font:Symbol><index:252> +<char:bracerightmid><font:Symbol><index:253> +<char:bracerightbt><font:Symbol><index:254> diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/symbols.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/symbols.txt new file mode 100644 index 00000000..9b89caf8 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/symbols.txt @@ -0,0 +1,530 @@ +# character map for Lucida Math Italic font + +<char:Gamma><font:LucidNewMatItaT><index:161> +<char:Delta><font:LucidNewMatItaT><index:162> +<char:Theta><font:LucidNewMatItaT><index:163> +<char:Lambda><font:LucidNewMatItaT><index:164> +<char:Xi><font:LucidNewMatItaT><index:165> +<char:Pi><font:LucidNewMatItaT><index:166> +<char:Sigma><font:LucidNewMatItaT><index:167> +<char:Upsilon><font:LucidNewMatItaT><index:7> +<char:Phi><font:LucidNewMatItaT><index:169> +<char:Psi><font:LucidNewMatItaT><index:170> +<char:Omega><font:LucidNewMatItaT><index:173> +<char:alpha><font:LucidNewMatItaT><index:174> +<char:beta><font:LucidNewMatItaT><index:175> +<char:gamma><font:LucidNewMatItaT><index:176> +<char:delta><font:LucidNewMatItaT><index:177> +<char:epsilon1><font:LucidNewMatItaT><index:178> +<char:zeta><font:LucidNewMatItaT><index:179> +<char:eta><font:LucidNewMatItaT><index:180> +<char:theta><font:LucidNewMatItaT><index:181> +<char:iota><font:LucidNewMatItaT><index:182> +<char:kappa><font:LucidNewMatItaT><index:183> +<char:lambda><font:LucidNewMatItaT><index:184> +<char:mu><font:LucidNewMatItaT><index:185> +<char:nu><font:LucidNewMatItaT><index:186> +<char:xi><font:LucidNewMatItaT><index:187> +<char:pi><font:LucidNewMatItaT><index:188> +<char:rho><font:LucidNewMatItaT><index:189> +<char:sigma><font:LucidNewMatItaT><index:190> +<char:tau><font:LucidNewMatItaT><index:191> +<char:upsilon><font:LucidNewMatItaT><index:192> +<char:phi><font:LucidNewMatItaT><index:193> 
+<char:chi><font:LucidNewMatItaT><index:194> +<char:psi><font:LucidNewMatItaT><index:195> +<char:tie><font:LucidNewMatItaT><index:196> +<char:omega><font:LucidNewMatItaT><index:33> +<char:epsilon><font:LucidNewMatItaT><index:34> +<char:theta1><font:LucidNewMatItaT><index:35> +<char:pi1><font:LucidNewMatItaT><index:36> +<char:rho1><font:LucidNewMatItaT><index:37> +<char:sigma1><font:LucidNewMatItaT><index:38> +<char:phi1><font:LucidNewMatItaT><index:39> +<char:arrowlefttophalf><font:LucidNewMatItaT><index:40> +<char:arrowleftbothalf><font:LucidNewMatItaT><index:41> +<char:arrowrighttophalf><font:LucidNewMatItaT><index:42> +<char:arrowrightbothalf><font:LucidNewMatItaT><index:43> +<char:arrowhookleft><font:LucidNewMatItaT><index:44> +<char:arrowhookright><font:LucidNewMatItaT><index:45> +<char:triangleright><font:LucidNewMatItaT><index:46> +<char:triangleleft><font:LucidNewMatItaT><index:47> +<char:period><font:LucidNewMatItaT><index:58> +<char:comma><font:LucidNewMatItaT><index:59> +<char:less><font:LucidNewMatItaT><index:60> +<char:slash><font:LucidNewMatItaT><index:61> +<char:greater><font:LucidNewMatItaT><index:62> +<char:star><font:LucidNewMatItaT><index:63> +<char:partialdiff><font:LucidNewMatItaT><index:64> +<char:flat><font:LucidNewMatItaT><index:91> +<char:natural><font:LucidNewMatItaT><index:92> +<char:sharp><font:LucidNewMatItaT><index:93> +<char:slurbelow><font:LucidNewMatItaT><index:94> +<char:slurabove><font:LucidNewMatItaT><index:95> +<char:lscript><font:LucidNewMatItaT><index:96> +<char:dotlessi><font:LucidNewMatItaT><index:123> +<char:dotlessj><font:LucidNewMatItaT><index:124> +<char:weierstrass><font:LucidNewMatItaT><index:125> +<char:vector><font:LucidNewMatItaT><index:126> + + +# mathematical characters for Lucida New Math Symbol font + +<char:minus><font:LucidNewMatSymT><index:161> +<char:periodcentered><font:LucidNewMatSymT><index:162> +<char:multiply><font:LucidNewMatSymT><index:163> +<char:asteriskmath><font:LucidNewMatSymT><index:164> 
+<char:divide><font:LucidNewMatSymT><index:165> +<char:diamondmath><font:LucidNewMatSymT><index:166> +<char:plusminus><font:LucidNewMatSymT><index:167> +<char:minusplus><font:LucidNewMatSymT><index:168> +<char:circleplus><font:LucidNewMatSymT><index:169> +<char:circleminus><font:LucidNewMatSymT><index:170> +<char:circlemultiply><font:LucidNewMatSymT><index:173> +<char:circledivide><font:LucidNewMatSymT><index:174> +<char:circledot><font:LucidNewMatSymT><index:175> +<char:circlecopyrt><font:LucidNewMatSymT><index:176> +<char:openbullet><font:LucidNewMatSymT><index:177> +<char:bullet><font:LucidNewMatSymT><index:178> +<char:equivasymptotic><font:LucidNewMatSymT><index:179> +<char:equivalence><font:LucidNewMatSymT><index:180> +<char:reflexsubset><font:LucidNewMatSymT><index:181> +<char:reflexsuperset><font:LucidNewMatSymT><index:182> +<char:lessequal><font:LucidNewMatSymT><index:183> +<char:greaterequal><font:LucidNewMatSymT><index:184> +<char:precedesequal><font:LucidNewMatSymT><index:185> +<char:followsequal><font:LucidNewMatSymT><index:186> +<char:similar><font:LucidNewMatSymT><index:187> +<char:approxequal><font:LucidNewMatSymT><index:188> +<char:propersubset><font:LucidNewMatSymT><index:189> +<char:propersuperset><font:LucidNewMatSymT><index:190> +<char:lessmuch><font:LucidNewMatSymT><index:191> +<char:greatermuch><font:LucidNewMatSymT><index:192> +<char:precedes><font:LucidNewMatSymT><index:193> +<char:follows><font:LucidNewMatSymT><index:194> +<char:arrowleft><font:LucidNewMatSymT><index:195> +<char:spade><font:LucidNewMatSymT><index:196> +<char:arrowright><font:LucidNewMatSymT><index:33> +<char:arrowup><font:LucidNewMatSymT><index:34> +<char:arrowdown><font:LucidNewMatSymT><index:35> +<char:arrowboth><font:LucidNewMatSymT><index:36> +<char:arrownortheast><font:LucidNewMatSymT><index:37> +<char:arrowsoutheast><font:LucidNewMatSymT><index:38> +<char:similarequal><font:LucidNewMatSymT><index:39> +<char:arrowdblleft><font:LucidNewMatSymT><index:40> 
+<char:arrowdblright><font:LucidNewMatSymT><index:41> +<char:arrowdblup><font:LucidNewMatSymT><index:42> +<char:arrowdbldown><font:LucidNewMatSymT><index:43> +<char:arrowdblboth><font:LucidNewMatSymT><index:44> +<char:arrownorthwest><font:LucidNewMatSymT><index:45> +<char:arrowsouthwest><font:LucidNewMatSymT><index:46> +<char:proportional><font:LucidNewMatSymT><index:47> +<char:prime><font:LucidNewMatSymT><index:48> +<char:infinity><font:LucidNewMatSymT><index:49> +<char:element><font:LucidNewMatSymT><index:50> +<char:owner><font:LucidNewMatSymT><index:51> +<char:triangle><font:LucidNewMatSymT><index:52> +<char:triangleinv><font:LucidNewMatSymT><index:53> +<char:negationslash><font:LucidNewMatSymT><index:54> +<char:mapsto><font:LucidNewMatSymT><index:55> +<char:universal><font:LucidNewMatSymT><index:56> +<char:existential><font:LucidNewMatSymT><index:57> +<char:logicalnot><font:LucidNewMatSymT><index:58> +<char:emptyset><font:LucidNewMatSymT><index:59> +<char:Rfractur><font:LucidNewMatSymT><index:60> +<char:Ifractur><font:LucidNewMatSymT><index:61> +<char:latticetop><font:LucidNewMatSymT><index:62> +<char:perpendicular><font:LucidNewMatSymT><index:63> +<char:aleph><font:LucidNewMatSymT><index:64> +<char:scriptA><font:LucidNewMatSymT><index:65> +<char:scriptB><font:LucidNewMatSymT><index:66> +<char:scriptC><font:LucidNewMatSymT><index:67> +<char:scriptD><font:LucidNewMatSymT><index:68> +<char:scriptE><font:LucidNewMatSymT><index:69> +<char:scriptF><font:LucidNewMatSymT><index:70> +<char:scriptG><font:LucidNewMatSymT><index:71> +<char:scriptH><font:LucidNewMatSymT><index:72> +<char:scriptI><font:LucidNewMatSymT><index:73> +<char:scriptJ><font:LucidNewMatSymT><index:74> +<char:scriptK><font:LucidNewMatSymT><index:75> +<char:scriptL><font:LucidNewMatSymT><index:76> +<char:scriptM><font:LucidNewMatSymT><index:77> +<char:scriptN><font:LucidNewMatSymT><index:78> +<char:scriptO><font:LucidNewMatSymT><index:79> +<char:scriptP><font:LucidNewMatSymT><index:80> 
+<char:scriptQ><font:LucidNewMatSymT><index:81> +<char:scriptR><font:LucidNewMatSymT><index:82> +<char:scriptS><font:LucidNewMatSymT><index:83> +<char:scriptT><font:LucidNewMatSymT><index:84> +<char:scriptU><font:LucidNewMatSymT><index:85> +<char:scriptV><font:LucidNewMatSymT><index:86> +<char:scriptW><font:LucidNewMatSymT><index:87> +<char:scriptX><font:LucidNewMatSymT><index:88> +<char:scriptY><font:LucidNewMatSymT><index:89> +<char:scriptZ><font:LucidNewMatSymT><index:90> +<char:union><font:LucidNewMatSymT><index:91> +<char:intersection><font:LucidNewMatSymT><index:92> +<char:unionmulti><font:LucidNewMatSymT><index:93> +<char:logicaland><font:LucidNewMatSymT><index:94> +<char:logicalor><font:LucidNewMatSymT><index:95> +<char:turnstileleft><font:LucidNewMatSymT><index:96> +<char:turnstileright><font:LucidNewMatSymT><index:97> +<char:floorleft><font:LucidNewMatSymT><index:98> +<char:floorright><font:LucidNewMatSymT><index:99> +<char:ceilingleft><font:LucidNewMatSymT><index:100> +<char:ceilingright><font:LucidNewMatSymT><index:101> +<char:braceleft><font:LucidNewMatSymT><index:102> +<char:braceright><font:LucidNewMatSymT><index:103> +<char:angbracketleft><font:LucidNewMatSymT><index:104> +<char:angbracketright><font:LucidNewMatSymT><index:105> +<char:bar><font:LucidNewMatSymT><index:106> +<char:bardbl><font:LucidNewMatSymT><index:107> +<char:arrowbothv><font:LucidNewMatSymT><index:108> +<char:arrowdblbothv><font:LucidNewMatSymT><index:109> +<char:backslash><font:LucidNewMatSymT><index:110> +<char:wreathproduct><font:LucidNewMatSymT><index:111> +<char:radical><font:LucidNewMatSymT><index:112> +<char:coproduct><font:LucidNewMatSymT><index:113> +<char:nabla><font:LucidNewMatSymT><index:114> +<char:integral><font:LucidNewMatSymT><index:115> +<char:unionsq><font:LucidNewMatSymT><index:116> +<char:intersectionsq><font:LucidNewMatSymT><index:117> +<char:subsetsqequal><font:LucidNewMatSymT><index:118> +<char:supersetsqequal><font:LucidNewMatSymT><index:119> 
+<char:section><font:LucidNewMatSymT><index:120> +<char:dagger><font:LucidNewMatSymT><index:121> +<char:daggerdbl><font:LucidNewMatSymT><index:122> +<char:paragraph><font:LucidNewMatSymT><index:123> +<char:club><font:LucidNewMatSymT><index:124> +<char:diamond><font:LucidNewMatSymT><index:125> +<char:heart><font:LucidNewMatSymT><index:126> + + + +# character map for Symbol font + +<char:Symbol><font:Symbol><index:for> +<char:space><font:Symbol><index:32> +<char:exclam><font:Symbol><index:33> +<char:universal><font:Symbol><index:34> +<char:numbersign><font:Symbol><index:35> +<char:existential><font:Symbol><index:36> +<char:percent><font:Symbol><index:37> +<char:ampersand><font:Symbol><index:38> +<char:suchthat><font:Symbol><index:39> +<char:parenleft><font:Symbol><index:40> +<char:parenright><font:Symbol><index:41> +<char:asteriskmath><font:Symbol><index:42> +<char:plus><font:Symbol><index:43> +<char:comma><font:Symbol><index:44> +<char:minus><font:Symbol><index:45> +<char:period><font:Symbol><index:46> +<char:slash><font:Symbol><index:47> +<char:zero><font:Symbol><index:48> +<char:one><font:Symbol><index:49> +<char:two><font:Symbol><index:50> +<char:three><font:Symbol><index:51> +<char:four><font:Symbol><index:52> +<char:five><font:Symbol><index:53> +<char:six><font:Symbol><index:54> +<char:seven><font:Symbol><index:55> +<char:eight><font:Symbol><index:56> +<char:nine><font:Symbol><index:57> +<char:colon><font:Symbol><index:58> +<char:semicolon><font:Symbol><index:59> +<char:less><font:Symbol><index:60> +<char:equal><font:Symbol><index:61> +<char:greater><font:Symbol><index:62> +<char:question><font:Symbol><index:63> +<char:congruent><font:Symbol><index:64> +<char:Alpha><font:Symbol><index:65> +<char:Beta><font:Symbol><index:66> +<char:Chi><font:Symbol><index:67> +<char:Delta><font:Symbol><index:68> +<char:Epsilon><font:Symbol><index:69> +<char:Phi><font:Symbol><index:70> +<char:Gamma><font:Symbol><index:71> +<char:Eta><font:Symbol><index:72> 
+<char:Iota><font:Symbol><index:73> +<char:theta1><font:Symbol><index:74> +<char:Kappa><font:Symbol><index:75> +<char:Lambda><font:Symbol><index:76> +<char:Mu><font:Symbol><index:77> +<char:Nu><font:Symbol><index:78> +<char:Omicron><font:Symbol><index:79> +<char:Pi><font:Symbol><index:80> +<char:Theta><font:Symbol><index:81> +<char:Rho><font:Symbol><index:82> +<char:Sigma><font:Symbol><index:83> +<char:Tau><font:Symbol><index:84> +<char:Upsilon><font:Symbol><index:85> +<char:sigma1><font:Symbol><index:86> +<char:Omega><font:Symbol><index:87> +<char:Xi><font:Symbol><index:88> +<char:Psi><font:Symbol><index:89> +<char:Zeta><font:Symbol><index:90> +<char:bracketleft><font:Symbol><index:91> +<char:therefore><font:Symbol><index:92> +<char:bracketright><font:Symbol><index:93> +<char:perpendicular><font:Symbol><index:94> +<char:underscore><font:Symbol><index:95> +<char:radicalex><font:Symbol><index:96> +<char:alpha><font:Symbol><index:97> +<char:beta><font:Symbol><index:98> +<char:chi><font:Symbol><index:99> +<char:delta><font:Symbol><index:100> +<char:epsilon><font:Symbol><index:101> +<char:phi><font:Symbol><index:102> +<char:gamma><font:Symbol><index:103> +<char:eta><font:Symbol><index:104> +<char:iota><font:Symbol><index:105> +<char:phi1><font:Symbol><index:106> +<char:kappa><font:Symbol><index:107> +<char:lambda><font:Symbol><index:108> +<char:mu><font:Symbol><index:109> +<char:nu><font:Symbol><index:110> +<char:omicron><font:Symbol><index:111> +<char:pi><font:Symbol><index:112> +<char:theta><font:Symbol><index:113> +<char:rho><font:Symbol><index:114> +<char:sigma><font:Symbol><index:115> +<char:tau><font:Symbol><index:116> +<char:upsilon><font:Symbol><index:117> +<char:omega1><font:Symbol><index:118> +<char:omega><font:Symbol><index:119> +<char:xi><font:Symbol><index:120> +<char:psi><font:Symbol><index:121> +<char:zeta><font:Symbol><index:122> +<char:braceleft><font:Symbol><index:123> +<char:bar><font:Symbol><index:124> +<char:braceright><font:Symbol><index:125> 
+<char:similar><font:Symbol><index:126> +<char:Euro><font:Symbol><index:160> +<char:Upsilon1><font:Symbol><index:161> +<char:minute><font:Symbol><index:162> +<char:lessequal><font:Symbol><index:163> +<char:fraction><font:Symbol><index:164> +<char:infinity><font:Symbol><index:165> +<char:florin><font:Symbol><index:166> +<char:club><font:Symbol><index:167> +<char:diamond><font:Symbol><index:168> +<char:heart><font:Symbol><index:169> +<char:spade><font:Symbol><index:170> +<char:arrowboth><font:Symbol><index:171> +<char:arrowleft><font:Symbol><index:172> +<char:arrowup><font:Symbol><index:173> +<char:arrowright><font:Symbol><index:174> +<char:arrowdown><font:Symbol><index:175> +<char:degree><font:Symbol><index:176> +<char:plusminus><font:Symbol><index:177> +<char:second><font:Symbol><index:178> +<char:greaterequal><font:Symbol><index:179> +<char:multiply><font:Symbol><index:180> +<char:proportional><font:Symbol><index:181> +<char:partialdiff><font:Symbol><index:182> +<char:bullet><font:Symbol><index:183> +<char:divide><font:Symbol><index:184> +<char:notequal><font:Symbol><index:185> +<char:equivalence><font:Symbol><index:186> +<char:approxequal><font:Symbol><index:187> + +# seems to be a quarter fraction +# <char:ellipsis><font:Symbol><index:188> + +<char:arrowvertex><font:Symbol><index:189> +<char:arrowhorizex><font:Symbol><index:190> +<char:carriagereturn><font:Symbol><index:191> +<char:aleph><font:Symbol><index:192> +<char:Ifraktur><font:Symbol><index:193> +<char:Rfraktur><font:Symbol><index:194> +<char:weierstrass><font:Symbol><index:195> +<char:circlemultiply><font:Symbol><index:196> +<char:circleplus><font:Symbol><index:197> +<char:emptyset><font:Symbol><index:198> +<char:intersection><font:Symbol><index:199> +<char:union><font:Symbol><index:200> +<char:propersuperset><font:Symbol><index:201> +<char:reflexsuperset><font:Symbol><index:202> +<char:notsubset><font:Symbol><index:203> +<char:propersubset><font:Symbol><index:204> 
+<char:reflexsubset><font:Symbol><index:205> +<char:element><font:Symbol><index:206> +<char:notelement><font:Symbol><index:207> +<char:angle><font:Symbol><index:208> +<char:gradient><font:Symbol><index:209> +<char:registerserif><font:Symbol><index:210> +<char:copyrightserif><font:Symbol><index:211> +<char:trademarkserif><font:Symbol><index:212> +<char:product><font:Symbol><index:213> +<char:radical><font:Symbol><index:214> +<char:dotmath><font:Symbol><index:215> +<char:logicalnot><font:Symbol><index:216> +<char:logicaland><font:Symbol><index:217> +<char:logicalor><font:Symbol><index:218> +<char:arrowdblboth><font:Symbol><index:219> +<char:arrowdblleft><font:Symbol><index:220> +<char:arrowdblup><font:Symbol><index:221> +<char:arrowdblright><font:Symbol><index:222> +<char:arrowdbldown><font:Symbol><index:223> +<char:lozenge><font:Symbol><index:224> +<char:angleleft><font:Symbol><index:225> +<char:registersans><font:Symbol><index:226> +<char:copyrightsans><font:Symbol><index:227> +<char:trademarksans><font:Symbol><index:228> +<char:summation><font:Symbol><index:229> +<char:parenlefttp><font:Symbol><index:230> +<char:parenleftex><font:Symbol><index:231> +<char:parenleftbt><font:Symbol><index:232> +<char:bracketlefttp><font:Symbol><index:233> +<char:bracketleftex><font:Symbol><index:234> +<char:bracketleftbt><font:Symbol><index:235> +<char:bracelefttp><font:Symbol><index:236> +<char:braceleftmid><font:Symbol><index:237> +<char:braceleftbt><font:Symbol><index:238> +<char:braceex><font:Symbol><index:239> +<char:angleright><font:Symbol><index:241> +<char:integral><font:Symbol><index:242> +<char:integraltp><font:Symbol><index:243> +<char:integralex><font:Symbol><index:244> +<char:integralbt><font:Symbol><index:245> +<char:parenrighttp><font:Symbol><index:246> +<char:parenrightex><font:Symbol><index:247> +<char:parenrightbt><font:Symbol><index:248> +<char:bracketrighttp><font:Symbol><index:249> +<char:bracketrightex><font:Symbol><index:250> 
+<char:bracketrightbt><font:Symbol><index:251> +<char:bracerighttp><font:Symbol><index:252> +<char:bracerightmid><font:Symbol><index:253> +<char:bracerightbt><font:Symbol><index:254> + + +# character map for Lucida New Math Extended font + +<char:parenleftbig><font:LucidNewMatExtT><index:161> +<char:parenrightbig><font:LucidNewMatExtT><index:162> +<char:bracketleftbig><font:LucidNewMatExtT><index:163> +<char:bracketrightbig><font:LucidNewMatExtT><index:164> +<char:floorleftbig><font:LucidNewMatExtT><index:165> +<char:floorrightbig><font:LucidNewMatExtT><index:166> +<char:ceilingleftbig><font:LucidNewMatExtT><index:167> +<char:ceilingrightbig><font:LucidNewMatExtT><index:168> +<char:braceleftbig><font:LucidNewMatExtT><index:169> +<char:bracerightbig><font:LucidNewMatExtT><index:170> +<char:angbracketleftbig><font:LucidNewMatExtT><index:173> +<char:angbracketrightbig><font:LucidNewMatExtT><index:174> +<char:vextendsingle><font:LucidNewMatExtT><index:175> +<char:vextenddouble><font:LucidNewMatExtT><index:176> +<char:slashbig><font:LucidNewMatExtT><index:177> +<char:backslashbig><font:LucidNewMatExtT><index:178> +<char:parenleftBig><font:LucidNewMatExtT><index:179> +<char:parenrightBig><font:LucidNewMatExtT><index:180> +<char:parenleftbigg><font:LucidNewMatExtT><index:181> +<char:parenrightbigg><font:LucidNewMatExtT><index:182> +<char:bracketleftbigg><font:LucidNewMatExtT><index:183> +<char:bracketrightbigg><font:LucidNewMatExtT><index:184> +<char:floorleftbigg><font:LucidNewMatExtT><index:185> +<char:floorrightbigg><font:LucidNewMatExtT><index:186> +<char:ceilingleftbigg><font:LucidNewMatExtT><index:187> +<char:ceilingrightbigg><font:LucidNewMatExtT><index:188> +<char:braceleftbigg><font:LucidNewMatExtT><index:189> +<char:bracerightbigg><font:LucidNewMatExtT><index:190> +<char:angbracketleftbigg><font:LucidNewMatExtT><index:28> +<char:angbracketrightbigg><font:LucidNewMatExtT><index:29> +<char:slashbigg><font:LucidNewMatExtT><index:193> 
+<char:backslashbigg><font:LucidNewMatExtT><index:194> +<char:parenleftBigg><font:LucidNewMatExtT><index:195> +<char:parenrightBigg><font:LucidNewMatExtT><index:33> +<char:bracketleftBigg><font:LucidNewMatExtT><index:34> +<char:bracketrightBigg><font:LucidNewMatExtT><index:35> +<char:floorleftBigg><font:LucidNewMatExtT><index:36> +<char:floorrightBigg><font:LucidNewMatExtT><index:37> +<char:ceilingleftBigg><font:LucidNewMatExtT><index:38> +<char:ceilingrightBigg><font:LucidNewMatExtT><index:39> +<char:braceleftBigg><font:LucidNewMatExtT><index:40> +<char:bracerightBigg><font:LucidNewMatExtT><index:41> +<char:angbracketleftBigg><font:LucidNewMatExtT><index:42> +<char:angbracketrightBigg><font:LucidNewMatExtT><index:43> +<char:slashBigg><font:LucidNewMatExtT><index:44> +<char:backslashBigg><font:LucidNewMatExtT><index:45> +<char:slashBig><font:LucidNewMatExtT><index:46> +<char:backslashBig><font:LucidNewMatExtT><index:47> +<char:parenlefttp><font:LucidNewMatExtT><index:48> +<char:parenrighttp><font:LucidNewMatExtT><index:49> +<char:bracketlefttp><font:LucidNewMatExtT><index:50> +<char:bracketrighttp><font:LucidNewMatExtT><index:51> +<char:bracketleftbt><font:LucidNewMatExtT><index:52> +<char:bracketrightbt><font:LucidNewMatExtT><index:53> +<char:bracketleftex><font:LucidNewMatExtT><index:54> +<char:bracketrightex><font:LucidNewMatExtT><index:55> +<char:bracelefttp><font:LucidNewMatExtT><index:56> +<char:bracerighttp><font:LucidNewMatExtT><index:57> +<char:braceleftbt><font:LucidNewMatExtT><index:58> +<char:bracerightbt><font:LucidNewMatExtT><index:59> +<char:braceleftmid><font:LucidNewMatExtT><index:60> +<char:bracerightmid><font:LucidNewMatExtT><index:61> +<char:braceex><font:LucidNewMatExtT><index:62> +<char:arrowvertex><font:LucidNewMatExtT><index:63> +<char:parenleftbt><font:LucidNewMatExtT><index:64> +<char:parenrightbt><font:LucidNewMatExtT><index:65> +<char:parenleftex><font:LucidNewMatExtT><index:66> +<char:parenrightex><font:LucidNewMatExtT><index:67> 
+<char:angbracketleftBig><font:LucidNewMatExtT><index:68> +<char:angbracketrightBig><font:LucidNewMatExtT><index:69> +<char:unionsqtext><font:LucidNewMatExtT><index:70> +<char:unionsqdisplay><font:LucidNewMatExtT><index:71> +<char:contintegraltext><font:LucidNewMatExtT><index:72> +<char:contintegraldisplay><font:LucidNewMatExtT><index:73> +<char:circledottext><font:LucidNewMatExtT><index:74> +<char:circledotdisplay><font:LucidNewMatExtT><index:75> +<char:circleplustext><font:LucidNewMatExtT><index:76> +<char:circleplusdisplay><font:LucidNewMatExtT><index:77> +<char:circlemultiplytext><font:LucidNewMatExtT><index:78> +<char:circlemultiplydisplay><font:LucidNewMatExtT><index:79> +<char:summationtext><font:LucidNewMatExtT><index:80> +<char:producttext><font:LucidNewMatExtT><index:81> +<char:integraltext><font:LucidNewMatExtT><index:82> +<char:uniontext><font:LucidNewMatExtT><index:83> +<char:intersectiontext><font:LucidNewMatExtT><index:84> +<char:unionmultitext><font:LucidNewMatExtT><index:85> +<char:logicalandtext><font:LucidNewMatExtT><index:86> +<char:logicalortext><font:LucidNewMatExtT><index:87> +<char:summationdisplay><font:LucidNewMatExtT><index:88> +<char:productdisplay><font:LucidNewMatExtT><index:89> +<char:integraldisplay><font:LucidNewMatExtT><index:90> +<char:uniondisplay><font:LucidNewMatExtT><index:91> +<char:intersectiondisplay><font:LucidNewMatExtT><index:92> +<char:unionmultidisplay><font:LucidNewMatExtT><index:93> +<char:logicalanddisplay><font:LucidNewMatExtT><index:94> +<char:logicalordisplay><font:LucidNewMatExtT><index:95> +<char:coproducttext><font:LucidNewMatExtT><index:96> +<char:coproductdisplay><font:LucidNewMatExtT><index:97> +<char:hatwide><font:LucidNewMatExtT><index:98> +<char:hatwider><font:LucidNewMatExtT><index:99> +<char:hatwidest><font:LucidNewMatExtT><index:100> +<char:tildewide><font:LucidNewMatExtT><index:101> +<char:tildewider><font:LucidNewMatExtT><index:102> +<char:tildewidest><font:LucidNewMatExtT><index:103> 
+<char:bracketleftBig><font:LucidNewMatExtT><index:104> +<char:bracketrightBig><font:LucidNewMatExtT><index:105> +<char:floorleftBig><font:LucidNewMatExtT><index:106> +<char:floorrightBig><font:LucidNewMatExtT><index:107> +<char:ceilingleftBig><font:LucidNewMatExtT><index:108> +<char:ceilingrightBig><font:LucidNewMatExtT><index:109> +<char:braceleftBig><font:LucidNewMatExtT><index:110> +<char:bracerightBig><font:LucidNewMatExtT><index:111> +<char:radicalbig><font:LucidNewMatExtT><index:112> +<char:radicalBig><font:LucidNewMatExtT><index:113> +<char:radicalbigg><font:LucidNewMatExtT><index:114> +<char:radicalBigg><font:LucidNewMatExtT><index:115> +<char:radicalbt><font:LucidNewMatExtT><index:116> +<char:radicalvertex><font:LucidNewMatExtT><index:117> +<char:radicaltp><font:LucidNewMatExtT><index:118> +<char:arrowvertexdbl><font:LucidNewMatExtT><index:119> +<char:arrowtp><font:LucidNewMatExtT><index:120> +<char:arrowbt><font:LucidNewMatExtT><index:121> +<char:bracehtipdownleft><font:LucidNewMatExtT><index:122> +<char:bracehtipdownright><font:LucidNewMatExtT><index:123> +<char:bracehtipupleft><font:LucidNewMatExtT><index:124> +<char:bracehtipupright><font:LucidNewMatExtT><index:125> +<char:arrowdbltp><font:LucidNewMatExtT><index:126> +<char:arrowdblbt><font:LucidNewMatExtT><index:196> diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/test.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/test.txt new file mode 100644 index 00000000..a41f4665 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/mlp-java/test.txt @@ -0,0 +1,1057 @@ +\preamble +\loadchars{charmap.txt} +\loadchars{standard-charmap.txt} +\loadchars{lucmathsym-charmap.txt} +\loadchars{lucmathit-charmap.txt} +\loadchars{lucmathext-charmap.txt} +\loadchars{symbol-charmap.txt} +\loadstyles{styles.txt} + +\title A Micromodularity Mechanism + +\section Testing + +This is gamma: \gamma.\\ +This is Delta: \Delta.\\ +This is oplus: \oplus. 
+\scriptA \arrowdblright \scriptA + +This is a subscripted variable: A\sub<\bold<hello>\italics<there>>. +Math mode: $x + 2 = y, and && x\sub<2> = y\sub<3> = x\sub<ijk>$ + +\author Daniel Jackson, Ilya Shlyakhter and Manu Sridharan\\ +Laboratory for Computer Science\\ +Massachusetts Institute of Technology\\ +Cambridge, Massachusetts, USA\\ +dnj@mit.edu + +\opening Abstract + +A simple mechanism for structuring specifications is described. By modelling structures as atoms, it remains entirely first-order and thus amenable to automatic analysis. And by interpreting fields of structures as relations, it allows the same relational operators used in the formula language to be used for dereferencing. An extension feature allows structures to be developed incrementally, but requires no textual inclusion nor any notion of subtyping. The paper demonstrates the flexibility of the mechanism by application in a variety of common idioms. + +\subsection* Categories and Subject Descriptors + +D.2.1 Requirements/Specifications---Languages; D.2.4 Software/Program Verification---Formal methods, Model checking; F.3.1 Specifying and Verifying and Reasoning about Programs---Assertions, Invariants, Specification techniques. + +\subsection* General Terms + +Design; Documentation; Languages; Verification. + +\subsection* Keywords + +Modeling languages; formal specification; first-order logic; relational calculus; Alloy language; Z specification language; schema calculus. + +\section* Introduction + +\quote I am neither crazy nor a micromaniac.\\ +(A micromaniac is someone obsessed with\\ +reducing things to their smallest possible form.\\ +This word, by the way, is not in the dictionary.)\\ +--_Edouard de Pomiane, French Cooking in Ten Minutes, 1930_ + +\noindent Most specification languages provide mechanisms that allow larger specifications to be built from smaller ones. These mechanisms are often the most complicated part of the language, and present obstacles to analysis. 
This paper presents a simple mechanism that seems to be expressive enough for a wide variety of uses, without compromising analyzability. + +This work is part of a larger project investigating the design of a "micro modelling language". Our premise is that lightweight application of formal methods [6] demands an unusually small and simple language that is amenable to fully automatic semantic analysis. The Alloy language is the result to date of our efforts to design such a language. Based on our experiences with the language [4] and its analyzer [5], we have recently developed a revision of Alloy that overcomes many of its limitations. This paper describes the key feature of the revised language: the _signature_, a new modularity mechanism. + +The mechanism allows our existing analysis scheme [3] to be applied to specifications involving structures. This is not achieved by treating the structuring mechanism as a syntactic sugar, which would limit the power of the notation (ruling out, for example, quantification over structures) and would complicate the analysis tool and make output harder for users to interpret. Because of the mechanism's generality, it has also enabled us to simplify the language as a whole, making it more uniform and eliminating some ad hoc elements. + +Our mechanism has a variety of applications. It can express inherent structure in the system being modelled, and can be used to organize a specification in which details are added incrementally. It can be used to construct a library of datatypes, or to describe a system as an instantiation of a more general system. And it can express state invariants, transitions, and sequences, despite the lack of any special syntax for state machines. + +In this last respect, the new language differs most markedly from its predecessor [4], which provided built-in notions of state invariants and operations. 
We now think this was a bad idea, because it made the language cumbersome for problems (such as the analysis of security policies or architectural topology constraints) in which temporal behaviour can be fruitfully ignored, and too inflexible for many problems in which temporal behaviour is important. + +#Because the notation as a whole is small, simple and analyzable, and free of bias towards any particular domain of application, it may be suitable as an intermediate language. A tool for architectural design, for example, might translate a more domain-specific notation into our language, allowing analyses that such tools do not currently support (such as automatic generation of sample configurations from style rules, and checking of consistency). +# +Our paper begins by explaining our motivations---the requirements our mechanism is designed to meet. The mechanism is then presented first informally in a series of examples, and then slightly more rigorously feature-by-feature. We discuss related work, especially the schema calculus of Z, and close with a summary of the merits and deficiencies of our notation as a whole. 
+ +\section Requirements + +The goal of this work was to find a single structuring mechanism that would support a variety of common specification idioms: + +\point \cdot _States_: description of complex state as a collection of named components; incremental description both by hierarchy, in which a complex state becomes a component of a larger state, and by extension, in which new components are added; declaration of invariants and definitions of derived components; + +\point \cdot _Datatypes_: separate description of a library of polymorphic datatypes, such as lists, sequences, trees and orders, along with their operators; + +\point \cdot _Transitions_: specification of state transitions as operations described implicitly as formulas relating pre- and post-state; composition of operations from previously defined invariants and operations; sequential composition of operations; description of traces as sequences of states; + +\point \cdot _Abstractions_: description of abstraction relations between state spaces; + +\point \cdot _Assertions_: expression of properties intended to be redundant, to be checked by analysis, including: relationships amongst invariants; wellformedness of definitions (eg, that an implicit definition is functional); establishment and preservation of invariants by operations; properties of states reachable along finite traces; and simulation relationships between abstract and concrete versions of an operation. + +\noindent We wanted additionally to meet some more general criteria: + +\point \cdot _Simplicity_. The language as a whole should be exceptionally small and simple. + +\point \cdot _Flexibility_. Support for the particular idioms of state-machine specification should not be a straitjacket; the language should not dictate how state machines are expressed, and should not make it hard to describe structures that are not state machines (such as security models and architectural styles). + +\point \cdot _Analyzability_. 
A fully automatic semantic analysis should be possible. In the present work, this has been achieved by requiring that the modularity mechanism be first order, and expressible in the kernel of the existing language. + +\noindent Finally, our language design decisions have been influenced by some principles that we believe contribute to these goals, make the language easier to use, and analysis tools easier to build: + +\point \cdot _Explicitness_. The language should be fully explicit, with as few implicit constraints, coercions, etc, as possible. + +\point \cdot _Minimal mathematics_. The basic theory of sets and relations should suffice; it should not be necessary to introduce domains, fixed points, infinities or special logical values. + +\point \cdot _Minimal syntax_. There should be very few keywords or special symbols, and no need for special typography or layout. + +\point \cdot _Uniformity_. A small and general set of constructs should be applied uniformly, independent of context. + +\point \cdot _Lack of novelty_. Whenever possible, notions and syntax should follow standard usage of conventional mathematics and programming. + +\section Informal Description + +As a running example, we will specify a simple memory system involving a cache and a main memory. The memory has a fixed set of addresses and associates a data value with each address. The cache, in contrast, associates data values with some subset of addresses that varies over time. The cache is updated by a "write-back scheme", which means that updates need not be reflected to main memory immediately. The cache may therefore hold a more current value for an address than the main memory; the two are brought into alignment when the address is flushed from the cache and its value is written to main memory. 
+ +\subsection States + +We start by declaring the existence of addresses and data values: + +\geekmath sig Addr {}\\ +sig Data {} + +Each line declares a _signature_, and introduces a set of atoms: _Addr_ for the set of addresses, and _Data_ for the set of data values. Like 'given types' in Z, these sets are disjoint from one another, and their atoms are unstructured and uninterpreted. Signature names can be used as expressions denoting sets, but they are also treated as types, so the expression _Addr+Data_, for example, is ill-typed, since the union operator (+) requires the types of its operands to match. + +The signature declaration + +\geekmath sig Memory {\\ + addrs: set Addr,\\ + map: addrs ->! Data\\ + } + +likewise declares a set of atoms, _Memory_, corresponding to the set of all possible memories. In addition, it declares two fields: _addrs_ and _map_ which associate with a memory a set of addresses and a mapping from addresses to data values respectively. Thus, given a memory _m_, the expression _m.addrs_ will be a set of addresses, _m.map_ will be a relation from addresses to data values. The memory, addresses and data values should be viewed as distinct atoms in their own right; fields don't decompose an atom, but rather relate one atom to others. The exclamation mark in the declaration of the field _map_ is a 'multiplicity marking': it says that _m.map_ associates exactly one data value with each address in the set _m.addrs_. The use of _addrs_ rather than _Addr_ on the left side of the arrow indicates that _m.map_ does not associate a data value with an address that is not in the set _m.addrs_. + +In these expressions, the dot is simply relational image. More precisely, when we say that _m_ is a memory, we mean that the expression _m_ denotes a set consisting of a single atom. The field _addrs_ is a relation from _Memory_ to _Addr_, and _m.addrs_ denotes the image of the singleton set under this relation. 
So for a set of memories _ms_, the expression _ms.addrs_ will denote the union of the sets of addresses that belong to the individual memories. Given an address _a_, the expression _a.(m.map)_ denotes the set of data values associated with address _a_ in memory _m_, which will either be empty (when the address is not mapped) or a singleton. For convenience, we allow the relational image _s.r_ to be written equivalently as _r_[_s_], where [] binds more loosely than dot, so this expression may be written as _m.map_[_a_] instead. + +Like objects of an object-oriented language, two distinct atoms can have fields of the same value. Unlike objects, however, atoms are immutable. Each field is fixed, and cannot map an atom to one value at one time and another value at another time. To describe an operation that changes the state of a memory, therefore, we will use two distinct atoms in the set _Memory_ to represent the memory's state before and after. + +\subsection Extension + +A signature declaration can introduce a set as a subset of one previously declared, in which case we call it a _subsignature_. In this case, the set does not correspond to a type, but rather its atoms take on the type of the superset. For example, the declaration + +\geekmath sig MainMemory extends Memory {} + +introduces a set of atoms _MainMemory_ representing main memories, which is constrained to be a subset of the set _Memory_. Likewise + +\geekmath sig Cache extends Memory {\\ + dirty: set addrs\\ + } + +introduces a set of atoms _Cache_ representing those memories that can be regarded as caches. It also introduces a field _dirty_ that associates with a cache the set of addresses that is dirty; later, we will use this to represent those addresses for which a cache and main memory differ. Because _Cache_ is a subset of _Memory_, and _m.addrs_ (for any memory _m_) is a subset of _Addr_, the field denotes a relation whose type is from _Memory_ to _Addr_. 
Expressions such as _m.dirty_ are therefore type-correct for a memory _m_, whether or not _m_ is a cache. But since declaration of the field _dirty_ within the signature _Cache_ constrains _dirty_ to be a relation that maps only caches, _m.dirty_ will always denote the empty set when _m_ is not a cache. + +This approach avoids introducing a notion of subtyping. Subtypes complicate the language, and tend to make it more difficult to use. In OCL [17], which models extension with subtypes rather than subsets, an expression such as _m.dirty_ would be illegal, and would require a coercion of _m_ to the subtype _Cache_. Coercions do not fit smoothly into the relational framework; they interfere with the ability to take the image of a set under a relation, for example. Moreover, subtypes are generally disjoint, whereas our approach allows the sets denoted by subsignatures to overlap. In this case, we'll add a constraint (in Section 2.4 below) to ensure that _MainMemory_ and _Cache_ are in fact disjoint. + +Declaring _Cache_ and _MainMemory_ as subsignatures of _Memory_ serves to factor out their common properties. Extension can be used for a different purpose, in which a single signature is developed by repeated extensions along a chain. In this case, the supersignatures may not correspond to entities in the domain being modelled, but are simply artifacts of specification---fragments developed along the way. Z specifications are typically developed in this style. + +\subsection Hierarchy + +The signature declaration also supports hierarchical structuring. We can declare a signature for systems each consisting of a cache and a main memory: + +\geekmath sig System {\\ + cache: Cache,\\ + main: MainMemory\\ + } + +Again, _System_ introduces a set of atoms, and each field represents a relation. The omission of the keyword _set_ indicates that a relation is a total function. 
So for a system _s_, the expression _s.cache_ denotes one cache---that is, a set consisting of a single cache. This is one of very few instances of implicit constraints in our language, which we introduced in order to make declaration syntax conventional. + +Since signatures denote sets of atoms, apparently circular references are allowed. Linked lists, for example, may be modelled like this, exactly as they might be implemented in a language like Java: + +\geekmath sig List {}\\ +sig NonEmptyList extends List {elt: Elt, rest: List} + +There is no recursion here; the field _rest_ is simply a homogeneous relation of type _List_ to _List_, with its domain restricted to the subset _NonEmptyList_. + +\subsection State Properties + +Properties of signature atoms are recorded as logical formulas. To indicate that such a property always holds, we package it as a _fact_. To say that, for any memory system, the addresses in a cache are always addresses within the main memory, we might write: + +\geekmath fact {all s: System | s.cache.addrs in s.main.addrs} + +or, using a shorthand that allows facts about atoms of a signature to be appended to it: + +\geekmath sig System {cache: Cache, main: MainMemory}\\ + {cache.addrs in main.addrs} + +The appended fact is implicitly prefixed by + +\geekmath all this: System | with this | + +in which the _with_ construct, explained in Section 3.6 below, causes the fields implicitly to be dereferences of the atom _this_. + +A fact can constrain atoms of arbitrary signatures; to say that no main memory is a cache we might write: + +\geekmath fact {no (MainMemory & Cache)} + +where _no e_ means that the expression _e_ has no elements, and & is intersection. + +#Again, this is common enough that we provide a shorthand. Declaring a subsignature as _disjoint_ indicates that it shares no atoms with any other subsignatures of the same supersignature. 
So the fact can be replaced by changing our declaration of _MainMemory_ to: +# +#\geekmath disjoint sig MainMemory extends Memory {} +# +Most descriptions have more interesting facts. We can express the fact that linked lists are acyclic, for example: + +\geekmath fact {no p: List | p in p.\hat @sep rest} + +The expression _\hat @sep rest_ denotes the transitive closure of the relation _rest_, so that _p.^rest_ denotes the set of lists reachable from _p_ by following the field _rest_ once or more. This illustrates a benefit of treating a field as a relation---that we can apply standard relational operators to it---and is also an example of an expression hard to write in a language that treats extension as subtyping (since each application of _rest_ would require its own coercion). + +Often we want to define a property without imposing it as a permanent constraint. In that case, we declare it as a _function_. Here, for example, is the invariant that the cache lines not marked as dirty are consistent with main memory: + +\geekmath fun DirtyInv (s: System) {\\ + all a !: s.cache.dirty | s.cache.map[a] = s.main.map[a]\\ + } + +(The exclamation mark negates an operator, so the quantification is over all addresses that are _not_ dirty.) Packaging this as a function that can be applied to a particular system, rather than as a fact for all systems, will allow us to express assertions about preservation of the invariant (Section 2.8). + +By default, a function returns a boolean value---the value of the formula in its body. The value of _DirtyInv(s)_ for a system _s_ is therefore true or false. A function may return non-boolean values. 
We might, for example, define the set of bad addresses to be those for which the cache and main memory differ: + +\geekmath fun BadAddrs (s: System): set Addr {\\ + result = {a: Addr | s.cache.map[a] != s.main.map[a]}\\ + } + +and then write our invariant like this: + +\geekmath fun DirtyInv (s: System) {BadAddrs(s) in s.cache.dirty} + +In this case, _BadAddrs(s)_ denotes a set of addresses, and is short for the expression on the right-hand side of the equality in the definition of the function _BadAddrs_. The use of the function application as an expression does not in fact depend on the function being defined explicitly. Had we written + +\geekmath fun BadAddrs (s: System): set Addr {\\ + all a: Addr | a in result iff s.cache.map[a] != s.main.map[a]\\ + } + +the application would still be legal; details are explained in Section 3.7. +# +# \geekmath BadAddrs(s) in s.cache.dirty +# +# would be treated as short for +# +# \geekmath all result: set Addr |\\ +# (all a: Addr | a in result iff s.cache.map[a] != s.main.map[a])\\ +# => result in s.cache.dirty +# +# This desugaring is explained in more detail in Section 99 below. + +\subsection Operations + +Following Z, we can specify operations as formulas that constrain pre- and post-states. An operation may be packaged as a single function (or as two functions if we want to separate pre- and post-conditions in the style of VDM or Larch). + +The action of writing a data value to an address in memory might be specified like this: + +\geekmath fun Write (m,m': Memory, d: Data, a: Addr) {\\ + m'.map = m.map ++ (a->d)\\ + } + +The formula in the body of the function relates _m_, the value of the memory before, to _m'_, the value after. These identifiers are just formal arguments, so the choice of names is not significant. Moreover, the prime mark plays no special role akin to decoration in Z---it's a character like any other. The operator ++ is relational override, and the arrow forms a cross product. 
As mentioned above, scalars are represented as singleton sets, so there is no distinction between a tuple and a relation. The arrows in the expressions _a->d_ here and _addrs->Data_ in the declaration of the _map_ field of _Memory_ are one and the same. + +The action of reading a data value can likewise be specified as a function, although since it has no side-effect we omit the _m'_ parameter: + +\geekmath fun Read (m: Memory, d: Data, a: Addr) {\\ + d = m.map[a]\\ + } + +Actions on the system as a whole can be specified using these primitive operations; in Z, this idiom is called 'promotion'. A read on the system is equivalent to reading the cache: + +\geekmath fun SystemRead (s: System, d: Data, a: Addr) {\\ + Read (s.cache, d, a)\\ + } + +The _Read_ operation has an implicit precondition. Since the data parameter _d_ is constrained (implicitly by its declaration) to be scalar---that is, a singleton set---the relation _m.map_ must include a mapping for the address parameter _a_, since otherwise the expression _m.map[a]_ will evaluate to the empty set, and the formula will not be satisfiable. This precondition is inherited by _SystemRead_. If the address _a_ is not in the cache, the operation cannot proceed, and it will be necessary first to load the data from main memory. It is convenient to specify this action as a distinct operation: + +\geekmath fun Load (s,s': System, a: Addr) {\\ + a !in s.cache.addrs\\ + s'.cache.map = s.cache.map + (a->s.main.map[a])\\ + s'.main = s.main\\ + } + +The + operator is just set union (in this case, of two binary relations, the second consisting of a single tuple). A write on the system involves a write to the cache, and setting the dirty bit. 
Again, this can be specified using a primitive memory operation: + +\geekmath fun SystemWrite (s,s': System, d: Data, a: Addr) {\\ + Write (s.cache, s'.cache, d, a)\\ + s'.cache.dirty = s.cache.dirty + a\\ + s'.main = s.main\\ + } + +A cache has much smaller capacity than main memory, so it will occasionally be necessary (prior to loading or writing) to flush lines from the cache back to main memory. We specify flushing as a non-deterministic operation that picks some subset of the cache addresses and writes them back to main memory: + +\geekmath fun Flush (s,s': System) {\\ + some x: set s.cache.addrs {\\ + s'.cache.map = s.cache.map - (x->Data)\\ + s'.cache.dirty = s.cache.dirty - x\\ + s'.main.map = s.main.map ++ \\ + {a: x, d: Data | d = s.cache.map[a]}\\ + }\\ + } + +The - operator is set difference; note that it is applied to sets of addresses (in the third line) and to binary relations (in the second). The comprehension expression creates a relation of pairs _a_->_d_ satisfying the condition. + +Finally, it is often useful to specify the initial conditions of a system. To say that the cache initially has no addresses, we might write a function imposing this condition on a memory system: + +\geekmath fun Init (s: System) {no s.cache.addrs} + +\subsection Traces + +To support analyses of behaviours consisting of sequences of states, we declare two signatures, for ticks of a clock and traces of states: + +\geekmath sig Tick {}\\ +sig SystemTrace {\\ + ticks: set Tick,\\ + first, last: ticks,\\ + next: (ticks - last) !->! (ticks - first),\\ + state: ticks ->! 
System}\\ + {\\ + first.*next = ticks\\ + Init (first.state)\\ + all t: ticks - last | \\ + some s = t.state, s' = t.next.state |\\ + Flush (s,s')\\ + || (some a: Addr | Load (s,s',a))\\ + || (some d: Data, a: Addr | SystemWrite (s,s',d,a))\\ + } + +Each trace consists of a set of _ticks_, a _first_ and _last_ tick, an ordering relation _next_ (whose declaration makes it a bijection from all ticks except the last to all ticks except the first), and a relation _state_ that maps each tick to a system state. + +The fact appended to the signature states first a generic property of traces: that the ticks of a trace are those reachable from the first tick. It then imposes the constraints of the operations on the states in the trace. The initial condition is required to hold in the first state. Any subsequent pair of states is constrained to be related by one of the three side-effecting operations. The existential quantifier plays the role of a _let_ binding, allowing _s_ and _s'_ in place of _t.state_ and _t.next.state_, representing the state for tick _t_ and the state for its successor _t.next_. Note that this formulation precludes stuttering; we could admit it simply by adding the disjunct _s_=_s'_ allowing a transition that corresponds to no operation occurring. + +Bear in mind that this fact is a constraint on all atoms in the set _SystemTrace_. As a free standing fact, the second line of the fact---the initial condition--- would have been written: + +\geekmath fact {all x: SystemTrace | Init ((x.first).(x.state))} + +\subsection Abstraction + +Abstraction relationships are easily expressed using our function syntax. 
To show that our memory system refines a simple memory without a cache, we define an abstraction function _Alpha_ saying that a system corresponds to a memory that is like the system's memory, overwritten by the entries of the system's cache: + +\geekmath fun Alpha (s: System, m: Memory) {\\ + m.map = s.main.map ++ s.cache.map\\ + } + +As another example, if our linked list were to represent a set, we might define the set corresponding to a given list as that containing the elements reachable from the start: + +\geekmath fun ListAlpha (p: List, s: set Elt) {\\ + s = p.*rest.elt\\ + } + +\subsection Assertions + +Theorems about a specification are packaged as _assertions_. An assertion is simply a formula that is intended to hold. A tool can check an assertion by searching for a counterexample---that is, a model of the formula's negation. + +The simplest kinds of assertion record consequences of state properties. For example, + +\geekmath assert {\\ + all s: System | DirtyInv (s) && no s.cache.dirty\\ + => s.cache.map in s.main.map\\ + } + +asserts that if the dirtiness invariant holds, and there are no dirty addresses, then the mapping of addresses to data in the cache is a subset of the mapping in the main memory. + +An assertion can express consequences of operations. For example, + +\geekmath assert {\\ + all s: System, d: Data, a: Addr |\\ + SystemRead (s,d,a) => a in s.cache.addrs\\ + } + +embodies the claim made above that _SystemRead_ has an implicit precondition; it asserts that whenever _SystemRead_ occurs for an address, that address must be in the cache beforehand. An assertion can likewise identify a consequence in the post-state; this assertion + +\geekmath assert {\\ + all s,s': System, d: Data, a: Addr |\\ + SystemWrite (s,s',d,a) => s'.cache.map[a] = d\\ + } + +says that after a _SystemWrite_, the data value appears in the cache at the given address. + +Preservation of an invariant by an operation is easily recorded as an assertion. 
To check that our dirtiness invariant is preserved when writes occur, we would assert + +\geekmath assert {\\ + all s,s': System, d: Data, a: Addr |\\ + SystemWrite (s,s',d,a) && DirtyInv (s) => DirtyInv (s')\\ + } + +Invariant preservation is not the only consequence of an operation that we would like to check that relates pre- and post-states. We might, for example, want to check that operations on the memory system do not change the set of addresses of the main memory. For the _Flush_ operation, for example, the assertion would be + +\geekmath assert {\\ + all s,s': System | Flush(s,s') => s.main.addrs = s'.main.addrs\\ + } + +which holds only because the cache addresses are guaranteed to be a subset of the main memory addresses (by the fact associated with the _System_ signature). + +The effect of a sequence of operations can be expressed by quantifying appropriately over states. For example, + +\geekmath assert {\\ + all s, s': System, a: Addr, d,d': Data | \\ + SystemWrite (s,s',d,a) && SystemRead (s',d',a) => d = d'\\ + } + +says that when a write is followed by a read of the same address, the read returns the data value just written. + +To check that a property holds for all reachable states, we can assert that the property is an invariant of every operation, and is established by the initial condition. This strategy can be shown (by induction) to be sound, but it is not complete. A property may hold for all reachable states, but may not be preserved because an operation breaks the property when executed in a state that happens not to be reachable. + +Traces overcome this incompleteness. 
Suppose, for example, that we want to check the (rather contrived) property that, in every reachable state, if the cache contains an address that isn't dirty, then it agrees with the main memory on at least one address: + +\geekmath fun DirtyProp (s: System) {\\ + some (s.cache.addrs - s.cache.dirty)\\ + => some a: Addr | s.cache.map[a] = s.main.map[a]\\ + } + +We can assert that this property holds in the last state of every trace: + +\geekmath assert {\\ + all t: SystemTrace | with t | DirtyProp (last.state)\\ + } + +This assertion is valid, even though _DirtyProp_ is not an invariant. A write invoked in a state in which all clean entries but one had non-matching values can result in a state in which there are still clean entries but none has a matching value. + +Finally, refinements are checked by assertions involving abstraction relations. We can assert that a _SystemWrite_ refines a basic _Write_ operation on a simple memory: + +\geekmath assert {\\ + all s,s': System, m,m': Memory, a: Addr, d: Data |\\ + Alpha (s,m) && Alpha (s',m') && SystemWrite (s,s',d,a)\\ + => Write (m,m',d,a)\\ + } + +or that the _Flush_ operation is a no-op when viewed abstractly: + +\geekmath assert {\\ + all s,s': System, m,m': Memory |\\ + Alpha (s,m) && Alpha (s',m') && Flush (s,s')\\ + => m.map = m'.map\\ + } + +Note the form of the equality; _m = m'_ would be wrong, since two distinct memories may have the same mapping, and the abstraction _Alpha_ constrains only the mapping and not the memory atom itself. + +Many of the assertions shown here can be made more succinct by the function shorthand explained in Section 3.7 below. 
For example, the assertion that a read following a write returns the value just written becomes: + +\geekmath assert {\\ + all s: System, a: Addr, d: Data | \\ + SystemRead (SystemWrite (s,d,a),a) = d\\ + } + +and the assertion that _Flush_ is a no-op becomes: + +\geekmath assert {\\ + all s: System | Alpha (s).map = Alpha (Flush (s)).map\\ + } + +\subsection Polymorphism + +Signatures can be parameterized by signature types. Rather than declaring a linked list whose elements belong to a particular type _Elt_, as above, we would prefer to declare a generic list: + +\geekmath sig List [T] {}\\ +sig NonEmptyList [T] extends List [T] {elt: T, rest: List [T]} + +Functions and facts may be parameterized in the same way, so we can define generic operators, such as: + +\geekmath fun first [T] (p: List [T]): T {result = p.elt}\\ +fun last [T] (p: List [T]): T {some q: p.*rest | result = q.elt && no q.rest}\\ +fun elements [T] (p: List [T]): set T {result = p.*rest.elt} + +In addition, let's define a generic function that determines whether two elements follow one another in a list: + +\geekmath fun follows [T] (p: List[T], a,b: T) {\\ + some x: p.*rest | x.elt = a && x.rest.elt = b\\ + } + +To see how a generic signature and operators are used, consider replacing the traces of Section 2.6 with lists of system states. Define a function that determines whether a list is a trace: + +\geekmath fun isTrace (t: List [System]) {\\ + Init (first(t))\\ + all s, s': System | follows (t,s,s') => {\\ + Flush (s,s')\\ + || (some a: Addr | Load (s,s',a))\\ + || (some d: Data, a: Addr | SystemWrite (s,s',d,a))\\ + }\\ + } + +Our assertion that every reachable system state satisfies _DirtyProp_ can now be written: + +\geekmath assert {\\ + all t: List[System] | isTrace(t) => DirtyProp (last(t))\\ + } + +\subsection Variants + +To illustrate the flexibility of our notation, we sketch a different formulation of state machines oriented around transitions rather than states. 
+ +Let's introduce a signature representing state transitions of our memory system: + +\geekmath sig SystemTrans {pre,post: System}\\ + {pre.main.addrs = post.main.addrs} + +Declaring the transitions as a signature gives us the opportunity to record properties of all transitions---in this case requiring that the set of addresses of the main memory is fixed. + +Now we introduce a subsignature for the transitions of each operation. For example, the transitions that correspond to load actions are given by: + +\geekmath sig LoadTrans extends SystemTrans {a: Addr}\\ + {Load (pre, post, a)} +# } { +# a !in pre.cache.addrs\\ +# post.cache.map = pre.cache.map ++ (a->pre.main.map[a])\\ +# post.main = pre.main\\ +# } +# +# The formula here is actually identical to the one declared above, but with _pre_ and _post_ for # _s_ and _s'_ ; we could in fact replace it by the function application _Load(pre,post,a)_. + +For each invariant, we define a set of states. For the states satisfying the dirty invariant, we might declare + +\geekmath sig DirtyInvStates extends System {} + +along with the fact + +\geekmath fact {DirtyInvStates = {s: System | DirtyInv(s)}} + +To express invariant preservation, it will be handy to declare a function that gives the image of a set of states under a set of transitions: + +\geekmath fun postimage (ss: set System, tt: set SystemTrans): set System {\\ + result = {s: System | some t: tt | t.pre in ss && s = t.post}\\ + } + +so that we can write the assertion like this: + +\geekmath assert {postimage (DirtyInvStates, LoadTrans) in DirtyInvStates} + +For an even more direct formulation of state machine properties, we might have defined a transition relation instead: + +\geekmath fun Trans (r: System -> System) {\\ + all s, s' : System | \\ + s->s' in r => Flush (s,s') || ...\\ + } + +Then, using transitive closure, we can express the set of states reachable from an initial state, and assert that this set belongs to the set characterized by some property: 
+ +\geekmath assert {all r: System -> System, s: System |\\ + Init (s) && Trans(r) => s.*r in DirtyPropStates\\ + } + +where _DirtyPropStates_ is defined analogously to _DirtyInvStates_. + +\subsection Definitions + +Instead of declaring the addresses of a memory along with its mapping, as we did before: + +\geekmath sig Memory {\\ + addrs: set Addr,\\ + map: addrs ->! Data\\ + } + +we could instead have declared the mapping alone: + +\geekmath sig Memory {\\ + map: Addr ->? Data\\ + } + +and then _defined_ the addresses using a subsignature: + +\geekmath sig MemoryWithAddrs extends Memory {\\ + addrs: set Addr}\\ + {addrs = {a: Addr | some a.map}} + +Now by making the subsignature subsume all memories: + +\geekmath fact {Memory in MemoryWithAddrs} + +we have essentially 'retrofitted' the field. Any formula involving memory atoms now implicitly constrains the _addrs_ field. For example, we can assert that _Read_ has an implicit precondition requiring that the argument be a valid address: + +\geekmath assert {all m: Memory, a: Addr, d: Data | Read (m,d,a) => a in m.addrs} + +even though the specification of _Read_ was written when the field _addrs_ did not even exist. + +\section Semantics + +For completeness, we give an overview of the semantics of the language. The novelties with respect to the original version of Alloy [4] are (1) the idea of organizing relations around basic types as signatures, (2) the treatment of extension as subsetting, and (3) the packaging of formulas in a more explicit (and conventional) style. The semantic basis has been made cleaner, by generalizing relations to arbitrary arity, eliminating 'indexed relations' and the need for a special treatment of sets. + +\subsection Types + +We assume a universe of atoms. The standard notion of a mathematical relation gives us our only composite datatype. The value of an expression will always be a relation---that is, a collection of tuples of atoms. 
Relations are first order: the elements of a tuple are themselves atoms and never relations. + +The language is strongly typed. We partition the universe into subsets each associated with a _basic_ type, and write (T_1, T_2, ..., T_n) for the type of a relation whose tuples each consist of _n_ atoms, with types T_1, T_2, etc. + +A set is represented semantically as a unary relation, namely a relation whose tuples each contain one atom. A tuple is represented as a singleton relation, namely a relation containing exactly one tuple. A scalar is represented as a unary, singleton relation. We use the terms 'set', 'tuple' and 'scalar' to describe relations with the appropriate properties. Basic types are used only to construct relation types, and every expression that appears in a specification has a relational type. Often we will say informally that an expression has a type _T_ where _T_ is the name of a basic type when more precisely we mean that the expression has the type (_T_). + +So, in contrast to traditional mathematical style, we do not make distinctions amongst the atom _a_, the tuple (_a_), the set {_a_} containing just the atom, or the set {(_a_)} containing the tuple, and represent all of these as the last. This simplifies the semantics and gives a more succinct and uniform syntax. +# Because the language is first order (and has no sets of sets, for example), it requires no coercions, and seems not to cause confusion even for novice specifiers. + +\subsection Expression Operators + +Expressions can be formed using the standard set operators written as ASCII characters: union (+), intersection (&) and difference (-). Some standard relational operators, such as transpose (~) and transitive closure (^), can be applied to expressions that denote binary relations. Relational override (++) has its standard meaning for binary relations but can be applied more broadly. +#The type rules and semantics are completely standard. 
For example, if _e_ has the type (S,T), then ~_e_ has the type (T,S) and denotes the collection of pairs obtained by reversing each pair in _e_; if _p_ and _q_ both have the type (T_1, T_2, ..., T_n), then the union _p+q_, intersection _p_&_q_, and difference _p-q_ also have that type, and denote respectively the relations whose tuples are those that appear in either of _p_ and _q_, both of _p_ and _q_, and _p_ but not _q_. + +There are two special relational operators, dot and arrow. The dot operator is a generalized relational composition. Given expressions $p$ and $q$, the expression $p.q$ contains the tuple +$\angleleft\sep p\sub<1>, ... p\sub<m-1>, q\sub<2>, ..., q\sub<n>\angleright$ +when _p_ contains +@math \langle@sep p_1, ..., p_{m}\rangle, +_q_ contains +@math \langle@sep q_1, ... q_n\rangle, +and +@math p_m = q_1. The last type of _p_ and the first type of _q_ must match, and _m_ + _n_, the sum of the arities of _p_ and _q_, must be three or more so that the result is not degenerate. When _p_ is a set and _q_ is a binary relation, the composition _p.q_ is the standard relational image of _p_ under _q_; when _p_ and _q_ are both binary relations, _p.q_ is standard relational composition. In all of the examples above, the dot operator is used only for relational image. + +The arrow operator is cross product: _p \textarrow q_ is the relation containing the tuple +@math \langle@sep p_1, ..., p_{m}, q_1, ... q_n\rangle +when _p_ contains +@math \langle@sep p_1, ..., p_{m}\rangle, +and _q_ contains +@math \langle@sep q_1, ... q_n\rangle. +In all the examples in this paper, _p_ and _q_ are sets, and _p \textarrow q_ is their standard cross product. + +\subsection Formula Operators + +Elementary formulas are formed from the subset operator, written _in_. Thus _p in q_ is true when every tuple in _p_ is in _q_. The formula _p : q_ has the same meaning, but when _q_ is a set, adds an implicit constraint that _p_ be scalar (ie, a singleton). 
This constraint is overridden by writing _p: option q_ (which lets _p_ be empty or a scalar) or _p: set q_ (which eliminates the constraint entirely). Equality is just standard set equality, and is short for a subset constraint in each direction. + +An arrow that appears as the outermost expression operator on the right-hand side of a subset formula can be annotated with _multiplicity markings_: + (one or more), ? (zero or one) and ! (exactly one). The formula + +\geekmath r: S m \textarrow n T + +where _m_ and _n_ are multiplicity markings constrains the relation _r_ to map each atom of _S_ to _n_ atoms of _T_, and to map _m_ atoms of _S_ to each atom of _T_. _S_ and _T_ may themselves be product expressions, but are usually variables denoting sets. For example, + +\geekmath r: S \textarrow ! T\\ +r: S ? \textarrow ! T + +make _r_ respectively a total function on _S_ and an injection. + +Larger formulas are obtained using the standard logical connectives: && (and), || (or), ! (not), => (implies), _iff_ (bi-implication). The formula _if b then f else g_ is short for _b_ => _f_ && !_b_ => _g_. Within curly braces, consecutive formulas are implicitly conjoined. + +Quantifications take their usual form: + +\geekmath all x: e | F + +is true when the formula _F_ holds under every binding of the variable _x_ to a member of the set _e_. In addition to the standard quantifiers, _all_ (universal) and _some_ (existential), we have _no_, _sole_ and _one_ meaning respectively that there are no values, at most one value, and exactly one value satisfying the formula. For a quantifier _Q_ and expression _e_, the formula _Q e_ is short for _Q x: T | e_ (where _T_ is the type of _e_), so _no e_, for example, says that _e_ is empty. + +The declaration of a quantified formula is itself a formula---an elementary formula in which the left-hand side is a variable. Thus + +\geekmath some x = e | F + +is permitted, and is a useful way to express a _let_ binding. 
Quantifiers may be higher-order; the formula + +\geekmath all f: s ->! t | F + +is true when _F_ holds for every binding of a total function from _s_ to _t_ to the variable _f_. Our analysis tool cannot currently handle higher-order quantifiers, but many uses of higher-order quantifiers that arise in practice can be eliminated by skolemization. + +Finally, we have relational comprehensions; the expression + +\geekmath {x_1: e_1, x_2: e_2, ... | F} + +constructs a relation of tuples with elements _x_1_, _x_2_, etc., drawn from set expressions _e_1_, _e_2_, etc., whose values satisfy _F_. + +# \subsection Choice of Operator Symbols +# +# The choice of symbols, especially the arrow, may seem unconventional, but results in familiar-# looking formulas. The dot operator generalizes the 'navigation expressions' of Syntropy# [CD94], now adopted by UML's Object Constraint Language [17], and is intended to be fa# miliar to programmers by resembling object dereferencing. Thus, _x.f_ can be viewed as dere# ferencing the object _x_ with field _f_ when _x_ is a scalar and _f_ is a binary relation. The cho# ice of relational composition rather than function application allows such an expression to be wr# itten without concern for whether _f_ is a function. It also gives a simple and workable treatmen# t of partiality. When _x_ is not in the domain of _f_, _x.f_ is the empty set, and _x.f = y_ will be# false if _y_ is a scalar. +# +# The arrow notation is designed to allow declarations to be written in a familiar way, but to be # given a simple, first-order interpretation. For example, if _S_ and _T_ denote sets, +# +# \geekmath f: S \textarrow T +# +# declares _f_ to be a binary relation from _S_ to _T_. A conventional interpretation would have # the arrow construct a set of relations---a higher-order notion. Instead, we interpret the arrow # as cross product and the colon as subset, with the same result. 
The choice of arrow is also # convenient for constructing tuples; when _x_ and _y_ are scalars, the formula# +# +# \geekmath r' = r + (x \textarrow y) +# +# makes _r'_ the relation containing the tuples of _r_, and additionally, a mapping from _x_ to # _y_. # +\subsection Signatures + +A _signature_ declaration introduces a basic type, along with a collection of relations called _fields_. The declaration + +\geekmath sig S {f: E} + +declares a basic type _S_, and a relation _f_. If _E_ has the type (T_1, T_2, ..., T_n), the relation _f_ will have the type (S, T_1, T_2, ..., T_n), and if _x_ has the type _S_, the expression _x.f_ will have the same type as _E_. When there are several fields, field names already declared may appear in expressions on the right-hand side of declarations; in this case, a field _f_ is typed as if it were the expression _this.f_, where _this_ denotes an atom of the signature type (see Section 3.6). + +The meaning of a specification consisting of a collection of signature declarations is an assignment of values to global constants-- the signatures and the fields. For example, the specification + +\geekmath sig Addr {}\\ +sig Data {}\\ +sig Memory {map: Addr -> Data} + +has 4 constants---the three signatures and one field---with assignments such as: + +\geekmath Addr = {a0, a1}\\ +Data = {d0, d1, d2}\\ +Memory = {m0, m1}\\ +map = {(m0,a0,d0), (m1,a0,d1), (m1,a0,d2)} + +corresponding to a world in which there are 2 addresses, 3 data values and 2 memories, with the first memory (_m0_) mapping the first address (_a0_) to the first data value (_d0_), and the second memory (_m1_) mapping the first address (_a0_) both to the second (_d1_) and third (_d2_) data values. + +A fact is a formula that constrains the constants of the specification, and therefore tends to reduce the set of assignments denoted by the specification. 
For example, + +\geekmath fact {all m: Memory | all a: Addr | sole m.map[a]} + +rules out the above assignment, since it does not permit a memory (such as _m1_) to map an address (such as _a0_) to more than one data value. + +The meaning of a function is a set of assignments, like the meaning of the specification as a whole, but these include bindings to parameters. For example, the function + +\geekmath fun Read (m: Memory, d: Data, a: Addr) {\\ + d = m.map[a]\\ + } + +has assignments such as: + +\geekmath Addr = {a0, a1}\\ +Data = {d0, d1, d2}\\ +Memory = {m0, m1}\\ +map = {(m0,a0,d1)}\\ +m = {m0}\\ +d = {d1}\\ +a = {a0} + +The assignments of a function representing a state invariant correspond to states satisfying the invariant; the assignments of a function representing an operation (such as _Read_) correspond to executions of the operation. + +An assertion is a formula that is claimed to be _valid_: that is, true for every assignment that satisfies the facts of the specification. To check an assertion, one can search for a _counterexample_: an assignment that makes the formula false. +For example, the assertion + +\geekmath assert {\\ + all m,m': Memory, d: Data, a: Addr | Read (m,d,a) => Read (m',d,a)} + +which claims, implausibly, that if a read of memory _m_ returns _d_ at _a_, then so does a read at memory _m'_, has the counterexample + +\geekmath Addr = {a0}\\ +Data = {d0,d1}\\ +Memory = {m0, m1}\\ +map = {(m0,a0,d0), (m1,a0,d1)} + +To find a counterexample, a tool should negate the formula and then skolemize away the bound variables, treating them like the parameters of a function, with values to be determined as part of the assignment. In this case, the assignment might include: + +\geekmath m = {m0}\\ +m' = {m1}\\ +d = {d0}\\ +a = {a0} + +\subsection Extension + +Not every signature declaration introduces a new basic type. 
A signature declared without an extension clause is a _type signature_, and creates both a basic type and a set constant of the same name. A signature _S_ declared as an extension is a _subsignature_, and creates only a set constant, along with a constraint making it a subset of each _supersignature_ listed in the extension clause. The subsignature takes on the type of the supersignatures, so if there is more than one, they must therefore have the same type, by being direct or indirect subsignatures of the same type signature. + +A field declared in a subsignature is as if declared in the corresponding type signature, with the constraint that the domain of the field is the subsignature. For example, + +\geekmath sig List {}\\ +sig NonEmptyList extends List {elt: Elt,rest: List} + +makes _List_ a type signature, and _NonEmptyList_ a subset of _List_. The fields _elt_ and _rest_ map atoms from the type _List_, but are constrained to have domain _NonEmptyList_. Semantically, it would have been equivalent to declare them as fields of _List_, along with facts constraining their domains: + +\geekmath sig List {elt: Elt,rest: List}\\ +sig NonEmptyList extends List {}\\ +fact {elt.Elt in NonEmptyList}\\ +fact {rest.List in NonEmptyList} + +(exploiting our dot notation to write the domain of a relation _r_ from _S_ to _T_ as _r.T_). + +\subsection Overloading and Implicit Prefixing + +Whenever a variable is declared, its type can be easily obtained from its declaration (from the type of the expression on the right-hand side of the declaration), and every variable appearing in an expression is declared in an enclosing scope. The one complication to this rule is the typing of fields. + +For modularity, a signature creates a local namespace. Two fields with the name _f_ appearing in different signatures do not denote the same relational constant. Interpreting an expression therefore depends on first resolving any field names that appear in it. 
+#We have devised a simple resolution scheme whose details are beyond the scope of this paper. +In an expression of the form _e.f_, the signature to which _f_ belongs is determined according to the type of _e_. To keep the scheme simple, we require that sometimes the specifier resolve the overloading explicitly by writing the field _f_ of signature _S_ as _S$f_. (At the end of the previous section, for example, the reference in the fact to _rest_ should actually be to _List$rest_, since the context does not indicate which signature _rest_ belongs to.) + +In many formulas, a single expression is dereferenced several times with different fields. A couple of language features are designed to allow these formulas to be written more succinctly, and, if used with care, more comprehensibly. First, we provide two syntactic variants of the dot operator. Both _p_::_q_ and _q_[_p_] are equivalent to _p.q_, but have different precedence: the double colon binds more tightly than the dot, and the square brackets bind more loosely than the dot. Second, we provide a _with_ construct similar to Pascal's that makes dereferencing implicit. + +Consider, for example, the following simplified signature for a trace: + +\geekmath sig Trace {\\ + ticks: set Tick,\\ + first: Tick,\\ + next: Tick -> Tick,\\ + state: Tick -> State\\ + } + +Each trace _t_ has a set of ticks _t.ticks_, a first tick _t.first_, an ordering _t.next_ that maps ticks to ticks, and a relation _t.state_ mapping each tick to a state. For a trace _t_ and tick _k_, the state is _k_.(_t.state_); the square brackets allow this expression to be written instead as _t.state_[_k_]. To constrain _t.ticks_ to be those reachable from _t. 
first_ we might write: + +\geekmath fact {all t: Trace | (t.first).*(t.next ) = t.ticks} + +Relying on the tighter binding of the double colon, we can eliminate the parentheses: + +\geekmath fact {all t: Trace | t::first.*t::next = t.ticks} + +Using _with_, we can make the _t_ prefixes implicit: + +\geekmath fact {all t: Trace | with t | first.*next = ticks} + +In general, _with e | F_ is like _F_, but with _e_ prefixed wherever appropriate to a field name. Appropriateness is determined by type: _e_ is matched to any field name with which it can be composed using the dot operator. +#Fields that are prefixed using a double colon operator are not automatically prefixed, so one can use _with_ to prefix some fields of a given signature but not others. There is a corresponding _with_ construct for expressions also, so that _with e | E_ is like the expression _E_, with _e_ prefixed as appropriate. +A fact attached to a signature _S_ is implicitly enclosed by _all this: S | with this |_, and the declarations of a signature are interpreted as constraints as if they had been declared within this scope. Consequently, the declaration of _first_ above should be interpreted as if it were the formula: + +\geekmath all this: Trace | with this | first: ticks + +which is equivalent to + +\geekmath all this: Trace | this.first: this.ticks + +and should be typed accordingly. +# +# So, in the following fuller version of the above signature: +# +# \geekmath sig Trace {\\ +# ticks: set Tick\\ +# first: ticks,\\ +# next: (ticks - first) ->? ticks\\ +# state: ticks ->! State\\ +# } {first.*next = ticks} +# +# the declaration of the field _first_, for example, includes the constraint +# +# \geekmath all this: Trace | with this | first: ticks +# +# which is equivalent to +# +# \geekmath all this: Trace | this.first: this.ticks + +\subsection Function Applications + +A function may be applied by binding its parameters to expressions. 
The resulting application may be either an expression or a formula, but in both cases the function body is treated as a formula. The formula case is simple: the application is simply short for the body with the formal parameters replaced by the actual expressions (and bound variables renamed where necessary to avoid clashes). + +The expression case is more interesting. The application is treated as a syntactic sugar. Suppose we have a function application expression, _e_ say, of the form + +\geekmath f(a_1, a_2, ..., a_n) + +that appears in an elementary formula _F_. The declaration of the function _f_ must list _n_ + 1 formal arguments, of which the _second_ will be treated as the result. The entire elementary formula is taken to be short for + +\geekmath all result: D | f (a_1, result, a_2, ..., a_n) => F [result/e] + +where _D_ is the right-hand side of the declaration of the missing argument, and _F_ [_result_/_e_] is _F_ with the fresh variable _result_ substituted for the application expression _e_. The application of _f_ in this elaborated formula is now a formula, and is treated simply as an inlining of the formula of _f_. + +#Type checking will thus require that the actual arguments match the formals that are listed first, third, fourth, fifth, etc. (This choice of the second argument, incidentally, is one concession we make to specifying state machines; function applications can be used to model operation invocations in which it is convenient to declare the pre- and post- states as the first and second arguments of the operation.) +# +To see how this works, consider the definition of a function _dom_ that gives the domain of a relation over signature _X_: + +\geekmath fun dom (r: X -> X, d: set X) {d = r.X} + +(We have defined the function monomorphically for a homogeneous relation. In practice, one would define a polymorphic function, but we want to avoid conflating two unrelated issues.) 
Here is a trivial assertion that applies the function as an expression: + +\geekmath assert {all p: X \textarrow X | (dom (p)).p in X} + +Desugaring the formula, we get + +\geekmath all p: X \textarrow X | all result: set X | dom (p, result) => result.p in X + +and then inlining + +\geekmath all p: X \textarrow X | all result: set X | result = p.X => result.p in X + +This formula can be reduced (by applying a universal form of the One Point Rule) to + +\geekmath all p: X \textarrow X | (p.X).p in X + +which is exactly what would have been obtained had we just replaced the application expression by the expression on the right-hand side of the equality in the function's definition! +# +# If there is more than one application expression in an elementary formula, a fresh quantification is # generated for each. For example,# +# +# \geekmath assert {all p, q: X \textarrow X | dom (p.q) in dom (p)} +# +# becomes +# +# \geekmath all p,q: X \textarrow X | all result1, result2: set X | \\ +# dom (p.q, result1) => dom (p, result2) => result1 in result2 +# +# which can again be reduced by inlining and the One Point Rule to +# +# \geekmath all p,q: X \textarrow X | (p.q).X in p.X + +Now let's consider an implicit definition. Suppose we have a signature _X_ with an ordering _lte_, so that _e.lte_ is the set of elements that _e_ is less than or equal to, and a function _min_ that gives the minimum of a set, defined implicitly as the element that is a member of the set, and less than or equal to all members of the set: + +\geekmath sig X {lte: set X}\\ +fun min (s: set X, m: option X) {\\ + m in s && s in m.lte\\ + } + +Because the set may be empty, _min_ is partial. Depending on the properties of _lte_ it may also fail to be deterministic. 
A formula that applies this function + +\geekmath assert {all s: set X | min (s) in s} + +can as before be desugared + +\geekmath all s: set X | all result: option X | min (s, result) => result in s + +and expanded by inlining + +\geekmath all s: set X | all result: option X |\\ + (result in s) && s in result.lte => result in s + +but in this case the One Point Rule is not applicable. + +As a convenience, our language allows the result argument of a function to be declared anonymously in a special position, and given the name _result_. The domain function, for example, can be defined as: + +\geekmath fun dom (r: X -> X): set X {result = r.X} + +How the function is defined has no bearing on how it is used; this definition is entirely equivalent to the one above, and can also be applied as a formula with two arguments. + +\subsection Polymorphism + +Polymorphism is treated as a syntactic shorthand. Lack of space does not permit a full discussion here. + +\section Related Work + +We have shown how a handful of elements can be assembled into a rather simple but flexible notation. The elements themselves are far from novel---indeed, we hope that their familiarity will make the notation easy to learn and use---but their assembly into a coherent whole results in a language rather different from existing specification languages. + +\subsection New Aspects + +The more novel aspects of our work are: + +\point \cdot _Objectification of state_. Most specification languages represent states as cartesian products of components; in our approach, a state, like a member of any signature, is an individual---a distinct atom with identity. A similar idea is used in the situation calculus [11], whose 'relational fluents' add a situation variable to each time-varying relation. The general idea of objectifying all values is of course the foundation of object-oriented programming languages, and was present in LISP. 
Interestingly, object-oriented variants of Z (such as [1]) do not objectify schemas. The idea of representing structures in first-order style as atoms is present also in algebraic specifications such as Larch [2], which treat even sets and relations in this manner. + +\point \cdot _Components as relations_. Interpreting fields of a structure as functions goes back to early work on verification, and is widely used (for example, by Leino and Nelson [10]). We are not aware, however, of specification languages that use this idea, or that flatten fields to relations over atoms. + +\point \cdot _Extension by global axioms_. The 'facts' of our notation allow the properties of a signature to be extended monotonically. The idea of writing axioms that constrain the members of a set constant declared globally is hardly remarkable, but it appears not to have been widely exploited in specification languages. + +\point \cdot _Extension by subset_. Treating the extension of a structure as a refinement modelled by subset results in a simple semantics, and melds well with the use of global axioms. Again, this seems to be an unremarkable idea, but one whose power has not been fully recognized. + +\subsection Old Aspects + +The aspects of our work that are directly taken from existing languages are: + +\point \cdot _Formulas_. The idea of treating invariants, definitions, operations, etc, uniformly as logical formulas is due to Z [14]. + +\point \cdot _Assertions_. Larch [2] provides a variety of constructs for adding intentional redundancy to a specification in order to provide error-detection opportunities. + +\point \cdot _Parameterized formulas_. The 'functional' style we have adopted, in which all formulas are explicitly parameterized, in contrast to the style of most specification languages, is used also by languages for theorem provers, such as PVS [13]. 
VDM [8] offers a mechanism called 'operation quotation' in which pre- and post-conditions are reused by interpreting them as functions similar to ours. + +\point \cdot _Parametric Polymorphism_. The idea of parameterizing descriptions by types was developed in the programming languages community, most notably in the context of ML [12]. + +\point \cdot _Implicit Prefixing_. Our 'with' operator is taken from Pascal [9]. + +\point \cdot _Relational operators_. The dot operator, and the treatment of scalars as singletons, comes from the earlier version of Alloy [4]. +# +#\point \cdot _Function shorthands_. The idea of desugaring function applications by quantifying over the result is present in Beth's extensionality theorem [Beth]. + +\subsection Z's Schema Calculus + +Z has been a strong influence on our work; indeed, this paper may be viewed as an attempt to achieve some of the power and flexibility of Z's schema calculus in a first-order setting. Readers unfamiliar with Z can find an excellent presentation of the schema calculus in [16]. The current definitive reference is [15], although Spivey's manual [14] is more accessible for practitioners. + +A _schema_ consists of a collection of variable declarations and a formula constraining the variables. Schemas can be anonymous. When a name has been bound to a schema, it can be used in three different ways, distinguished according to context. First, it can be used as a _declaration_, in which case it introduces its variables into the local scope, constraining them with its formula. Second, where the variables are already in scope, it can be used as a _predicate_, in which case the formula applies and no new declarations are added. Both of these uses are syntactic; the schema can be viewed as a macro. + +In the third use, the schema is semantic. Its name represents a set of _bindings_, each binding being a finite function from variable names to values. 
The bindings denoted by the schema name are the models of the schema's formula: those bindings of variable names to values that make the formula true. + +How a schema is being applied is not always obvious; in the set comprehension {_S_}, for example, _S_ represents a declaration, so that the expression as a whole denotes the same set of bindings as _S_ itself. Given a binding _b_ for a schema with component variable _x_, the expression _b.x_ denotes the value assigned to _x_ in _b_. Unlike Alloy's dot, this dot is a function application, so for a set of bindings _B_, the expression _B.x_ is not well formed. + +Operations in Z are expressed using the convention that primed variables denote components of the post-state. A mechanism known as _decoration_ allows one to write _S'_ for the schema that is like _S_, but whose variable names have been primed. Many idioms, such as promotion, rely on being able to manipulate the values of a schema's variables in aggregate. To support this, Z provides the theta operator: \theta @sep _S_ is an expression that denotes a binding in which each variable _x_ that belongs to _S_ is bound to a variable of the same name _x_ declared in the local scope. Theta and decoration interact subtly: \theta @sep _S'_ is not a binding of _S'_, but rather binds each variable _x_ of _S_ to a variable _x'_ declared locally. So where we would write _s=s'_ to say that pre- and post-states _s_ and _s'_ are the same, a Z specifier would write \theta @sep _S_ = \theta @sep _S'_. This formula equates each component _x_ of _S_ to its matching component _x'_ of _S'_, because _x_ and _x'_ are the respective values bound to _x_ by \theta @sep _S_ and \theta @sep _S'_ respectively. + +Our 'fact' construct allows the meaning of a signature name to be constrained subsequent to its declaration. A schema, in contrast, is 'closed': a new schema name must be introduced for each additional constraint. 
This can produce an undesirable proliferation of names for a system's state, but it does make it easier to track down those formulas that affect a schema's meaning. + +The variables of a schema can be renamed, but cannot be replaced by arbitrary expressions (since this would make nonsense of declarations). This requires the introduction of existential quantifiers where in our notation an expression is passed as an actual. On the other hand, when no renaming is needed, it is more succinct. + +Z's sequential composition operator is defined by a rather complicated transformation, and relies on adherence to particular conventions. The schema _P_ @sep \fatsemi @sep _Q_ is obtained by collecting primed variables in _P_ that match unprimed variables in _Q_; renaming these in both _P_ and _Q_ with a new set of variable names; and then existentially quantifying the new names away. For example, to say that a read following a write to the same address yields the value written, we would write: + +\geekmath +all m: Memory, a: Addr, d, d': Data | Read (Write(m,a,d),d') => d = d' + +which is short for + +\geekmath all m: Memory, a: Addr, d, d': Data |\\ + all m': Memory | Write (m,m',a,d) => Read (m,a,d') => d = d' + +In Z, assuming appropriate declarations of a schema _Memory_ and a given type _Data_, the formula would be: + +\geekmath +\forall Memory; Memory'; x!: Data \fatdot Write \fatsemi Read [x!/d!] \implies x! = d! + +which is short for + +\geekmath +\forall Memory; Memory'; x!: Data \fatdot \\ + \exists Memory'' \fatdot \\ + \exists Memory' \fatdot Write \and \theta @sep Memory' = \theta @sep Memory''\\ + \exists Memory'; d!: Data \fatdot \\ + Read \and \theta @sep Memory = \theta @sep Memory'' \and d! = x!\\ + \implies x! = d! + +The key semantic difference between signatures and schemas is this. A signature is a set of atoms; its fields are relational constants declared in global scope. 
A schema, on the other hand, denotes a higher-order object: a set of functions from field names to values. Our approach was motivated by the desire to remain first order, so that the analysis we have developed [3] can be applied. Not surprisingly, there is a cost in expressiveness. We cannot express higher-order formulas, most notably those involving preconditions. Suppose we want to assert that our write operation has no implicit precondition. In Z, such an assertion is easily written: + +\geekmath +\forall Memory; a?: Addr \fatdot \exists Memory'; d!: Data \fatdot Write + +We might attempt to formulate such an assertion in our notation as follows: + +\geekmath assert {\\ + all m: Memory, a: Addr, d: Data | some m': Memory | Write (m,m',d,a) + } + +Unfortunately, this has counterexamples such as + +\geekmath Addr = {a0}\\ +Data = {d0}\\ +Memory = {m0, m1}\\ +map = {} + +in which the _map_ relation lacks an appropriate tuple. Intuitively, the assertion claims that there is no context in which a write cannot proceed; a legitimate counterexample---but one we certainly did not intend---simply gives a context in which a memory with the appropriate address-value mapping is not available. + +We have focused in this discussion on schemas. It is worth noting that Z is expressive enough to allow a style of structuring almost identical to ours, simply by declaring signatures as given types, fields and functions as global variables, and by writing facts, and the bodies of functions, as axioms. Field names would have to be globally unique, and the resulting specification would likely be less succinct than if expressed in our notation. + +\subsection Phenomenology + +Pamela Zave and Michael Jackson have developed an approach to composing descriptions [18] that objectifies states, events and time intervals, and constrains their properties with global axioms. 
Objectification allows descriptions to be reduced to a common phenomenology, so that descriptions in different languages, and even in different paradigms, can be combined. Michael Jackson has argued separately for the importance of objectification as a means of making a more direct connection between a formal description and the informal world: as he puts it, "domain phenomena are facts about individuals" [7]. It is reassuring that the concerns of language design and tractability of analysis that motivated our notation are not in conflict with sound method, and it seems that our notation would be a good choice for expressing descriptions in the form that Zave and Jackson have proposed. + +\section Evaluation + +\subsection Merits + +The key motivations of the design of our mechanism have been minimality and flexibility. It is worth noting how this has been achieved by the _omission_ of certain features: + +\point \cdot There is only one form of semantic structuring; our opinion is that adding extra mechanisms, for example to group operations into classes, does not bring enough benefit to merit the additional complexity, and tends to be inflexible. (Our language does provide some namespace control for signature and paragraph names in the style of Java packages, but this is trivial and does not interact with the basic mechanism). + +\point \cdot There is no subtyping; subsignatures are just subsets of their supersignatures, and have the same type. There are only two types: basic types (for signatures), and relational types (for expressions). Types are not nested. + +\point \cdot There is only one way that formulas are packaged for reuse. The same function syntax is used for observers, operations, refinement relations, etc. The function shorthand syntax unifies the syntax of both declaration and use for explicit and implicit function definitions. 
+ +\point \cdot The values of a signature with fields are just like the values of any basic type; there is nothing like Z's notion of a schema binding. + +Our interpretation of a subsignature as a subset of the supersignature appears to be novel as a mechanism for structuring in a specification language. It has three nice consequences: + +\point \cdot _Elimination of type coercions_. If _x_ belongs to a signature _S_ whose extension _S'_ defines a field _f_, the expression _x.f_ will just denote an empty set if _x_ does not belong to _S'_. Contrast this with the treatment of subclasses in the Object Constraint Language [17], for example, which results in pervasive coercions and often prevents the use of set and relation operators (since elements must be coerced one at a time). + +\point \cdot _Ease of extension_. Constraints can be added to the subsignature simply by writing a constraint that is universally quantified over elements of that subset. + +\point \cdot _Definitional extension_. We can declare an extension _S'_ of a signature _S_ with additional fields, relate these fields to the fields declared explicitly for _S_, and then record the fact that _S=S'_ (as illustrated in Section 2.11). The effect is that every atom of _S_ has been extended with appropriately defined fields, which can be accessed whenever an expression denoting such an atom is in scope! We expect to find this idiom especially useful for defining additional fields for visualization purposes. + +\subsection Deficiencies + +One might wonder whether, having encoded structures using atoms, and having provided quantifiers over those atoms, one can express arbitrary properties of higher-order structures. Unfortunately, but not surprisingly, this is not possible. The catch is that fields are treated in any formulas as global variables that are existentially quantified. 
To simulate higher-order logic, it would be necessary to allow quantifications over these variables, and since they have relational type, that would imply higher-order quantification. The practical consequence is that properties requiring higher-order logic cannot be expressed. One cannot assert that the precondition of an operation is no stronger than some predicate; one cannot in general specify operations by minimization; and one cannot express certain forms of refinement check. An example of this problem is given in Section 4.3 above. Whether the problem is fundamental or can be partially overcome remains to be seen. + +The treatment of subsignatures as subsets has a nasty consequence. Since a field declared in a subsignature becomes implicitly a field of the supersignature, two subsignatures cannot declare fields of the same name. The extension mechanism is therefore not properly modular, and a specification should use hierarchical structure instead where this matters. + +Modelling a set of states as atoms entails a certain loss of abstraction. In this specification + +\geekmath sig A {}\\ +sig S {a: A}\\ +fun op (s,s': S) {s.a = s'.a} + +the operation _op_ has executions in which the pre- and post-states are equal (that is, the same atom in _S_), and executions in which only their _a_ components are equal. One might object that this distinction is not observable. Moreover, replacing the formula by _s=s'_ would arguably be an overspecification---a 'bias' in VDM terminology [8]. The situation calculus [11] solves this problem by requiring every operation to produce a state change: _s_ and _s'_ are thus regarded as distinct situations by virtue of occurring at different points in the execution. The dual of this solution is to add an axiom requiring that no two distinct atoms of _S_ may have equal _a_ fields. Either of these solutions is easily imposed in our notation. 
+ +Our treatment of scalars and sets uniformly as relations has raised the concern that the resulting succinctness comes with a loss of clarity and redundancy. Extensive use of the previous version of our language, mostly by inexperienced specifiers, suggests that this is not a problem. The loss of some static checking is more than compensated by the semantic analysis that our tool performs. + +\section Conclusion + +Two simple ideas form the basis of our modularity mechanism: (1) that a structure is just a set of atoms, and its fields are global relations that map those atoms to structure components; and (2) that extensions of a structure are just subsets. Our relational semantics, in which all variables and fields are represented as relations, makes the use of structures simple and succinct, and it ensures that the language as a whole remains first order. For a variety of modelling tasks, we believe that our approach provides a useful balance of expressiveness and tractability. + +\section* Acknowledgments + +The language described here was refined by experience writing specifications, long before an analyzer existed, and by the development of the analyzer tool itself. Mandana Vaziri and Sarfraz Khurshid were our early adopters, and Brian Lin and Joe Cohen helped implement the tool. The paper itself was improved greatly by comments from Mandana and Sarfraz, from Michael Jackson, from Tomi Mannisto, and especially from Pamela Zave, whose suggestions prompted a major rewrite. Jim Woodcock helped us understand Z, and the clarity and simplicity of his own work has been a source of inspiration to us. Our ideas have also been improved by the comments of the members of IFIP working groups 2.3 and 2.9, especially Tony Hoare, Greg Nelson and Rustan Leino. This work was funded in part by ITR grant #0086154 from the National Science Foundation, by a grant from NASA, and by an endowment from Doug and Pat Ross. 
+ +\section* References + +#\ref [CD94] Steve Cook and John Daniels. Designing Object Systems: Object-Oriented Modelling with Syntropy. Prentice Hall, 1994. +# +\ref [1] R. Duke, G. Rose and G. Smith. Object-Z: A Specification Language Advocated for the Description of Standards. SVRC Technical Report 94-45. The Software Verification Research Centre, University of Queensland, Australia. + +\ref [2] John V. Guttag, James J. Horning, and Andres Modet. Report on the Larch Shared Language: Version 2.3. Technical Report 58, Compaq Systems Research Center, Palo Alto, CA, 1990. + +#\ref [Hal90] Anthony Hall. Using Z as a Specification Calculus for Object-Oriented Systems. In D. Bjorner, C.A.R. Hoare, and H. Langmaack, eds., VDM and Z: Formal Methods in Software Development, Lecture Notes in Computer Science, Volume 428, pp. 290–381, Springer-Verlag, New York, 1990. +# +\ref [3] Daniel Jackson. Automating first-order relational logic. Proc. ACM SIGSOFT Conf. Foundations of Software Engineering. San Diego, November 2000. + +\ref [4] Daniel Jackson. Alloy: A Lightweight Object Modelling Notation. To appear, ACM Transactions on Software Engineering and Methodology, October 2001. + +\ref [5] Daniel Jackson, Ian Schechter and Ilya Shlyakhter. Alcoa: the Alloy Constraint Analyzer. Proc. International Conference on Software Engineering, Limerick, Ireland, June 2000. + +\ref [6] Daniel Jackson and Jeannette Wing. Lightweight Formal Methods. In: H. Saiedian (ed.), An Invitation to Formal Methods. IEEE Computer, 29(4):16-30, April 1996. + +\ref [7] Michael Jackson. Software Requirements and Specifications: A Lexicon of Practice, Principles and Prejudices. Addison-Wesley, 1995. + +\ref [8] Cliff Jones. Systematic Software Development Using VDM. Second edition, Prentice Hall, 1990. + +\ref [9] Kathleen Jensen and Niklaus Wirth. Pascal: User Manual and Report. Springer-Verlag, 1974. + +\ref [10] K. Rustan M. Leino and Greg Nelson. Data abstraction and information hiding. 
Research Report 160, Compaq Systems Research Center, November 2000. + +\ref [11] Hector Levesque, Fiora Pirri, and Ray Reiter. Foundations for the Situation Calculus. Linköping Electronic Articles in Computer and Information Science, ISSN 1401-9841, Vol. 3(1998), Nr. 018. + +\ref [12] Robin Milner, Mads Tofte and Robert Harper. The Definition of Standard ML. MIT Press, 1990. + +\ref [13] S. Owre, N. Shankar, J. M. Rushby, and D. W. J. Stringer-Calvert. PVS Language Reference. Computer Science Laboratory, SRI International, Menlo Park, CA, September 1999. + +\ref [14] J. Michael Spivey. The Z Notation: A Reference Manual. Second edition, Prentice Hall, 1992. + +\ref [15] Ian Toyn et al. Formal Specification---Z Notation---Syntax, Type and Semantics. Consensus Working Draft 2.6 of the Z Standards Panel BSI Panel IST/5/-/19/2 (Z Notation). August 24, 2000. + +\ref [16] Jim Woodcock and Jim Davies. Using Z: Specification, Refinement and Proof. Prentice Hall, 1996. + +\ref [17] Jos Warmer and Anneke Kleppe. The Object Constraint Language: Precise Modeling with UML. Addison Wesley, 1999. + +\ref [18] Pamela Zave and Michael Jackson. Conjunction as Composition. ACM Transactions on Software Engineering and Methodology II(4): 379--411, October 1993. 
diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/test.txt.index.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/test.txt.index.txt new file mode 100755 index 00000000..e69de29b diff --git a/Robust/src/Benchmarks/mlp/tagger/mlp-java/test.txt.tag.txt b/Robust/src/Benchmarks/mlp/tagger/mlp-java/test.txt.tag.txt new file mode 100755 index 00000000..e69de29b diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/charmap.txt new file mode 100755 index 00000000..cc4ab0e0 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/charmap.txt @@ -0,0 +1,31 @@ +# basic characters +<char:linebreak><index:22> + +# dots +<char:cdot><index:22> + +# quotes +<char:quote><index:22> +<char:quoteleft><index:22> +<char:quoteright><index:22> +<char:quotedblleft><index:22> +<char:quotedblright><index:22> + +#dashes +<char:hyphen><index:22> +<char:endash><index:22> +<char:emdash><index:22> + +# math symbols +<char:oplus><index:22> +<char:langle><index:22> +<char:rangle><index:22> +<char:textarrow><index:22> +<char:hat><index:22> +<char:fatsemi><index:22> +<char:forall><index:22> +<char:fatdot><index:22> +<char:fatsemi><index:22> +<char:implies><index:22> +<char:exists><index:22> +<char:and><index:22> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/lucmathext-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/lucmathext-charmap.txt new file mode 100755 index 00000000..623b3040 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/lucmathext-charmap.txt @@ -0,0 +1,130 @@ +# character map for Lucida New Math Extended font + +<char:parenleftbig><font:LucidNewMatExtT><index:161> +<char:parenrightbig><font:LucidNewMatExtT><index:162> +<char:bracketleftbig><font:LucidNewMatExtT><index:163> +<char:bracketrightbig><font:LucidNewMatExtT><index:164> +<char:floorleftbig><font:LucidNewMatExtT><index:165> 
+<char:floorrightbig><font:LucidNewMatExtT><index:166> +<char:ceilingleftbig><font:LucidNewMatExtT><index:167> +<char:ceilingrightbig><font:LucidNewMatExtT><index:168> +<char:braceleftbig><font:LucidNewMatExtT><index:169> +<char:bracerightbig><font:LucidNewMatExtT><index:170> +<char:angbracketleftbig><font:LucidNewMatExtT><index:173> +<char:angbracketrightbig><font:LucidNewMatExtT><index:174> +<char:vextendsingle><font:LucidNewMatExtT><index:175> +<char:vextenddouble><font:LucidNewMatExtT><index:176> +<char:slashbig><font:LucidNewMatExtT><index:177> +<char:backslashbig><font:LucidNewMatExtT><index:178> +<char:parenleftBig><font:LucidNewMatExtT><index:179> +<char:parenrightBig><font:LucidNewMatExtT><index:180> +<char:parenleftbigg><font:LucidNewMatExtT><index:181> +<char:parenrightbigg><font:LucidNewMatExtT><index:182> +<char:bracketleftbigg><font:LucidNewMatExtT><index:183> +<char:bracketrightbigg><font:LucidNewMatExtT><index:184> +<char:floorleftbigg><font:LucidNewMatExtT><index:185> +<char:floorrightbigg><font:LucidNewMatExtT><index:186> +<char:ceilingleftbigg><font:LucidNewMatExtT><index:187> +<char:ceilingrightbigg><font:LucidNewMatExtT><index:188> +<char:braceleftbigg><font:LucidNewMatExtT><index:189> +<char:bracerightbigg><font:LucidNewMatExtT><index:190> +<char:angbracketleftbigg><font:LucidNewMatExtT><index:28> +<char:angbracketrightbigg><font:LucidNewMatExtT><index:29> +<char:slashbigg><font:LucidNewMatExtT><index:193> +<char:backslashbigg><font:LucidNewMatExtT><index:194> +<char:parenleftBigg><font:LucidNewMatExtT><index:195> +<char:parenrightBigg><font:LucidNewMatExtT><index:33> +<char:bracketleftBigg><font:LucidNewMatExtT><index:34> +<char:bracketrightBigg><font:LucidNewMatExtT><index:35> +<char:floorleftBigg><font:LucidNewMatExtT><index:36> +<char:floorrightBigg><font:LucidNewMatExtT><index:37> +<char:ceilingleftBigg><font:LucidNewMatExtT><index:38> +<char:ceilingrightBigg><font:LucidNewMatExtT><index:39> 
+<char:braceleftBigg><font:LucidNewMatExtT><index:40> +<char:bracerightBigg><font:LucidNewMatExtT><index:41> +<char:angbracketleftBigg><font:LucidNewMatExtT><index:42> +<char:angbracketrightBigg><font:LucidNewMatExtT><index:43> +<char:slashBigg><font:LucidNewMatExtT><index:44> +<char:backslashBigg><font:LucidNewMatExtT><index:45> +<char:slashBig><font:LucidNewMatExtT><index:46> +<char:backslashBig><font:LucidNewMatExtT><index:47> +<char:parenlefttp><font:LucidNewMatExtT><index:48> +<char:parenrighttp><font:LucidNewMatExtT><index:49> +<char:bracketlefttp><font:LucidNewMatExtT><index:50> +<char:bracketrighttp><font:LucidNewMatExtT><index:51> +<char:bracketleftbt><font:LucidNewMatExtT><index:52> +<char:bracketrightbt><font:LucidNewMatExtT><index:53> +<char:bracketleftex><font:LucidNewMatExtT><index:54> +<char:bracketrightex><font:LucidNewMatExtT><index:55> +<char:bracelefttp><font:LucidNewMatExtT><index:56> +<char:bracerighttp><font:LucidNewMatExtT><index:57> +<char:braceleftbt><font:LucidNewMatExtT><index:58> +<char:bracerightbt><font:LucidNewMatExtT><index:59> +<char:braceleftmid><font:LucidNewMatExtT><index:60> +<char:bracerightmid><font:LucidNewMatExtT><index:61> +<char:braceex><font:LucidNewMatExtT><index:62> +<char:arrowvertex><font:LucidNewMatExtT><index:63> +<char:parenleftbt><font:LucidNewMatExtT><index:64> +<char:parenrightbt><font:LucidNewMatExtT><index:65> +<char:parenleftex><font:LucidNewMatExtT><index:66> +<char:parenrightex><font:LucidNewMatExtT><index:67> +<char:angbracketleftBig><font:LucidNewMatExtT><index:68> +<char:angbracketrightBig><font:LucidNewMatExtT><index:69> +<char:unionsqtext><font:LucidNewMatExtT><index:70> +<char:unionsqdisplay><font:LucidNewMatExtT><index:71> +<char:contintegraltext><font:LucidNewMatExtT><index:72> +<char:contintegraldisplay><font:LucidNewMatExtT><index:73> +<char:circledottext><font:LucidNewMatExtT><index:74> +<char:circledotdisplay><font:LucidNewMatExtT><index:75> +<char:circleplustext><font:LucidNewMatExtT><index:76> 
+<char:circleplusdisplay><font:LucidNewMatExtT><index:77> +<char:circlemultiplytext><font:LucidNewMatExtT><index:78> +<char:circlemultiplydisplay><font:LucidNewMatExtT><index:79> +<char:summationtext><font:LucidNewMatExtT><index:80> +<char:producttext><font:LucidNewMatExtT><index:81> +<char:integraltext><font:LucidNewMatExtT><index:82> +<char:uniontext><font:LucidNewMatExtT><index:83> +<char:intersectiontext><font:LucidNewMatExtT><index:84> +<char:unionmultitext><font:LucidNewMatExtT><index:85> +<char:logicalandtext><font:LucidNewMatExtT><index:86> +<char:logicalortext><font:LucidNewMatExtT><index:87> +<char:summationdisplay><font:LucidNewMatExtT><index:88> +<char:productdisplay><font:LucidNewMatExtT><index:89> +<char:integraldisplay><font:LucidNewMatExtT><index:90> +<char:uniondisplay><font:LucidNewMatExtT><index:91> +<char:intersectiondisplay><font:LucidNewMatExtT><index:92> +<char:unionmultidisplay><font:LucidNewMatExtT><index:93> +<char:logicalanddisplay><font:LucidNewMatExtT><index:94> +<char:logicalordisplay><font:LucidNewMatExtT><index:95> +<char:coproducttext><font:LucidNewMatExtT><index:96> +<char:coproductdisplay><font:LucidNewMatExtT><index:97> +<char:hatwide><font:LucidNewMatExtT><index:98> +<char:hatwider><font:LucidNewMatExtT><index:99> +<char:hatwidest><font:LucidNewMatExtT><index:100> +<char:tildewide><font:LucidNewMatExtT><index:101> +<char:tildewider><font:LucidNewMatExtT><index:102> +<char:tildewidest><font:LucidNewMatExtT><index:103> +<char:bracketleftBig><font:LucidNewMatExtT><index:104> +<char:bracketrightBig><font:LucidNewMatExtT><index:105> +<char:floorleftBig><font:LucidNewMatExtT><index:106> +<char:floorrightBig><font:LucidNewMatExtT><index:107> +<char:ceilingleftBig><font:LucidNewMatExtT><index:108> +<char:ceilingrightBig><font:LucidNewMatExtT><index:109> +<char:braceleftBig><font:LucidNewMatExtT><index:110> +<char:bracerightBig><font:LucidNewMatExtT><index:111> +<char:radicalbig><font:LucidNewMatExtT><index:112> 
+<char:radicalBig><font:LucidNewMatExtT><index:113> +<char:radicalbigg><font:LucidNewMatExtT><index:114> +<char:radicalBigg><font:LucidNewMatExtT><index:115> +<char:radicalbt><font:LucidNewMatExtT><index:116> +<char:radicalvertex><font:LucidNewMatExtT><index:117> +<char:radicaltp><font:LucidNewMatExtT><index:118> +<char:arrowvertexdbl><font:LucidNewMatExtT><index:119> +<char:arrowtp><font:LucidNewMatExtT><index:120> +<char:arrowbt><font:LucidNewMatExtT><index:121> +<char:bracehtipdownleft><font:LucidNewMatExtT><index:122> +<char:bracehtipdownright><font:LucidNewMatExtT><index:123> +<char:bracehtipupleft><font:LucidNewMatExtT><index:124> +<char:bracehtipupright><font:LucidNewMatExtT><index:125> +<char:arrowdbltp><font:LucidNewMatExtT><index:126> +<char:arrowdblbt><font:LucidNewMatExtT><index:196> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/lucmathit-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/lucmathit-charmap.txt new file mode 100755 index 00000000..612d09c4 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/lucmathit-charmap.txt @@ -0,0 +1,68 @@ +# character map for Lucida Math Italic font + +<char:Gamma><font:LucidNewMatItaT><index:161> +<char:Delta><font:LucidNewMatItaT><index:162> +<char:Theta><font:LucidNewMatItaT><index:163> +<char:Lambda><font:LucidNewMatItaT><index:164> +<char:Xi><font:LucidNewMatItaT><index:165> +<char:Pi><font:LucidNewMatItaT><index:166> +<char:Sigma><font:LucidNewMatItaT><index:167> +<char:Upsilon><font:LucidNewMatItaT><index:7> +<char:Phi><font:LucidNewMatItaT><index:169> +<char:Psi><font:LucidNewMatItaT><index:170> +<char:Omega><font:LucidNewMatItaT><index:173> +<char:alpha><font:LucidNewMatItaT><index:174> +<char:beta><font:LucidNewMatItaT><index:175> +<char:gamma><font:LucidNewMatItaT><index:176> +<char:delta><font:LucidNewMatItaT><index:177> +<char:epsilon1><font:LucidNewMatItaT><index:178> +<char:zeta><font:LucidNewMatItaT><index:179> 
+<char:eta><font:LucidNewMatItaT><index:180> +<char:theta><font:LucidNewMatItaT><index:181> +<char:iota><font:LucidNewMatItaT><index:182> +<char:kappa><font:LucidNewMatItaT><index:183> +<char:lambda><font:LucidNewMatItaT><index:184> +<char:mu><font:LucidNewMatItaT><index:185> +<char:nu><font:LucidNewMatItaT><index:186> +<char:xi><font:LucidNewMatItaT><index:187> +<char:pi><font:LucidNewMatItaT><index:188> +<char:rho><font:LucidNewMatItaT><index:189> +<char:sigma><font:LucidNewMatItaT><index:190> +<char:tau><font:LucidNewMatItaT><index:191> +<char:upsilon><font:LucidNewMatItaT><index:192> +<char:phi><font:LucidNewMatItaT><index:193> +<char:chi><font:LucidNewMatItaT><index:194> +<char:psi><font:LucidNewMatItaT><index:195> +<char:tie><font:LucidNewMatItaT><index:196> +<char:omega><font:LucidNewMatItaT><index:33> +<char:epsilon><font:LucidNewMatItaT><index:34> +<char:theta1><font:LucidNewMatItaT><index:35> +<char:pi1><font:LucidNewMatItaT><index:36> +<char:rho1><font:LucidNewMatItaT><index:37> +<char:sigma1><font:LucidNewMatItaT><index:38> +<char:phi1><font:LucidNewMatItaT><index:39> +<char:arrowlefttophalf><font:LucidNewMatItaT><index:40> +<char:arrowleftbothalf><font:LucidNewMatItaT><index:41> +<char:arrowrighttophalf><font:LucidNewMatItaT><index:42> +<char:arrowrightbothalf><font:LucidNewMatItaT><index:43> +<char:arrowhookleft><font:LucidNewMatItaT><index:44> +<char:arrowhookright><font:LucidNewMatItaT><index:45> +<char:triangleright><font:LucidNewMatItaT><index:46> +<char:triangleleft><font:LucidNewMatItaT><index:47> +<char:period><font:LucidNewMatItaT><index:58> +<char:comma><font:LucidNewMatItaT><index:59> +<char:less><font:LucidNewMatItaT><index:60> +<char:slash><font:LucidNewMatItaT><index:61> +<char:greater><font:LucidNewMatItaT><index:62> +<char:star><font:LucidNewMatItaT><index:63> +<char:partialdiff><font:LucidNewMatItaT><index:64> +<char:flat><font:LucidNewMatItaT><index:91> +<char:natural><font:LucidNewMatItaT><index:92> 
+<char:sharp><font:LucidNewMatItaT><index:93> +<char:slurbelow><font:LucidNewMatItaT><index:94> +<char:slurabove><font:LucidNewMatItaT><index:95> +<char:lscript><font:LucidNewMatItaT><index:96> +<char:dotlessi><font:LucidNewMatItaT><index:123> +<char:dotlessj><font:LucidNewMatItaT><index:124> +<char:weierstrass><font:LucidNewMatItaT><index:125> +<char:vector><font:LucidNewMatItaT><index:126> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/lucmathsym-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/lucmathsym-charmap.txt new file mode 100755 index 00000000..bdde61d2 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/lucmathsym-charmap.txt @@ -0,0 +1,130 @@ +# mathematical characters for Lucida New Math Symbol font + +<char:minus><font:LucidNewMatSymT><index:161> +<char:periodcentered><font:LucidNewMatSymT><index:162> +<char:multiply><font:LucidNewMatSymT><index:163> +<char:asteriskmath><font:LucidNewMatSymT><index:164> +<char:divide><font:LucidNewMatSymT><index:165> +<char:diamondmath><font:LucidNewMatSymT><index:166> +<char:plusminus><font:LucidNewMatSymT><index:167> +<char:minusplus><font:LucidNewMatSymT><index:168> +<char:circleplus><font:LucidNewMatSymT><index:169> +<char:circleminus><font:LucidNewMatSymT><index:170> +<char:circlemultiply><font:LucidNewMatSymT><index:173> +<char:circledivide><font:LucidNewMatSymT><index:174> +<char:circledot><font:LucidNewMatSymT><index:175> +<char:circlecopyrt><font:LucidNewMatSymT><index:176> +<char:openbullet><font:LucidNewMatSymT><index:177> +<char:bullet><font:LucidNewMatSymT><index:178> +<char:equivasymptotic><font:LucidNewMatSymT><index:179> +<char:equivalence><font:LucidNewMatSymT><index:180> +<char:reflexsubset><font:LucidNewMatSymT><index:181> +<char:reflexsuperset><font:LucidNewMatSymT><index:182> +<char:lessequal><font:LucidNewMatSymT><index:183> +<char:greaterequal><font:LucidNewMatSymT><index:184> +<char:precedesequal><font:LucidNewMatSymT><index:185> 
+<char:followsequal><font:LucidNewMatSymT><index:186> +<char:similar><font:LucidNewMatSymT><index:187> +<char:approxequal><font:LucidNewMatSymT><index:188> +<char:propersubset><font:LucidNewMatSymT><index:189> +<char:propersuperset><font:LucidNewMatSymT><index:190> +<char:lessmuch><font:LucidNewMatSymT><index:191> +<char:greatermuch><font:LucidNewMatSymT><index:192> +<char:precedes><font:LucidNewMatSymT><index:193> +<char:follows><font:LucidNewMatSymT><index:194> +<char:arrowleft><font:LucidNewMatSymT><index:195> +<char:spade><font:LucidNewMatSymT><index:196> +<char:arrowright><font:LucidNewMatSymT><index:33> +<char:arrowup><font:LucidNewMatSymT><index:34> +<char:arrowdown><font:LucidNewMatSymT><index:35> +<char:arrowboth><font:LucidNewMatSymT><index:36> +<char:arrownortheast><font:LucidNewMatSymT><index:37> +<char:arrowsoutheast><font:LucidNewMatSymT><index:38> +<char:similarequal><font:LucidNewMatSymT><index:39> +<char:arrowdblleft><font:LucidNewMatSymT><index:40> +<char:arrowdblright><font:LucidNewMatSymT><index:41> +<char:arrowdblup><font:LucidNewMatSymT><index:42> +<char:arrowdbldown><font:LucidNewMatSymT><index:43> +<char:arrowdblboth><font:LucidNewMatSymT><index:44> +<char:arrownorthwest><font:LucidNewMatSymT><index:45> +<char:arrowsouthwest><font:LucidNewMatSymT><index:46> +<char:proportional><font:LucidNewMatSymT><index:47> +<char:prime><font:LucidNewMatSymT><index:48> +<char:infinity><font:LucidNewMatSymT><index:49> +<char:element><font:LucidNewMatSymT><index:50> +<char:owner><font:LucidNewMatSymT><index:51> +<char:triangle><font:LucidNewMatSymT><index:52> +<char:triangleinv><font:LucidNewMatSymT><index:53> +<char:negationslash><font:LucidNewMatSymT><index:54> +<char:mapsto><font:LucidNewMatSymT><index:55> +<char:universal><font:LucidNewMatSymT><index:56> +<char:existential><font:LucidNewMatSymT><index:57> +<char:logicalnot><font:LucidNewMatSymT><index:58> +<char:emptyset><font:LucidNewMatSymT><index:59> +<char:Rfractur><font:LucidNewMatSymT><index:60> 
+<char:Ifractur><font:LucidNewMatSymT><index:61> +<char:latticetop><font:LucidNewMatSymT><index:62> +<char:perpendicular><font:LucidNewMatSymT><index:63> +<char:aleph><font:LucidNewMatSymT><index:64> +<char:scriptA><font:LucidNewMatSymT><index:65> +<char:scriptB><font:LucidNewMatSymT><index:66> +<char:scriptC><font:LucidNewMatSymT><index:67> +<char:scriptD><font:LucidNewMatSymT><index:68> +<char:scriptE><font:LucidNewMatSymT><index:69> +<char:scriptF><font:LucidNewMatSymT><index:70> +<char:scriptG><font:LucidNewMatSymT><index:71> +<char:scriptH><font:LucidNewMatSymT><index:72> +<char:scriptI><font:LucidNewMatSymT><index:73> +<char:scriptJ><font:LucidNewMatSymT><index:74> +<char:scriptK><font:LucidNewMatSymT><index:75> +<char:scriptL><font:LucidNewMatSymT><index:76> +<char:scriptM><font:LucidNewMatSymT><index:77> +<char:scriptN><font:LucidNewMatSymT><index:78> +<char:scriptO><font:LucidNewMatSymT><index:79> +<char:scriptP><font:LucidNewMatSymT><index:80> +<char:scriptQ><font:LucidNewMatSymT><index:81> +<char:scriptR><font:LucidNewMatSymT><index:82> +<char:scriptS><font:LucidNewMatSymT><index:83> +<char:scriptT><font:LucidNewMatSymT><index:84> +<char:scriptU><font:LucidNewMatSymT><index:85> +<char:scriptV><font:LucidNewMatSymT><index:86> +<char:scriptW><font:LucidNewMatSymT><index:87> +<char:scriptX><font:LucidNewMatSymT><index:88> +<char:scriptY><font:LucidNewMatSymT><index:89> +<char:scriptZ><font:LucidNewMatSymT><index:90> +<char:union><font:LucidNewMatSymT><index:91> +<char:intersection><font:LucidNewMatSymT><index:92> +<char:unionmulti><font:LucidNewMatSymT><index:93> +<char:logicaland><font:LucidNewMatSymT><index:94> +<char:logicalor><font:LucidNewMatSymT><index:95> +<char:turnstileleft><font:LucidNewMatSymT><index:96> +<char:turnstileright><font:LucidNewMatSymT><index:97> +<char:floorleft><font:LucidNewMatSymT><index:98> +<char:floorright><font:LucidNewMatSymT><index:99> +<char:ceilingleft><font:LucidNewMatSymT><index:100> 
+<char:ceilingright><font:LucidNewMatSymT><index:101> +<char:braceleft><font:LucidNewMatSymT><index:102> +<char:braceright><font:LucidNewMatSymT><index:103> +<char:angbracketleft><font:LucidNewMatSymT><index:104> +<char:angbracketright><font:LucidNewMatSymT><index:105> +<char:bar><font:LucidNewMatSymT><index:106> +<char:bardbl><font:LucidNewMatSymT><index:107> +<char:arrowbothv><font:LucidNewMatSymT><index:108> +<char:arrowdblbothv><font:LucidNewMatSymT><index:109> +<char:backslash><font:LucidNewMatSymT><index:110> +<char:wreathproduct><font:LucidNewMatSymT><index:111> +<char:radical><font:LucidNewMatSymT><index:112> +<char:coproduct><font:LucidNewMatSymT><index:113> +<char:nabla><font:LucidNewMatSymT><index:114> +<char:integral><font:LucidNewMatSymT><index:115> +<char:unionsq><font:LucidNewMatSymT><index:116> +<char:intersectionsq><font:LucidNewMatSymT><index:117> +<char:subsetsqequal><font:LucidNewMatSymT><index:118> +<char:supersetsqequal><font:LucidNewMatSymT><index:119> +<char:section><font:LucidNewMatSymT><index:120> +<char:dagger><font:LucidNewMatSymT><index:121> +<char:daggerdbl><font:LucidNewMatSymT><index:122> +<char:paragraph><font:LucidNewMatSymT><index:123> +<char:club><font:LucidNewMatSymT><index:124> +<char:diamond><font:LucidNewMatSymT><index:125> +<char:heart><font:LucidNewMatSymT><index:126> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/makefile b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/makefile new file mode 100644 index 00000000..7c816d2b --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/makefile @@ -0,0 +1,10 @@ + + +run: + java -cp ../src tagger/Tagger test + + +clean: + rm -f test.index.txt + rm -f test.tag.txt + rm -f *~ diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/standard-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/standard-charmap.txt new file mode 100755 index 00000000..9e6a44ff --- /dev/null +++ 
b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/standard-charmap.txt @@ -0,0 +1,220 @@ +# character map for standard font + +<char:space><font:><index:32> +<char:exclam><font:><index:33> +<char:quotedbl><font:><index:34> +<char:numbersign><font:><index:35> +<char:dollar><font:><index:36> +<char:percent><font:><index:37> +<char:ampersand><font:><index:38> +<char:quotesingle><font:><index:39> +<char:parenleft><font:><index:40> +<char:parenright><font:><index:41> +<char:asterisk><font:><index:42> +<char:plus><font:><index:43> +<char:comma><font:><index:44> +<char:hyphen><font:><index:45> +<char:period><font:><index:46> +<char:slash><font:><index:47> +<char:zero><font:><index:48> +<char:one><font:><index:49> +<char:two><font:><index:50> +<char:three><font:><index:51> +<char:four><font:><index:52> +<char:five><font:><index:53> +<char:six><font:><index:54> +<char:seven><font:><index:55> +<char:eight><font:><index:56> +<char:nine><font:><index:57> +<char:colon><font:><index:58> +<char:semicolon><font:><index:59> +<char:less><font:><index:60> +<char:equal><font:><index:61> +<char:greater><font:><index:62> +<char:question><font:><index:63> +<char:at><font:><index:64> +<char:A><font:><index:65> +<char:B><font:><index:66> +<char:C><font:><index:67> +<char:D><font:><index:68> +<char:E><font:><index:69> +<char:F><font:><index:70> +<char:G><font:><index:71> +<char:H><font:><index:72> +<char:I><font:><index:73> +<char:J><font:><index:74> +<char:K><font:><index:75> +<char:L><font:><index:76> +<char:M><font:><index:77> +<char:N><font:><index:78> +<char:O><font:><index:79> +<char:P><font:><index:80> +<char:Q><font:><index:81> +<char:R><font:><index:82> +<char:S><font:><index:83> +<char:T><font:><index:84> +<char:U><font:><index:85> +<char:V><font:><index:86> +<char:W><font:><index:87> +<char:X><font:><index:88> +<char:Y><font:><index:89> +<char:Z><font:><index:90> +<char:bracketleft><font:><index:91> +<char:backslash><font:><index:92> +<char:bracketright><font:><index:93> 
+<char:asciicircum><font:><index:94> +<char:underscore><font:><index:95> +<char:grave><font:><index:96> +<char:a><font:><index:97> +<char:b><font:><index:98> +<char:c><font:><index:99> +<char:d><font:><index:100> +<char:e><font:><index:101> +<char:f><font:><index:102> +<char:g><font:><index:103> +<char:h><font:><index:104> +<char:i><font:><index:105> +<char:j><font:><index:106> +<char:k><font:><index:107> +<char:l><font:><index:108> +<char:m><font:><index:109> +<char:n><font:><index:110> +<char:o><font:><index:111> +<char:p><font:><index:112> +<char:q><font:><index:113> +<char:r><font:><index:114> +<char:s><font:><index:115> +<char:t><font:><index:116> +<char:u><font:><index:117> +<char:v><font:><index:118> +<char:w><font:><index:119> +<char:x><font:><index:120> +<char:y><font:><index:121> +<char:z><font:><index:122> +<char:braceleft><font:><index:123> +<char:bar><font:><index:124> +<char:braceright><font:><index:125> +<char:asciitilde><font:><index:126> +<char:euro><font:><index:128> +<char:quotesinglbase><font:><index:130> +<char:florin><font:><index:131> +<char:quotedblbase><font:><index:132> +<char:ellipsis><font:><index:133> +<char:dagger><font:><index:134> +<char:daggerdbl><font:><index:135> +<char:circumflex><font:><index:136> +<char:perthousand><font:><index:137> +<char:Scaron><font:><index:138> +<char:guilsinglleft><font:><index:139> +<char:OE><font:><index:140> +<char:Zcaron><font:><index:142> +<char:quoteleft><font:><index:145> +<char:quoteright><font:><index:146> +<char:quotedblleft><font:><index:147> +<char:quotedblright><font:><index:148> +<char:bullet><font:><index:149> +<char:endash><font:><index:150> +<char:emdash><font:><index:151> +<char:tilde><font:><index:152> +<char:trademark><font:><index:153> +<char:scaron><font:><index:154> +<char:guilsinglright><font:><index:155> +<char:oe><font:><index:156> +<char:zcaron><font:><index:158> +<char:Ydieresis><font:><index:159> +<char:nbspace><font:><index:160> +<char:exclamdown><font:><index:161> 
+<char:cent><font:><index:162> +<char:sterling><font:><index:163> +<char:currency><font:><index:164> +<char:yen><font:><index:165> +<char:brokenbar><font:><index:166> +<char:section><font:><index:167> +<char:dieresis><font:><index:168> +<char:copyright><font:><index:169> +<char:ordfeminine><font:><index:170> +<char:guillemotleft><font:><index:171> +<char:logicalnot><font:><index:172> +<char:sfthyphen><font:><index:173> +<char:registered><font:><index:174> +<char:macron><font:><index:175> +<char:degree><font:><index:176> +<char:plusminus><font:><index:177> +<char:twosuperior><font:><index:178> +<char:threesuperior><font:><index:179> +<char:acute><font:><index:180> +<char:mu><font:><index:181> +<char:paragraph><font:><index:182> +<char:periodcentered><font:><index:183> +<char:cedilla><font:><index:184> +<char:onesuperior><font:><index:185> +<char:ordmasculine><font:><index:186> +<char:guillemotright><font:><index:187> +<char:onequarter><font:><index:188> +<char:onehalf><font:><index:189> +<char:threequarters><font:><index:190> +<char:questiondown><font:><index:191> +<char:Agrave><font:><index:192> +<char:Aacute><font:><index:193> +<char:Acircumflex><font:><index:194> +<char:Atilde><font:><index:195> +<char:Adieresis><font:><index:196> +<char:Aring><font:><index:197> +<char:AE><font:><index:198> +<char:Ccedilla><font:><index:199> +<char:Egrave><font:><index:200> +<char:Eacute><font:><index:201> +<char:Ecircumflex><font:><index:202> +<char:Edieresis><font:><index:203> +<char:Igrave><font:><index:204> +<char:Iacute><font:><index:205> +<char:Icircumflex><font:><index:206> +<char:Idieresis><font:><index:207> +<char:Eth><font:><index:208> +<char:Ntilde><font:><index:209> +<char:Ograve><font:><index:210> +<char:Oacute><font:><index:211> +<char:Ocircumflex><font:><index:212> +<char:Otilde><font:><index:213> +<char:Odieresis><font:><index:214> +<char:multiply><font:><index:215> +<char:Oslash><font:><index:216> +<char:Ugrave><font:><index:217> +<char:Uacute><font:><index:218> 
+<char:Ucircumflex><font:><index:219> +<char:Udieresis><font:><index:220> +<char:Yacute><font:><index:221> +<char:Thorn><font:><index:222> +<char:germandbls><font:><index:223> +<char:agrave><font:><index:224> +<char:aacute><font:><index:225> +<char:acircumflex><font:><index:226> +<char:atilde><font:><index:227> +<char:adieresis><font:><index:228> +<char:aring><font:><index:229> +<char:ae><font:><index:230> +<char:ccedilla><font:><index:231> +<char:egrave><font:><index:232> +<char:eacute><font:><index:233> +<char:ecircumflex><font:><index:234> +<char:edieresis><font:><index:235> +<char:igrave><font:><index:236> +<char:iacute><font:><index:237> +<char:icircumflex><font:><index:238> +<char:idieresis><font:><index:239> +<char:eth><font:><index:240> +<char:ntilde><font:><index:241> +<char:ograve><font:><index:242> +<char:oacute><font:><index:243> +<char:ocircumflex><font:><index:244> +<char:otilde><font:><index:245> +<char:odieresis><font:><index:246> +<char:divide><font:><index:247> +<char:oslash><font:><index:248> +<char:ugrave><font:><index:249> +<char:uacute><font:><index:250> +<char:ucircumflex><font:><index:251> +<char:udieresis><font:><index:252> +<char:yacute><font:><index:253> +<char:thorn><font:><index:254> +<char:ydieresis><font:><index:255> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/styles.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/styles.txt new file mode 100755 index 00000000..55aaf72c --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/styles.txt @@ -0,0 +1,11 @@ +<style:title><next:author> +<style:author><next:section> +<style:section><next:noindent><counter:1><separator:.><trailer: > +<style:opening><next:noindent> +<style:noindent><next:body> +<style:body><next:body> +<style:subsection><next:noindent><parent:section><counter:1><separator:.><trailer: > +<style:subsubsection><next:noindent><parent:subsection><counter:a><separator:.><trailer: > +<style:geekmath><next:noindent> 
+<style:point><next:noindent><counter:A><leader:\alpha > +<style:ref><next:ref> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/symbol-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/symbol-charmap.txt new file mode 100755 index 00000000..4481c174 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/symbol-charmap.txt @@ -0,0 +1,195 @@ +# character map for Symbol font + +<char:Symbol><font:Symbol><index:for> +<char:space><font:Symbol><index:32> +<char:exclam><font:Symbol><index:33> +<char:universal><font:Symbol><index:34> +<char:numbersign><font:Symbol><index:35> +<char:existential><font:Symbol><index:36> +<char:percent><font:Symbol><index:37> +<char:ampersand><font:Symbol><index:38> +<char:suchthat><font:Symbol><index:39> +<char:parenleft><font:Symbol><index:40> +<char:parenright><font:Symbol><index:41> +<char:asteriskmath><font:Symbol><index:42> +<char:plus><font:Symbol><index:43> +<char:comma><font:Symbol><index:44> +<char:minus><font:Symbol><index:45> +<char:period><font:Symbol><index:46> +<char:slash><font:Symbol><index:47> +<char:zero><font:Symbol><index:48> +<char:one><font:Symbol><index:49> +<char:two><font:Symbol><index:50> +<char:three><font:Symbol><index:51> +<char:four><font:Symbol><index:52> +<char:five><font:Symbol><index:53> +<char:six><font:Symbol><index:54> +<char:seven><font:Symbol><index:55> +<char:eight><font:Symbol><index:56> +<char:nine><font:Symbol><index:57> +<char:colon><font:Symbol><index:58> +<char:semicolon><font:Symbol><index:59> +<char:less><font:Symbol><index:60> +<char:equal><font:Symbol><index:61> +<char:greater><font:Symbol><index:62> +<char:question><font:Symbol><index:63> +<char:congruent><font:Symbol><index:64> +<char:Alpha><font:Symbol><index:65> +<char:Beta><font:Symbol><index:66> +<char:Chi><font:Symbol><index:67> +<char:Delta><font:Symbol><index:68> +<char:Epsilon><font:Symbol><index:69> +<char:Phi><font:Symbol><index:70> +<char:Gamma><font:Symbol><index:71> 
+<char:Eta><font:Symbol><index:72> +<char:Iota><font:Symbol><index:73> +<char:theta1><font:Symbol><index:74> +<char:Kappa><font:Symbol><index:75> +<char:Lambda><font:Symbol><index:76> +<char:Mu><font:Symbol><index:77> +<char:Nu><font:Symbol><index:78> +<char:Omicron><font:Symbol><index:79> +<char:Pi><font:Symbol><index:80> +<char:Theta><font:Symbol><index:81> +<char:Rho><font:Symbol><index:82> +<char:Sigma><font:Symbol><index:83> +<char:Tau><font:Symbol><index:84> +<char:Upsilon><font:Symbol><index:85> +<char:sigma1><font:Symbol><index:86> +<char:Omega><font:Symbol><index:87> +<char:Xi><font:Symbol><index:88> +<char:Psi><font:Symbol><index:89> +<char:Zeta><font:Symbol><index:90> +<char:bracketleft><font:Symbol><index:91> +<char:therefore><font:Symbol><index:92> +<char:bracketright><font:Symbol><index:93> +<char:perpendicular><font:Symbol><index:94> +<char:underscore><font:Symbol><index:95> +<char:radicalex><font:Symbol><index:96> +<char:alpha><font:Symbol><index:97> +<char:beta><font:Symbol><index:98> +<char:chi><font:Symbol><index:99> +<char:delta><font:Symbol><index:100> +<char:epsilon><font:Symbol><index:101> +<char:phi><font:Symbol><index:102> +<char:gamma><font:Symbol><index:103> +<char:eta><font:Symbol><index:104> +<char:iota><font:Symbol><index:105> +<char:phi1><font:Symbol><index:106> +<char:kappa><font:Symbol><index:107> +<char:lambda><font:Symbol><index:108> +<char:mu><font:Symbol><index:109> +<char:nu><font:Symbol><index:110> +<char:omicron><font:Symbol><index:111> +<char:pi><font:Symbol><index:112> +<char:theta><font:Symbol><index:113> +<char:rho><font:Symbol><index:114> +<char:sigma><font:Symbol><index:115> +<char:tau><font:Symbol><index:116> +<char:upsilon><font:Symbol><index:117> +<char:omega1><font:Symbol><index:118> +<char:omega><font:Symbol><index:119> +<char:xi><font:Symbol><index:120> +<char:psi><font:Symbol><index:121> +<char:zeta><font:Symbol><index:122> +<char:braceleft><font:Symbol><index:123> +<char:bar><font:Symbol><index:124> 
+<char:braceright><font:Symbol><index:125> +<char:similar><font:Symbol><index:126> +<char:Euro><font:Symbol><index:160> +<char:Upsilon1><font:Symbol><index:161> +<char:minute><font:Symbol><index:162> +<char:lessequal><font:Symbol><index:163> +<char:fraction><font:Symbol><index:164> +<char:infinity><font:Symbol><index:165> +<char:florin><font:Symbol><index:166> +<char:club><font:Symbol><index:167> +<char:diamond><font:Symbol><index:168> +<char:heart><font:Symbol><index:169> +<char:spade><font:Symbol><index:170> +<char:arrowboth><font:Symbol><index:171> +<char:arrowleft><font:Symbol><index:172> +<char:arrowup><font:Symbol><index:173> +<char:arrowright><font:Symbol><index:174> +<char:arrowdown><font:Symbol><index:175> +<char:degree><font:Symbol><index:176> +<char:plusminus><font:Symbol><index:177> +<char:second><font:Symbol><index:178> +<char:greaterequal><font:Symbol><index:179> +<char:multiply><font:Symbol><index:180> +<char:proportional><font:Symbol><index:181> +<char:partialdiff><font:Symbol><index:182> +<char:bullet><font:Symbol><index:183> +<char:divide><font:Symbol><index:184> +<char:notequal><font:Symbol><index:185> +<char:equivalence><font:Symbol><index:186> +<char:approxequal><font:Symbol><index:187> + +# seems to be a quarter fraction +# <char:ellipsis><font:Symbol><index:188> + +<char:arrowvertex><font:Symbol><index:189> +<char:arrowhorizex><font:Symbol><index:190> +<char:carriagereturn><font:Symbol><index:191> +<char:aleph><font:Symbol><index:192> +<char:Ifraktur><font:Symbol><index:193> +<char:Rfraktur><font:Symbol><index:194> +<char:weierstrass><font:Symbol><index:195> +<char:circlemultiply><font:Symbol><index:196> +<char:circleplus><font:Symbol><index:197> +<char:emptyset><font:Symbol><index:198> +<char:intersection><font:Symbol><index:199> +<char:union><font:Symbol><index:200> +<char:propersuperset><font:Symbol><index:201> +<char:reflexsuperset><font:Symbol><index:202> +<char:notsubset><font:Symbol><index:203> 
+<char:propersubset><font:Symbol><index:204> +<char:reflexsubset><font:Symbol><index:205> +<char:element><font:Symbol><index:206> +<char:notelement><font:Symbol><index:207> +<char:angle><font:Symbol><index:208> +<char:gradient><font:Symbol><index:209> +<char:registerserif><font:Symbol><index:210> +<char:copyrightserif><font:Symbol><index:211> +<char:trademarkserif><font:Symbol><index:212> +<char:product><font:Symbol><index:213> +<char:radical><font:Symbol><index:214> +<char:dotmath><font:Symbol><index:215> +<char:logicalnot><font:Symbol><index:216> +<char:logicaland><font:Symbol><index:217> +<char:logicalor><font:Symbol><index:218> +<char:arrowdblboth><font:Symbol><index:219> +<char:arrowdblleft><font:Symbol><index:220> +<char:arrowdblup><font:Symbol><index:221> +<char:arrowdblright><font:Symbol><index:222> +<char:arrowdbldown><font:Symbol><index:223> +<char:lozenge><font:Symbol><index:224> +<char:angleleft><font:Symbol><index:225> +<char:registersans><font:Symbol><index:226> +<char:copyrightsans><font:Symbol><index:227> +<char:trademarksans><font:Symbol><index:228> +<char:summation><font:Symbol><index:229> +<char:parenlefttp><font:Symbol><index:230> +<char:parenleftex><font:Symbol><index:231> +<char:parenleftbt><font:Symbol><index:232> +<char:bracketlefttp><font:Symbol><index:233> +<char:bracketleftex><font:Symbol><index:234> +<char:bracketleftbt><font:Symbol><index:235> +<char:bracelefttp><font:Symbol><index:236> +<char:braceleftmid><font:Symbol><index:237> +<char:braceleftbt><font:Symbol><index:238> +<char:braceex><font:Symbol><index:239> +<char:angleright><font:Symbol><index:241> +<char:integral><font:Symbol><index:242> +<char:integraltp><font:Symbol><index:243> +<char:integralex><font:Symbol><index:244> +<char:integralbt><font:Symbol><index:245> +<char:parenrighttp><font:Symbol><index:246> +<char:parenrightex><font:Symbol><index:247> +<char:parenrightbt><font:Symbol><index:248> +<char:bracketrighttp><font:Symbol><index:249> 
+<char:bracketrightex><font:Symbol><index:250> +<char:bracketrightbt><font:Symbol><index:251> +<char:bracerighttp><font:Symbol><index:252> +<char:bracerightmid><font:Symbol><index:253> +<char:bracerightbt><font:Symbol><index:254> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/symbols.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/symbols.txt new file mode 100755 index 00000000..9b89caf8 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/symbols.txt @@ -0,0 +1,530 @@ +# character map for Lucida Math Italic font + +<char:Gamma><font:LucidNewMatItaT><index:161> +<char:Delta><font:LucidNewMatItaT><index:162> +<char:Theta><font:LucidNewMatItaT><index:163> +<char:Lambda><font:LucidNewMatItaT><index:164> +<char:Xi><font:LucidNewMatItaT><index:165> +<char:Pi><font:LucidNewMatItaT><index:166> +<char:Sigma><font:LucidNewMatItaT><index:167> +<char:Upsilon><font:LucidNewMatItaT><index:7> +<char:Phi><font:LucidNewMatItaT><index:169> +<char:Psi><font:LucidNewMatItaT><index:170> +<char:Omega><font:LucidNewMatItaT><index:173> +<char:alpha><font:LucidNewMatItaT><index:174> +<char:beta><font:LucidNewMatItaT><index:175> +<char:gamma><font:LucidNewMatItaT><index:176> +<char:delta><font:LucidNewMatItaT><index:177> +<char:epsilon1><font:LucidNewMatItaT><index:178> +<char:zeta><font:LucidNewMatItaT><index:179> +<char:eta><font:LucidNewMatItaT><index:180> +<char:theta><font:LucidNewMatItaT><index:181> +<char:iota><font:LucidNewMatItaT><index:182> +<char:kappa><font:LucidNewMatItaT><index:183> +<char:lambda><font:LucidNewMatItaT><index:184> +<char:mu><font:LucidNewMatItaT><index:185> +<char:nu><font:LucidNewMatItaT><index:186> +<char:xi><font:LucidNewMatItaT><index:187> +<char:pi><font:LucidNewMatItaT><index:188> +<char:rho><font:LucidNewMatItaT><index:189> +<char:sigma><font:LucidNewMatItaT><index:190> +<char:tau><font:LucidNewMatItaT><index:191> +<char:upsilon><font:LucidNewMatItaT><index:192> 
+<char:phi><font:LucidNewMatItaT><index:193> +<char:chi><font:LucidNewMatItaT><index:194> +<char:psi><font:LucidNewMatItaT><index:195> +<char:tie><font:LucidNewMatItaT><index:196> +<char:omega><font:LucidNewMatItaT><index:33> +<char:epsilon><font:LucidNewMatItaT><index:34> +<char:theta1><font:LucidNewMatItaT><index:35> +<char:pi1><font:LucidNewMatItaT><index:36> +<char:rho1><font:LucidNewMatItaT><index:37> +<char:sigma1><font:LucidNewMatItaT><index:38> +<char:phi1><font:LucidNewMatItaT><index:39> +<char:arrowlefttophalf><font:LucidNewMatItaT><index:40> +<char:arrowleftbothalf><font:LucidNewMatItaT><index:41> +<char:arrowrighttophalf><font:LucidNewMatItaT><index:42> +<char:arrowrightbothalf><font:LucidNewMatItaT><index:43> +<char:arrowhookleft><font:LucidNewMatItaT><index:44> +<char:arrowhookright><font:LucidNewMatItaT><index:45> +<char:triangleright><font:LucidNewMatItaT><index:46> +<char:triangleleft><font:LucidNewMatItaT><index:47> +<char:period><font:LucidNewMatItaT><index:58> +<char:comma><font:LucidNewMatItaT><index:59> +<char:less><font:LucidNewMatItaT><index:60> +<char:slash><font:LucidNewMatItaT><index:61> +<char:greater><font:LucidNewMatItaT><index:62> +<char:star><font:LucidNewMatItaT><index:63> +<char:partialdiff><font:LucidNewMatItaT><index:64> +<char:flat><font:LucidNewMatItaT><index:91> +<char:natural><font:LucidNewMatItaT><index:92> +<char:sharp><font:LucidNewMatItaT><index:93> +<char:slurbelow><font:LucidNewMatItaT><index:94> +<char:slurabove><font:LucidNewMatItaT><index:95> +<char:lscript><font:LucidNewMatItaT><index:96> +<char:dotlessi><font:LucidNewMatItaT><index:123> +<char:dotlessj><font:LucidNewMatItaT><index:124> +<char:weierstrass><font:LucidNewMatItaT><index:125> +<char:vector><font:LucidNewMatItaT><index:126> + + +# mathematical characters for Lucida New Math Symbol font + +<char:minus><font:LucidNewMatSymT><index:161> +<char:periodcentered><font:LucidNewMatSymT><index:162> +<char:multiply><font:LucidNewMatSymT><index:163> 
+<char:asteriskmath><font:LucidNewMatSymT><index:164> +<char:divide><font:LucidNewMatSymT><index:165> +<char:diamondmath><font:LucidNewMatSymT><index:166> +<char:plusminus><font:LucidNewMatSymT><index:167> +<char:minusplus><font:LucidNewMatSymT><index:168> +<char:circleplus><font:LucidNewMatSymT><index:169> +<char:circleminus><font:LucidNewMatSymT><index:170> +<char:circlemultiply><font:LucidNewMatSymT><index:173> +<char:circledivide><font:LucidNewMatSymT><index:174> +<char:circledot><font:LucidNewMatSymT><index:175> +<char:circlecopyrt><font:LucidNewMatSymT><index:176> +<char:openbullet><font:LucidNewMatSymT><index:177> +<char:bullet><font:LucidNewMatSymT><index:178> +<char:equivasymptotic><font:LucidNewMatSymT><index:179> +<char:equivalence><font:LucidNewMatSymT><index:180> +<char:reflexsubset><font:LucidNewMatSymT><index:181> +<char:reflexsuperset><font:LucidNewMatSymT><index:182> +<char:lessequal><font:LucidNewMatSymT><index:183> +<char:greaterequal><font:LucidNewMatSymT><index:184> +<char:precedesequal><font:LucidNewMatSymT><index:185> +<char:followsequal><font:LucidNewMatSymT><index:186> +<char:similar><font:LucidNewMatSymT><index:187> +<char:approxequal><font:LucidNewMatSymT><index:188> +<char:propersubset><font:LucidNewMatSymT><index:189> +<char:propersuperset><font:LucidNewMatSymT><index:190> +<char:lessmuch><font:LucidNewMatSymT><index:191> +<char:greatermuch><font:LucidNewMatSymT><index:192> +<char:precedes><font:LucidNewMatSymT><index:193> +<char:follows><font:LucidNewMatSymT><index:194> +<char:arrowleft><font:LucidNewMatSymT><index:195> +<char:spade><font:LucidNewMatSymT><index:196> +<char:arrowright><font:LucidNewMatSymT><index:33> +<char:arrowup><font:LucidNewMatSymT><index:34> +<char:arrowdown><font:LucidNewMatSymT><index:35> +<char:arrowboth><font:LucidNewMatSymT><index:36> +<char:arrownortheast><font:LucidNewMatSymT><index:37> +<char:arrowsoutheast><font:LucidNewMatSymT><index:38> +<char:similarequal><font:LucidNewMatSymT><index:39> 
+<char:arrowdblleft><font:LucidNewMatSymT><index:40> +<char:arrowdblright><font:LucidNewMatSymT><index:41> +<char:arrowdblup><font:LucidNewMatSymT><index:42> +<char:arrowdbldown><font:LucidNewMatSymT><index:43> +<char:arrowdblboth><font:LucidNewMatSymT><index:44> +<char:arrownorthwest><font:LucidNewMatSymT><index:45> +<char:arrowsouthwest><font:LucidNewMatSymT><index:46> +<char:proportional><font:LucidNewMatSymT><index:47> +<char:prime><font:LucidNewMatSymT><index:48> +<char:infinity><font:LucidNewMatSymT><index:49> +<char:element><font:LucidNewMatSymT><index:50> +<char:owner><font:LucidNewMatSymT><index:51> +<char:triangle><font:LucidNewMatSymT><index:52> +<char:triangleinv><font:LucidNewMatSymT><index:53> +<char:negationslash><font:LucidNewMatSymT><index:54> +<char:mapsto><font:LucidNewMatSymT><index:55> +<char:universal><font:LucidNewMatSymT><index:56> +<char:existential><font:LucidNewMatSymT><index:57> +<char:logicalnot><font:LucidNewMatSymT><index:58> +<char:emptyset><font:LucidNewMatSymT><index:59> +<char:Rfractur><font:LucidNewMatSymT><index:60> +<char:Ifractur><font:LucidNewMatSymT><index:61> +<char:latticetop><font:LucidNewMatSymT><index:62> +<char:perpendicular><font:LucidNewMatSymT><index:63> +<char:aleph><font:LucidNewMatSymT><index:64> +<char:scriptA><font:LucidNewMatSymT><index:65> +<char:scriptB><font:LucidNewMatSymT><index:66> +<char:scriptC><font:LucidNewMatSymT><index:67> +<char:scriptD><font:LucidNewMatSymT><index:68> +<char:scriptE><font:LucidNewMatSymT><index:69> +<char:scriptF><font:LucidNewMatSymT><index:70> +<char:scriptG><font:LucidNewMatSymT><index:71> +<char:scriptH><font:LucidNewMatSymT><index:72> +<char:scriptI><font:LucidNewMatSymT><index:73> +<char:scriptJ><font:LucidNewMatSymT><index:74> +<char:scriptK><font:LucidNewMatSymT><index:75> +<char:scriptL><font:LucidNewMatSymT><index:76> +<char:scriptM><font:LucidNewMatSymT><index:77> +<char:scriptN><font:LucidNewMatSymT><index:78> +<char:scriptO><font:LucidNewMatSymT><index:79> 
+<char:scriptP><font:LucidNewMatSymT><index:80> +<char:scriptQ><font:LucidNewMatSymT><index:81> +<char:scriptR><font:LucidNewMatSymT><index:82> +<char:scriptS><font:LucidNewMatSymT><index:83> +<char:scriptT><font:LucidNewMatSymT><index:84> +<char:scriptU><font:LucidNewMatSymT><index:85> +<char:scriptV><font:LucidNewMatSymT><index:86> +<char:scriptW><font:LucidNewMatSymT><index:87> +<char:scriptX><font:LucidNewMatSymT><index:88> +<char:scriptY><font:LucidNewMatSymT><index:89> +<char:scriptZ><font:LucidNewMatSymT><index:90> +<char:union><font:LucidNewMatSymT><index:91> +<char:intersection><font:LucidNewMatSymT><index:92> +<char:unionmulti><font:LucidNewMatSymT><index:93> +<char:logicaland><font:LucidNewMatSymT><index:94> +<char:logicalor><font:LucidNewMatSymT><index:95> +<char:turnstileleft><font:LucidNewMatSymT><index:96> +<char:turnstileright><font:LucidNewMatSymT><index:97> +<char:floorleft><font:LucidNewMatSymT><index:98> +<char:floorright><font:LucidNewMatSymT><index:99> +<char:ceilingleft><font:LucidNewMatSymT><index:100> +<char:ceilingright><font:LucidNewMatSymT><index:101> +<char:braceleft><font:LucidNewMatSymT><index:102> +<char:braceright><font:LucidNewMatSymT><index:103> +<char:angbracketleft><font:LucidNewMatSymT><index:104> +<char:angbracketright><font:LucidNewMatSymT><index:105> +<char:bar><font:LucidNewMatSymT><index:106> +<char:bardbl><font:LucidNewMatSymT><index:107> +<char:arrowbothv><font:LucidNewMatSymT><index:108> +<char:arrowdblbothv><font:LucidNewMatSymT><index:109> +<char:backslash><font:LucidNewMatSymT><index:110> +<char:wreathproduct><font:LucidNewMatSymT><index:111> +<char:radical><font:LucidNewMatSymT><index:112> +<char:coproduct><font:LucidNewMatSymT><index:113> +<char:nabla><font:LucidNewMatSymT><index:114> +<char:integral><font:LucidNewMatSymT><index:115> +<char:unionsq><font:LucidNewMatSymT><index:116> +<char:intersectionsq><font:LucidNewMatSymT><index:117> +<char:subsetsqequal><font:LucidNewMatSymT><index:118> 
+<char:supersetsqequal><font:LucidNewMatSymT><index:119> +<char:section><font:LucidNewMatSymT><index:120> +<char:dagger><font:LucidNewMatSymT><index:121> +<char:daggerdbl><font:LucidNewMatSymT><index:122> +<char:paragraph><font:LucidNewMatSymT><index:123> +<char:club><font:LucidNewMatSymT><index:124> +<char:diamond><font:LucidNewMatSymT><index:125> +<char:heart><font:LucidNewMatSymT><index:126> + + + +# character map for Symbol font + +<char:Symbol><font:Symbol><index:for> +<char:space><font:Symbol><index:32> +<char:exclam><font:Symbol><index:33> +<char:universal><font:Symbol><index:34> +<char:numbersign><font:Symbol><index:35> +<char:existential><font:Symbol><index:36> +<char:percent><font:Symbol><index:37> +<char:ampersand><font:Symbol><index:38> +<char:suchthat><font:Symbol><index:39> +<char:parenleft><font:Symbol><index:40> +<char:parenright><font:Symbol><index:41> +<char:asteriskmath><font:Symbol><index:42> +<char:plus><font:Symbol><index:43> +<char:comma><font:Symbol><index:44> +<char:minus><font:Symbol><index:45> +<char:period><font:Symbol><index:46> +<char:slash><font:Symbol><index:47> +<char:zero><font:Symbol><index:48> +<char:one><font:Symbol><index:49> +<char:two><font:Symbol><index:50> +<char:three><font:Symbol><index:51> +<char:four><font:Symbol><index:52> +<char:five><font:Symbol><index:53> +<char:six><font:Symbol><index:54> +<char:seven><font:Symbol><index:55> +<char:eight><font:Symbol><index:56> +<char:nine><font:Symbol><index:57> +<char:colon><font:Symbol><index:58> +<char:semicolon><font:Symbol><index:59> +<char:less><font:Symbol><index:60> +<char:equal><font:Symbol><index:61> +<char:greater><font:Symbol><index:62> +<char:question><font:Symbol><index:63> +<char:congruent><font:Symbol><index:64> +<char:Alpha><font:Symbol><index:65> +<char:Beta><font:Symbol><index:66> +<char:Chi><font:Symbol><index:67> +<char:Delta><font:Symbol><index:68> +<char:Epsilon><font:Symbol><index:69> +<char:Phi><font:Symbol><index:70> +<char:Gamma><font:Symbol><index:71> 
+<char:Eta><font:Symbol><index:72> +<char:Iota><font:Symbol><index:73> +<char:theta1><font:Symbol><index:74> +<char:Kappa><font:Symbol><index:75> +<char:Lambda><font:Symbol><index:76> +<char:Mu><font:Symbol><index:77> +<char:Nu><font:Symbol><index:78> +<char:Omicron><font:Symbol><index:79> +<char:Pi><font:Symbol><index:80> +<char:Theta><font:Symbol><index:81> +<char:Rho><font:Symbol><index:82> +<char:Sigma><font:Symbol><index:83> +<char:Tau><font:Symbol><index:84> +<char:Upsilon><font:Symbol><index:85> +<char:sigma1><font:Symbol><index:86> +<char:Omega><font:Symbol><index:87> +<char:Xi><font:Symbol><index:88> +<char:Psi><font:Symbol><index:89> +<char:Zeta><font:Symbol><index:90> +<char:bracketleft><font:Symbol><index:91> +<char:therefore><font:Symbol><index:92> +<char:bracketright><font:Symbol><index:93> +<char:perpendicular><font:Symbol><index:94> +<char:underscore><font:Symbol><index:95> +<char:radicalex><font:Symbol><index:96> +<char:alpha><font:Symbol><index:97> +<char:beta><font:Symbol><index:98> +<char:chi><font:Symbol><index:99> +<char:delta><font:Symbol><index:100> +<char:epsilon><font:Symbol><index:101> +<char:phi><font:Symbol><index:102> +<char:gamma><font:Symbol><index:103> +<char:eta><font:Symbol><index:104> +<char:iota><font:Symbol><index:105> +<char:phi1><font:Symbol><index:106> +<char:kappa><font:Symbol><index:107> +<char:lambda><font:Symbol><index:108> +<char:mu><font:Symbol><index:109> +<char:nu><font:Symbol><index:110> +<char:omicron><font:Symbol><index:111> +<char:pi><font:Symbol><index:112> +<char:theta><font:Symbol><index:113> +<char:rho><font:Symbol><index:114> +<char:sigma><font:Symbol><index:115> +<char:tau><font:Symbol><index:116> +<char:upsilon><font:Symbol><index:117> +<char:omega1><font:Symbol><index:118> +<char:omega><font:Symbol><index:119> +<char:xi><font:Symbol><index:120> +<char:psi><font:Symbol><index:121> +<char:zeta><font:Symbol><index:122> +<char:braceleft><font:Symbol><index:123> +<char:bar><font:Symbol><index:124> 
+<char:braceright><font:Symbol><index:125> +<char:similar><font:Symbol><index:126> +<char:Euro><font:Symbol><index:160> +<char:Upsilon1><font:Symbol><index:161> +<char:minute><font:Symbol><index:162> +<char:lessequal><font:Symbol><index:163> +<char:fraction><font:Symbol><index:164> +<char:infinity><font:Symbol><index:165> +<char:florin><font:Symbol><index:166> +<char:club><font:Symbol><index:167> +<char:diamond><font:Symbol><index:168> +<char:heart><font:Symbol><index:169> +<char:spade><font:Symbol><index:170> +<char:arrowboth><font:Symbol><index:171> +<char:arrowleft><font:Symbol><index:172> +<char:arrowup><font:Symbol><index:173> +<char:arrowright><font:Symbol><index:174> +<char:arrowdown><font:Symbol><index:175> +<char:degree><font:Symbol><index:176> +<char:plusminus><font:Symbol><index:177> +<char:second><font:Symbol><index:178> +<char:greaterequal><font:Symbol><index:179> +<char:multiply><font:Symbol><index:180> +<char:proportional><font:Symbol><index:181> +<char:partialdiff><font:Symbol><index:182> +<char:bullet><font:Symbol><index:183> +<char:divide><font:Symbol><index:184> +<char:notequal><font:Symbol><index:185> +<char:equivalence><font:Symbol><index:186> +<char:approxequal><font:Symbol><index:187> + +# seems to be a quarter fraction +# <char:ellipsis><font:Symbol><index:188> + +<char:arrowvertex><font:Symbol><index:189> +<char:arrowhorizex><font:Symbol><index:190> +<char:carriagereturn><font:Symbol><index:191> +<char:aleph><font:Symbol><index:192> +<char:Ifraktur><font:Symbol><index:193> +<char:Rfraktur><font:Symbol><index:194> +<char:weierstrass><font:Symbol><index:195> +<char:circlemultiply><font:Symbol><index:196> +<char:circleplus><font:Symbol><index:197> +<char:emptyset><font:Symbol><index:198> +<char:intersection><font:Symbol><index:199> +<char:union><font:Symbol><index:200> +<char:propersuperset><font:Symbol><index:201> +<char:reflexsuperset><font:Symbol><index:202> +<char:notsubset><font:Symbol><index:203> 
+<char:propersubset><font:Symbol><index:204> +<char:reflexsubset><font:Symbol><index:205> +<char:element><font:Symbol><index:206> +<char:notelement><font:Symbol><index:207> +<char:angle><font:Symbol><index:208> +<char:gradient><font:Symbol><index:209> +<char:registerserif><font:Symbol><index:210> +<char:copyrightserif><font:Symbol><index:211> +<char:trademarkserif><font:Symbol><index:212> +<char:product><font:Symbol><index:213> +<char:radical><font:Symbol><index:214> +<char:dotmath><font:Symbol><index:215> +<char:logicalnot><font:Symbol><index:216> +<char:logicaland><font:Symbol><index:217> +<char:logicalor><font:Symbol><index:218> +<char:arrowdblboth><font:Symbol><index:219> +<char:arrowdblleft><font:Symbol><index:220> +<char:arrowdblup><font:Symbol><index:221> +<char:arrowdblright><font:Symbol><index:222> +<char:arrowdbldown><font:Symbol><index:223> +<char:lozenge><font:Symbol><index:224> +<char:angleleft><font:Symbol><index:225> +<char:registersans><font:Symbol><index:226> +<char:copyrightsans><font:Symbol><index:227> +<char:trademarksans><font:Symbol><index:228> +<char:summation><font:Symbol><index:229> +<char:parenlefttp><font:Symbol><index:230> +<char:parenleftex><font:Symbol><index:231> +<char:parenleftbt><font:Symbol><index:232> +<char:bracketlefttp><font:Symbol><index:233> +<char:bracketleftex><font:Symbol><index:234> +<char:bracketleftbt><font:Symbol><index:235> +<char:bracelefttp><font:Symbol><index:236> +<char:braceleftmid><font:Symbol><index:237> +<char:braceleftbt><font:Symbol><index:238> +<char:braceex><font:Symbol><index:239> +<char:angleright><font:Symbol><index:241> +<char:integral><font:Symbol><index:242> +<char:integraltp><font:Symbol><index:243> +<char:integralex><font:Symbol><index:244> +<char:integralbt><font:Symbol><index:245> +<char:parenrighttp><font:Symbol><index:246> +<char:parenrightex><font:Symbol><index:247> +<char:parenrightbt><font:Symbol><index:248> +<char:bracketrighttp><font:Symbol><index:249> 
+<char:bracketrightex><font:Symbol><index:250> +<char:bracketrightbt><font:Symbol><index:251> +<char:bracerighttp><font:Symbol><index:252> +<char:bracerightmid><font:Symbol><index:253> +<char:bracerightbt><font:Symbol><index:254> + + +# character map for Lucida New Math Extended font + +<char:parenleftbig><font:LucidNewMatExtT><index:161> +<char:parenrightbig><font:LucidNewMatExtT><index:162> +<char:bracketleftbig><font:LucidNewMatExtT><index:163> +<char:bracketrightbig><font:LucidNewMatExtT><index:164> +<char:floorleftbig><font:LucidNewMatExtT><index:165> +<char:floorrightbig><font:LucidNewMatExtT><index:166> +<char:ceilingleftbig><font:LucidNewMatExtT><index:167> +<char:ceilingrightbig><font:LucidNewMatExtT><index:168> +<char:braceleftbig><font:LucidNewMatExtT><index:169> +<char:bracerightbig><font:LucidNewMatExtT><index:170> +<char:angbracketleftbig><font:LucidNewMatExtT><index:173> +<char:angbracketrightbig><font:LucidNewMatExtT><index:174> +<char:vextendsingle><font:LucidNewMatExtT><index:175> +<char:vextenddouble><font:LucidNewMatExtT><index:176> +<char:slashbig><font:LucidNewMatExtT><index:177> +<char:backslashbig><font:LucidNewMatExtT><index:178> +<char:parenleftBig><font:LucidNewMatExtT><index:179> +<char:parenrightBig><font:LucidNewMatExtT><index:180> +<char:parenleftbigg><font:LucidNewMatExtT><index:181> +<char:parenrightbigg><font:LucidNewMatExtT><index:182> +<char:bracketleftbigg><font:LucidNewMatExtT><index:183> +<char:bracketrightbigg><font:LucidNewMatExtT><index:184> +<char:floorleftbigg><font:LucidNewMatExtT><index:185> +<char:floorrightbigg><font:LucidNewMatExtT><index:186> +<char:ceilingleftbigg><font:LucidNewMatExtT><index:187> +<char:ceilingrightbigg><font:LucidNewMatExtT><index:188> +<char:braceleftbigg><font:LucidNewMatExtT><index:189> +<char:bracerightbigg><font:LucidNewMatExtT><index:190> +<char:angbracketleftbigg><font:LucidNewMatExtT><index:28> +<char:angbracketrightbigg><font:LucidNewMatExtT><index:29> 
+<char:slashbigg><font:LucidNewMatExtT><index:193> +<char:backslashbigg><font:LucidNewMatExtT><index:194> +<char:parenleftBigg><font:LucidNewMatExtT><index:195> +<char:parenrightBigg><font:LucidNewMatExtT><index:33> +<char:bracketleftBigg><font:LucidNewMatExtT><index:34> +<char:bracketrightBigg><font:LucidNewMatExtT><index:35> +<char:floorleftBigg><font:LucidNewMatExtT><index:36> +<char:floorrightBigg><font:LucidNewMatExtT><index:37> +<char:ceilingleftBigg><font:LucidNewMatExtT><index:38> +<char:ceilingrightBigg><font:LucidNewMatExtT><index:39> +<char:braceleftBigg><font:LucidNewMatExtT><index:40> +<char:bracerightBigg><font:LucidNewMatExtT><index:41> +<char:angbracketleftBigg><font:LucidNewMatExtT><index:42> +<char:angbracketrightBigg><font:LucidNewMatExtT><index:43> +<char:slashBigg><font:LucidNewMatExtT><index:44> +<char:backslashBigg><font:LucidNewMatExtT><index:45> +<char:slashBig><font:LucidNewMatExtT><index:46> +<char:backslashBig><font:LucidNewMatExtT><index:47> +<char:parenlefttp><font:LucidNewMatExtT><index:48> +<char:parenrighttp><font:LucidNewMatExtT><index:49> +<char:bracketlefttp><font:LucidNewMatExtT><index:50> +<char:bracketrighttp><font:LucidNewMatExtT><index:51> +<char:bracketleftbt><font:LucidNewMatExtT><index:52> +<char:bracketrightbt><font:LucidNewMatExtT><index:53> +<char:bracketleftex><font:LucidNewMatExtT><index:54> +<char:bracketrightex><font:LucidNewMatExtT><index:55> +<char:bracelefttp><font:LucidNewMatExtT><index:56> +<char:bracerighttp><font:LucidNewMatExtT><index:57> +<char:braceleftbt><font:LucidNewMatExtT><index:58> +<char:bracerightbt><font:LucidNewMatExtT><index:59> +<char:braceleftmid><font:LucidNewMatExtT><index:60> +<char:bracerightmid><font:LucidNewMatExtT><index:61> +<char:braceex><font:LucidNewMatExtT><index:62> +<char:arrowvertex><font:LucidNewMatExtT><index:63> +<char:parenleftbt><font:LucidNewMatExtT><index:64> +<char:parenrightbt><font:LucidNewMatExtT><index:65> +<char:parenleftex><font:LucidNewMatExtT><index:66> 
+<char:parenrightex><font:LucidNewMatExtT><index:67> +<char:angbracketleftBig><font:LucidNewMatExtT><index:68> +<char:angbracketrightBig><font:LucidNewMatExtT><index:69> +<char:unionsqtext><font:LucidNewMatExtT><index:70> +<char:unionsqdisplay><font:LucidNewMatExtT><index:71> +<char:contintegraltext><font:LucidNewMatExtT><index:72> +<char:contintegraldisplay><font:LucidNewMatExtT><index:73> +<char:circledottext><font:LucidNewMatExtT><index:74> +<char:circledotdisplay><font:LucidNewMatExtT><index:75> +<char:circleplustext><font:LucidNewMatExtT><index:76> +<char:circleplusdisplay><font:LucidNewMatExtT><index:77> +<char:circlemultiplytext><font:LucidNewMatExtT><index:78> +<char:circlemultiplydisplay><font:LucidNewMatExtT><index:79> +<char:summationtext><font:LucidNewMatExtT><index:80> +<char:producttext><font:LucidNewMatExtT><index:81> +<char:integraltext><font:LucidNewMatExtT><index:82> +<char:uniontext><font:LucidNewMatExtT><index:83> +<char:intersectiontext><font:LucidNewMatExtT><index:84> +<char:unionmultitext><font:LucidNewMatExtT><index:85> +<char:logicalandtext><font:LucidNewMatExtT><index:86> +<char:logicalortext><font:LucidNewMatExtT><index:87> +<char:summationdisplay><font:LucidNewMatExtT><index:88> +<char:productdisplay><font:LucidNewMatExtT><index:89> +<char:integraldisplay><font:LucidNewMatExtT><index:90> +<char:uniondisplay><font:LucidNewMatExtT><index:91> +<char:intersectiondisplay><font:LucidNewMatExtT><index:92> +<char:unionmultidisplay><font:LucidNewMatExtT><index:93> +<char:logicalanddisplay><font:LucidNewMatExtT><index:94> +<char:logicalordisplay><font:LucidNewMatExtT><index:95> +<char:coproducttext><font:LucidNewMatExtT><index:96> +<char:coproductdisplay><font:LucidNewMatExtT><index:97> +<char:hatwide><font:LucidNewMatExtT><index:98> +<char:hatwider><font:LucidNewMatExtT><index:99> +<char:hatwidest><font:LucidNewMatExtT><index:100> +<char:tildewide><font:LucidNewMatExtT><index:101> +<char:tildewider><font:LucidNewMatExtT><index:102> 
+<char:tildewidest><font:LucidNewMatExtT><index:103> +<char:bracketleftBig><font:LucidNewMatExtT><index:104> +<char:bracketrightBig><font:LucidNewMatExtT><index:105> +<char:floorleftBig><font:LucidNewMatExtT><index:106> +<char:floorrightBig><font:LucidNewMatExtT><index:107> +<char:ceilingleftBig><font:LucidNewMatExtT><index:108> +<char:ceilingrightBig><font:LucidNewMatExtT><index:109> +<char:braceleftBig><font:LucidNewMatExtT><index:110> +<char:bracerightBig><font:LucidNewMatExtT><index:111> +<char:radicalbig><font:LucidNewMatExtT><index:112> +<char:radicalBig><font:LucidNewMatExtT><index:113> +<char:radicalbigg><font:LucidNewMatExtT><index:114> +<char:radicalBigg><font:LucidNewMatExtT><index:115> +<char:radicalbt><font:LucidNewMatExtT><index:116> +<char:radicalvertex><font:LucidNewMatExtT><index:117> +<char:radicaltp><font:LucidNewMatExtT><index:118> +<char:arrowvertexdbl><font:LucidNewMatExtT><index:119> +<char:arrowtp><font:LucidNewMatExtT><index:120> +<char:arrowbt><font:LucidNewMatExtT><index:121> +<char:bracehtipdownleft><font:LucidNewMatExtT><index:122> +<char:bracehtipdownright><font:LucidNewMatExtT><index:123> +<char:bracehtipupleft><font:LucidNewMatExtT><index:124> +<char:bracehtipupright><font:LucidNewMatExtT><index:125> +<char:arrowdbltp><font:LucidNewMatExtT><index:126> +<char:arrowdblbt><font:LucidNewMatExtT><index:196> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/test.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/test.txt new file mode 100755 index 00000000..a41f4665 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/JIMTEST/test.txt @@ -0,0 +1,1057 @@ +\preamble +\loadchars{charmap.txt} +\loadchars{standard-charmap.txt} +\loadchars{lucmathsym-charmap.txt} +\loadchars{lucmathit-charmap.txt} +\loadchars{lucmathext-charmap.txt} +\loadchars{symbol-charmap.txt} +\loadstyles{styles.txt} + +\title A Micromodularity Mechanism + +\section Testing + +This is gamma: \gamma.\\ +This is Delta: \Delta.\\ +This 
is oplus: \oplus. +\scriptA \arrowdblright \scriptA + +This is a subscripted variable: A\sub<\bold<hello>\italics<there>>. +Math mode: $x + 2 = y, and && x\sub<2> = y\sub<3> = x\sub<ijk>$ + +\author Daniel Jackson, Ilya Shlyakhter and Manu Sridharan\\ +Laboratory for Computer Science\\ +Massachusetts Institute of Technology\\ +Cambridge, Massachusetts, USA\\ +dnj@mit.edu + +\opening Abstract + +A simple mechanism for structuring specifications is described. By modelling structures as atoms, it remains entirely first-order and thus amenable to automatic analysis. And by interpreting fields of structures as relations, it allows the same relational operators used in the formula language to be used for dereferencing. An extension feature allows structures to be developed incrementally, but requires no textual inclusion nor any notion of subtyping. The paper demonstrates the flexibility of the mechanism by application in a variety of common idioms. + +\subsection* Categories and Subject Descriptors + +D.2.1 Requirements/Specifications---Languages; D.2.4 Software/Program Verification---Formal methods, Model checking; F.3.1 Specifying and Verifying and Reasoning about Programs---Assertions, Invariants, Specification techniques. + +\subsection* General Terms + +Design; Documentation; Languages; Verification. + +\subsection* Keywords + +Modeling languages; formal specification; first-order logic; relational calculus; Alloy language; Z specification language; schema calculus. + +\section* Introduction + +\quote I am neither crazy nor a micromaniac.\\ +(A micromaniac is someone obsessed with\\ +reducing things to their smallest possible form.\\ +This word, by the way, is not in the dictionary.)\\ +--_Edouard de Pomiane, French Cooking in Ten Minutes, 1930_ + +\noindent Most specification languages provide mechanisms that allow larger specifications to be built from smaller ones. 
These mechanisms are often the most complicated part of the language, and present obstacles to analysis. This paper presents a simple mechanism that seems to be expressive enough for a wide variety of uses, without compromising analyzability. + +This work is part of a larger project investigating the design of a "micro modelling language". Our premise is that lightweight application of formal methods [6] demands an unusually small and simple language that is amenable to fully automatic semantic analysis. The Alloy language is the result to date of our efforts to design such a language. Based on our experiences with the language [4] and its analyzer [5], we have recently developed a revision of Alloy that overcomes many of its limitations. This paper describes the key feature of the revised language: the _signature_, a new modularity mechanism. + +The mechanism allows our existing analysis scheme [3] to be applied to specifications involving structures. This is not achieved by treating the structuring mechanism as syntactic sugar, which would limit the power of the notation (ruling out, for example, quantification over structures) and would complicate the analysis tool and make output harder for users to interpret. Because of the mechanism's generality, it has also enabled us to simplify the language as a whole, making it more uniform and eliminating some ad hoc elements. + +Our mechanism has a variety of applications. It can express inherent structure in the system being modelled, and can be used to organize a specification in which details are added incrementally. It can be used to construct a library of datatypes, or to describe a system as an instantiation of a more general system. And it can express state invariants, transitions, and sequences, despite the lack of any special syntax for state machines. + +In this last respect, the new language differs most markedly from its predecessor [4], which provided built-in notions of state invariants and operations. 
We now think this was a bad idea, because it made the language cumbersome for problems (such as the analysis of security policies or architectural topology constraints) in which temporal behaviour can be fruitfully ignored, and too inflexible for many problems in which temporal behaviour is important. + +#Because the notation as a whole is small, simple and analyzable, and free of bias towards any particular domain of application, it may be suitable as an intermediate language. A tool for architectural design, for example, might translate a more domain-specific notation into our language, allowing analyses that such tools do not currently support (such as automatic generation of sample configurations from style rules, and checking of consistency). +# +Our paper begins by explaining our motivations---the requirements our mechanism is designed to meet. The mechanism is then presented first informally in a series of examples, and then slightly more rigorously feature-by-feature. We discuss related work, especially the schema calculus of Z, and close with a summary of the merits and deficiencies of our notation as a whole. 
+ +\section Requirements + +The goal of this work was to find a single structuring mechanism that would support a variety of common specification idioms: + +\point \cdot _States_: description of complex state as a collection of named components; incremental description both by hierarchy, in which a complex state becomes a component of a larger state, and by extension, in which new components are added; declaration of invariants and definitions of derived components; + +\point \cdot _Datatypes_: separate description of a library of polymorphic datatypes, such as lists, sequences, trees and orders, along with their operators; + +\point \cdot _Transitions_: specification of state transitions as operations described implicitly as formulas relating pre- and post-state; composition of operations from previously defined invariants and operations; sequential composition of operations; description of traces as sequences of states; + +\point \cdot _Abstractions_: description of abstraction relations between state spaces; + +\point \cdot _Assertions_: expression of properties intended to be redundant, to be checked by analysis, including: relationships amongst invariants; wellformedness of definitions (eg, that an implicit definition is functional); establishment and preservation of invariants by operations; properties of states reachable along finite traces; and simulation relationships between abstract and concrete versions of an operation. + +\noindent We wanted additionally to meet some more general criteria: + +\point \cdot _Simplicity_. The language as a whole should be exceptionally small and simple. + +\point \cdot _Flexibility_. Support for the particular idioms of state-machine specification should not be a straitjacket; the language should not dictate how state machines are expressed, and should not make it hard to describe structures that are not state machines (such as security models and architectural styles). + +\point \cdot _Analyzability_. 
A fully automatic semantic analysis should be possible. In the present work, this has been achieved by requiring that the modularity mechanism be first order, and expressible in the kernel of the existing language. + +\noindent Finally, our language design decisions have been influenced by some principles that we believe contribute to these goals, make the language easier to use, and analysis tools easier to build: + +\point \cdot _Explicitness_. The language should be fully explicit, with as few implicit constraints, coercions, etc, as possible. + +\point \cdot _Minimal mathematics_. The basic theory of sets and relations should suffice; it should not be necessary to introduce domains, fixed points, infinities or special logical values. + +\point \cdot _Minimal syntax_. There should be very few keywords or special symbols, and no need for special typography or layout. + +\point \cdot _Uniformity_. A small and general set of constructs should be applied uniformly, independent of context. + +\point \cdot _Lack of novelty_. Whenever possible, notions and syntax should follow standard usage of conventional mathematics and programming. + +\section Informal Description + +As a running example, we will specify a simple memory system involving a cache and a main memory. The memory has a fixed set of addresses and associates a data value with each address. The cache, in contrast, associates data values with some subset of addresses that varies over time. The cache is updated by a "write-back scheme", which means that updates need not be reflected to main memory immediately. The cache may therefore hold a more current value for an address than the main memory; the two are brought into alignment when the address is flushed from the cache and its value is written to main memory. 
+ +\subsection States + +We start by declaring the existence of addresses and data values: + +\geekmath sig Addr {}\\ +sig Data {} + +Each line declares a _signature_, and introduces a set of atoms: _Addr_ for the set of addresses, and _Data_ for the set of data values. Like 'given types' in Z, these sets are disjoint from one another, and their atoms are unstructured and uninterpreted. Signature names can be used as expressions denoting sets, but they are also treated as types, so the expression _Addr+Data_, for example, is ill-typed, since the union operator (+) requires the types of its operands to match. + +The signature declaration + +\geekmath sig Memory {\\ + addrs: set Addr,\\ + map: addrs ->! Data\\ + } + +likewise declares a set of atoms, _Memory_, corresponding to the set of all possible memories. In addition, it declares two fields: _addrs_ and _map_ which associate with a memory a set of addresses and a mapping from addresses to data values respectively. Thus, given a memory _m_, the expression _m.addrs_ will be a set of addresses, _m.map_ will be a relation from addresses to data values. The memory, addresses and data values should be viewed as distinct atoms in their own right; fields don't decompose an atom, but rather relate one atom to others. The exclamation mark in the declaration of the field _map_ is a 'multiplicity marking': it says that _m.map_ associates exactly one data value with each address in the set _m.addrs_. The use of _addrs_ rather than _Addr_ on the left side of the arrow indicates that _m.map_ does not associate a data value with an address that is not in the set _m.addrs_. + +In these expressions, the dot is simply relational image. More precisely, when we say that _m_ is a memory, we mean that the expression _m_ denotes a set consisting of a single atom. The field _addrs_ is a relation from _Memory_ to _Addr_, and _m.addrs_ denotes the image of the singleton set under this relation. 
So for a set of memories _ms_, the expression _ms.addrs_ will denote the union of the sets of addresses that belong to the individual memories. Given an address _a_, the expression _a.(m.map)_ denotes the set of data values associated with address _a_ in memory _m_, which will either be empty (when the address is not mapped) or a singleton. For convenience, we allow the relational image _s.r_ to be written equivalently as _r_[_s_], where [] binds more loosely than dot, so this expression may be written as _m.map_[_a_] instead. + +Like objects of an object-oriented language, two distinct atoms can have fields of the same value. Unlike objects, however, atoms are immutable. Each field is fixed, and cannot map an atom to one value at one time and another value at another time. To describe an operation that changes the state of a memory, therefore, we will use two distinct atoms in the set _Memory_ to represent the memory's state before and after. + +\subsection Extension + +A signature declaration can introduce a set as a subset of one previously declared, in which case we call it a _subsignature_. In this case, the set does not correspond to a type, but rather its atoms take on the type of the superset. For example, the declaration + +\geekmath sig MainMemory extends Memory {} + +introduces a set of atoms _MainMemory_ representing main memories, which is constrained to be a subset of the set _Memory_. Likewise + +\geekmath sig Cache extends Memory {\\ + dirty: set addrs\\ + } + +introduces a set of atoms _Cache_ representing those memories that can be regarded as caches. It also introduces a field _dirty_ that associates with a cache the set of addresses that is dirty; later, we will use this to represent those addresses for which a cache and main memory differ. Because _Cache_ is a subset of _Memory_, and _m.addrs_ (for any memory _m_) is a subset of _Addr_, the field denotes a relation whose type is from _Memory_ to _Addr_. 
Expressions such as _m.dirty_ are therefore type-correct for a memory _m_, whether or not _m_ is a cache. But since declaration of the field _dirty_ within the signature _Cache_ constrains _dirty_ to be a relation that maps only caches, _m.dirty_ will always denote the empty set when _m_ is not a cache. + +This approach avoids introducing a notion of subtyping. Subtypes complicate the language, and tend to make it more difficult to use. In OCL [17], which models extension with subtypes rather than subsets, an expression such as _m.dirty_ would be illegal, and would require a coercion of _m_ to the subtype _Cache_. Coercions do not fit smoothly into the relational framework; they interfere with the ability to take the image of a set under a relation, for example. Moreover, subtypes are generally disjoint, whereas our approach allows the sets denoted by subsignatures to overlap. In this case, we'll add a constraint (in Section 2.4 below) to ensure that _MainMemory_ and _Cache_ are in fact disjoint. + +Declaring _Cache_ and _MainMemory_ as subsignatures of _Memory_ serves to factor out their common properties. Extension can be used for a different purpose, in which a single signature is developed by repeated extensions along a chain. In this case, the supersignatures may not correspond to entities in the domain being modelled, but are simply artifacts of specification---fragments developed along the way. Z specifications are typically developed in this style. + +\subsection Hierarchy + +The signature declaration also supports hierarchical structuring. We can declare a signature for systems each consisting of a cache and a main memory: + +\geekmath sig System {\\ + cache: Cache,\\ + main: MainMemory\\ + } + +Again, _System_ introduces a set of atoms, and each field represents a relation. The omission of the keyword _set_ indicates that a relation is a total function. 
So for a system _s_, the expression _s.cache_ denotes one cache---that is, a set consisting of a single cache. This is one of very few instances of implicit constraints in our language, which we introduced in order to make declaration syntax conventional. + +Since signatures denote sets of atoms, apparently circular references are allowed. Linked lists, for example, may be modelled like this, exactly as they might be implemented in a language like Java: + +\geekmath sig List {}\\ +sig NonEmptyList extends List {elt: Elt, rest: List} + +There is no recursion here; the field _rest_ is simply a homogeneous relation of type _List_ to _List_, with its domain restricted to the subset _NonEmptyList_. + +\subsection State Properties + +Properties of signature atoms are recorded as logical formulas. To indicate that such a property always holds, we package it as a _fact_. To say that, for any memory system, the addresses in a cache are always addresses within the main memory, we might write: + +\geekmath fact {all s: System | s.cache.addrs in s.main.addrs} + +or, using a shorthand that allows facts about atoms of a signature to be appended to it: + +\geekmath sig System {cache: Cache, main: MainMemory}\\ + {cache.addrs in main.addrs} + +The appended fact is implicitly prefixed by + +\geekmath all this: System | with this | + +in which the _with_ construct, explained in Section 3.6 below, causes the fields implicitly to be dereferences of the atom _this_. + +A fact can constrain atoms of arbitrary signatures; to say that no main memory is a cache we might write: + +\geekmath fact {no (MainMemory & Cache)} + +where _no e_ means that the expression _e_ has no elements, and & is intersection. + +#Again, this is common enough that we provide a shorthand. Declaring a subsignature as _disjoint_ indicates that it shares no atoms with any other subsignatures of the same supersignature. 
So the fact can be replaced by changing our declaration of _MainMemory_ to: +# +#\geekmath disjoint sig MainMemory extends Memory {} +# +Most descriptions have more interesting facts. We can express the fact that linked lists are acyclic, for example: + +\geekmath fact {no p: List | p in p.\hat @sep rest} + +The expression _\hat @sep rest_ denotes the transitive closure of the relation _rest_, so that _p.^rest_ denotes the set of lists reachable from _p_ by following the field _rest_ once or more. This illustrates a benefit of treating a field as a relation---that we can apply standard relational operators to it---and is also an example of an expression hard to write in a language that treats extension as subtyping (since each application of _rest_ would require its own coercion). + +Often we want to define a property without imposing it as a permanent constraint. In that case, we declare it as a _function_. Here, for example, is the invariant that the cache lines not marked as dirty are consistent with main memory: + +\geekmath fun DirtyInv (s: System) {\\ + all a !: s.cache.dirty | s.cache.map[a] = s.main.map[a]\\ + } + +(The exclamation mark negates an operator, so the quantification is over all addresses that are _not_ dirty.) Packaging this as a function that can be applied to a particular system, rather than as a fact for all systems, will allow us to express assertions about preservation of the invariant (Section 2.8). + +By default, a function returns a boolean value---the value of the formula in its body. The value of _DirtyInv(s)_ for a system _s_ is therefore true or false. A function may return non-boolean values. 
We might, for example, define the set of bad addresses to be those for which the cache and main memory differ: + +\geekmath fun BadAddrs (s: System): set Addr {\\ + result = {a: Addr | s.cache.map[a] != s.main.map[a]}\\ + } + +and then write our invariant like this: + +\geekmath fun DirtyInv (s: System) {BadAddrs(s) in s.cache.dirty} + +In this case, _BadAddrs(s)_ denotes a set of addresses, and is short for the expression on the right-hand side of the equality in the definition of the function _BadAddrs_. The use of the function application as an expression does not in fact depend on the function being defined explicitly. Had we written + +\geekmath fun BadAddrs (s: System): set Addr {\\ + all a: Addr | a in result iff s.cache.map[a] != s.main.map[a]\\ + } + +the application would still be legal; details are explained in Section 3.7. +# +# \geekmath BadAddrs(s) in s.cache.dirty +# +# would be treated as short for +# +# \geekmath all result: set Addr |\\ +# (all a: Addr | a in result iff s.cache.map[a] != s.main.map[a])\\ +# => result in s.cache.dirty +# +# This desugaring is explained in more detail in Section 99 below. + +\subsection Operations + +Following Z, we can specify operations as formulas that constrain pre- and post-states. An operation may be packaged as a single function (or as two functions if we want to separate pre- and post-conditions in the style of VDM or Larch). + +The action of writing a data value to an address in memory might be specified like this: + +\geekmath fun Write (m,m': Memory, d: Data, a: Addr) {\\ + m'.map = m.map ++ (a->d)\\ + } + +The formula in the body of the function relates _m_, the value of the memory before, to _m'_, the value after. These identifiers are just formal arguments, so the choice of names is not significant. Moreover, the prime mark plays no special role akin to decoration in Z---it's a character like any other. The operator ++ is relational override, and the arrow forms a cross product. 
As mentioned above, scalars are represented as singleton sets, so there is no distinction between a tuple and a relation. The arrows in the expressions _a->d_ here and _addrs->Data_ in the declaration of the _map_ field of _Memory_ are one and the same. + +The action of reading a data value can likewise be specified as a function, although since it has no side-effect we omit the _m'_ parameter: + +\geekmath fun Read (m: Memory, d: Data, a: Addr) {\\ + d = m.map[a]\\ + } + +Actions on the system as a whole can be specified using these primitive operations; in Z, this idiom is called 'promotion'. A read on the system is equivalent to reading the cache: + +\geekmath fun SystemRead (s: System, d: Data, a: Addr) {\\ + Read (s.cache, d, a)\\ + } + +The _Read_ operation has an implicit precondition. Since the data parameter _d_ is constrained (implicitly by its declaration) to be scalar---that is, a singleton set---the relation _m.map_ must include a mapping for the address parameter _a_, since otherwise the expression _m.map[a]_ will evaluate to the empty set, and the formula will not be satisfiable. This precondition is inherited by _SystemRead_. If the address _a_ is not in the cache, the operation cannot proceed, and it will be necessary first to load the data from main memory. It is convenient to specify this action as a distinct operation: + +\geekmath fun Load (s,s': System, a: Addr) {\\ + a !in s.cache.addrs\\ + s'.cache.map = s.cache.map + (a->s.main.map[a])\\ + s'.main = s.main\\ + } + +The + operator is just set union (in this case, of two binary relations, the second consisting of a single tuple). A write on the system involves a write to the cache, and setting the dirty bit. 
Again, this can be specified using a primitive memory operation: + +\geekmath fun SystemWrite (s,s': System, d: Data, a: Addr) {\\ + Write (s.cache, s'.cache, d, a)\\ + s'.cache.dirty = s.cache.dirty + a\\ + s'.main = s.main\\ + } + +A cache has much smaller capacity than main memory, so it will occasionally be necessary (prior to loading or writing) to flush lines from the cache back to main memory. We specify flushing as a non-deterministic operation that picks some subset of the cache addrs and writes them back to main memory: + +\geekmath fun Flush (s,s': System) {\\ + some x: set s.cache.addrs {\\ + s'.cache.map = s'.cache.map - (x->Data)\\ + s'.cache.dirty = s.cache.dirty - x\\ + s'.main.map = s.main.map ++ \\ + {a: x, d: Data | d = s.cache.map[a]}\\ + } + +The - operator is set difference; note that it is applied to sets of addresses (in the third line) and to binary relations (in the second). The comprehension expression creates a relation of pairs _a_->_d_ satisfying the condition. + +Finally, it is often useful to specify the initial conditions of a system. To say that the cache initially has no addresses, we might write a function imposing this condition on a memory system: + +\geekmath fun Init (s: System) {no s.cache.addrs} + +\subsection Traces + +To support analyses of behaviours consisting of sequences of states, we declare two signatures, for ticks of a clock and traces of states: + +\geekmath sig Tick {}\\ +sig SystemTrace {\\ + ticks: set Tick,\\ + first, last: ticks,\\ + next: (ticks - last) !->! (ticks - first)\\ + state: ticks ->! 
System}\\ + {\\ + first.*next = ticks\\ + Init (first.state)\\ + all t: ticks - last | \\ + some s = t.state, s' = t.next.state |\\ + Flush (s,s')\\ + || (some a: Addr | Load (s,s',a))\\ + || (some d: Data, a: Addr | SystemWrite (s,s',d,a))\\ + } + +Each trace consists of a set of _ticks_, a _first_ and _last_ tick, an ordering relation _next_ (whose declaration makes it a bijection from all ticks except the last to all ticks except the first), and a relation _state_ that maps each tick to a system state. + +The fact appended to the signature states first a generic property of traces: that the ticks of a trace are those reachable from the first tick. It then imposes the constraints of the operations on the states in the trace. The initial condition is required to hold in the first state. Any subsequent pair of states is constrained to be related by one of the three side-effecting operations. The existential quantifier plays the role of a _let_ binding, allowing _s_ and _s'_ in place of _t.state_ and _t.next.state_, representing the state for tick _t_ and the state for its successor _t.next_. Note that this formulation precludes stuttering; we could admit it simply by adding the disjunct _s_=_s'_ allowing a transition that corresponds to no operation occurring. + +Bear in mind that this fact is a constraint on all atoms in the set _SystemTrace_. As a free standing fact, the second line of the fact---the initial condition--- would have been written: + +\geekmath fact {all x: SystemTrace | Init ((x.first).(x.state))} + +\subsection Abstraction + +Abstraction relationships are easily expressed using our function syntax. 
To show that our memory system refines a simple memory without a cache, we define an abstraction function _Alpha_ saying that a system corresponds to a memory that is like the system's memory, overwritten by the entries of the system's cache: + +\geekmath fun Alpha (s: System, m: Memory) {\\ + m.map = s.main.map ++ s.cache.map\\ + } + +As another example, if our linked list were to represent a set, we might define the set corresponding to a given list as that containing the elements reachable from the start: + +\geekmath fun ListAlpha (p: List, s: set Elt) {\\ + s = p.*rest.elt\\ + } + +\subsection Assertions + +Theorems about a specification are packaged as _assertions_. An assertion is simply a formula that is intended to hold. A tool can check an assertion by searching for a counterexample---that is, a model of the formula's negation. + +The simplest kinds of assertion record consequences of state properties. For example, + +\geekmath assert {\\ + all s: System | DirtyInv (s) && no s.cache.dirty\\ + => s.cache.map in s.main.map\\ + } + +asserts that if the dirtiness invariant holds, and there are no dirty addresses, then the mapping of addresses to data in the cache is a subset of the mapping in the main memory. + +An assertion can express consequences of operations. For example, + +\geekmath assert {\\ + all s: System, d: Data, a: Addr |\\ + SystemRead (s,d,a) => a in s.cache.addrs\\ + } + +embodies the claim made above that _SystemRead_ has an implicit precondition; it asserts that whenever _SystemRead_ occurs for an address, that address must be in the cache beforehand. An assertion can likewise identify a consequence in the post-state; this assertion + +\geekmath assert {\\ + all s,s': System, d: Data, a: Addr |\\ + SystemWrite (s,s',d,a) => s'.cache.map[a] = d\\ + } + +says that after a _SystemWrite_, the data value appears in the cache at the given address. + +Preservation of an invariant by an operation is easily recorded as an assertion. 
To check that our dirtiness invariant is preserved when writes occur, we would assert + +\geekmath assert {\\ + all s,s': System, d: Data, a: Addr |\\ + SystemWrite (s,s',d,a) && DirtyInv (s) => DirtyInv (s')\\ + } + +Invariant preservation is not the only consequence of an operation that we would like to check that relates pre- and post-states. We might, for example, want to check that operations on the memory system do not change the set of addresses of the main memory. For the _Flush_ operation, for example, the assertion would be + +\geekmath assert {\\ + all s,s': System | Flush(s,s') => s.main.addrs = s'.main.addrs\\ + } + +which holds only because the cache addresses are guaranteed to be a subset of the main memory addresses (by the fact associated with the _System_ signature). + +The effect of a sequence of operations can be expressed by quantifying appropriately over states. For example, + +\geekmath assert {\\ + all s, s': System, a: Addr, d,d': Data | \\ + SystemWrite (s,s',d,a) && SystemRead (s',d',a) => d = d'\\ + } + +says that when a write is followed by a read of the same address, the read returns the data value just written. + +To check that a property holds for all reachable states, we can assert that the property is an invariant of every operation, and is established by the initial condition. This strategy can be shown (by induction) to be sound, but it is not complete. A property may hold for all reachable states, but may not be preserved because an operation breaks the property when executed in a state that happens not to be reachable. + +Traces overcome this incompleteness. 
Suppose, for example, that we want to check the (rather contrived) property that, in every reachable state, if the cache contains an address that isn't dirty, then it agrees with the main memory on at least one address: + +\geekmath fun DirtyProp (s: System) {\\ + some (s.cache.addrs - s.cache.dirty)\\ + => some a: Addr | s.cache.map[a] = s.main.map[a]\\ + } + +We can assert that this property holds in the last state of every trace: + +\geekmath assert {\\ + all t: SystemTrace | with t | DirtyProp (last.state)\\ + } + +This assertion is valid, even though _DirtyProp_ is not an invariant. A write invoked in a state in which all clean entries but one had non-matching values can result in a state in which there are still clean entries but none has a matching value. + +Finally, refinements are checked by assertions involving abstraction relations. We can assert that a _SystemWrite_ refines a basic _Write_ operation on a simple memory: + +\geekmath assert {\\ + all s,s': System, m,m': Memory, a: Addr, d: Data |\\ + Alpha (s,m) && Alpha (s',m') && SystemWrite (s,s',a,d)\\ + => Write (m,m',a,d)\\ + } + +or that the _Flush_ operation is a no-op when viewed abstractly: + +\geekmath assert {\\ + all s,s': System, m,m': Memory |\\ + Alpha (s,m) && Alpha (s',m') && Flush (s,s')\\ + => m.map = m'.map\\ + } + +Note the form of the equality; _m = m'_ would be wrong, since two distinct memories may have the same mapping, and the abstraction _Alpha_ constrains only the mapping and not the memory atom itself. + +Many of the assertions shown here can be made more succinct by the function shorthand explained in Section 3.7 below. 
For example, the assertion that a read following a write returns the value just written becomes: + +\geekmath assert {\\ + all s: System, a: Addr, d: Data | \\ + SystemRead (SystemWrite (s,d,a),a) = d\\ + } + +and the assertion that _Flush_ is a no-op becomes: + +\geekmath assert {\\ + all s: System | Alpha (s).map = Alpha (Flush (s)).map\\ + } + +\subsection Polymorphism + +Signatures can be parameterized by signature types. Rather than declaring a linked list whose elements belong to a particular type _Elt_, as above, we would prefer to declare a generic list: + +\geekmath sig List [T] {}\\ +sig NonEmptyList [T] extends List [T] {elt: T, rest: List [T]} + +Functions and facts may be parameterized in the same way, so we can define generic operators, such as: + +\geekmath fun first [T] (p: List [T]): T {result = p.elt}\\ +fun last [T] (p: List [T]): T {some q: p.*rest | result = q.elt && no q.rest}\\ +fun elements [T] (p: List [T]): set T {result = p.*rest.elt} + +In addition, let's define a generic function that determines whether two elements follow one another in a list: + +\geekmath fun follows [T] (p: List[T], a,b: T) {\\ + some x: p.*rest | x.elt = a && x.next.elt = b\\ + } + +To see how a generic signature and operators are used, consider replacing the traces of Section 2.6 with lists of system states. Define a function that determines whether a list is a trace: + +\geekmath fun isTrace (t: List [System]) {\\ + Init (first(t))\\ + all s, s': System | follows (t,s,s') => {\\ + Flush (s,s')\\ + || (some a: Addr | Load (s,s',a))\\ + || (some d: Data, a: Addr | SystemWrite (s,s',d,a))\\ + }\\ + } + +Now our assertion that every reachable system state satisfies _DirtyProp_ can now be written: + +\geekmath assert {\\ + all t: List[System] | isTrace(t) => DirtyProp (last(t))\\ + } + +\subsection Variants + +To illustrate the flexibility of our notation, we sketch a different formulation of state machines oriented around transitions rather than states. 
+ +Let's introduce a signature representing state transitions of our memory system: + +\geekmath sig SystemTrans {pre,post: System}\\ + {pre.main.addrs = post.main.addrs} + +Declaring the transitions as a signature gives us the opportunity to record properties of all transitions---in this case requiring that the set of addresses of the main memory is fixed. + +Now we introduce a subsignature for the transitions of each operation. For example, the transitions that correspond to load actions are given by: + +\geekmath sig LoadTrans extends SystemTrans {a: Addr}\\ + {Load (pre, post, a)} +# } { +# a !in pre.cache.addrs\\ +# post.cache.map = pre.cache.map ++ (a->pre.main.map[a])\\ +# post.main = pre.main\\ +# } +# +# The formula here is actually identical to the one declared above, but with _pre_ and _post_ for # _s_ and _s'_ ; we could in fact replace it by the function application _Load(pre,post,a)_. + +For each invariant, we define a set of states. For the states satisfying the dirty invariant, we might declare + +\geekmath sig DirtyInvStates extends System {} + +along with the fact + +\geekmath fact {DirtyInvStates = {s: System | DirtyInv(s)}} + +To express invariant preservation, it will be handy to declare a function that gives the image of a set of states under a set of transitions: + +\geekmath fun postimage (ss: set System, tt: set SystemTrans): set System {\\ + result = {s: System | some t: tt | t.pre in ss && s = t.post}\\ + } + +so that we can write the assertion like this: + +\geekmath assert {postimage (DirtyInvStates, LoadTrans) in DirtyInvStates} + +For an even more direct formulation of state machine properties, we might have defined a transition relation instead: + +\geekmath fun Trans (r: System -> System) {\\ + all s, s' : System | \\ + s->s' in r => Flush (s,s') || ...\\ + } + +Then, using transitive closure, we can express the set of states reachable from an initial state, and assert that this set belongs to the set characterized by some property: 
+ +\geekmath assert {all r: System -> System, s: System |\\ + Init (s) && Trans(r) => s.*r in DirtyPropStates\\ + } + +where _DirtyPropStates_ is defined analogously to _DirtyInvStates_. + +\subsection Definitions + +Instead of declaring the addresses of a memory along with its mapping, as we did before: + +\geekmath sig Memory {\\ + addrs: set Addr,\\ + map: addrs ->! Data\\ + } + +we could instead have declared the mapping alone: + +\geekmath sig Memory {\\ + map: Addr ->? Data\\ + } + +and then _defined_ the addresses using a subsignature: + +\geekmath sig MemoryWithAddrs extends Memory {\\ + addrs: set Addr}\\ + {addrs = {a: Addr | some a.map}} + +Now by making the subsignature subsume all memories: + +\geekmath fact {Memory in MemoryWithAddrs} + +we have essentially 'retrofitted' the field. Any formula involving memory atoms now implicitly constrains the _addrs_ field. For example, we can assert that _Read_ has an implicit precondition requiring that the argument be a valid address: + +\geekmath assert {all m: Memory, a: Addr, d: Data | Read (m,d,a) => a in m.addrs} + +even though the specification of _Read_ was written when the field _addrs_ did not even exist. + +\section Semantics + +For completeness, we give an overview of the semantics of the language. The novelties with respect to the original version of Alloy [4] are (1) the idea of organizing relations around basic types as signatures, (2) the treatment of extension as subsetting, and (3) the packaging of formulas in a more explicit (and conventional) style. The semantic basis has been made cleaner, by generalizing relations to arbitrary arity, eliminating 'indexed relations' and the need for a special treatment of sets. + +\subsection Types + +We assume a universe of atoms. The standard notion of a mathematical relation gives us our only composite datatype. The value of an expression will always be a relation---that is, a collection of tuples of atoms. 
Relations are first order: the elements of a tuple are themselves atoms and never relations. + +The language is strongly typed. We partition the universe into subsets each associated with a _basic_ type, and write (T_1, T_2, ..., T_n) for the type of a relation whose tuples each consist of _n_ atoms, with types T_1, T_2, etc. + +A set is represented semantically as a unary relation, namely a relation whose tuples each contain one atom. A tuple is represented as a singleton relation, namely a relation containing exactly one tuple. A scalar is represented as a unary, singleton relation. We use the terms 'set', 'tuple' and 'scalar' to describe relations with the appropriate properties. Basic types are used only to construct relation types, and every expression that appears in a specification has a relational type. Often we will say informally that an expression has a type _T_ where _T_ is the name of a basic type when more precisely we mean that the expression has the type (_T_). + +So, in contrast to traditional mathematical style, we do not make distinctions amongst the atom _a_, the tuple (_a_), the set {_a_} containing just the atom, or the set {(_a_)} containing the tuple, and represent all of these as the last. This simplifies the semantics and gives a more succinct and uniform syntax. +# Because the language is first order (and has no sets of sets, for example), it requires no coercions, and seems not to cause confusion even for novice specifiers. + +\subsection Expression Operators + +Expressions can be formed using the standard set operators written as ASCII characters: union (+), intersection (&) and difference (-). Some standard relational operators, such as transpose (~) and transitive closure (^), can be applied to expressions that denote binary relations. Relational override (++) has its standard meaning for binary relations but can be applied more broadly. +#The type rules and semantics are completely standard. 
For example, if _e_ has the type (S,T), then ~_e_ has the type (T,S) and denotes the collection of pairs obtained by reversing each pair in _e_; if _p_ and _q_ both have the type (T_1, T_2, ..., T_n), then the union _p+q_, intersection _p_&_q_, and difference _p-q_ also have that type, and denote respectively the relations whose tuples are those that appear in either of _p_ and _q_, both of _p_ and _q_, and _p_ but not _q_. + +There are two special relational operators, dot and arrow. The dot operator is a generalized relational composition. Given expressions $p$ and $q$, the expression $p.q$ contains the tuple +$\angleleft\sep p\sub<1>, ... p\sub<m-1>, q\sub<2>, ..., q\sub<n>\angleright$ +when _p_ contains +@math \langle@sep p_1, ..., p_{m}\rangle, +_q_ contains +@math \langle@sep q_1, ... q_n\rangle, +and +@math p_m = q_1. The last type of _p_ and the first type of _q_ must match, and _m_ + _n_, the sum of the arities of _p_ and _q_, must be three or more so that the result is not degenerate. When _p_ is a set and _q_ is a binary relation, the composition _p.q_ is the standard relational image of _p_ under _q_; when _p_ and _q_ are both binary relations, _p.q_ is standard relational composition. In all of the examples above, the dot operator is used only for relational image. + +The arrow operator is cross product: _p \textarrow q_ is the relation containing the tuple +@math \langle@sep p_1, ..., p_{m}, q_1, ... q_n\rangle +when _p_ contains +@math \langle@sep p_1, ..., p_{m}\rangle, +and _q_ contains +@math \langle@sep q_1, ... q_n\rangle. +In all the examples in this paper, _p_ and _q_ are sets, and _p \textarrow q_ is their standard cross product. + +\subsection Formula Operators + +Elementary formulas are formed from the subset operator, written _in_. Thus _p in q_ is true when every tuple in _p_ is in _q_. The formula _p : q_ has the same meaning, but when _q_ is a set, adds an implicit constraint that _p_ be scalar (ie, a singleton). 
This constraint is overridden by writing _p: option q_ (which lets _p_ be empty or a scalar) or _p: set q_ (which eliminates the constraint entirely). Equality is just standard set equality, and is short for a subset constraint in each direction. + +An arrow that appears as the outermost expression operator on the right-hand side of a subset formula can be annotated with _multiplicity markings_: + (one or more), ? (zero or one) and ! (exactly one). The formula + +\geekmath r: S m \textarrow n T + +where _m_ and _n_ are multiplicity markings constrains the relation _r_ to map each atom of _S_ to _n_ atoms of _T_, and to map _m_ atoms of _S_ to each atom of _T_. _S_ and _T_ may themselves be product expressions, but are usually variables denoting sets. For example, + +\geekmath r: S \textarrow ! T\\ +r: S ? \textarrow ! T + +make _r_ respectively a total function on _S_ and an injection. + +Larger formulas are obtained using the standard logical connectives: && (and), || (or), ! (not), => (implies), _iff_ (bi-implication). The formula _if b then f else g_ is short for _b_ => _f_ && !_b_ => _g_. Within curly braces, consecutive formulas are implicitly conjoined. + +Quantifications take their usual form: + +\geekmath all x: e | F + +is true when the formula _F_ holds under every binding of the variable _x_ to a member of the set _e_. In addition to the standard quantifiers, _all_ (universal) and _some_ (existential), we have _no_, _sole_ and _one_ meaning respectively that there are no values, at most one value, and exactly one value satisfying the formula. For a quantifier _Q_ and expression _e_, the formula _Q e_ is short for _Q x: T | e_ (where _T_ is the type of _e_), so _no e_, for example, says that _e_ is empty. + +The declaration of a quantified formula is itself a formula---an elementary formula in which the left-hand side is a variable. Thus + +\geekmath some x = e | F + +is permitted, and is a useful way to express a _let_ binding. 
Quantifiers may be higher-order; the formula + +\geekmath all f: s ->! t | F + +is true when _F_ holds for every binding of a total function from _s_ to _t_ to the variable _f_. Our analysis tool cannot currently handle higher-order quantifiers, but many uses of higher-order quantifiers that arise in practice can be eliminated by skolemization. + +Finally, we have relational comprehensions; the expression + +\geekmath {x_1: e_1, x_2: e_2, ... | F} + +constructs a relation of tuples with elements _x_1_, _x_2_, etc., drawn from set expressions _e_1_, _e_2_, etc., whose values satisfy _F_. + +# \subsection Choice of Operator Symbols +# +# The choice of symbols, especially the arrow, may seem unconventional, but results in familiar-# looking formulas. The dot operator generalizes the 'navigation expressions' of Syntropy# [CD94], now adopted by UML's Object Constraint Language [17], and is intended to be fa# miliar to programmers by resembling object dereferencing. Thus, _x.f_ can be viewed as dere# ferencing the object _x_ with field _f_ when _x_ is a scalar and _f_ is a binary relation. The cho# ice of relational composition rather than function application allows such an expression to be wr# itten without concern for whether _f_ is a function. It also gives a simple and workable treatmen# t of partiality. When _x_ is not in the domain of _f_, _x.f_ is the empty set, and _x.f = y_ will be# false if _y_ is a scalar. +# +# The arrow notation is designed to allow declarations to be written in a familiar way, but to be # given a simple, first-order interpretation. For example, if _S_ and _T_ denote sets, +# +# \geekmath f: S \textarrow T +# +# declares _f_ to be a binary relation from _S_ to _T_. A conventional interpretation would have # the arrow construct a set of relations---a higher-order notion. Instead, we interpret the arrow # as cross product and the colon as subset, with the same result. 
The choice of arrow is also # convenient for constructing tuples; when _x_ and _y_ are scalars, the formula# +# +# \geekmath r' = r + (x \textarrow y) +# +# makes _r'_ the relation containing the tuples of _r_, and additionally, a mapping from _x_ to # _y_. # +\subsection Signatures + +A _signature_ declaration introduces a basic type, along with a collection of relations called _fields_. The declaration + +\geekmath sig S {f: E} + +declares a basic type _S_, and a relation _f_. If _E_ has the type (T_1, T_2, ..., T_n), the relation _f_ will have the type (S, T_1, T_2, ..., T_n), and if _x_ has the type _S_, the expression _x.f_ will have the same type as _E_. When there are several fields, field names already declared may appear in expressions on the right-hand side of declarations; in this case, a field _f_ is typed as if it were the expression _this.f_, where _this_ denotes an atom of the signature type (see Section 3.6). + +The meaning of a specification consisting of a collection of signature declarations is an assignment of values to global constants-- the signatures and the fields. For example, the specification + +\geekmath sig Addr {}\\ +sig Data {}\\ +sig Memory {map: Addr -> Data} + +has 4 constants---the three signatures and one field---with assignments such as: + +\geekmath Addr = {a0, a1}\\ +Data = {d0, d1, d2}\\ +Memory = {m0, m1}\\ +map = {(m0,a0,d0), (m1,a0,d1), (m1,a0,d2)} + +corresponding to a world in which there are 2 addresses, 3 data values and 2 memories, with the first memory (_m0_) mapping the first address (_a0_) to the first data value (_d0_), and the second memory (_m1_) mapping the first address (_a0_) both to the second (_d1_) and third (_d2_) data values. + +A fact is a formula that constrains the constants of the specification, and therefore tends to reduce the set of assignments denoted by the specification. 
For example, + +\geekmath fact {all m: Memory | all a: Addr | sole m.map[a]} + +rules out the above assignment, since it does not permit a memory (such as _m1_) to map an address (such as _a0_) to more than one data value. + +The meaning of a function is a set of assignments, like the meaning of the specification as a whole, but these include bindings to parameters. For example, the function + +\geekmath fun Read (m: Memory, d: Data, a: Addr) {\\ + d = m.map[a]\\ + } + +has assignments such as: + +\geekmath Addr = {a0, a1}\\ +Data = {d0, d1, d2}\\ +Memory = {m0, m1}\\ +map = {(m0,a0,d1)}\\ +m = {m0}\\ +d = {d1}\\ +a = {a0} + +The assignments of a function representing a state invariant correspond to states satisfying the invariant; the assignments of a function representing an operation (such as _Read_) correspond to executions of the operation. + +An assertion is a formula that is claimed to be _valid_: that is, true for every assignment that satisfies the facts of the specification. To check an assertion, one can search for a _counterexample_: an assignment that makes the formula false. +For example, the assertion + +\geekmath assert {\\ + all m,m': Memory, d: Data, a: Addr | Read (m,d,a) => Read (m',d,a)} + +which claims, implausibly, that if a read of memory _m_ returns _d_ at _a_, then so does a read at memory _m'_, has the counterexample + +\geekmath Addr = {a0}\\ +Data = {d0,d1}\\ +Memory = {m0, m1}\\ +map = {(m0,a0,d0), (m1,a0,d1)} + +To find a counterexample, a tool should negate the formula and then skolemize away the bound variables, treating them like the parameters of a function, with values to be determined as part of the assignment. In this case, the assignment might include: + +\geekmath m = {m0}\\ +m' = {m1}\\ +d = {d0}\\ +a = {a0} + +\subsection Extension + +Not every signature declaration introduces a new basic type. 
A signature declared without an extension clause is a _type signature_, and creates both a basic type and a set constant of the same name. A signature _S_ declared as an extension is a _subsignature_, and creates only a set constant, along with a constraint making it a subset of each _supersignature_ listed in the extension clause. The subsignature takes on the type of the supersignatures, so if there is more than one, they must therefore have the same type, by being direct or indirect subsignatures of the same type signature. + +A field declared in a subsignature is as if declared in the corresponding type signature, with the constraint that the domain of the field is the subsignature. For example, + +\geekmath sig List {}\\ +sig NonEmptyList extends List {elt: Elt,rest: List} + +makes _List_ a type signature, and _NonEmptyList_ a subset of _List_. The fields _elt_ and _rest_ map atoms from the type _List_, but are constrained to have domain _NonEmptyList_. Semantically, it would have been equivalent to declare them as fields of _List_, along with facts constraining their domains: + +\geekmath sig List {elt: Elt,rest: List}\\ +sig NonEmptyList extends List {}\\ +fact {elt.Elt in NonEmptyList}\\ +fact {rest.List in NonEmptyList} + +(exploiting our dot notation to write the domain of a relation _r_ from _S_ to _T_ as _r.T_). + +\subsection Overloading and Implicit Prefixing + +Whenever a variable is declared, its type can be easily obtained from its declaration (from the type of the expression on the right-hand side of the declaration), and every variable appearing in an expression is declared in an enclosing scope. The one complication to this rule is the typing of fields. + +For modularity, a signature creates a local namespace. Two fields with the name _f_ appearing in different signatures do not denote the same relational constant. Interpreting an expression therefore depends on first resolving any field names that appear in it. 
+#We have devised a simple resolution scheme whose details are beyond the scope of this paper. +In an expression of the form _e.f_, the signature to which _f_ belongs is determined according to the type of _e_. To keep the scheme simple, we require that sometimes the specifier resolve the overloading explicitly by writing the field _f_ of signature _S_ as _S$f_. (At the end of the previous section, for example, the reference in the fact to _rest_ should actually be to _List$rest_, since the context does not indicate which signature _rest_ belongs to.) + +In many formulas, a single expression is dereferenced several times with different fields. A couple of language features are designed to allow these formulas to be written more succinctly, and, if used with care, more comprehensibly. First, we provide two syntactic variants of the dot operator. Both _p_::_q_ and _q_[_p_] are equivalent to _p.q_, but have different precedence: the double colon binds more tightly than the dot, and the square brackets bind more loosely than the dot. Second, we provide a _with_ construct similar to Pascal's that makes dereferencing implicit. + +Consider, for example, the following simplified signature for a trace: + +\geekmath sig Trace {\\ + ticks: set Tick,\\ + first: Tick,\\ + next: Tick -> Tick,\\ + state: Tick -> State\\ + } + +Each trace _t_ has a set of ticks _t.ticks_, a first tick _t.first_, an ordering _t.next_ that maps ticks to ticks, and a relation _t.state_ mapping each tick to a state. For a trace _t_ and tick _k_, the state is _k_.(_t.state_); the square brackets allow this expression to be written instead as _t.state_[_k_]. To constrain _t.ticks_ to be those reachable from _t. 
first_ we might write: + +\geekmath fact {all t: Trace | (t.first).*(t.next ) = t.ticks} + +Relying on the tighter binding of the double colon, we can eliminate the parentheses: + +\geekmath fact {all t: Trace | t::first.*t::next = t.ticks} + +Using _with_, we can make the _t_ prefixes implicit: + +\geekmath fact {all t: Trace | with t | first.*next = ticks} + +In general, _with e | F_ is like _F_, but with _e_ prefixed wherever appropriate to a field name. Appropriateness is determined by type: _e_ is matched to any field name with which it can be composed using the dot operator. +#Fields that are prefixed using a double colon operator are not automatically prefixed, so one can use _with_ to prefix some fields of a given signature but not others. There is a corresponding _with_ construct for expressions also, so that _with e | E_ is like the expression _E_, with _e_ prefixed as appropriate. +A fact attached to a signature _S_ is implicitly enclosed by _all this: S | with this |_, and the declarations of a signature are interpreted as constraints as if they had been declared within this scope. Consequently, the declaration of _first_ above should be interpreted as if it were the formula: + +\geekmath all this: Trace | with this | first: ticks + +which is equivalent to + +\geekmath all this: Trace | this.first: this.ticks + +and should be typed accordingly. +# +# So, in the following fuller version of the above signature: +# +# \geekmath sig Trace {\\ +# ticks: set Tick\\ +# first: ticks,\\ +# next: (ticks - first) ->? ticks\\ +# state: ticks ->! State\\ +# } {first.*next = ticks} +# +# the declaration of the field _first_, for example, includes the constraint +# +# \geekmath all this: Trace | with this | first: ticks +# +# which is equivalent to +# +# \geekmath all this: Trace | this.first: this.ticks + +\subsection Function Applications + +A function may be applied by binding its parameters to expressions. 
The resulting application may be either an expression or a formula, but in both cases the function body is treated as a formula. The formula case is simple: the application is simply short for the body with the formal parameters replaced by the actual expressions (and bound variables renamed where necessary to avoid clashes). + +The expression case is more interesting. The application is treated as syntactic sugar. Suppose we have a function application expression, _e_ say, of the form + +\geekmath f(a_1, a_2, ..., a_n) + +that appears in an elementary formula _F_. The declaration of the function _f_ must list _n_ + 1 formal arguments, of which the _second_ will be treated as the result. The entire elementary formula is taken to be short for + +\geekmath all result: D | f (a_1, result, a_2, ..., a_n) => F [result/e] + +where _D_ is the right-hand side of the declaration of the missing argument, and _F_ [_result_/_e_] is _F_ with the fresh variable _result_ substituted for the application expression _e_. The application of _f_ in this elaborated formula is now a formula, and is treated simply as an inlining of the formula of _f_. + +#Type checking will thus require that the actual arguments match the formals that are listed first, third, fourth, fifth, etc. (This choice of the second argument, incidentally, is one concession we make to specifying state machines; function applications can be used to model operation invocations in which it is convenient to declare the pre- and post- states as the first and second arguments of the operation.) +# +To see how this works, consider the definition of a function _dom_ that gives the domain of a relation over signature _X_: + +\geekmath fun dom (r: X -> X, d: set X) {d = r.X} + +(We have defined the function monomorphically for a homogeneous relation. In practice, one would define a polymorphic function, but we want to avoid conflating two unrelated issues.) 
Here is a trivial assertion that applies the function as an expression: + +\geekmath assert {all p: X \textarrow X | (dom (p)).p in X} + +Desugaring the formula, we get + +\geekmath all p: X \textarrow X | all result: set X | dom (p, result) => result.p in X + +and then inlining + +\geekmath all p: X \textarrow X | all result: set X | result = p.X => result.p in X + +This formula can be reduced (by applying a universal form of the One Point Rule) to + +\geekmath all p: X \textarrow X | (p.X).p in X + +which is exactly what would have been obtained had we just replaced the application expression by the expression on the right-hand side of the equality in the function's definition! +# +# If there is more than one application expression in an elementary formula, a fresh quantification is # generated for each. For example,# +# +# \geekmath assert {all p, q: X \textarrow X | dom (p.q) in dom (p)} +# +# becomes +# +# \geekmath all p,q: X \textarrow X | all result1, result2: set X | \\ +# dom (p.q, result1) => dom (p, result2) => result1 in result2 +# +# which can again be reduced by inlining and the One Point Rule to +# +# \geekmath all p,q: X \textarrow X | (p.q).X in p.X + +Now let's consider an implicit definition. Suppose we have a signature _X_ with an ordering _lte_, so that _e.lte_ is the set of elements that _e_ is less than or equal to, and a function _min_ that gives the minimum of a set, defined implicitly as the element that is a member of the set, and less than or equal to all members of the set: + +\geekmath sig X {lte: set X}\\ +fun min (s: set X, m: option X) {\\ + m in s && s in m.lte\\ + } + +Because the set may be empty, _min_ is partial. Depending on the properties of _lte_ it may also fail to be deterministic. 
A formula that applies this function + +\geekmath assert {all s: set X | min (s) in s} + +can as before be desugared + +\geekmath all s: set X | all result: option X | min (s, result) => result in s + +and expanded by inlining + +\geekmath all s: set X | all result: option X |\\ + (result in s) && s in result.lte => result in s + +but in this case the One Point Rule is not applicable. + +As a convenience, our language allows the result argument of a function to be declared anonymously in a special position, and given the name _result_. The domain function, for example, can be defined as: + +\geekmath fun dom (r: X -> X): set X {result = r.X} + +How the function is defined has no bearing on how it is used; this definition is entirely equivalent to the one above, and can also be applied as a formula with two arguments. + +\subsection Polymorphism + +Polymorphism is treated as a syntactic shorthand. Lack of space does not permit a full discussion here. + +\section Related Work + +We have shown how a handful of elements can be assembled into a rather simple but flexible notation. The elements themselves are far from novel---indeed, we hope that their familiarity will make the notation easy to learn and use---but their assembly into a coherent whole results in a language rather different from existing specification languages. + +\subsection New Aspects + +The more novel aspects of our work are: + +\point \cdot _Objectification of state_. Most specification languages represent states as cartesian products of components; in our approach, a state, like a member of any signature, is an individual---a distinct atom with identity. A similar idea is used in the situation calculus [11], whose 'relational fluents' add a situation variable to each time-varying relation. The general idea of objectifying all values is of course the foundation of object-oriented programming languages, and was present in LISP. 
Interestingly, object-oriented variants of Z (such as [1]) do not objectify schemas. The idea of representing structures in first-order style as atoms is present also in algebraic specifications such as Larch [2], which treat even sets and relations in this manner. + +\point \cdot _Components as relations_. Interpreting fields of a structure as functions goes back to early work on verification, and is widely used (for example, by Leino and Nelson [10]). We are not aware, however, of specification languages that use this idea, or that flatten fields to relations over atoms. + +\point \cdot _Extension by global axioms_. The 'facts' of our notation allow the properties of a signature to be extended monotonically. The idea of writing axioms that constrain the members of a set constant declared globally is hardly remarkable, but it appears not to have been widely exploited in specification languages. + +\point \cdot _Extension by subset_. Treating the extension of a structure as a refinement modelled by subset results in a simple semantics, and melds well with the use of global axioms. Again, this seems to be an unremarkable idea, but one whose power has not been fully recognized. + +\subsection Old Aspects + +The aspects of our work that are directly taken from existing languages are: + +\point \cdot _Formulas_. The idea of treating invariants, definitions, operations, etc, uniformly as logical formulas is due to Z [14]. + +\point \cdot _Assertions_. Larch [2] provides a variety of constructs for adding intentional redundancy to a specification in order to provide error-detection opportunities. + +\point \cdot _Parameterized formulas_. The 'functional' style we have adopted, in which all formulas are explicitly parameterized, in contrast to the style of most specification languages, is used also by languages for theorem provers, such as PVS [13]. 
VDM [8] offers a mechanism called 'operation quotation' in which pre- and post-conditions are reused by interpreting them as functions similar to ours. + +\point \cdot _Parametric Polymorphism_. The idea of parameterizing descriptions by types was developed in the programming languages community, most notably in the context of ML [12]. + +\point \cdot _Implicit Prefixing_. Our 'with' operator is taken from Pascal [9]. + +\point \cdot _Relational operators_. The dot operator, and the treatment of scalars as singletons, comes from the earlier version of Alloy [4]. +# +#\point \cdot _Function shorthands_. The idea of desugaring function applications by quantifying over the result is present in Beth's extensionality theorem [Beth]. + +\subsection Z's Schema Calculus + +Z has been a strong influence on our work; indeed, this paper may be viewed as an attempt to achieve some of the power and flexibility of Z's schema calculus in a first-order setting. Readers unfamiliar with Z can find an excellent presentation of the schema calculus in [16]. The current definitive reference is [15], although Spivey's manual [14] is more accessible for practitioners. + +A _schema_ consists of a collection of variable declarations and a formula constraining the variables. Schemas can be anonymous. When a name has been bound to a schema, it can be used in three different ways, distinguished according to context. First, it can be used as a _declaration_, in which case it introduces its variables into the local scope, constraining them with its formula. Second, where the variables are already in scope, it can be used as a _predicate_, in which case the formula applies and no new declarations are added. Both of these uses are syntactic; the schema can be viewed as a macro. + +In the third use, the schema is semantic. Its name represents a set of _bindings_, each binding being a finite function from variable names to values. 
The bindings denoted by the schema name are the models of the schema's formula: those bindings of variable names to values that make the formula true. + +How a schema is being applied is not always obvious; in the set comprehension {_S_}, for example, _S_ represents a declaration, so that the expression as a whole denotes the same set of bindings as _S_ itself. Given a binding _b_ for a schema with component variable _x_, the expression _b.x_ denotes the value assigned to _x_ in _b_. Unlike Alloy's dot, this dot is a function application, so for a set of bindings _B_, the expression _B.x_ is not well formed. + +Operations in Z are expressed using the convention that primed variables denote components of the post-state. A mechanism known as _decoration_ allows one to write _S'_ for the schema that is like _S_, but whose variable names have been primed. Many idioms, such as promotion, rely on being able to manipulate the values of a schema's variables in aggregate. To support this, Z provides the theta operator: \theta @sep _S_ is an expression that denotes a binding in which each variable _x_ that belongs to _S_ is bound to a variable of the same name _x_ declared in the local scope. Theta and decoration interact subtly: \theta @sep _S'_ is not a binding of _S'_, but rather binds each variable _x_ of _S_ to a variable _x'_ declared locally. So where we would write _s=s'_ to say that pre- and post-states _s_ and _s'_ are the same, a Z specifier would write \theta @sep _S_ = \theta @sep _S'_. This formula equates each component _x_ of _S_ to its matching component _x'_ of _S'_, because _x_ and _x'_ are the respective values bound to _x_ by \theta @sep _S_ and \theta @sep _S'_ respectively. + +Our 'fact' construct allows the meaning of a signature name to be constrained subsequent to its declaration. A schema, in contrast, is 'closed': a new schema name must be introduced for each additional constraint. 
This can produce an undesirable proliferation of names for a system's state, but it does make it easier to track down those formulas that affect a schema's meaning. + +The variables of a schema can be renamed, but cannot be replaced by arbitrary expressions (since this would make nonsense of declarations). This requires the introduction of existential quantifiers where in our notation an expression is passed as an actual. On the other hand, when no renaming is needed, it is more succinct. + +Z's sequential composition operator is defined by a rather complicated transformation, and relies on adherence to particular conventions. The schema _P_ @sep \fatsemi @sep _Q_ is obtained by collecting primed variables in _P_ that match unprimed variables in _Q_; renaming these in both _P_ and _Q_ with a new set of variable names; and then existentially quantifying the new names away. For example, to say that a read following a write to the same address yields the value written, we would write: + +\geekmath +all m: Memory, a: Addr, d, d': Data | Read (Write(m,a,d),d') => d = d' + +which is short for + +\geekmath all m: Memory, a: Addr, d, d': Data |\\ + all m': Memory | Write (m,m',a,d) => Read (m,a,d') => d = d' + +In Z, assuming appropriate declarations of a schema _Memory_ and a given type _Data_, the formula would be: + +\geekmath +\forall Memory; Memory'; x!: Data \fatdot Write \fatsemi Read [x!/d!] \implies x! = d! + +which is short for + +\geekmath +\forall Memory; Memory'; x!: Data \fatdot \\ + \exists Memory'' \fatdot \\ + \exists Memory' \fatdot Write \and \theta @sep Memory' = \theta @sep Memory''\\ + \exists Memory'; d!: Data \fatdot \\ + Read \and \theta @sep Memory = \theta @sep Memory'' \and d! = x!\\ + \implies x! = d! + +The key semantic difference between signatures and schemas is this. A signature is a set of atoms; its fields are relational constants declared in global scope. 
A schema, on the other hand, denotes a higher-order object: a set of functions from field names to values. Our approach was motivated by the desire to remain first order, so that the analysis we have developed [3] can be applied. Not surprisingly, there is a cost in expressiveness. We cannot express higher-order formulas, most notably those involving preconditions. Suppose we want to assert that our write operation has no implicit precondition. In Z, such an assertion is easily written: + +\geekmath +\forall Memory; a?: Addr \fatdot \exists Memory'; d!: Data \fatdot Write + +We might attempt to formulate such an assertion in our notation as follows: + +\geekmath assert {\\ + all m: Memory, a: Addr, d: Data | some m': Memory | Write (m,m',d,a) + } + +Unfortunately, this has counterexamples such as + +\geekmath Addr = {a0}\\ +Data = {d0}\\ +Memory = {m0, m1}\\ +map = {} + +in which the _map_ relation lacks an appropriate tuple. Intuitively, the assertion claims that there is no context in which a write cannot proceed; a legitimate counterexample---but one we certainly did not intend---simply gives a context in which a memory with the appropriate address-value mapping is not available. + +We have focused in this discussion on schemas. It is worth noting that Z is expressive enough to allow a style of structuring almost identical to ours, simply by declaring signatures as given types, fields and functions as global variables, and by writing facts, and the bodies of functions, as axioms. Field names would have to be globally unique, and the resulting specification would likely be less succinct than if expressed in our notation. + +\subsection Phenomenology + +Pamela Zave and Michael Jackson have developed an approach to composing descriptions [18] that objectifies states, events and time intervals, and constrains their properties with global axioms. 
Objectification allows descriptions to be reduced to a common phenomenology, so that descriptions in different languages, and even in different paradigms can be combined. Michael Jackson has argued separately for the importance of objectification as a means of making a more direct connection between a formal description and the informal world: as he puts it, "domain phenomena are facts about individuals" [7]. It is reassuring that the concerns of language design and tractability of analysis that motivated our notation are not in conflict with sound method, and it seems that our notation would be a good choice for expressing descriptions in the form that Zave and Jackson have proposed. + +\section Evaluation + +\subsection Merits + +The key motivations of the design of our mechanism have been minimality and flexibility. It is worth noting how this has been achieved by the _omission_ of certain features: + +\point \cdot There is only one form of semantic structuring; our opinion is that adding extra mechanisms, for example to group operations into classes, does not bring enough benefit to merit the additional complexity, and tends to be inflexible. (Our language does provide some namespace control for signature and paragraph names in the style of Java packages, but this is trivial and does not interact with the basic mechanism). + +\point \cdot There is no subtyping; subsignatures are just subsets of their supersignatures, and have the same type. There are only two types: basic types (for signatures), and relational types (for expressions). Types are not nested. + +\point \cdot There is only one way that formulas are packaged for reuse. The same function syntax is used for observers, operations, refinement relations, etc. The function shorthand syntax unifies the syntax of both declaration and use for explicit and implicit function definitions. 
+ +\point \cdot The values of a signature with fields are just like the values of any basic type; there is nothing like Z's notion of a schema binding. + +Our interpretation of a subsignature as a subset of the supersignature appears to be novel as a mechanism for structuring in a specification language. It has three nice consequences: + +\point \cdot _Elimination of type coercions_. If _x_ belongs to a signature _S_ whose extension _S'_ defines a field _f_, the expression _x.f_ will just denote an empty set if _x_ does not belong to _S'_. Contrast this with the treatment of subclasses in the Object Constraint Language [17], for example, which results in pervasive coercions and often prevents the use of set and relation operators (since elements must be coerced one at a time). + +\point \cdot _Ease of extension_. Constraints can be added to the subsignature simply by writing a constraint that is universally quantified over elements of that subset. + +\point \cdot _Definitional extension_. We can declare an extension _S'_ of a signature _S_ with additional fields, relate these fields to the fields declared explicitly for _S_, and then record the fact that _S=S'_ (as illustrated in Section 2.11). The effect is that every atom of _S_ has been extended with appropriately defined fields, which can be accessed whenever an expression denoting such an atom is in scope! We expect to find this idiom especially useful for defining additional fields for visualization purposes. + +\subsection Deficiencies + +One might wonder whether, having encoded structures using atoms, and having provided quantifiers over those atoms, one can express arbitrary properties of higher-order structures. Unfortunately, but not surprisingly, this is not possible. The catch is that fields are treated in any formulas as global variables that are existentially quantified. 
To simulate higher-order logic, it would be necessary to allow quantifications over these variables, and since they have relational type, that would imply higher-order quantification. The practical consequence is that properties requiring higher-order logic cannot be expressed. One cannot assert that the precondition of an operation is no stronger than some predicate; one cannot in general specify operations by minimization; and one cannot express certain forms of refinement check. An example of this problem is given in Section 4.3 above. Whether the problem is fundamental or can be partially overcome remains to be seen. + +The treatment of subsignatures as subsets has a nasty consequence. Since a field declared in a subsignature becomes implicitly a field of the supersignature, two subsignatures cannot declare fields of the same name. The extension mechanism is therefore not properly modular, and a specification should use hierarchical structure instead where this matters. + +Modelling a set of states as atoms entails a certain loss of abstraction. In this specification + +\geekmath sig A {}\\ +sig S {a: A}\\ +fun op (s,s': S) {s.a = s'.a} + +the operation _op_ has executions in which the pre- and post-states are equal (that is, the same atom in _S_), and executions in which only their _a_ components are equal. One might object that this distinction is not observable. Moreover, replacing the formula by _s=s'_ would arguably be an overspecification---a 'bias' in VDM terminology [8]. The situation calculus [11] solves this problem by requiring every operation to produce a state change: _s_ and _s'_ are thus regarded as distinct situations by virtue of occurring at different points in the execution. The dual of this solution is to add an axiom requiring that no two distinct atoms of _S_ may have equal _a_ fields. Either of these solutions is easily imposed in our notation. 
+ +Our treatment of scalars and sets uniformly as relations has raised the concern that the resulting succinctness comes with a loss of clarity and redundancy. Extensive use of the previous version of our language, mostly by inexperienced specifiers, suggests that this is not a problem. The loss of some static checking is more than compensated by the semantic analysis that our tool performs. + +\section Conclusion + +Two simple ideas form the basis of our modularity mechanism: (1) that a structure is just a set of atoms, and its fields are global relations that map those atoms to structure components; and (2) that extensions of a structure are just subsets. Our relational semantics, in which all variables and fields are represented as relations, makes the use of structures simple and succinct, and it ensures that the language as a whole remains first order. For a variety of modelling tasks, we believe that our approach provides a useful balance of expressiveness and tractability. + +\section* Acknowledgments + +The language described here was refined by experience writing specifications, long before an analyzer existed, and by the development of the analyzer tool itself. Mandana Vaziri and Sarfraz Khurshid were our early adopters, and Brian Lin and Joe Cohen helped implement the tool. The paper itself was improved greatly by comments from Mandana and Sarfraz, from Michael Jackson, from Tomi Mannisto, and especially from Pamela Zave, whose suggestions prompted a major rewrite. Jim Woodcock helped us understand Z, and the clarity and simplicity of his own work has been a source of inspiration to us. Our ideas have also been improved by the comments of the members of IFIP working groups 2.3 and 2.9, especially Tony Hoare, Greg Nelson and Rustan Leino. This work was funded in part by ITR grant #0086154 from the National Science Foundation, by a grant from NASA, and by an endowment from Doug and Pat Ross. 
+ +\section* References + +#\ref [CD94] Steve Cook and John Daniels. Designing Object Systems: Object-Oriented Modelling with Syntropy. Prentice Hall, 1994. +# +\ref [1] R. Duke, G. Rose and G. Smith. Object-Z: A Specification Language Advocated for the Description of Standards. SVRC Technical Report 94-45. The Software Verification Research Centre, University of Queensland, Australia. + +\ref [2] John V. Guttag, James J. Horning, and Andres Modet. Report on the Larch Shared Language: Version 2.3. Technical Report 58, Compaq Systems Research Center, Palo Alto, CA, 1990. + +#\ref [Hal90] Anthony Hall. Using Z as a Specification Calculus for Object-Oriented Systems. In D. Bjorner, C.A.R. Hoare, and H. Langmaack, eds., VDM and Z: Formal Methods in Software Development, Lecture Notes in Computer Science, Volume 428, pp. 290–381, Springer-Verlag, New York, 1990. +# +\ref [3] Daniel Jackson. Automating first-order relational logic. Proc. ACM SIGSOFT Conf. Foundations of Software Engineering. San Diego, November 2000. + +\ref [4] Daniel Jackson. Alloy: A Lightweight Object Modelling Notation. To appear, ACM Transactions on Software Engineering and Methodology, October 2001. + +\ref [5] Daniel Jackson, Ian Schechter and Ilya Shlyakhter. Alcoa: the Alloy Constraint Analyzer. Proc. International Conference on Software Engineering, Limerick, Ireland, June 2000. + +\ref [6] Daniel Jackson and Jeannette Wing. Lightweight Formal Methods. In: H. Saiedian (ed.), An Invitation to Formal Methods. IEEE Computer, 29(4):16-30, April 1996. + +\ref [7] Michael Jackson. Software Requirements and Specifications: A Lexicon of Practice, Principles and Prejudices. Addison-Wesley, 1995. + +\ref [8] Cliff Jones. Systematic Software Development Using VDM. Second edition, Prentice Hall, 1990. + +\ref [9] Kathleen Jensen and Niklaus Wirth. Pascal: User Manual and Report. Springer-Verlag, 1974. + +\ref [10] K. Rustan M. Leino and Greg Nelson. Data abstraction and information hiding. 
Research Report 160, Compaq Systems Research Center, November 2000. + +\ref [11] Hector Levesque, Fiora Pirri, and Ray Reiter. Foundations for the Situation Calculus. Linköping Electronic Articles in Computer and Information Science, ISSN 1401-9841, Vol. 3(1998), Nr. 018. + +\ref [12] Robin Milner, Mads Tofte and Robert Harper. The Definition of Standard ML. MIT Press, 1990. + +\ref [13] S. Owre, N. Shankar, J. M. Rushby, and D. W. J. Stringer-Calvert. PVS Language Reference. Computer Science Laboratory, SRI International, Menlo Park, CA, September 1999. + +\ref [14] J. Michael Spivey. The Z Notation: A Reference Manual. Second edition, Prentice Hall, 1992. + +\ref [15] Ian Toyn et al. Formal Specification---Z Notation---Syntax, Type and Semantics. Consensus Working Draft 2.6 of the Z Standards Panel BSI Panel IST/5/-/19/2 (Z Notation). August 24, 2000. + +\ref [16] Jim Woodcock and Jim Davies. Using Z: Specification, Refinement and Proof. Prentice Hall, 1996. + +\ref [17] Jos Warmer and Anneke Kleppe. The Object Constraint Language: Precise Modeling with UML. Addison Wesley, 1999. + +\ref [18] Pamela Zave and Michael Jackson. Conjunction as Composition. ACM Transactions on Software Engineering and Methodology II(4): 379--411, October 1993. diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/doc/notes.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/doc/notes.txt new file mode 100755 index 00000000..ce232ec8 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/doc/notes.txt @@ -0,0 +1,154 @@ +A. Motivations +A.1 Independence from layout tool +A.2 Ability to produce HTML +A.3 Editable text, email, etc +A.4 Combine compilation and WYSIWYG +A.5 Better automation of cross refs than Frame +A.6 Better typography than TeX +A.7 Ability to use my own PS fonts +A.8 Easy layout of code, which TeX handles badly + +B. 
Project Plans +B.1 Small initial implementation; iterative extension +B.2 Experiment with JUnit +B.3 Try documentation lite +B.4 Look at conformance issues + +C. Requirements +C.1 Input format +C.1.1 Plain text, platform independent (no Unix/Windows line end problems) +C.1.2 Minimal syntax +C.1.3 Text layout respected for all but EOL, unless // +C.1.4 All keywords introduced by backslash +C.1.5 Kinds of tag + \section paragraph tag + \alpha special char + \date special + \bold formatting tag + kind of tag is resolved by tables loaded from file +C.1.6 Special Tokens + --, ---, .., ... dashes and ellipses + \\ hard line break + non-breaking hyphen, breaking hyphen, etc? + en, em space? + italic correction? + | separator + {,} used for grouping +C.1.6a Printing Characters Used in Special Tokens + \#,\_,\{,\} will print as #,_,{,} + \ is written \backslash +C.1.7 Groupings (obsolete) + \{x} may replace \x when want to avoid extra space + eg. \{alpha}_1 to give an alpha with a subscript 1 + but paragraph tags behave in opposite way: consume whitespace that follows, so use \{para} to include whitespace. +C.1.7 Groupings + abandoned this in favour of simpler scheme with | for separator and {} for logical grouping; + para tag consumes only first space token, so "\parastyle | hello" to put space before "hello" +C.1.8 Cross References + \tag{t} marks paragraph with tag t + \name{s} makes s the citation string for this para + \cite{t} citation of paragraph with tag t + for autonumbered paras, number is citation string by default +C.1.9 Para breaks + determined by blank line; mid-line paragraph tag is an error +C.1.10 Spacing + as given explicitly, except; line breaks and their leading spaces, replaced by single space; first whitespace group (including line break) following paragraph tag. +C.1.11 Italicization + between underscores + have this indicate a character style instead? allow override at start of file. 
+C.1.12 Sub and superscripts (obsolete) + text_i, text_{abc} + text^i, text^{abc} +C.1.12 Sub and superscripts + abandoned _^ scheme because it's more trouble parsing + may reinstate later + \sub{...} puts text between curlies in subscript +C.1.12a Formatting Tags + generalize to \bold, \roman, etc + formatting runs from that point onwards, unless explicit grouping: \bold{abc}def puts abc, but not def in bold. +C.1.13 Comments +# in first column makes line a comment +C.1.14 Quotes + "hello" + it's + '69 +C.2 Style File +C.2.1 Next + specifies default paragraph style order +C.2.2 Character Styles + assume none for now +C.2.3 Syntax + In all property files, use property list syntax: + <style:section><next:indent>... +C.2.4 Autonumbering +C.2.4.1 Star after style name suppresses numbering + \section* +C.2.4.2 Associate with each style + series + level + separator (comes before extension string for this style's numbering) + trailer (between number and para) + leader (before number) +C.2.4.2 Associate a series and a level with each style, eg, <series:A><level:2> +C.2.4.3 Numbering formats: + 0 gives 0,1,2 + 1 1,2,3 + a a,b,c + A A,B,C + i i,ii,iii,iv, + I I,II,III,IV +C.2.4.4 Examples + example: + <style:section><series:section><level:0><numbering:1><trailer:.\tab> + <style:subsection><series:section><level:1><numbering:1><separator:.><trailer:\tab> + <style:subsubsection><series:section><level:3><numbering:a><trailer:\tab> + gives + 1. First section + 1.1 First subsection + 1.1a First subsubsection + example: + <style:point><leader:\cdot><trailer:\tab> + gives points preceded by a center dot and a tab +C.2.4.5 Citation + Numbering string, excluding leader and trailer, becomes default citation string for its para. 
+ +C.3 Stylesheets +At top of file, \stylesheet{filename} sets name of style file + +C.4 Other Features +C.4.1 Wishlist + Reserved word transformations (eg, bolding) + Inference rules +C.5 Character File +C.5.1 Must be backend independent +C.5.2 Example + <name:alpha><font:MathB><index:155> + + +D. Design Issues +D.1 General Thoughts +D.1.1 Parsing. Break into tokens consisting of whitespace groups, character (non-whitespace) groups, commands, etc. +D.1.2 Use simple hand-written recursive descent parser +D.1.3 Need to read in style file first so that paragraph style names are recognized. +D.1.4 Build a table and select action by token type +D.1.5 Incorporate trailing space and EOL into single token +D.1.6 Issue: stream or tree based? Probably stream, but have parser recognize close of italics, formattings, etc +D.1.7 For now, don't attempt generic backend. Better to handcraft a backend or two, then generalize. +D.1.8 For references, write out index file and generate warnings for unresolved refs. This allows a single pass. + +D.2 Design + +---------------------------------------------------------------------------------------- +Issues + +Grammar must handle +\section* generates token parastyle section, numbering suppressed +\sub{...} generates tokens begin-sub and end-sub? +quotes? + + + +must have preamble followed by blank line to generate first para +need something to mark end of preamble +how to have action install other actions? will be a comodification. 
+ diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/charmap.txt new file mode 100755 index 00000000..4d81f8f2 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/charmap.txt @@ -0,0 +1,22 @@ +# basic characters +<char:linebreak><index:22> + +# dots +<char:period><index:22> +<char:twodotleader><index:22> +<char:ellipsis><index:22> + +# quotes +<char:apostrophe><index:22> +<char:prime><index:22> +<char:opensinglequote><index:22> +<char:closesinglequote><index:22> +<char:opendoublequote><index:22> +<char:closedoublequote><index:22> + +#dashes +<char:hyphen><index:22> +<char:endash><index:22> +<char:emdash><index:22> + +# math symbols \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/foo-new.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/foo-new.txt new file mode 100755 index 00000000..77a3221b --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/foo-new.txt @@ -0,0 +1,11 @@ +@title:A Micromodularity Mechanism +@author:Daniel Jackson Ilya Shlyakhter and Manu Sridharan<\n> Laboratory for Computer Science<\n> Massachusetts Institute of Technology<\n> Cambridge Massachusetts USA<\n> dnjmit.edu +@opening:Abstract +@noindent:A simple mechanism for structuring specifications is described. By modelling structures as atoms it remains entirely first<\#45>order and thus amenable to automatic analysis. And by interpreting fields of structures as relations it allows the same relational operators used in the formula language to be used for dereferencing. An extension feature allows structures to be developed incrementally but requires no textual inclusion nor any notion of subtyping. The paper demonstrates the flexibility of the mechanism by application in a variety of common idioms. 
+@subsection:A.1 Categories and Subject Descriptors +@noindent:D.2.1 RequirementsSpecifications<\#151>Languages D.2.4 SoftwareProgram Verification<\#151>Formal methods Model checking F.3.1 Specifying and Verifying and Reasoning about Programs<\#151>Assertions Invariants Specification techniques. +@subsection:A.2 General Terms +@noindent:Design Documentation Languages Verification. +@subsection:A.3 Keywords +@noindent:Modeling languages formal specification first<\#45>order logic relational calculus Alloy language Z specification language schema calculus. +@section:B Introduction \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/foo-old.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/foo-old.txt new file mode 100755 index 00000000..051edfee --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/foo-old.txt @@ -0,0 +1 @@ +@title:A Micromodularity Mechanism diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.index.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.index.txt new file mode 100755 index 00000000..e69de29b diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.tag.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.tag.txt new file mode 100755 index 00000000..ce1f41b0 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.tag.txt @@ -0,0 +1,371 @@ +@title:A Micromodularity Mechanism +@section:1 Testing +@noindent:This is gamma: <f"Symbol"><\#103><f$>.<\n> This is Delta: <f"Symbol"><\#68><f$>.<\n> This is oplus: . <f"LucidNewMatSymT"><\#65><f$> <f"Symbol"><\#222><f$> <f"LucidNewMatSymT"><\#65><f$> +@body:This is a subscripted variable: A<-><B>hello<B>there<->. 
Math mode: <I>x<I> + 2 = <I>y<I>, <I>and<I> && <I>x<I><->2<-> = <I>y<I><->3<-> = <I>x<I><-><I>ijk<I><-> +@author:Daniel Jackson, Ilya Shlyakhter and Manu Sridharan<\n> Laboratory for Computer Science<\n> Massachusetts Institute of Technology<\n> Cambridge, Massachusetts, USA<\n> dnj@mit.edu +@opening:Abstract +@noindent:A simple mechanism for structuring specifications is described. By modelling structures as atoms, it remains entirely first<\#45>order and thus amenable to automatic analysis. And by interpreting fields of structures as relations, it allows the same relational operators used in the formula language to be used for dereferencing. An extension feature allows structures to be developed incrementally, but requires no textual inclusion nor any notion of subtyping. The paper demonstrates the flexibility of the mechanism by application in a variety of common idioms. +@subsection:1.1 Categories and Subject Descriptors +@noindent:D.2.1 Requirements/Specifications<\#151>Languages; D.2.4 Software/Program Verification<\#151>Formal methods, Model checking; F.3.1 Specifying and Verifying and Reasoning about Programs<\#151>Assertions, Invariants, Specification techniques. +@subsection:1.2 General Terms +@noindent:Design; Documentation; Languages; Verification. +@subsection:1.3 Keywords +@noindent:Modeling languages; formal specification; first<\#45>order logic; relational calculus; Alloy language; Z specification language; schema calculus. +@section:2 Introduction +@noindent: I am neither crazy nor a micromaniac.<\n> (A micromaniac is someone obsessed with<\n> reducing things to their smallest possible form.<\n> This word, by the way, is not in the dictionary.)<\n> <\#150><I>Edouard de Pomiane, French Cooking in Ten Minutes, 1930<I> +@noindent:Most specification languages provide mechanisms that allow larger specifications to be built from smaller ones. These mechanisms are often the most complicated part of the language, and present obstacles to analysis. 
This paper presents a simple mechanism that seems to be expressive enough for a wide variety of uses, without compromising analyzability. +@body:This work is part of a larger project investigating the design of a <\#147>micro modelling language<\#148>. Our premise is that lightweight application of formal methods [6] demands an unusually small and simple language that is amenable to fully automatic semantic analysis. The Alloy language is the result to date of our efforts to design such a language. Based on our experiences with the language [4] and its analyzer [5], we have recently developed a revision of Alloy that overcomes many of its limitations. This paper describes the key feature of the revised language: the <I>signature<I>, a new modularity mechanism. +@body:The mechanism allows our existing analysis scheme [3] to be applied to specifications involving structures. This is not achieved by treating the structuring mechanism as a syntactic sugar, which would limit the power of the notation (ruling out, for example, quantification over structures) and would complicate the analysis tool and make output harder for users to interpret. Because of the mechanism<\#48>s generality, it has also enabled us to simplify the language as a whole, making it more uniform and eliminating some ad hoc elements. +@body:Our mechanism has a variety of applications. It can express inherent structure in the system being modelled, and can be used to organize a specification in which details are added incrementally. It can be used to construct a library of datatypes, or to describe a system as an instantiation of a more general system. And it can express state invariants, transitions, and sequences, despite the lack of any special syntax for state machines. +@body:In this last respect, the new language differs most markedly from its predecessor [4], which provided built<\#45>in notions of state invariants and operations. 
We now think this was a bad idea, because it made the language cumbersome for problems (such as the analysis of security policies or architectural topology constraints) in which temporal behaviour can be fruitfully ignored, and too inflexible for many problems in which temporal behaviour is important. +@body:Our paper begins by explaining our motivations<\#151>the requirements our mechanism is designed to meet. The mechanism is then presented first informally in a series of examples, and then slightly more rigorously feature<\#45>by<\#45>feature. We discuss related work, especially the schema calculus of Z, and close with a summary of the merits and deficiencies of our notation as a whole. +@section:3 Requirements +@noindent:The goal of this work was to find a single structuring mechanism that would support a variety of common specification idioms: +@point:\alpha 3.0.A <I>States<I>: description of complex state as a collection of named components; incremental description both by hierarchy, in which a complex state becomes a component of a larger state, and by extension, in which new components are added; declaration of invariants and definitions of derived components; +@point:\alpha 3.0.B <I>Datatypes<I>: separate description of a library of polymorphic datatypes, such as lists, sequences, trees and orders, along with their operators; +@point:\alpha 3.0.C <I>Transitions<I>: specification of state transitions as operations described implicitly as formulas relating pre<\#45> and post<\#45>state; composition of operations from previously defined invariants and operations; sequential composition of operations; description of traces as sequences of states; +@point:\alpha 3.0.D <I>Abstractions<I>: description of abstraction relations between state spaces; +@point:\alpha 3.0.E <I>Assertions<I>: expression of properties intended to be redundant, to be checked by analysis, including: relationships amongst invariants; wellformedness of definitions (eg, that an implicit 
definition is functional); establishment and preservation of invariants by operations; properties of states reachable along finite traces; and simulation relationships between abstract and concrete versions of an operation. +@noindent:We wanted additionally to meet some more general criteria: +@point:\alpha 3.0.F <I>Simplicity<I>. The language as a whole should be exceptionally small and simple. +@point:\alpha 3.0.G <I>Flexibility<I>. Support for the particular idioms of state<\#45>machine specification should not be a straitjacket; the language should not dictate how state machines are expressed, and should not make it hard to describe structures that are not state machines (such as security models and architectural styles). +@point:\alpha 3.0.H <I>Analyzability<I>. A fully automatic semantic analysis should be possible. In the present work, this has been achieved by requiring that the modularity mechanism be first order, and expressible in the kernel of the existing language. +@noindent:Finally, our language design decisions have been influenced by some principles that we believe contribute to these goals, make the language easier to use, and analysis tools easier to build: +@point:\alpha 3.0.I <I>Explicitness<I>. The language should be fully explicit, with as few implicit constraints, coercions, etc, as possible. +@point:\alpha 3.0.J <I>Minimal mathematics<I>. The basic theory of sets and relations should suffice; it should not be necessary to introduce domains, fixed points, infinities or special logical values. +@point:\alpha 3.0.K <I>Minimal syntax<I>. There should be very few keywords or special symbols, and no need for special typography or layout. +@point:\alpha 3.0.L <I>Uniformity<I>. A small and general set of constructs should be applied uniformly, independent of context. +@point:\alpha 3.0.M <I>Lack of novelty<I>. Whenever possible, notions and syntax should follow standard usage of conventional mathematics and programming. 
+@section:4 Informal Description +@noindent:As a running example, we will specify a simple memory system involving a cache and a main memory. The memory has a fixed set of addresses and associates a data value with each address. The cache, in contrast, associates data values with some subset of addresses that varies over time. The cache is updated by a <\#147>write<\#45>back scheme<\#148>, which means that updates need not be reflected to main memory immediately. The cache may therefore hold a more current value for an address than the main memory; the two are brought into alignment when the address is flushed from the cache and its value is written to main memory. +@subsection:4.1 States +@noindent:We start by declaring the existence of addresses and data values: +@geekmath:sig Addr {}<\n> sig Data {} +@noindent:Each line declares a <I>signature<I>, and introduces a set of atoms: <I>Addr<I> for the set of addresses, and <I>Data<I> for the set of data values. Like <\#145>given types<\#146> in Z, these sets are disjoint from one another, and their atoms are unstructured and uninterpreted. Signature names can be used as expressions denoting sets, but they are also treated as types, so the expression <I>Addr+Data<I>, for example, is ill<\#45>typed, since the union operator (+) requires the types of its operands to match. +@body:The signature declaration +@geekmath:sig Memory {<\n> addrs: set Addr,<\n> map: addrs <\#45>! Data<\n> } +@noindent:likewise declares a set of atoms, <I>Memory<I>, corresponding to the set of all possible memories. In addition, it declares two fields: <I>addrs<I> and <I>map<I> which associate with a memory a set of addresses and a mapping from addresses to data values respectively. Thus, given a memory <I>m<I>, the expression <I>m.addrs<I> will be a set of addresses, <I>m.map<I> will be a relation from addresses to data values. 
The memory, addresses and data values should be viewed as distinct atoms in their own right; fields don<\#48>t decompose an atom, but rather relate one atom to others. The exclamation mark in the declaration of the field <I>map<I> is a <\#145>multiplicity marking<\#146>: it says that <I>m.map<I> associates exactly one data value with each address in the set <I>m.addrs<I>. The use of <I>addrs<I> rather than <I>Addr<I> on the left side of the arrow indicates that <I>m.map<I> does not associate a data value with an address that is not in the set <I>m.addrs<I>. +@body:In these expressions, the dot is simply relational image. More precisely, when we say that <I>m<I> is a memory, we mean that the expression <I>m<I> denotes a set consisting of a single atom. The field <I>addrs<I> is a relation from <I>Memory<I> to <I>Addr<I>, and <I>m.addrs<I> denotes the image of the singleton set under this relation. So for a set of memories <I>ms<I>, the expression <I>ms.addrs<I> will denote the union of the sets of addresses that belong to the individual memories. Given an address <I>a<I>, the expression <I>a.(m.map)<I> denotes the set of data values associated with address <I>a<I> in memory <I>m<I>, which will either be empty (when the address is not mapped) or a singleton. For convenience, we allow the relational image <I>s.r<I> to be written equivalently as <I>r<I>[<I>s<I>], where [] binds more loosely than dot, so this expression may be written as <I>m.map<I>[<I>a<I>] instead. +@body:Like objects of an object<\#45>oriented language, two distinct atoms can have fields of the same value. Unlike objects, however, atoms are immutable. Each field is fixed, and cannot map an atom to one value at one time and another value at another time. To describe an operation that changes the state of a memory, therefore, we will use two distinct atoms in the set <I>Memory<I> to represent the memory<\#48>s state before and after. 
+@subsection:4.2 Extension +@noindent:A signature declaration can introduce a set as a subset of one previously declared, in which case we call it a <I>subsignature<I>. In this case, the set does not correspond to a type, but rather its atoms take on the type of the superset. For example, the declaration +@geekmath:sig MainMemory extends Memory {} +@noindent:introduces a set of atoms <I>MainMemory<I> representing main memories, which is constrained to be a subset of the set <I>Memory<I>. Likewise +@geekmath:sig Cache extends Memory {<\n> dirty: set addrs<\n> } +@noindent:introduces a set of atoms <I>Cache<I> representing those memories that can be regarded as caches. It also introduces a field <I>dirty<I> that associates with a cache the set of addresses that is dirty; later, we will use this to represent those addresses for which a cache and main memory differ. Because <I>Cache<I> is a subset of <I>Memory<I>, and <I>m.addrs<I> (for any memory <I>m<I>) is a subset of <I>Addr<I>, the field denotes a relation whose type is from <I>Memory<I> to <I>Addr<I>. Expressions such as <I>m.dirty<I> are therefore type<\#45>correct for a memory <I>m<I>, whether or not <I>m<I> is a cache. But since declaration of the field <I>dirty<I> within the signature <I>Cache<I> constrains <I>dirty<I> to be a relation that maps only caches, <I>m.dirty<I> will always denote the empty set when <I>m<I> is not a cache. +@body:This approach avoids introducing a notion of subtyping. Subtypes complicate the language, and tend to make it more difficult to use. In OCL [17], which models extension with subtypes rather than subsets, an expression such as <I>m.dirty<I> would be illegal, and would require a coercion of <I>m<I> to the subtype <I>Cache<I>. Coercions do not fit smoothly into the relational framework; they interfere with the ability to take the image of a set under a relation, for example. 
Moreover, subtypes are generally disjoint, whereas our approach allows the sets denoted by subsignatures to overlap. In this case, we<\#48>ll add a constraint (in Section 2.4 below) to ensure that <I>MainMemory<I> and <I>Cache<I> are in fact disjoint. +@body:Declaring <I>Cache<I> and <I>MainMemory<I> as subsignatures of <I>Memory<I> serves to factor out their common properties. Extension can be used for a different purpose, in which a single signature is developed by repeated extensions along a chain. In this case, the supersignatures may not correspond to entities in the domain being modelled, but are simply artifacts of specification<\#151>fragments developed along the way. Z specifications are typically developed in this style. +@subsection:4.3 Hierarchy +@noindent:The signature declaration also supports hierarchical structuring. We can declare a signature for systems each consisting of a cache and a main memory: +@geekmath:sig System {<\n> cache: Cache,<\n> main: MainMemory<\n> } +@noindent:Again, <I>System<I> introduces a set of atoms, and each field represents a relation. The omission of the keyword <I>set<I> indicates that a relation is a total function. So for a system <I>s<I>, the expression <I>s.cache<I> denotes one cache<\#151>that is, a set consisting of a single cache. This is one of very few instances of implicit constraints in our language, which we introduced in order to make declaration syntax conventional. +@body:Since signatures denote sets of atoms, apparently circular references are allowed. Linked lists, for example, may be modelled like this, exactly as they might be implemented in a language like Java: +@geekmath:sig List {}<\n> sig NonEmptyList extends List {elt: Elt, rest: List} There is no recursion here; the field <I>rest<I> is simply a homogeneous relation of type <I>List<I> to <I>List<I>, with its domain restricted to the subset <I>NonEmptyList<I>. 
+@subsection:4.4 State Properties +@noindent:Properties of signature atoms are recorded as logical formulas. To indicate that such a property always holds, we package it as a <I>fact<I>. To say that, for any memory system, the addresses in a cache are always addresses within the main memory, we might write: +@geekmath:fact {all s: System | s.cache.addrs in s.main.addrs} +@noindent:or, using a shorthand that allows facts about atoms of a signature to be appended to it: +@geekmath:sig System {cache: Cache, main: MainMemory}<\n> {cache.addrs in main.addrs} +@noindent:The appended fact is implicitly prefixed by +@geekmath:all this: System | with this | +@noindent:in which the <I>with<I> construct, explained in Section 3.6 below, causes the fields implicitly to be dereferences of the atom <I>this<I>. +@body:A fact can constrain atoms of arbitrary signatures; to say that no main memory is a cache we might write: +@geekmath:fact {no (MainMemory & Cache)} +@noindent:where <I>no e<I> means that the expression <I>e<I> has no elements, and & is intersection. +@body:Most descriptions have more interesting facts. We can express the fact that linked lists are acyclic, for example: +@geekmath:fact {no p: List | p in p. @sep rest} +@noindent:The expression <I> @sep rest<I> denotes the transitive closure of the relation <I>rest<I>, so that <I>p.^rest<I> denotes the set of lists reachable from <I>p<I> by following the field <I>rest<I> once or more. This illustrates a benefit of treating a field as a relation<\#151>that we can apply standard relational operators to it<\#151>and is also an example of an expression hard to write in a language that treats extension as subtyping (since each application of <I>rest<I> would require its own coercion). +@body:Often we want to define a property without imposing it as a permanent constraint. In that case, we declare it as a <I>function<I>. 
Here, for example, is the invariant that the cache lines not marked as dirty are consistent with main memory: +@geekmath:fun DirtyInv (s: System) {<\n> all a !: s.cache.dirty | s.cache.map[a] = s.main.map[a]<\n> } (The exclamation mark negates an operator, so the quantification is over all addresses that are <I>not<I> dirty.) Packaging this as a function that can be applied to a particular system, rather than as a fact for all systems, will allow us to express assertions about preservation of the invariant (Section 2.8). +@noindent:By default, a function returns a boolean value<\#151>the value of the formula in its body. The value of <I>DirtyInv(s)<I> for a system <I>s<I> is therefore true or false. A function may return non<\#45>boolean values. We might, for example, define the set of bad addresses to be those for which the cache and main memory differ: +@geekmath:fun BadAddrs (s: System): set Addr {<\n> result = {a: Addr | s.cache.map[a] != s.main.map[a]}<\n> } +@noindent:and then write our invariant like this: +@geekmath:fun DirtyInv (s: System) {BadAddrs(s) in s.cache.dirty} +@noindent:In this case, <I>BadAddrs(s)<I> denotes a set of addresses, and is short for the expression on the right<\#45>hand side of the equality in the definition of the function <I>BadAddrs<I>. The use of the function application as an expression does not in fact depend on the function being defined explicitly. Had we written +@geekmath:fun BadAddrs (s: System): set Addr {<\n> all a: Addr | a in result iff s.cache.map[a] != s.main.map[a]<\n> } the application would still be legal; details are explained in Section 3.7. +@subsection:4.5 Operations +@noindent:Following Z, we can specify operations as formulas that constrain pre<\#45> and post<\#45>states. An operation may be packaged as a single function (or as two functions if we want to separate pre<\#45> and post<\#45>conditions in the style of VDM or Larch). 
+@body:The action of writing a data value to an address in memory might be specified like this: +@geekmath:fun Write (m,m<\#48>: Memory, d: Data, a: Addr) {<\n> m<\#48>.map = m.map ++ (a<\#45>d)<\n> } +@noindent:The formula in the body of the function relates <I>m<I>, the value of the memory before, to <I>m<\#48><I>, the value after. These identifiers are just formal arguments, so the choice of names is not significant. Moreover, the prime mark plays no special role akin to decoration in Z<\#151>it<\#48>s a character like any other. The operator ++ is relational override, and the arrow forms a cross product. As mentioned above, scalars are represented as singleton sets, so there is no distinction between a tuple and a relation. The arrows in the expressions <I>a<\#45><I>d here and <I>addrs<\#45><I>Data in the declaration of the <I>map<I> field of <I>Memory<I> are one and the same. The action of reading a data value can likewise be specified as a function, although since it has no side<\#45>effect we omit the <I>m<\#48><I> parameter: +@geekmath:fun Read (m: Memory, d: Data, a: Addr) {<\n> d = m.map[a]<\n> } Actions on the system as a whole can be specified using these primitive operations; in Z, this idiom is called <\#145>promotion<\#146>. A read on the system is equivalent to reading the cache: +@geekmath:fun SystemRead (s: System, d: Data, a: Addr) {<\n> Read (s.cache, d, a)<\n> } +@noindent:The <I>Read<I> operation has an implicit precondition. Since the data parameter <I>d<I> is constrained (implicitly by its declaration) to be scalar<\#151>that is, a singleton set<\#151>the relation <I>m.map<I> must include a mapping for the address parameter <I>a<I>, since otherwise the expression <I>m.map[a]<I> will evaluate to the empty set, and the formula will not be satisfiable. This precondition is inherited by <I>SystemRead<I>. If the address <I>a<I> is not in the cache, the operation cannot proceed, and it will be necessary first to load the data from main memory.
It is convenient to specify this action as a distinct operation: +@geekmath:fun Load (s,s<\#48>: System, a: Addr) {<\n> a !in s.cache.addrs<\n> s<\#48>.cache.map = s.cache.map + (a<\#45>s.main.map[a])<\n> s<\#48>.main = s.main<\n> } +@noindent:The + operator is just set union (in this case, of two binary relations, the second consisting of a single tuple). A write on the system involves a write to the cache, and setting the dirty bit. Again, this can be specified using a primitive memory operation: +@geekmath:fun SystemWrite (s,s<\#48>: System, d: Data, a: Addr) {<\n> Write (s.cache, s<\#48>.cache, d, a)<\n> s<\#48>.cache.dirty = s.cache.dirty + a<\n> s<\#48>.main = s.main<\n> } +@noindent:A cache has much smaller capacity than main memory, so it will occasionally be necessary (prior to loading or writing) to flush lines from the cache back to main memory. We specify flushing as a non<\#45>deterministic operation that picks some subset of the cache addrs and writes them back to main memory: +@geekmath:fun Flush (s,s<\#48>: System) {<\n> some x: set s.cache.addrs {<\n> s<\#48>.cache.map = s<\#48>.cache.map <\#45> (x<\#45>Data)<\n> s<\#48>.cache.dirty = s.cache.dirty <\#45> x<\n> s<\#48>.main.map = s.main.map ++ <\n> {a: x, d: Data | d = s.cache.map[a]}<\n> } +@noindent:The <\#45> operator is set difference; note that it is applied to sets of addresses (in the third line) and to binary relations (in the second). The comprehension expression creates a relation of pairs <I>a<I><\#45><I>d<I> satisfying the condition. +@body:Finally, it is often useful to specify the initial conditions of a system. 
To say that the cache initially has no addresses, we might write a function imposing this condition on a memory system: +@geekmath:fun Init (s: System) {no s.cache.addrs} +@subsection:4.6 Traces +@noindent:To support analyses of behaviours consisting of sequences of states, we declare two signatures, for ticks of a clock and traces of states: +@geekmath:sig Tick {}<\n> sig SystemTrace {<\n> ticks: set Tick,<\n> first, last: ticks,<\n> next: (ticks <\#45> last) !<\#45>! (ticks <\#45> first)<\n> state: ticks <\#45>! System}<\n> {<\n> first.*next = ticks<\n> Init (first.state)<\n> all t: ticks <\#45> last | <\n> some s = t.state, s<\#48> = t.next.state |<\n> Flush (s,s<\#48>)<\n> || (some a: Addr | Load (s,s<\#48>,a))<\n> || (some d: Data, a: Addr | SystemWrite (s,s<\#48>,d,a))<\n> } +@noindent:Each trace consists of a set of <I>ticks<I>, a <I>first<I> and <I>last<I> tick, an ordering relation <I>next<I> (whose declaration makes it a bijection from all ticks except the last to all ticks except the first), and a relation <I>state<I> that maps each tick to a system state. +@body:The fact appended to the signature states first a generic property of traces: that the ticks of a trace are those reachable from the first tick. It then imposes the constraints of the operations on the states in the trace. The initial condition is required to hold in the first state. Any subsequent pair of states is constrained to be related by one of the three side<\#45>effecting operations. The existential quantifier plays the role of a <I>let<I> binding, allowing <I>s<I> and <I>s<\#48><I> in place of <I>t.state<I> and <I>t.next.state<I>, representing the state for tick <I>t<I> and the state for its successor <I>t.next<I>. Note that this formulation precludes stuttering; we could admit it simply by adding the disjunct <I>s<I>=<I>s<\#48><I> allowing a transition that corresponds to no operation occurring. 
+@body:Bear in mind that this fact is a constraint on all atoms in the set <I>SystemTrace<I>. As a free standing fact, the second line of the fact<\#151>the initial condition<\#151> would have been written: +@geekmath:fact {all x: SystemTrace | Init ((x.first).(x.state))} +@subsection:4.7 Abstraction +@noindent:Abstraction relationships are easily expressed using our function syntax. To show that our memory system refines a simple memory without a cache, we define an abstraction function <I>Alpha<I> saying that a system corresponds to a memory that is like the system<\#48>s memory, overwritten by the entries of the system<\#48>s cache: +@geekmath:fun Alpha (s: System, m: Memory) {<\n> m.map = s.main.map ++ s.cache.map<\n> } As another example, if our linked list were to represent a set, we might define the set corresponding to a given list as that containing the elements reachable from the start: +@geekmath:fun ListAlpha (p: List, s: set Elt) {<\n> s = p.*rest.elt<\n> } +@subsection:4.8 Assertions +@noindent:Theorems about a specification are packaged as <I>assertions<I>. An assertion is simply a formula that is intended to hold. A tool can check an assertion by searching for a counterexample<\#151>that is, a model of the formula<\#48>s negation. +@body:The simplest kinds of assertion record consequences of state properties. For example, +@geekmath:assert {<\n> all s: System | DirtyInv (s) && no s.cache.dirty<\n> = s.cache.map in s.main.map<\n> } +@noindent:asserts that if the dirtiness invariant holds, and there are no dirty addresses, then the mapping of addresses to data in the cache is a subset of the mapping in the main memory. +@body:An assertion can express consequences of operations.
For example, +@geekmath:assert {<\n> all s: System, d: Data, a: Addr |<\n> SystemRead (s,d,a) = a in s.cache.addrs<\n> } +@noindent:embodies the claim made above that <I>SystemRead<I> has an implicit precondition; it asserts that whenever <I>SystemRead<I> occurs for an address, that address must be in the cache beforehand. An assertion can likewise identify a consequence in the post<\#45>state; this assertion +@geekmath:assert {<\n> all s,s<\#48>: System, d: Data, a: Addr |<\n> SystemWrite (s,s<\#48>,d,a) = s<\#48>.cache.map[a] = d<\n> } says that after a <I>SystemWrite<I>, the data value appears in the cache at the given address. Preservation of an invariant by an operation is easily recorded as an assertion. To check that our dirtiness invariant is preserved when writes occur, we would assert +@geekmath:assert {<\n> all s,s<\#48>: System, d: Data, a: Addr |<\n> SystemWrite (s,s<\#48>,d,a) && DirtyInv (s) = DirtyInv (s<\#48>)<\n> } +@noindent:Invariant preservation is not the only consequence of an operation that we would like to check that relates pre<\#45> and post<\#45>states. We might, for example, want to check that operations on the memory system do not change the set of addresses of the main memory. For the <I>Flush<I> operation, for example, the assertion would be +@geekmath:assert {<\n> all s,s<\#48>: System | Flush(s,s<\#48>) = s.main.addrs = s<\#48>.main.addrs<\n> } which holds only because the cache addresses are guaranteed to be a subset of the main memory addresses (by the fact associated with the <I>System<I> signature). +@noindent:The effect of a sequence of operations can be expressed by quantifying appropriately over states. For example, assert {<\n> all s, s<\#48>: System, a: Addr, d,d<\#48>: Data | <\n> SystemWrite (s,s<\#48>,d,a) && SystemRead (s<\#48>,d<\#48>,a) = d = d<\#48><\n> } +@noindent:says that when a write is followed by a read of the same address, the read returns the data value just written. 
+@body:To check that a property holds for all reachable states, we can assert that the property is an invariant of every operation, and is established by the initial condition. This strategy can be shown (by induction) to be sound, but it is not complete. A property may hold for all reachable states, but may not be preserved because an operation breaks the property when executed in a state that happens not to be reachable. +@body:Traces overcome this incompleteness. Suppose, for example, that we want to check the (rather contrived) property that, in every reachable state, if the cache contains an address that isn<\#48>t dirty, then it agrees with the main memory on at least one address: +@geekmath:fun DirtyProp (s: System) {<\n> some (s.cache.addrs <\#45> s.cache.dirty)<\n> = some a: Addr | s.cache.map[a] = s.main.map[a]<\n> } +@noindent:We can assert that this property holds in the last state of every trace: +@geekmath:assert {<\n> all t: SystemTrace | with t | DirtyProp (last.state)<\n> } This assertion is valid, even though <I>DirtyProp<I> is not an invariant. A write invoked in a state in which all clean entries but one had non<\#45>matching values can result in a state in which there are still clean entries but none has a matching value. +@noindent:Finally, refinements are checked by assertions involving abstraction relations. 
We can assert that a <I>SystemWrite<I> refines a basic <I>Write<I> operation on a simple memory: +@geekmath:assert {<\n> all s,s<\#48>: System, m,m<\#48>: Memory, a: Addr, d: Data |<\n> Alpha (s,m) && Alpha (s<\#48>,m<\#48>) && SystemWrite (s,s<\#48>,a,d)<\n> = Write (m,m<\#48>,a,d)<\n> } +@noindent:or that the <I>Flush<I> operation is a no<\#45>op when viewed abstractly: +@geekmath:assert {<\n> all s,s<\#48>: System, m,m<\#48>: Memory |<\n> Alpha (s,m) && Alpha (s<\#48>,m<\#48>) && Flush (s,s<\#48>)<\n> = m.map = m<\#48>.map<\n> } +@noindent:Note the form of the equality; <I>m = m<\#48><I> would be wrong, since two distinct memories may have the same mapping, and the abstraction <I>Alpha<I> constrains only the mapping and not the memory atom itself. +@body:Many of the assertions shown here can be made more succinct by the function shorthand explained in Section 3.7 below. For example, the assertion that a read following a write returns the value just written becomes: +@geekmath:assert {<\n> all s: System, a: Addr, d: Data | <\n> SystemRead (SystemWrite (s,d,a),a) = d<\n> } +@noindent:and the assertion that <I>Flush<I> is a no<\#45>op becomes: +@geekmath:assert {<\n> all s: System | Alpha (s).map = Alpha (Flush (s)).map<\n> } +@subsection:4.9 Polymorphism +@noindent:Signatures can be parameterized by signature types. 
Rather than declaring a linked list whose elements belong to a particular type <I>Elt<I>, as above, we would prefer to declare a generic list: +@geekmath:sig List [T] {}<\n> sig NonEmptyList [T] extends List [T] {elt: T, rest: List [T]} +@noindent:Functions and facts may be parameterized in the same way, so we can define generic operators, such as: +@geekmath:fun first [T] (p: List [T]): T {result = p.elt}<\n> fun last [T] (p: List [T]): T {some q: p.*rest | result = q.elt && no q.rest}<\n> fun elements [T] (p: List [T]): set T {result = p.*rest.elt} +@noindent:In addition, let<\#48>s define a generic function that determines whether two elements follow one another in a list: +@geekmath:fun follows [T] (p: List[T], a,b: T) {<\n> some x: p.*rest | x.elt = a && x.next.elt = b<\n> } +@noindent:To see how a generic signature and operators are used, consider replacing the traces of Section 2.6 with lists of system states. Define a function that determines whether a list is a trace: +@geekmath:fun isTrace (t: List [System]) {<\n> Init (first(t))<\n> all s, s<\#48>: System | follows (t,s,s<\#48>) = {<\n> Flush (s,s<\#48>)<\n> || (some a: Addr | Load (s,s<\#48>,a))<\n> || (some d: Data, a: Addr | SystemWrite (s,s<\#48>,d,a))<\n> }<\n> } +@noindent:Now our assertion that every reachable system state satisfies <I>DirtyProp<I> can now be written: +@geekmath:assert {<\n> all t: List[System] | isTrace(t) = DirtyProp (last(t))<\n> } +@subsection:4.10 Variants +@noindent:To illustrate the flexibility of our notation, we sketch a different formulation of state machines oriented around transitions rather than states. 
+@body:Let<\#48>s introduce a signature representing state transitions of our memory system: +@geekmath:sig SystemTrans {pre,post: System}<\n> {pre.main.addrs = post.main.addrs} +@noindent:Declaring the transitions as a signature gives us the opportunity to record properties of all transitions<\#151>in this case requiring that the set of addresses of the main memory is fixed. +@body:Now we introduce a subsignature for the transitions of each operation. For example, the transitions that correspond to load actions are given by: +@geekmath:sig LoadTrans extends SystemTrans {a: Addr}<\n> {Load (pre, post, a)} +@noindent:For each invariant, we define a set of states. For the states satisfying the dirty invariant, we might declare +@geekmath:sig DirtyInvStates extends System {} +@noindent:along with the fact +@geekmath:fact {DirtyInvStates = {s: System | DirtyInv(s)}} +@noindent:To express invariant preservation, it will be handy to declare a function that gives the image of a set of states under a set of transitions: +@geekmath:fun postimage (ss: set System, tt: set SystemTrans): set System {<\n> result = {s: System | some t: tt | t.pre in ss && s = t.post}<\n> } +@noindent:so that we can write the assertion like this: +@geekmath:assert {postimage (DirtyInvStates, LoadTrans) in DirtyInvStates} +@noindent:For an even more direct formulation of state machine properties, we might have defined a transition relation instead: +@geekmath:fun Trans (r: System <\#45> System) {<\n> all s, s<\#48> : System | <\n> s<\#45>s<\#48> in r = Flush (s,s<\#48>) || <\#133><\n> } +@noindent:Then, using transitive closure, we can express the set of states reachable from an initial state, and assert that this set belongs to the set characterized by some property: +@geekmath:assert {all r: System <\#45> System, s: System |<\n> Init (s) && Trans(r) = s.*r in DirtyPropStates<\n> } +@noindent:where <I>DirtyPropStates<I> is defined analogously to <I>DirtyInvStates<I>.
+@subsection:4.11 Definitions +@noindent:Instead of declaring the addresses of a memory along with its mapping, as we did before: +@geekmath:sig Memory {<\n> addrs: set Addr,<\n> map: addrs <\#45>! Data<\n> } +@noindent:we could instead have declared the mapping alone: +@geekmath:sig Memory {<\n> map: Addr <\#45>? Data<\n> } +@noindent:and then <I>defined<I> the addresses using a subsignature: +@geekmath:sig MemoryWithAddrs extends Memory {<\n> addrs: set Addr}<\n> {addrs = {a: Addr | some a.map}} Now by making the subsignature subsume all memories: +@geekmath:fact {Memory in MemoryWithAddrs} +@noindent:we have essentially <\#145>retrofitted<\#146> the field. Any formula involving memory atoms now implicitly constrains the <I>addrs<I> field. For example, we can assert that <I>Read<I> has an implicit precondition requiring that the argument be a valid address: +@geekmath:assert {all m: Memory, a: Addr, d: Data | Read (m,d,a) = a in m.addrs} +@noindent:even though the specification of <I>Read<I> was written when the field <I>addrs<I> did not even exist. +@section:5 Semantics +@noindent:For completeness, we give an overview of the semantics of the language. The novelties with respect to the original version of Alloy [4] are (1) the idea of organizing relations around basic types as signatures, (2) the treatment of extension as subsetting, and (3) the packaging of formulas in a more explicit (and conventional) style. The semantic basis has been made cleaner, by generalizing relations to arbitrary arity, eliminating <\#145>indexed relations<\#146> and the need for a special treatment of sets. +@subsection:5.1 Types +@noindent:We assume a universe of atoms. The standard notion of a mathematical relation gives us our only composite datatype. The value of an expression will always be a relation<\#151>that is, a collection of tuples of atoms. Relations are first order: the elements of a tuple are themselves atoms and never relations. +@body:The language is strongly typed. 
We partition the universe into subsets each associated with a <I>basic<I> type, and write (T<I>1, T<I>2, <\#133>, T<I>n) for the type of a relation whose tuples each consist of <I>n<I> atoms, with types T<I>1, T<I>2, etc. +@body:A set is represented semantically as a unary relation, namely a relation whose tuples each contain one atom. A tuple is represented as a singleton relation, namely a relation containing exactly one tuple. A scalar is represented as a unary, singleton relation. We use the terms <\#145>set<\#146>, <\#145>tuple<\#146> and <\#145>scalar<\#146> to describe relations with the appropriate properties. Basic types are used only to construct relation types, and every expression that appears in a specification has a relational type. Often we will say informally that an expression has a type <I>T<I> where <I>T<I> is the name of a basic type when more precisely we mean that the expression has the type (<I>T<I>). +@body:So, in contrast to traditional mathematical style, we do not make distinctions amongst the atom <I>a<I>, the tuple (<I>a<I>), the set {<I>a<I>} containing just the atom, or the set {(<I>a<I>)} containing the tuple, and represent all of these as the last. This simplifies the semantics and gives a more succinct and uniform syntax. +@subsection:5.2 Expression Operators +@noindent:Expressions can be formed using the standard set operators written as ASCII characters: union (+), intersection (&) and difference (<\#45>). Some standard relational operators, such as transpose (~) and transitive closure (^), can be applied to expressions that denote binary relations. Relational override (++) has its standard meaning for binary relations but can be applied more broadly. +@body:There are two special relational operators, dot and arrow. The dot operator is a generalized relational composition.
Given expressions <I>p<I> and <I>q<I>, the expression <I>p<I>.<I>q<I> contains the tuple <f"Symbol"><\#225><f$><I>p<I><->1<->, <\#133> <I>p<I><-><I>m<I><\#45>1<->, <I>q<I><->2<->, <\#133>, <I>q<I><-><I>n<I><-><f"Symbol"><\#241><f$> when <I>p<I> contains @math @sep p<I>1, <\#133>, p<I>{m}, <I>q<I> contains @math @sep q<I>1, <\#133> q<I>n, and @math p<I>m = q<I>1. The last type of <I>p<I> and the first type of <I>q<I> must match, and <I>m<I> + <I>n<I>, the sum of the arities of <I>p<I> and <I>q<I>, must be three or more so that the result is not degenerate. When <I>p<I> is a set and <I>q<I> is a binary relation, the composition <I>p.q<I> is the standard relational image of <I>p<I> under <I>q<I>; when <I>p<I> and <I>q<I> are both binary relations, <I>p.q<I> is standard relational composition. In all of the examples above, the dot operator is used only for relational image. +@body:The arrow operator is cross product: <I>p q<I> is the relation containing the tuple @math @sep p<I>1, <\#133>, p<I>{m}, q<I>1, <\#133> q<I>n when <I>p<I> contains @math @sep p<I>1, <\#133>, p<I>{m}, and <I>q<I> contains @math @sep q<I>1, <\#133> q<I>n. In all the examples in this paper, <I>p<I> and <I>q<I> are sets, and <I>p q<I> is their standard cross product. +@subsection:5.3 Formula Operators +@noindent:Elementary formulas are formed from the subset operator, written <I>in<I>. Thus <I>p in q<I> is true when every tuple in <I>p<I> is in <I>q<I>. The formula <I>p : q<I> has the same meaning, but when <I>q<I> is a set, adds an implicit constraint that <I>p<I> be scalar (ie, a singleton). This constraint is overridden by writing <I>p: option q<I> (which lets <I>p<I> be empty or a scalar) or <I>p: set q<I> (which eliminates the constraint entirely). Equality is just standard set equality, and is short for a subset constraint in each direction.
+@body:An arrow that appears as the outermost expression operator on the right<\#45>hand side of a subset formula can be annotated with <I>multiplicity markings<I>: + (one or more), ? (zero or one) and ! (exactly one). The formula +@geekmath:r: S m n T +@noindent:where <I>m<I> and <I>n<I> are multiplicity markings constrains the relation <I>r<I> to map each atom of <I>S<I> to <I>n<I> atoms of <I>T<I>, and to map <I>m<I> atoms of <I>S<I> to each atom of <I>T<I>. <I>S<I> and <I>T<I> may themselves be product expressions, but are usually variables denoting sets. For example, +@geekmath:r: S ! T<\n> r: S ? ! T +@noindent:make <I>r<I> respectively a total function on <I>S<I> and an injection. +@body:Larger formulas are obtained using the standard logical connectives: && (and), || (or), ! (not), =<I> (implies), iff<I> (bi<\#45>implication). The formula <I>if b then f else g<I> is short for <I>b<I> =<I> f<I> && !<I>b<I> =<I> g<I>. Within curly braces, consecutive formulas are implicitly conjoined. +@body:Quantifications take their usual form: +@geekmath:all x: e | F +@noindent:is true when the formula <I>F<I> holds under every binding of the variable <I>x<I> to a member of the set <I>e<I>. In addition to the standard quantifiers, <I>all<I> (universal) and <I>some<I> (existential), we have <I>no<I>, <I>sole<I> and <I>one<I> meaning respectively that there are no values, at most one value, and exactly one value satisfying the formula. For a quantifier <I>Q<I> and expression <I>e<I>, the formula <I>Q e<I> is short for <I>Q x: T | e<I> (where <I>T<I> is the type of <I>e<I>), so <I>no e<I>, for example, says that <I>e<I> is empty. +@body:The declaration of a quantified formula is itself a formula<\#151>an elementary formula in which the left<\#45>hand side is a variable. Thus +@geekmath:some x = e | F +@noindent:is permitted, and is a useful way to express a <I>let<I> binding. Quantifiers may be higher<\#45>order; the formula +@geekmath:all f: s <\#45><I>! 
t | F +@noindent:is true when F<I> holds for every binding of a total function from <I>s<I> to <I>t<I> to the variable <I>f<I>. Our analysis tool cannot currently handle higher<\#45>order quantifiers, but many uses of higher<\#45>order quantifiers that arise in practice can be eliminated by skolemization. +@body:Finally, we have relational comprehensions; the expression +@geekmath:{x<I>1: e<I>1, x<I>2: e<I>2, <\#133> | F} +@noindent:constructs a relation of tuples with elements <I>x<I>1<I>, <I>x<I>2<I>, etc., drawn from set expressions <I>e<I>1<I>, <I>e<I>2<I>, etc., whose values satisfy <I>F<I>. +@body:Signatures +@noindent:A <I>signature<I> declaration introduces a basic type, along with a collection of relations called <I>fields<I>. The declaration +@geekmath:sig S {f: E} +@noindent:declares a basic type <I>S<I>, and a relation <I>f<I>. If <I>E<I> has the type (T<I>1, T<I>2, <\#133>, T<I>n), the relation <I>f<I> will have the type (S, T<I>1, T<I>2, <\#133>, T<I>n), and if <I>x<I> has the type <I>S<I>, the expression <I>x.f<I> will have the same type as <I>E<I>. When there are several fields, field names already declared may appear in expressions on the right<\#45>hand side of declarations; in this case, a field <I>f<I> is typed as if it were the expression <I>this.f<I>, where <I>this<I> denotes an atom of the signature type (see Section 3.6). +@body:The meaning of a specification consisting of a collection of signature declarations is an assignment of values to global constants<\#150> the signatures and the fields. 
For example, the specification +@geekmath:sig Addr {}<\n> sig Data {}<\n> sig Memory {map: Addr <\#45><I> Data} +@noindent:has 4 constants<\#151>the three signatures and one field<\#151>with assignments such as: +@geekmath:Addr = {a0, a1}<\n> Data = {d0, d1, d2}<\n> Memory = {m0, m1}<\n> map = {(m0,a0,d0), (m1,a0,d1), (m1,a0,d2)} +@noindent:corresponding to a world in which there are 2 addresses, 3 data values and 2 memories, with the first memory (m0<I>) mapping the first address (<I>a0<I>) to the first data value (<I>d0<I>), and the second memory (<I>m1<I>) mapping the first address (<I>a0<I>) both to the second (<I>d1<I>) and third (<I>d2<I>) data values. +@body:A fact is a formula that constrains the constants of the specification, and therefore tends to reduce the set of assignments denoted by the specification. For example, +@geekmath:fact {all m: Memory | all a: Addr | sole m.map[a]} +@noindent:rules out the above assignment, since it does not permit a memory (such as <I>m1<I>) to map an address (such as <I>a0<I>) to more than one data value. The meaning of a function is a set of assignments, like the meaning of the specification as a whole, but these include bindings to parameters. For example, the function +@geekmath:fun Read (m: Memory, d: Data, a: Addr) {<\n> d = m.map[a]<\n> } +@noindent:has assignments such as: +@geekmath:Addr = {a0, a1}<\n> Data = {d0, d1, d2}<\n> Memory = {m0, m1}<\n> map = {(m0,a0,d1)}<\n> m = {m0}<\n> d = {d1}<\n> a = {a0} +@noindent:The assignments of a function representing a state invariant correspond to states satisfying the invariant; the assignments of a function representing an operation (such as <I>Read<I>) correspond to executions of the operation. +@body:An assertion is a formula that is claimed to be <I>valid<I>: that is, true for every assignment that satisfies the facts of the specification. To check an assertion, one can search for a <I>counterexample<I>: an assignment that makes the formula false.
For example, the assertion +@geekmath:assert {<\n> all m,m<\#48>: Memory, d: Data, a: Addr | Read (m,d,a) =<I> Read (m<\#48>,d,a)} +@noindent:which claims, implausibly, that if a read of memory m<I> returns <I>d<I> at <I>a<I>, then so does a read at memory <I>m<\#48><I>, has the counterexample +@geekmath:Addr = {a0}<\n> Data = {d0,d1}<\n> Memory = {m0, m1}<\n> map = {(m0,a0,d0), (m1,a0,d1)} +@noindent:To find a counterexample, a tool should negate the formula and then skolemize away the bound variables, treating them like the parameters of a function, with values to be determined as part of the assignment. In this case, the assignment might include: +@geekmath:m = {m0}<\n> m<\#48> = {m1}<\n> d = {d0}<\n> a = {a0} +@subsection:5.4 Extension +@noindent:Not every signature declaration introduces a new basic type. A signature declared without an extension clause is a <I>type signature<I>, and creates both a basic type and a set constant of the same name. A signature <I>S<I> declared as an extension is a <I>subsignature<I>, and creates only a set constant, along with a constraint making it a subset of each <I>supersignature<I> listed in the extension clause. The subsignature takes on the type of the supersignatures, so if there is more than one, they must therefore have the same type, by being direct or indirect subsignatures of the same type signature. +@body:A field declared in a subsignature is as if declared in the corresponding type signature, with the constraint that the domain of the field is the subsignature. For example, +@geekmath:sig List {}<\n> sig NonEmptyList extends List {elt: Elt,rest: List} +@noindent:makes <I>List<I> a type signature, and <I>NonEmptyList<I> a subset of <I>List<I>. The fields <I>elt<I> and <I>rest<I> map atoms from the type <I>List<I>, but are constrained to have domain <I>NonEmptyList<I>. 
Semantically, it would have been equivalent to declare them as fields of <I>List<I>, along with facts constraining their domains: +@geekmath:sig List {elt: Elt,rest: List}<\n> sig NonEmptyList extends List {}<\n> fact {elt.Elt in NonEmptyList}<\n> fact {rest.List in NonEmptyList} +@noindent:(exploiting our dot notation to write the domain of a relation <I>r<I> from <I>S<I> to <I>T<I> as <I>r.T<I>). +@subsection:5.5 Overloading and Implicit Prefixing +@noindent:Whenever a variable is declared, its type can be easily obtained from its declaration (from the type of the expression on the right<\#45>hand side of the declaration), and every variable appearing in an expression is declared in an enclosing scope. The one complication to this rule is the typing of fields. +@body:For modularity, a signature creates a local namespace. Two fields with the name <I>f<I> appearing in different signatures do not denote the same relational constant. Interpreting an expression therefore depends on first resolving any field names that appear in it. #We have devised a simple resolution scheme whose details are beyond the scope of this paper. In an expression of the form <I>e.f<I>, the signature to which <I>f<I> belongs is determined according to the type of <I>e<I>. To keep the scheme simple, we require that sometimes the specifier resolve the overloading explicitly by writing the field <I>f<I> of signature <I>S<I> as <I>S<I>f<I><I>. (<I>At<I> <I>the<I> <I>end<I> <I>of<I> <I>the<I> <I>previous<I> <I>section<I>, <I>for<I> <I>example<I>, <I>the<I> <I>reference<I> <I>in<I> <I>the<I> <I>fact<I> <I>to<I> <I><I>rest<I><I> <I>should<I> <I>actually<I> <I>be<I> <I>to<I> <I><I>List<I>rest<I>, since the context does not indicate which signature <I>rest<I> belongs to.) +@body:In many formulas, a single expression is dereferenced several times with different fields. 
A couple of language features are designed to allow these formulas to be written more succinctly, and, if used with care, more comprehensibly. First, we provide two syntactic variants of the dot operator. Both <I>p<I>::<I>q<I> and <I>q<I>[<I>p<I>] are equivalent to <I>p.q<I>, but have different precedence: the double colon binds more tightly than the dot, and the square brackets bind more loosely than the dot. Second, we provide a <I>with<I> construct similar to Pascal<\#48>s that makes dereferencing implicit. +@body:Consider, for example, the following simplified signature for a trace: +@geekmath:sig Trace {<\n> ticks: set Tick,<\n> first: Tick,<\n> next: Tick <\#45><I> Tick,<\n> state: Tick <\#45> State<\n> } +@noindent:Each trace t<I> has a set of ticks <I>t.ticks<I>, a first tick <I>t.first<I>, an ordering <I>t.next<I> that maps ticks to ticks, and a relation <I>t.state<I> mapping each tick to a state. For a trace <I>t<I> and tick <I>k<I>, the state is <I>k<I>.(<I>t.state<I>); the square brackets allow this expression to be written instead as <I>t.state<I>[<I>k<I>]. To constrain <I>t.ticks<I> to be those reachable from <I>t. first<I> we might write: +@geekmath:fact {all t: Trace | (t.first).*(t.next ) = t.ticks} +@noindent:Relying on the tighter binding of the double colon, we can eliminate the parentheses: +@geekmath:fact {all t: Trace | t::first.*t::next = t.ticks} +@noindent:Using <I>with<I>, we can make the <I>t<I> prefixes implicit: +@geekmath:fact {all t: Trace | with t | first.*next = ticks} +@noindent:In general, <I>with e | F<I> is like <I>F<I>, but with <I>e<I> prefixed wherever appropriate to a field name. Appropriateness is determined by type: <I>e<I> is matched to any field name with which it can be composed using the dot operator. A fact attached to a signature <I>S<I> is implicitly enclosed by <I>all this: S | with this |<I>, and the declarations of a signature are interpreted as constraints as if they had been declared within this scope. 
Consequently, the declaration of <I>first<I> above should be interpreted as if it were the formula: +@geekmath:all this: Trace | with this | first: ticks +@noindent:which is equivalent to +@geekmath:all this: Trace | this.first: this.ticks +@noindent:and should be typed accordingly. +@subsection:5.6 Function Applications +@noindent:A function may be applied by binding its parameters to expressions. The resulting application may be either an expression or a formula, but in both cases the function body is treated as a formula. The formula case is simple: the application is simply short for the body with the formal parameters replaced by the actual expressions (and bound variables renamed where necessary to avoid clashes). +@body:The expression case is more interesting. The application is treated as a syntactic sugar. Suppose we have a function application expression, <I>e<I> say, of the form +@geekmath:f(a<I>1, a<I>2, <\#133>, a<I>n) +@noindent:that appears in an elementary formula <I>F<I>. The declaration of the function <I>f<I> must list <I>n<I> + 1 formal arguments, of which the <I>second<I> will be treated as the result. The entire elementary formula is taken to be short for +@geekmath:all result: D | f (a<I>1, result, a<I>2, <\#133>, a<I>n) =<I> F [result/e] +@noindent:where D<I> is the right<\#45>hand side of the declaration of the missing argument, and <I>F<I> [<I>result<I>/<I>e<I>] is <I>F<I> with the fresh variable <I>result<I> substituted for the application expression <I>e<I>. The application of <I>f<I> in this elaborated formula is now a formula, and is treated simply as an inlining of the formula of <I>f<I>. +@body:To see how this works, consider the definition of a function <I>dom<I> that gives the domain of a relation over signature <I>X<I>: +@geekmath:fun dom (r: X <\#45><I> X, d: set X) {d = r.X} +@noindent:(We have defined the function monomorphically for a homogeneous relation. 
In practice, one would define a polymorphic function, but we want to avoid conflating two unrelated issues.) Here is a trivial assertion that applies the function as an expression: +@geekmath:assert {all p: X X | (dom (p)).p in X} +@noindent:Desugaring the formula, we get +@geekmath:all p: X X | all result: set X | dom (p, result) = result.p in X +@noindent:and then inlining +@geekmath:all p: X X | all result: set X | result = p.X = result.p in X +@noindent:This formula can be reduced (by applying a universal form of the One Point Rule) to +@geekmath:all p: X X | (p.X).p in X +@noindent:which is exactly what would have been obtained had we just replaced the application expression by the expression on the right<\#45>hand side of the equality in the function<\#48>s definition! +@body:Now let<\#48>s consider an implicit definition. Suppose we have a signature X<I> with an ordering <I>lte<I>, so that <I>e.lte<I> is the set of elements that <I>e<I> is less than or equal to, and a function <I>min<I> that gives the minimum of a set, defined implicitly as the element that is a member of the set, and less than or equal to all members of the set: +@geekmath:sig X {lte: set X}<\n> fun min (s: set X, m: option X) {<\n> m in s && s in m.lte<\n> } +@noindent:Because the set may be empty, <I>min<I> is partial. Depending on the properties of <I>lte<I> it may also fail to be deterministic. A formula that applies this function +@geekmath:assert {all s: set X | min (s) in s} +@noindent:can as before be desugared +@geekmath:all s: set X | all result: option X | min (s, result) =<I> result in s +@noindent:and expanded by inlining +@geekmath:all s: set X | all result: option X |<\n> (result in s) && s in result.lte = result in s +@noindent:but in this case the One Point Rule is not applicable. +@body:As a convenience, our language allows the result argument of a function to be declared anonymously in a special position, and given the name result<I>. 
The domain function, for example, can be defined as: +@geekmath:fun dom (r: X <\#45><I> X): set X {result = r.X} +@noindent:How the function is defined has no bearing on how it is used; this definition is entirely equivalent to the one above, and can also be applied as a formula with two arguments. +@subsection:5.7 Polymorphism +@noindent:Polymorphism is treated as a syntactic shorthand. Lack of space does not permit a full discussion here. +@section:6 Related Work +@noindent:We have shown how a handful of elements can be assembled into a rather simple but flexible notation. The elements themselves are far from novel<\#151>indeed, we hope that their familiarity will make the notation easy to learn and use<\#151>but their assembly into a coherent whole results in a language rather different from existing specification languages. +@subsection:6.1 New Aspects +@noindent:The more novel aspects of our work are: +@point:\alpha 6.1.A Objectification of state<I>. Most specification languages represent states as cartesian products of components; in our approach, a state, like a member of any signature, is an individual<\#151>a distinct atom with identity. A similar idea is used in the situation calculus [11], whose <\#145>relational fluents<\#146> add a situation variable to each time<\#45>varying relation. The general idea of objectifying all values is of course the foundation of object<\#45>oriented programming languages, and was present in LISP. Interestingly, object<\#45>oriented variants of Z (such as [1]) do not objectify schemas. The idea of representing structures in first<\#45>order style as atoms is present also in algebraic specifications such as Larch [2], which treat even sets and relations in this manner. +@point:\alpha 6.1.B <I>Components as relations<I>. Interpreting fields of a structure as functions goes back to early work on verification, and is widely used (for example, by Leino and Nelson [10]). 
We are not aware, however, of specification languages that use this idea, or that flatten fields to relations over atoms. +@point:\alpha 6.1.C <I>Extension by global axioms<I>. The <\#145>facts<\#146> of our notation allow the properties of a signature to be extended monotonically. The idea of writing axioms that constrain the members of a set constant declared globally is hardly remarkable, but it appears not to have been widely exploited in specification languages. +@point:\alpha 6.1.D <I>Extension by subset<I>. Treating the extension of a structure as a refinement modelled by subset results in a simple semantics, and melds well with the use of global axioms. Again, this seems to be an unremarkable idea, but one whose power has not been fully recognized. +@subsection:6.2 Old Aspects +@noindent:The aspects of our work that are directly taken from existing languages are: +@point:\alpha 6.2.A <I>Formulas<I>. The idea of treating invariants, definitions, operations, etc, uniformly as logical formulas is due to Z [14]. +@point:\alpha 6.2.B <I>Assertions<I>. Larch [2] provides a variety of constructs for adding intentional redundancy to a specification in order to provide error<\#45>detection opportunities. <I>Parameterized formulas<I>. The <\#145>functional<\#146> style we have adopted, in which all formulas are explicitly parameterized, in contrast to the style of most specification languages, is used also by languages for theorem provers, such as PVS [13]. VDM [8] offers a mechanism called <\#145>operation quotation<\#146> in which pre<\#45> and post conditions are reused by interpreting them as functions similar to ours. +@point:\alpha 6.2.C <I>Parametric Polymorphism<I>. The idea of parameterizing descriptions by types was developed in the programming languages community, most notably in the context of ML [12]. +@point:\alpha 6.2.D <I>Implicit Prefixing<I>. Our <\#145>with<\#146> operator is taken from Pascal [9]. +@point:\alpha 6.2.E <I>Relational operators<I>. 
The dot operator, and the treatment of scalars as singletons, comes from the earlier version of Alloy [4]. +@subsection:6.3 Z<\#48>s Schema Calculus +@noindent:Z has been a strong influence on our work; indeed, this paper may be viewed as an attempt to achieve some of the power and flexibility of Z<\#48>s schema calculus in a first<\#45>order setting. Readers unfamiliar with Z can find an excellent presentation of the schema calculus in [16]. The current definitive reference is [15], although Spivey<\#48>s manual [14] is more accessible for practitioners. +@body:A <I>schema<I> consists of a collection of variable declarations and a formula constraining the variables. Schemas can be anonymous. When a name has been bound to a schema, it can be used in three different ways, distinguished according to context. First, it can be used as a <I>declaration<I>, in which case it introduces its variables into the local scope, constraining them with its formula. Second, where the variables are already in scope, it can be used as a <I>predicate<I>, in which case the formula applies and no new declarations are added. Both of these uses are syntactic; the schema can be viewed as a macro. +@body:In the third use, the schema is semantic. Its name represents a set of <I>bindings<I>, each binding being a finite function from variable names to values. The bindings denoted by the schema name are the models of the schema<\#48>s formula: those bindings of variable names to values that make the formula true. +@body:How a schema is being applied is not always obvious; in the set comprehension {<I>S<I>}, for example, <I>S<I> represents a declaration, so that the expression as a whole denotes the same set of bindings as <I>S<I> itself. Given a binding <I>b<I> for a schema with component variable <I>x<I>, the expression <I>b.x<I> denotes the value assigned to <I>x<I> in <I>b<I>. 
Unlike Alloy<\#48>s dot, this dot is a function application, so for a set of bindings <I>B<I>, the expression <I>B.x<I> is not well formed. +@body:Operations in Z are expressed using the convention that primed variables denote components of the post<\#45>state. A mechanism known as <I>decoration<I> allows one to write <I>S<\#48><I> for the schema that is like <I>S<I>, but whose variable names have been primed. Many idioms, such as promotion, rely on being able to manipulate the values of a schema<\#48>s variables in aggregate. To support this, Z provides the theta operator: <f"Symbol"><\#113><f$> @sep <I>S<I> is an expression that denotes a binding in which each variable <I>x<I> that belongs to <I>S<I> is bound to a variable of the same name <I>x<I> declared in the local scope. Theta and decoration interact subtly: <f"Symbol"><\#113><f$> @sep <I>S<\#48><I> is not a binding of <I>S<\#48><I>, but rather binds each variable <I>x<I> of <I>S<I> to a variable <I>x<\#48><I> declared locally. So where we would write <I>s=s<\#48><I> to say that pre<\#45> and post<\#45>states <I>s<I> and <I>s<\#48><I> are the same, a Z specifier would write <f"Symbol"><\#113><f$> @sep <I>S<I> = <f"Symbol"><\#113><f$> @sep <I>S<\#48><I>. This formula equates each component <I>x<I> of <I>S<I> to its matching component <I>x<\#48><I> of <I>S<\#48><I>, because <I>x<I> and <I>x<\#48><I> are the respective values bound to <I>x<I> by <f"Symbol"><\#113><f$> @sep <I>S<I> and <f"Symbol"><\#113><f$> @sep <I>S<\#48><I> respectively. +@body:Our <\#145>fact<\#146> construct allows the meaning of a signature name to be constrained subsequent to its declaration. A schema, in contrast, is <\#145>closed<\#146>: a new schema name must be introduced for each additional constraint. This can produce an undesirable proliferation of names for a system<\#48>s state, but it does make it easier to track down those formulas that affect a schema<\#48>s meaning. 
+@body:The variables of a schema can be renamed, but cannot be replaced by arbitrary expressions (since this would make nonsense of declarations).This requires the introduction of existential quantifiers where in our notation an expression is passed as an actual. On the other hand, when no renaming is needed, it is more succinct. +@body:Z<\#48>s sequential composition operator is defined by a rather complicated transformation, and relies on adherence to particular conventions. The schema <I>P<I> @sep @sep <I>Q<I> is obtained by collecting primed variables in <I>P<I> that match unprimed variables in <I>Q<I>; renaming these in both <I>P<I> and <I>Q<I> with a new set of variable names; and then existentially quantifying the new names away. For example, to say that a read following a write to the same address yields the value written, we would write: +@geekmath:all m: Memory, a: Addr, d, d<\#48>: Data | Read (Write(m,a,d),d<\#48>) =<I> d = d<\#48> +@noindent:which is short for +@geekmath:all m: Memory, a: Addr, d, d<\#48>: Data |<\n> all m<\#48>: Memory | Write (m,m<\#48>,a,d) = Read (m,a,d<\#48>) = d = d<\#48> +@noindent:In Z, assuming appropriate declarations of a schema Memory<I> and a given type <I>Data<I>, the formula would be: +@geekmath: Memory; Memory<\#48>; x!: Data Write Read [x!/d!] x! = d! +@noindent:which is short for +@geekmath: Memory; Memory<\#48>; x!: Data <\n> Memory<\#48><\#145> <\n> Memory<\#146> Write <f"Symbol"><\#113><f$> @sep Memory<\#48> = <f"Symbol"><\#113><f$> @sep Memory<\#48><\#145><\n> Memory<\#146>; d!: Data <\n> Read <f"Symbol"><\#113><f$> @sep Memory = <f"Symbol"><\#113><f$> @sep Memory<\#48><\#145> d! = x!<\n> x! = d! +@noindent:The key semantic difference between signatures and schemas is this. A signature is a set of atoms; its fields are relational constants declared in global scope. A schema, on the other hand, denotes a higher<\#45>order object: a set of functions from field names to values. 
Our approach was motivated by the desire to remain first order, so that the analysis we have developed [3] can be applied. Not surprisingly, there is a cost in expressiveness. We cannot express higher<\#45>order formulas, most notably those involving preconditions. Suppose we want to assert that our write operation has no implicit precondition. In Z, such an assertion is easily written: +@geekmath: Memory; a?: Addr Memory<\#48>; d!: Data Write +@noindent:We might attempt to formulate such an assertion in our notation as follows: +@geekmath:assert {<\n> all m: Memory, a: Addr, d: Data | some m<\#48>: Memory | Write (m,m<\#48>,d,a) } +@noindent:Unfortunately, this has counterexamples such as +@geekmath:Addr = {a0}<\n> Data = {d0}<\n> Memory = {m0, m1}<\n> map = {} +@noindent:in which the <I>map<I> relation lacks an appropriate tuple. Intuitively, the assertion claims that there is no context in which a write cannot proceed; a legitimate counterexample<\#151>but one we certainly did not intend<\#151>simply gives a context in which a memory with the appropriate address<\#45>value mapping is not available. +@body:We have focused in this discussion on schemas. It is worth noting that Z is expressive enough to allow a style of structuring almost identical to ours, simply by declaring signatures as given types, fields and functions as global variables, and by writing facts, and the bodies of functions, as axioms. Field names would have to be globally unique, and the resulting specification would likely be less succinct than if expressed in our notation. +@subsection:6.4 Phenomenology +@noindent:Pamela Zave and Michael Jackson have developed an approach to composing descriptions [18] that objectifies states, events and time intervals, and constrains their properties with global axioms. Objectification allows descriptions to be reduced to a common phenomenology, so that descriptions in different languages, and even in different paradigms can be combined. 
Michael Jackson has argued separately for the importance of objectification as a means of making a more direct connection between a formal description and the informal world: as he puts it, <\#147>domain phenomena are facts about individuals<\#148> [7]. It is reassuring that the concerns of language design and tractability of analysis that motivated our notation are not in conflict with sound method, and it seems that our notation would be a good choice for expressing descriptions in the form that Zave and Jackson have proposed. +@section:7 Evaluation +@subsection:7.1 Merits +@noindent:The key motivations of the design of our mechanism have been minimality and flexibility. It is worth noting how this has been achieved by the <I>omission<I> of certain features: +@point:\alpha 7.1.A There is only one form of semantic structuring; our opinion is that adding extra mechanisms, for example to group operations into classes, does not bring enough benefit to merit the additional complexity, and tends to be inflexible. (Our language does provide some namespace control for signature and paragraph names in the style of Java packages, but this is trivial and does not interact with the basic mechanism). +@point:\alpha 7.1.B There is no subtyping; subsignatures are just subsets of their supersignatures, and have the same type. There are only two types: basic types (for signatures), and relational types (for expressions). Types are not nested. +@point:\alpha 7.1.C There is only one way that formulas are packaged for reuse. The same function syntax is used for observers, operations, refinement relations, etc. The function shorthand syntax unifies the syntax of both declaration and use for explicit and implicit function definitions. +@point:\alpha 7.1.D The values of a signature with fields are just like the values of any basic type; there is nothing like Z<\#48>s notion of a schema binding. 
+@noindent:Our interpretation of a subsignature as a subset of the supersignature appears to be novel as a mechanism for structuring in a specification language. It has three nice consequences: +@point:\alpha 7.1.E <I>Elimination of type coercions<I>. If <I>x<I> belongs to a signature <I>S<I> whose extension <I>S<\#48><I> defines a field <I>f<I>, the expression <I>x.f<I> will just denote an empty set if <I>x<I> does not belong to <I>S<\#48><I>. Contrast this with the treatment of subclasses in the Object Constraint Language [17], for example, which results in pervasive coercions and often prevents the use of set and relation operators (since elements must be coerced one at a time). +@point:\alpha 7.1.F <I>Ease of extension<I>. Constraints can be added to the subsignature simply by writing a constraint that is universally quantified over elements of that subset. +@point:\alpha 7.1.G <I>Definitional extension<I>. We can declare an extension <I>S<\#48><I> of a signature <I>S<I> with additional fields, relate these fields to the fields declared explicitly for <I>S<I>, and then record the fact that <I>S=S<\#48><I> (as illustrated in Section 2.11). The effect is that every atom of <I>S<I> has been extended with appropriately defined fields, which can be accessed whenever an expression denoting such an atom is in scope! We expect to find this idiom especially useful for defining additional fields for visualization purposes. +@subsection:7.2 Deficiencies +@noindent:One might wonder whether, having encoded structures using atoms, and having provided quantifiers over those atoms, one can express arbitrary properties of higher<\#45>order structures. Unfortunately, but not surprisingly, this is not possible. The catch is that fields are treated in any formulas as global variables that are existentially quantified. 
To simulate higher<\#45>order logic, it would be necessary to allow quantifications over these variables, and since they have relational type, that would imply higher<\#45>order quantification. The practical consequence is that properties requiring higher<\#45>order logic cannot be expressed. One cannot assert that the precondition of an operation is no stronger than some predicate; one cannot in general specify operations by minimization; and one cannot express certain forms of refinement check. An example of this problem is given in Section 4.3 above. Whether the problem is fundamental or can be partially overcome remains to be seen. +@body:The treatment of subsignatures as subsets has a nasty consequence. Since a field declared in a subsignature becomes implicitly a field of the supersignature, two subsignatures cannot declare fields of the same name. The extension mechanism is therefore not properly modular, and a specification should use hierarchical structure instead where this matters. +@body:Modelling a set of states as atoms entails a certain loss of abstraction. In this specification +@geekmath:sig A {}<\n> sig S {a: A}<\n> fun op (s,s<\#48>: S) {s.a = s<\#48>.a} +@noindent:the operation <I>op<I> has executions in which the pre<\#45> and post<\#45>states are equal (that is, the same atom in <I>S<I>), and executions in which only their <I>a<I> components are equal. One might object that this distinction is not observable. Moreover, replacing the formula by <I>s=s<\#48><I> would arguably be an overspecification<\#151>a <\#145>bias<\#146> in VDM terminology [8]. The situation calculus [11] solves this problem by requiring every operation to produce a state change: <I>s<I> and <I>s<\#48><I> are thus regarded as distinct situations by virtue of occurring at different points in the execution. The dual of this solution is to add an axiom requiring that no two distinct atoms of <I>S<I> may have equal <I>a<I> fields. 
Either of these solutions is easily imposed in our notation. +@body:Our treatment of scalars and sets uniformly as relations has raised the concern that the resulting succinctness comes with a loss of clarity and redundancy. Extensive use of the previous version of our language, mostly by inexperienced specifiers, suggests that this is not a problem. The loss of some static checking is more than compensated by the semantic analysis that our tool performs. +@section:8 Conclusion +@noindent:Two simple ideas form the basis of our modularity mechanism: (1) that a structure is just a set of atoms, and its fields are global relations that map those atoms to structure components; and (2) that extensions of a structure are just subsets. Our relational semantics, in which all variables and fields are represented as relations, makes the use of structures simple and succinct, and it ensures that the language as a whole remains first order. For a variety of modelling tasks, we believe that our approach provides a useful balance of expressiveness and tractability. +@section:9 Acknowledgments +@noindent:The language described here was refined by experience writing specifications, long before an analyzer existed, and by the development of the analyzer tool itself. Mandana Vaziri and Sarfraz Khurshid were our early adopters, and Brian Lin and Joe Cohen helped implement the tool. The paper itself was improved greatly by comments from Mandana and Sarfraz, from Michael Jackson, from Tomi Mannisto, and especially from Pamela Zave, whose suggestions prompted a major rewrite. Jim Woodcock helped us understand Z, and the clarity and simplicity of his own work has been a source of inspiration to us. Our ideas have also been improved by the comments of the members of IFIP working groups 2.3 and 2.9, especially Tony Hoare, Greg Nelson and Rustan Leino. 
This work was funded in part by ITR grant #0086154 from the National Science Foundation, by a grant from NASA, and by an endowment from Doug and Pat Ross. +@section:10 References +@noindent:[1] R. Duke, G. Rose and G. Smith. Object<\#45>Z: A Specification Language Advocated for the Description of Standards. SVRC Technical Report 94<\#45>45. The Software Verification Research Centre, University of Queensland, Australia. +@ref:[2] John V. Guttag, James J. Horning, and Andres Modet. Report on the Larch Shared Language: Version 2.3. Technical Report 58, Compaq Systems Research Center, Palo Alto, CA, 1990. +@ref:[3] Daniel Jackson. Automating first<\#45>order relational logic. Proc. ACM SIGSOFT Conf. Foundations of Software Engineering. San Diego, November 2000. +@ref:[4] Daniel Jackson. Alloy: A Lightweight Object Modelling Notation. To appear, ACM Transactions on Software Engineering and Methodology, October 2001. +@ref:[5] Daniel Jackson, Ian Schechter and Ilya Shlyakhter. Alcoa: the Alloy Constraint Analyzer. Proc. International Conference on Software Engineering, Limerick, Ireland, June 2000. +@ref:[6] Daniel Jackson and Jeannette Wing. Lightweight Formal Methods. In: H. Saiedian (ed.), An Invitation to Formal Methods. IEEE Computer, 29(4):16<\#45>30, April 1996. [7] Michael Jackson. Software Requirements and Specifications: A Lexicon of Practice, Principles and Prejudices. Addison<\#45>Wesley, 1995. +@ref:[8] Cliff Jones. Systematic Software Development Using VDM. Second edition, Prentice Hall, 1990. +@ref:[9] Kathleen Jensen and Nicklaus Wirth. Pascal: User Manual and Report. Springer<\#45># Verlag, 1974. +@ref:[10] K. Rustan M. Leino and Greg Nelson. Data abstraction and information hiding . Research Report 160, Compaq Systems Research Center, November 2000. +@ref:[11] Hector Levesque, Fiora Pirri, and Ray Reiter. Foundations for the Situation Calculus. Linköping Electronic Articles in Computer and Information Science, ISSN 1401<\#45>9841, Vol. 3(1998), Nr. 018. 
+@ref:[12] Robin Milner, Mads Tofte and Robert Harper. The Definition of Standard ML. MIT Press, 1990. +@ref:[13] S. Owre, N. Shankar, J. M. Rushby, and D. W. J. Stringer<\#45>Calvert. PVS Language Reference. Computer Science Laboratory, SRI International, Menlo Park, CA, September 1999. +@ref:[14] J. Michael Spivey. The Z Notation: A Reference Manual. Second edition, Prentice Hall, 1992. +@ref:[15] Ian Toyn et al. Formal Specification<\#151>Z Notation<\#151>Syntax, Type and Semantics. Consensus Working Draft 2.6 of the Z Standards Panel BSI Panel IST/5/<\#45>/19/2 (Z Notation). August 24, 2000. +@ref:[16] Jim Woodcock and Jim Davies. Using Z: Specification, Refinement and Proof. Prentice Hall, 1996. +@ref:[17] Jos Warmer and Anneke Kleppe. The Object Constraint Language: Precise Modeling with UML. Addison Wesley, 1999. +@ref:[18] Pamela Zave and Michael Jackson. Conjunction as Composition. ACM Transactions on Software Engineering and Methodology II(4): 379<\#150>411, October 1993. \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.tag.txt.xtg b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.tag.txt.xtg new file mode 100755 index 00000000..90947f50 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.tag.txt.xtg @@ -0,0 +1 @@ +@title:A Micromodularity Mechanism diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.txt new file mode 100755 index 00000000..a9db1cca --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse-real.txt @@ -0,0 +1,1056 @@ +\preamble +\loadchars{k:\Research\Tagger\maps\standard-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathsym-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathit-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathext-charmap.txt} +\loadchars{k:\Research\Tagger\maps\symbol-charmap.txt} 
+\loadstyles{k:\Research\Tagger\examples\styles.txt} + +\title A Micromodularity Mechanism + +\section Testing + +This is gamma: \gamma.\\ +This is Delta: \Delta.\\ +This is oplus: \oplus. +\scriptA \arrowdblright \scriptA + +This is a subscripted variable: A\sub<\bold<hello>\italics<there>>. +Math mode: $x + 2 = y, and && x\sub<2> = y\sub<3> = x\sub<ijk>$ + +\author Daniel Jackson, Ilya Shlyakhter and Manu Sridharan\\ +Laboratory for Computer Science\\ +Massachusetts Institute of Technology\\ +Cambridge, Massachusetts, USA\\ +dnj@mit.edu + +\opening Abstract + +A simple mechanism for structuring specifications is described. By modelling structures as atoms, it remains entirely first-order and thus amenable to automatic analysis. And by interpreting fields of structures as relations, it allows the same relational operators used in the formula language to be used for dereferencing. An extension feature allows structures to be developed incrementally, but requires no textual inclusion nor any notion of subtyping. The paper demonstrates the flexibility of the mechanism by application in a variety of common idioms. + +\subsection* Categories and Subject Descriptors + +D.2.1 Requirements/Specifications---Languages; D.2.4 Software/Program Verification---Formal methods, Model checking; F.3.1 Specifying and Verifying and Reasoning about Programs---Assertions, Invariants, Specification techniques. + +\subsection* General Terms + +Design; Documentation; Languages; Verification. + +\subsection* Keywords + +Modeling languages; formal specification; first-order logic; relational calculus; Alloy language; Z specification language; schema calculus. 
+ +\section* Introduction + +\quote I am neither crazy nor a micromaniac.\\ +(A micromaniac is someone obsessed with\\ +reducing things to their smallest possible form.\\ +This word, by the way, is not in the dictionary.)\\ +--_Edouard de Pomiane, French Cooking in Ten Minutes, 1930_ + +\noindent Most specification languages provide mechanisms that allow larger specifications to be built from smaller ones. These mechanisms are often the most complicated part of the language, and present obstacles to analysis. This paper presents a simple mechanism that seems to be expressive enough for a wide variety of uses, without compromising analyzability. + +This work is part of a larger project investigating the design of a "micro modelling language". Our premise is that lightweight application of formal methods [6] demands an unusually small and simple language that is amenable to fully automatic semantic analysis. The Alloy language is the result to date of our efforts to design such a language. Based on our experiences with the language [4] and its analyzer [5], we have recently developed a revision of Alloy that overcomes many of its limitations. This paper describes the key feature of the revised language: the _signature_, a new modularity mechanism. + +The mechanism allows our existing analysis scheme [3] to be applied to specifications involving structures. This is not achieved by treating the structuring mechanism as a syntactic sugar, which would limit the power of the notation (ruling out, for example, quantification over structures) and would complicate the analysis tool and make output harder for users to interpret. Because of the mechanism's generality, it has also enabled us to simplify the language as a whole, making it more uniform and eliminating some ad hoc elements. + +Our mechanism has a variety of applications. 
It can express inherent structure in the system being modelled, and can be used to organize a specification in which details are added incrementally. It can be used to construct a library of datatypes, or to describe a system as an instantiation of a more general system. And it can express state invariants, transitions, and sequences, despite the lack of any special syntax for state machines. + +In this last respect, the new language differs most markedly from its predecessor [4], which provided built-in notions of state invariants and operations. We now think this was a bad idea, because it made the language cumbersome for problems (such as the analysis of security policies or architectural topology constraints) in which temporal behaviour can be fruitfully ignored, and too inflexible for many problems in which temporal behaviour is important. + +#Because the notation as a whole is small, simple and analyzable, and free of bias towards any particular domain of application, it may be suitable as an intermediate language. A tool for architectural design, for example, might translate a more domain-specific notation into our language, allowing analyses that such tools do not currently support (such as automatic generation of sample configurations from style rules, and checking of consistency). +# +Our paper begins by explaining our motivations---the requirements our mechanism is designed to meet. The mechanism is then presented first informally in a series of examples, and then slightly more rigorously feature-by-feature. We discuss related work, especially the schema calculus of Z, and close with a summary of the merits and deficiencies of our notation as a whole. 
+ +\section Requirements + +The goal of this work was to find a single structuring mechanism that would support a variety of common specification idioms: + +\point \cdot _States_: description of complex state as a collection of named components; incremental description both by hierarchy, in which a complex state becomes a component of a larger state, and by extension, in which new components are added; declaration of invariants and definitions of derived components; + +\point \cdot _Datatypes_: separate description of a library of polymorphic datatypes, such as lists, sequences, trees and orders, along with their operators; + +\point \cdot _Transitions_: specification of state transitions as operations described implicitly as formulas relating pre- and post-state; composition of operations from previously defined invariants and operations; sequential composition of operations; description of traces as sequences of states; + +\point \cdot _Abstractions_: description of abstraction relations between state spaces; + +\point \cdot _Assertions_: expression of properties intended to be redundant, to be checked by analysis, including: relationships amongst invariants; wellformedness of definitions (eg, that an implicit definition is functional); establishment and preservation of invariants by operations; properties of states reachable along finite traces; and simulation relationships between abstract and concrete versions of an operation. + +\noindent We wanted additionally to meet some more general criteria: + +\point \cdot _Simplicity_. The language as a whole should be exceptionally small and simple. + +\point \cdot _Flexibility_. Support for the particular idioms of state-machine specification should not be a straitjacket; the language should not dictate how state machines are expressed, and should not make it hard to describe structures that are not state machines (such as security models and architectural styles). + +\point \cdot _Analyzability_. 
A fully automatic semantic analysis should be possible. In the present work, this has been achieved by requiring that the modularity mechanism be first order, and expressible in the kernel of the existing language. + +\noindent Finally, our language design decisions have been influenced by some principles that we believe contribute to these goals, make the language easier to use, and analysis tools easier to build: + +\point \cdot _Explicitness_. The language should be fully explicit, with as few implicit constraints, coercions, etc, as possible. + +\point \cdot _Minimal mathematics_. The basic theory of sets and relations should suffice; it should not be necessary to introduce domains, fixed points, infinities or special logical values. + +\point \cdot _Minimal syntax_. There should be very few keywords or special symbols, and no need for special typography or layout. + +\point \cdot _Uniformity_. A small and general set of constructs should be applied uniformly, independent of context. + +\point \cdot _Lack of novelty_. Whenever possible, notions and syntax should follow standard usage of conventional mathematics and programming. + +\section Informal Description + +As a running example, we will specify a simple memory system involving a cache and a main memory. The memory has a fixed set of addresses and associates a data value with each address. The cache, in contrast, associates data values with some subset of addresses that varies over time. The cache is updated by a "write-back scheme", which means that updates need not be reflected to main memory immediately. The cache may therefore hold a more current value for an address than the main memory; the two are brought into alignment when the address is flushed from the cache and its value is written to main memory. 
+ +\subsection States + +We start by declaring the existence of addresses and data values: + +\geekmath sig Addr {}\\ +sig Data {} + +Each line declares a _signature_, and introduces a set of atoms: _Addr_ for the set of addresses, and _Data_ for the set of data values. Like 'given types' in Z, these sets are disjoint from one another, and their atoms are unstructured and uninterpreted. Signature names can be used as expressions denoting sets, but they are also treated as types, so the expression _Addr+Data_, for example, is ill-typed, since the union operator (+) requires the types of its operands to match. + +The signature declaration + +\geekmath sig Memory {\\ + addrs: set Addr,\\ + map: addrs ->! Data\\ + } + +likewise declares a set of atoms, _Memory_, corresponding to the set of all possible memories. In addition, it declares two fields: _addrs_ and _map_ which associate with a memory a set of addresses and a mapping from addresses to data values respectively. Thus, given a memory _m_, the expression _m.addrs_ will be a set of addresses, _m.map_ will be a relation from addresses to data values. The memory, addresses and data values should be viewed as distinct atoms in their own right; fields don't decompose an atom, but rather relate one atom to others. The exclamation mark in the declaration of the field _map_ is a 'multiplicity marking': it says that _m.map_ associates exactly one data value with each address in the set _m.addrs_. The use of _addrs_ rather than _Addr_ on the left side of the arrow indicates that _m.map_ does not associate a data value with an address that is not in the set _m.addrs_. + +In these expressions, the dot is simply relational image. More precisely, when we say that _m_ is a memory, we mean that the expression _m_ denotes a set consisting of a single atom. The field _addrs_ is a relation from _Memory_ to _Addr_, and _m.addrs_ denotes the image of the singleton set under this relation. 
So for a set of memories _ms_, the expression _ms.addrs_ will denote the union of the sets of addresses that belong to the individual memories. Given an address _a_, the expression _a.(m.map)_ denotes the set of data values associated with address _a_ in memory _m_, which will either be empty (when the address is not mapped) or a singleton. For convenience, we allow the relational image _s.r_ to be written equivalently as _r_[_s_], where [] binds more loosely than dot, so this expression may be written as _m.map_[_a_] instead. + +Like objects of an object-oriented language, two distinct atoms can have fields of the same value. Unlike objects, however, atoms are immutable. Each field is fixed, and cannot map an atom to one value at one time and another value at another time. To describe an operation that changes the state of a memory, therefore, we will use two distinct atoms in the set _Memory_ to represent the memory's state before and after. + +\subsection Extension + +A signature declaration can introduce a set as a subset of one previously declared, in which case we call it a _subsignature_. In this case, the set does not correspond to a type, but rather its atoms take on the type of the superset. For example, the declaration + +\geekmath sig MainMemory extends Memory {} + +introduces a set of atoms _MainMemory_ representing main memories, which is constrained to be a subset of the set _Memory_. Likewise + +\geekmath sig Cache extends Memory {\\ + dirty: set addrs\\ + } + +introduces a set of atoms _Cache_ representing those memories that can be regarded as caches. It also introduces a field _dirty_ that associates with a cache the set of addresses that is dirty; later, we will use this to represent those addresses for which a cache and main memory differ. Because _Cache_ is a subset of _Memory_, and _m.addrs_ (for any memory _m_) is a subset of _Addr_, the field denotes a relation whose type is from _Memory_ to _Addr_. 
Expressions such as _m.dirty_ are therefore type-correct for a memory _m_, whether or not _m_ is a cache. But since declaration of the field _dirty_ within the signature _Cache_ constrains _dirty_ to be a relation that maps only caches, _m.dirty_ will always denote the empty set when _m_ is not a cache. + +This approach avoids introducing a notion of subtyping. Subtypes complicate the language, and tend to make it more difficult to use. In OCL [17], which models extension with subtypes rather than subsets, an expression such as _m.dirty_ would be illegal, and would require a coercion of _m_ to the subtype _Cache_. Coercions do not fit smoothly into the relational framework; they interfere with the ability to take the image of a set under a relation, for example. Moreover, subtypes are generally disjoint, whereas our approach allows the sets denoted by subsignatures to overlap. In this case, we'll add a constraint (in Section 2.4 below) to ensure that _MainMemory_ and _Cache_ are in fact disjoint. + +Declaring _Cache_ and _MainMemory_ as subsignatures of _Memory_ serves to factor out their common properties. Extension can be used for a different purpose, in which a single signature is developed by repeated extensions along a chain. In this case, the supersignatures may not correspond to entities in the domain being modelled, but are simply artifacts of specification---fragments developed along the way. Z specifications are typically developed in this style. + +\subsection Hierarchy + +The signature declaration also supports hierarchical structuring. We can declare a signature for systems each consisting of a cache and a main memory: + +\geekmath sig System {\\ + cache: Cache,\\ + main: MainMemory\\ + } + +Again, _System_ introduces a set of atoms, and each field represents a relation. The omission of the keyword _set_ indicates that a relation is a total function. 
So for a system _s_, the expression _s.cache_ denotes one cache---that is, a set consisting of a single cache. This is one of very few instances of implicit constraints in our language, which we introduced in order to make declaration syntax conventional. + +Since signatures denote sets of atoms, apparently circular references are allowed. Linked lists, for example, may be modelled like this, exactly as they might be implemented in a language like Java: + +\geekmath sig List {}\\ +sig NonEmptyList extends List {elt: Elt, rest: List} + +There is no recursion here; the field _rest_ is simply a homogeneous relation of type _List_ to _List_, with its domain restricted to the subset _NonEmptyList_. + +\subsection State Properties + +Properties of signature atoms are recorded as logical formulas. To indicate that such a property always holds, we package it as a _fact_. To say that, for any memory system, the addresses in a cache are always addresses within the main memory, we might write: + +\geekmath fact {all s: System | s.cache.addrs in s.main.addrs} + +or, using a shorthand that allows facts about atoms of a signature to be appended to it: + +\geekmath sig System {cache: Cache, main: MainMemory}\\ + {cache.addrs in main.addrs} + +The appended fact is implicitly prefixed by + +\geekmath all this: System | with this | + +in which the _with_ construct, explained in Section 3.6 below, causes the fields implicitly to be dereferences of the atom _this_. + +A fact can constrain atoms of arbitrary signatures; to say that no main memory is a cache we might write: + +\geekmath fact {no (MainMemory & Cache)} + +where _no e_ means that the expression _e_ has no elements, and & is intersection. + +#Again, this is common enough that we provide a shorthand. Declaring a subsignature as _disjoint_ indicates that it shares no atoms with any other subsignatures of the same supersignature. 
So the fact can be replaced by changing our declaration of _MainMemory_ to: +# +#\geekmath disjoint sig MainMemory extends Memory {} +# +Most descriptions have more interesting facts. We can express the fact that linked lists are acyclic, for example: + +\geekmath fact {no p: List | p in p.\hat @sep rest} + +The expression _\hat @sep rest_ denotes the transitive closure of the relation _rest_, so that _p.^rest_ denotes the set of lists reachable from _p_ by following the field _rest_ once or more. This illustrates a benefit of treating a field as a relation---that we can apply standard relational operators to it---and is also an example of an expression hard to write in a language that treats extension as subtyping (since each application of _rest_ would require its own coercion). + +Often we want to define a property without imposing it as a permanent constraint. In that case, we declare it as a _function_. Here, for example, is the invariant that the cache lines not marked as dirty are consistent with main memory: + +\geekmath fun DirtyInv (s: System) {\\ + all a !: s.cache.dirty | s.cache.map[a] = s.main.map[a]\\ + } + +(The exclamation mark negates an operator, so the quantification is over all addresses that are _not_ dirty.) Packaging this as a function that can be applied to a particular system, rather than as a fact for all systems, will allow us to express assertions about preservation of the invariant (Section 2.8). + +By default, a function returns a boolean value---the value of the formula in its body. The value of _DirtyInv(s)_ for a system _s_ is therefore true or false. A function may return non-boolean values. 
We might, for example, define the set of bad addresses to be those for which the cache and main memory differ: + +\geekmath fun BadAddrs (s: System): set Addr {\\ + result = {a: Addr | s.cache.map[a] != s.main.map[a]}\\ + } + +and then write our invariant like this: + +\geekmath fun DirtyInv (s: System) {BadAddrs(s) in s.cache.dirty} + +In this case, _BadAddrs(s)_ denotes a set of addresses, and is short for the expression on the right-hand side of the equality in the definition of the function _BadAddrs_. The use of the function application as an expression does not in fact depend on the function being defined explicitly. Had we written + +\geekmath fun BadAddrs (s: System): set Addr {\\ + all a: Addr | a in result iff s.cache.map[a] != s.main.map[a]\\ + } + +the application would still be legal; details are explained in Section 3.7. +# +# \geekmath BadAddrs(s) in s.cache.dirty +# +# would be treated as short for +# +# \geekmath all result: set Addr |\\ +# (all a: Addr | a in result iff s.cache.map[a] != s.main.map[a])\\ +# => result in s.cache.dirty +# +# This desugaring is explained in more detail in Section 99 below. + +\subsection Operations + +Following Z, we can specify operations as formulas that constrain pre- and post-states. An operation may be packaged as a single function (or as two functions if we want to separate pre- and post-conditions in the style of VDM or Larch). + +The action of writing a data value to an address in memory might be specified like this: + +\geekmath fun Write (m,m': Memory, d: Data, a: Addr) {\\ + m'.map = m.map ++ (a->d)\\ + } + +The formula in the body of the function relates _m_, the value of the memory before, to _m'_, the value after. These identifiers are just formal arguments, so the choice of names is not significant. Moreover, the prime mark plays no special role akin to decoration in Z---it's a character like any other. The operator ++ is relational override, and the arrow forms a cross product. 
As mentioned above, scalars are represented as singleton sets, so there is no distinction between a tuple and a relation. The arrows in the expressions _a->d_ here and _addrs->Data_ in the declaration of the _map_ field of _Memory_ are one and the same. + +The action of reading a data value can likewise be specified as a function, although since it has no side-effect we omit the _m'_ parameter: + +\geekmath fun Read (m: Memory, d: Data, a: Addr) {\\ + d = m.map[a]\\ + } + +Actions on the system as a whole can be specified using these primitive operations; in Z, this idiom is called 'promotion'. A read on the system is equivalent to reading the cache: + +\geekmath fun SystemRead (s: System, d: Data, a: Addr) {\\ + Read (s.cache, d, a)\\ + } + +The _Read_ operation has an implicit precondition. Since the data parameter _d_ is constrained (implicitly by its declaration) to be scalar---that is, a singleton set---the relation _m.map_ must include a mapping for the address parameter _a_, since otherwise the expression _m.map[a]_ will evaluate to the empty set, and the formula will not be satisfiable. This precondition is inherited by _SystemRead_. If the address _a_ is not in the cache, the operation cannot proceed, and it will be necessary first to load the data from main memory. It is convenient to specify this action as a distinct operation: + +\geekmath fun Load (s,s': System, a: Addr) {\\ + a !in s.cache.addrs\\ + s'.cache.map = s.cache.map + (a->s.main.map[a])\\ + s'.main = s.main\\ + } + +The + operator is just set union (in this case, of two binary relations, the second consisting of a single tuple). A write on the system involves a write to the cache, and setting the dirty bit. 
Again, this can be specified using a primitive memory operation: + +\geekmath fun SystemWrite (s,s': System, d: Data, a: Addr) {\\ + Write (s.cache, s'.cache, d, a)\\ + s'.cache.dirty = s.cache.dirty + a\\ + s'.main = s.main\\ + } + +A cache has much smaller capacity than main memory, so it will occasionally be necessary (prior to loading or writing) to flush lines from the cache back to main memory. We specify flushing as a non-deterministic operation that picks some subset of the cache addrs and writes them back to main memory: + +\geekmath fun Flush (s,s': System) {\\ + some x: set s.cache.addrs {\\ + s'.cache.map = s.cache.map - (x->Data)\\ + s'.cache.dirty = s.cache.dirty - x\\ + s'.main.map = s.main.map ++ \\ + {a: x, d: Data | d = s.cache.map[a]}\\ + } + +The - operator is set difference; note that it is applied to sets of addresses (in the third line) and to binary relations (in the second). The comprehension expression creates a relation of pairs _a_->_d_ satisfying the condition. + +Finally, it is often useful to specify the initial conditions of a system. To say that the cache initially has no addresses, we might write a function imposing this condition on a memory system: + +\geekmath fun Init (s: System) {no s.cache.addrs} + +\subsection Traces + +To support analyses of behaviours consisting of sequences of states, we declare two signatures, for ticks of a clock and traces of states: + +\geekmath sig Tick {}\\ +sig SystemTrace {\\ + ticks: set Tick,\\ + first, last: ticks,\\ + next: (ticks - last) !->! (ticks - first),\\ + state: ticks ->! 
System}\\ + {\\ + first.*next = ticks\\ + Init (first.state)\\ + all t: ticks - last | \\ + some s = t.state, s' = t.next.state |\\ + Flush (s,s')\\ + || (some a: Addr | Load (s,s',a))\\ + || (some d: Data, a: Addr | SystemWrite (s,s',d,a))\\ + } + +Each trace consists of a set of _ticks_, a _first_ and _last_ tick, an ordering relation _next_ (whose declaration makes it a bijection from all ticks except the last to all ticks except the first), and a relation _state_ that maps each tick to a system state. + +The fact appended to the signature states first a generic property of traces: that the ticks of a trace are those reachable from the first tick. It then imposes the constraints of the operations on the states in the trace. The initial condition is required to hold in the first state. Any subsequent pair of states is constrained to be related by one of the three side-effecting operations. The existential quantifier plays the role of a _let_ binding, allowing _s_ and _s'_ in place of _t.state_ and _t.next.state_, representing the state for tick _t_ and the state for its successor _t.next_. Note that this formulation precludes stuttering; we could admit it simply by adding the disjunct _s_=_s'_ allowing a transition that corresponds to no operation occurring. + +Bear in mind that this fact is a constraint on all atoms in the set _SystemTrace_. As a free standing fact, the second line of the fact---the initial condition--- would have been written: + +\geekmath fact {all x: SystemTrace | Init ((x.first).(x.state))} + +\subsection Abstraction + +Abstraction relationships are easily expressed using our function syntax. 
To show that our memory system refines a simple memory without a cache, we define an abstraction function _Alpha_ saying that a system corresponds to a memory that is like the system's memory, overwritten by the entries of the system's cache: + +\geekmath fun Alpha (s: System, m: Memory) {\\ + m.map = s.main.map ++ s.cache.map\\ + } + +As another example, if our linked list were to represent a set, we might define the set corresponding to a given list as that containing the elements reachable from the start: + +\geekmath fun ListAlpha (p: List, s: set Elt) {\\ + s = p.*rest.elt\\ + } + +\subsection Assertions + +Theorems about a specification are packaged as _assertions_. An assertion is simply a formula that is intended to hold. A tool can check an assertion by searching for a counterexample---that is, a model of the formula's negation. + +The simplest kinds of assertion record consequences of state properties. For example, + +\geekmath assert {\\ + all s: System | DirtyInv (s) && no s.cache.dirty\\ + => s.cache.map in s.main.map\\ + } + +asserts that if the dirtiness invariant holds, and there are no dirty addresses, then the mapping of addresses to data in the cache is a subset of the mapping in the main memory. + +An assertion can express consequences of operations. For example, + +\geekmath assert {\\ + all s: System, d: Data, a: Addr |\\ + SystemRead (s,d,a) => a in s.cache.addrs\\ + } + +embodies the claim made above that _SystemRead_ has an implicit precondition; it asserts that whenever _SystemRead_ occurs for an address, that address must be in the cache beforehand. An assertion can likewise identify a consequence in the post-state; this assertion + +\geekmath assert {\\ + all s,s': System, d: Data, a: Addr |\\ + SystemWrite (s,s',d,a) => s'.cache.map[a] = d\\ + } + +says that after a _SystemWrite_, the data value appears in the cache at the given address. + +Preservation of an invariant by an operation is easily recorded as an assertion. 
To check that our dirtiness invariant is preserved when writes occur, we would assert + +\geekmath assert {\\ + all s,s': System, d: Data, a: Addr |\\ + SystemWrite (s,s',d,a) && DirtyInv (s) => DirtyInv (s')\\ + } + +Invariant preservation is not the only consequence of an operation that we would like to check that relates pre- and post-states. We might, for example, want to check that operations on the memory system do not change the set of addresses of the main memory. For the _Flush_ operation, for example, the assertion would be + +\geekmath assert {\\ + all s,s': System | Flush(s,s') => s.main.addrs = s'.main.addrs\\ + } + +which holds only because the cache addresses are guaranteed to be a subset of the main memory addresses (by the fact associated with the _System_ signature). + +The effect of a sequence of operations can be expressed by quantifying appropriately over states. For example, + +\geekmath assert {\\ + all s, s': System, a: Addr, d,d': Data | \\ + SystemWrite (s,s',d,a) && SystemRead (s',d',a) => d = d'\\ + } + +says that when a write is followed by a read of the same address, the read returns the data value just written. + +To check that a property holds for all reachable states, we can assert that the property is an invariant of every operation, and is established by the initial condition. This strategy can be shown (by induction) to be sound, but it is not complete. A property may hold for all reachable states, but may not be preserved because an operation breaks the property when executed in a state that happens not to be reachable. + +Traces overcome this incompleteness. 
Suppose, for example, that we want to check the (rather contrived) property that, in every reachable state, if the cache contains an address that isn't dirty, then it agrees with the main memory on at least one address: + +\geekmath fun DirtyProp (s: System) {\\ + some (s.cache.addrs - s.cache.dirty)\\ + => some a: Addr | s.cache.map[a] = s.main.map[a]\\ + } + +We can assert that this property holds in the last state of every trace: + +\geekmath assert {\\ + all t: SystemTrace | with t | DirtyProp (last.state)\\ + } + +This assertion is valid, even though _DirtyProp_ is not an invariant. A write invoked in a state in which all clean entries but one had non-matching values can result in a state in which there are still clean entries but none has a matching value. + +Finally, refinements are checked by assertions involving abstraction relations. We can assert that a _SystemWrite_ refines a basic _Write_ operation on a simple memory: + +\geekmath assert {\\ + all s,s': System, m,m': Memory, a: Addr, d: Data |\\ + Alpha (s,m) && Alpha (s',m') && SystemWrite (s,s',a,d)\\ + => Write (m,m',a,d)\\ + } + +or that the _Flush_ operation is a no-op when viewed abstractly: + +\geekmath assert {\\ + all s,s': System, m,m': Memory |\\ + Alpha (s,m) && Alpha (s',m') && Flush (s,s')\\ + => m.map = m'.map\\ + } + +Note the form of the equality; _m = m'_ would be wrong, since two distinct memories may have the same mapping, and the abstraction _Alpha_ constrains only the mapping and not the memory atom itself. + +Many of the assertions shown here can be made more succinct by the function shorthand explained in Section 3.7 below. 
For example, the assertion that a read following a write returns the value just written becomes: + +\geekmath assert {\\ + all s: System, a: Addr, d: Data | \\ + SystemRead (SystemWrite (s,d,a),a) = d\\ + } + +and the assertion that _Flush_ is a no-op becomes: + +\geekmath assert {\\ + all s: System | Alpha (s).map = Alpha (Flush (s)).map\\ + } + +\subsection Polymorphism + +Signatures can be parameterized by signature types. Rather than declaring a linked list whose elements belong to a particular type _Elt_, as above, we would prefer to declare a generic list: + +\geekmath sig List [T] {}\\ +sig NonEmptyList [T] extends List [T] {elt: T, rest: List [T]} + +Functions and facts may be parameterized in the same way, so we can define generic operators, such as: + +\geekmath fun first [T] (p: List [T]): T {result = p.elt}\\ +fun last [T] (p: List [T]): T {some q: p.*rest | result = q.elt && no q.rest}\\ +fun elements [T] (p: List [T]): set T {result = p.*rest.elt} + +In addition, let's define a generic function that determines whether two elements follow one another in a list: + +\geekmath fun follows [T] (p: List[T], a,b: T) {\\ + some x: p.*rest | x.elt = a && x.next.elt = b\\ + } + +To see how a generic signature and operators are used, consider replacing the traces of Section 2.6 with lists of system states. Define a function that determines whether a list is a trace: + +\geekmath fun isTrace (t: List [System]) {\\ + Init (first(t))\\ + all s, s': System | follows (t,s,s') => {\\ + Flush (s,s')\\ + || (some a: Addr | Load (s,s',a))\\ + || (some d: Data, a: Addr | SystemWrite (s,s',d,a))\\ + }\\ + } + +Now our assertion that every reachable system state satisfies _DirtyProp_ can now be written: + +\geekmath assert {\\ + all t: List[System] | isTrace(t) => DirtyProp (last(t))\\ + } + +\subsection Variants + +To illustrate the flexibility of our notation, we sketch a different formulation of state machines oriented around transitions rather than states. 
+ +Let's introduce a signature representing state transitions of our memory system: + +\geekmath sig SystemTrans {pre,post: System}\\ + {pre.main.addrs = post.main.addrs} + +Declaring the transitions as a signature gives us the opportunity to record properties of all transitions---in this case requiring that the set of addresses of the main memory is fixed. + +Now we introduce a subsignature for the transitions of each operation. For example, the transitions that correspond to load actions are given by: + +\geekmath sig LoadTrans extends SystemTrans {a: Addr}\\ + {Load (pre, post, a)} +# } { +# a !in pre.cache.addrs\\ +# post.cache.map = pre.cache.map ++ (a->pre.main.map[a])\\ +# post.main = pre.main\\ +# } +# +# The formula here is actually identical to the one declared above, but with _pre_ and _post_ for # _s_ and _s'_ ; we could in fact replace it by the function application _Load(pre,post,a)_. + +For each invariant, we define a set of states. For the states satisfying the dirty invariant, we might declare + +\geekmath sig DirtyInvStates extends System {} + +along with the fact + +\geekmath fact {DirtyInvStates = {s: System | DirtyInv(s)}} + +To express invariant preservation, it will be handy to declare a function that gives the image of a set of states under a set of transitions: + +\geekmath fun postimage (ss: set System, tt: set SystemTrans): set System {\\ + result = {s: System | some t: tt | t.pre in ss && s = t.post}\\ + } + +so that we can write the assertion like this: + +\geekmath assert {postimage (DirtyInvStates, LoadTrans) in DirtyInvStates} + +For an even more direct formulation of state machine properties, we might have defined a transition relation instead: + +\geekmath fun Trans (r: System -> System) {\\ + all s, s' : System | \\ + s->s' in r => Flush (s,s') || ...\\ + } + +Then, using transitive closure, we can express the set of states reachable from an initial state, and assert that this set belongs to the set characterized by some property: 
+ +\geekmath assert {all r: System -> System, s: System |\\ + Init (s) && Trans(r) => s.*r in DirtyPropStates\\ + } + +where _DirtyPropStates_ is defined analogously to _DirtyInvStates_. + +\subsection Definitions + +Instead of declaring the addresses of a memory along with its mapping, as we did before: + +\geekmath sig Memory {\\ + addrs: set Addr,\\ + map: addrs ->! Data\\ + } + +we could instead have declared the mapping alone: + +\geekmath sig Memory {\\ + map: Addr ->? Data\\ + } + +and then _defined_ the addresses using a subsignature: + +\geekmath sig MemoryWithAddrs extends Memory {\\ + addrs: set Addr}\\ + {addrs = {a: Addr | some a.map}} + +Now by making the subsignature subsume all memories: + +\geekmath fact {Memory in MemoryWithAddrs} + +we have essentially 'retrofitted' the field. Any formula involving memory atoms now implicitly constrains the _addrs_ field. For example, we can assert that _Read_ has an implicit precondition requiring that the argument be a valid address: + +\geekmath assert {all m: Memory, a: Addr, d: Data | Read (m,d,a) => a in m.addrs} + +even though the specification of _Read_ was written when the field _addrs_ did not even exist. + +\section Semantics + +For completeness, we give an overview of the semantics of the language. The novelties with respect to the original version of Alloy [4] are (1) the idea of organizing relations around basic types as signatures, (2) the treatment of extension as subsetting, and (3) the packaging of formulas in a more explicit (and conventional) style. The semantic basis has been made cleaner, by generalizing relations to arbitrary arity, eliminating 'indexed relations' and the need for a special treatment of sets. + +\subsection Types + +We assume a universe of atoms. The standard notion of a mathematical relation gives us our only composite datatype. The value of an expression will always be a relation---that is, a collection of tuples of atoms. 
Relations are first order: the elements of a tuple are themselves atoms and never relations. + +The language is strongly typed. We partition the universe into subsets each associated with a _basic_ type, and write (T_1, T_2, ..., T_n) for the type of a relation whose tuples each consist of _n_ atoms, with types T_1, T_2, etc. + +A set is represented semantically as a unary relation, namely a relation whose tuples each contain one atom. A tuple is represented as a singleton relation, namely a relation containing exactly one tuple. A scalar is represented as a unary, singleton relation. We use the terms 'set', 'tuple' and 'scalar' to describe relations with the appropriate properties. Basic types are used only to construct relation types, and every expression that appears in a specification has a relational type. Often we will say informally that an expression has a type _T_ where _T_ is the name of a basic type when more precisely we mean that the expression has the type (_T_). + +So, in contrast to traditional mathematical style, we do not make distinctions amongst the atom _a_, the tuple (_a_), the set {_a_} containing just the atom, or the set {(_a_)} containing the tuple, and represent all of these as the last. This simplifies the semantics and gives a more succinct and uniform syntax. +# Because the language is first order (and has no sets of sets, for example), it requires no coercions, and seems not to cause confusion even for novice specifiers. + +\subsection Expression Operators + +Expressions can be formed using the standard set operators written as ASCII characters: union (+), intersection (&) and difference (-). Some standard relational operators, such as transpose (~) and transitive closure (^), can be applied to expressions that denote binary relations. Relational override (++) has its standard meaning for binary relations but can be applied more broadly. +#The type rules and semantics are completely standard. 
For example, if _e_ has the type (S,T), then ~_e_ has the type (T,S) and denotes the collection of pairs obtained by reversing each pair in _e_; if _p_ and _q_ both have the type (T_1, T_2, ..., T_n), then the union _p+q_, intersection _p_&_q_, and difference _p-q_ also have that type, and denote respectively the relations whose tuples are those that appear in either of _p_ and _q_, both of _p_ and _q_, and _p_ but not _q_. + +There are two special relational operators, dot and arrow. The dot operator is a generalized relational composition. Given expressions _p_ and _q_, the expression _p.q_ contains the tuple +@math \langle@sep p_1, ..., p_{m-1}, q_2, ..., q_n\rangle +when _p_ contains +@math \langle@sep p_1, ..., p_{m}\rangle, +_q_ contains +@math \langle@sep q_1, ... q_n\rangle, +and +@math p_m = q_1. The last type of _p_ and the first type of _q_ must match, and _m_ + _n_, the sum of the arities of _p_ and _q_, must be three or more so that the result is not degenerate. When _p_ is a set and _q_ is a binary relation, the composition _p.q_ is the standard relational image of _p_ under _q_; when _p_ and _q_ are both binary relations, _p.q_ is standard relational composition. In all of the examples above, the dot operator is used only for relational image. + +The arrow operator is cross product: _p \textarrow q_ is the relation containing the tuple +@math \langle@sep p_1, ..., p_{m}, q_1, ... q_n\rangle +when _p_ contains +@math \langle@sep p_1, ..., p_{m}\rangle, +and _q_ contains +@math \langle@sep q_1, ... q_n\rangle. +In all the examples in this paper, _p_ and _q_ are sets, and _p \textarrow q_ is their standard cross product. + +\subsection Formula Operators + +Elementary formulas are formed from the subset operator, written _in_. Thus _p in q_ is true when every tuple in _p_ is in _q_. The formula _p : q_ has the same meaning, but when _q_ is a set, adds an implicit constraint that _p_ be scalar (ie, a singleton). 
This constraint is overridden by writing _p: option q_ (which lets _p_ be empty or a scalar) or _p: set q_ (which eliminates the constraint entirely). Equality is just standard set equality, and is short for a subset constraint in each direction. + +An arrow that appears as the outermost expression operator on the right-hand side of a subset formula can be annotated with _multiplicity markings_: + (one or more), ? (zero or one) and ! (exactly one). The formula + +\geekmath r: S m \textarrow n T + +where _m_ and _n_ are multiplicity markings constrains the relation _r_ to map each atom of _S_ to _n_ atoms of _T_, and to map _m_ atoms of _S_ to each atom of _T_. _S_ and _T_ may themselves be product expressions, but are usually variables denoting sets. For example, + +\geekmath r: S \textarrow ! T\\ +r: S ? \textarrow ! T + +make _r_ respectively a total function on _S_ and an injection. + +Larger formulas are obtained using the standard logical connectives: && (and), || (or), ! (not), => (implies), _iff_ (bi-implication). The formula _if b then f else g_ is short for _b_ => _f_ && !_b_ => _g_. Within curly braces, consecutive formulas are implicitly conjoined. + +Quantifications take their usual form: + +\geekmath all x: e | F + +is true when the formula _F_ holds under every binding of the variable _x_ to a member of the set _e_. In addition to the standard quantifiers, _all_ (universal) and _some_ (existential), we have _no_, _sole_ and _one_ meaning respectively that there are no values, at most one value, and exactly one value satisfying the formula. For a quantifier _Q_ and expression _e_, the formula _Q e_ is short for _Q x: T | e_ (where _T_ is the type of _e_), so _no e_, for example, says that _e_ is empty. + +The declaration of a quantified formula is itself a formula---an elementary formula in which the left-hand side is a variable. Thus + +\geekmath some x = e | F + +is permitted, and is a useful way to express a _let_ binding. 
Quantifiers may be higher-order; the formula + +\geekmath all f: s ->! t | F + +is true when _F_ holds for every binding of a total function from _s_ to _t_ to the variable _f_. Our analysis tool cannot currently handle higher-order quantifiers, but many uses of higher-order quantifiers that arise in practice can be eliminated by skolemization. + +Finally, we have relational comprehensions; the expression + +\geekmath {x_1: e_1, x_2: e_2, ... | F} + +constructs a relation of tuples with elements _x_1_, _x_2_, etc., drawn from set expressions _e_1_, _e_2_, etc., whose values satisfy _F_. + +# \subsection Choice of Operator Symbols +# +# The choice of symbols, especially the arrow, may seem unconventional, but results in familiar-# looking formulas. The dot operator generalizes the 'navigation expressions' of Syntropy# [CD94], now adopted by UML's Object Constraint Language [17], and is intended to be fa# miliar to programmers by resembling object dereferencing. Thus, _x.f_ can be viewed as dere# ferencing the object _x_ with field _f_ when _x_ is a scalar and _f_ is a binary relation. The cho# ice of relational composition rather than function application allows such an expression to be wr# itten without concern for whether _f_ is a function. It also gives a simple and workable treatmen# t of partiality. When _x_ is not in the domain of _f_, _x.f_ is the empty set, and _x.f = y_ will be# false if _y_ is a scalar. +# +# The arrow notation is designed to allow declarations to be written in a familiar way, but to be # given a simple, first-order interpretation. For example, if _S_ and _T_ denote sets, +# +# \geekmath f: S \textarrow T +# +# declares _f_ to be a binary relation from _S_ to _T_. A conventional interpretation would have # the arrow construct a set of relations---a higher-order notion. Instead, we interpret the arrow # as cross product and the colon as subset, with the same result. 
The choice of arrow is also # convenient for constructing tuples; when _x_ and _y_ are scalars, the formula# +# +# \geekmath r' = r + (x \textarrow y) +# +# makes _r'_ the relation containing the tuples of _r_, and additionally, a mapping from _x_ to # _y_. # +\subsection Signatures + +A _signature_ declaration introduces a basic type, along with a collection of relations called _fields_. The declaration + +\geekmath sig S {f: E} + +declares a basic type _S_, and a relation _f_. If _E_ has the type (T_1, T_2, ..., T_n), the relation _f_ will have the type (S, T_1, T_2, ..., T_n), and if _x_ has the type _S_, the expression _x.f_ will have the same type as _E_. When there are several fields, field names already declared may appear in expressions on the right-hand side of declarations; in this case, a field _f_ is typed as if it were the expression _this.f_, where _this_ denotes an atom of the signature type (see Section 3.6). + +The meaning of a specification consisting of a collection of signature declarations is an assignment of values to global constants-- the signatures and the fields. For example, the specification + +\geekmath sig Addr {}\\ +sig Data {}\\ +sig Memory {map: Addr -> Data} + +has 4 constants---the three signatures and one field---with assignments such as: + +\geekmath Addr = {a0, a1}\\ +Data = {d0, d1, d2}\\ +Memory = {m0, m1}\\ +map = {(m0,a0,d0), (m1,a0,d1), (m1,a0,d2)} + +corresponding to a world in which there are 2 addresses, 3 data values and 2 memories, with the first memory (_m0_) mapping the first address (_a0_) to the first data value (_d0_), and the second memory (_m1_) mapping the first address (_a0_) both to the second (_d1_) and third (_d2_) data values. + +A fact is a formula that constrains the constants of the specification, and therefore tends to reduce the set of assignments denoted by the specification. 
For example, + +\geekmath fact {all m: Memory | all a: Addr | sole m.map[a]} + +rules out the above assignment, since it does not permit a memory (such as _m1_) to map an address (such as _a0_) to more than one data value. + +The meaning of a function is a set of assignments, like the meaning of the specification as a whole, but these include bindings to parameters. For example, the function + +\geekmath fun Read (m: Memory, d: Data, a: Addr) {\\ + d = m.map[a]\\ + } + +has assignments such as: + +\geekmath Addr = {a0, a1}\\ +Data = {d0, d1, d2}\\ +Memory = {m0, m1}\\ +map = {(m0,a0,d1)}\\ +m = {m0}\\ +d = {d1}\\ +a = {a0} + +The assignments of a function representing a state invariant correspond to states satisfying the invariant; the assignments of a function representing an operation (such as _Read_) correspond to executions of the operation. + +An assertion is a formula that is claimed to be _valid_: that is, true for every assignment that satisfies the facts of the specification. To check an assertion, one can search for a _counterexample_: an assignment that makes the formula false. +For example, the assertion + +\geekmath assert {\\ + all m,m': Memory, d: Data, a: Addr | Read (m,d,a) => Read (m',d,a)} + +which claims, implausibly, that if a read of memory _m_ returns _d_ at _a_, then so does a read at memory _m'_, has the counterexample + +\geekmath Addr = {a0}\\ +Data = {d0,d1}\\ +Memory = {m0, m1}\\ +map = {(m0,a0,d0), (m1,a0,d1)} + +To find a counterexample, a tool should negate the formula and then skolemize away the bound variables, treating them like the parameters of a function, with values to be determined as part of the assignment. In this case, the assignment might include: + +\geekmath m = {m0}\\ +m' = {m1}\\ +d = {d0}\\ +a = {a0} + +\subsection Extension + +Not every signature declaration introduces a new basic type. 
A signature declared without an extension clause is a _type signature_, and creates both a basic type and a set constant of the same name. A signature _S_ declared as an extension is a _subsignature_, and creates only a set constant, along with a constraint making it a subset of each _supersignature_ listed in the extension clause. The subsignature takes on the type of the supersignatures, so if there is more than one, they must therefore have the same type, by being direct or indirect subsignatures of the same type signature. + +A field declared in a subsignature is as if declared in the corresponding type signature, with the constraint that the domain of the field is the subsignature. For example, + +\geekmath sig List {}\\ +sig NonEmptyList extends List {elt: Elt,rest: List} + +makes _List_ a type signature, and _NonEmptyList_ a subset of _List_. The fields _elt_ and _rest_ map atoms from the type _List_, but are constrained to have domain _NonEmptyList_. Semantically, it would have been equivalent to declare them as fields of _List_, along with facts constraining their domains: + +\geekmath sig List {elt: Elt,rest: List}\\ +sig NonEmptyList extends List {}\\ +fact {elt.Elt in NonEmptyList}\\ +fact {rest.List in NonEmptyList} + +(exploiting our dot notation to write the domain of a relation _r_ from _S_ to _T_ as _r.T_). + +\subsection Overloading and Implicit Prefixing + +Whenever a variable is declared, its type can be easily obtained from its declaration (from the type of the expression on the right-hand side of the declaration), and every variable appearing in an expression is declared in an enclosing scope. The one complication to this rule is the typing of fields. + +For modularity, a signature creates a local namespace. Two fields with the name _f_ appearing in different signatures do not denote the same relational constant. Interpreting an expression therefore depends on first resolving any field names that appear in it. 
+#We have devised a simple resolution scheme whose details are beyond the scope of this paper. +In an expression of the form _e.f_, the signature to which _f_ belongs is determined according to the type of _e_. To keep the scheme simple, we require that sometimes the specifier resolve the overloading explicitly by writing the field _f_ of signature _S_ as _S$f_. (At the end of the previous section, for example, the reference in the fact to _rest_ should actually be to _List$rest_, since the context does not indicate which signature _rest_ belongs to.) + +In many formulas, a single expression is dereferenced several times with different fields. A couple of language features are designed to allow these formulas to be written more succinctly, and, if used with care, more comprehensibly. First, we provide two syntactic variants of the dot operator. Both _p_::_q_ and _q_[_p_] are equivalent to _p.q_, but have different precedence: the double colon binds more tightly than the dot, and the square brackets bind more loosely than the dot. Second, we provide a _with_ construct similar to Pascal's that makes dereferencing implicit. + +Consider, for example, the following simplified signature for a trace: + +\geekmath sig Trace {\\ + ticks: set Tick,\\ + first: Tick,\\ + next: Tick -> Tick,\\ + state: Tick -> State\\ + } + +Each trace _t_ has a set of ticks _t.ticks_, a first tick _t.first_, an ordering _t.next_ that maps ticks to ticks, and a relation _t.state_ mapping each tick to a state. For a trace _t_ and tick _k_, the state is _k_.(_t.state_); the square brackets allow this expression to be written instead as _t.state_[_k_]. To constrain _t.ticks_ to be those reachable from _t. 
first_ we might write: + +\geekmath fact {all t: Trace | (t.first).*(t.next ) = t.ticks} + +Relying on the tighter binding of the double colon, we can eliminate the parentheses: + +\geekmath fact {all t: Trace | t::first.*t::next = t.ticks} + +Using _with_, we can make the _t_ prefixes implicit: + +\geekmath fact {all t: Trace | with t | first.*next = ticks} + +In general, _with e | F_ is like _F_, but with _e_ prefixed wherever appropriate to a field name. Appropriateness is determined by type: _e_ is matched to any field name with which it can be composed using the dot operator. +#Fields that are prefixed using a double colon operator are not automatically prefixed, so one can use _with_ to prefix some fields of a given signature but not others. There is a corresponding _with_ construct for expressions also, so that _with e | E_ is like the expression _E_, with _e_ prefixed as appropriate. +A fact attached to a signature _S_ is implicitly enclosed by _all this: S | with this |_, and the declarations of a signature are interpreted as constraints as if they had been declared within this scope. Consequently, the declaration of _first_ above should be interpreted as if it were the formula: + +\geekmath all this: Trace | with this | first: ticks + +which is equivalent to + +\geekmath all this: Trace | this.first: this.ticks + +and should be typed accordingly. +# +# So, in the following fuller version of the above signature: +# +# \geekmath sig Trace {\\ +# ticks: set Tick\\ +# first: ticks,\\ +# next: (ticks - first) ->? ticks\\ +# state: ticks ->! State\\ +# } {first.*next = ticks} +# +# the declaration of the field _first_, for example, includes the constraint +# +# \geekmath all this: Trace | with this | first: ticks +# +# which is equivalent to +# +# \geekmath all this: Trace | this.first: this.ticks + +\subsection Function Applications + +A function may be applied by binding its parameters to expressions. 
The resulting application may be either an expression or a formula, but in both cases the function body is treated as a formula. The formula case is simple: the application is simply short for the body with the formal parameters replaced by the actual expressions (and bound variables renamed where necessary to avoid clashes). + +The expression case is more interesting. The application is treated as a syntactic sugar. Suppose we have a function application expression, _e_ say, of the form + +\geekmath f(a_1, a_2, ..., a_n) + +that appears in an elementary formula _F_. The declaration of the function _f_ must list _n_ + 1 formal arguments, of which the _second_ will be treated as the result. The entire elementary formula is taken to be short for + +\geekmath all result: D | f (a_1, result, a_2, ..., a_n) => F [result/e] + +where _D_ is the right-hand side of the declaration of the missing argument, and _F_ [_result_/_e_] is _F_ with the fresh variable _result_ substituted for the application expression _e_. The application of _f_ in this elaborated formula is now a formula, and is treated simply as an inlining of the formula of _f_. + +#Type checking will thus require that the actual arguments match the formals that are listed first, third, fourth, fifth, etc. (This choice of the second argument, incidentally, is one concession we make to specifying state machines; function applications can be used to model operation invocations in which it is convenient to declare the pre- and post- states as the first and second arguments of the operation.) +# +To see how this works, consider the definition of a function _dom_ that gives the domain of a relation over signature _X_: + +\geekmath fun dom (r: X -> X, d: set X) {d = r.X} + +(We have defined the function monomorphically for a homogeneous relation. In practice, one would define a polymorphic function, but we want to avoid conflating two unrelated issues.) 
Here is a trivial assertion that applies the function as an expression: + +\geekmath assert {all p: X \textarrow X | (dom (p)).p in X} + +Desugaring the formula, we get + +\geekmath all p: X \textarrow X | all result: set X | dom (p, result) => result.p in X + +and then inlining + +\geekmath all p: X \textarrow X | all result: set X | result = p.X => result.p in X + +This formula can be reduced (by applying a universal form of the One Point Rule) to + +\geekmath all p: X \textarrow X | (p.X).p in X + +which is exactly what would have been obtained had we just replaced the application expression by the expression on the right-hand side of the equality in the function's definition! +# +# If there is more than one application expression in an elementary formula, a fresh quantification is # generated for each. For example,# +# +# \geekmath assert {all p, q: X \textarrow X | dom (p.q) in dom (p)} +# +# becomes +# +# \geekmath all p,q: X \textarrow X | all result1, result2: set X | \\ +# dom (p.q, result1) => dom (p, result2) => result1 in result2 +# +# which can again be reduced by inlining and the One Point Rule to +# +# \geekmath all p,q: X \textarrow X | (p.q).X in p.X + +Now let's consider an implicit definition. Suppose we have a signature _X_ with an ordering _lte_, so that _e.lte_ is the set of elements that _e_ is less than or equal to, and a function _min_ that gives the minimum of a set, defined implicitly as the element that is a member of the set, and less than or equal to all members of the set: + +\geekmath sig X {lte: set X}\\ +fun min (s: set X, m: option X) {\\ + m in s && s in m.lte\\ + } + +Because the set may be empty, _min_ is partial. Depending on the properties of _lte_ it may also fail to be deterministic. 
A formula that applies this function + +\geekmath assert {all s: set X | min (s) in s} + +can as before be desugared + +\geekmath all s: set X | all result: option X | min (s, result) => result in s + +and expanded by inlining + +\geekmath all s: set X | all result: option X |\\ + (result in s) && s in result.lte => result in s + +but in this case the One Point Rule is not applicable. + +As a convenience, our language allows the result argument of a function to be declared anonymously in a special position, and given the name _result_. The domain function, for example, can be defined as: + +\geekmath fun dom (r: X -> X): set X {result = r.X} + +How the function is defined has no bearing on how it is used; this definition is entirely equivalent to the one above, and can also be applied as a formula with two arguments. + +\subsection Polymorphism + +Polymorphism is treated as a syntactic shorthand. Lack of space does not permit a full discussion here. + +\section Related Work + +We have shown how a handful of elements can be assembled into a rather simple but flexible notation. The elements themselves are far from novel---indeed, we hope that their familiarity will make the notation easy to learn and use---but their assembly into a coherent whole results in a language rather different from existing specification languages. + +\subsection New Aspects + +The more novel aspects of our work are: + +\point \cdot _Objectification of state_. Most specification languages represent states as cartesian products of components; in our approach, a state, like a member of any signature, is an individual---a distinct atom with identity. A similar idea is used in the situation calculus [11], whose 'relational fluents' add a situation variable to each time-varying relation. The general idea of objectifying all values is of course the foundation of object-oriented programming languages, and was present in LISP. 
Interestingly, object-oriented variants of Z (such as [1]) do not objectify schemas. The idea of representing structures in first-order style as atoms is present also in algebraic specifications such as Larch [2], which treat even sets and relations in this manner. + +\point \cdot _Components as relations_. Interpreting fields of a structure as functions goes back to early work on verification, and is widely used (for example, by Leino and Nelson [10]). We are not aware, however, of specification languages that use this idea, or that flatten fields to relations over atoms. + +\point \cdot _Extension by global axioms_. The 'facts' of our notation allow the properties of a signature to be extended monotonically. The idea of writing axioms that constrain the members of a set constant declared globally is hardly remarkable, but it appears not to have been widely exploited in specification languages. + +\point \cdot _Extension by subset_. Treating the extension of a structure as a refinement modelled by subset results in a simple semantics, and melds well with the use of global axioms. Again, this seems to be an unremarkable idea, but one whose power has not been fully recognized. + +\subsection Old Aspects + +The aspects of our work that are directly taken from existing languages are: + +\point \cdot _Formulas_. The idea of treating invariants, definitions, operations, etc, uniformly as logical formulas is due to Z [14]. + +\point \cdot _Assertions_. Larch [2] provides a variety of constructs for adding intentional redundancy to a specification in order to provide error-detection opportunities. + +\point \cdot _Parameterized formulas_. The 'functional' style we have adopted, in which all formulas are explicitly parameterized, in contrast to the style of most specification languages, is used also by languages for theorem provers, such as PVS [13]. 
VDM [8] offers a mechanism called 'operation quotation' in which pre- and post conditions are reused by interpreting them as functions similar to ours. + +\point \cdot _Parametric Polymorphism_. The idea of parameterizing descriptions by types was developed in the programming languages community, most notably in the context of ML [12]. + +\point \cdot _Implicit Prefixing_. Our 'with' operator is taken from Pascal [9]. + +\point \cdot _Relational operators_. The dot operator, and the treatment of scalars as singletons, comes from the earlier version of Alloy [4]. +# +#\point \cdot _Function shorthands_. The idea of desugaring function applications by quantifying over the result is present in Beth's extensionality theorem [Beth]. + +\subsection Z's Schema Calculus + +Z has been a strong influence on our work; indeed, this paper may be viewed as an attempt to achieve some of the power and flexibility of Z's schema calculus in a first-order setting. Readers unfamiliar with Z can find an excellent presentation of the schema calculus in [16]. The current definitive reference is [15], although Spivey's manual [14] is more accessible for practitioners. + +A _schema_ consists of a collection of variable declarations and a formula constraining the variables. Schemas can be anonymous. When a name has been bound to a schema, it can be used in three different ways, distinguished according to context. First, it can be used as a _declaration_, in which case it introduces its variables into the local scope, constraining them with its formula. Second, where the variables are already in scope, it can be used as a _predicate_, in which case the formula applies and no new declarations are added. Both of these uses are syntactic; the schema can be viewed as a macro. + +In the third use, the schema is semantic. Its name represents a set of _bindings_, each binding being a finite function from variable names to values. 
The bindings denoted by the schema name are the models of the schema's formula: those bindings of variable names to values that make the formula true. + +How a schema is being applied is not always obvious; in the set comprehension {_S_}, for example, _S_ represents a declaration, so that the expression as a whole denotes the same set of bindings as _S_ itself. Given a binding _b_ for a schema with component variable _x_, the expression _b.x_ denotes the value assigned to _x_ in _b_. Unlike Alloy's dot, this dot is a function application, so for a set of bindings _B_, the expression _B.x_ is not well formed. + +Operations in Z are expressed using the convention that primed variables denote components of the post-state. A mechanism known as _decoration_ allows one to write _S'_ for the schema that is like _S_, but whose variable names have been primed. Many idioms, such as promotion, rely on being able to manipulate the values of a schema's variables in aggregate. To support this, Z provides the theta operator: \theta @sep _S_ is an expression that denotes a binding in which each variable _x_ that belongs to _S_ is bound to a variable of the same name _x_ declared in the local scope. Theta and decoration interact subtly: \theta @sep _S'_ is not a binding of _S'_, but rather binds each variable _x_ of _S_ to a variable _x'_ declared locally. So where we would write _s=s'_ to say that pre- and post-states _s_ and _s'_ are the same, a Z specifier would write \theta @sep _S_ = \theta @sep _S'_. This formula equates each component _x_ of _S_ to its matching component _x'_ of _S'_, because _x_ and _x'_ are the respective values bound to _x_ by \theta @sep _S_ and \theta @sep _S'_ respectively. + +Our 'fact' construct allows the meaning of a signature name to be constrained subsequent to its declaration. A schema, in contrast, is 'closed': a new schema name must be introduced for each additional constraint. 
This can produce an undesirable proliferation of names for a system's state, but it does make it easier to track down those formulas that affect a schema's meaning. + +The variables of a schema can be renamed, but cannot be replaced by arbitrary expressions (since this would make nonsense of declarations). This requires the introduction of existential quantifiers where in our notation an expression is passed as an actual. On the other hand, when no renaming is needed, it is more succinct. + +Z's sequential composition operator is defined by a rather complicated transformation, and relies on adherence to particular conventions. The schema _P_ @sep \fatsemi @sep _Q_ is obtained by collecting primed variables in _P_ that match unprimed variables in _Q_; renaming these in both _P_ and _Q_ with a new set of variable names; and then existentially quantifying the new names away. For example, to say that a read following a write to the same address yields the value written, we would write: + +\geekmath +all m: Memory, a: Addr, d, d': Data | Read (Write(m,a,d),d') => d = d' + +which is short for + +\geekmath all m: Memory, a: Addr, d, d': Data |\\ + all m': Memory | Write (m,m',a,d) => Read (m,a,d') => d = d' + +In Z, assuming appropriate declarations of a schema _Memory_ and a given type _Data_, the formula would be: + +\geekmath +\forall Memory; Memory'; x!: Data \fatdot Write \fatsemi Read [x!/d!] \implies x! = d! + +which is short for + +\geekmath +\forall Memory; Memory'; x!: Data \fatdot \\ + \exists Memory'' \fatdot \\ + \exists Memory' \fatdot Write \and \theta @sep Memory' = \theta @sep Memory''\\ + \exists Memory'; d!: Data \fatdot \\ + Read \and \theta @sep Memory = \theta @sep Memory'' \and d! = x!\\ + \implies x! = d! + +The key semantic difference between signatures and schemas is this. A signature is a set of atoms; its fields are relational constants declared in global scope. 
A schema, on the other hand, denotes a higher-order object: a set of functions from field names to values. Our approach was motivated by the desire to remain first order, so that the analysis we have developed [3] can be applied. Not surprisingly, there is a cost in expressiveness. We cannot express higher-order formulas, most notably those involving preconditions. Suppose we want to assert that our write operation has no implicit precondition. In Z, such an assertion is easily written: + +\geekmath +\forall Memory; a?: Addr \fatdot \exists Memory'; d!: Data \fatdot Write + +We might attempt to formulate such an assertion in our notation as follows: + +\geekmath assert {\\ + all m: Memory, a: Addr, d: Data | some m': Memory | Write (m,m',d,a) + } + +Unfortunately, this has counterexamples such as + +\geekmath Addr = {a0}\\ +Data = {d0}\\ +Memory = {m0, m1}\\ +map = {} + +in which the _map_ relation lacks an appropriate tuple. Intuitively, the assertion claims that there is no context in which a write cannot proceed; a legitimate counterexample---but one we certainly did not intend---simply gives a context in which a memory with the appropriate address-value mapping is not available. + +We have focused in this discussion on schemas. It is worth noting that Z is expressive enough to allow a style of structuring almost identical to ours, simply by declaring signatures as given types, fields and functions as global variables, and by writing facts, and the bodies of functions, as axioms. Field names would have to be globally unique, and the resulting specification would likely be less succinct than if expressed in our notation. + +\subsection Phenomenology + +Pamela Zave and Michael Jackson have developed an approach to composing descriptions [18] that objectifies states, events and time intervals, and constrains their properties with global axioms. 
Objectification allows descriptions to be reduced to a common phenomenology, so that descriptions in different languages, and even in different paradigms can be combined. Michael Jackson has argued separately for the importance of objectification as a means of making a more direct connection between a formal description and the informal world: as he puts it, "domain phenomena are facts about individuals" [7]. It is reassuring that the concerns of language design and tractability of analysis that motivated our notation are not in conflict with sound method, and it seems that our notation would be a good choice for expressing descriptions in the form that Zave and Jackson have proposed. + +\section Evaluation + +\subsection Merits + +The key motivations of the design of our mechanism have been minimality and flexibility. It is worth noting how this has been achieved by the _omission_ of certain features: + +\point \cdot There is only one form of semantic structuring; our opinion is that adding extra mechanisms, for example to group operations into classes, does not bring enough benefit to merit the additional complexity, and tends to be inflexible. (Our language does provide some namespace control for signature and paragraph names in the style of Java packages, but this is trivial and does not interact with the basic mechanism). + +\point \cdot There is no subtyping; subsignatures are just subsets of their supersignatures, and have the same type. There are only two types: basic types (for signatures), and relational types (for expressions). Types are not nested. + +\point \cdot There is only one way that formulas are packaged for reuse. The same function syntax is used for observers, operations, refinement relations, etc. The function shorthand syntax unifies the syntax of both declaration and use for explicit and implicit function definitions. 
+ +\point \cdot The values of a signature with fields are just like the values of any basic type; there is nothing like Z's notion of a schema binding. + +Our interpretation of a subsignature as a subset of the supersignature appears to be novel as a mechanism for structuring in a specification language. It has three nice consequences: + +\point \cdot _Elimination of type coercions_. If _x_ belongs to a signature _S_ whose extension _S'_ defines a field _f_, the expression _x.f_ will just denote an empty set if _x_ does not belong to _S'_. Contrast this with the treatment of subclasses in the Object Constraint Language [17], for example, which results in pervasive coercions and often prevents the use of set and relation operators (since elements must be coerced one at a time). + +\point \cdot _Ease of extension_. Constraints can be added to the subsignature simply by writing a constraint that is universally quantified over elements of that subset. + +\point \cdot _Definitional extension_. We can declare an extension _S'_ of a signature _S_ with additional fields, relate these fields to the fields declared explicitly for _S_, and then record the fact that _S=S'_ (as illustrated in Section 2.11). The effect is that every atom of _S_ has been extended with appropriately defined fields, which can be accessed whenever an expression denoting such an atom is in scope! We expect to find this idiom especially useful for defining additional fields for visualization purposes. + +\subsection Deficiencies + +One might wonder whether, having encoded structures using atoms, and having provided quantifiers over those atoms, one can express arbitrary properties of higher-order structures. Unfortunately, but not surprisingly, this is not possible. The catch is that fields are treated in any formulas as global variables that are existentially quantified. 
To simulate higher-order logic, it would be necessary to allow quantifications over these variables, and since they have relational type, that would imply higher-order quantification. The practical consequence is that properties requiring higher-order logic cannot be expressed. One cannot assert that the precondition of an operation is no stronger than some predicate; one cannot in general specify operations by minimization; and one cannot express certain forms of refinement check. An example of this problem is given in Section 4.3 above. Whether the problem is fundamental or can be partially overcome remains to be seen. + +The treatment of subsignatures as subsets has a nasty consequence. Since a field declared in a subsignature becomes implicitly a field of the supersignature, two subsignatures cannot declare fields of the same name. The extension mechanism is therefore not properly modular, and a specification should use hierarchical structure instead where this matters. + +Modelling a set of states as atoms entails a certain loss of abstraction. In this specification + +\geekmath sig A {}\\ +sig S {a: A}\\ +fun op (s,s': S) {s.a = s'.a} + +the operation _op_ has executions in which the pre- and post-states are equal (that is, the same atom in _S_), and executions in which only their _a_ components are equal. One might object that this distinction is not observable. Moreover, replacing the formula by _s=s'_ would arguably be an overspecification---a 'bias' in VDM terminology [8]. The situation calculus [11] solves this problem by requiring every operation to produce a state change: _s_ and _s'_ are thus regarded as distinct situations by virtue of occurring at different points in the execution. The dual of this solution is to add an axiom requiring that no two distinct atoms of _S_ may have equal _a_ fields. Either of these solutions is easily imposed in our notation. 
+ +Our treatment of scalars and sets uniformly as relations has raised the concern that the resulting succinctness comes with a loss of clarity and redundancy. Extensive use of the previous version of our language, mostly by inexperienced specifiers, suggests that this is not a problem. The loss of some static checking is more than compensated by the semantic analysis that our tool performs. + +\section Conclusion + +Two simple ideas form the basis of our modularity mechanism: (1) that a structure is just a set of atoms, and its fields are global relations that map those atoms to structure components; and (2) that extensions of a structure are just subsets. Our relational semantics, in which all variables and fields are represented as relations, makes the use of structures simple and succinct, and it ensures that the language as a whole remains first order. For a variety of modelling tasks, we believe that our approach provides a useful balance of expressiveness and tractability. + +\section* Acknowledgments + +The language described here was refined by experience writing specifications, long before an analyzer existed, and by the development of the analyzer tool itself. Mandana Vaziri and Sarfraz Khurshid were our early adopters, and Brian Lin and Joe Cohen helped implement the tool. The paper itself was improved greatly by comments from Mandana and Sarfraz, from Michael Jackson, from Tomi Mannisto, and especially from Pamela Zave, whose suggestions prompted a major rewrite. Jim Woodcock helped us understand Z, and the clarity and simplicity of his own work has been a source of inspiration to us. Our ideas have also been improved by the comments of the members of IFIP working groups 2.3 and 2.9, especially Tony Hoare, Greg Nelson and Rustan Leino. This work was funded in part by ITR grant #0086154 from the National Science Foundation, by a grant from NASA, and by an endowment from Doug and Pat Ross. 
+ +\section* References + +#\ref [CD94] Steve Cook and John Daniels. Designing Object Systems: Object-Oriented Modelling with Syntropy. Prentice Hall, 1994. +# +\ref [1] R. Duke, G. Rose and G. Smith. Object-Z: A Specification Language Advocated for the Description of Standards. SVRC Technical Report 94-45. The Software Verification Research Centre, University of Queensland, Australia. + +\ref [2] John V. Guttag, James J. Horning, and Andres Modet. Report on the Larch Shared Language: Version 2.3. Technical Report 58, Compaq Systems Research Center, Palo Alto, CA, 1990. + +#\ref [Hal90] Anthony Hall. Using Z as a Specification Calculus for Object-Oriented Systems. In D. Bjorner, C.A.R. Hoare, and H. Langmaack, eds., VDM and Z: Formal Methods in Software Development, Lecture Notes in Computer Science, Volume 428, pp. 290–381, Springer-Verlag, New York, 1990. +# +\ref [3] Daniel Jackson. Automating first-order relational logic. Proc. ACM SIGSOFT Conf. Foundations of Software Engineering. San Diego, November 2000. + +\ref [4] Daniel Jackson. Alloy: A Lightweight Object Modelling Notation. To appear, ACM Transactions on Software Engineering and Methodology, October 2001. + +\ref [5] Daniel Jackson, Ian Schechter and Ilya Shlyakhter. Alcoa: the Alloy Constraint Analyzer. Proc. International Conference on Software Engineering, Limerick, Ireland, June 2000. + +\ref [6] Daniel Jackson and Jeannette Wing. Lightweight Formal Methods. In: H. Saiedian (ed.), An Invitation to Formal Methods. IEEE Computer, 29(4):16-30, April 1996. + +\ref [7] Michael Jackson. Software Requirements and Specifications: A Lexicon of Practice, Principles and Prejudices. Addison-Wesley, 1995. + +\ref [8] Cliff Jones. Systematic Software Development Using VDM. Second edition, Prentice Hall, 1990. + +\ref [9] Kathleen Jensen and Niklaus Wirth. Pascal: User Manual and Report. Springer-Verlag, 1974. + +\ref [10] K. Rustan M. Leino and Greg Nelson. Data abstraction and information hiding. 
Research Report 160, Compaq Systems Research Center, November 2000. + +\ref [11] Hector Levesque, Fiora Pirri, and Ray Reiter. Foundations for the Situation Calculus. Linköping Electronic Articles in Computer and Information Science, ISSN 1401-9841, Vol. 3(1998), Nr. 018. + +\ref [12] Robin Milner, Mads Tofte and Robert Harper. The Definition of Standard ML. MIT Press, 1990. + +\ref [13] S. Owre, N. Shankar, J. M. Rushby, and D. W. J. Stringer-Calvert. PVS Language Reference. Computer Science Laboratory, SRI International, Menlo Park, CA, September 1999. + +\ref [14] J. Michael Spivey. The Z Notation: A Reference Manual. Second edition, Prentice Hall, 1992. + +\ref [15] Ian Toyn et al. Formal Specification---Z Notation---Syntax, Type and Semantics. Consensus Working Draft 2.6 of the Z Standards Panel BSI Panel IST/5/-/19/2 (Z Notation). August 24, 2000. + +\ref [16] Jim Woodcock and Jim Davies. Using Z: Specification, Refinement and Proof. Prentice Hall, 1996. + +\ref [17] Jos Warmer and Anneke Kleppe. The Object Constraint Language: Precise Modeling with UML. Addison Wesley, 1999. + +\ref [18] Pamela Zave and Michael Jackson. Conjunction as Composition. ACM Transactions on Software Engineering and Methodology II(4): 379--411, October 1993. 
diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.index.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.index.txt new file mode 100755 index 00000000..e69de29b diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.qxd b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.qxd new file mode 100755 index 00000000..3d319128 Binary files /dev/null and b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.qxd differ diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.tag.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.tag.txt new file mode 100755 index 00000000..8d5ec1c8 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.tag.txt @@ -0,0 +1,29 @@ +@title:A Micromodularity Mechanism +@author:Daniel Jackson Ilya Shlyakhter and Manu Sridharan<\n> Laboratory for Computer Science<\n> Massachusetts Institute of Technology<\n> Cambridge Massachusetts USA<\n> dnjmit.edu +@title:A Micromodularity Mechanism for <f""><\#161><f$> <f""><\#177><f$> floor left big <f""><\#165><f$> . Daniel Jackson +@section:B Introduction +@noindent:hello there. +@subsection:B.1 First sub +@subsection:B.2 First sub +@noindent: +@subsection:B.3 First sub +@section:C Introduction +@noindent:hello there. +@subsection:C.1 First sub +@subsection:C.2 First sub +@subsubsection:C.2.a First subsub +@subsubsection:C.2.b First subsub +@subsubsection:C.2.c First subsub +@subsubsection:C.2.d First subsub +@subsubsection:C.2.e First subsub +@subsubsection:C.2.f First subsub +@subsubsection:C.2.g First subsub +@noindent: +@subsection:C.3 First sub +@author:Daniel Jackson123 Ilya Shlyakhter and Manu Sridharan<\n> +@section:D +@noindent:Laboratory for Computer Science<\n> dnjmit.edu +@opening:Abstract +@noindent:Hello <\#133> and here A simple mechanism. The paper demonstrates the flexibility of the mechanism by application in a variety of common idioms. 
+@body:<\#147>I really love this <\#145>quote<\#146> kind of thing <\#150> said Ian<\#48>s mother who always primed x<\#48> in <\#146>69.<\#148> Categories and Subject Descriptors +@noindent:D.2.1 RequirementsSpecifications<\#151>Languages Assertions Invariants Specification techniques. This is an ellipsis<\#133>And there you go \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.txt new file mode 100755 index 00000000..ce70c8f8 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/fse.txt @@ -0,0 +1,73 @@ +\preamble +\loadchars{k:\Research\Tagger\maps\standard-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathsym-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathit-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathext-charmap.txt} +\loadstyles{k:\Research\Tagger\examples\styles.txt} + +\title A Micromodularity Mechanism + +\author Daniel Jackson, Ilya Shlyakhter and Manu Sridharan\\ +Laboratory for Computer Science\\ +Massachusetts Institute of Technology\\ +Cambridge, Massachusetts, USA\\ +dnj@mit.edu + +\title A Micromodularity Mechanism for \Gamma \delta + +floor left big: \floorleftbig . + +Daniel Jackson + +\section Introduction + +hello there. + +\subsection First sub + +\subsection First sub + + +\subsection First sub + +\section Introduction + +hello there. + +\subsection First sub + +\subsection First sub + +\subsubsection First subsub + +\subsubsection First subsub + +\subsubsection First subsub + +\subsubsection First subsub + +\subsubsection First subsub + +\subsubsection First subsub + +\subsubsection First subsub + + +\subsection First sub + +\author Daniel Jackson123, Ilya Shlyakhter and Manu Sridharan\\ + +# comment here! + +Laboratory for Computer Science\\ +dnj@mit.edu + +\opening Abstract + +Hello ... and here .. +A simple mechanism. 
The paper demonstrates the flexibility of the mechanism by application in a variety of common idioms. + +"I really love this 'quote' kind of thing -- said Ian's mother, who always primed x' in '69." +\subsection* Categories and Subject Descriptors + +D.2.1 Requirements/Specifications---Languages; Assertions, Invariants, Specification techniques. This is an ellipsis...And there you go! \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/styles.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/styles.txt new file mode 100755 index 00000000..55aaf72c --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/styles.txt @@ -0,0 +1,11 @@ +<style:title><next:author> +<style:author><next:section> +<style:section><next:noindent><counter:1><separator:.><trailer: > +<style:opening><next:noindent> +<style:noindent><next:body> +<style:body><next:body> +<style:subsection><next:noindent><parent:section><counter:1><separator:.><trailer: > +<style:subsubsection><next:noindent><parent:subsection><counter:a><separator:.><trailer: > +<style:geekmath><next:noindent> +<style:point><next:noindent><counter:A><leader:\alpha > +<style:ref><next:ref> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/symbols.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/symbols.txt new file mode 100755 index 00000000..9b89caf8 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/symbols.txt @@ -0,0 +1,530 @@ +# character map for Lucida Math Italic font + +<char:Gamma><font:LucidNewMatItaT><index:161> +<char:Delta><font:LucidNewMatItaT><index:162> +<char:Theta><font:LucidNewMatItaT><index:163> +<char:Lambda><font:LucidNewMatItaT><index:164> +<char:Xi><font:LucidNewMatItaT><index:165> +<char:Pi><font:LucidNewMatItaT><index:166> +<char:Sigma><font:LucidNewMatItaT><index:167> +<char:Upsilon><font:LucidNewMatItaT><index:7> +<char:Phi><font:LucidNewMatItaT><index:169> 
+<char:Psi><font:LucidNewMatItaT><index:170> +<char:Omega><font:LucidNewMatItaT><index:173> +<char:alpha><font:LucidNewMatItaT><index:174> +<char:beta><font:LucidNewMatItaT><index:175> +<char:gamma><font:LucidNewMatItaT><index:176> +<char:delta><font:LucidNewMatItaT><index:177> +<char:epsilon1><font:LucidNewMatItaT><index:178> +<char:zeta><font:LucidNewMatItaT><index:179> +<char:eta><font:LucidNewMatItaT><index:180> +<char:theta><font:LucidNewMatItaT><index:181> +<char:iota><font:LucidNewMatItaT><index:182> +<char:kappa><font:LucidNewMatItaT><index:183> +<char:lambda><font:LucidNewMatItaT><index:184> +<char:mu><font:LucidNewMatItaT><index:185> +<char:nu><font:LucidNewMatItaT><index:186> +<char:xi><font:LucidNewMatItaT><index:187> +<char:pi><font:LucidNewMatItaT><index:188> +<char:rho><font:LucidNewMatItaT><index:189> +<char:sigma><font:LucidNewMatItaT><index:190> +<char:tau><font:LucidNewMatItaT><index:191> +<char:upsilon><font:LucidNewMatItaT><index:192> +<char:phi><font:LucidNewMatItaT><index:193> +<char:chi><font:LucidNewMatItaT><index:194> +<char:psi><font:LucidNewMatItaT><index:195> +<char:tie><font:LucidNewMatItaT><index:196> +<char:omega><font:LucidNewMatItaT><index:33> +<char:epsilon><font:LucidNewMatItaT><index:34> +<char:theta1><font:LucidNewMatItaT><index:35> +<char:pi1><font:LucidNewMatItaT><index:36> +<char:rho1><font:LucidNewMatItaT><index:37> +<char:sigma1><font:LucidNewMatItaT><index:38> +<char:phi1><font:LucidNewMatItaT><index:39> +<char:arrowlefttophalf><font:LucidNewMatItaT><index:40> +<char:arrowleftbothalf><font:LucidNewMatItaT><index:41> +<char:arrowrighttophalf><font:LucidNewMatItaT><index:42> +<char:arrowrightbothalf><font:LucidNewMatItaT><index:43> +<char:arrowhookleft><font:LucidNewMatItaT><index:44> +<char:arrowhookright><font:LucidNewMatItaT><index:45> +<char:triangleright><font:LucidNewMatItaT><index:46> +<char:triangleleft><font:LucidNewMatItaT><index:47> +<char:period><font:LucidNewMatItaT><index:58> 
+<char:comma><font:LucidNewMatItaT><index:59> +<char:less><font:LucidNewMatItaT><index:60> +<char:slash><font:LucidNewMatItaT><index:61> +<char:greater><font:LucidNewMatItaT><index:62> +<char:star><font:LucidNewMatItaT><index:63> +<char:partialdiff><font:LucidNewMatItaT><index:64> +<char:flat><font:LucidNewMatItaT><index:91> +<char:natural><font:LucidNewMatItaT><index:92> +<char:sharp><font:LucidNewMatItaT><index:93> +<char:slurbelow><font:LucidNewMatItaT><index:94> +<char:slurabove><font:LucidNewMatItaT><index:95> +<char:lscript><font:LucidNewMatItaT><index:96> +<char:dotlessi><font:LucidNewMatItaT><index:123> +<char:dotlessj><font:LucidNewMatItaT><index:124> +<char:weierstrass><font:LucidNewMatItaT><index:125> +<char:vector><font:LucidNewMatItaT><index:126> + + +# mathematical characters for Lucida New Math Symbol font + +<char:minus><font:LucidNewMatSymT><index:161> +<char:periodcentered><font:LucidNewMatSymT><index:162> +<char:multiply><font:LucidNewMatSymT><index:163> +<char:asteriskmath><font:LucidNewMatSymT><index:164> +<char:divide><font:LucidNewMatSymT><index:165> +<char:diamondmath><font:LucidNewMatSymT><index:166> +<char:plusminus><font:LucidNewMatSymT><index:167> +<char:minusplus><font:LucidNewMatSymT><index:168> +<char:circleplus><font:LucidNewMatSymT><index:169> +<char:circleminus><font:LucidNewMatSymT><index:170> +<char:circlemultiply><font:LucidNewMatSymT><index:173> +<char:circledivide><font:LucidNewMatSymT><index:174> +<char:circledot><font:LucidNewMatSymT><index:175> +<char:circlecopyrt><font:LucidNewMatSymT><index:176> +<char:openbullet><font:LucidNewMatSymT><index:177> +<char:bullet><font:LucidNewMatSymT><index:178> +<char:equivasymptotic><font:LucidNewMatSymT><index:179> +<char:equivalence><font:LucidNewMatSymT><index:180> +<char:reflexsubset><font:LucidNewMatSymT><index:181> +<char:reflexsuperset><font:LucidNewMatSymT><index:182> +<char:lessequal><font:LucidNewMatSymT><index:183> +<char:greaterequal><font:LucidNewMatSymT><index:184> 
+<char:precedesequal><font:LucidNewMatSymT><index:185> +<char:followsequal><font:LucidNewMatSymT><index:186> +<char:similar><font:LucidNewMatSymT><index:187> +<char:approxequal><font:LucidNewMatSymT><index:188> +<char:propersubset><font:LucidNewMatSymT><index:189> +<char:propersuperset><font:LucidNewMatSymT><index:190> +<char:lessmuch><font:LucidNewMatSymT><index:191> +<char:greatermuch><font:LucidNewMatSymT><index:192> +<char:precedes><font:LucidNewMatSymT><index:193> +<char:follows><font:LucidNewMatSymT><index:194> +<char:arrowleft><font:LucidNewMatSymT><index:195> +<char:spade><font:LucidNewMatSymT><index:196> +<char:arrowright><font:LucidNewMatSymT><index:33> +<char:arrowup><font:LucidNewMatSymT><index:34> +<char:arrowdown><font:LucidNewMatSymT><index:35> +<char:arrowboth><font:LucidNewMatSymT><index:36> +<char:arrownortheast><font:LucidNewMatSymT><index:37> +<char:arrowsoutheast><font:LucidNewMatSymT><index:38> +<char:similarequal><font:LucidNewMatSymT><index:39> +<char:arrowdblleft><font:LucidNewMatSymT><index:40> +<char:arrowdblright><font:LucidNewMatSymT><index:41> +<char:arrowdblup><font:LucidNewMatSymT><index:42> +<char:arrowdbldown><font:LucidNewMatSymT><index:43> +<char:arrowdblboth><font:LucidNewMatSymT><index:44> +<char:arrownorthwest><font:LucidNewMatSymT><index:45> +<char:arrowsouthwest><font:LucidNewMatSymT><index:46> +<char:proportional><font:LucidNewMatSymT><index:47> +<char:prime><font:LucidNewMatSymT><index:48> +<char:infinity><font:LucidNewMatSymT><index:49> +<char:element><font:LucidNewMatSymT><index:50> +<char:owner><font:LucidNewMatSymT><index:51> +<char:triangle><font:LucidNewMatSymT><index:52> +<char:triangleinv><font:LucidNewMatSymT><index:53> +<char:negationslash><font:LucidNewMatSymT><index:54> +<char:mapsto><font:LucidNewMatSymT><index:55> +<char:universal><font:LucidNewMatSymT><index:56> +<char:existential><font:LucidNewMatSymT><index:57> +<char:logicalnot><font:LucidNewMatSymT><index:58> 
+<char:emptyset><font:LucidNewMatSymT><index:59> +<char:Rfractur><font:LucidNewMatSymT><index:60> +<char:Ifractur><font:LucidNewMatSymT><index:61> +<char:latticetop><font:LucidNewMatSymT><index:62> +<char:perpendicular><font:LucidNewMatSymT><index:63> +<char:aleph><font:LucidNewMatSymT><index:64> +<char:scriptA><font:LucidNewMatSymT><index:65> +<char:scriptB><font:LucidNewMatSymT><index:66> +<char:scriptC><font:LucidNewMatSymT><index:67> +<char:scriptD><font:LucidNewMatSymT><index:68> +<char:scriptE><font:LucidNewMatSymT><index:69> +<char:scriptF><font:LucidNewMatSymT><index:70> +<char:scriptG><font:LucidNewMatSymT><index:71> +<char:scriptH><font:LucidNewMatSymT><index:72> +<char:scriptI><font:LucidNewMatSymT><index:73> +<char:scriptJ><font:LucidNewMatSymT><index:74> +<char:scriptK><font:LucidNewMatSymT><index:75> +<char:scriptL><font:LucidNewMatSymT><index:76> +<char:scriptM><font:LucidNewMatSymT><index:77> +<char:scriptN><font:LucidNewMatSymT><index:78> +<char:scriptO><font:LucidNewMatSymT><index:79> +<char:scriptP><font:LucidNewMatSymT><index:80> +<char:scriptQ><font:LucidNewMatSymT><index:81> +<char:scriptR><font:LucidNewMatSymT><index:82> +<char:scriptS><font:LucidNewMatSymT><index:83> +<char:scriptT><font:LucidNewMatSymT><index:84> +<char:scriptU><font:LucidNewMatSymT><index:85> +<char:scriptV><font:LucidNewMatSymT><index:86> +<char:scriptW><font:LucidNewMatSymT><index:87> +<char:scriptX><font:LucidNewMatSymT><index:88> +<char:scriptY><font:LucidNewMatSymT><index:89> +<char:scriptZ><font:LucidNewMatSymT><index:90> +<char:union><font:LucidNewMatSymT><index:91> +<char:intersection><font:LucidNewMatSymT><index:92> +<char:unionmulti><font:LucidNewMatSymT><index:93> +<char:logicaland><font:LucidNewMatSymT><index:94> +<char:logicalor><font:LucidNewMatSymT><index:95> +<char:turnstileleft><font:LucidNewMatSymT><index:96> +<char:turnstileright><font:LucidNewMatSymT><index:97> +<char:floorleft><font:LucidNewMatSymT><index:98> 
+<char:floorright><font:LucidNewMatSymT><index:99> +<char:ceilingleft><font:LucidNewMatSymT><index:100> +<char:ceilingright><font:LucidNewMatSymT><index:101> +<char:braceleft><font:LucidNewMatSymT><index:102> +<char:braceright><font:LucidNewMatSymT><index:103> +<char:angbracketleft><font:LucidNewMatSymT><index:104> +<char:angbracketright><font:LucidNewMatSymT><index:105> +<char:bar><font:LucidNewMatSymT><index:106> +<char:bardbl><font:LucidNewMatSymT><index:107> +<char:arrowbothv><font:LucidNewMatSymT><index:108> +<char:arrowdblbothv><font:LucidNewMatSymT><index:109> +<char:backslash><font:LucidNewMatSymT><index:110> +<char:wreathproduct><font:LucidNewMatSymT><index:111> +<char:radical><font:LucidNewMatSymT><index:112> +<char:coproduct><font:LucidNewMatSymT><index:113> +<char:nabla><font:LucidNewMatSymT><index:114> +<char:integral><font:LucidNewMatSymT><index:115> +<char:unionsq><font:LucidNewMatSymT><index:116> +<char:intersectionsq><font:LucidNewMatSymT><index:117> +<char:subsetsqequal><font:LucidNewMatSymT><index:118> +<char:supersetsqequal><font:LucidNewMatSymT><index:119> +<char:section><font:LucidNewMatSymT><index:120> +<char:dagger><font:LucidNewMatSymT><index:121> +<char:daggerdbl><font:LucidNewMatSymT><index:122> +<char:paragraph><font:LucidNewMatSymT><index:123> +<char:club><font:LucidNewMatSymT><index:124> +<char:diamond><font:LucidNewMatSymT><index:125> +<char:heart><font:LucidNewMatSymT><index:126> + + + +# character map for Symbol font + +<char:Symbol><font:Symbol><index:for> +<char:space><font:Symbol><index:32> +<char:exclam><font:Symbol><index:33> +<char:universal><font:Symbol><index:34> +<char:numbersign><font:Symbol><index:35> +<char:existential><font:Symbol><index:36> +<char:percent><font:Symbol><index:37> +<char:ampersand><font:Symbol><index:38> +<char:suchthat><font:Symbol><index:39> +<char:parenleft><font:Symbol><index:40> +<char:parenright><font:Symbol><index:41> +<char:asteriskmath><font:Symbol><index:42> +<char:plus><font:Symbol><index:43> 
+<char:comma><font:Symbol><index:44> +<char:minus><font:Symbol><index:45> +<char:period><font:Symbol><index:46> +<char:slash><font:Symbol><index:47> +<char:zero><font:Symbol><index:48> +<char:one><font:Symbol><index:49> +<char:two><font:Symbol><index:50> +<char:three><font:Symbol><index:51> +<char:four><font:Symbol><index:52> +<char:five><font:Symbol><index:53> +<char:six><font:Symbol><index:54> +<char:seven><font:Symbol><index:55> +<char:eight><font:Symbol><index:56> +<char:nine><font:Symbol><index:57> +<char:colon><font:Symbol><index:58> +<char:semicolon><font:Symbol><index:59> +<char:less><font:Symbol><index:60> +<char:equal><font:Symbol><index:61> +<char:greater><font:Symbol><index:62> +<char:question><font:Symbol><index:63> +<char:congruent><font:Symbol><index:64> +<char:Alpha><font:Symbol><index:65> +<char:Beta><font:Symbol><index:66> +<char:Chi><font:Symbol><index:67> +<char:Delta><font:Symbol><index:68> +<char:Epsilon><font:Symbol><index:69> +<char:Phi><font:Symbol><index:70> +<char:Gamma><font:Symbol><index:71> +<char:Eta><font:Symbol><index:72> +<char:Iota><font:Symbol><index:73> +<char:theta1><font:Symbol><index:74> +<char:Kappa><font:Symbol><index:75> +<char:Lambda><font:Symbol><index:76> +<char:Mu><font:Symbol><index:77> +<char:Nu><font:Symbol><index:78> +<char:Omicron><font:Symbol><index:79> +<char:Pi><font:Symbol><index:80> +<char:Theta><font:Symbol><index:81> +<char:Rho><font:Symbol><index:82> +<char:Sigma><font:Symbol><index:83> +<char:Tau><font:Symbol><index:84> +<char:Upsilon><font:Symbol><index:85> +<char:sigma1><font:Symbol><index:86> +<char:Omega><font:Symbol><index:87> +<char:Xi><font:Symbol><index:88> +<char:Psi><font:Symbol><index:89> +<char:Zeta><font:Symbol><index:90> +<char:bracketleft><font:Symbol><index:91> +<char:therefore><font:Symbol><index:92> +<char:bracketright><font:Symbol><index:93> +<char:perpendicular><font:Symbol><index:94> +<char:underscore><font:Symbol><index:95> +<char:radicalex><font:Symbol><index:96> 
+<char:alpha><font:Symbol><index:97> +<char:beta><font:Symbol><index:98> +<char:chi><font:Symbol><index:99> +<char:delta><font:Symbol><index:100> +<char:epsilon><font:Symbol><index:101> +<char:phi><font:Symbol><index:102> +<char:gamma><font:Symbol><index:103> +<char:eta><font:Symbol><index:104> +<char:iota><font:Symbol><index:105> +<char:phi1><font:Symbol><index:106> +<char:kappa><font:Symbol><index:107> +<char:lambda><font:Symbol><index:108> +<char:mu><font:Symbol><index:109> +<char:nu><font:Symbol><index:110> +<char:omicron><font:Symbol><index:111> +<char:pi><font:Symbol><index:112> +<char:theta><font:Symbol><index:113> +<char:rho><font:Symbol><index:114> +<char:sigma><font:Symbol><index:115> +<char:tau><font:Symbol><index:116> +<char:upsilon><font:Symbol><index:117> +<char:omega1><font:Symbol><index:118> +<char:omega><font:Symbol><index:119> +<char:xi><font:Symbol><index:120> +<char:psi><font:Symbol><index:121> +<char:zeta><font:Symbol><index:122> +<char:braceleft><font:Symbol><index:123> +<char:bar><font:Symbol><index:124> +<char:braceright><font:Symbol><index:125> +<char:similar><font:Symbol><index:126> +<char:Euro><font:Symbol><index:160> +<char:Upsilon1><font:Symbol><index:161> +<char:minute><font:Symbol><index:162> +<char:lessequal><font:Symbol><index:163> +<char:fraction><font:Symbol><index:164> +<char:infinity><font:Symbol><index:165> +<char:florin><font:Symbol><index:166> +<char:club><font:Symbol><index:167> +<char:diamond><font:Symbol><index:168> +<char:heart><font:Symbol><index:169> +<char:spade><font:Symbol><index:170> +<char:arrowboth><font:Symbol><index:171> +<char:arrowleft><font:Symbol><index:172> +<char:arrowup><font:Symbol><index:173> +<char:arrowright><font:Symbol><index:174> +<char:arrowdown><font:Symbol><index:175> +<char:degree><font:Symbol><index:176> +<char:plusminus><font:Symbol><index:177> +<char:second><font:Symbol><index:178> +<char:greaterequal><font:Symbol><index:179> +<char:multiply><font:Symbol><index:180> 
+<char:proportional><font:Symbol><index:181> +<char:partialdiff><font:Symbol><index:182> +<char:bullet><font:Symbol><index:183> +<char:divide><font:Symbol><index:184> +<char:notequal><font:Symbol><index:185> +<char:equivalence><font:Symbol><index:186> +<char:approxequal><font:Symbol><index:187> + +# seems to be a quarter fraction +# <char:ellipsis><font:Symbol><index:188> + +<char:arrowvertex><font:Symbol><index:189> +<char:arrowhorizex><font:Symbol><index:190> +<char:carriagereturn><font:Symbol><index:191> +<char:aleph><font:Symbol><index:192> +<char:Ifraktur><font:Symbol><index:193> +<char:Rfraktur><font:Symbol><index:194> +<char:weierstrass><font:Symbol><index:195> +<char:circlemultiply><font:Symbol><index:196> +<char:circleplus><font:Symbol><index:197> +<char:emptyset><font:Symbol><index:198> +<char:intersection><font:Symbol><index:199> +<char:union><font:Symbol><index:200> +<char:propersuperset><font:Symbol><index:201> +<char:reflexsuperset><font:Symbol><index:202> +<char:notsubset><font:Symbol><index:203> +<char:propersubset><font:Symbol><index:204> +<char:reflexsubset><font:Symbol><index:205> +<char:element><font:Symbol><index:206> +<char:notelement><font:Symbol><index:207> +<char:angle><font:Symbol><index:208> +<char:gradient><font:Symbol><index:209> +<char:registerserif><font:Symbol><index:210> +<char:copyrightserif><font:Symbol><index:211> +<char:trademarkserif><font:Symbol><index:212> +<char:product><font:Symbol><index:213> +<char:radical><font:Symbol><index:214> +<char:dotmath><font:Symbol><index:215> +<char:logicalnot><font:Symbol><index:216> +<char:logicaland><font:Symbol><index:217> +<char:logicalor><font:Symbol><index:218> +<char:arrowdblboth><font:Symbol><index:219> +<char:arrowdblleft><font:Symbol><index:220> +<char:arrowdblup><font:Symbol><index:221> +<char:arrowdblright><font:Symbol><index:222> +<char:arrowdbldown><font:Symbol><index:223> +<char:lozenge><font:Symbol><index:224> +<char:angleleft><font:Symbol><index:225> 
+<char:registersans><font:Symbol><index:226> +<char:copyrightsans><font:Symbol><index:227> +<char:trademarksans><font:Symbol><index:228> +<char:summation><font:Symbol><index:229> +<char:parenlefttp><font:Symbol><index:230> +<char:parenleftex><font:Symbol><index:231> +<char:parenleftbt><font:Symbol><index:232> +<char:bracketlefttp><font:Symbol><index:233> +<char:bracketleftex><font:Symbol><index:234> +<char:bracketleftbt><font:Symbol><index:235> +<char:bracelefttp><font:Symbol><index:236> +<char:braceleftmid><font:Symbol><index:237> +<char:braceleftbt><font:Symbol><index:238> +<char:braceex><font:Symbol><index:239> +<char:angleright><font:Symbol><index:241> +<char:integral><font:Symbol><index:242> +<char:integraltp><font:Symbol><index:243> +<char:integralex><font:Symbol><index:244> +<char:integralbt><font:Symbol><index:245> +<char:parenrighttp><font:Symbol><index:246> +<char:parenrightex><font:Symbol><index:247> +<char:parenrightbt><font:Symbol><index:248> +<char:bracketrighttp><font:Symbol><index:249> +<char:bracketrightex><font:Symbol><index:250> +<char:bracketrightbt><font:Symbol><index:251> +<char:bracerighttp><font:Symbol><index:252> +<char:bracerightmid><font:Symbol><index:253> +<char:bracerightbt><font:Symbol><index:254> + + +# character map for Lucida New Math Extended font + +<char:parenleftbig><font:LucidNewMatExtT><index:161> +<char:parenrightbig><font:LucidNewMatExtT><index:162> +<char:bracketleftbig><font:LucidNewMatExtT><index:163> +<char:bracketrightbig><font:LucidNewMatExtT><index:164> +<char:floorleftbig><font:LucidNewMatExtT><index:165> +<char:floorrightbig><font:LucidNewMatExtT><index:166> +<char:ceilingleftbig><font:LucidNewMatExtT><index:167> +<char:ceilingrightbig><font:LucidNewMatExtT><index:168> +<char:braceleftbig><font:LucidNewMatExtT><index:169> +<char:bracerightbig><font:LucidNewMatExtT><index:170> +<char:angbracketleftbig><font:LucidNewMatExtT><index:173> +<char:angbracketrightbig><font:LucidNewMatExtT><index:174> 
+<char:vextendsingle><font:LucidNewMatExtT><index:175> +<char:vextenddouble><font:LucidNewMatExtT><index:176> +<char:slashbig><font:LucidNewMatExtT><index:177> +<char:backslashbig><font:LucidNewMatExtT><index:178> +<char:parenleftBig><font:LucidNewMatExtT><index:179> +<char:parenrightBig><font:LucidNewMatExtT><index:180> +<char:parenleftbigg><font:LucidNewMatExtT><index:181> +<char:parenrightbigg><font:LucidNewMatExtT><index:182> +<char:bracketleftbigg><font:LucidNewMatExtT><index:183> +<char:bracketrightbigg><font:LucidNewMatExtT><index:184> +<char:floorleftbigg><font:LucidNewMatExtT><index:185> +<char:floorrightbigg><font:LucidNewMatExtT><index:186> +<char:ceilingleftbigg><font:LucidNewMatExtT><index:187> +<char:ceilingrightbigg><font:LucidNewMatExtT><index:188> +<char:braceleftbigg><font:LucidNewMatExtT><index:189> +<char:bracerightbigg><font:LucidNewMatExtT><index:190> +<char:angbracketleftbigg><font:LucidNewMatExtT><index:28> +<char:angbracketrightbigg><font:LucidNewMatExtT><index:29> +<char:slashbigg><font:LucidNewMatExtT><index:193> +<char:backslashbigg><font:LucidNewMatExtT><index:194> +<char:parenleftBigg><font:LucidNewMatExtT><index:195> +<char:parenrightBigg><font:LucidNewMatExtT><index:33> +<char:bracketleftBigg><font:LucidNewMatExtT><index:34> +<char:bracketrightBigg><font:LucidNewMatExtT><index:35> +<char:floorleftBigg><font:LucidNewMatExtT><index:36> +<char:floorrightBigg><font:LucidNewMatExtT><index:37> +<char:ceilingleftBigg><font:LucidNewMatExtT><index:38> +<char:ceilingrightBigg><font:LucidNewMatExtT><index:39> +<char:braceleftBigg><font:LucidNewMatExtT><index:40> +<char:bracerightBigg><font:LucidNewMatExtT><index:41> +<char:angbracketleftBigg><font:LucidNewMatExtT><index:42> +<char:angbracketrightBigg><font:LucidNewMatExtT><index:43> +<char:slashBigg><font:LucidNewMatExtT><index:44> +<char:backslashBigg><font:LucidNewMatExtT><index:45> +<char:slashBig><font:LucidNewMatExtT><index:46> +<char:backslashBig><font:LucidNewMatExtT><index:47> 
+<char:parenlefttp><font:LucidNewMatExtT><index:48> +<char:parenrighttp><font:LucidNewMatExtT><index:49> +<char:bracketlefttp><font:LucidNewMatExtT><index:50> +<char:bracketrighttp><font:LucidNewMatExtT><index:51> +<char:bracketleftbt><font:LucidNewMatExtT><index:52> +<char:bracketrightbt><font:LucidNewMatExtT><index:53> +<char:bracketleftex><font:LucidNewMatExtT><index:54> +<char:bracketrightex><font:LucidNewMatExtT><index:55> +<char:bracelefttp><font:LucidNewMatExtT><index:56> +<char:bracerighttp><font:LucidNewMatExtT><index:57> +<char:braceleftbt><font:LucidNewMatExtT><index:58> +<char:bracerightbt><font:LucidNewMatExtT><index:59> +<char:braceleftmid><font:LucidNewMatExtT><index:60> +<char:bracerightmid><font:LucidNewMatExtT><index:61> +<char:braceex><font:LucidNewMatExtT><index:62> +<char:arrowvertex><font:LucidNewMatExtT><index:63> +<char:parenleftbt><font:LucidNewMatExtT><index:64> +<char:parenrightbt><font:LucidNewMatExtT><index:65> +<char:parenleftex><font:LucidNewMatExtT><index:66> +<char:parenrightex><font:LucidNewMatExtT><index:67> +<char:angbracketleftBig><font:LucidNewMatExtT><index:68> +<char:angbracketrightBig><font:LucidNewMatExtT><index:69> +<char:unionsqtext><font:LucidNewMatExtT><index:70> +<char:unionsqdisplay><font:LucidNewMatExtT><index:71> +<char:contintegraltext><font:LucidNewMatExtT><index:72> +<char:contintegraldisplay><font:LucidNewMatExtT><index:73> +<char:circledottext><font:LucidNewMatExtT><index:74> +<char:circledotdisplay><font:LucidNewMatExtT><index:75> +<char:circleplustext><font:LucidNewMatExtT><index:76> +<char:circleplusdisplay><font:LucidNewMatExtT><index:77> +<char:circlemultiplytext><font:LucidNewMatExtT><index:78> +<char:circlemultiplydisplay><font:LucidNewMatExtT><index:79> +<char:summationtext><font:LucidNewMatExtT><index:80> +<char:producttext><font:LucidNewMatExtT><index:81> +<char:integraltext><font:LucidNewMatExtT><index:82> +<char:uniontext><font:LucidNewMatExtT><index:83> 
+<char:intersectiontext><font:LucidNewMatExtT><index:84> +<char:unionmultitext><font:LucidNewMatExtT><index:85> +<char:logicalandtext><font:LucidNewMatExtT><index:86> +<char:logicalortext><font:LucidNewMatExtT><index:87> +<char:summationdisplay><font:LucidNewMatExtT><index:88> +<char:productdisplay><font:LucidNewMatExtT><index:89> +<char:integraldisplay><font:LucidNewMatExtT><index:90> +<char:uniondisplay><font:LucidNewMatExtT><index:91> +<char:intersectiondisplay><font:LucidNewMatExtT><index:92> +<char:unionmultidisplay><font:LucidNewMatExtT><index:93> +<char:logicalanddisplay><font:LucidNewMatExtT><index:94> +<char:logicalordisplay><font:LucidNewMatExtT><index:95> +<char:coproducttext><font:LucidNewMatExtT><index:96> +<char:coproductdisplay><font:LucidNewMatExtT><index:97> +<char:hatwide><font:LucidNewMatExtT><index:98> +<char:hatwider><font:LucidNewMatExtT><index:99> +<char:hatwidest><font:LucidNewMatExtT><index:100> +<char:tildewide><font:LucidNewMatExtT><index:101> +<char:tildewider><font:LucidNewMatExtT><index:102> +<char:tildewidest><font:LucidNewMatExtT><index:103> +<char:bracketleftBig><font:LucidNewMatExtT><index:104> +<char:bracketrightBig><font:LucidNewMatExtT><index:105> +<char:floorleftBig><font:LucidNewMatExtT><index:106> +<char:floorrightBig><font:LucidNewMatExtT><index:107> +<char:ceilingleftBig><font:LucidNewMatExtT><index:108> +<char:ceilingrightBig><font:LucidNewMatExtT><index:109> +<char:braceleftBig><font:LucidNewMatExtT><index:110> +<char:bracerightBig><font:LucidNewMatExtT><index:111> +<char:radicalbig><font:LucidNewMatExtT><index:112> +<char:radicalBig><font:LucidNewMatExtT><index:113> +<char:radicalbigg><font:LucidNewMatExtT><index:114> +<char:radicalBigg><font:LucidNewMatExtT><index:115> +<char:radicalbt><font:LucidNewMatExtT><index:116> +<char:radicalvertex><font:LucidNewMatExtT><index:117> +<char:radicaltp><font:LucidNewMatExtT><index:118> +<char:arrowvertexdbl><font:LucidNewMatExtT><index:119> 
+<char:arrowtp><font:LucidNewMatExtT><index:120> +<char:arrowbt><font:LucidNewMatExtT><index:121> +<char:bracehtipdownleft><font:LucidNewMatExtT><index:122> +<char:bracehtipdownright><font:LucidNewMatExtT><index:123> +<char:bracehtipupleft><font:LucidNewMatExtT><index:124> +<char:bracehtipupright><font:LucidNewMatExtT><index:125> +<char:arrowdbltp><font:LucidNewMatExtT><index:126> +<char:arrowdblbt><font:LucidNewMatExtT><index:196> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/test.index.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/test.index.txt new file mode 100755 index 00000000..e69de29b diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/test.tag.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/test.tag.txt new file mode 100755 index 00000000..5cf4de36 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/test.tag.txt @@ -0,0 +1 @@ + A Micromodularity Mechanism A Micromodularity Mechanism \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/examples/test.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/test.txt new file mode 100755 index 00000000..e58bee78 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/examples/test.txt @@ -0,0 +1,10 @@ +\preamble +\loadchars{k:\Research\Tagger\maps\standard-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathsym-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathit-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathext-charmap.txt} +\loadstyles{k:\Research\Tagger\examples\styles.txt} + +\title A Micromodularity Mechanism + +\title A Micromodularity Mechanism diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/charmap.pdf b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/charmap.pdf new file mode 100755 index 00000000..eb501ec0 Binary files /dev/null and b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/charmap.pdf differ diff --git 
a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/charmaps-display.tag.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/charmaps-display.tag.txt new file mode 100755 index 00000000..b5060d78 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/charmaps-display.tag.txt @@ -0,0 +1,13 @@ +@section:1 Standard Charmap +@noindent:<f"Symbol"><\#32><f$> space <\n> <f"Symbol"><\#33><f$> exclam <\n> <f""><\#34><f$> quotedbl <\n> <f"Symbol"><\#35><f$> numbersign <\n> <f""><\#36><f$> dollar <\n> <f"Symbol"><\#37><f$> percent <\n> <f"Symbol"><\#38><f$> ampersand <\n> <f""><\#39><f$> quotesingle <\n> <f"Symbol"><\#40><f$> parenleft <\n> <f"Symbol"><\#41><f$> parenright <\n> <f""><\#42><f$> asterisk <\n> <f"Symbol"><\#43><f$> plus <\n> <f"Symbol"><\#44><f$> comma <\n> <f""><\#45><f$> hyphen <\n> <f"Symbol"><\#46><f$> period <\n> <f"Symbol"><\#47><f$> slash <\n> <f"Symbol"><\#48><f$> zero <\n> <f"Symbol"><\#49><f$> one <\n> <f"Symbol"><\#50><f$> two <\n> <f"Symbol"><\#51><f$> three <\n> <f"Symbol"><\#52><f$> four <\n> <f"Symbol"><\#53><f$> five <\n> <f"Symbol"><\#54><f$> six <\n> <f"Symbol"><\#55><f$> seven <\n> <f"Symbol"><\#56><f$> eight <\n> <f"Symbol"><\#57><f$> nine <\n> <f"Symbol"><\#58><f$> colon <\n> <f"Symbol"><\#59><f$> semicolon <\n> <f"Symbol"><\#60><f$> less <\n> <f"Symbol"><\#61><f$> equal <\n> <f"Symbol"><\#62><f$> greater <\n> <f"Symbol"><\#63><f$> question <\n> <f""><\#64><f$> at <\n> <f""><\#65><f$> A <\n> <f""><\#66><f$> B <\n> <f""><\#67><f$> C <\n> <f""><\#68><f$> D <\n> <f""><\#69><f$> E <\n> <f""><\#70><f$> F <\n> <f""><\#71><f$> G <\n> <f""><\#72><f$> H <\n> <f""><\#73><f$> I <\n> <f""><\#74><f$> J <\n> <f""><\#75><f$> K <\n> <f""><\#76><f$> L <\n> <f""><\#77><f$> M <\n> <f""><\#78><f$> N <\n> <f""><\#79><f$> O <\n> <f""><\#80><f$> P <\n> <f""><\#81><f$> Q <\n> <f""><\#82><f$> R <\n> <f""><\#83><f$> S <\n> <f""><\#84><f$> T <\n> <f""><\#85><f$> U <\n> <f""><\#86><f$> V <\n> <f""><\#87><f$> W <\n> <f""><\#88><f$> X <\n> 
<f""><\#89><f$> Y <\n> <f""><\#90><f$> Z <\n> <f"Symbol"><\#91><f$> bracketleft <\n> <f"LucidNewMatSymT"><\#110><f$> backslash <\n> <f"Symbol"><\#93><f$> bracketright <\n> <f""><\#94><f$> asciicircum <\n> <f"Symbol"><\#95><f$> underscore <\n> <f""><\#96><f$> grave <\n> <f""><\#97><f$> a <\n> <f""><\#98><f$> b <\n> <f""><\#99><f$> c <\n> <f""><\#100><f$> d <\n> <f""><\#101><f$> e <\n> <f""><\#102><f$> f <\n> <f""><\#103><f$> g <\n> <f""><\#104><f$> h <\n> <f""><\#105><f$> i <\n> <f""><\#106><f$> j <\n> <f""><\#107><f$> k <\n> <f""><\#108><f$> l <\n> <f""><\#109><f$> m <\n> <f""><\#110><f$> n <\n> <f""><\#111><f$> o <\n> <f""><\#112><f$> p <\n> <f""><\#113><f$> q <\n> <f""><\#114><f$> r <\n> <f""><\#115><f$> s <\n> <f""><\#116><f$> t <\n> <f""><\#117><f$> u <\n> <f""><\#118><f$> v <\n> <f""><\#119><f$> w <\n> <f""><\#120><f$> x <\n> <f""><\#121><f$> y <\n> <f""><\#122><f$> z <\n> <f"Symbol"><\#123><f$> braceleft <\n> <f"Symbol"><\#124><f$> bar <\n> <f"Symbol"><\#125><f$> braceright <\n> <f""><\#126><f$> asciitilde <\n> <f""><\#128><f$> euro <\n> <f""><\#130><f$> quotesinglbase <\n> <f"Symbol"><\#166><f$> florin <\n> <f""><\#132><f$> quotedblbase <\n> <f""><\#133><f$> ellipsis <\n> <f"LucidNewMatSymT"><\#121><f$> dagger <\n> <f"LucidNewMatSymT"><\#122><f$> daggerdbl <\n> <f""><\#136><f$> circumflex <\n> <f""><\#137><f$> perthousand <\n> <f""><\#138><f$> Scaron <\n> <f""><\#139><f$> guilsinglleft <\n> <f""><\#140><f$> OE <\n> <f""><\#142><f$> Zcaron <\n> <f""><\#145><f$> quoteleft <\n> <f""><\#146><f$> quoteright <\n> <f""><\#147><f$> quotedblleft <\n> <f""><\#148><f$> quotedblright <\n> <f"Symbol"><\#183><f$> bullet <\n> <f""><\#150><f$> endash <\n> <f""><\#151><f$> emdash <\n> <f""><\#152><f$> tilde <\n> <f""><\#153><f$> trademark <\n> <f""><\#154><f$> scaron <\n> <f""><\#155><f$> guilsinglright <\n> <f""><\#156><f$> oe <\n> <f""><\#158><f$> zcaron <\n> <f""><\#159><f$> Ydieresis <\n> <f""><\#160><f$> nbspace <\n> <f""><\#161><f$> exclamdown <\n> <f""><\#162><f$> 
cent <\n> <f""><\#163><f$> sterling <\n> <f""><\#164><f$> currency <\n> <f""><\#165><f$> yen <\n> <f""><\#166><f$> brokenbar <\n> section <\n> <f""><\#168><f$> dieresis <\n> <f""><\#169><f$> copyright <\n> <f""><\#170><f$> ordfeminine <\n> <f""><\#171><f$> guillemotleft <\n> <f"Symbol"><\#216><f$> logicalnot <\n> <f""><\#173><f$> sfthyphen <\n> <f""><\#174><f$> registered <\n> <f""><\#175><f$> macron <\n> <f"Symbol"><\#176><f$> degree <\n> <f"Symbol"><\#177><f$> plusminus <\n> <f""><\#178><f$> twosuperior <\n> <f""><\#179><f$> threesuperior <\n> <f""><\#180><f$> acute <\n> <f"Symbol"><\#109><f$> mu <\n> <f"LucidNewMatSymT"><\#123><f$> paragraph <\n> <f"LucidNewMatSymT"><\#162><f$> periodcentered <\n> <f""><\#184><f$> cedilla <\n> <f""><\#185><f$> onesuperior <\n> <f""><\#186><f$> ordmasculine <\n> <f""><\#187><f$> guillemotright <\n> <f""><\#188><f$> onequarter <\n> <f""><\#189><f$> onehalf <\n> <f""><\#190><f$> threequarters <\n> <f""><\#191><f$> questiondown <\n> <f""><\#192><f$> Agrave <\n> <f""><\#193><f$> Aacute <\n> <f""><\#194><f$> Acircumflex <\n> <f""><\#195><f$> Atilde <\n> <f""><\#196><f$> Adieresis <\n> <f""><\#197><f$> Aring <\n> <f""><\#198><f$> AE <\n> <f""><\#199><f$> Ccedilla <\n> <f""><\#200><f$> Egrave <\n> <f""><\#201><f$> Eacute <\n> <f""><\#202><f$> Ecircumflex <\n> <f""><\#203><f$> Edieresis <\n> <f""><\#204><f$> Igrave <\n> <f""><\#205><f$> Iacute <\n> <f""><\#206><f$> Icircumflex <\n> <f""><\#207><f$> Idieresis <\n> <f""><\#208><f$> Eth <\n> <f""><\#209><f$> Ntilde <\n> <f""><\#210><f$> Ograve <\n> <f""><\#211><f$> Oacute <\n> <f""><\#212><f$> Ocircumflex <\n> <f""><\#213><f$> Otilde <\n> <f""><\#214><f$> Odieresis <\n> <f"Symbol"><\#180><f$> multiply <\n> <f""><\#216><f$> Oslash <\n> <f""><\#217><f$> Ugrave <\n> <f""><\#218><f$> Uacute <\n> <f""><\#219><f$> Ucircumflex <\n> <f""><\#220><f$> Udieresis <\n> <f""><\#221><f$> Yacute <\n> <f""><\#222><f$> Thorn <\n> <f""><\#223><f$> germandbls <\n> <f""><\#224><f$> agrave <\n> <f""><\#225><f$> 
aacute <\n> <f""><\#226><f$> acircumflex <\n> <f""><\#227><f$> atilde <\n> <f""><\#228><f$> adieresis <\n> <f""><\#229><f$> aring <\n> <f""><\#230><f$> ae <\n> <f""><\#231><f$> ccedilla <\n> <f""><\#232><f$> egrave <\n> <f""><\#233><f$> eacute <\n> <f""><\#234><f$> ecircumflex <\n> <f""><\#235><f$> edieresis <\n> <f""><\#236><f$> igrave <\n> <f""><\#237><f$> iacute <\n> <f""><\#238><f$> icircumflex <\n> <f""><\#239><f$> idieresis <\n> <f""><\#240><f$> eth <\n> <f""><\#241><f$> ntilde <\n> <f""><\#242><f$> ograve <\n> <f""><\#243><f$> oacute <\n> <f""><\#244><f$> ocircumflex <\n> <f""><\#245><f$> otilde <\n> <f""><\#246><f$> odieresis <\n> <f"Symbol"><\#184><f$> divide <\n> <f""><\#248><f$> oslash <\n> <f""><\#249><f$> ugrave <\n> <f""><\#250><f$> uacute <\n> <f""><\#251><f$> ucircumflex <\n> <f""><\#252><f$> udieresis <\n> <f""><\#253><f$> yacute <\n> <f""><\#254><f$> thorn <\n> <f""><\#255><f$> ydieresis <\n> +@section:2 Symbol Charmap +@noindent:<f"Symbol"><\#32><f$> space Symbol<\n> <f"Symbol"><\#33><f$> exclam Symbol<\n> <f"Symbol"><\#34><f$> universal Symbol<\n> <f"Symbol"><\#35><f$> numbersign Symbol<\n> <f"Symbol"><\#36><f$> existential Symbol<\n> <f"Symbol"><\#37><f$> percent Symbol<\n> <f"Symbol"><\#38><f$> ampersand Symbol<\n> <f"Symbol"><\#39><f$> suchthat Symbol<\n> <f"Symbol"><\#40><f$> parenleft Symbol<\n> <f"Symbol"><\#41><f$> parenright Symbol<\n> <f"Symbol"><\#42><f$> asteriskmath Symbol<\n> <f"Symbol"><\#43><f$> plus Symbol<\n> <f"Symbol"><\#44><f$> comma Symbol<\n> <f"Symbol"><\#45><f$> minus Symbol<\n> <f"Symbol"><\#46><f$> period Symbol<\n> <f"Symbol"><\#47><f$> slash Symbol<\n> <f"Symbol"><\#48><f$> zero Symbol<\n> <f"Symbol"><\#49><f$> one Symbol<\n> <f"Symbol"><\#50><f$> two Symbol<\n> <f"Symbol"><\#51><f$> three Symbol<\n> <f"Symbol"><\#52><f$> four Symbol<\n> <f"Symbol"><\#53><f$> five Symbol<\n> <f"Symbol"><\#54><f$> six Symbol<\n> <f"Symbol"><\#55><f$> seven Symbol<\n> <f"Symbol"><\#56><f$> eight Symbol<\n> <f"Symbol"><\#57><f$> nine 
Symbol<\n> <f"Symbol"><\#58><f$> colon Symbol<\n> <f"Symbol"><\#59><f$> semicolon Symbol<\n> <f"Symbol"><\#60><f$> less Symbol<\n> <f"Symbol"><\#61><f$> equal Symbol<\n> <f"Symbol"><\#62><f$> greater Symbol<\n> <f"Symbol"><\#63><f$> question Symbol<\n> <f"Symbol"><\#64><f$> congruent Symbol<\n> <f"Symbol"><\#65><f$> Alpha Symbol<\n> <f"Symbol"><\#66><f$> Beta Symbol<\n> <f"Symbol"><\#67><f$> Chi Symbol<\n> <f"Symbol"><\#68><f$> Delta Symbol<\n> <f"Symbol"><\#69><f$> Epsilon Symbol<\n> <f"Symbol"><\#70><f$> Phi Symbol<\n> <f"Symbol"><\#71><f$> Gamma Symbol<\n> <f"Symbol"><\#72><f$> Eta Symbol<\n> <f"Symbol"><\#73><f$> Iota Symbol<\n> <f"Symbol"><\#74><f$> theta1 Symbol<\n> <f"Symbol"><\#75><f$> Kappa Symbol<\n> <f"Symbol"><\#76><f$> Lambda Symbol<\n> <f"Symbol"><\#77><f$> Mu Symbol<\n> <f"Symbol"><\#78><f$> Nu Symbol<\n> <f"Symbol"><\#79><f$> Omicron Symbol<\n> <f"Symbol"><\#80><f$> Pi Symbol<\n> <f"Symbol"><\#81><f$> Theta Symbol<\n> <f"Symbol"><\#82><f$> Rho Symbol<\n> <f"Symbol"><\#83><f$> Sigma Symbol<\n> <f"Symbol"><\#84><f$> Tau Symbol<\n> <f"Symbol"><\#85><f$> Upsilon Symbol<\n> <f"Symbol"><\#86><f$> sigma1 Symbol<\n> <f"Symbol"><\#87><f$> Omega Symbol<\n> <f"Symbol"><\#88><f$> Xi Symbol<\n> <f"Symbol"><\#89><f$> Psi Symbol<\n> <f"Symbol"><\#90><f$> Zeta Symbol<\n> <f"Symbol"><\#91><f$> bracketleft Symbol<\n> <f"Symbol"><\#92><f$> therefore Symbol<\n> <f"Symbol"><\#93><f$> bracketright Symbol<\n> <f"Symbol"><\#94><f$> perpendicular Symbol<\n> <f"Symbol"><\#95><f$> underscore Symbol<\n> <f"Symbol"><\#96><f$> radicalex Symbol<\n> <f"Symbol"><\#97><f$> alpha Symbol<\n> <f"Symbol"><\#98><f$> beta Symbol<\n> <f"Symbol"><\#99><f$> chi Symbol<\n> <f"Symbol"><\#100><f$> delta Symbol<\n> <f"Symbol"><\#101><f$> epsilon Symbol<\n> <f"Symbol"><\#102><f$> phi Symbol<\n> <f"Symbol"><\#103><f$> gamma Symbol<\n> <f"Symbol"><\#104><f$> eta Symbol<\n> <f"Symbol"><\#105><f$> iota Symbol<\n> <f"Symbol"><\#106><f$> phi1 Symbol<\n> <f"Symbol"><\#107><f$> kappa Symbol<\n> 
<f"Symbol"><\#108><f$> lambda Symbol<\n> <f"Symbol"><\#109><f$> mu Symbol<\n> <f"Symbol"><\#110><f$> nu Symbol<\n> <f"Symbol"><\#111><f$> omicron Symbol<\n> <f"Symbol"><\#112><f$> pi Symbol<\n> <f"Symbol"><\#113><f$> theta Symbol<\n> <f"Symbol"><\#114><f$> rho Symbol<\n> <f"Symbol"><\#115><f$> sigma Symbol<\n> <f"Symbol"><\#116><f$> tau Symbol<\n> <f"Symbol"><\#117><f$> upsilon Symbol<\n> <f"Symbol"><\#118><f$> omega1 Symbol<\n> <f"Symbol"><\#119><f$> omega Symbol<\n> <f"Symbol"><\#120><f$> xi Symbol<\n> <f"Symbol"><\#121><f$> psi Symbol<\n> <f"Symbol"><\#122><f$> zeta Symbol<\n> <f"Symbol"><\#123><f$> braceleft Symbol<\n> <f"Symbol"><\#124><f$> bar Symbol<\n> <f"Symbol"><\#125><f$> braceright Symbol<\n> <f"Symbol"><\#126><f$> similar Symbol<\n> <f"Symbol"><\#160><f$> Euro Symbol<\n> <f"Symbol"><\#161><f$> Upsilon1 Symbol<\n> <f"Symbol"><\#162><f$> minute Symbol<\n> <f"Symbol"><\#163><f$> lessequal Symbol<\n> <f"Symbol"><\#164><f$> fraction Symbol<\n> <f"Symbol"><\#165><f$> infinity Symbol<\n> <f"Symbol"><\#166><f$> florin Symbol<\n> <f"Symbol"><\#167><f$> club Symbol<\n> <f"Symbol"><\#168><f$> diamond Symbol<\n> <f"Symbol"><\#169><f$> heart Symbol<\n> <f"Symbol"><\#170><f$> spade Symbol<\n> <f"Symbol"><\#171><f$> arrowboth Symbol<\n> <f"Symbol"><\#172><f$> arrowleft Symbol<\n> <f"Symbol"><\#173><f$> arrowup Symbol<\n> <f"Symbol"><\#174><f$> arrowright Symbol<\n> <f"Symbol"><\#175><f$> arrowdown Symbol<\n> <f"Symbol"><\#176><f$> degree Symbol<\n> <f"Symbol"><\#177><f$> plusminus Symbol<\n> <f"Symbol"><\#178><f$> second Symbol<\n> <f"Symbol"><\#179><f$> greaterequal Symbol<\n> <f"Symbol"><\#180><f$> multiply Symbol<\n> <f"Symbol"><\#181><f$> proportional Symbol<\n> <f"Symbol"><\#182><f$> partialdiff Symbol<\n> <f"Symbol"><\#183><f$> bullet Symbol<\n> <f"Symbol"><\#184><f$> divide Symbol<\n> <f"Symbol"><\#185><f$> notequal Symbol<\n> <f"Symbol"><\#186><f$> equivalence Symbol<\n> <f"Symbol"><\#187><f$> approxequal Symbol<\n> <f"Symbol"><\#189><f$> arrowvertex 
Symbol<\n> <f"Symbol"><\#190><f$> arrowhorizex Symbol<\n> <f"Symbol"><\#191><f$> carriagereturn Symbol<\n> <f"Symbol"><\#192><f$> aleph Symbol<\n> <f"Symbol"><\#193><f$> Ifraktur Symbol<\n> <f"Symbol"><\#194><f$> Rfraktur Symbol<\n> <f"Symbol"><\#195><f$> weierstrass Symbol<\n> <f"Symbol"><\#196><f$> circlemultiply Symbol<\n> <f"Symbol"><\#197><f$> circleplus Symbol<\n> <f"Symbol"><\#198><f$> emptyset Symbol<\n> <f"Symbol"><\#199><f$> intersection Symbol<\n> <f"Symbol"><\#200><f$> union Symbol<\n> <f"Symbol"><\#201><f$> propersuperset Symbol<\n> <f"Symbol"><\#202><f$> reflexsuperset Symbol<\n> <f"Symbol"><\#203><f$> notsubset Symbol<\n> <f"Symbol"><\#204><f$> propersubset Symbol<\n> <f"Symbol"><\#205><f$> reflexsubset Symbol<\n> <f"Symbol"><\#206><f$> element Symbol<\n> <f"Symbol"><\#207><f$> notelement Symbol<\n> <f"Symbol"><\#208><f$> angle Symbol<\n> <f"Symbol"><\#209><f$> gradient Symbol<\n> <f"Symbol"><\#210><f$> registerserif Symbol<\n> <f"Symbol"><\#211><f$> copyrightserif Symbol<\n> <f"Symbol"><\#212><f$> trademarkserif Symbol<\n> <f"Symbol"><\#213><f$> product Symbol<\n> <f"Symbol"><\#214><f$> radical Symbol<\n> <f"Symbol"><\#215><f$> dotmath Symbol<\n> <f"Symbol"><\#216><f$> logicalnot Symbol<\n> <f"Symbol"><\#217><f$> logicaland Symbol<\n> <f"Symbol"><\#218><f$> logicalor Symbol<\n> <f"Symbol"><\#219><f$> arrowdblboth Symbol<\n> <f"Symbol"><\#220><f$> arrowdblleft Symbol<\n> <f"Symbol"><\#221><f$> arrowdblup Symbol<\n> <f"Symbol"><\#222><f$> arrowdblright Symbol<\n> <f"Symbol"><\#223><f$> arrowdbldown Symbol<\n> <f"Symbol"><\#224><f$> lozenge Symbol<\n> <f"Symbol"><\#225><f$> angleleft Symbol<\n> <f"Symbol"><\#226><f$> registersans Symbol<\n> <f"Symbol"><\#227><f$> copyrightsans Symbol<\n> <f"Symbol"><\#228><f$> trademarksans Symbol<\n> <f"Symbol"><\#229><f$> summation Symbol<\n> <f"Symbol"><\#230><f$> parenlefttp Symbol<\n> <f"Symbol"><\#231><f$> parenleftex Symbol<\n> <f"Symbol"><\#232><f$> parenleftbt Symbol<\n> <f"Symbol"><\#233><f$> bracketlefttp 
Symbol<\n> <f"Symbol"><\#234><f$> bracketleftex Symbol<\n> <f"Symbol"><\#235><f$> bracketleftbt Symbol<\n> <f"Symbol"><\#236><f$> bracelefttp Symbol<\n> <f"Symbol"><\#237><f$> braceleftmid Symbol<\n> <f"Symbol"><\#238><f$> braceleftbt Symbol<\n> <f"Symbol"><\#239><f$> braceex Symbol<\n> <f"Symbol"><\#241><f$> angleright Symbol<\n> <f"Symbol"><\#242><f$> integral Symbol<\n> <f"Symbol"><\#243><f$> integraltp Symbol<\n> <f"Symbol"><\#244><f$> integralex Symbol<\n> <f"Symbol"><\#245><f$> integralbt Symbol<\n> <f"Symbol"><\#246><f$> parenrighttp Symbol<\n> <f"Symbol"><\#247><f$> parenrightex Symbol<\n> <f"Symbol"><\#248><f$> parenrightbt Symbol<\n> <f"Symbol"><\#249><f$> bracketrighttp Symbol<\n> <f"Symbol"><\#250><f$> bracketrightex Symbol<\n> <f"Symbol"><\#251><f$> bracketrightbt Symbol<\n> <f"Symbol"><\#252><f$> bracerighttp Symbol<\n> <f"Symbol"><\#253><f$> bracerightmid Symbol<\n> <f"Symbol"><\#254><f$> bracerightbt Symbol<\n> +@body: +@section:3 Lucida Math Italic Charmap +@noindent:<f"Symbol"><\#71><f$> Gamma LucidNewMatItaT<\n> <f"Symbol"><\#68><f$> Delta LucidNewMatItaT<\n> <f"Symbol"><\#81><f$> Theta LucidNewMatItaT<\n> <f"Symbol"><\#76><f$> Lambda LucidNewMatItaT<\n> <f"Symbol"><\#88><f$> Xi LucidNewMatItaT<\n> <f"Symbol"><\#80><f$> Pi LucidNewMatItaT<\n> <f"Symbol"><\#83><f$> Sigma LucidNewMatItaT<\n> <f"Symbol"><\#85><f$> Upsilon LucidNewMatItaT<\n> <f"Symbol"><\#70><f$> Phi LucidNewMatItaT<\n> <f"Symbol"><\#89><f$> Psi LucidNewMatItaT<\n> <f"Symbol"><\#87><f$> Omega LucidNewMatItaT<\n> <f"Symbol"><\#97><f$> alpha LucidNewMatItaT<\n> <f"Symbol"><\#98><f$> beta LucidNewMatItaT<\n> <f"Symbol"><\#103><f$> gamma LucidNewMatItaT<\n> <f"Symbol"><\#100><f$> delta LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#178><f$> epsilon1 LucidNewMatItaT<\n> <f"Symbol"><\#122><f$> zeta LucidNewMatItaT<\n> <f"Symbol"><\#104><f$> eta LucidNewMatItaT<\n> <f"Symbol"><\#113><f$> theta LucidNewMatItaT<\n> <f"Symbol"><\#105><f$> iota LucidNewMatItaT<\n> <f"Symbol"><\#107><f$> kappa 
LucidNewMatItaT<\n> <f"Symbol"><\#108><f$> lambda LucidNewMatItaT<\n> <f"Symbol"><\#109><f$> mu LucidNewMatItaT<\n> <f"Symbol"><\#110><f$> nu LucidNewMatItaT<\n> <f"Symbol"><\#120><f$> xi LucidNewMatItaT<\n> <f"Symbol"><\#112><f$> pi LucidNewMatItaT<\n> <f"Symbol"><\#114><f$> rho LucidNewMatItaT<\n> <f"Symbol"><\#115><f$> sigma LucidNewMatItaT<\n> <f"Symbol"><\#116><f$> tau LucidNewMatItaT<\n> <f"Symbol"><\#117><f$> upsilon LucidNewMatItaT<\n> <f"Symbol"><\#102><f$> phi LucidNewMatItaT<\n> <f"Symbol"><\#99><f$> chi LucidNewMatItaT<\n> <f"Symbol"><\#121><f$> psi LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#196><f$> tie LucidNewMatItaT<\n> <f"Symbol"><\#119><f$> omega LucidNewMatItaT<\n> <f"Symbol"><\#101><f$> epsilon LucidNewMatItaT<\n> <f"Symbol"><\#74><f$> theta1 LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#36><f$> pi1 LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#37><f$> rho1 LucidNewMatItaT<\n> <f"Symbol"><\#86><f$> sigma1 LucidNewMatItaT<\n> <f"Symbol"><\#106><f$> phi1 LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#40><f$> arrowlefttophalf LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#41><f$> arrowleftbothalf LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#42><f$> arrowrighttophalf LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#43><f$> arrowrightbothalf LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#44><f$> arrowhookleft LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#45><f$> arrowhookright LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#46><f$> triangleright LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#47><f$> triangleleft LucidNewMatItaT<\n> <f"Symbol"><\#46><f$> period LucidNewMatItaT<\n> <f"Symbol"><\#44><f$> comma LucidNewMatItaT<\n> <f"Symbol"><\#60><f$> less LucidNewMatItaT<\n> <f"Symbol"><\#47><f$> slash LucidNewMatItaT<\n> <f"Symbol"><\#62><f$> greater LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#63><f$> star LucidNewMatItaT<\n> <f"Symbol"><\#182><f$> partialdiff LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#91><f$> flat LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#92><f$> natural 
LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#93><f$> sharp LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#94><f$> slurbelow LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#95><f$> slurabove LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#96><f$> lscript LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#123><f$> dotlessi LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#124><f$> dotlessj LucidNewMatItaT<\n> <f"Symbol"><\#195><f$> weierstrass LucidNewMatItaT<\n> <f"LucidNewMatItaT"><\#126><f$> vector LucidNewMatItaT<\n> +@section:4 Lucida Math Symbol Charmap +@noindent:<f"Symbol"><\#45><f$> minus LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#162><f$> periodcentered LucidNewMatSymT<\n> <f"Symbol"><\#180><f$> multiply LucidNewMatSymT<\n> <f"Symbol"><\#42><f$> asteriskmath LucidNewMatSymT<\n> <f"Symbol"><\#184><f$> divide LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#166><f$> diamondmath LucidNewMatSymT<\n> <f"Symbol"><\#177><f$> plusminus LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#168><f$> minusplus LucidNewMatSymT<\n> <f"Symbol"><\#197><f$> circleplus LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#170><f$> circleminus LucidNewMatSymT<\n> <f"Symbol"><\#196><f$> circlemultiply LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#174><f$> circledivide LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#175><f$> circledot LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#176><f$> circlecopyrt LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#177><f$> openbullet LucidNewMatSymT<\n> <f"Symbol"><\#183><f$> bullet LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#179><f$> equivasymptotic LucidNewMatSymT<\n> <f"Symbol"><\#186><f$> equivalence LucidNewMatSymT<\n> <f"Symbol"><\#205><f$> reflexsubset LucidNewMatSymT<\n> <f"Symbol"><\#202><f$> reflexsuperset LucidNewMatSymT<\n> <f"Symbol"><\#163><f$> lessequal LucidNewMatSymT<\n> <f"Symbol"><\#179><f$> greaterequal LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#185><f$> precedesequal LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#186><f$> followsequal LucidNewMatSymT<\n> <f"Symbol"><\#126><f$> similar 
LucidNewMatSymT<\n> <f"Symbol"><\#187><f$> approxequal LucidNewMatSymT<\n> <f"Symbol"><\#204><f$> propersubset LucidNewMatSymT<\n> <f"Symbol"><\#201><f$> propersuperset LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#191><f$> lessmuch LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#192><f$> greatermuch LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#193><f$> precedes LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#194><f$> follows LucidNewMatSymT<\n> <f"Symbol"><\#172><f$> arrowleft LucidNewMatSymT<\n> <f"Symbol"><\#170><f$> spade LucidNewMatSymT<\n> <f"Symbol"><\#174><f$> arrowright LucidNewMatSymT<\n> <f"Symbol"><\#173><f$> arrowup LucidNewMatSymT<\n> <f"Symbol"><\#175><f$> arrowdown LucidNewMatSymT<\n> <f"Symbol"><\#171><f$> arrowboth LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#37><f$> arrownortheast LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#38><f$> arrowsoutheast LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#39><f$> similarequal LucidNewMatSymT<\n> <f"Symbol"><\#220><f$> arrowdblleft LucidNewMatSymT<\n> <f"Symbol"><\#222><f$> arrowdblright LucidNewMatSymT<\n> <f"Symbol"><\#221><f$> arrowdblup LucidNewMatSymT<\n> <f"Symbol"><\#223><f$> arrowdbldown LucidNewMatSymT<\n> <f"Symbol"><\#219><f$> arrowdblboth LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#45><f$> arrownorthwest LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#46><f$> arrowsouthwest LucidNewMatSymT<\n> <f"Symbol"><\#181><f$> proportional LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#48><f$> prime LucidNewMatSymT<\n> <f"Symbol"><\#165><f$> infinity LucidNewMatSymT<\n> <f"Symbol"><\#206><f$> element LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#51><f$> owner LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#52><f$> triangle LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#53><f$> triangleinv LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#54><f$> negationslash LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#55><f$> mapsto LucidNewMatSymT<\n> <f"Symbol"><\#34><f$> universal LucidNewMatSymT<\n> <f"Symbol"><\#36><f$> existential LucidNewMatSymT<\n> 
<f"Symbol"><\#216><f$> logicalnot LucidNewMatSymT<\n> <f"Symbol"><\#198><f$> emptyset LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#60><f$> Rfractur LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#61><f$> Ifractur LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#62><f$> latticetop LucidNewMatSymT<\n> <f"Symbol"><\#94><f$> perpendicular LucidNewMatSymT<\n> <f"Symbol"><\#192><f$> aleph LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#65><f$> scriptA LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#66><f$> scriptB LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#67><f$> scriptC LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#68><f$> scriptD LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#69><f$> scriptE LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#70><f$> scriptF LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#71><f$> scriptG LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#72><f$> scriptH LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#73><f$> scriptI LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#74><f$> scriptJ LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#75><f$> scriptK LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#76><f$> scriptL LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#77><f$> scriptM LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#78><f$> scriptN LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#79><f$> scriptO LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#80><f$> scriptP LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#81><f$> scriptQ LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#82><f$> scriptR LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#83><f$> scriptS LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#84><f$> scriptT LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#85><f$> scriptU LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#86><f$> scriptV LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#87><f$> scriptW LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#88><f$> scriptX LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#89><f$> scriptY LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#90><f$> scriptZ LucidNewMatSymT<\n> <f"Symbol"><\#200><f$> union LucidNewMatSymT<\n> <f"Symbol"><\#199><f$> 
intersection LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#93><f$> unionmulti LucidNewMatSymT<\n> <f"Symbol"><\#217><f$> logicaland LucidNewMatSymT<\n> <f"Symbol"><\#218><f$> logicalor LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#96><f$> turnstileleft LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#97><f$> turnstileright LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#98><f$> floorleft LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#99><f$> floorright LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#100><f$> ceilingleft LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#101><f$> ceilingright LucidNewMatSymT<\n> <f"Symbol"><\#123><f$> braceleft LucidNewMatSymT<\n> <f"Symbol"><\#125><f$> braceright LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#104><f$> angbracketleft LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#105><f$> angbracketright LucidNewMatSymT<\n> <f"Symbol"><\#124><f$> bar LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#107><f$> bardbl LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#108><f$> arrowbothv LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#109><f$> arrowdblbothv LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#110><f$> backslash LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#111><f$> wreathproduct LucidNewMatSymT<\n> <f"Symbol"><\#214><f$> radical LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#113><f$> coproduct LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#114><f$> nabla LucidNewMatSymT<\n> <f"Symbol"><\#242><f$> integral LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#116><f$> unionsq LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#117><f$> intersectionsq LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#118><f$> subsetsqequal LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#119><f$> supersetsqequal LucidNewMatSymT<\n> section LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#121><f$> dagger LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#122><f$> daggerdbl LucidNewMatSymT<\n> <f"LucidNewMatSymT"><\#123><f$> paragraph LucidNewMatSymT<\n> <f"Symbol"><\#167><f$> club LucidNewMatSymT<\n> <f"Symbol"><\#168><f$> diamond LucidNewMatSymT<\n> 
<f"Symbol"><\#169><f$> heart LucidNewMatSymT<\n> +@section:5 Lucida Math Extended Charmap +@noindent:<f"LucidNewMatExtT"><\#161><f$> parenleftbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#162><f$> parenrightbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#163><f$> bracketleftbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#164><f$> bracketrightbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#165><f$> floorleftbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#166><f$> floorrightbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#167><f$> ceilingleftbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#168><f$> ceilingrightbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#169><f$> braceleftbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#170><f$> bracerightbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#173><f$> angbracketleftbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#174><f$> angbracketrightbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#175><f$> vextendsingle LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#176><f$> vextenddouble LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#177><f$> slashbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#178><f$> backslashbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#179><f$> parenleftBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#180><f$> parenrightBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#181><f$> parenleftbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#182><f$> parenrightbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#183><f$> bracketleftbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#184><f$> bracketrightbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#185><f$> floorleftbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#186><f$> floorrightbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#187><f$> ceilingleftbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#188><f$> ceilingrightbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#189><f$> braceleftbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#190><f$> bracerightbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#28><f$> 
angbracketleftbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#29><f$> angbracketrightbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#193><f$> slashbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#194><f$> backslashbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#195><f$> parenleftBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#33><f$> parenrightBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#34><f$> bracketleftBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#35><f$> bracketrightBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#36><f$> floorleftBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#37><f$> floorrightBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#38><f$> ceilingleftBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#39><f$> ceilingrightBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#40><f$> braceleftBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#41><f$> bracerightBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#42><f$> angbracketleftBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#43><f$> angbracketrightBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#44><f$> slashBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#45><f$> backslashBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#46><f$> slashBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#47><f$> backslashBig LucidNewMatExtT<\n> <f"Symbol"><\#230><f$> parenlefttp LucidNewMatExtT<\n> <f"Symbol"><\#246><f$> parenrighttp LucidNewMatExtT<\n> <f"Symbol"><\#233><f$> bracketlefttp LucidNewMatExtT<\n> <f"Symbol"><\#249><f$> bracketrighttp LucidNewMatExtT<\n> <f"Symbol"><\#235><f$> bracketleftbt LucidNewMatExtT<\n> <f"Symbol"><\#251><f$> bracketrightbt LucidNewMatExtT<\n> <f"Symbol"><\#234><f$> bracketleftex LucidNewMatExtT<\n> <f"Symbol"><\#250><f$> bracketrightex LucidNewMatExtT<\n> <f"Symbol"><\#236><f$> bracelefttp LucidNewMatExtT<\n> <f"Symbol"><\#252><f$> bracerighttp LucidNewMatExtT<\n> <f"Symbol"><\#238><f$> braceleftbt LucidNewMatExtT<\n> <f"Symbol"><\#254><f$> bracerightbt LucidNewMatExtT<\n> <f"Symbol"><\#237><f$> 
braceleftmid LucidNewMatExtT<\n> <f"Symbol"><\#253><f$> bracerightmid LucidNewMatExtT<\n> <f"Symbol"><\#239><f$> braceex LucidNewMatExtT<\n> <f"Symbol"><\#189><f$> arrowvertex LucidNewMatExtT<\n> <f"Symbol"><\#232><f$> parenleftbt LucidNewMatExtT<\n> <f"Symbol"><\#248><f$> parenrightbt LucidNewMatExtT<\n> <f"Symbol"><\#231><f$> parenleftex LucidNewMatExtT<\n> <f"Symbol"><\#247><f$> parenrightex LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#68><f$> angbracketleftBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#69><f$> angbracketrightBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#70><f$> unionsqtext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#71><f$> unionsqdisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#72><f$> contintegraltext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#73><f$> contintegraldisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#74><f$> circledottext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#75><f$> circledotdisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#76><f$> circleplustext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#77><f$> circleplusdisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#78><f$> circlemultiplytext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#79><f$> circlemultiplydisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#80><f$> summationtext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#81><f$> producttext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#82><f$> integraltext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#83><f$> uniontext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#84><f$> intersectiontext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#85><f$> unionmultitext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#86><f$> logicalandtext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#87><f$> logicalortext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#88><f$> summationdisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#89><f$> productdisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#90><f$> integraldisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#91><f$> uniondisplay 
LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#92><f$> intersectiondisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#93><f$> unionmultidisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#94><f$> logicalanddisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#95><f$> logicalordisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#96><f$> coproducttext LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#97><f$> coproductdisplay LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#98><f$> hatwide LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#99><f$> hatwider LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#100><f$> hatwidest LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#101><f$> tildewide LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#102><f$> tildewider LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#103><f$> tildewidest LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#104><f$> bracketleftBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#105><f$> bracketrightBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#106><f$> floorleftBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#107><f$> floorrightBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#108><f$> ceilingleftBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#109><f$> ceilingrightBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#110><f$> braceleftBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#111><f$> bracerightBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#112><f$> radicalbig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#113><f$> radicalBig LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#114><f$> radicalbigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#115><f$> radicalBigg LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#116><f$> radicalbt LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#117><f$> radicalvertex LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#118><f$> radicaltp LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#119><f$> arrowvertexdbl LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#120><f$> arrowtp LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#121><f$> arrowbt LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#122><f$> 
bracehtipdownleft LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#123><f$> bracehtipdownright LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#124><f$> bracehtipupleft LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#125><f$> bracehtipupright LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#126><f$> arrowdbltp LucidNewMatExtT<\n> <f"LucidNewMatExtT"><\#196><f$> arrowdblbt LucidNewMatExtT<\n> +@body: +@body: \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/charmaps-display.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/charmaps-display.txt new file mode 100755 index 00000000..9bb1843d --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/charmaps-display.txt @@ -0,0 +1,753 @@ +\preamble +\loadchars{k:\Research\Tagger\maps\standard-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathsym-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathit-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathext-charmap.txt} +\loadchars{k:\Research\Tagger\maps\symbol-charmap.txt} +\loadstyles{k:\Research\Tagger\examples\styles.txt} + +\section Standard Charmap + +\space space \\ +\exclam exclam \\ +\quotedbl quotedbl \\ +\numbersign numbersign \\ +\dollar dollar \\ +\percent percent \\ +\ampersand ampersand \\ +\quotesingle quotesingle \\ +\parenleft parenleft \\ +\parenright parenright \\ +\asterisk asterisk \\ +\plus plus \\ +\comma comma \\ +\hyphen hyphen \\ +\period period \\ +\slash slash \\ +\zero zero \\ +\one one \\ +\two two \\ +\three three \\ +\four four \\ +\five five \\ +\six six \\ +\seven seven \\ +\eight eight \\ +\nine nine \\ +\colon colon \\ +\semicolon semicolon \\ +\less less \\ +\equal equal \\ +\greater greater \\ +\question question \\ +\at at \\ +\A A \\ +\B B \\ +\C C \\ +\D D \\ +\E E \\ +\F F \\ +\G G \\ +\H H \\ +\I I \\ +\J J \\ +\K K \\ +\L L \\ +\M M \\ +\N N \\ +\O O \\ +\P P \\ +\Q Q \\ +\R R \\ +\S S \\ +\T T \\ +\U U \\ +\V V \\ +\W W \\ +\X X \\ +\Y Y \\ +\Z Z \\ +\bracketleft 
bracketleft \\ +\backslash backslash \\ +\bracketright bracketright \\ +\asciicircum asciicircum \\ +\underscore underscore \\ +\grave grave \\ +\a a \\ +\b b \\ +\c c \\ +\d d \\ +\e e \\ +\f f \\ +\g g \\ +\h h \\ +\i i \\ +\j j \\ +\k k \\ +\l l \\ +\m m \\ +\n n \\ +\o o \\ +\p p \\ +\q q \\ +\r r \\ +\s s \\ +\t t \\ +\u u \\ +\v v \\ +\w w \\ +\x x \\ +\y y \\ +\z z \\ +\braceleft braceleft \\ +\bar bar \\ +\braceright braceright \\ +\asciitilde asciitilde \\ +\euro euro \\ +\quotesinglbase quotesinglbase \\ +\florin florin \\ +\quotedblbase quotedblbase \\ +\ellipsis ellipsis \\ +\dagger dagger \\ +\daggerdbl daggerdbl \\ +\circumflex circumflex \\ +\perthousand perthousand \\ +\Scaron Scaron \\ +\guilsinglleft guilsinglleft \\ +\OE OE \\ +\Zcaron Zcaron \\ +\quoteleft quoteleft \\ +\quoteright quoteright \\ +\quotedblleft quotedblleft \\ +\quotedblright quotedblright \\ +\bullet bullet \\ +\endash endash \\ +\emdash emdash \\ +\tilde tilde \\ +\trademark trademark \\ +\scaron scaron \\ +\guilsinglright guilsinglright \\ +\oe oe \\ +\zcaron zcaron \\ +\Ydieresis Ydieresis \\ +\nbspace nbspace \\ +\exclamdown exclamdown \\ +\cent cent \\ +\sterling sterling \\ +\currency currency \\ +\yen yen \\ +\brokenbar brokenbar \\ +\section section \\ +\dieresis dieresis \\ +\copyright copyright \\ +\ordfeminine ordfeminine \\ +\guillemotleft guillemotleft \\ +\logicalnot logicalnot \\ +\sfthyphen sfthyphen \\ +\registered registered \\ +\macron macron \\ +\degree degree \\ +\plusminus plusminus \\ +\twosuperior twosuperior \\ +\threesuperior threesuperior \\ +\acute acute \\ +\mu mu \\ +\paragraph paragraph \\ +\periodcentered periodcentered \\ +\cedilla cedilla \\ +\onesuperior onesuperior \\ +\ordmasculine ordmasculine \\ +\guillemotright guillemotright \\ +\onequarter onequarter \\ +\onehalf onehalf \\ +\threequarters threequarters \\ +\questiondown questiondown \\ +\Agrave Agrave \\ +\Aacute Aacute \\ +\Acircumflex Acircumflex \\ +\Atilde Atilde \\ +\Adieresis 
Adieresis \\ +\Aring Aring \\ +\AE AE \\ +\Ccedilla Ccedilla \\ +\Egrave Egrave \\ +\Eacute Eacute \\ +\Ecircumflex Ecircumflex \\ +\Edieresis Edieresis \\ +\Igrave Igrave \\ +\Iacute Iacute \\ +\Icircumflex Icircumflex \\ +\Idieresis Idieresis \\ +\Eth Eth \\ +\Ntilde Ntilde \\ +\Ograve Ograve \\ +\Oacute Oacute \\ +\Ocircumflex Ocircumflex \\ +\Otilde Otilde \\ +\Odieresis Odieresis \\ +\multiply multiply \\ +\Oslash Oslash \\ +\Ugrave Ugrave \\ +\Uacute Uacute \\ +\Ucircumflex Ucircumflex \\ +\Udieresis Udieresis \\ +\Yacute Yacute \\ +\Thorn Thorn \\ +\germandbls germandbls \\ +\agrave agrave \\ +\aacute aacute \\ +\acircumflex acircumflex \\ +\atilde atilde \\ +\adieresis adieresis \\ +\aring aring \\ +\ae ae \\ +\ccedilla ccedilla \\ +\egrave egrave \\ +\eacute eacute \\ +\ecircumflex ecircumflex \\ +\edieresis edieresis \\ +\igrave igrave \\ +\iacute iacute \\ +\icircumflex icircumflex \\ +\idieresis idieresis \\ +\eth eth \\ +\ntilde ntilde \\ +\ograve ograve \\ +\oacute oacute \\ +\ocircumflex ocircumflex \\ +\otilde otilde \\ +\odieresis odieresis \\ +\divide divide \\ +\oslash oslash \\ +\ugrave ugrave \\ +\uacute uacute \\ +\ucircumflex ucircumflex \\ +\udieresis udieresis \\ +\yacute yacute \\ +\thorn thorn \\ +\ydieresis ydieresis \\ + +\section Symbol Charmap + +\space space Symbol\\ +\exclam exclam Symbol\\ +\universal universal Symbol\\ +\numbersign numbersign Symbol\\ +\existential existential Symbol\\ +\percent percent Symbol\\ +\ampersand ampersand Symbol\\ +\suchthat suchthat Symbol\\ +\parenleft parenleft Symbol\\ +\parenright parenright Symbol\\ +\asteriskmath asteriskmath Symbol\\ +\plus plus Symbol\\ +\comma comma Symbol\\ +\minus minus Symbol\\ +\period period Symbol\\ +\slash slash Symbol\\ +\zero zero Symbol\\ +\one one Symbol\\ +\two two Symbol\\ +\three three Symbol\\ +\four four Symbol\\ +\five five Symbol\\ +\six six Symbol\\ +\seven seven Symbol\\ +\eight eight Symbol\\ +\nine nine Symbol\\ +\colon colon Symbol\\ +\semicolon 
semicolon Symbol\\ +\less less Symbol\\ +\equal equal Symbol\\ +\greater greater Symbol\\ +\question question Symbol\\ +\congruent congruent Symbol\\ +\Alpha Alpha Symbol\\ +\Beta Beta Symbol\\ +\Chi Chi Symbol\\ +\Delta Delta Symbol\\ +\Epsilon Epsilon Symbol\\ +\Phi Phi Symbol\\ +\Gamma Gamma Symbol\\ +\Eta Eta Symbol\\ +\Iota Iota Symbol\\ +\theta1 theta1 Symbol\\ +\Kappa Kappa Symbol\\ +\Lambda Lambda Symbol\\ +\Mu Mu Symbol\\ +\Nu Nu Symbol\\ +\Omicron Omicron Symbol\\ +\Pi Pi Symbol\\ +\Theta Theta Symbol\\ +\Rho Rho Symbol\\ +\Sigma Sigma Symbol\\ +\Tau Tau Symbol\\ +\Upsilon Upsilon Symbol\\ +\sigma1 sigma1 Symbol\\ +\Omega Omega Symbol\\ +\Xi Xi Symbol\\ +\Psi Psi Symbol\\ +\Zeta Zeta Symbol\\ +\bracketleft bracketleft Symbol\\ +\therefore therefore Symbol\\ +\bracketright bracketright Symbol\\ +\perpendicular perpendicular Symbol\\ +\underscore underscore Symbol\\ +\radicalex radicalex Symbol\\ +\alpha alpha Symbol\\ +\beta beta Symbol\\ +\chi chi Symbol\\ +\delta delta Symbol\\ +\epsilon epsilon Symbol\\ +\phi phi Symbol\\ +\gamma gamma Symbol\\ +\eta eta Symbol\\ +\iota iota Symbol\\ +\phi1 phi1 Symbol\\ +\kappa kappa Symbol\\ +\lambda lambda Symbol\\ +\mu mu Symbol\\ +\nu nu Symbol\\ +\omicron omicron Symbol\\ +\pi pi Symbol\\ +\theta theta Symbol\\ +\rho rho Symbol\\ +\sigma sigma Symbol\\ +\tau tau Symbol\\ +\upsilon upsilon Symbol\\ +\omega1 omega1 Symbol\\ +\omega omega Symbol\\ +\xi xi Symbol\\ +\psi psi Symbol\\ +\zeta zeta Symbol\\ +\braceleft braceleft Symbol\\ +\bar bar Symbol\\ +\braceright braceright Symbol\\ +\similar similar Symbol\\ +\Euro Euro Symbol\\ +\Upsilon1 Upsilon1 Symbol\\ +\minute minute Symbol\\ +\lessequal lessequal Symbol\\ +\fraction fraction Symbol\\ +\infinity infinity Symbol\\ +\florin florin Symbol\\ +\club club Symbol\\ +\diamond diamond Symbol\\ +\heart heart Symbol\\ +\spade spade Symbol\\ +\arrowboth arrowboth Symbol\\ +\arrowleft arrowleft Symbol\\ +\arrowup arrowup Symbol\\ +\arrowright arrowright Symbol\\ 
+\arrowdown arrowdown Symbol\\ +\degree degree Symbol\\ +\plusminus plusminus Symbol\\ +\second second Symbol\\ +\greaterequal greaterequal Symbol\\ +\multiply multiply Symbol\\ +\proportional proportional Symbol\\ +\partialdiff partialdiff Symbol\\ +\bullet bullet Symbol\\ +\divide divide Symbol\\ +\notequal notequal Symbol\\ +\equivalence equivalence Symbol\\ +\approxequal approxequal Symbol\\ +\arrowvertex arrowvertex Symbol\\ +\arrowhorizex arrowhorizex Symbol\\ +\carriagereturn carriagereturn Symbol\\ +\aleph aleph Symbol\\ +\Ifraktur Ifraktur Symbol\\ +\Rfraktur Rfraktur Symbol\\ +\weierstrass weierstrass Symbol\\ +\circlemultiply circlemultiply Symbol\\ +\circleplus circleplus Symbol\\ +\emptyset emptyset Symbol\\ +\intersection intersection Symbol\\ +\union union Symbol\\ +\propersuperset propersuperset Symbol\\ +\reflexsuperset reflexsuperset Symbol\\ +\notsubset notsubset Symbol\\ +\propersubset propersubset Symbol\\ +\reflexsubset reflexsubset Symbol\\ +\element element Symbol\\ +\notelement notelement Symbol\\ +\angle angle Symbol\\ +\gradient gradient Symbol\\ +\registerserif registerserif Symbol\\ +\copyrightserif copyrightserif Symbol\\ +\trademarkserif trademarkserif Symbol\\ +\product product Symbol\\ +\radical radical Symbol\\ +\dotmath dotmath Symbol\\ +\logicalnot logicalnot Symbol\\ +\logicaland logicaland Symbol\\ +\logicalor logicalor Symbol\\ +\arrowdblboth arrowdblboth Symbol\\ +\arrowdblleft arrowdblleft Symbol\\ +\arrowdblup arrowdblup Symbol\\ +\arrowdblright arrowdblright Symbol\\ +\arrowdbldown arrowdbldown Symbol\\ +\lozenge lozenge Symbol\\ +\angleleft angleleft Symbol\\ +\registersans registersans Symbol\\ +\copyrightsans copyrightsans Symbol\\ +\trademarksans trademarksans Symbol\\ +\summation summation Symbol\\ +\parenlefttp parenlefttp Symbol\\ +\parenleftex parenleftex Symbol\\ +\parenleftbt parenleftbt Symbol\\ +\bracketlefttp bracketlefttp Symbol\\ +\bracketleftex bracketleftex Symbol\\ +\bracketleftbt bracketleftbt Symbol\\ 
+\bracelefttp bracelefttp Symbol\\ +\braceleftmid braceleftmid Symbol\\ +\braceleftbt braceleftbt Symbol\\ +\braceex braceex Symbol\\ +\angleright angleright Symbol\\ +\integral integral Symbol\\ +\integraltp integraltp Symbol\\ +\integralex integralex Symbol\\ +\integralbt integralbt Symbol\\ +\parenrighttp parenrighttp Symbol\\ +\parenrightex parenrightex Symbol\\ +\parenrightbt parenrightbt Symbol\\ +\bracketrighttp bracketrighttp Symbol\\ +\bracketrightex bracketrightex Symbol\\ +\bracketrightbt bracketrightbt Symbol\\ +\bracerighttp bracerighttp Symbol\\ +\bracerightmid bracerightmid Symbol\\ +\bracerightbt bracerightbt Symbol\\ + + +\section Lucida Math Italic Charmap + +\Gamma Gamma LucidNewMatItaT\\ +\Delta Delta LucidNewMatItaT\\ +\Theta Theta LucidNewMatItaT\\ +\Lambda Lambda LucidNewMatItaT\\ +\Xi Xi LucidNewMatItaT\\ +\Pi Pi LucidNewMatItaT\\ +\Sigma Sigma LucidNewMatItaT\\ +\Upsilon Upsilon LucidNewMatItaT\\ +\Phi Phi LucidNewMatItaT\\ +\Psi Psi LucidNewMatItaT\\ +\Omega Omega LucidNewMatItaT\\ +\alpha alpha LucidNewMatItaT\\ +\beta beta LucidNewMatItaT\\ +\gamma gamma LucidNewMatItaT\\ +\delta delta LucidNewMatItaT\\ +\epsilon1 epsilon1 LucidNewMatItaT\\ +\zeta zeta LucidNewMatItaT\\ +\eta eta LucidNewMatItaT\\ +\theta theta LucidNewMatItaT\\ +\iota iota LucidNewMatItaT\\ +\kappa kappa LucidNewMatItaT\\ +\lambda lambda LucidNewMatItaT\\ +\mu mu LucidNewMatItaT\\ +\nu nu LucidNewMatItaT\\ +\xi xi LucidNewMatItaT\\ +\pi pi LucidNewMatItaT\\ +\rho rho LucidNewMatItaT\\ +\sigma sigma LucidNewMatItaT\\ +\tau tau LucidNewMatItaT\\ +\upsilon upsilon LucidNewMatItaT\\ +\phi phi LucidNewMatItaT\\ +\chi chi LucidNewMatItaT\\ +\psi psi LucidNewMatItaT\\ +\tie tie LucidNewMatItaT\\ +\omega omega LucidNewMatItaT\\ +\epsilon epsilon LucidNewMatItaT\\ +\theta1 theta1 LucidNewMatItaT\\ +\pi1 pi1 LucidNewMatItaT\\ +\rho1 rho1 LucidNewMatItaT\\ +\sigma1 sigma1 LucidNewMatItaT\\ +\phi1 phi1 LucidNewMatItaT\\ +\arrowlefttophalf arrowlefttophalf LucidNewMatItaT\\ 
+\arrowleftbothalf arrowleftbothalf LucidNewMatItaT\\ +\arrowrighttophalf arrowrighttophalf LucidNewMatItaT\\ +\arrowrightbothalf arrowrightbothalf LucidNewMatItaT\\ +\arrowhookleft arrowhookleft LucidNewMatItaT\\ +\arrowhookright arrowhookright LucidNewMatItaT\\ +\triangleright triangleright LucidNewMatItaT\\ +\triangleleft triangleleft LucidNewMatItaT\\ +\period period LucidNewMatItaT\\ +\comma comma LucidNewMatItaT\\ +\less less LucidNewMatItaT\\ +\slash slash LucidNewMatItaT\\ +\greater greater LucidNewMatItaT\\ +\star star LucidNewMatItaT\\ +\partialdiff partialdiff LucidNewMatItaT\\ +\flat flat LucidNewMatItaT\\ +\natural natural LucidNewMatItaT\\ +\sharp sharp LucidNewMatItaT\\ +\slurbelow slurbelow LucidNewMatItaT\\ +\slurabove slurabove LucidNewMatItaT\\ +\lscript lscript LucidNewMatItaT\\ +\dotlessi dotlessi LucidNewMatItaT\\ +\dotlessj dotlessj LucidNewMatItaT\\ +\weierstrass weierstrass LucidNewMatItaT\\ +\vector vector LucidNewMatItaT\\ + +\section Lucida Math Symbol Charmap + +\minus minus LucidNewMatSymT\\ +\periodcentered periodcentered LucidNewMatSymT\\ +\multiply multiply LucidNewMatSymT\\ +\asteriskmath asteriskmath LucidNewMatSymT\\ +\divide divide LucidNewMatSymT\\ +\diamondmath diamondmath LucidNewMatSymT\\ +\plusminus plusminus LucidNewMatSymT\\ +\minusplus minusplus LucidNewMatSymT\\ +\circleplus circleplus LucidNewMatSymT\\ +\circleminus circleminus LucidNewMatSymT\\ +\circlemultiply circlemultiply LucidNewMatSymT\\ +\circledivide circledivide LucidNewMatSymT\\ +\circledot circledot LucidNewMatSymT\\ +\circlecopyrt circlecopyrt LucidNewMatSymT\\ +\openbullet openbullet LucidNewMatSymT\\ +\bullet bullet LucidNewMatSymT\\ +\equivasymptotic equivasymptotic LucidNewMatSymT\\ +\equivalence equivalence LucidNewMatSymT\\ +\reflexsubset reflexsubset LucidNewMatSymT\\ +\reflexsuperset reflexsuperset LucidNewMatSymT\\ +\lessequal lessequal LucidNewMatSymT\\ +\greaterequal greaterequal LucidNewMatSymT\\ +\precedesequal precedesequal LucidNewMatSymT\\ 
+\followsequal followsequal LucidNewMatSymT\\ +\similar similar LucidNewMatSymT\\ +\approxequal approxequal LucidNewMatSymT\\ +\propersubset propersubset LucidNewMatSymT\\ +\propersuperset propersuperset LucidNewMatSymT\\ +\lessmuch lessmuch LucidNewMatSymT\\ +\greatermuch greatermuch LucidNewMatSymT\\ +\precedes precedes LucidNewMatSymT\\ +\follows follows LucidNewMatSymT\\ +\arrowleft arrowleft LucidNewMatSymT\\ +\spade spade LucidNewMatSymT\\ +\arrowright arrowright LucidNewMatSymT\\ +\arrowup arrowup LucidNewMatSymT\\ +\arrowdown arrowdown LucidNewMatSymT\\ +\arrowboth arrowboth LucidNewMatSymT\\ +\arrownortheast arrownortheast LucidNewMatSymT\\ +\arrowsoutheast arrowsoutheast LucidNewMatSymT\\ +\similarequal similarequal LucidNewMatSymT\\ +\arrowdblleft arrowdblleft LucidNewMatSymT\\ +\arrowdblright arrowdblright LucidNewMatSymT\\ +\arrowdblup arrowdblup LucidNewMatSymT\\ +\arrowdbldown arrowdbldown LucidNewMatSymT\\ +\arrowdblboth arrowdblboth LucidNewMatSymT\\ +\arrownorthwest arrownorthwest LucidNewMatSymT\\ +\arrowsouthwest arrowsouthwest LucidNewMatSymT\\ +\proportional proportional LucidNewMatSymT\\ +\prime prime LucidNewMatSymT\\ +\infinity infinity LucidNewMatSymT\\ +\element element LucidNewMatSymT\\ +\owner owner LucidNewMatSymT\\ +\triangle triangle LucidNewMatSymT\\ +\triangleinv triangleinv LucidNewMatSymT\\ +\negationslash negationslash LucidNewMatSymT\\ +\mapsto mapsto LucidNewMatSymT\\ +\universal universal LucidNewMatSymT\\ +\existential existential LucidNewMatSymT\\ +\logicalnot logicalnot LucidNewMatSymT\\ +\emptyset emptyset LucidNewMatSymT\\ +\Rfractur Rfractur LucidNewMatSymT\\ +\Ifractur Ifractur LucidNewMatSymT\\ +\latticetop latticetop LucidNewMatSymT\\ +\perpendicular perpendicular LucidNewMatSymT\\ +\aleph aleph LucidNewMatSymT\\ +\scriptA scriptA LucidNewMatSymT\\ +\scriptB scriptB LucidNewMatSymT\\ +\scriptC scriptC LucidNewMatSymT\\ +\scriptD scriptD LucidNewMatSymT\\ +\scriptE scriptE LucidNewMatSymT\\ +\scriptF scriptF 
LucidNewMatSymT\\ +\scriptG scriptG LucidNewMatSymT\\ +\scriptH scriptH LucidNewMatSymT\\ +\scriptI scriptI LucidNewMatSymT\\ +\scriptJ scriptJ LucidNewMatSymT\\ +\scriptK scriptK LucidNewMatSymT\\ +\scriptL scriptL LucidNewMatSymT\\ +\scriptM scriptM LucidNewMatSymT\\ +\scriptN scriptN LucidNewMatSymT\\ +\scriptO scriptO LucidNewMatSymT\\ +\scriptP scriptP LucidNewMatSymT\\ +\scriptQ scriptQ LucidNewMatSymT\\ +\scriptR scriptR LucidNewMatSymT\\ +\scriptS scriptS LucidNewMatSymT\\ +\scriptT scriptT LucidNewMatSymT\\ +\scriptU scriptU LucidNewMatSymT\\ +\scriptV scriptV LucidNewMatSymT\\ +\scriptW scriptW LucidNewMatSymT\\ +\scriptX scriptX LucidNewMatSymT\\ +\scriptY scriptY LucidNewMatSymT\\ +\scriptZ scriptZ LucidNewMatSymT\\ +\union union LucidNewMatSymT\\ +\intersection intersection LucidNewMatSymT\\ +\unionmulti unionmulti LucidNewMatSymT\\ +\logicaland logicaland LucidNewMatSymT\\ +\logicalor logicalor LucidNewMatSymT\\ +\turnstileleft turnstileleft LucidNewMatSymT\\ +\turnstileright turnstileright LucidNewMatSymT\\ +\floorleft floorleft LucidNewMatSymT\\ +\floorright floorright LucidNewMatSymT\\ +\ceilingleft ceilingleft LucidNewMatSymT\\ +\ceilingright ceilingright LucidNewMatSymT\\ +\braceleft braceleft LucidNewMatSymT\\ +\braceright braceright LucidNewMatSymT\\ +\angbracketleft angbracketleft LucidNewMatSymT\\ +\angbracketright angbracketright LucidNewMatSymT\\ +\bar bar LucidNewMatSymT\\ +\bardbl bardbl LucidNewMatSymT\\ +\arrowbothv arrowbothv LucidNewMatSymT\\ +\arrowdblbothv arrowdblbothv LucidNewMatSymT\\ +\backslash backslash LucidNewMatSymT\\ +\wreathproduct wreathproduct LucidNewMatSymT\\ +\radical radical LucidNewMatSymT\\ +\coproduct coproduct LucidNewMatSymT\\ +\nabla nabla LucidNewMatSymT\\ +\integral integral LucidNewMatSymT\\ +\unionsq unionsq LucidNewMatSymT\\ +\intersectionsq intersectionsq LucidNewMatSymT\\ +\subsetsqequal subsetsqequal LucidNewMatSymT\\ +\supersetsqequal supersetsqequal LucidNewMatSymT\\ +\section section 
LucidNewMatSymT\\ +\dagger dagger LucidNewMatSymT\\ +\daggerdbl daggerdbl LucidNewMatSymT\\ +\paragraph paragraph LucidNewMatSymT\\ +\club club LucidNewMatSymT\\ +\diamond diamond LucidNewMatSymT\\ +\heart heart LucidNewMatSymT\\ + +\section Lucida Math Extended Charmap + +\parenleftbig parenleftbig LucidNewMatExtT\\ +\parenrightbig parenrightbig LucidNewMatExtT\\ +\bracketleftbig bracketleftbig LucidNewMatExtT\\ +\bracketrightbig bracketrightbig LucidNewMatExtT\\ +\floorleftbig floorleftbig LucidNewMatExtT\\ +\floorrightbig floorrightbig LucidNewMatExtT\\ +\ceilingleftbig ceilingleftbig LucidNewMatExtT\\ +\ceilingrightbig ceilingrightbig LucidNewMatExtT\\ +\braceleftbig braceleftbig LucidNewMatExtT\\ +\bracerightbig bracerightbig LucidNewMatExtT\\ +\angbracketleftbig angbracketleftbig LucidNewMatExtT\\ +\angbracketrightbig angbracketrightbig LucidNewMatExtT\\ +\vextendsingle vextendsingle LucidNewMatExtT\\ +\vextenddouble vextenddouble LucidNewMatExtT\\ +\slashbig slashbig LucidNewMatExtT\\ +\backslashbig backslashbig LucidNewMatExtT\\ +\parenleftBig parenleftBig LucidNewMatExtT\\ +\parenrightBig parenrightBig LucidNewMatExtT\\ +\parenleftbigg parenleftbigg LucidNewMatExtT\\ +\parenrightbigg parenrightbigg LucidNewMatExtT\\ +\bracketleftbigg bracketleftbigg LucidNewMatExtT\\ +\bracketrightbigg bracketrightbigg LucidNewMatExtT\\ +\floorleftbigg floorleftbigg LucidNewMatExtT\\ +\floorrightbigg floorrightbigg LucidNewMatExtT\\ +\ceilingleftbigg ceilingleftbigg LucidNewMatExtT\\ +\ceilingrightbigg ceilingrightbigg LucidNewMatExtT\\ +\braceleftbigg braceleftbigg LucidNewMatExtT\\ +\bracerightbigg bracerightbigg LucidNewMatExtT\\ +\angbracketleftbigg angbracketleftbigg LucidNewMatExtT\\ +\angbracketrightbigg angbracketrightbigg LucidNewMatExtT\\ +\slashbigg slashbigg LucidNewMatExtT\\ +\backslashbigg backslashbigg LucidNewMatExtT\\ +\parenleftBigg parenleftBigg LucidNewMatExtT\\ +\parenrightBigg parenrightBigg LucidNewMatExtT\\ +\bracketleftBigg bracketleftBigg 
LucidNewMatExtT\\ +\bracketrightBigg bracketrightBigg LucidNewMatExtT\\ +\floorleftBigg floorleftBigg LucidNewMatExtT\\ +\floorrightBigg floorrightBigg LucidNewMatExtT\\ +\ceilingleftBigg ceilingleftBigg LucidNewMatExtT\\ +\ceilingrightBigg ceilingrightBigg LucidNewMatExtT\\ +\braceleftBigg braceleftBigg LucidNewMatExtT\\ +\bracerightBigg bracerightBigg LucidNewMatExtT\\ +\angbracketleftBigg angbracketleftBigg LucidNewMatExtT\\ +\angbracketrightBigg angbracketrightBigg LucidNewMatExtT\\ +\slashBigg slashBigg LucidNewMatExtT\\ +\backslashBigg backslashBigg LucidNewMatExtT\\ +\slashBig slashBig LucidNewMatExtT\\ +\backslashBig backslashBig LucidNewMatExtT\\ +\parenlefttp parenlefttp LucidNewMatExtT\\ +\parenrighttp parenrighttp LucidNewMatExtT\\ +\bracketlefttp bracketlefttp LucidNewMatExtT\\ +\bracketrighttp bracketrighttp LucidNewMatExtT\\ +\bracketleftbt bracketleftbt LucidNewMatExtT\\ +\bracketrightbt bracketrightbt LucidNewMatExtT\\ +\bracketleftex bracketleftex LucidNewMatExtT\\ +\bracketrightex bracketrightex LucidNewMatExtT\\ +\bracelefttp bracelefttp LucidNewMatExtT\\ +\bracerighttp bracerighttp LucidNewMatExtT\\ +\braceleftbt braceleftbt LucidNewMatExtT\\ +\bracerightbt bracerightbt LucidNewMatExtT\\ +\braceleftmid braceleftmid LucidNewMatExtT\\ +\bracerightmid bracerightmid LucidNewMatExtT\\ +\braceex braceex LucidNewMatExtT\\ +\arrowvertex arrowvertex LucidNewMatExtT\\ +\parenleftbt parenleftbt LucidNewMatExtT\\ +\parenrightbt parenrightbt LucidNewMatExtT\\ +\parenleftex parenleftex LucidNewMatExtT\\ +\parenrightex parenrightex LucidNewMatExtT\\ +\angbracketleftBig angbracketleftBig LucidNewMatExtT\\ +\angbracketrightBig angbracketrightBig LucidNewMatExtT\\ +\unionsqtext unionsqtext LucidNewMatExtT\\ +\unionsqdisplay unionsqdisplay LucidNewMatExtT\\ +\contintegraltext contintegraltext LucidNewMatExtT\\ +\contintegraldisplay contintegraldisplay LucidNewMatExtT\\ +\circledottext circledottext LucidNewMatExtT\\ +\circledotdisplay circledotdisplay 
LucidNewMatExtT\\ +\circleplustext circleplustext LucidNewMatExtT\\ +\circleplusdisplay circleplusdisplay LucidNewMatExtT\\ +\circlemultiplytext circlemultiplytext LucidNewMatExtT\\ +\circlemultiplydisplay circlemultiplydisplay LucidNewMatExtT\\ +\summationtext summationtext LucidNewMatExtT\\ +\producttext producttext LucidNewMatExtT\\ +\integraltext integraltext LucidNewMatExtT\\ +\uniontext uniontext LucidNewMatExtT\\ +\intersectiontext intersectiontext LucidNewMatExtT\\ +\unionmultitext unionmultitext LucidNewMatExtT\\ +\logicalandtext logicalandtext LucidNewMatExtT\\ +\logicalortext logicalortext LucidNewMatExtT\\ +\summationdisplay summationdisplay LucidNewMatExtT\\ +\productdisplay productdisplay LucidNewMatExtT\\ +\integraldisplay integraldisplay LucidNewMatExtT\\ +\uniondisplay uniondisplay LucidNewMatExtT\\ +\intersectiondisplay intersectiondisplay LucidNewMatExtT\\ +\unionmultidisplay unionmultidisplay LucidNewMatExtT\\ +\logicalanddisplay logicalanddisplay LucidNewMatExtT\\ +\logicalordisplay logicalordisplay LucidNewMatExtT\\ +\coproducttext coproducttext LucidNewMatExtT\\ +\coproductdisplay coproductdisplay LucidNewMatExtT\\ +\hatwide hatwide LucidNewMatExtT\\ +\hatwider hatwider LucidNewMatExtT\\ +\hatwidest hatwidest LucidNewMatExtT\\ +\tildewide tildewide LucidNewMatExtT\\ +\tildewider tildewider LucidNewMatExtT\\ +\tildewidest tildewidest LucidNewMatExtT\\ +\bracketleftBig bracketleftBig LucidNewMatExtT\\ +\bracketrightBig bracketrightBig LucidNewMatExtT\\ +\floorleftBig floorleftBig LucidNewMatExtT\\ +\floorrightBig floorrightBig LucidNewMatExtT\\ +\ceilingleftBig ceilingleftBig LucidNewMatExtT\\ +\ceilingrightBig ceilingrightBig LucidNewMatExtT\\ +\braceleftBig braceleftBig LucidNewMatExtT\\ +\bracerightBig bracerightBig LucidNewMatExtT\\ +\radicalbig radicalbig LucidNewMatExtT\\ +\radicalBig radicalBig LucidNewMatExtT\\ +\radicalbigg radicalbigg LucidNewMatExtT\\ +\radicalBigg radicalBigg LucidNewMatExtT\\ +\radicalbt radicalbt LucidNewMatExtT\\ 
+\radicalvertex radicalvertex LucidNewMatExtT\\ +\radicaltp radicaltp LucidNewMatExtT\\ +\arrowvertexdbl arrowvertexdbl LucidNewMatExtT\\ +\arrowtp arrowtp LucidNewMatExtT\\ +\arrowbt arrowbt LucidNewMatExtT\\ +\bracehtipdownleft bracehtipdownleft LucidNewMatExtT\\ +\bracehtipdownright bracehtipdownright LucidNewMatExtT\\ +\bracehtipupleft bracehtipupleft LucidNewMatExtT\\ +\bracehtipupright bracehtipupright LucidNewMatExtT\\ +\arrowdbltp arrowdbltp LucidNewMatExtT\\ +\arrowdblbt arrowdblbt LucidNewMatExtT\\ + + diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmatext-charmap.processed.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmatext-charmap.processed.txt new file mode 100755 index 00000000..e69de29b diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmathext-charmap.processed.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmathext-charmap.processed.txt new file mode 100755 index 00000000..23f5f296 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmathext-charmap.processed.txt @@ -0,0 +1,128 @@ +\parenleftbig parenleftbig LucidNewMatExtT\\ +\parenrightbig parenrightbig LucidNewMatExtT\\ +\bracketleftbig bracketleftbig LucidNewMatExtT\\ +\bracketrightbig bracketrightbig LucidNewMatExtT\\ +\floorleftbig floorleftbig LucidNewMatExtT\\ +\floorrightbig floorrightbig LucidNewMatExtT\\ +\ceilingleftbig ceilingleftbig LucidNewMatExtT\\ +\ceilingrightbig ceilingrightbig LucidNewMatExtT\\ +\braceleftbig braceleftbig LucidNewMatExtT\\ +\bracerightbig bracerightbig LucidNewMatExtT\\ +\angbracketleftbig angbracketleftbig LucidNewMatExtT\\ +\angbracketrightbig angbracketrightbig LucidNewMatExtT\\ +\vextendsingle vextendsingle LucidNewMatExtT\\ +\vextenddouble vextenddouble LucidNewMatExtT\\ +\slashbig slashbig LucidNewMatExtT\\ +\backslashbig backslashbig LucidNewMatExtT\\ +\parenleftBig parenleftBig LucidNewMatExtT\\ +\parenrightBig parenrightBig LucidNewMatExtT\\ 
+\parenleftbigg parenleftbigg LucidNewMatExtT\\ +\parenrightbigg parenrightbigg LucidNewMatExtT\\ +\bracketleftbigg bracketleftbigg LucidNewMatExtT\\ +\bracketrightbigg bracketrightbigg LucidNewMatExtT\\ +\floorleftbigg floorleftbigg LucidNewMatExtT\\ +\floorrightbigg floorrightbigg LucidNewMatExtT\\ +\ceilingleftbigg ceilingleftbigg LucidNewMatExtT\\ +\ceilingrightbigg ceilingrightbigg LucidNewMatExtT\\ +\braceleftbigg braceleftbigg LucidNewMatExtT\\ +\bracerightbigg bracerightbigg LucidNewMatExtT\\ +\angbracketleftbigg angbracketleftbigg LucidNewMatExtT\\ +\angbracketrightbigg angbracketrightbigg LucidNewMatExtT\\ +\slashbigg slashbigg LucidNewMatExtT\\ +\backslashbigg backslashbigg LucidNewMatExtT\\ +\parenleftBigg parenleftBigg LucidNewMatExtT\\ +\parenrightBigg parenrightBigg LucidNewMatExtT\\ +\bracketleftBigg bracketleftBigg LucidNewMatExtT\\ +\bracketrightBigg bracketrightBigg LucidNewMatExtT\\ +\floorleftBigg floorleftBigg LucidNewMatExtT\\ +\floorrightBigg floorrightBigg LucidNewMatExtT\\ +\ceilingleftBigg ceilingleftBigg LucidNewMatExtT\\ +\ceilingrightBigg ceilingrightBigg LucidNewMatExtT\\ +\braceleftBigg braceleftBigg LucidNewMatExtT\\ +\bracerightBigg bracerightBigg LucidNewMatExtT\\ +\angbracketleftBigg angbracketleftBigg LucidNewMatExtT\\ +\angbracketrightBigg angbracketrightBigg LucidNewMatExtT\\ +\slashBigg slashBigg LucidNewMatExtT\\ +\backslashBigg backslashBigg LucidNewMatExtT\\ +\slashBig slashBig LucidNewMatExtT\\ +\backslashBig backslashBig LucidNewMatExtT\\ +\parenlefttp parenlefttp LucidNewMatExtT\\ +\parenrighttp parenrighttp LucidNewMatExtT\\ +\bracketlefttp bracketlefttp LucidNewMatExtT\\ +\bracketrighttp bracketrighttp LucidNewMatExtT\\ +\bracketleftbt bracketleftbt LucidNewMatExtT\\ +\bracketrightbt bracketrightbt LucidNewMatExtT\\ +\bracketleftex bracketleftex LucidNewMatExtT\\ +\bracketrightex bracketrightex LucidNewMatExtT\\ +\bracelefttp bracelefttp LucidNewMatExtT\\ +\bracerighttp bracerighttp LucidNewMatExtT\\ +\braceleftbt 
braceleftbt LucidNewMatExtT\\ +\bracerightbt bracerightbt LucidNewMatExtT\\ +\braceleftmid braceleftmid LucidNewMatExtT\\ +\bracerightmid bracerightmid LucidNewMatExtT\\ +\braceex braceex LucidNewMatExtT\\ +\arrowvertex arrowvertex LucidNewMatExtT\\ +\parenleftbt parenleftbt LucidNewMatExtT\\ +\parenrightbt parenrightbt LucidNewMatExtT\\ +\parenleftex parenleftex LucidNewMatExtT\\ +\parenrightex parenrightex LucidNewMatExtT\\ +\angbracketleftBig angbracketleftBig LucidNewMatExtT\\ +\angbracketrightBig angbracketrightBig LucidNewMatExtT\\ +\unionsqtext unionsqtext LucidNewMatExtT\\ +\unionsqdisplay unionsqdisplay LucidNewMatExtT\\ +\contintegraltext contintegraltext LucidNewMatExtT\\ +\contintegraldisplay contintegraldisplay LucidNewMatExtT\\ +\circledottext circledottext LucidNewMatExtT\\ +\circledotdisplay circledotdisplay LucidNewMatExtT\\ +\circleplustext circleplustext LucidNewMatExtT\\ +\circleplusdisplay circleplusdisplay LucidNewMatExtT\\ +\circlemultiplytext circlemultiplytext LucidNewMatExtT\\ +\circlemultiplydisplay circlemultiplydisplay LucidNewMatExtT\\ +\summationtext summationtext LucidNewMatExtT\\ +\producttext producttext LucidNewMatExtT\\ +\integraltext integraltext LucidNewMatExtT\\ +\uniontext uniontext LucidNewMatExtT\\ +\intersectiontext intersectiontext LucidNewMatExtT\\ +\unionmultitext unionmultitext LucidNewMatExtT\\ +\logicalandtext logicalandtext LucidNewMatExtT\\ +\logicalortext logicalortext LucidNewMatExtT\\ +\summationdisplay summationdisplay LucidNewMatExtT\\ +\productdisplay productdisplay LucidNewMatExtT\\ +\integraldisplay integraldisplay LucidNewMatExtT\\ +\uniondisplay uniondisplay LucidNewMatExtT\\ +\intersectiondisplay intersectiondisplay LucidNewMatExtT\\ +\unionmultidisplay unionmultidisplay LucidNewMatExtT\\ +\logicalanddisplay logicalanddisplay LucidNewMatExtT\\ +\logicalordisplay logicalordisplay LucidNewMatExtT\\ +\coproducttext coproducttext LucidNewMatExtT\\ +\coproductdisplay coproductdisplay LucidNewMatExtT\\ 
+\hatwide hatwide LucidNewMatExtT\\ +\hatwider hatwider LucidNewMatExtT\\ +\hatwidest hatwidest LucidNewMatExtT\\ +\tildewide tildewide LucidNewMatExtT\\ +\tildewider tildewider LucidNewMatExtT\\ +\tildewidest tildewidest LucidNewMatExtT\\ +\bracketleftBig bracketleftBig LucidNewMatExtT\\ +\bracketrightBig bracketrightBig LucidNewMatExtT\\ +\floorleftBig floorleftBig LucidNewMatExtT\\ +\floorrightBig floorrightBig LucidNewMatExtT\\ +\ceilingleftBig ceilingleftBig LucidNewMatExtT\\ +\ceilingrightBig ceilingrightBig LucidNewMatExtT\\ +\braceleftBig braceleftBig LucidNewMatExtT\\ +\bracerightBig bracerightBig LucidNewMatExtT\\ +\radicalbig radicalbig LucidNewMatExtT\\ +\radicalBig radicalBig LucidNewMatExtT\\ +\radicalbigg radicalbigg LucidNewMatExtT\\ +\radicalBigg radicalBigg LucidNewMatExtT\\ +\radicalbt radicalbt LucidNewMatExtT\\ +\radicalvertex radicalvertex LucidNewMatExtT\\ +\radicaltp radicaltp LucidNewMatExtT\\ +\arrowvertexdbl arrowvertexdbl LucidNewMatExtT\\ +\arrowtp arrowtp LucidNewMatExtT\\ +\arrowbt arrowbt LucidNewMatExtT\\ +\bracehtipdownleft bracehtipdownleft LucidNewMatExtT\\ +\bracehtipdownright bracehtipdownright LucidNewMatExtT\\ +\bracehtipupleft bracehtipupleft LucidNewMatExtT\\ +\bracehtipupright bracehtipupright LucidNewMatExtT\\ +\arrowdbltp arrowdbltp LucidNewMatExtT\\ +\arrowdblbt arrowdblbt LucidNewMatExtT\\ diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmathit-charmap.processed.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmathit-charmap.processed.txt new file mode 100755 index 00000000..63b39993 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmathit-charmap.processed.txt @@ -0,0 +1,66 @@ +\Gamma Gamma LucidNewMatItaT +\Delta Delta LucidNewMatItaT +\Theta Theta LucidNewMatItaT +\Lambda Lambda LucidNewMatItaT +\Xi Xi LucidNewMatItaT +\Pi Pi LucidNewMatItaT +\Sigma Sigma LucidNewMatItaT +\Upsilon Upsilon LucidNewMatItaT +\Phi Phi LucidNewMatItaT +\Psi Psi 
LucidNewMatItaT +\Omega Omega LucidNewMatItaT +\alpha alpha LucidNewMatItaT +\beta beta LucidNewMatItaT +\gamma gamma LucidNewMatItaT +\delta delta LucidNewMatItaT +\epsilon1 epsilon1 LucidNewMatItaT +\zeta zeta LucidNewMatItaT +\eta eta LucidNewMatItaT +\theta theta LucidNewMatItaT +\iota iota LucidNewMatItaT +\kappa kappa LucidNewMatItaT +\lambda lambda LucidNewMatItaT +\mu mu LucidNewMatItaT +\nu nu LucidNewMatItaT +\xi xi LucidNewMatItaT +\pi pi LucidNewMatItaT +\rho rho LucidNewMatItaT +\sigma sigma LucidNewMatItaT +\tau tau LucidNewMatItaT +\upsilon upsilon LucidNewMatItaT +\phi phi LucidNewMatItaT +\chi chi LucidNewMatItaT +\psi psi LucidNewMatItaT +\tie tie LucidNewMatItaT +\omega omega LucidNewMatItaT +\epsilon epsilon LucidNewMatItaT +\theta1 theta1 LucidNewMatItaT +\pi1 pi1 LucidNewMatItaT +\rho1 rho1 LucidNewMatItaT +\sigma1 sigma1 LucidNewMatItaT +\phi1 phi1 LucidNewMatItaT +\arrowlefttophalf arrowlefttophalf LucidNewMatItaT +\arrowleftbothalf arrowleftbothalf LucidNewMatItaT +\arrowrighttophalf arrowrighttophalf LucidNewMatItaT +\arrowrightbothalf arrowrightbothalf LucidNewMatItaT +\arrowhookleft arrowhookleft LucidNewMatItaT +\arrowhookright arrowhookright LucidNewMatItaT +\triangleright triangleright LucidNewMatItaT +\triangleleft triangleleft LucidNewMatItaT +\period period LucidNewMatItaT +\comma comma LucidNewMatItaT +\less less LucidNewMatItaT +\slash slash LucidNewMatItaT +\greater greater LucidNewMatItaT +\star star LucidNewMatItaT +\partialdiff partialdiff LucidNewMatItaT +\flat flat LucidNewMatItaT +\natural natural LucidNewMatItaT +\sharp sharp LucidNewMatItaT +\slurbelow slurbelow LucidNewMatItaT +\slurabove slurabove LucidNewMatItaT +\lscript lscript LucidNewMatItaT +\dotlessi dotlessi LucidNewMatItaT +\dotlessj dotlessj LucidNewMatItaT +\weierstrass weierstrass LucidNewMatItaT +\vector vector LucidNewMatItaT diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmathsym-charmap.processed.txt 
b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmathsym-charmap.processed.txt new file mode 100755 index 00000000..2b9c7333 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/lucmathsym-charmap.processed.txt @@ -0,0 +1,128 @@ +\minus minus LucidNewMatSymT +\periodcentered periodcentered LucidNewMatSymT +\multiply multiply LucidNewMatSymT +\asteriskmath asteriskmath LucidNewMatSymT +\divide divide LucidNewMatSymT +\diamondmath diamondmath LucidNewMatSymT +\plusminus plusminus LucidNewMatSymT +\minusplus minusplus LucidNewMatSymT +\circleplus circleplus LucidNewMatSymT +\circleminus circleminus LucidNewMatSymT +\circlemultiply circlemultiply LucidNewMatSymT +\circledivide circledivide LucidNewMatSymT +\circledot circledot LucidNewMatSymT +\circlecopyrt circlecopyrt LucidNewMatSymT +\openbullet openbullet LucidNewMatSymT +\bullet bullet LucidNewMatSymT +\equivasymptotic equivasymptotic LucidNewMatSymT +\equivalence equivalence LucidNewMatSymT +\reflexsubset reflexsubset LucidNewMatSymT +\reflexsuperset reflexsuperset LucidNewMatSymT +\lessequal lessequal LucidNewMatSymT +\greaterequal greaterequal LucidNewMatSymT +\precedesequal precedesequal LucidNewMatSymT +\followsequal followsequal LucidNewMatSymT +\similar similar LucidNewMatSymT +\approxequal approxequal LucidNewMatSymT +\propersubset propersubset LucidNewMatSymT +\propersuperset propersuperset LucidNewMatSymT +\lessmuch lessmuch LucidNewMatSymT +\greatermuch greatermuch LucidNewMatSymT +\precedes precedes LucidNewMatSymT +\follows follows LucidNewMatSymT +\arrowleft arrowleft LucidNewMatSymT +\spade spade LucidNewMatSymT +\arrowright arrowright LucidNewMatSymT +\arrowup arrowup LucidNewMatSymT +\arrowdown arrowdown LucidNewMatSymT +\arrowboth arrowboth LucidNewMatSymT +\arrownortheast arrownortheast LucidNewMatSymT +\arrowsoutheast arrowsoutheast LucidNewMatSymT +\similarequal similarequal LucidNewMatSymT +\arrowdblleft arrowdblleft LucidNewMatSymT +\arrowdblright 
arrowdblright LucidNewMatSymT +\arrowdblup arrowdblup LucidNewMatSymT +\arrowdbldown arrowdbldown LucidNewMatSymT +\arrowdblboth arrowdblboth LucidNewMatSymT +\arrownorthwest arrownorthwest LucidNewMatSymT +\arrowsouthwest arrowsouthwest LucidNewMatSymT +\proportional proportional LucidNewMatSymT +\prime prime LucidNewMatSymT +\infinity infinity LucidNewMatSymT +\element element LucidNewMatSymT +\owner owner LucidNewMatSymT +\triangle triangle LucidNewMatSymT +\triangleinv triangleinv LucidNewMatSymT +\negationslash negationslash LucidNewMatSymT +\mapsto mapsto LucidNewMatSymT +\universal universal LucidNewMatSymT +\existential existential LucidNewMatSymT +\logicalnot logicalnot LucidNewMatSymT +\emptyset emptyset LucidNewMatSymT +\Rfractur Rfractur LucidNewMatSymT +\Ifractur Ifractur LucidNewMatSymT +\latticetop latticetop LucidNewMatSymT +\perpendicular perpendicular LucidNewMatSymT +\aleph aleph LucidNewMatSymT +\scriptA scriptA LucidNewMatSymT +\scriptB scriptB LucidNewMatSymT +\scriptC scriptC LucidNewMatSymT +\scriptD scriptD LucidNewMatSymT +\scriptE scriptE LucidNewMatSymT +\scriptF scriptF LucidNewMatSymT +\scriptG scriptG LucidNewMatSymT +\scriptH scriptH LucidNewMatSymT +\scriptI scriptI LucidNewMatSymT +\scriptJ scriptJ LucidNewMatSymT +\scriptK scriptK LucidNewMatSymT +\scriptL scriptL LucidNewMatSymT +\scriptM scriptM LucidNewMatSymT +\scriptN scriptN LucidNewMatSymT +\scriptO scriptO LucidNewMatSymT +\scriptP scriptP LucidNewMatSymT +\scriptQ scriptQ LucidNewMatSymT +\scriptR scriptR LucidNewMatSymT +\scriptS scriptS LucidNewMatSymT +\scriptT scriptT LucidNewMatSymT +\scriptU scriptU LucidNewMatSymT +\scriptV scriptV LucidNewMatSymT +\scriptW scriptW LucidNewMatSymT +\scriptX scriptX LucidNewMatSymT +\scriptY scriptY LucidNewMatSymT +\scriptZ scriptZ LucidNewMatSymT +\union union LucidNewMatSymT +\intersection intersection LucidNewMatSymT +\unionmulti unionmulti LucidNewMatSymT +\logicaland logicaland LucidNewMatSymT +\logicalor logicalor 
LucidNewMatSymT +\turnstileleft turnstileleft LucidNewMatSymT +\turnstileright turnstileright LucidNewMatSymT +\floorleft floorleft LucidNewMatSymT +\floorright floorright LucidNewMatSymT +\ceilingleft ceilingleft LucidNewMatSymT +\ceilingright ceilingright LucidNewMatSymT +\braceleft braceleft LucidNewMatSymT +\braceright braceright LucidNewMatSymT +\angbracketleft angbracketleft LucidNewMatSymT +\angbracketright angbracketright LucidNewMatSymT +\bar bar LucidNewMatSymT +\bardbl bardbl LucidNewMatSymT +\arrowbothv arrowbothv LucidNewMatSymT +\arrowdblbothv arrowdblbothv LucidNewMatSymT +\backslash backslash LucidNewMatSymT +\wreathproduct wreathproduct LucidNewMatSymT +\radical radical LucidNewMatSymT +\coproduct coproduct LucidNewMatSymT +\nabla nabla LucidNewMatSymT +\integral integral LucidNewMatSymT +\unionsq unionsq LucidNewMatSymT +\intersectionsq intersectionsq LucidNewMatSymT +\subsetsqequal subsetsqequal LucidNewMatSymT +\supersetsqequal supersetsqequal LucidNewMatSymT +\section section LucidNewMatSymT +\dagger dagger LucidNewMatSymT +\daggerdbl daggerdbl LucidNewMatSymT +\paragraph paragraph LucidNewMatSymT +\club club LucidNewMatSymT +\diamond diamond LucidNewMatSymT +\heart heart LucidNewMatSymT diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/standard-charmap.processed.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/standard-charmap.processed.txt new file mode 100755 index 00000000..e26df1cb --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/standard-charmap.processed.txt @@ -0,0 +1,218 @@ +\space space +\exclam exclam +\quotedbl quotedbl +\numbersign numbersign +\dollar dollar +\percent percent +\ampersand ampersand +\quotesingle quotesingle +\parenleft parenleft +\parenright parenright +\asterisk asterisk +\plus plus +\comma comma +\hyphen hyphen +\period period +\slash slash +\zero zero +\one one +\two two +\three three +\four four +\five five +\six six +\seven seven +\eight eight +\nine 
nine +\colon colon +\semicolon semicolon +\less less +\equal equal +\greater greater +\question question +\at at +\A A +\B B +\C C +\D D +\E E +\F F +\G G +\H H +\I I +\J J +\K K +\L L +\M M +\N N +\O O +\P P +\Q Q +\R R +\S S +\T T +\U U +\V V +\W W +\X X +\Y Y +\Z Z +\bracketleft bracketleft +\backslash backslash +\bracketright bracketright +\asciicircum asciicircum +\underscore underscore +\grave grave +\a a +\b b +\c c +\d d +\e e +\f f +\g g +\h h +\i i +\j j +\k k +\l l +\m m +\n n +\o o +\p p +\q q +\r r +\s s +\t t +\u u +\v v +\w w +\x x +\y y +\z z +\braceleft braceleft +\bar bar +\braceright braceright +\asciitilde asciitilde +\euro euro +\quotesinglbase quotesinglbase +\florin florin +\quotedblbase quotedblbase +\ellipsis ellipsis +\dagger dagger +\daggerdbl daggerdbl +\circumflex circumflex +\perthousand perthousand +\Scaron Scaron +\guilsinglleft guilsinglleft +\OE OE +\Zcaron Zcaron +\quoteleft quoteleft +\quoteright quoteright +\quotedblleft quotedblleft +\quotedblright quotedblright +\bullet bullet +\endash endash +\emdash emdash +\tilde tilde +\trademark trademark +\scaron scaron +\guilsinglright guilsinglright +\oe oe +\zcaron zcaron +\Ydieresis Ydieresis +\nbspace nbspace +\exclamdown exclamdown +\cent cent +\sterling sterling +\currency currency +\yen yen +\brokenbar brokenbar +\section section +\dieresis dieresis +\copyright copyright +\ordfeminine ordfeminine +\guillemotleft guillemotleft +\logicalnot logicalnot +\sfthyphen sfthyphen +\registered registered +\macron macron +\degree degree +\plusminus plusminus +\twosuperior twosuperior +\threesuperior threesuperior +\acute acute +\mu mu +\paragraph paragraph +\periodcentered periodcentered +\cedilla cedilla +\onesuperior onesuperior +\ordmasculine ordmasculine +\guillemotright guillemotright +\onequarter onequarter +\onehalf onehalf +\threequarters threequarters +\questiondown questiondown +\Agrave Agrave +\Aacute Aacute +\Acircumflex Acircumflex +\Atilde Atilde +\Adieresis Adieresis +\Aring 
Aring +\AE AE +\Ccedilla Ccedilla +\Egrave Egrave +\Eacute Eacute +\Ecircumflex Ecircumflex +\Edieresis Edieresis +\Igrave Igrave +\Iacute Iacute +\Icircumflex Icircumflex +\Idieresis Idieresis +\Eth Eth +\Ntilde Ntilde +\Ograve Ograve +\Oacute Oacute +\Ocircumflex Ocircumflex +\Otilde Otilde +\Odieresis Odieresis +\multiply multiply +\Oslash Oslash +\Ugrave Ugrave +\Uacute Uacute +\Ucircumflex Ucircumflex +\Udieresis Udieresis +\Yacute Yacute +\Thorn Thorn +\germandbls germandbls +\agrave agrave +\aacute aacute +\acircumflex acircumflex +\atilde atilde +\adieresis adieresis +\aring aring +\ae ae +\ccedilla ccedilla +\egrave egrave +\eacute eacute +\ecircumflex ecircumflex +\edieresis edieresis +\igrave igrave +\iacute iacute +\icircumflex icircumflex +\idieresis idieresis +\eth eth +\ntilde ntilde +\ograve ograve +\oacute oacute +\ocircumflex ocircumflex +\otilde otilde +\odieresis odieresis +\divide divide +\oslash oslash +\ugrave ugrave +\uacute uacute +\ucircumflex ucircumflex +\udieresis udieresis +\yacute yacute +\thorn thorn +\ydieresis ydieresis diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/symbol-charmap.processed.index.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/symbol-charmap.processed.index.txt new file mode 100755 index 00000000..e69de29b diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/symbol-charmap.processed.tag.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/symbol-charmap.processed.tag.txt new file mode 100755 index 00000000..ade4f6f7 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/symbol-charmap.processed.tag.txt @@ -0,0 +1 @@ +@body:<f"Symbol"><\#32><f$> space Symbol<\n> <f"Symbol"><\#33><f$> exclam Symbol<\n> <f"Symbol"><\#34><f$> universal Symbol<\n> <f"Symbol"><\#35><f$> numbersign Symbol<\n> <f"Symbol"><\#36><f$> existential Symbol<\n> <f"Symbol"><\#37><f$> percent Symbol<\n> <f"Symbol"><\#38><f$> ampersand Symbol<\n> 
<f"Symbol"><\#39><f$> suchthat Symbol<\n> <f"Symbol"><\#40><f$> parenleft Symbol<\n> <f"Symbol"><\#41><f$> parenright Symbol<\n> <f"Symbol"><\#42><f$> asteriskmath Symbol<\n> <f"Symbol"><\#43><f$> plus Symbol<\n> <f"Symbol"><\#44><f$> comma Symbol<\n> <f"Symbol"><\#45><f$> minus Symbol<\n> <f"Symbol"><\#46><f$> period Symbol<\n> <f"Symbol"><\#47><f$> slash Symbol<\n> <f"Symbol"><\#48><f$> zero Symbol<\n> <f"Symbol"><\#49><f$> one Symbol<\n> <f"Symbol"><\#50><f$> two Symbol<\n> <f"Symbol"><\#51><f$> three Symbol<\n> <f"Symbol"><\#52><f$> four Symbol<\n> <f"Symbol"><\#53><f$> five Symbol<\n> <f"Symbol"><\#54><f$> six Symbol<\n> <f"Symbol"><\#55><f$> seven Symbol<\n> <f"Symbol"><\#56><f$> eight Symbol<\n> <f"Symbol"><\#57><f$> nine Symbol<\n> <f"Symbol"><\#58><f$> colon Symbol<\n> <f"Symbol"><\#59><f$> semicolon Symbol<\n> <f"Symbol"><\#60><f$> less Symbol<\n> <f"Symbol"><\#61><f$> equal Symbol<\n> <f"Symbol"><\#62><f$> greater Symbol<\n> <f"Symbol"><\#63><f$> question Symbol<\n> <f"Symbol"><\#64><f$> congruent Symbol<\n> <f"Symbol"><\#65><f$> Alpha Symbol<\n> <f"Symbol"><\#66><f$> Beta Symbol<\n> <f"Symbol"><\#67><f$> Chi Symbol<\n> <f"Symbol"><\#68><f$> Delta Symbol<\n> <f"Symbol"><\#69><f$> Epsilon Symbol<\n> <f"Symbol"><\#70><f$> Phi Symbol<\n> <f"Symbol"><\#71><f$> Gamma Symbol<\n> <f"Symbol"><\#72><f$> Eta Symbol<\n> <f"Symbol"><\#73><f$> Iota Symbol<\n> <f"Symbol"><\#74><f$> theta1 Symbol<\n> <f"Symbol"><\#75><f$> Kappa Symbol<\n> <f"Symbol"><\#76><f$> Lambda Symbol<\n> <f"Symbol"><\#77><f$> Mu Symbol<\n> <f"Symbol"><\#78><f$> Nu Symbol<\n> <f"Symbol"><\#79><f$> Omicron Symbol<\n> <f"Symbol"><\#80><f$> Pi Symbol<\n> <f"Symbol"><\#81><f$> Theta Symbol<\n> <f"Symbol"><\#82><f$> Rho Symbol<\n> <f"Symbol"><\#83><f$> Sigma Symbol<\n> <f"Symbol"><\#84><f$> Tau Symbol<\n> <f"Symbol"><\#85><f$> Upsilon Symbol<\n> <f"Symbol"><\#86><f$> sigma1 Symbol<\n> <f"Symbol"><\#87><f$> Omega Symbol<\n> <f"Symbol"><\#88><f$> Xi Symbol<\n> <f"Symbol"><\#89><f$> Psi Symbol<\n> 
<f"Symbol"><\#90><f$> Zeta Symbol<\n> <f"Symbol"><\#91><f$> bracketleft Symbol<\n> <f"Symbol"><\#92><f$> therefore Symbol<\n> <f"Symbol"><\#93><f$> bracketright Symbol<\n> <f"Symbol"><\#94><f$> perpendicular Symbol<\n> <f"Symbol"><\#95><f$> underscore Symbol<\n> <f"Symbol"><\#96><f$> radicalex Symbol<\n> <f"Symbol"><\#97><f$> alpha Symbol<\n> <f"Symbol"><\#98><f$> beta Symbol<\n> <f"Symbol"><\#99><f$> chi Symbol<\n> <f"Symbol"><\#100><f$> delta Symbol<\n> <f"Symbol"><\#101><f$> epsilon Symbol<\n> <f"Symbol"><\#102><f$> phi Symbol<\n> <f"Symbol"><\#103><f$> gamma Symbol<\n> <f"Symbol"><\#104><f$> eta Symbol<\n> <f"Symbol"><\#105><f$> iota Symbol<\n> <f"Symbol"><\#106><f$> phi1 Symbol<\n> <f"Symbol"><\#107><f$> kappa Symbol<\n> <f"Symbol"><\#108><f$> lambda Symbol<\n> <f"Symbol"><\#109><f$> mu Symbol<\n> <f"Symbol"><\#110><f$> nu Symbol<\n> <f"Symbol"><\#111><f$> omicron Symbol<\n> <f"Symbol"><\#112><f$> pi Symbol<\n> <f"Symbol"><\#113><f$> theta Symbol<\n> <f"Symbol"><\#114><f$> rho Symbol<\n> <f"Symbol"><\#115><f$> sigma Symbol<\n> <f"Symbol"><\#116><f$> tau Symbol<\n> <f"Symbol"><\#117><f$> upsilon Symbol<\n> <f"Symbol"><\#118><f$> omega1 Symbol<\n> <f"Symbol"><\#119><f$> omega Symbol<\n> <f"Symbol"><\#120><f$> xi Symbol<\n> <f"Symbol"><\#121><f$> psi Symbol<\n> <f"Symbol"><\#122><f$> zeta Symbol<\n> <f"Symbol"><\#123><f$> braceleft Symbol<\n> <f"Symbol"><\#124><f$> bar Symbol<\n> <f"Symbol"><\#125><f$> braceright Symbol<\n> <f"Symbol"><\#126><f$> similar Symbol<\n> <f"Symbol"><\#160><f$> Euro Symbol<\n> <f"Symbol"><\#161><f$> Upsilon1 Symbol<\n> <f"Symbol"><\#162><f$> minute Symbol<\n> <f"Symbol"><\#163><f$> lessequal Symbol<\n> <f"Symbol"><\#164><f$> fraction Symbol<\n> <f"Symbol"><\#165><f$> infinity Symbol<\n> <f"Symbol"><\#166><f$> florin Symbol<\n> <f"Symbol"><\#167><f$> club Symbol<\n> <f"Symbol"><\#168><f$> diamond Symbol<\n> <f"Symbol"><\#169><f$> heart Symbol<\n> <f"Symbol"><\#170><f$> spade Symbol<\n> <f"Symbol"><\#171><f$> arrowboth Symbol<\n> 
<f"Symbol"><\#172><f$> arrowleft Symbol<\n> <f"Symbol"><\#173><f$> arrowup Symbol<\n> <f"Symbol"><\#174><f$> arrowright Symbol<\n> <f"Symbol"><\#175><f$> arrowdown Symbol<\n> <f"Symbol"><\#176><f$> degree Symbol<\n> <f"Symbol"><\#177><f$> plusminus Symbol<\n> <f"Symbol"><\#178><f$> second Symbol<\n> <f"Symbol"><\#179><f$> greaterequal Symbol<\n> <f"Symbol"><\#180><f$> multiply Symbol<\n> <f"Symbol"><\#181><f$> proportional Symbol<\n> <f"Symbol"><\#182><f$> partialdiff Symbol<\n> <f"Symbol"><\#183><f$> bullet Symbol<\n> <f"Symbol"><\#184><f$> divide Symbol<\n> <f"Symbol"><\#185><f$> notequal Symbol<\n> <f"Symbol"><\#186><f$> equivalence Symbol<\n> <f"Symbol"><\#187><f$> approxequal Symbol<\n> <f"Symbol"><\#189><f$> arrowvertex Symbol<\n> <f"Symbol"><\#190><f$> arrowhorizex Symbol<\n> <f"Symbol"><\#191><f$> carriagereturn Symbol<\n> <f"Symbol"><\#192><f$> aleph Symbol<\n> <f"Symbol"><\#193><f$> Ifraktur Symbol<\n> <f"Symbol"><\#194><f$> Rfraktur Symbol<\n> <f"Symbol"><\#195><f$> weierstrass Symbol<\n> <f"Symbol"><\#196><f$> circlemultiply Symbol<\n> <f"Symbol"><\#197><f$> circleplus Symbol<\n> <f"Symbol"><\#198><f$> emptyset Symbol<\n> <f"Symbol"><\#199><f$> intersection Symbol<\n> <f"Symbol"><\#200><f$> union Symbol<\n> <f"Symbol"><\#201><f$> propersuperset Symbol<\n> <f"Symbol"><\#202><f$> reflexsuperset Symbol<\n> <f"Symbol"><\#203><f$> notsubset Symbol<\n> <f"Symbol"><\#204><f$> propersubset Symbol<\n> <f"Symbol"><\#205><f$> reflexsubset Symbol<\n> <f"Symbol"><\#206><f$> element Symbol<\n> <f"Symbol"><\#207><f$> notelement Symbol<\n> <f"Symbol"><\#208><f$> angle Symbol<\n> <f"Symbol"><\#209><f$> gradient Symbol<\n> <f"Symbol"><\#210><f$> registerserif Symbol<\n> <f"Symbol"><\#211><f$> copyrightserif Symbol<\n> <f"Symbol"><\#212><f$> trademarkserif Symbol<\n> <f"Symbol"><\#213><f$> product Symbol<\n> <f"Symbol"><\#214><f$> radical Symbol<\n> <f"Symbol"><\#215><f$> dotmath Symbol<\n> <f"Symbol"><\#216><f$> logicalnot Symbol<\n> <f"Symbol"><\#217><f$> logicaland 
Symbol<\n> <f"Symbol"><\#218><f$> logicalor Symbol<\n> <f"Symbol"><\#219><f$> arrowdblboth Symbol<\n> <f"Symbol"><\#220><f$> arrowdblleft Symbol<\n> <f"Symbol"><\#221><f$> arrowdblup Symbol<\n> <f"Symbol"><\#222><f$> arrowdblright Symbol<\n> <f"Symbol"><\#223><f$> arrowdbldown Symbol<\n> <f"Symbol"><\#224><f$> lozenge Symbol<\n> <f"Symbol"><\#225><f$> angleleft Symbol<\n> <f"Symbol"><\#226><f$> registersans Symbol<\n> <f"Symbol"><\#227><f$> copyrightsans Symbol<\n> <f"Symbol"><\#228><f$> trademarksans Symbol<\n> <f"Symbol"><\#229><f$> summation Symbol<\n> <f"Symbol"><\#230><f$> parenlefttp Symbol<\n> <f"Symbol"><\#231><f$> parenleftex Symbol<\n> <f"Symbol"><\#232><f$> parenleftbt Symbol<\n> <f"Symbol"><\#233><f$> bracketlefttp Symbol<\n> <f"Symbol"><\#234><f$> bracketleftex Symbol<\n> <f"Symbol"><\#235><f$> bracketleftbt Symbol<\n> <f"Symbol"><\#236><f$> bracelefttp Symbol<\n> <f"Symbol"><\#237><f$> braceleftmid Symbol<\n> <f"Symbol"><\#238><f$> braceleftbt Symbol<\n> <f"Symbol"><\#239><f$> braceex Symbol<\n> <f"Symbol"><\#241><f$> angleright Symbol<\n> <f"Symbol"><\#242><f$> integral Symbol<\n> <f"Symbol"><\#243><f$> integraltp Symbol<\n> <f"Symbol"><\#244><f$> integralex Symbol<\n> <f"Symbol"><\#245><f$> integralbt Symbol<\n> <f"Symbol"><\#246><f$> parenrighttp Symbol<\n> <f"Symbol"><\#247><f$> parenrightex Symbol<\n> <f"Symbol"><\#248><f$> parenrightbt Symbol<\n> <f"Symbol"><\#249><f$> bracketrighttp Symbol<\n> <f"Symbol"><\#250><f$> bracketrightex Symbol<\n> <f"Symbol"><\#251><f$> bracketrightbt Symbol<\n> <f"Symbol"><\#252><f$> bracerighttp Symbol<\n> <f"Symbol"><\#253><f$> bracerightmid Symbol<\n> <f"Symbol"><\#254><f$> bracerightbt Symbol<\n> \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/symbol-charmap.processed.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/symbol-charmap.processed.txt new file mode 100755 index 00000000..c951d682 --- /dev/null +++ 
b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/junk/symbol-charmap.processed.txt @@ -0,0 +1,532 @@ +\preamble +\loadchars{k:\Research\Tagger\maps\standard-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathsym-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathit-charmap.txt} +\loadchars{k:\Research\Tagger\maps\lucmathext-charmap.txt} +\loadchars{k:\Research\Tagger\maps\symbol-charmap.txt} +\loadstyles{k:\Research\Tagger\examples\styles.txt} + +\section Symbol Charmap + +\space space Symbol\\ +\exclam exclam Symbol\\ +\universal universal Symbol\\ +\numbersign numbersign Symbol\\ +\existential existential Symbol\\ +\percent percent Symbol\\ +\ampersand ampersand Symbol\\ +\suchthat suchthat Symbol\\ +\parenleft parenleft Symbol\\ +\parenright parenright Symbol\\ +\asteriskmath asteriskmath Symbol\\ +\plus plus Symbol\\ +\comma comma Symbol\\ +\minus minus Symbol\\ +\period period Symbol\\ +\slash slash Symbol\\ +\zero zero Symbol\\ +\one one Symbol\\ +\two two Symbol\\ +\three three Symbol\\ +\four four Symbol\\ +\five five Symbol\\ +\six six Symbol\\ +\seven seven Symbol\\ +\eight eight Symbol\\ +\nine nine Symbol\\ +\colon colon Symbol\\ +\semicolon semicolon Symbol\\ +\less less Symbol\\ +\equal equal Symbol\\ +\greater greater Symbol\\ +\question question Symbol\\ +\congruent congruent Symbol\\ +\Alpha Alpha Symbol\\ +\Beta Beta Symbol\\ +\Chi Chi Symbol\\ +\Delta Delta Symbol\\ +\Epsilon Epsilon Symbol\\ +\Phi Phi Symbol\\ +\Gamma Gamma Symbol\\ +\Eta Eta Symbol\\ +\Iota Iota Symbol\\ +\theta1 theta1 Symbol\\ +\Kappa Kappa Symbol\\ +\Lambda Lambda Symbol\\ +\Mu Mu Symbol\\ +\Nu Nu Symbol\\ +\Omicron Omicron Symbol\\ +\Pi Pi Symbol\\ +\Theta Theta Symbol\\ +\Rho Rho Symbol\\ +\Sigma Sigma Symbol\\ +\Tau Tau Symbol\\ +\Upsilon Upsilon Symbol\\ +\sigma1 sigma1 Symbol\\ +\Omega Omega Symbol\\ +\Xi Xi Symbol\\ +\Psi Psi Symbol\\ +\Zeta Zeta Symbol\\ +\bracketleft bracketleft Symbol\\ +\therefore therefore Symbol\\ +\bracketright bracketright 
Symbol\\ +\perpendicular perpendicular Symbol\\ +\underscore underscore Symbol\\ +\radicalex radicalex Symbol\\ +\alpha alpha Symbol\\ +\beta beta Symbol\\ +\chi chi Symbol\\ +\delta delta Symbol\\ +\epsilon epsilon Symbol\\ +\phi phi Symbol\\ +\gamma gamma Symbol\\ +\eta eta Symbol\\ +\iota iota Symbol\\ +\phi1 phi1 Symbol\\ +\kappa kappa Symbol\\ +\lambda lambda Symbol\\ +\mu mu Symbol\\ +\nu nu Symbol\\ +\omicron omicron Symbol\\ +\pi pi Symbol\\ +\theta theta Symbol\\ +\rho rho Symbol\\ +\sigma sigma Symbol\\ +\tau tau Symbol\\ +\upsilon upsilon Symbol\\ +\omega1 omega1 Symbol\\ +\omega omega Symbol\\ +\xi xi Symbol\\ +\psi psi Symbol\\ +\zeta zeta Symbol\\ +\braceleft braceleft Symbol\\ +\bar bar Symbol\\ +\braceright braceright Symbol\\ +\similar similar Symbol\\ +\Euro Euro Symbol\\ +\Upsilon1 Upsilon1 Symbol\\ +\minute minute Symbol\\ +\lessequal lessequal Symbol\\ +\fraction fraction Symbol\\ +\infinity infinity Symbol\\ +\florin florin Symbol\\ +\club club Symbol\\ +\diamond diamond Symbol\\ +\heart heart Symbol\\ +\spade spade Symbol\\ +\arrowboth arrowboth Symbol\\ +\arrowleft arrowleft Symbol\\ +\arrowup arrowup Symbol\\ +\arrowright arrowright Symbol\\ +\arrowdown arrowdown Symbol\\ +\degree degree Symbol\\ +\plusminus plusminus Symbol\\ +\second second Symbol\\ +\greaterequal greaterequal Symbol\\ +\multiply multiply Symbol\\ +\proportional proportional Symbol\\ +\partialdiff partialdiff Symbol\\ +\bullet bullet Symbol\\ +\divide divide Symbol\\ +\notequal notequal Symbol\\ +\equivalence equivalence Symbol\\ +\approxequal approxequal Symbol\\ +\arrowvertex arrowvertex Symbol\\ +\arrowhorizex arrowhorizex Symbol\\ +\carriagereturn carriagereturn Symbol\\ +\aleph aleph Symbol\\ +\Ifraktur Ifraktur Symbol\\ +\Rfraktur Rfraktur Symbol\\ +\weierstrass weierstrass Symbol\\ +\circlemultiply circlemultiply Symbol\\ +\circleplus circleplus Symbol\\ +\emptyset emptyset Symbol\\ +\intersection intersection Symbol\\ +\union union Symbol\\ +\propersuperset 
propersuperset Symbol\\ +\reflexsuperset reflexsuperset Symbol\\ +\notsubset notsubset Symbol\\ +\propersubset propersubset Symbol\\ +\reflexsubset reflexsubset Symbol\\ +\element element Symbol\\ +\notelement notelement Symbol\\ +\angle angle Symbol\\ +\gradient gradient Symbol\\ +\registerserif registerserif Symbol\\ +\copyrightserif copyrightserif Symbol\\ +\trademarkserif trademarkserif Symbol\\ +\product product Symbol\\ +\radical radical Symbol\\ +\dotmath dotmath Symbol\\ +\logicalnot logicalnot Symbol\\ +\logicaland logicaland Symbol\\ +\logicalor logicalor Symbol\\ +\arrowdblboth arrowdblboth Symbol\\ +\arrowdblleft arrowdblleft Symbol\\ +\arrowdblup arrowdblup Symbol\\ +\arrowdblright arrowdblright Symbol\\ +\arrowdbldown arrowdbldown Symbol\\ +\lozenge lozenge Symbol\\ +\angleleft angleleft Symbol\\ +\registersans registersans Symbol\\ +\copyrightsans copyrightsans Symbol\\ +\trademarksans trademarksans Symbol\\ +\summation summation Symbol\\ +\parenlefttp parenlefttp Symbol\\ +\parenleftex parenleftex Symbol\\ +\parenleftbt parenleftbt Symbol\\ +\bracketlefttp bracketlefttp Symbol\\ +\bracketleftex bracketleftex Symbol\\ +\bracketleftbt bracketleftbt Symbol\\ +\bracelefttp bracelefttp Symbol\\ +\braceleftmid braceleftmid Symbol\\ +\braceleftbt braceleftbt Symbol\\ +\braceex braceex Symbol\\ +\angleright angleright Symbol\\ +\integral integral Symbol\\ +\integraltp integraltp Symbol\\ +\integralex integralex Symbol\\ +\integralbt integralbt Symbol\\ +\parenrighttp parenrighttp Symbol\\ +\parenrightex parenrightex Symbol\\ +\parenrightbt parenrightbt Symbol\\ +\bracketrighttp bracketrighttp Symbol\\ +\bracketrightex bracketrightex Symbol\\ +\bracketrightbt bracketrightbt Symbol\\ +\bracerighttp bracerighttp Symbol\\ +\bracerightmid bracerightmid Symbol\\ +\bracerightbt bracerightbt Symbol\\ + + +\section Lucida Math Italic Charmap + +\Gamma Gamma LucidNewMatItaT\\ +\Delta Delta LucidNewMatItaT\\ +\Theta Theta LucidNewMatItaT\\ +\Lambda Lambda 
LucidNewMatItaT\\ +\Xi Xi LucidNewMatItaT\\ +\Pi Pi LucidNewMatItaT\\ +\Sigma Sigma LucidNewMatItaT\\ +\Upsilon Upsilon LucidNewMatItaT\\ +\Phi Phi LucidNewMatItaT\\ +\Psi Psi LucidNewMatItaT\\ +\Omega Omega LucidNewMatItaT\\ +\alpha alpha LucidNewMatItaT\\ +\beta beta LucidNewMatItaT\\ +\gamma gamma LucidNewMatItaT\\ +\delta delta LucidNewMatItaT\\ +\epsilon1 epsilon1 LucidNewMatItaT\\ +\zeta zeta LucidNewMatItaT\\ +\eta eta LucidNewMatItaT\\ +\theta theta LucidNewMatItaT\\ +\iota iota LucidNewMatItaT\\ +\kappa kappa LucidNewMatItaT\\ +\lambda lambda LucidNewMatItaT\\ +\mu mu LucidNewMatItaT\\ +\nu nu LucidNewMatItaT\\ +\xi xi LucidNewMatItaT\\ +\pi pi LucidNewMatItaT\\ +\rho rho LucidNewMatItaT\\ +\sigma sigma LucidNewMatItaT\\ +\tau tau LucidNewMatItaT\\ +\upsilon upsilon LucidNewMatItaT\\ +\phi phi LucidNewMatItaT\\ +\chi chi LucidNewMatItaT\\ +\psi psi LucidNewMatItaT\\ +\tie tie LucidNewMatItaT\\ +\omega omega LucidNewMatItaT\\ +\epsilon epsilon LucidNewMatItaT\\ +\theta1 theta1 LucidNewMatItaT\\ +\pi1 pi1 LucidNewMatItaT\\ +\rho1 rho1 LucidNewMatItaT\\ +\sigma1 sigma1 LucidNewMatItaT\\ +\phi1 phi1 LucidNewMatItaT\\ +\arrowlefttophalf arrowlefttophalf LucidNewMatItaT\\ +\arrowleftbothalf arrowleftbothalf LucidNewMatItaT\\ +\arrowrighttophalf arrowrighttophalf LucidNewMatItaT\\ +\arrowrightbothalf arrowrightbothalf LucidNewMatItaT\\ +\arrowhookleft arrowhookleft LucidNewMatItaT\\ +\arrowhookright arrowhookright LucidNewMatItaT\\ +\triangleright triangleright LucidNewMatItaT\\ +\triangleleft triangleleft LucidNewMatItaT\\ +\period period LucidNewMatItaT\\ +\comma comma LucidNewMatItaT\\ +\less less LucidNewMatItaT\\ +\slash slash LucidNewMatItaT\\ +\greater greater LucidNewMatItaT\\ +\star star LucidNewMatItaT\\ +\partialdiff partialdiff LucidNewMatItaT\\ +\flat flat LucidNewMatItaT\\ +\natural natural LucidNewMatItaT\\ +\sharp sharp LucidNewMatItaT\\ +\slurbelow slurbelow LucidNewMatItaT\\ +\slurabove slurabove LucidNewMatItaT\\ +\lscript lscript 
LucidNewMatItaT\\ +\dotlessi dotlessi LucidNewMatItaT\\ +\dotlessj dotlessj LucidNewMatItaT\\ +\weierstrass weierstrass LucidNewMatItaT\\ +\vector vector LucidNewMatItaT\\ + +\section Lucida Math Symbol Charmap + +\minus minus LucidNewMatSymT\\ +\periodcentered periodcentered LucidNewMatSymT\\ +\multiply multiply LucidNewMatSymT\\ +\asteriskmath asteriskmath LucidNewMatSymT\\ +\divide divide LucidNewMatSymT\\ +\diamondmath diamondmath LucidNewMatSymT\\ +\plusminus plusminus LucidNewMatSymT\\ +\minusplus minusplus LucidNewMatSymT\\ +\circleplus circleplus LucidNewMatSymT\\ +\circleminus circleminus LucidNewMatSymT\\ +\circlemultiply circlemultiply LucidNewMatSymT\\ +\circledivide circledivide LucidNewMatSymT\\ +\circledot circledot LucidNewMatSymT\\ +\circlecopyrt circlecopyrt LucidNewMatSymT\\ +\openbullet openbullet LucidNewMatSymT\\ +\bullet bullet LucidNewMatSymT\\ +\equivasymptotic equivasymptotic LucidNewMatSymT\\ +\equivalence equivalence LucidNewMatSymT\\ +\reflexsubset reflexsubset LucidNewMatSymT\\ +\reflexsuperset reflexsuperset LucidNewMatSymT\\ +\lessequal lessequal LucidNewMatSymT\\ +\greaterequal greaterequal LucidNewMatSymT\\ +\precedesequal precedesequal LucidNewMatSymT\\ +\followsequal followsequal LucidNewMatSymT\\ +\similar similar LucidNewMatSymT\\ +\approxequal approxequal LucidNewMatSymT\\ +\propersubset propersubset LucidNewMatSymT\\ +\propersuperset propersuperset LucidNewMatSymT\\ +\lessmuch lessmuch LucidNewMatSymT\\ +\greatermuch greatermuch LucidNewMatSymT\\ +\precedes precedes LucidNewMatSymT\\ +\follows follows LucidNewMatSymT\\ +\arrowleft arrowleft LucidNewMatSymT\\ +\spade spade LucidNewMatSymT\\ +\arrowright arrowright LucidNewMatSymT\\ +\arrowup arrowup LucidNewMatSymT\\ +\arrowdown arrowdown LucidNewMatSymT\\ +\arrowboth arrowboth LucidNewMatSymT\\ +\arrownortheast arrownortheast LucidNewMatSymT\\ +\arrowsoutheast arrowsoutheast LucidNewMatSymT\\ +\similarequal similarequal LucidNewMatSymT\\ +\arrowdblleft arrowdblleft 
LucidNewMatSymT\\ +\arrowdblright arrowdblright LucidNewMatSymT\\ +\arrowdblup arrowdblup LucidNewMatSymT\\ +\arrowdbldown arrowdbldown LucidNewMatSymT\\ +\arrowdblboth arrowdblboth LucidNewMatSymT\\ +\arrownorthwest arrownorthwest LucidNewMatSymT\\ +\arrowsouthwest arrowsouthwest LucidNewMatSymT\\ +\proportional proportional LucidNewMatSymT\\ +\prime prime LucidNewMatSymT\\ +\infinity infinity LucidNewMatSymT\\ +\element element LucidNewMatSymT\\ +\owner owner LucidNewMatSymT\\ +\triangle triangle LucidNewMatSymT\\ +\triangleinv triangleinv LucidNewMatSymT\\ +\negationslash negationslash LucidNewMatSymT\\ +\mapsto mapsto LucidNewMatSymT\\ +\universal universal LucidNewMatSymT\\ +\existential existential LucidNewMatSymT\\ +\logicalnot logicalnot LucidNewMatSymT\\ +\emptyset emptyset LucidNewMatSymT\\ +\Rfractur Rfractur LucidNewMatSymT\\ +\Ifractur Ifractur LucidNewMatSymT\\ +\latticetop latticetop LucidNewMatSymT\\ +\perpendicular perpendicular LucidNewMatSymT\\ +\aleph aleph LucidNewMatSymT\\ +\scriptA scriptA LucidNewMatSymT\\ +\scriptB scriptB LucidNewMatSymT\\ +\scriptC scriptC LucidNewMatSymT\\ +\scriptD scriptD LucidNewMatSymT\\ +\scriptE scriptE LucidNewMatSymT\\ +\scriptF scriptF LucidNewMatSymT\\ +\scriptG scriptG LucidNewMatSymT\\ +\scriptH scriptH LucidNewMatSymT\\ +\scriptI scriptI LucidNewMatSymT\\ +\scriptJ scriptJ LucidNewMatSymT\\ +\scriptK scriptK LucidNewMatSymT\\ +\scriptL scriptL LucidNewMatSymT\\ +\scriptM scriptM LucidNewMatSymT\\ +\scriptN scriptN LucidNewMatSymT\\ +\scriptO scriptO LucidNewMatSymT\\ +\scriptP scriptP LucidNewMatSymT\\ +\scriptQ scriptQ LucidNewMatSymT\\ +\scriptR scriptR LucidNewMatSymT\\ +\scriptS scriptS LucidNewMatSymT\\ +\scriptT scriptT LucidNewMatSymT\\ +\scriptU scriptU LucidNewMatSymT\\ +\scriptV scriptV LucidNewMatSymT\\ +\scriptW scriptW LucidNewMatSymT\\ +\scriptX scriptX LucidNewMatSymT\\ +\scriptY scriptY LucidNewMatSymT\\ +\scriptZ scriptZ LucidNewMatSymT\\ +\union union LucidNewMatSymT\\ +\intersection 
intersection LucidNewMatSymT\\ +\unionmulti unionmulti LucidNewMatSymT\\ +\logicaland logicaland LucidNewMatSymT\\ +\logicalor logicalor LucidNewMatSymT\\ +\turnstileleft turnstileleft LucidNewMatSymT\\ +\turnstileright turnstileright LucidNewMatSymT\\ +\floorleft floorleft LucidNewMatSymT\\ +\floorright floorright LucidNewMatSymT\\ +\ceilingleft ceilingleft LucidNewMatSymT\\ +\ceilingright ceilingright LucidNewMatSymT\\ +\braceleft braceleft LucidNewMatSymT\\ +\braceright braceright LucidNewMatSymT\\ +\angbracketleft angbracketleft LucidNewMatSymT\\ +\angbracketright angbracketright LucidNewMatSymT\\ +\bar bar LucidNewMatSymT\\ +\bardbl bardbl LucidNewMatSymT\\ +\arrowbothv arrowbothv LucidNewMatSymT\\ +\arrowdblbothv arrowdblbothv LucidNewMatSymT\\ +\backslash backslash LucidNewMatSymT\\ +\wreathproduct wreathproduct LucidNewMatSymT\\ +\radical radical LucidNewMatSymT\\ +\coproduct coproduct LucidNewMatSymT\\ +\nabla nabla LucidNewMatSymT\\ +\integral integral LucidNewMatSymT\\ +\unionsq unionsq LucidNewMatSymT\\ +\intersectionsq intersectionsq LucidNewMatSymT\\ +\subsetsqequal subsetsqequal LucidNewMatSymT\\ +\supersetsqequal supersetsqequal LucidNewMatSymT\\ +\section section LucidNewMatSymT\\ +\dagger dagger LucidNewMatSymT\\ +\daggerdbl daggerdbl LucidNewMatSymT\\ +\paragraph paragraph LucidNewMatSymT\\ +\club club LucidNewMatSymT\\ +\diamond diamond LucidNewMatSymT\\ +\heart heart LucidNewMatSymT\\ + +\section Lucida Math Extended Charmap + +\parenleftbig parenleftbig LucidNewMatExtT\\ +\parenrightbig parenrightbig LucidNewMatExtT\\ +\bracketleftbig bracketleftbig LucidNewMatExtT\\ +\bracketrightbig bracketrightbig LucidNewMatExtT\\ +\floorleftbig floorleftbig LucidNewMatExtT\\ +\floorrightbig floorrightbig LucidNewMatExtT\\ +\ceilingleftbig ceilingleftbig LucidNewMatExtT\\ +\ceilingrightbig ceilingrightbig LucidNewMatExtT\\ +\braceleftbig braceleftbig LucidNewMatExtT\\ +\bracerightbig bracerightbig LucidNewMatExtT\\ +\angbracketleftbig angbracketleftbig 
LucidNewMatExtT\\ +\angbracketrightbig angbracketrightbig LucidNewMatExtT\\ +\vextendsingle vextendsingle LucidNewMatExtT\\ +\vextenddouble vextenddouble LucidNewMatExtT\\ +\slashbig slashbig LucidNewMatExtT\\ +\backslashbig backslashbig LucidNewMatExtT\\ +\parenleftBig parenleftBig LucidNewMatExtT\\ +\parenrightBig parenrightBig LucidNewMatExtT\\ +\parenleftbigg parenleftbigg LucidNewMatExtT\\ +\parenrightbigg parenrightbigg LucidNewMatExtT\\ +\bracketleftbigg bracketleftbigg LucidNewMatExtT\\ +\bracketrightbigg bracketrightbigg LucidNewMatExtT\\ +\floorleftbigg floorleftbigg LucidNewMatExtT\\ +\floorrightbigg floorrightbigg LucidNewMatExtT\\ +\ceilingleftbigg ceilingleftbigg LucidNewMatExtT\\ +\ceilingrightbigg ceilingrightbigg LucidNewMatExtT\\ +\braceleftbigg braceleftbigg LucidNewMatExtT\\ +\bracerightbigg bracerightbigg LucidNewMatExtT\\ +\angbracketleftbigg angbracketleftbigg LucidNewMatExtT\\ +\angbracketrightbigg angbracketrightbigg LucidNewMatExtT\\ +\slashbigg slashbigg LucidNewMatExtT\\ +\backslashbigg backslashbigg LucidNewMatExtT\\ +\parenleftBigg parenleftBigg LucidNewMatExtT\\ +\parenrightBigg parenrightBigg LucidNewMatExtT\\ +\bracketleftBigg bracketleftBigg LucidNewMatExtT\\ +\bracketrightBigg bracketrightBigg LucidNewMatExtT\\ +\floorleftBigg floorleftBigg LucidNewMatExtT\\ +\floorrightBigg floorrightBigg LucidNewMatExtT\\ +\ceilingleftBigg ceilingleftBigg LucidNewMatExtT\\ +\ceilingrightBigg ceilingrightBigg LucidNewMatExtT\\ +\braceleftBigg braceleftBigg LucidNewMatExtT\\ +\bracerightBigg bracerightBigg LucidNewMatExtT\\ +\angbracketleftBigg angbracketleftBigg LucidNewMatExtT\\ +\angbracketrightBigg angbracketrightBigg LucidNewMatExtT\\ +\slashBigg slashBigg LucidNewMatExtT\\ +\backslashBigg backslashBigg LucidNewMatExtT\\ +\slashBig slashBig LucidNewMatExtT\\ +\backslashBig backslashBig LucidNewMatExtT\\ +\parenlefttp parenlefttp LucidNewMatExtT\\ +\parenrighttp parenrighttp LucidNewMatExtT\\ +\bracketlefttp bracketlefttp LucidNewMatExtT\\ 
+\bracketrighttp bracketrighttp LucidNewMatExtT\\ +\bracketleftbt bracketleftbt LucidNewMatExtT\\ +\bracketrightbt bracketrightbt LucidNewMatExtT\\ +\bracketleftex bracketleftex LucidNewMatExtT\\ +\bracketrightex bracketrightex LucidNewMatExtT\\ +\bracelefttp bracelefttp LucidNewMatExtT\\ +\bracerighttp bracerighttp LucidNewMatExtT\\ +\braceleftbt braceleftbt LucidNewMatExtT\\ +\bracerightbt bracerightbt LucidNewMatExtT\\ +\braceleftmid braceleftmid LucidNewMatExtT\\ +\bracerightmid bracerightmid LucidNewMatExtT\\ +\braceex braceex LucidNewMatExtT\\ +\arrowvertex arrowvertex LucidNewMatExtT\\ +\parenleftbt parenleftbt LucidNewMatExtT\\ +\parenrightbt parenrightbt LucidNewMatExtT\\ +\parenleftex parenleftex LucidNewMatExtT\\ +\parenrightex parenrightex LucidNewMatExtT\\ +\angbracketleftBig angbracketleftBig LucidNewMatExtT\\ +\angbracketrightBig angbracketrightBig LucidNewMatExtT\\ +\unionsqtext unionsqtext LucidNewMatExtT\\ +\unionsqdisplay unionsqdisplay LucidNewMatExtT\\ +\contintegraltext contintegraltext LucidNewMatExtT\\ +\contintegraldisplay contintegraldisplay LucidNewMatExtT\\ +\circledottext circledottext LucidNewMatExtT\\ +\circledotdisplay circledotdisplay LucidNewMatExtT\\ +\circleplustext circleplustext LucidNewMatExtT\\ +\circleplusdisplay circleplusdisplay LucidNewMatExtT\\ +\circlemultiplytext circlemultiplytext LucidNewMatExtT\\ +\circlemultiplydisplay circlemultiplydisplay LucidNewMatExtT\\ +\summationtext summationtext LucidNewMatExtT\\ +\producttext producttext LucidNewMatExtT\\ +\integraltext integraltext LucidNewMatExtT\\ +\uniontext uniontext LucidNewMatExtT\\ +\intersectiontext intersectiontext LucidNewMatExtT\\ +\unionmultitext unionmultitext LucidNewMatExtT\\ +\logicalandtext logicalandtext LucidNewMatExtT\\ +\logicalortext logicalortext LucidNewMatExtT\\ +\summationdisplay summationdisplay LucidNewMatExtT\\ +\productdisplay productdisplay LucidNewMatExtT\\ +\integraldisplay integraldisplay LucidNewMatExtT\\ +\uniondisplay uniondisplay 
LucidNewMatExtT\\ +\intersectiondisplay intersectiondisplay LucidNewMatExtT\\ +\unionmultidisplay unionmultidisplay LucidNewMatExtT\\ +\logicalanddisplay logicalanddisplay LucidNewMatExtT\\ +\logicalordisplay logicalordisplay LucidNewMatExtT\\ +\coproducttext coproducttext LucidNewMatExtT\\ +\coproductdisplay coproductdisplay LucidNewMatExtT\\ +\hatwide hatwide LucidNewMatExtT\\ +\hatwider hatwider LucidNewMatExtT\\ +\hatwidest hatwidest LucidNewMatExtT\\ +\tildewide tildewide LucidNewMatExtT\\ +\tildewider tildewider LucidNewMatExtT\\ +\tildewidest tildewidest LucidNewMatExtT\\ +\bracketleftBig bracketleftBig LucidNewMatExtT\\ +\bracketrightBig bracketrightBig LucidNewMatExtT\\ +\floorleftBig floorleftBig LucidNewMatExtT\\ +\floorrightBig floorrightBig LucidNewMatExtT\\ +\ceilingleftBig ceilingleftBig LucidNewMatExtT\\ +\ceilingrightBig ceilingrightBig LucidNewMatExtT\\ +\braceleftBig braceleftBig LucidNewMatExtT\\ +\bracerightBig bracerightBig LucidNewMatExtT\\ +\radicalbig radicalbig LucidNewMatExtT\\ +\radicalBig radicalBig LucidNewMatExtT\\ +\radicalbigg radicalbigg LucidNewMatExtT\\ +\radicalBigg radicalBigg LucidNewMatExtT\\ +\radicalbt radicalbt LucidNewMatExtT\\ +\radicalvertex radicalvertex LucidNewMatExtT\\ +\radicaltp radicaltp LucidNewMatExtT\\ +\arrowvertexdbl arrowvertexdbl LucidNewMatExtT\\ +\arrowtp arrowtp LucidNewMatExtT\\ +\arrowbt arrowbt LucidNewMatExtT\\ +\bracehtipdownleft bracehtipdownleft LucidNewMatExtT\\ +\bracehtipdownright bracehtipdownright LucidNewMatExtT\\ +\bracehtipupleft bracehtipupleft LucidNewMatExtT\\ +\bracehtipupright bracehtipupright LucidNewMatExtT\\ +\arrowdbltp arrowdbltp LucidNewMatExtT\\ +\arrowdblbt arrowdblbt LucidNewMatExtT\\ + + diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/lucmathext-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/lucmathext-charmap.txt new file mode 100755 index 00000000..623b3040 --- /dev/null +++ 
b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/lucmathext-charmap.txt @@ -0,0 +1,130 @@ +# character map for Lucida New Math Extended font + +<char:parenleftbig><font:LucidNewMatExtT><index:161> +<char:parenrightbig><font:LucidNewMatExtT><index:162> +<char:bracketleftbig><font:LucidNewMatExtT><index:163> +<char:bracketrightbig><font:LucidNewMatExtT><index:164> +<char:floorleftbig><font:LucidNewMatExtT><index:165> +<char:floorrightbig><font:LucidNewMatExtT><index:166> +<char:ceilingleftbig><font:LucidNewMatExtT><index:167> +<char:ceilingrightbig><font:LucidNewMatExtT><index:168> +<char:braceleftbig><font:LucidNewMatExtT><index:169> +<char:bracerightbig><font:LucidNewMatExtT><index:170> +<char:angbracketleftbig><font:LucidNewMatExtT><index:173> +<char:angbracketrightbig><font:LucidNewMatExtT><index:174> +<char:vextendsingle><font:LucidNewMatExtT><index:175> +<char:vextenddouble><font:LucidNewMatExtT><index:176> +<char:slashbig><font:LucidNewMatExtT><index:177> +<char:backslashbig><font:LucidNewMatExtT><index:178> +<char:parenleftBig><font:LucidNewMatExtT><index:179> +<char:parenrightBig><font:LucidNewMatExtT><index:180> +<char:parenleftbigg><font:LucidNewMatExtT><index:181> +<char:parenrightbigg><font:LucidNewMatExtT><index:182> +<char:bracketleftbigg><font:LucidNewMatExtT><index:183> +<char:bracketrightbigg><font:LucidNewMatExtT><index:184> +<char:floorleftbigg><font:LucidNewMatExtT><index:185> +<char:floorrightbigg><font:LucidNewMatExtT><index:186> +<char:ceilingleftbigg><font:LucidNewMatExtT><index:187> +<char:ceilingrightbigg><font:LucidNewMatExtT><index:188> +<char:braceleftbigg><font:LucidNewMatExtT><index:189> +<char:bracerightbigg><font:LucidNewMatExtT><index:190> +<char:angbracketleftbigg><font:LucidNewMatExtT><index:28> +<char:angbracketrightbigg><font:LucidNewMatExtT><index:29> +<char:slashbigg><font:LucidNewMatExtT><index:193> +<char:backslashbigg><font:LucidNewMatExtT><index:194> +<char:parenleftBigg><font:LucidNewMatExtT><index:195> 
+<char:parenrightBigg><font:LucidNewMatExtT><index:33> +<char:bracketleftBigg><font:LucidNewMatExtT><index:34> +<char:bracketrightBigg><font:LucidNewMatExtT><index:35> +<char:floorleftBigg><font:LucidNewMatExtT><index:36> +<char:floorrightBigg><font:LucidNewMatExtT><index:37> +<char:ceilingleftBigg><font:LucidNewMatExtT><index:38> +<char:ceilingrightBigg><font:LucidNewMatExtT><index:39> +<char:braceleftBigg><font:LucidNewMatExtT><index:40> +<char:bracerightBigg><font:LucidNewMatExtT><index:41> +<char:angbracketleftBigg><font:LucidNewMatExtT><index:42> +<char:angbracketrightBigg><font:LucidNewMatExtT><index:43> +<char:slashBigg><font:LucidNewMatExtT><index:44> +<char:backslashBigg><font:LucidNewMatExtT><index:45> +<char:slashBig><font:LucidNewMatExtT><index:46> +<char:backslashBig><font:LucidNewMatExtT><index:47> +<char:parenlefttp><font:LucidNewMatExtT><index:48> +<char:parenrighttp><font:LucidNewMatExtT><index:49> +<char:bracketlefttp><font:LucidNewMatExtT><index:50> +<char:bracketrighttp><font:LucidNewMatExtT><index:51> +<char:bracketleftbt><font:LucidNewMatExtT><index:52> +<char:bracketrightbt><font:LucidNewMatExtT><index:53> +<char:bracketleftex><font:LucidNewMatExtT><index:54> +<char:bracketrightex><font:LucidNewMatExtT><index:55> +<char:bracelefttp><font:LucidNewMatExtT><index:56> +<char:bracerighttp><font:LucidNewMatExtT><index:57> +<char:braceleftbt><font:LucidNewMatExtT><index:58> +<char:bracerightbt><font:LucidNewMatExtT><index:59> +<char:braceleftmid><font:LucidNewMatExtT><index:60> +<char:bracerightmid><font:LucidNewMatExtT><index:61> +<char:braceex><font:LucidNewMatExtT><index:62> +<char:arrowvertex><font:LucidNewMatExtT><index:63> +<char:parenleftbt><font:LucidNewMatExtT><index:64> +<char:parenrightbt><font:LucidNewMatExtT><index:65> +<char:parenleftex><font:LucidNewMatExtT><index:66> +<char:parenrightex><font:LucidNewMatExtT><index:67> +<char:angbracketleftBig><font:LucidNewMatExtT><index:68> +<char:angbracketrightBig><font:LucidNewMatExtT><index:69> 
+<char:unionsqtext><font:LucidNewMatExtT><index:70> +<char:unionsqdisplay><font:LucidNewMatExtT><index:71> +<char:contintegraltext><font:LucidNewMatExtT><index:72> +<char:contintegraldisplay><font:LucidNewMatExtT><index:73> +<char:circledottext><font:LucidNewMatExtT><index:74> +<char:circledotdisplay><font:LucidNewMatExtT><index:75> +<char:circleplustext><font:LucidNewMatExtT><index:76> +<char:circleplusdisplay><font:LucidNewMatExtT><index:77> +<char:circlemultiplytext><font:LucidNewMatExtT><index:78> +<char:circlemultiplydisplay><font:LucidNewMatExtT><index:79> +<char:summationtext><font:LucidNewMatExtT><index:80> +<char:producttext><font:LucidNewMatExtT><index:81> +<char:integraltext><font:LucidNewMatExtT><index:82> +<char:uniontext><font:LucidNewMatExtT><index:83> +<char:intersectiontext><font:LucidNewMatExtT><index:84> +<char:unionmultitext><font:LucidNewMatExtT><index:85> +<char:logicalandtext><font:LucidNewMatExtT><index:86> +<char:logicalortext><font:LucidNewMatExtT><index:87> +<char:summationdisplay><font:LucidNewMatExtT><index:88> +<char:productdisplay><font:LucidNewMatExtT><index:89> +<char:integraldisplay><font:LucidNewMatExtT><index:90> +<char:uniondisplay><font:LucidNewMatExtT><index:91> +<char:intersectiondisplay><font:LucidNewMatExtT><index:92> +<char:unionmultidisplay><font:LucidNewMatExtT><index:93> +<char:logicalanddisplay><font:LucidNewMatExtT><index:94> +<char:logicalordisplay><font:LucidNewMatExtT><index:95> +<char:coproducttext><font:LucidNewMatExtT><index:96> +<char:coproductdisplay><font:LucidNewMatExtT><index:97> +<char:hatwide><font:LucidNewMatExtT><index:98> +<char:hatwider><font:LucidNewMatExtT><index:99> +<char:hatwidest><font:LucidNewMatExtT><index:100> +<char:tildewide><font:LucidNewMatExtT><index:101> +<char:tildewider><font:LucidNewMatExtT><index:102> +<char:tildewidest><font:LucidNewMatExtT><index:103> +<char:bracketleftBig><font:LucidNewMatExtT><index:104> +<char:bracketrightBig><font:LucidNewMatExtT><index:105> 
+<char:floorleftBig><font:LucidNewMatExtT><index:106> +<char:floorrightBig><font:LucidNewMatExtT><index:107> +<char:ceilingleftBig><font:LucidNewMatExtT><index:108> +<char:ceilingrightBig><font:LucidNewMatExtT><index:109> +<char:braceleftBig><font:LucidNewMatExtT><index:110> +<char:bracerightBig><font:LucidNewMatExtT><index:111> +<char:radicalbig><font:LucidNewMatExtT><index:112> +<char:radicalBig><font:LucidNewMatExtT><index:113> +<char:radicalbigg><font:LucidNewMatExtT><index:114> +<char:radicalBigg><font:LucidNewMatExtT><index:115> +<char:radicalbt><font:LucidNewMatExtT><index:116> +<char:radicalvertex><font:LucidNewMatExtT><index:117> +<char:radicaltp><font:LucidNewMatExtT><index:118> +<char:arrowvertexdbl><font:LucidNewMatExtT><index:119> +<char:arrowtp><font:LucidNewMatExtT><index:120> +<char:arrowbt><font:LucidNewMatExtT><index:121> +<char:bracehtipdownleft><font:LucidNewMatExtT><index:122> +<char:bracehtipdownright><font:LucidNewMatExtT><index:123> +<char:bracehtipupleft><font:LucidNewMatExtT><index:124> +<char:bracehtipupright><font:LucidNewMatExtT><index:125> +<char:arrowdbltp><font:LucidNewMatExtT><index:126> +<char:arrowdblbt><font:LucidNewMatExtT><index:196> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/lucmathit-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/lucmathit-charmap.txt new file mode 100755 index 00000000..612d09c4 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/lucmathit-charmap.txt @@ -0,0 +1,68 @@ +# character map for Lucida Math Italic font + +<char:Gamma><font:LucidNewMatItaT><index:161> +<char:Delta><font:LucidNewMatItaT><index:162> +<char:Theta><font:LucidNewMatItaT><index:163> +<char:Lambda><font:LucidNewMatItaT><index:164> +<char:Xi><font:LucidNewMatItaT><index:165> +<char:Pi><font:LucidNewMatItaT><index:166> +<char:Sigma><font:LucidNewMatItaT><index:167> +<char:Upsilon><font:LucidNewMatItaT><index:7> +<char:Phi><font:LucidNewMatItaT><index:169> 
+<char:Psi><font:LucidNewMatItaT><index:170> +<char:Omega><font:LucidNewMatItaT><index:173> +<char:alpha><font:LucidNewMatItaT><index:174> +<char:beta><font:LucidNewMatItaT><index:175> +<char:gamma><font:LucidNewMatItaT><index:176> +<char:delta><font:LucidNewMatItaT><index:177> +<char:epsilon1><font:LucidNewMatItaT><index:178> +<char:zeta><font:LucidNewMatItaT><index:179> +<char:eta><font:LucidNewMatItaT><index:180> +<char:theta><font:LucidNewMatItaT><index:181> +<char:iota><font:LucidNewMatItaT><index:182> +<char:kappa><font:LucidNewMatItaT><index:183> +<char:lambda><font:LucidNewMatItaT><index:184> +<char:mu><font:LucidNewMatItaT><index:185> +<char:nu><font:LucidNewMatItaT><index:186> +<char:xi><font:LucidNewMatItaT><index:187> +<char:pi><font:LucidNewMatItaT><index:188> +<char:rho><font:LucidNewMatItaT><index:189> +<char:sigma><font:LucidNewMatItaT><index:190> +<char:tau><font:LucidNewMatItaT><index:191> +<char:upsilon><font:LucidNewMatItaT><index:192> +<char:phi><font:LucidNewMatItaT><index:193> +<char:chi><font:LucidNewMatItaT><index:194> +<char:psi><font:LucidNewMatItaT><index:195> +<char:tie><font:LucidNewMatItaT><index:196> +<char:omega><font:LucidNewMatItaT><index:33> +<char:epsilon><font:LucidNewMatItaT><index:34> +<char:theta1><font:LucidNewMatItaT><index:35> +<char:pi1><font:LucidNewMatItaT><index:36> +<char:rho1><font:LucidNewMatItaT><index:37> +<char:sigma1><font:LucidNewMatItaT><index:38> +<char:phi1><font:LucidNewMatItaT><index:39> +<char:arrowlefttophalf><font:LucidNewMatItaT><index:40> +<char:arrowleftbothalf><font:LucidNewMatItaT><index:41> +<char:arrowrighttophalf><font:LucidNewMatItaT><index:42> +<char:arrowrightbothalf><font:LucidNewMatItaT><index:43> +<char:arrowhookleft><font:LucidNewMatItaT><index:44> +<char:arrowhookright><font:LucidNewMatItaT><index:45> +<char:triangleright><font:LucidNewMatItaT><index:46> +<char:triangleleft><font:LucidNewMatItaT><index:47> +<char:period><font:LucidNewMatItaT><index:58> 
+<char:comma><font:LucidNewMatItaT><index:59> +<char:less><font:LucidNewMatItaT><index:60> +<char:slash><font:LucidNewMatItaT><index:61> +<char:greater><font:LucidNewMatItaT><index:62> +<char:star><font:LucidNewMatItaT><index:63> +<char:partialdiff><font:LucidNewMatItaT><index:64> +<char:flat><font:LucidNewMatItaT><index:91> +<char:natural><font:LucidNewMatItaT><index:92> +<char:sharp><font:LucidNewMatItaT><index:93> +<char:slurbelow><font:LucidNewMatItaT><index:94> +<char:slurabove><font:LucidNewMatItaT><index:95> +<char:lscript><font:LucidNewMatItaT><index:96> +<char:dotlessi><font:LucidNewMatItaT><index:123> +<char:dotlessj><font:LucidNewMatItaT><index:124> +<char:weierstrass><font:LucidNewMatItaT><index:125> +<char:vector><font:LucidNewMatItaT><index:126> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/lucmathsym-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/lucmathsym-charmap.txt new file mode 100755 index 00000000..bdde61d2 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/lucmathsym-charmap.txt @@ -0,0 +1,130 @@ +# mathematical characters for Lucida New Math Symbol font + +<char:minus><font:LucidNewMatSymT><index:161> +<char:periodcentered><font:LucidNewMatSymT><index:162> +<char:multiply><font:LucidNewMatSymT><index:163> +<char:asteriskmath><font:LucidNewMatSymT><index:164> +<char:divide><font:LucidNewMatSymT><index:165> +<char:diamondmath><font:LucidNewMatSymT><index:166> +<char:plusminus><font:LucidNewMatSymT><index:167> +<char:minusplus><font:LucidNewMatSymT><index:168> +<char:circleplus><font:LucidNewMatSymT><index:169> +<char:circleminus><font:LucidNewMatSymT><index:170> +<char:circlemultiply><font:LucidNewMatSymT><index:173> +<char:circledivide><font:LucidNewMatSymT><index:174> +<char:circledot><font:LucidNewMatSymT><index:175> +<char:circlecopyrt><font:LucidNewMatSymT><index:176> +<char:openbullet><font:LucidNewMatSymT><index:177> +<char:bullet><font:LucidNewMatSymT><index:178> 
+<char:equivasymptotic><font:LucidNewMatSymT><index:179> +<char:equivalence><font:LucidNewMatSymT><index:180> +<char:reflexsubset><font:LucidNewMatSymT><index:181> +<char:reflexsuperset><font:LucidNewMatSymT><index:182> +<char:lessequal><font:LucidNewMatSymT><index:183> +<char:greaterequal><font:LucidNewMatSymT><index:184> +<char:precedesequal><font:LucidNewMatSymT><index:185> +<char:followsequal><font:LucidNewMatSymT><index:186> +<char:similar><font:LucidNewMatSymT><index:187> +<char:approxequal><font:LucidNewMatSymT><index:188> +<char:propersubset><font:LucidNewMatSymT><index:189> +<char:propersuperset><font:LucidNewMatSymT><index:190> +<char:lessmuch><font:LucidNewMatSymT><index:191> +<char:greatermuch><font:LucidNewMatSymT><index:192> +<char:precedes><font:LucidNewMatSymT><index:193> +<char:follows><font:LucidNewMatSymT><index:194> +<char:arrowleft><font:LucidNewMatSymT><index:195> +<char:spade><font:LucidNewMatSymT><index:196> +<char:arrowright><font:LucidNewMatSymT><index:33> +<char:arrowup><font:LucidNewMatSymT><index:34> +<char:arrowdown><font:LucidNewMatSymT><index:35> +<char:arrowboth><font:LucidNewMatSymT><index:36> +<char:arrownortheast><font:LucidNewMatSymT><index:37> +<char:arrowsoutheast><font:LucidNewMatSymT><index:38> +<char:similarequal><font:LucidNewMatSymT><index:39> +<char:arrowdblleft><font:LucidNewMatSymT><index:40> +<char:arrowdblright><font:LucidNewMatSymT><index:41> +<char:arrowdblup><font:LucidNewMatSymT><index:42> +<char:arrowdbldown><font:LucidNewMatSymT><index:43> +<char:arrowdblboth><font:LucidNewMatSymT><index:44> +<char:arrownorthwest><font:LucidNewMatSymT><index:45> +<char:arrowsouthwest><font:LucidNewMatSymT><index:46> +<char:proportional><font:LucidNewMatSymT><index:47> +<char:prime><font:LucidNewMatSymT><index:48> +<char:infinity><font:LucidNewMatSymT><index:49> +<char:element><font:LucidNewMatSymT><index:50> +<char:owner><font:LucidNewMatSymT><index:51> +<char:triangle><font:LucidNewMatSymT><index:52> 
+<char:triangleinv><font:LucidNewMatSymT><index:53> +<char:negationslash><font:LucidNewMatSymT><index:54> +<char:mapsto><font:LucidNewMatSymT><index:55> +<char:universal><font:LucidNewMatSymT><index:56> +<char:existential><font:LucidNewMatSymT><index:57> +<char:logicalnot><font:LucidNewMatSymT><index:58> +<char:emptyset><font:LucidNewMatSymT><index:59> +<char:Rfractur><font:LucidNewMatSymT><index:60> +<char:Ifractur><font:LucidNewMatSymT><index:61> +<char:latticetop><font:LucidNewMatSymT><index:62> +<char:perpendicular><font:LucidNewMatSymT><index:63> +<char:aleph><font:LucidNewMatSymT><index:64> +<char:scriptA><font:LucidNewMatSymT><index:65> +<char:scriptB><font:LucidNewMatSymT><index:66> +<char:scriptC><font:LucidNewMatSymT><index:67> +<char:scriptD><font:LucidNewMatSymT><index:68> +<char:scriptE><font:LucidNewMatSymT><index:69> +<char:scriptF><font:LucidNewMatSymT><index:70> +<char:scriptG><font:LucidNewMatSymT><index:71> +<char:scriptH><font:LucidNewMatSymT><index:72> +<char:scriptI><font:LucidNewMatSymT><index:73> +<char:scriptJ><font:LucidNewMatSymT><index:74> +<char:scriptK><font:LucidNewMatSymT><index:75> +<char:scriptL><font:LucidNewMatSymT><index:76> +<char:scriptM><font:LucidNewMatSymT><index:77> +<char:scriptN><font:LucidNewMatSymT><index:78> +<char:scriptO><font:LucidNewMatSymT><index:79> +<char:scriptP><font:LucidNewMatSymT><index:80> +<char:scriptQ><font:LucidNewMatSymT><index:81> +<char:scriptR><font:LucidNewMatSymT><index:82> +<char:scriptS><font:LucidNewMatSymT><index:83> +<char:scriptT><font:LucidNewMatSymT><index:84> +<char:scriptU><font:LucidNewMatSymT><index:85> +<char:scriptV><font:LucidNewMatSymT><index:86> +<char:scriptW><font:LucidNewMatSymT><index:87> +<char:scriptX><font:LucidNewMatSymT><index:88> +<char:scriptY><font:LucidNewMatSymT><index:89> +<char:scriptZ><font:LucidNewMatSymT><index:90> +<char:union><font:LucidNewMatSymT><index:91> +<char:intersection><font:LucidNewMatSymT><index:92> 
+<char:unionmulti><font:LucidNewMatSymT><index:93> +<char:logicaland><font:LucidNewMatSymT><index:94> +<char:logicalor><font:LucidNewMatSymT><index:95> +<char:turnstileleft><font:LucidNewMatSymT><index:96> +<char:turnstileright><font:LucidNewMatSymT><index:97> +<char:floorleft><font:LucidNewMatSymT><index:98> +<char:floorright><font:LucidNewMatSymT><index:99> +<char:ceilingleft><font:LucidNewMatSymT><index:100> +<char:ceilingright><font:LucidNewMatSymT><index:101> +<char:braceleft><font:LucidNewMatSymT><index:102> +<char:braceright><font:LucidNewMatSymT><index:103> +<char:angbracketleft><font:LucidNewMatSymT><index:104> +<char:angbracketright><font:LucidNewMatSymT><index:105> +<char:bar><font:LucidNewMatSymT><index:106> +<char:bardbl><font:LucidNewMatSymT><index:107> +<char:arrowbothv><font:LucidNewMatSymT><index:108> +<char:arrowdblbothv><font:LucidNewMatSymT><index:109> +<char:backslash><font:LucidNewMatSymT><index:110> +<char:wreathproduct><font:LucidNewMatSymT><index:111> +<char:radical><font:LucidNewMatSymT><index:112> +<char:coproduct><font:LucidNewMatSymT><index:113> +<char:nabla><font:LucidNewMatSymT><index:114> +<char:integral><font:LucidNewMatSymT><index:115> +<char:unionsq><font:LucidNewMatSymT><index:116> +<char:intersectionsq><font:LucidNewMatSymT><index:117> +<char:subsetsqequal><font:LucidNewMatSymT><index:118> +<char:supersetsqequal><font:LucidNewMatSymT><index:119> +<char:section><font:LucidNewMatSymT><index:120> +<char:dagger><font:LucidNewMatSymT><index:121> +<char:daggerdbl><font:LucidNewMatSymT><index:122> +<char:paragraph><font:LucidNewMatSymT><index:123> +<char:club><font:LucidNewMatSymT><index:124> +<char:diamond><font:LucidNewMatSymT><index:125> +<char:heart><font:LucidNewMatSymT><index:126> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/make-charmap-display.pl b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/make-charmap-display.pl new file mode 100755 index 00000000..a6540ae4 --- /dev/null +++ 
b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/make-charmap-display.pl @@ -0,0 +1,25 @@ +# perl script +# makes tagged text to show contents of char map + +# generate file names for input +$sourcefile = $ARGV[0]; +# generate file names for output +# trim off suffix starting with dot +($base = $sourcefile) =~ s/(.+)\..*/$1/; +$resultfile = "$base.processed.txt"; + +print "Converting $sourcefile to $resultfile ...\n"; +open (SOURCE, "<$sourcefile"); +open (RESULT, ">$resultfile"); + +LINE: while ($line = <SOURCE>) { + next LINE if $line =~ /^#/; # skip if line matches pattern + + if ($line =~ /<char:(\w+)><font:(\w*)>/) { + $charname = $1; + $fontname = $2; + print RESULT "\\$charname\t$charname\t$fontname\\\\\n"; + } + } +close (SOURCE); +close (RESULT); diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/make-map.pl b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/make-map.pl new file mode 100755 index 00000000..127de4f4 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/make-map.pl @@ -0,0 +1,29 @@ +# perl script +# makes character map +# input line: number tab name +# output line: <char:name><font:><index:number> + +# generate file names for input +$sourcefile = $ARGV[0]; +# generate file names for output +# trim off suffix starting with dot +($base = $sourcefile) =~ s/(.+)\..*/$1/; +$resultfile = "$base.processed.txt"; + +print "Converting $sourcefile to $resultfile ...\n"; +open (SOURCE, "<$sourcefile"); +open (RESULT, ">$resultfile"); + +LINE: while ($line = <SOURCE>) { + next LINE if $line =~ /^#/; # skip if line matches pattern + next LINE if $line =~ /^\s*$/; # skip if source line is empty + + if ($line =~ /^(\w+)\s+(\w+)/) { + $number = $1; + $name = $2; + print RESULT "<char:$name><font:><index:$number>\n"; + } + } + +close (SOURCE); +close (RESULT); diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/lucida-math-extended-map.txt 
b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/lucida-math-extended-map.txt new file mode 100755 index 00000000..5423c01d --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/lucida-math-extended-map.txt @@ -0,0 +1,133 @@ +LucidaMath-Extended + + +161 parenleftbig + +162 parenrightbig +163 bracketleftbig +164 bracketrightbig +165 floorleftbig +166 floorrightbig +167 ceilingleftbig +168 ceilingrightbig +169 braceleftbig +170 bracerightbig +173 angbracketleftbig +174 angbracketrightbig +175 vextendsingle +176 vextenddouble +177 slashbig +178 backslashbig +179 parenleftBig +180 parenrightBig +181 parenleftbigg +182 parenrightbigg +183 bracketleftbigg +184 bracketrightbigg +185 floorleftbigg +186 floorrightbigg +187 ceilingleftbigg +188 ceilingrightbigg +189 braceleftbigg +190 bracerightbigg +28 angbracketleftbigg +29 angbracketrightbigg +193 slashbigg +194 backslashbigg +195 parenleftBigg + +33 parenrightBigg +34 bracketleftBigg +35 bracketrightBigg +36 floorleftBigg +37 floorrightBigg +38 ceilingleftBigg +39 ceilingrightBigg +40 braceleftBigg +41 bracerightBigg +42 angbracketleftBigg +43 angbracketrightBigg +44 slashBigg +45 backslashBigg +46 slashBig +47 backslashBig +48 parenlefttp +49 parenrighttp +50 bracketlefttp +51 bracketrighttp +52 bracketleftbt +53 bracketrightbt +54 bracketleftex +55 bracketrightex +56 bracelefttp +57 bracerighttp +58 braceleftbt +59 bracerightbt +60 braceleftmid +61 bracerightmid +62 braceex +63 arrowvertex +64 parenleftbt +65 parenrightbt +66 parenleftex +67 parenrightex +68 angbracketleftBig +69 angbracketrightBig +70 unionsqtext +71 unionsqdisplay +72 contintegraltext +73 contintegraldisplay +74 circledottext +75 circledotdisplay +76 circleplustext +77 circleplusdisplay +78 circlemultiplytext +79 circlemultiplydisplay +80 summationtext +81 producttext +82 integraltext +83 uniontext +84 intersectiontext +85 unionmultitext +86 logicalandtext +87 logicalortext +88 summationdisplay +89 
productdisplay +90 integraldisplay +91 uniondisplay +92 intersectiondisplay +93 unionmultidisplay +94 logicalanddisplay +95 logicalordisplay +96 coproducttext +97 coproductdisplay +98 hatwide +99 hatwider +100 hatwidest +101 tildewide +102 tildewider +103 tildewidest +104 bracketleftBig +105 bracketrightBig +106 floorleftBig +107 floorrightBig +108 ceilingleftBig +109 ceilingrightBig +110 braceleftBig +111 bracerightBig +112 radicalbig +113 radicalBig +114 radicalbigg +115 radicalBigg +116 radicalbt +117 radicalvertex +118 radicaltp +119 arrowvertexdbl +120 arrowtp +121 arrowbt +122 bracehtipdownleft +123 bracehtipdownright +124 bracehtipupleft +125 bracehtipupright +126 arrowdbltp +196 arrowdblbt diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/lucida-math-italic-map.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/lucida-math-italic-map.txt new file mode 100755 index 00000000..1e35531b --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/lucida-math-italic-map.txt @@ -0,0 +1,71 @@ +lucida math italic + +161 Gamma +162 Delta +163 Theta +164 Lambda +165 Xi +166 Pi +167 Sigma +7 Upsilon +169 Phi +170 Psi + +173 Omega +174 alpha +175 beta +176 gamma +177 delta +178 epsilon1 +179 zeta +180 eta +181 theta +182 iota +183 kappa +184 lambda +185 mu +186 nu +187 xi +188 pi +189 rho +190 sigma +191 tau +192 upsilon +193 phi +194 chi +195 psi +196 tie + +33 omega +34 epsilon +35 theta1 +36 pi1 +37 rho1 +38 sigma1 +39 phi1 +40 arrowlefttophalf +41 arrowleftbothalf +42 arrowrighttophalf +43 arrowrightbothalf +44 arrowhookleft +45 arrowhookright +46 triangleright +47 triangleleft +58 period +59 comma +60 less +61 slash +62 greater +63 star +64 partialdiff +91 flat +92 natural +93 sharp +94 slurbelow +95 slurabove +96 lscript +123 dotlessi +124 dotlessj +125 weierstrass +126 vector + diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/lucida-math-symbol-map.txt 
b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/lucida-math-symbol-map.txt new file mode 100755 index 00000000..7868fa92 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/lucida-math-symbol-map.txt @@ -0,0 +1,130 @@ +lucida math symbol + +161 minus +162 periodcentered +163 multiply +164 asteriskmath +165 divide +166 diamondmath +167 plusminus +168 minusplus +169 circleplus +170 circleminus +173 circlemultiply +174 circledivide +175 circledot +176 circlecopyrt +177 openbullet +178 bullet +179 equivasymptotic +180 equivalence +181 reflexsubset +182 reflexsuperset +183 lessequal +184 greaterequal +185 precedesequal +186 followsequal +187 similar +188 approxequal +189 propersubset +190 propersuperset +191 lessmuch +192 greatermuch +193 precedes +194 follows +195 arrowleft +196 spade +33 arrowright +34 arrowup +35 arrowdown +36 arrowboth +37 arrownortheast +38 arrowsoutheast +39 similarequal +40 arrowdblleft +41 arrowdblright +42 arrowdblup +43 arrowdbldown +44 arrowdblboth +45 arrownorthwest +46 arrowsouthwest +47 proportional +48 prime +49 infinity +50 element +51 owner +52 triangle +53 triangleinv +54 negationslash +55 mapsto +56 universal +57 existential +58 logicalnot +59 emptyset +60 Rfractur +61 Ifractur +62 latticetop +63 perpendicular +64 aleph +65 scriptA +66 scriptB +67 scriptC +68 scriptD +69 scriptE +70 scriptF +71 scriptG +72 scriptH +73 scriptI +74 scriptJ +75 scriptK +76 scriptL +77 scriptM +78 scriptN +79 scriptO +80 scriptP +81 scriptQ +82 scriptR +83 scriptS +84 scriptT +85 scriptU +86 scriptV +87 scriptW +88 scriptX +89 scriptY +90 scriptZ +91 union +92 intersection +93 unionmulti +94 logicaland +95 logicalor +96 turnstileleft +97 turnstileright +98 floorleft +99 floorright +100 ceilingleft +101 ceilingright +102 braceleft +103 braceright +104 angbracketleft +105 angbracketright +106 bar +107 bardbl +108 arrowbothv +109 arrowdblbothv +110 backslash +111 wreathproduct +112 radical +113 coproduct +114 nabla 
+115 integral +116 unionsq +117 intersectionsq +118 subsetsqequal +119 supersetsqequal +120 section +121 dagger +122 daggerdbl +123 paragraph +124 club +125 diamond +126 heart diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/standard-map.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/standard-map.txt new file mode 100755 index 00000000..e56033b5 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/standard-map.txt @@ -0,0 +1,221 @@ +# character map for standard font + +32 space +33 exclam +34 quotedbl +35 numbersign +36 dollar +37 percent +38 ampersand +39 quotesingle +40 parenleft +41 parenright +42 asterisk +43 plus +44 comma +45 hyphen +46 period +47 slash +48 zero +49 one +50 two +51 three +52 four +53 five +54 six +55 seven +56 eight +57 nine +58 colon +59 semicolon +60 less +61 equal +62 greater +63 question +64 at +65 A +66 B +67 C +68 D +69 E +70 F +71 G +72 H +73 I +74 J +75 K +76 L +77 M +78 N +79 O +80 P +81 Q +82 R +83 S +84 T +85 U +86 V +87 W +88 X +89 Y +90 Z +91 bracketleft +92 backslash +93 bracketright +94 asciicircum +95 underscore +96 grave +97 a +98 b +99 c +100 d +101 e +102 f +103 g +104 h +105 i +106 j +107 k +108 l +109 m +110 n +111 o +112 p +113 q +114 r +115 s +116 t +117 u +118 v +119 w +120 x +121 y +122 z +123 braceleft +124 bar +125 braceright +126 asciitilde + +128 euro +130 quotesinglbase +131 florin +132 quotedblbase +133 ellipsis +134 dagger +135 daggerdbl +136 circumflex +137 perthousand +138 Scaron +139 guilsinglleft +140 OE +142 Zcaron +145 quoteleft +146 quoteright +147 quotedblleft +148 quotedblright +149 bullet +150 endash +151 emdash +152 tilde +153 trademark +154 scaron +155 guilsinglright +156 oe +158 zcaron +159 Ydieresis +160 nbspace +161 exclamdown +162 cent +163 sterling +164 currency +165 yen +166 brokenbar +167 section +168 dieresis +169 copyright +170 ordfeminine +171 guillemotleft +172 logicalnot +173 sfthyphen +174 registered +175 macron +176 
degree +177 plusminus +178 twosuperior +179 threesuperior +180 acute +181 mu +182 paragraph +183 periodcentered +184 cedilla +185 onesuperior +186 ordmasculine +187 guillemotright +188 onequarter +189 onehalf +190 threequarters +191 questiondown +192 Agrave +193 Aacute +194 Acircumflex +195 Atilde +196 Adieresis +197 Aring +198 AE +199 Ccedilla +200 Egrave +201 Eacute +202 Ecircumflex +203 Edieresis +204 Igrave +205 Iacute +206 Icircumflex +207 Idieresis +208 Eth +209 Ntilde +210 Ograve +211 Oacute +212 Ocircumflex +213 Otilde +214 Odieresis +215 multiply +216 Oslash +217 Ugrave +218 Uacute +219 Ucircumflex +220 Udieresis +221 Yacute +222 Thorn +223 germandbls +224 agrave +225 aacute +226 acircumflex +227 atilde +228 adieresis +229 aring +230 ae +231 ccedilla +232 egrave +233 eacute +234 ecircumflex +235 edieresis +236 igrave +237 iacute +238 icircumflex +239 idieresis +240 eth +241 ntilde +242 ograve +243 oacute +244 ocircumflex +245 otilde +246 odieresis +247 divide +248 oslash +249 ugrave +250 uacute +251 ucircumflex +252 udieresis +253 yacute +254 thorn +255 ydieresis diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/symbol-map.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/symbol-map.txt new file mode 100755 index 00000000..808fec7a --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/sources/symbol-map.txt @@ -0,0 +1,193 @@ +for Symbol font + +32 space +33 exclam +34 universal +35 numbersign +36 existential +37 percent +38 ampersand +39 suchthat +40 parenleft +41 parenright +42 asteriskmath +43 plus +44 comma +45 minus +46 period +47 slash +48 zero +49 one +50 two +51 three +52 four +53 five +54 six +55 seven +56 eight +57 nine +58 colon +59 semicolon +60 less +61 equal +62 greater +63 question +64 congruent +65 Alpha +66 Beta +67 Chi +68 Delta +69 Epsilon +70 Phi +71 Gamma +72 Eta +73 Iota +74 theta1 +75 Kappa +76 Lambda +77 Mu +78 Nu +79 Omicron +80 Pi +81 Theta +82 Rho +83 Sigma +84 Tau +85 
Upsilon +86 sigma1 +87 Omega +88 Xi +89 Psi +90 Zeta +91 bracketleft +92 therefore +93 bracketright +94 perpendicular +95 underscore +96 radicalex +97 alpha +98 beta +99 chi +100 delta +101 epsilon +102 phi +103 gamma +104 eta +105 iota +106 phi1 +107 kappa +108 lambda +109 mu +110 nu +111 omicron +112 pi +113 theta +114 rho +115 sigma +116 tau +117 upsilon +118 omega1 +119 omega +120 xi +121 psi +122 zeta +123 braceleft +124 bar +125 braceright +126 similar + +160 Euro +161 Upsilon1 +162 minute +163 lessequal +164 fraction +165 infinity +166 florin +167 club +168 diamond +169 heart +170 spade +171 arrowboth +172 arrowleft +173 arrowup +174 arrowright +175 arrowdown +176 degree +177 plusminus +178 second +179 greaterequal +180 multiply +181 proportional +182 partialdiff +183 bullet +184 divide +185 notequal +186 equivalence +187 approxequal +188 ellipsis +189 arrowvertex +190 arrowhorizex +191 carriagereturn +192 aleph +193 Ifraktur +194 Rfraktur +195 weierstrass +196 circlemultiply +197 circleplus +198 emptyset +199 intersection +200 union +201 propersuperset +202 reflexsuperset +203 notsubset +204 propersubset +205 reflexsubset +206 element +207 notelement +208 angle +209 gradient +210 registerserif +211 copyrightserif +212 trademarkserif +213 product +214 radical +215 dotmath +216 logicalnot +217 logicaland +218 logicalor +219 arrowdblboth +220 arrowdblleft +221 arrowdblup +222 arrowdblright +223 arrowdbldown +224 lozenge +225 angleleft +226 registersans +227 copyrightsans +228 trademarksans +229 summation +230 parenlefttp +231 parenleftex +232 parenleftbt +233 bracketlefttp +234 bracketleftex +235 bracketleftbt +236 bracelefttp +237 braceleftmid +238 braceleftbt +239 braceex +% 240 apple +241 angleright +242 integral +243 integraltp +244 integralex +245 integralbt +246 parenrighttp +247 parenrightex +248 parenrightbt +249 bracketrighttp +250 bracketrightex +251 bracketrightbt +252 bracerighttp +253 bracerightmid +254 bracerightbt diff --git 
a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/standard-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/standard-charmap.txt new file mode 100755 index 00000000..9e6a44ff --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/standard-charmap.txt @@ -0,0 +1,220 @@ +# character map for standard font + +<char:space><font:><index:32> +<char:exclam><font:><index:33> +<char:quotedbl><font:><index:34> +<char:numbersign><font:><index:35> +<char:dollar><font:><index:36> +<char:percent><font:><index:37> +<char:ampersand><font:><index:38> +<char:quotesingle><font:><index:39> +<char:parenleft><font:><index:40> +<char:parenright><font:><index:41> +<char:asterisk><font:><index:42> +<char:plus><font:><index:43> +<char:comma><font:><index:44> +<char:hyphen><font:><index:45> +<char:period><font:><index:46> +<char:slash><font:><index:47> +<char:zero><font:><index:48> +<char:one><font:><index:49> +<char:two><font:><index:50> +<char:three><font:><index:51> +<char:four><font:><index:52> +<char:five><font:><index:53> +<char:six><font:><index:54> +<char:seven><font:><index:55> +<char:eight><font:><index:56> +<char:nine><font:><index:57> +<char:colon><font:><index:58> +<char:semicolon><font:><index:59> +<char:less><font:><index:60> +<char:equal><font:><index:61> +<char:greater><font:><index:62> +<char:question><font:><index:63> +<char:at><font:><index:64> +<char:A><font:><index:65> +<char:B><font:><index:66> +<char:C><font:><index:67> +<char:D><font:><index:68> +<char:E><font:><index:69> +<char:F><font:><index:70> +<char:G><font:><index:71> +<char:H><font:><index:72> +<char:I><font:><index:73> +<char:J><font:><index:74> +<char:K><font:><index:75> +<char:L><font:><index:76> +<char:M><font:><index:77> +<char:N><font:><index:78> +<char:O><font:><index:79> +<char:P><font:><index:80> +<char:Q><font:><index:81> +<char:R><font:><index:82> +<char:S><font:><index:83> +<char:T><font:><index:84> +<char:U><font:><index:85> +<char:V><font:><index:86> 
+<char:W><font:><index:87> +<char:X><font:><index:88> +<char:Y><font:><index:89> +<char:Z><font:><index:90> +<char:bracketleft><font:><index:91> +<char:backslash><font:><index:92> +<char:bracketright><font:><index:93> +<char:asciicircum><font:><index:94> +<char:underscore><font:><index:95> +<char:grave><font:><index:96> +<char:a><font:><index:97> +<char:b><font:><index:98> +<char:c><font:><index:99> +<char:d><font:><index:100> +<char:e><font:><index:101> +<char:f><font:><index:102> +<char:g><font:><index:103> +<char:h><font:><index:104> +<char:i><font:><index:105> +<char:j><font:><index:106> +<char:k><font:><index:107> +<char:l><font:><index:108> +<char:m><font:><index:109> +<char:n><font:><index:110> +<char:o><font:><index:111> +<char:p><font:><index:112> +<char:q><font:><index:113> +<char:r><font:><index:114> +<char:s><font:><index:115> +<char:t><font:><index:116> +<char:u><font:><index:117> +<char:v><font:><index:118> +<char:w><font:><index:119> +<char:x><font:><index:120> +<char:y><font:><index:121> +<char:z><font:><index:122> +<char:braceleft><font:><index:123> +<char:bar><font:><index:124> +<char:braceright><font:><index:125> +<char:asciitilde><font:><index:126> +<char:euro><font:><index:128> +<char:quotesinglbase><font:><index:130> +<char:florin><font:><index:131> +<char:quotedblbase><font:><index:132> +<char:ellipsis><font:><index:133> +<char:dagger><font:><index:134> +<char:daggerdbl><font:><index:135> +<char:circumflex><font:><index:136> +<char:perthousand><font:><index:137> +<char:Scaron><font:><index:138> +<char:guilsinglleft><font:><index:139> +<char:OE><font:><index:140> +<char:Zcaron><font:><index:142> +<char:quoteleft><font:><index:145> +<char:quoteright><font:><index:146> +<char:quotedblleft><font:><index:147> +<char:quotedblright><font:><index:148> +<char:bullet><font:><index:149> +<char:endash><font:><index:150> +<char:emdash><font:><index:151> +<char:tilde><font:><index:152> +<char:trademark><font:><index:153> +<char:scaron><font:><index:154> 
+<char:guilsinglright><font:><index:155> +<char:oe><font:><index:156> +<char:zcaron><font:><index:158> +<char:Ydieresis><font:><index:159> +<char:nbspace><font:><index:160> +<char:exclamdown><font:><index:161> +<char:cent><font:><index:162> +<char:sterling><font:><index:163> +<char:currency><font:><index:164> +<char:yen><font:><index:165> +<char:brokenbar><font:><index:166> +<char:section><font:><index:167> +<char:dieresis><font:><index:168> +<char:copyright><font:><index:169> +<char:ordfeminine><font:><index:170> +<char:guillemotleft><font:><index:171> +<char:logicalnot><font:><index:172> +<char:sfthyphen><font:><index:173> +<char:registered><font:><index:174> +<char:macron><font:><index:175> +<char:degree><font:><index:176> +<char:plusminus><font:><index:177> +<char:twosuperior><font:><index:178> +<char:threesuperior><font:><index:179> +<char:acute><font:><index:180> +<char:mu><font:><index:181> +<char:paragraph><font:><index:182> +<char:periodcentered><font:><index:183> +<char:cedilla><font:><index:184> +<char:onesuperior><font:><index:185> +<char:ordmasculine><font:><index:186> +<char:guillemotright><font:><index:187> +<char:onequarter><font:><index:188> +<char:onehalf><font:><index:189> +<char:threequarters><font:><index:190> +<char:questiondown><font:><index:191> +<char:Agrave><font:><index:192> +<char:Aacute><font:><index:193> +<char:Acircumflex><font:><index:194> +<char:Atilde><font:><index:195> +<char:Adieresis><font:><index:196> +<char:Aring><font:><index:197> +<char:AE><font:><index:198> +<char:Ccedilla><font:><index:199> +<char:Egrave><font:><index:200> +<char:Eacute><font:><index:201> +<char:Ecircumflex><font:><index:202> +<char:Edieresis><font:><index:203> +<char:Igrave><font:><index:204> +<char:Iacute><font:><index:205> +<char:Icircumflex><font:><index:206> +<char:Idieresis><font:><index:207> +<char:Eth><font:><index:208> +<char:Ntilde><font:><index:209> +<char:Ograve><font:><index:210> +<char:Oacute><font:><index:211> 
+<char:Ocircumflex><font:><index:212> +<char:Otilde><font:><index:213> +<char:Odieresis><font:><index:214> +<char:multiply><font:><index:215> +<char:Oslash><font:><index:216> +<char:Ugrave><font:><index:217> +<char:Uacute><font:><index:218> +<char:Ucircumflex><font:><index:219> +<char:Udieresis><font:><index:220> +<char:Yacute><font:><index:221> +<char:Thorn><font:><index:222> +<char:germandbls><font:><index:223> +<char:agrave><font:><index:224> +<char:aacute><font:><index:225> +<char:acircumflex><font:><index:226> +<char:atilde><font:><index:227> +<char:adieresis><font:><index:228> +<char:aring><font:><index:229> +<char:ae><font:><index:230> +<char:ccedilla><font:><index:231> +<char:egrave><font:><index:232> +<char:eacute><font:><index:233> +<char:ecircumflex><font:><index:234> +<char:edieresis><font:><index:235> +<char:igrave><font:><index:236> +<char:iacute><font:><index:237> +<char:icircumflex><font:><index:238> +<char:idieresis><font:><index:239> +<char:eth><font:><index:240> +<char:ntilde><font:><index:241> +<char:ograve><font:><index:242> +<char:oacute><font:><index:243> +<char:ocircumflex><font:><index:244> +<char:otilde><font:><index:245> +<char:odieresis><font:><index:246> +<char:divide><font:><index:247> +<char:oslash><font:><index:248> +<char:ugrave><font:><index:249> +<char:uacute><font:><index:250> +<char:ucircumflex><font:><index:251> +<char:udieresis><font:><index:252> +<char:yacute><font:><index:253> +<char:thorn><font:><index:254> +<char:ydieresis><font:><index:255> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/maps/symbol-charmap.txt b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/symbol-charmap.txt new file mode 100755 index 00000000..4481c174 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/maps/symbol-charmap.txt @@ -0,0 +1,195 @@ +# character map for Symbol font + +<char:Symbol><font:Symbol><index:for> +<char:space><font:Symbol><index:32> +<char:exclam><font:Symbol><index:33> 
+<char:universal><font:Symbol><index:34> +<char:numbersign><font:Symbol><index:35> +<char:existential><font:Symbol><index:36> +<char:percent><font:Symbol><index:37> +<char:ampersand><font:Symbol><index:38> +<char:suchthat><font:Symbol><index:39> +<char:parenleft><font:Symbol><index:40> +<char:parenright><font:Symbol><index:41> +<char:asteriskmath><font:Symbol><index:42> +<char:plus><font:Symbol><index:43> +<char:comma><font:Symbol><index:44> +<char:minus><font:Symbol><index:45> +<char:period><font:Symbol><index:46> +<char:slash><font:Symbol><index:47> +<char:zero><font:Symbol><index:48> +<char:one><font:Symbol><index:49> +<char:two><font:Symbol><index:50> +<char:three><font:Symbol><index:51> +<char:four><font:Symbol><index:52> +<char:five><font:Symbol><index:53> +<char:six><font:Symbol><index:54> +<char:seven><font:Symbol><index:55> +<char:eight><font:Symbol><index:56> +<char:nine><font:Symbol><index:57> +<char:colon><font:Symbol><index:58> +<char:semicolon><font:Symbol><index:59> +<char:less><font:Symbol><index:60> +<char:equal><font:Symbol><index:61> +<char:greater><font:Symbol><index:62> +<char:question><font:Symbol><index:63> +<char:congruent><font:Symbol><index:64> +<char:Alpha><font:Symbol><index:65> +<char:Beta><font:Symbol><index:66> +<char:Chi><font:Symbol><index:67> +<char:Delta><font:Symbol><index:68> +<char:Epsilon><font:Symbol><index:69> +<char:Phi><font:Symbol><index:70> +<char:Gamma><font:Symbol><index:71> +<char:Eta><font:Symbol><index:72> +<char:Iota><font:Symbol><index:73> +<char:theta1><font:Symbol><index:74> +<char:Kappa><font:Symbol><index:75> +<char:Lambda><font:Symbol><index:76> +<char:Mu><font:Symbol><index:77> +<char:Nu><font:Symbol><index:78> +<char:Omicron><font:Symbol><index:79> +<char:Pi><font:Symbol><index:80> +<char:Theta><font:Symbol><index:81> +<char:Rho><font:Symbol><index:82> +<char:Sigma><font:Symbol><index:83> +<char:Tau><font:Symbol><index:84> +<char:Upsilon><font:Symbol><index:85> +<char:sigma1><font:Symbol><index:86> 
+<char:Omega><font:Symbol><index:87> +<char:Xi><font:Symbol><index:88> +<char:Psi><font:Symbol><index:89> +<char:Zeta><font:Symbol><index:90> +<char:bracketleft><font:Symbol><index:91> +<char:therefore><font:Symbol><index:92> +<char:bracketright><font:Symbol><index:93> +<char:perpendicular><font:Symbol><index:94> +<char:underscore><font:Symbol><index:95> +<char:radicalex><font:Symbol><index:96> +<char:alpha><font:Symbol><index:97> +<char:beta><font:Symbol><index:98> +<char:chi><font:Symbol><index:99> +<char:delta><font:Symbol><index:100> +<char:epsilon><font:Symbol><index:101> +<char:phi><font:Symbol><index:102> +<char:gamma><font:Symbol><index:103> +<char:eta><font:Symbol><index:104> +<char:iota><font:Symbol><index:105> +<char:phi1><font:Symbol><index:106> +<char:kappa><font:Symbol><index:107> +<char:lambda><font:Symbol><index:108> +<char:mu><font:Symbol><index:109> +<char:nu><font:Symbol><index:110> +<char:omicron><font:Symbol><index:111> +<char:pi><font:Symbol><index:112> +<char:theta><font:Symbol><index:113> +<char:rho><font:Symbol><index:114> +<char:sigma><font:Symbol><index:115> +<char:tau><font:Symbol><index:116> +<char:upsilon><font:Symbol><index:117> +<char:omega1><font:Symbol><index:118> +<char:omega><font:Symbol><index:119> +<char:xi><font:Symbol><index:120> +<char:psi><font:Symbol><index:121> +<char:zeta><font:Symbol><index:122> +<char:braceleft><font:Symbol><index:123> +<char:bar><font:Symbol><index:124> +<char:braceright><font:Symbol><index:125> +<char:similar><font:Symbol><index:126> +<char:Euro><font:Symbol><index:160> +<char:Upsilon1><font:Symbol><index:161> +<char:minute><font:Symbol><index:162> +<char:lessequal><font:Symbol><index:163> +<char:fraction><font:Symbol><index:164> +<char:infinity><font:Symbol><index:165> +<char:florin><font:Symbol><index:166> +<char:club><font:Symbol><index:167> +<char:diamond><font:Symbol><index:168> +<char:heart><font:Symbol><index:169> +<char:spade><font:Symbol><index:170> +<char:arrowboth><font:Symbol><index:171> 
+<char:arrowleft><font:Symbol><index:172> +<char:arrowup><font:Symbol><index:173> +<char:arrowright><font:Symbol><index:174> +<char:arrowdown><font:Symbol><index:175> +<char:degree><font:Symbol><index:176> +<char:plusminus><font:Symbol><index:177> +<char:second><font:Symbol><index:178> +<char:greaterequal><font:Symbol><index:179> +<char:multiply><font:Symbol><index:180> +<char:proportional><font:Symbol><index:181> +<char:partialdiff><font:Symbol><index:182> +<char:bullet><font:Symbol><index:183> +<char:divide><font:Symbol><index:184> +<char:notequal><font:Symbol><index:185> +<char:equivalence><font:Symbol><index:186> +<char:approxequal><font:Symbol><index:187> + +# seems to be a quarter fraction +# <char:ellipsis><font:Symbol><index:188> + +<char:arrowvertex><font:Symbol><index:189> +<char:arrowhorizex><font:Symbol><index:190> +<char:carriagereturn><font:Symbol><index:191> +<char:aleph><font:Symbol><index:192> +<char:Ifraktur><font:Symbol><index:193> +<char:Rfraktur><font:Symbol><index:194> +<char:weierstrass><font:Symbol><index:195> +<char:circlemultiply><font:Symbol><index:196> +<char:circleplus><font:Symbol><index:197> +<char:emptyset><font:Symbol><index:198> +<char:intersection><font:Symbol><index:199> +<char:union><font:Symbol><index:200> +<char:propersuperset><font:Symbol><index:201> +<char:reflexsuperset><font:Symbol><index:202> +<char:notsubset><font:Symbol><index:203> +<char:propersubset><font:Symbol><index:204> +<char:reflexsubset><font:Symbol><index:205> +<char:element><font:Symbol><index:206> +<char:notelement><font:Symbol><index:207> +<char:angle><font:Symbol><index:208> +<char:gradient><font:Symbol><index:209> +<char:registerserif><font:Symbol><index:210> +<char:copyrightserif><font:Symbol><index:211> +<char:trademarkserif><font:Symbol><index:212> +<char:product><font:Symbol><index:213> +<char:radical><font:Symbol><index:214> +<char:dotmath><font:Symbol><index:215> +<char:logicalnot><font:Symbol><index:216> +<char:logicaland><font:Symbol><index:217> 
+<char:logicalor><font:Symbol><index:218> +<char:arrowdblboth><font:Symbol><index:219> +<char:arrowdblleft><font:Symbol><index:220> +<char:arrowdblup><font:Symbol><index:221> +<char:arrowdblright><font:Symbol><index:222> +<char:arrowdbldown><font:Symbol><index:223> +<char:lozenge><font:Symbol><index:224> +<char:angleleft><font:Symbol><index:225> +<char:registersans><font:Symbol><index:226> +<char:copyrightsans><font:Symbol><index:227> +<char:trademarksans><font:Symbol><index:228> +<char:summation><font:Symbol><index:229> +<char:parenlefttp><font:Symbol><index:230> +<char:parenleftex><font:Symbol><index:231> +<char:parenleftbt><font:Symbol><index:232> +<char:bracketlefttp><font:Symbol><index:233> +<char:bracketleftex><font:Symbol><index:234> +<char:bracketleftbt><font:Symbol><index:235> +<char:bracelefttp><font:Symbol><index:236> +<char:braceleftmid><font:Symbol><index:237> +<char:braceleftbt><font:Symbol><index:238> +<char:braceex><font:Symbol><index:239> +<char:angleright><font:Symbol><index:241> +<char:integral><font:Symbol><index:242> +<char:integraltp><font:Symbol><index:243> +<char:integralex><font:Symbol><index:244> +<char:integralbt><font:Symbol><index:245> +<char:parenrighttp><font:Symbol><index:246> +<char:parenrightex><font:Symbol><index:247> +<char:parenrightbt><font:Symbol><index:248> +<char:bracketrighttp><font:Symbol><index:249> +<char:bracketrightex><font:Symbol><index:250> +<char:bracketrightbt><font:Symbol><index:251> +<char:bracerighttp><font:Symbol><index:252> +<char:bracerightmid><font:Symbol><index:253> +<char:bracerightbt><font:Symbol><index:254> diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Action.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Action.java new file mode 100755 index 00000000..d8eac4ba --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Action.java @@ -0,0 +1,28 @@ +/** + * Action class + * Represents an action performed in response to a token + * Instance of 
command pattern + * + * @author Daniel Jackson + * @version 0, 07/06/01 + */ + +package tagger; +import java.util.*; + +public abstract class Action { + /** + * requires: iter is an iterator that just yielded this + * ensures: performs action for token, and may remove itself from iter + * default behaviour is equivalent to perform + */ + public void perform (Token token, Iterator iter) { + perform (token); + } + + public void perform (Token token) { + ; + } + } + + diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Assert.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Assert.java new file mode 100755 index 00000000..6cad6ba5 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Assert.java @@ -0,0 +1,25 @@ +/** + * Assert class + * Provides assertion checking + * + * @author Daniel Jackson + * @version 0, 07/03/01 + */ + +package tagger; +import java.io.*; + +public class Assert { + static PrintStream error_stream = Tagger.error_stream; + + public static void assert (boolean cond) { + if (!cond) { + error_stream.println ("Assertion failure"); + // print stack trace + } + } + + public static void unreachable () { + error_stream.println ("Assertion failure"); + } + } diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Counter.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Counter.java new file mode 100755 index 00000000..888e0791 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Counter.java @@ -0,0 +1,125 @@ +/** + * Counter class + * + * @author Daniel Jackson + * @version 0, 07/03/01 + */ + +package tagger; +import java.io.*; + +public class Counter { + private int count; + private int initial; + private int type; + + final static int NO_SUCH_TYPE = -1; + final static int ARABIC = 0; + final static int ROMAN_UPPER = 1; + final static int ROMAN_LOWER = 2; + final static int ALPHA_UPPER = 3; + final static int ALPHA_LOWER = 4; + + // 
eventually recognize counter_type and set initial count and output format + // takes style and stream for error reporting + /* + * requires: count_prop and style are non null + * + */ + public Counter (String count_prop, String style, PrintStream error_stream) { + Assert.assert (count_prop != null); + Assert.assert (style != null); + type = get_type (count_prop); + switch (type) { + case NO_SUCH_TYPE: + type = ARABIC; + initial = 0; + break; + case ALPHA_LOWER: + case ALPHA_UPPER: + if (count_prop.length () != 1) { + error_stream.println ("Bad counter type for style " + style + ": " + count_prop); + initial = 0; + break; + } + initial = count_prop.toLowerCase().charAt (0) - 'a'; + break; + case ARABIC: + try { + initial = Integer.parseInt (count_prop) - 1; + } catch (NumberFormatException e) { + error_stream.println ("Bad counter type for style " + style + ": " + count_prop + "; " + e.getMessage()); + } + break; + case ROMAN_LOWER: + case ROMAN_UPPER: + // not yet implemented + initial = 0; + type = ARABIC; + break; + default: + Assert.unreachable (); + } + count = initial; + } + + /** + * ensures: increments counter + * returns true iff successful, false otherwise (eg, because alphabetic counter went past 'z') + */ + public boolean increment () { + if ((type == ALPHA_UPPER || type == ALPHA_LOWER) && count == 26) + return false; + count++; + return true; + } + + public void reset () { + count = initial; + } + + public String unparse () { + switch (type) { + case ALPHA_LOWER: { + char c = (char) ('a' + count - 1); + return new Character (c).toString(); + } + case ALPHA_UPPER: { + char c = (char) ('A' + count - 1); + return new Character (c).toString(); + } + case ARABIC: + return String.valueOf (count); + case ROMAN_LOWER: + case ROMAN_UPPER: + // not yet implemented + Assert.unreachable (); + break; + default: + Assert.unreachable (); + } + return "DUMMY"; + } + + /** + * + * ensures: returns counter type of counter given in the string counter_type + * as an int, 
being equal to one of the values of the constants declared in the Counter class. + * returns Counter.NO_SUCH_TYPE if the string is not well formed. + */ + public static int get_type (String counter_type) { + if (counter_type.length() == 0) return NO_SUCH_TYPE; + char c = counter_type.charAt (0); + if (c >= 'a' && c <= 'z') + return ALPHA_LOWER; + if (c >= 'A' && c <= 'Z') + return ALPHA_UPPER; + if (c == 'i' || c == 'v' || c == 'x' ||c == 'l' || c == 'c' || c == 'm') + return ROMAN_LOWER; + if (c == 'I' || c == 'V' || c == 'X' ||c == 'L' || c == 'C' || c == 'M') + return ROMAN_LOWER; + if (c >= '0' && c <= '9') + return ARABIC; + return NO_SUCH_TYPE; + } +} \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Engine.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Engine.java new file mode 100755 index 00000000..72a8e47c --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Engine.java @@ -0,0 +1,78 @@ +/** + * Engine class + * Maps token types to actions + * + * @author Daniel Jackson + * @version 0, 07/06/01 + */ + +package tagger; +import java.util.*; + +public class Engine { + /** + * There are some very tricky concurrent modification issues with this class. + * Can't execute a register or unregister method during an execution of consume_token + * if the register or unregister affects the same list of actions associated with the token. + * This means that during a consume_token for some type, can't register or unregister for + * that type, or for the all types. + * Note that a version of the perform method of action takes an iterator argument to + * allow an action to remove itself. 
+ */ + + // array of Action lists indexed on token type + private LinkedList [] actions; + + // actions performed for all token types + private LinkedList default_actions; + + public Engine () { + actions = new LinkedList [Token.MAXTOKEN + 1]; + for (int i = 0; i < actions.length; i++) + actions[i] = new LinkedList (); + default_actions = new LinkedList (); + } + + public void register_by_type (Action action, int type) { + register_by_type_front (action, type); + } + + public void register_for_all (Action action) { + default_actions.addFirst (action); + } + + public void unregister_for_all (Action action) { + default_actions.remove (action); + } + + public void register_by_type_front (Action action, int type) { + Assert.assert (type >= 0); + Assert.assert (type <= Token.MAXTOKEN); + actions[type].addFirst (action); + } + + public void register_by_type_back (Action action, int type) { + Assert.assert (type >= 0); + Assert.assert (type <= Token.MAXTOKEN); + actions[type].addLast (action); + } + + public void unregister_by_type (Action action, int type) { + Assert.assert (type >= 0); + Assert.assert (type <= Token.MAXTOKEN); + actions[type].remove (action); + } + + public void consume_token (Token token) { + perform_actions (default_actions, token); + perform_actions (actions[token.type], token); + } + + public static void perform_actions (LinkedList actions, Token token) { + Iterator i = actions.iterator (); + while (i.hasNext ()) { + Action a = (Action) i.next (); + a.perform (token, i); + } + } +} \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Generator.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Generator.java new file mode 100755 index 00000000..d5b2dc44 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Generator.java @@ -0,0 +1,42 @@ +/** + * Generator interface + * Generic backend tagged text generator + * + * @author Daniel Jackson + * @version 0, 07/08/01 + */ 
+ +package tagger; +import java.io.*; +import java.util.*; + +public interface Generator { + // formats to pass to push_format + int ROMAN = 0; + int ITALICS = 1; + int BOLD = 2; + int SUBSCRIPT = 3; + int SUPERSCRIPT = 4; + + // prints new line to output + void linefeed (); + + void new_para (String style); + + // inserts code for new line + void new_line (); + + void special_char (String font, String index); + + // for dashes, ellipses, etc + void special_char (String index); + + void plaintext (String text); + void push_format (int format); + void pop_format (); + + // turn output suppression on and off + void suppress_on (); + void suppress_off (); + + } diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Numbering.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Numbering.java new file mode 100755 index 00000000..ff01162a --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Numbering.java @@ -0,0 +1,198 @@ +/** + * Numbering class + * Provides special navigations for numbering + * + * @author Daniel Jackson + * @version 0, 07/03/01 + */ + +package tagger; +import java.io.*; +import java.util.*; + +public class Numbering { + private PrintStream error_stream; + private PropertyMap style_map; + + static String PARENT_PROPNAME = "parent"; + static String CHILD_PROPNAME = "child"; + static String ROOT_PROPNAME = "root"; + static String COUNTER_PROPNAME = "counter"; + static String SEPARATOR_PROPNAME = "separator"; + static String LEADER_PROPNAME = "leader"; + static String TRAILER_PROPNAME = "trailer"; + + /* + * The graph structure of the numbering relations is represented using + * properties in the paragraph style property map. + * Each style is mapped to its root -- the ancestor with no parent in the + * numbering relationship -- and to its parent and child. + * The child and root properties are added; the parent property is given + * in the style sheet file. 
+ * + * If a style is numbered, its ancestors must be also. + * This property is not currently checked. + */ + + /* + * Representation invariant + * + * Definition: A style is numbered if it has a counter property. + * A numbered style has a root property. + * A root style has itself as root and has no parent. + * There is a bidirectional parent/child chain from a style to its root + * + * Checking that style sheet is well formed? + */ + + + // maps paragraph style names to counters + // styles that are not numbered are not mapped + private HashMap counter_map; // String -> Counter + + /** + * ensures: constructs a Numbering + * not well formed until incorporate called + */ + public Numbering (PropertyMap style_map, PrintStream error_stream) { + this.style_map = style_map; + this.error_stream = error_stream; + counter_map = new HashMap (); + } + + /** + * ensures: constructs a Numbering + * modifies: property lists in style_map + */ + /* + public Numbering (PropertyMap style_map) { + this.style_map = style_map; + add_extra_properties (style_map); + initialize_counters (style_map); + } + */ + + /** + * ensures: constructs a Numbering using current entries in style_map + * modifies: property lists in style_map + */ + public void incorporate () { + add_extra_properties (); + initialize_counters (); + } + + /* + * requires: all ancestor and descendant styles of style are numbered iff style is numbered + * ensures: returns the numbering string for a new paragraph whose style name is _style_ + * + * format of numbering string is: + * <mytrailer><rootcounter><rootseparator>...<counter><separator>...<mycounter><mytrailer> + */ + public String get_numbering_string (String style) { + // return empty string if style is not numbered + if (!style_has_property (style, COUNTER_PROPNAME)) return ""; + + // initialize numbering string to leader + String leader = style_map.get_property (style, LEADER_PROPNAME); + String numbering = leader == null ? 
"" : leader; + + // append numbering for each style from root to this style + // each followed by its separator + String s = style_map.get_property (style, ROOT_PROPNAME); + Assert.assert (s != null); + while (! s.equals (style)) { + numbering += ((Counter) counter_map.get(s)).unparse (); + String separator = style_map.get_property (s, SEPARATOR_PROPNAME); + numbering += separator == null ? "" : separator; + s = style_map.get_property (s, CHILD_PROPNAME); + } + + // increment numbering for this style and append its string + Counter c = (Counter) counter_map.get (s); + boolean success = c.increment (); + if (!success) + error_stream.println ("Counter overrun for style: " + style); + numbering += c.unparse (); + + // append trailer + String trailer = style_map.get_property (s, TRAILER_PROPNAME); + numbering += trailer == null ? "" : trailer; + + // reset counters for all descendant styles + s = style_map.get_property (s, CHILD_PROPNAME); + while (s != null) { + c = (Counter) counter_map.get (s); + c.reset (); + s = style_map.get_property (s, CHILD_PROPNAME); + } + return numbering; + } + + private void add_extra_properties () { + add_child_property (); + add_root_property (); + } + + // for each style with a counter property, insert into counter_map + private void initialize_counters () { + Set styles = style_map.get_items (); + Iterator iter = styles.iterator (); + while (iter.hasNext ()) { + String style = (String) iter.next (); + if (style_has_property (style, COUNTER_PROPNAME)) { + // get counter type (arabic, roman, etc) + String count_prop = style_map.get_property (style, COUNTER_PROPNAME); + int count_type = Counter.get_type (count_prop); + if (count_type == Counter.NO_SUCH_TYPE) { + error_stream.println ("Bad counter type for style " + style + ": " + count_prop); + // and insert into counter_map anyway to preserve rep invariant + // so must check counter type when counter is created and default if bad + } + counter_map.put (style, new Counter (count_prop, 
style, error_stream)); + } + } + } + + // add to each style that is a parent of another style a child property to it + private void add_child_property () { + Set styles = style_map.get_items (); + Iterator iter = styles.iterator (); + while (iter.hasNext ()) { + String style = (String) iter.next (); + String pstyle = (String) style_map.get_property (style, PARENT_PROPNAME); + // if parent exists, add child property to it + if (pstyle != null) { + List props = style_map.get_property_list (pstyle); + props.add (new Property (CHILD_PROPNAME, style)); + } + } + } + + // add root property to each numbered style + private void add_root_property () { + Set styles = style_map.get_items (); + Iterator iter = styles.iterator (); + while (iter.hasNext ()) { + String style = (String) iter.next (); + if (!style_has_property (style, PARENT_PROPNAME)) { + // if no parent, then it's a root, so add root property for it and all descendants + String root = style; + while (style != null) { + List props = style_map.get_property_list (style); + props.add (new Property (ROOT_PROPNAME, root)); + style = style_map.get_property (style, CHILD_PROPNAME); + } + } + } + } + + // ensures: returns true iff style has property prop_name + private boolean style_has_property (String style, String prop_name) { + String p = (String) style_map.get_property (style, prop_name); + return p != null; + } + + public String toString () { + return "UNIMPLEMENTED"; + } +} diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Property.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Property.java new file mode 100755 index 00000000..651858a9 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Property.java @@ -0,0 +1,25 @@ +/** + * Property class + * <p> + * + * @author Daniel Jackson + * @version 0, 07/02/01 + */ + +package tagger; +import java.io.*; +import java.util.*; + +public class Property { + public String property; + public String value; + + 
public Property (String p, String v) { + property = p; + value = v; + } + + public String toString () { + return "<" + property + ":" + value + ">"; + } + } \ No newline at end of file diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/PropertyMap.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/PropertyMap.java new file mode 100755 index 00000000..4b3efdf5 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/PropertyMap.java @@ -0,0 +1,84 @@ +/** + * PropertyMap class + * Maps identifiers to property lists + * Used for stylesheets, character maps, etc + * + * @author Daniel Jackson + * @version 0, 07/03/01 + */ + +package tagger; +import java.io.*; +import java.util.*; + +public class PropertyMap { + private HashMap map; // String -> LinkedList [Property] + + /** + * ensures: constructs an empty property map + */ + public PropertyMap () { + map = new HashMap (); + } + + /** + * ensures: constructs a property map using the parser <code>p</code>. + */ + public PropertyMap (PropertyParser p) { + map = new HashMap (); + try { + while (p.has_more_properties ()) { + LinkedList props = p.get_property_list (); + Property prop = (Property) props.removeFirst (); + map.put (prop.value, props); + } + } catch (IOException e) {Assert.unreachable ();} + } + + /** + * ensures: incorporates properties using the parser <code>p</code>. + */ + public void incorporate (PropertyParser p) { + try { + while (p.has_more_properties ()) { + LinkedList props = p.get_property_list (); + Property prop = (Property) props.removeFirst (); + map.put (prop.value, props); + } + } catch (IOException e) {Assert.unreachable ();} + } + + /** + * @return the property list for item <code>item</code>. Returns null if no such item. 
+ */ + public List get_property_list (String item) { + return (List) map.get (item); + } + + /** + * @return the value of property <code>prop</code> for item <code>item</code> + * or null if it does not exist + */ + public String get_property (String item, String prop) { + List props = (List) map.get (item); + if (props == null) return null; + ListIterator iter = props.listIterator (); + while (iter.hasNext ()) { + Property p = (Property) iter.next (); + if (p.property.equals (prop)) + return p.value; + } + return null; + } + + /** + * @return the set of items with property lists in the map + */ + public Set get_items () { + return map.keySet (); + } + + public String toString () { + return map.toString (); + } +} diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/PropertyParser.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/PropertyParser.java new file mode 100755 index 00000000..14cb790e --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/PropertyParser.java @@ -0,0 +1,104 @@ +/** + * PropertyParser class + * Parses property files + * <p> + * <code>int</code>. 
+ * + * @author Daniel Jackson + * @version 0, 07/02/01 + */ + +package tagger; +import java.io.*; +import java.util.*; + +public class PropertyParser { + private LineNumberReader reader; + private String token; + private int next_char; + private PrintStream error_reporter; + + public PropertyParser (Reader r, PrintStream s) throws IOException { + reader = new LineNumberReader (r); + error_reporter = s; + next_char = reader.read (); + consume_comments (); + } + + private void consume_comments () throws IOException { + // consume lines that don't start with < + while (next_char != '<' && !is_eos (next_char)) { + if (!is_eol (next_char)) + reader.readLine (); + consume_char (); + } + } + + private void consume_char () throws IOException { + token += (char) next_char; + next_char = reader.read (); + } + + private void error (String msg) { + // correct to number from 1, not zero + int line_number = reader.getLineNumber() + 1; + error_reporter.println (line_number + ": " + msg); + } + + public boolean has_more_properties () { + return (!is_eos (next_char)); + } + + /** + * requires: next_char contains next character in reader <p> + * ensures: returns list of properties until end of line or stream <p> + * according to the following syntax: + * property list is sequence of properties followed by eol of eos + * property is left-angle, property-name, colon, value, right-angle + * property-name is alphanumeric string, but value is any char sequence + * skips lines that do not start with < + * reports syntax errors on this.error_reporter + * Syntax + * @return list of properties until end of line or stream. 
+ * Notes: chose LinkedList because it provides removeFirst, to support common + * case in which first property is removed (eg, because it's a style name) + */ + public LinkedList get_property_list () throws IOException { + LinkedList result = new LinkedList (); + while (!is_eol (next_char) && !is_eos(next_char)) + result.add (get_property ()); + consume_char (); + consume_comments (); + return result; + } + + private Property get_property () throws IOException { + if (next_char != '<') + error ("Found " + next_char + " when expecting <"); + consume_char (); + token = ""; + while (is_alphanumeric (next_char)) consume_char (); + String property = token; + if (next_char != ':') + error ("Found " + next_char + " following " + token + " when expecting :"); + consume_char (); + token = ""; + while (next_char != '>' && !is_eol(next_char) && !is_eos (next_char)) + consume_char (); + String value = token; + if (next_char != '>') + error ("Found " + next_char + " following " + token + " when expecting >"); + consume_char (); + return new Property (property, value); + } + + static boolean is_eol (int c) {return c == '\n';} + static boolean is_eos (int c) {return c == -1;} + static boolean is_alphabetic (int c) { + return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'; + } + static boolean is_numeric (int c) {return c >= '0' && c <= '9';} + static boolean is_alphanumeric (int c) { + return is_numeric (c) || is_alphabetic (c); + } +} diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/QuarkGenerator.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/QuarkGenerator.java new file mode 100755 index 00000000..68c3b7b4 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/QuarkGenerator.java @@ -0,0 +1,92 @@ +/** + * QuarkGenerator interface + * Backend tagged text generator for QuarkXpress + * + * @author Daniel Jackson + * @version 0, 07/08/01 + */ + +package tagger; +import java.io.*; +import java.util.*; + +public class 
QuarkGenerator implements Generator { + PrintStream output_stream; + Stack format_stack; + private boolean on = true; + + public QuarkGenerator (PrintStream s) { + output_stream = s; + + // stack holds strings used to terminate formats + format_stack = new Stack (); + } + + public void suppress_on () { + on = false; + } + + public void suppress_off () { + on = true; + } + + private void print (String s) { + if (on) output_stream.print (s); + } + + public void linefeed () { + if (on) output_stream.println (); + } + + // print "@style:" + public void new_para (String style) { + print ("@" + style + ":"); + } + + // print "<\n>" + public void new_line () { + print ("<\\n>"); + } + + public void special_char (String font, String index) { + print ("<f\"" + font + "\"><\\#" + index + "><f$>"); + } + + public void special_char (String index) { + print ("<\\#" + index + ">"); + } + + public void plaintext (String text) { + print (text); + } + + public void push_format (int format) { + switch (format) { + case Generator.ROMAN: + case Generator.ITALICS: + print ("<I>"); + format_stack.push ("<I>"); + return; + case Generator.BOLD: + print ("<B>"); + format_stack.push ("<B>"); + return; + case Generator.SUBSCRIPT: + print ("<->"); + format_stack.push ("<->"); + return; + case Generator.SUPERSCRIPT: + print ("<+>"); + format_stack.push ("<+>"); + return; + default: + Assert.unreachable (); + } + } + + public void pop_format () { + // for now, handle too many pops without report + if (format_stack.isEmpty ()) return; + print ((String) format_stack.pop ()); + } + } diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/SourceParser.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/SourceParser.java new file mode 100755 index 00000000..a5d5fbf5 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/SourceParser.java @@ -0,0 +1,285 @@ +/** + * SourceParser class + * <p> + * <code>int</code>. 
+ * + * @author Daniel Jackson + * @version 0, 07/02/01 + */ + +package tagger; +import java.io.*; +import java.util.*; + +public class SourceParser { + final static String loadcharmapcommand_name = "loadchars"; + final static String loadstylesheetcommand_name = "loadstyles"; + final static String preamblecommand_name = "preamble"; + final static String refcommand_name = "ref"; + final static String tagcommand_name = "tag"; + final static String citecommand_name = "cite"; + final static String separatorcommand_name = "sep"; + + private LineNumberReader reader; + + // holds set of strings recognized as paragraph styles + private Set parastyles; + + // holds the previous value of next_char + private int last_char; + private int next_char; + private boolean within_single_quotes; + private boolean within_double_quotes; + private boolean at_start_of_line; + private String token; + + public SourceParser (Reader reader, Set parastyles) throws IOException { + this.reader = new LineNumberReader (reader); + this.parastyles = parastyles; + next_char = reader.read (); + last_char = -1; + at_start_of_line = true; + } + + public boolean has_more_tokens () { + return (next_char != -1); + } + + private void consume_char () throws IOException { + token += (char) next_char; + last_char = next_char; + next_char = reader.read (); + } + + // consume until next close curly and return string excluding curly + private String consume_arg () throws IOException { + consume_char (); // consume open curly + token = ""; + consume_char (); + while (!is_close_curly (next_char) && !is_eol (next_char)) consume_char (); + String arg = token; + consume_char (); // consume close curly + return arg; + } + + /** + * requires: next_char contains next character in reader <p> + * ensures: returns next token according to one of these productions: <p> + * <blockquote><pre> + * char-sequence = alphanumeric+ + * whitespace ::= (space | tab)+ + * command ::= slash alphanum* [star] + * paragraph-break ::= <blank 
line> + * line-break ::= slash slash + * hyphen-sequence ::= hyphen+ + * dot-sequence ::= dot+ + * underscore ::= underscore + * </pre></blockquote> + * quote characters, disambiguated by context: + * open-single-quote: when not preceded by alphanumeric + * close-single-quote: when not followed by alphanumeric and preceded by + * open-single-quote + * open-double-quote: when not preceded by open-double-quote + * close-double-quote: when preceded by open-double-quote + * apostrophe: between alphanumerics, or when followed by numeric + * prime: after alphanumeric, when not followed by alphanumeric, + * and not preceded by open-single-quote + * @return the next token. + * explicitly returns end of stream token. + */ + public Token get_token () throws IOException { + token = new String (); + if (is_eos (next_char)) + return new Token (Token.ENDOFSTREAM, reader.getLineNumber ()); + if (at_start_of_line) { + if (is_eol (next_char)) { + consume_char (); + within_single_quotes = false; + within_double_quotes = false; + return new Token (Token.PARABREAK, reader.getLineNumber ()); + } + else if (is_hash (next_char)) { + String line = reader.readLine (); + consume_char (); + return new Token (Token.COMMENT, line, reader.getLineNumber ()); + } + else + at_start_of_line = false; + } + if (is_eol (next_char)) { + consume_char (); + at_start_of_line = true; + if (is_eol (next_char)) { + consume_char (); + within_single_quotes = false; + within_double_quotes = false; + return new Token (Token.PARABREAK, reader.getLineNumber ()); + } + // check this + return new Token (Token.WHITESPACE, " ", reader.getLineNumber ()); + } + if (is_slash (next_char)) { + consume_char (); + token = ""; + if (is_slash (next_char)) { + consume_char (); + return new Token (Token.LINEBREAK, reader.getLineNumber ()); + } + if (!is_alphabetic (next_char)) { + // next character assumed prefixed with slash to avoid special treatment + // eg, \< for <, \$ for $ + token = new Character ((char) 
next_char).toString (); + return new Token (Token.OTHER, token, reader.getLineNumber ()); + } + while (is_alphanumeric (next_char)) consume_char (); + String command_name = token; + if (is_star (next_char)) consume_char (); + if (command_name.equals (preamblecommand_name)) { + return new Token (Token.PREAMBLECOMMAND, reader.getLineNumber ()); + } + if (command_name.equals (separatorcommand_name)) { + // consume whitespace until next token + while (is_whitespace (next_char)) consume_char (); + return new Token (Token.SEPARATORCOMMAND, reader.getLineNumber ()); + } + if (is_less_than (next_char)) { + consume_char (); + return new Token (Token.FORMATCOMMAND, command_name, reader.getLineNumber ()); + } + if (is_open_curly (next_char)) { + String arg = consume_arg (); + if (command_name.equals (loadcharmapcommand_name)) { + return new Token (Token.LOADCHARMAPCOMMAND, arg, reader.getLineNumber ()); + } + if (command_name.equals (loadstylesheetcommand_name)) { + return new Token (Token.LOADSTYLESHEETCOMMAND, arg, reader.getLineNumber ()); + } + if (command_name.equals (refcommand_name)) { + return new Token (Token.REFCOMMAND, arg, reader.getLineNumber ()); + } + if (command_name.equals (tagcommand_name)) { + return new Token (Token.TAGCOMMAND, arg, reader.getLineNumber ()); + } + if (command_name.equals (citecommand_name)) { + return new Token (Token.CITECOMMAND, arg, reader.getLineNumber ()); + } + } + if (parastyles.contains (command_name)) { + while (is_whitespace (next_char)) consume_char (); + // paragraph style command consumes the first linebreak following it also + if (is_eol (next_char)) consume_char (); + return new Token (Token.PARASTYLECOMMAND, command_name, reader.getLineNumber ()); + } + else + // temporary + return new Token (Token.CHARCOMMAND, command_name, reader.getLineNumber ()); + } + if (is_alphabetic (next_char)) { + consume_char (); + while (is_alphabetic (next_char)) consume_char (); + return new Token (Token.ALPHABETIC, token, reader.getLineNumber 
()); + } + if (is_numeric (next_char)) { + consume_char (); + while (is_numeric (next_char)) consume_char (); + return new Token (Token.NUMERIC, token, reader.getLineNumber ()); + } + if (is_whitespace (next_char)) { + consume_char (); + while (is_whitespace (next_char)) consume_char (); + if (is_eol (next_char)) { + consume_char (); + // check this + return new Token (Token.WHITESPACE, " ", reader.getLineNumber ()); + } + return new Token (Token.WHITESPACE, token, reader.getLineNumber ()); + } + if (is_hyphen (next_char)) { + consume_char (); + while (is_hyphen (next_char)) consume_char (); + return new Token (Token.HYPHENS, token, reader.getLineNumber ()); + } + if (is_dot (next_char)) { + consume_char (); + while (is_dot (next_char)) consume_char (); + return new Token (Token.DOTS, token, reader.getLineNumber ()); + } + if (is_underscore (next_char)) { + consume_char (); + return new Token (Token.UNDERSCORE, reader.getLineNumber ()); + } + if (is_dollar (next_char)) { + consume_char (); + return new Token (Token.DOLLAR, reader.getLineNumber ()); + } + if (is_greater_than (next_char)) { + consume_char (); + return new Token (Token.POPFORMATCOMMAND, reader.getLineNumber ()); + } + if (is_single_quote (next_char)) { + if (is_alphanumeric (last_char)) { + if (is_alphanumeric (next_char)) { + consume_char (); + return new Token (Token.APOSTROPHE, reader.getLineNumber ()); + } + else if (within_single_quotes) { + within_single_quotes = false; + consume_char (); + return new Token (Token.CLOSESINGLEQUOTE, reader.getLineNumber ()); + } + else { + consume_char (); + return new Token (Token.PRIME, reader.getLineNumber ()); + } + } + consume_char (); + if (is_numeric (next_char)) { + return new Token (Token.APOSTROPHE, reader.getLineNumber ()); + } + else { + within_single_quotes = true; + return new Token (Token.OPENSINGLEQUOTE, reader.getLineNumber ()); + } + } + if (is_double_quote (next_char)) { + consume_char (); + if (within_double_quotes) { + within_double_quotes = 
false; + return new Token (Token.CLOSEDOUBLEQUOTE, reader.getLineNumber ()); + } + else { + within_double_quotes = true; + return new Token (Token.OPENDOUBLEQUOTE, reader.getLineNumber ()); + } + } + consume_char (); + return new Token (Token.OTHER, token, reader.getLineNumber ()); + } + + static boolean is_eol (int c) {return c == '\n';} + static boolean is_eos (int c) {return c == -1;} + static boolean is_star (int c) {return c == '*';} + static boolean is_hash (int c) {return c == '#';} + static boolean is_dot (int c) {return c == '.';} + static boolean is_slash (int c) {return c == '\\';} + static boolean is_hyphen (int c) {return c == '-';} + static boolean is_underscore (int c) {return c == '_';} + static boolean is_dollar (int c) {return c == '$';} + static boolean is_single_quote (int c) {return c == '\'';} + static boolean is_double_quote (int c) {return c == '\"';} + static boolean is_open_curly (int c) {return c == '{';} + static boolean is_close_curly (int c) {return c == '}';} + static boolean is_less_than (int c) {return c == '<';} + static boolean is_greater_than (int c) {return c == '>';} + + // should perhaps use Character.isLetter? not sure, because that allows Unicode chars for + // other languages that are outside the a-Z range. 
+ static boolean is_alphabetic (int c) { + return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'; + } + static boolean is_numeric (int c) {return c >= '0' && c <= '9';} + static boolean is_alphanumeric (int c) { + return is_numeric (c) || is_alphabetic (c); + } + static boolean is_whitespace (int c) {return c == ' ' || c == '\t';} +} diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/StandardEngine.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/StandardEngine.java new file mode 100755 index 00000000..b6a600d1 --- /dev/null +++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/StandardEngine.java @@ -0,0 +1,377 @@ +/** + * StandardEngine class + * Standard registration of actions + * Implemented as a subclass of Engine for no good reason + * + * @author Daniel Jackson + * @version 0, 07/08/01 + */ + +package tagger; +import java.io.*; +import java.util.*; + +public class StandardEngine extends Engine { + + static Engine STANDARD; + + // reserved words for property files + + // character table + static final String index_prop_name = "index"; + static final String font_prop_name = "font"; + + static final String apostrophe_char_name = "quoteright"; + static final String prime_char_name = "prime"; + static final String opensinglequote_char_name = "quoteleft"; + static final String closesinglequote_char_name = "quoteright"; + static final String opendoublequote_char_name = "quotedblleft"; + static final String closedoublequote_char_name = "quotedblright"; + static final String hyphen_char_name = "hyphen"; + static final String endash_char_name = "endash"; + static final String emdash_char_name = "emdash"; + static final String period_char_name = "period"; + static final String twodotleader_char_name = "twodotleader"; + static final String ellipsis_char_name = "ellipsis"; + + static final String ROMAN_COMMANDNAME = "roman"; + static final String BOLD_COMMANDNAME = "bold"; + static final String ITALICS_COMMANDNAME = "italic"; 
+    static final String SUBSCRIPT_COMMANDNAME = "sub";
+    static final String SUPERSCRIPT_COMMANDNAME = "sup";
+
+    // style sheet: name of the property giving a style's successor, and the initial style
+    static final String next_style_prop_name = "next";
+    static final String default_style_name = "body";
+
+    /**
+     * Builds the standard engine by registering one action per token type.
+     * Actions are registered in the order written below; some of them register or
+     * unregister further actions while tokens are being consumed.
+     *
+     * @param generator    receives all output (paragraphs, text, special characters, formats)
+     * @param style_map    style sheet properties; extended by the load-style-sheet command
+     * @param error_stream receives diagnostics and progress messages
+     * @param index_stream not used by any action registered in this constructor
+     */
+    public StandardEngine (
+        final Generator generator,
+        final PropertyMap style_map,
+        final PrintStream error_stream, final PrintStream index_stream
+        ) {
+
+        final PropertyMap char_map = new PropertyMap ();
+        final Numbering numbering = new Numbering (style_map, error_stream);
+
+        // a hack to work around lack of proper closures in Java
+        // can't assign to local variable within actions, so the value lives in a mutable box
+        class StringBox {
+            String string;
+            StringBox (String s) {string = s;}
+            void set (String s) {string = s;}
+        }
+        final StringBox current_para_style = new StringBox (default_style_name);
+
+        // special action for start of paragraph
+        // created once, but dynamically inserted and removed
+        // so that it's performed once at the start of each paragraph
+        final Action paragraph_action = new Action () {
+            boolean first_para = true;
+            public void perform (Token t, Iterator iter) {
+                // a paragraph style command only updates current_para_style (see its action
+                // below), so stay registered and start the paragraph on a later token
+                if (t.type != Token.PARASTYLECOMMAND) {
+                    if (!first_para) generator.linefeed ();
+                    generator.new_para (current_para_style.string);
+                    String numstr = numbering.get_numbering_string (current_para_style.string);
+                    if (numstr.length() != 0) {
+                        // display numbering as evidence of progress
+                        error_stream.println (numstr);
+                        /*
+                        // this doesn't work. not sure why.
+                        // because it becomes a recursive call!
+                        // need an impoverished engine specially for this, without paras?
+
+                        Reader numreader = new StringReader (numstr);
+                        try {
+                            Tagger.consume_source (StandardEngine.STANDARD, style_map, numreader);
+                        }
+                        catch (IOException e) {Assert.unreachable ();}
+                        */
+                        generator.plaintext (numstr);
+                    }
+
+                    // done for this paragraph: take this action out of the list being iterated
+                    iter.remove ();
+                    first_para = false;
+                }
+            }};
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                generator.plaintext (t.arg);
+            }},
+            Token.ALPHABETIC);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                generator.plaintext (t.arg);
+            }},
+            Token.NUMERIC);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                generator.plaintext (t.arg);
+            }},
+            Token.WHITESPACE);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                generator.new_line ();
+            }},
+            Token.LINEBREAK);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                put_special_char (generator, char_map, apostrophe_char_name, error_stream, t.line);
+            }},
+            Token.APOSTROPHE);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                put_special_char (generator, char_map, prime_char_name, error_stream, t.line);
+            }},
+            Token.PRIME);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                put_special_char (generator, char_map, opensinglequote_char_name, error_stream, t.line);
+            }},
+            Token.OPENSINGLEQUOTE);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                put_special_char (generator, char_map, closesinglequote_char_name, error_stream, t.line);
+            }},
+            Token.CLOSESINGLEQUOTE);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                put_special_char (generator, char_map, opendoublequote_char_name, error_stream, t.line);
+            }},
+            Token.OPENDOUBLEQUOTE);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                put_special_char (generator, char_map, closedoublequote_char_name, error_stream, t.line);
+            }},
+            Token.CLOSEDOUBLEQUOTE);
+
+        // one, two or three hyphens select hyphen, en dash or em dash
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                int len = t.arg.length ();
+                if (len == 1)
+                    put_special_char (generator, char_map, hyphen_char_name, error_stream, t.line);
+                else if (len == 2)
+                    put_special_char (generator, char_map, endash_char_name, error_stream, t.line);
+                else if (len == 3)
+                    put_special_char (generator, char_map, emdash_char_name, error_stream, t.line);
+                else
+                    error_stream.println (t.line + ": Too many hyphens: " + t.arg);
+            }},
+            Token.HYPHENS);
+
+        // one dot is plain text; two or three select the leader or ellipsis character
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                int len = t.arg.length ();
+                if (len == 1)
+                    generator.plaintext (".");
+                else if (len == 2)
+                    put_special_char (generator, char_map, twodotleader_char_name, error_stream, t.line);
+                else if (len == 3)
+                    put_special_char (generator, char_map, ellipsis_char_name, error_stream, t.line);
+                else
+                    error_stream.println (t.line + ": Too many dots: " + t.arg);
+            }},
+            Token.DOTS);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                // open file with given name and load char map from it
+                String file_name = t.arg;
+                try {
+                    incorporate_file (char_map, file_name, error_stream);
+                } catch (IOException e) {
+                    error_stream.println (t.line + ": Can't open char map file: " + file_name);
+                }
+            }},
+            Token.LOADCHARMAPCOMMAND);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                // open file with given name and load style sheet from it,
+                // then let the numbering pick up the new styles
+                String file_name = t.arg;
+                try {
+                    incorporate_file (style_map, file_name, error_stream);
+                    numbering.incorporate ();
+                } catch (IOException e) {
+                    error_stream.println (t.line + ": Can't open style sheet file: " + file_name);
+                }
+            }},
+            Token.LOADSTYLESHEETCOMMAND);
+
+        final Action unsuppress_action = new Action () {
+            public void perform (Token t, Iterator i) {
+                generator.suppress_off ();
+                i.remove ();
+            }};
+
+        // preamble command switches on output suppression
+        // registers action to turn suppression off with paragraph break command
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                generator.suppress_on ();
+                register_by_type (unsuppress_action, Token.PARABREAK);
+            }},
+            Token.PREAMBLECOMMAND);
+
+        // paragraph break: move to the style named by the current style's "next" property
+        // and register the paragraph action for all token types
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                String next_style = style_map.get_property (current_para_style.string, next_style_prop_name);
+                if (next_style == null) {
+                    error_stream.println (t.line + ": No next style property given for style: " + current_para_style.string);
+                    return;
+                }
+                current_para_style.set (next_style);
+                register_for_all (paragraph_action);
+            }},
+            Token.PARABREAK);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                current_para_style.set (t.arg);
+            }},
+            Token.PARASTYLECOMMAND);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                String index = char_map.get_property (t.arg, index_prop_name);
+                if (index == null) {
+                    error_stream.println (t.line + ": No index property given for character: " + t.arg);
+                    return;
+                }
+                String font = char_map.get_property (t.arg, font_prop_name);
+                // if no font is listed, generate special character in standard font
+                if (font == null)
+                    generator.special_char (index);
+                else
+                    generator.special_char (font, index);
+            }},
+            Token.CHARCOMMAND);
+
+        // each underscore toggles italics: the first pushes the format, the next pops it
+        register_by_type (new Action () {
+            boolean italic_mode_on = false;
+            public void perform (Token t) {
+                if (italic_mode_on) {
+                    italic_mode_on = false;
+                    generator.pop_format ();
+                }
+                else {
+                    italic_mode_on = true;
+                    generator.push_format (Generator.ITALICS);
+                }
+            }},
+            Token.UNDERSCORE);
+
+        // used to italicize alphabetic tokens in math mode
+        // NOTE: 'assert' as a method name only compiles with -source 1.3 (see the makefile)
+        final Action push_italics_action = new Action () {
+            public void perform (Token t, Iterator iter) {
+                Assert.assert (t.type == Token.ALPHABETIC);
+                generator.push_format (Generator.ITALICS);
+            }};
+        final Action pop_italics_action = new Action () {
+            public void perform (Token t, Iterator iter) {
+                Assert.assert (t.type == Token.ALPHABETIC);
+                generator.pop_format ();
+            }};
+
+        // each dollar toggles math mode by adding or removing the two italics actions;
+        // presumably front/back places them before and after the plaintext action
+        register_by_type (new Action () {
+            boolean math_mode_on = false;
+            public void perform (Token t) {
+                if (math_mode_on) {
+                    math_mode_on = false;
+                    unregister_by_type (push_italics_action, Token.ALPHABETIC);
+                    unregister_by_type (pop_italics_action, Token.ALPHABETIC);
+                }
+                else {
+                    math_mode_on = true;
+                    register_by_type_back (pop_italics_action, Token.ALPHABETIC);
+                    register_by_type_front (push_italics_action, Token.ALPHABETIC);
+                }
+            }},
+            Token.DOLLAR);
+
+        // a format command with any other name is ignored without a message
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                if (t.arg.equals (ROMAN_COMMANDNAME))
+                    generator.push_format (Generator.ROMAN);
+                else if (t.arg.equals (BOLD_COMMANDNAME))
+                    generator.push_format (Generator.BOLD);
+                else if (t.arg.equals (ITALICS_COMMANDNAME))
+                    generator.push_format (Generator.ITALICS);
+                else if (t.arg.equals (SUBSCRIPT_COMMANDNAME))
+                    generator.push_format (Generator.SUBSCRIPT);
+                else if (t.arg.equals (SUPERSCRIPT_COMMANDNAME))
+                    generator.push_format (Generator.SUPERSCRIPT);
+            }},
+            Token.FORMATCOMMAND);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                generator.pop_format ();
+            }},
+            Token.POPFORMATCOMMAND);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                generator.plaintext (t.arg);
+            }},
+            Token.OTHER);
+
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                error_stream.println ("... done");
+            }},
+            Token.ENDOFSTREAM);
+
+        // publish the fully registered engine through the STANDARD field
+        STANDARD = this;
+    }
+
+    /* no actions for these token types:
+        COMMENT
+        SEPARATORCOMMAND
+    */
+
+/*
+    not yet coded:
+
+    public static final int REFCOMMAND = 32;
+    public static final int TAGCOMMAND = 33;
+    public static final int CITECOMMAND = 34;
+*/
+
+
+    /* general form of action registration is this:
+        register_by_type (new Action () {
+            public void perform (Token t) {
+                // put code to be executed for token type here
+            }},
+            Token.TYPENAME);
+    */
+
+    /**
+     * Emits the special character registered under char_name in char_map.
+     * If the map has no index property for that name, reports the character as
+     * unresolved on error_stream, prefixed by the source line, and emits nothing.
+     */
+    void put_special_char (Generator generator, PropertyMap char_map,
+        String char_name, PrintStream error_stream, int line) {
+        String index = char_map.get_property (char_name, index_prop_name);
+        if (index == null) {
+            error_stream.println (line + ": Unresolved character: " + char_name);
+        }
+        else
+            generator.special_char (index);
+    }
+
+    /**
+     * Parses the property file file_name and incorporates it into map, closing the
+     * file afterwards whether or not parsing succeeded.
+     * Assumes incorporate reads everything it needs from the parser before it
+     * returns -- TODO confirm against PropertyMap.
+     */
+    private static void incorporate_file (PropertyMap map, String file_name,
+        PrintStream error_stream) throws IOException {
+        FileInputStream s = new FileInputStream (new File (file_name));
+        try {
+            map.incorporate (new PropertyParser (new InputStreamReader (s), error_stream));
+        } finally {
+            // best-effort close: a failure here must not mask an error from parsing
+            try { s.close (); } catch (IOException ignored) { }
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Tagger.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Tagger.java
new file mode 100755
index 00000000..ef187f3a
--- /dev/null
+++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Tagger.java
@@ -0,0 +1,103 @@
+/**
+ * Tagger class
+ * Main class of Tagger application
+ *
+ * @author Daniel Jackson
+ * @version 0, 07/02/01
+ */
+
+
+package tagger;
+import java.io.*;
+import java.util.*;
+
+public class Tagger {
+
+    public static PrintStream error_stream = System.out;
+
+    // holds mapping of token types to actions
+    Engine engine;
+
+    /**
+     * The main method of the Tagger application.
+     * @param args The command line arguments, described in usage method
+     */
+    public static void main (String[] args) {
+        check_usage (args);
+        // check_usage only reports the problem; without a base name there is nothing to
+        // process, and reading args[0] below would throw ArrayIndexOutOfBoundsException
+        if (args.length == 0) return;
+
+        String base_name = args[0];
+        String source_file_name = base_name + ".txt";
+        String output_file_name = base_name + ".tag.txt";
+        String index_file_name = base_name + ".index.txt";
+        Reader source_reader = null;
+        PrintStream output_stream = null;
+        PrintStream index_stream = null;
+
+        try {
+            try {
+                source_reader = get_reader_from_file_name (source_file_name);
+            } catch (IOException e) {
+                error_stream.println (
+                    "Unable to open source file " + source_file_name + ": " + e.getMessage ());
+                return;
+            }
+            try {
+                output_stream = get_stream_from_file_name (output_file_name);
+            } catch (IOException e) {
+                error_stream.println (
+                    "Unable to open output file " + output_file_name + ": " + e.getMessage ());
+                return;
+            }
+            try {
+                index_stream = get_stream_from_file_name (index_file_name);
+            } catch (IOException e) {
+                error_stream.println (
+                    "Unable to open index file " + index_file_name + ": " + e.getMessage ());
+                return;
+            }
+
+            // for now, hardwire to Quark
+            Generator generator = new QuarkGenerator (output_stream);
+
+            PropertyMap style_map = new PropertyMap ();
+            Engine engine = new StandardEngine (generator, style_map, error_stream, index_stream);
+            try {
+                consume_source (engine, style_map, source_reader);
+            } catch (IOException e) {
+                // reading a file can fail part-way, so report it instead of asserting
+                // that it cannot happen
+                error_stream.println (
+                    "Error reading source file " + source_file_name + ": " + e.getMessage ());
+            }
+        } finally {
+            // release whatever was opened, on success and on every early return above
+            if (output_stream != null) output_stream.close ();
+            if (index_stream != null) index_stream.close ();
+            if (source_reader != null) {
+                try { source_reader.close (); } catch (IOException ignored) { }
+            }
+        }
+    }
+
+    /**
+     * Feeds every token of the source to the engine, finishing with the
+     * end-of-stream token.
+     *
+     * @param engine        consumes each token in turn
+     * @param style_map     supplies the set of items handed to the parser as paragraph styles
+     * @param source_reader the text to tokenize
+     * @throws IOException  if one of the parser or engine calls raises it
+     */
+    public static void consume_source (Engine engine, PropertyMap style_map, Reader source_reader)
+        throws IOException {
+        Set para_styles = style_map.get_items ();
+        SourceParser p = new SourceParser (source_reader, para_styles);
+        Token token;
+        while (p.has_more_tokens ()) {
+            token = p.get_token ();
+            engine.consume_token (token);
+        }
+        // consume end of stream token explicitly
+        // depends on get_token returning ENDOFSTREAM token when no more tokens
+        token = p.get_token ();
+        engine.consume_token (token);
+    }
+
+    /**
+     * Prints a usage message to error_stream when no argument was given.
+     * Only reports: it neither exits nor throws, so callers must check args themselves.
+     */
+    static void check_usage (String args []) {
+        if (args.length == 0) {
+            error_stream.println (
+                "one argument required, should be name of source file, excluding .txt extension"
+                );
+        }
+    }
+
+    /**
+     * Opens the named file for reading as characters in the platform default charset.
+     * The caller owns the returned reader and must close it.
+     */
+    static Reader get_reader_from_file_name(String file_name) throws IOException {
+        File f = new File (file_name);
+        FileInputStream s = new FileInputStream (f);
+        InputStreamReader r = new InputStreamReader (s);
+        return r;
+    }
+
+    /**
+     * Opens the named file for writing, truncating any existing content.
+     * The caller owns the returned stream and must close it.
+     */
+    static PrintStream get_stream_from_file_name (String file_name) throws IOException {
+        File f = new File (file_name);
+        FileOutputStream s = new FileOutputStream (f);
+        PrintStream ps = new PrintStream (s);
+        return ps;
+    }
+
+}
diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Token.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Token.java
new file mode 100755
index 00000000..af033209
--- /dev/null
+++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/Token.java
@@ -0,0 +1,72 @@
+/**
+ * Token class
+ * Represents tokens generated by lexer: a type code, the source line number,
+ * and an optional text argument.
+ *
+ * @author Daniel Jackson
+ * @version 0, 07/06/01
+ */
+
+package tagger;
+import java.io.*;
+import java.util.*;
+
+public class Token {
+    // may be null
+    public String arg;
+    public int line;
+    public int type;
+
+    public static final int COMMENT = 0;
+    public static final int WHITESPACE = 1;
+    public static final int ALPHABETIC = 2;
+    public static final int NUMERIC = 3;
+
+    public static final int PARABREAK = 4;
+    public static final int LINEBREAK = 5;
+
+    public static final int APOSTROPHE = 10;
+    public static final int PRIME = 11;
+    public static final int OPENSINGLEQUOTE = 12;
+    public static final int CLOSESINGLEQUOTE = 13;
+    public static final int OPENDOUBLEQUOTE = 14;
+    public static final int CLOSEDOUBLEQUOTE = 15;
+    public static final int HYPHENS = 16;
+    public static final int DOTS = 17;
+
+    public static final int PARASTYLECOMMAND = 20;
+    public static final int FORMATCOMMAND = 21;
+    public static final int POPFORMATCOMMAND = 22;
+    public static final int REFCOMMAND = 23;
+    public static final int TAGCOMMAND = 24;
+    public static final int CITECOMMAND = 25;
+    public static final int CHARCOMMAND = 26;
+    public static final int LOADCHARMAPCOMMAND = 27;
+    public static final int LOADSTYLESHEETCOMMAND = 28;
+    public static final int PREAMBLECOMMAND = 29;
+    public static final int SEPARATORCOMMAND = 30;
+
+    // treated as a command for italicizing or as a char style
+    public static final int UNDERSCORE = 31;
+    public static final int DOLLAR = 32;
+    public static final int OTHER = 33;
+    public static final int ENDOFSTREAM = 34;
+    // largest type code declared above (same value as ENDOFSTREAM)
+    public static final int MAXTOKEN = 34;
+
+    public Token (int type, String arg, int line) {
+        this.type = type;
+        this.arg = arg;
+        this.line = line;
+    }
+    // for tokens without text: arg is left null
+    public Token (int type, int line) {
+        this.type = type;
+        this.line = line;
+    }
+
+    // temporary implementation; returns null when the token has no arg
+    public String toString () {
+        return arg;
+    }
+}
\ No newline at end of file
diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/makefile b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/makefile
new file mode 100644
index 00000000..603bbe58
--- /dev/null
+++ b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/makefile
@@ -0,0 +1,6 @@
+all:
+	javac -nowarn -source 1.3 *.java
+
+clean:
+	rm -f *.class
+	rm -f *~