A new dag combine; several permutations of this

[oota-llvm.git] / docs / LangRef.html
diff --git a/docs/LangRef.html b/docs/LangRef.html

index 5a77020187dcf3783c22127350d13398913977bb..83bd667fac30e2f2356813300b73a9cd47dfaca2 100644 (file)
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -24,7 +24,7 @@
        <li><a href="#callingconv">Calling Conventions</a></li>
        <li><a href="#globalvars">Global Variables</a></li>
        <li><a href="#functionstructure">Functions</a></li>
-      <li><a href="#aliasstructure">Aliases</a>
+      <li><a href="#aliasstructure">Aliases</a></li>
        <li><a href="#paramattrs">Parameter Attributes</a></li>
        <li><a href="#fnattrs">Function Attributes</a></li>
        <li><a href="#gc">Garbage Collector Names</a></li>
@@ -58,16 +58,16 @@
    </li>
    <li><a href="#constants">Constants</a>
      <ol>
-      <li><a href="#simpleconstants">Simple Constants</a>
-      <li><a href="#aggregateconstants">Aggregate Constants</a>
-      <li><a href="#globalconstants">Global Variable and Function Addresses</a>
-      <li><a href="#undefvalues">Undefined Values</a>
-      <li><a href="#constantexprs">Constant Expressions</a>
+      <li><a href="#simpleconstants">Simple Constants</a></li>
+      <li><a href="#aggregateconstants">Aggregate Constants</a></li>
+      <li><a href="#globalconstants">Global Variable and Function Addresses</a></li>
+      <li><a href="#undefvalues">Undefined Values</a></li>
+      <li><a href="#constantexprs">Constant Expressions</a></li>
      </ol>
    </li>
    <li><a href="#othervalues">Other Values</a>
      <ol>
-      <li><a href="#inlineasm">Inline Assembler Expressions</a>
+      <li><a href="#inlineasm">Inline Assembler Expressions</a></li>
      </ol>
    </li>
    <li><a href="#instref">Instruction Reference</a>
@@ -143,6 +143,7 @@
            <li><a href="#i_inttoptr">'<tt>inttoptr .. to</tt>' Instruction</a></li>
            <li><a href="#i_bitcast">'<tt>bitcast .. to</tt>' Instruction</a></li>
          </ol>
+      </li>
        <li><a href="#otherops">Other Operations</a>
          <ol>
            <li><a href="#i_icmp">'<tt>icmp</tt>' Instruction</a></li>
@@ -213,31 +214,33 @@
            <li><a href="#int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a></li>
          </ol>
        </li>
-          <li><a href="#int_atomics">Atomic intrinsics</a>
-            <ol>
-              <li><a href="#int_memory_barrier"><tt>llvm.memory_barrier</tt></a></li>
-              <li><a href="#int_atomic_cmp_swap"><tt>llvm.atomic.cmp.swap</tt></a></li>
-              <li><a href="#int_atomic_swap"><tt>llvm.atomic.swap</tt></a></li>
-              <li><a href="#int_atomic_load_add"><tt>llvm.atomic.load.add</tt></a></li>
-              <li><a href="#int_atomic_load_sub"><tt>llvm.atomic.load.sub</tt></a></li>
-              <li><a href="#int_atomic_load_and"><tt>llvm.atomic.load.and</tt></a></li>
-              <li><a href="#int_atomic_load_nand"><tt>llvm.atomic.load.nand</tt></a></li>
-              <li><a href="#int_atomic_load_or"><tt>llvm.atomic.load.or</tt></a></li>
-              <li><a href="#int_atomic_load_xor"><tt>llvm.atomic.load.xor</tt></a></li>
-              <li><a href="#int_atomic_load_max"><tt>llvm.atomic.load.max</tt></a></li>
-              <li><a href="#int_atomic_load_min"><tt>llvm.atomic.load.min</tt></a></li>
-              <li><a href="#int_atomic_load_umax"><tt>llvm.atomic.load.umax</tt></a></li>
-              <li><a href="#int_atomic_load_umin"><tt>llvm.atomic.load.umin</tt></a></li>
-            </ol>
-          </li>
+      <li><a href="#int_atomics">Atomic intrinsics</a>
+        <ol>
+          <li><a href="#int_memory_barrier"><tt>llvm.memory_barrier</tt></a></li>
+          <li><a href="#int_atomic_cmp_swap"><tt>llvm.atomic.cmp.swap</tt></a></li>
+          <li><a href="#int_atomic_swap"><tt>llvm.atomic.swap</tt></a></li>
+          <li><a href="#int_atomic_load_add"><tt>llvm.atomic.load.add</tt></a></li>
+          <li><a href="#int_atomic_load_sub"><tt>llvm.atomic.load.sub</tt></a></li>
+          <li><a href="#int_atomic_load_and"><tt>llvm.atomic.load.and</tt></a></li>
+          <li><a href="#int_atomic_load_nand"><tt>llvm.atomic.load.nand</tt></a></li>
+          <li><a href="#int_atomic_load_or"><tt>llvm.atomic.load.or</tt></a></li>
+          <li><a href="#int_atomic_load_xor"><tt>llvm.atomic.load.xor</tt></a></li>
+          <li><a href="#int_atomic_load_max"><tt>llvm.atomic.load.max</tt></a></li>
+          <li><a href="#int_atomic_load_min"><tt>llvm.atomic.load.min</tt></a></li>
+          <li><a href="#int_atomic_load_umax"><tt>llvm.atomic.load.umax</tt></a></li>
+          <li><a href="#int_atomic_load_umin"><tt>llvm.atomic.load.umin</tt></a></li>
+        </ol>
+      </li>
        <li><a href="#int_general">General intrinsics</a>
          <ol>
            <li><a href="#int_var_annotation">
-            <tt>llvm.var.annotation</tt>' Intrinsic</a></li>
+            '<tt>llvm.var.annotation</tt>' Intrinsic</a></li>
            <li><a href="#int_annotation">
-            <tt>llvm.annotation.*</tt>' Intrinsic</a></li>
+            '<tt>llvm.annotation.*</tt>' Intrinsic</a></li>
            <li><a href="#int_trap">
-            <tt>llvm.trap</tt>' Intrinsic</a></li>
+            '<tt>llvm.trap</tt>' Intrinsic</a></li>
+          <li><a href="#int_stackprotector">
+            '<tt>llvm.stackprotector</tt>' Intrinsic</a></li>
          </ol>
        </li>
      </ol>
@@ -327,15 +330,16 @@ the parser.</p>
    <p>LLVM identifiers come in two basic types: global and local. Global
    identifiers (functions, global variables) begin with the @ character. Local
    identifiers (register names, types) begin with the % character. Additionally,
-  there are three different formats for identifiers, for different purposes:
+  there are three different formats for identifiers, for different purposes:</p>
  
  <ol>
    <li>Named values are represented as a string of characters with their prefix.
    For example, %foo, @DivisionByZero, %a.really.long.identifier.  The actual
    regular expression used is '<tt>[%@][a-zA-Z$._][a-zA-Z$._0-9]*</tt>'.
    Identifiers which require other characters in their names can be surrounded
-  with quotes.  In this way, anything except a <tt>&quot;</tt> character can 
-  be used in a named value.</li>
+  with quotes. Special characters may be escaped using "\xx" where xx is the 
+  ASCII code for the character in hexadecimal.  In this way, any character can 
+  be used in a named value, even quotes themselves.</li>
  
    <li>Unnamed values are represented as an unsigned numeric value with their
    prefix.  For example, %12, @2, %44.</li>
@@ -561,7 +565,7 @@ All Global Variables and Functions have one of the following types of linkage:
  
  </dl>
  
-<p><a name="linkage_external"></a>For example, since the "<tt>.LC0</tt>"
+<p>For example, since the "<tt>.LC0</tt>"
  variable is defined to be internal, if another module defined a "<tt>.LC0</tt>"
  variable and was linked with this one, one of the two would be renamed,
  preventing a collision.  Since "<tt>main</tt>" and "<tt>puts</tt>" are
@@ -571,7 +575,7 @@ outside of the current module.</p>
  to have any linkage type other than "externally visible", <tt>dllimport</tt>,
  or <tt>extern_weak</tt>.</p>
  <p>Aliases can have only <tt>external</tt>, <tt>internal</tt> and <tt>weak</tt>
-linkages.
+linkages.</p>
  </div>
  
  <!-- ======================================================================= -->
@@ -749,8 +753,9 @@ an optional <a href="#linkage">linkage type</a>, an optional
  <a href="#callingconv">calling convention</a>, a return type, an optional
  <a href="#paramattrs">parameter attribute</a> for the return type, a function 
  name, a (possibly empty) argument list (each with optional 
-<a href="#paramattrs">parameter attributes</a>), an optional section, an
-optional alignment, an optional <a href="#gc">garbage collector name</a>, 
+<a href="#paramattrs">parameter attributes</a>), optional 
+<a href="#fnattrs">function attributes</a>, an optional section, 
+an optional alignment, an optional <a href="#gc">garbage collector name</a>, 
  an opening curly brace, a list of basic blocks, and a closing curly brace.
  
  LLVM function declarations consist of the "<tt>declare</tt>" keyword, an
@@ -783,6 +788,18 @@ to whatever it feels convenient.  If an explicit alignment is specified, the
  function is forced to have at least that much alignment.  All alignments must be
  a power of 2.</p>
  
+  <h5>Syntax:</h5>
+
+<div class="doc_code">
+<tt>
+define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
+      [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>]
+      &lt;ResultType&gt; @&lt;FunctionName&gt; ([argument list])
+      [<a href="#fnattrs">fn attrs</a>] [section "name"] [align N]
+      [<a href="#gc">gc</a>] { ... }
+</tt>
+</div>
+
  </div>
  
  
@@ -852,29 +869,39 @@ declare signext i8 @returns_signed_char()
      to memory, though some targets use it to distinguish between two different
      kinds of registers).  Use of this attribute is target-specific.</dd>
  
-    <dt><tt>byval</tt></dt>
+    <dt><tt><a name="byval">byval</a></tt></dt>
      <dd>This indicates that the pointer parameter should really be passed by
      value to the function.  The attribute implies that a hidden copy of the
      pointee is made between the caller and the callee, so the callee is unable
      to modify the value in the caller.  This attribute is only valid on LLVM
      pointer arguments.  It is generally used to pass structs and arrays by
-    value, but is also valid on pointers to scalars.</dd>
+    value, but is also valid on pointers to scalars.  The copy is considered to
+    belong to the caller not the callee (for example,
+    <tt><a href="#readonly">readonly</a></tt> functions should not write to
+    <tt>byval</tt> parameters). This is not a valid attribute for return
+    values. </dd>
  
      <dt><tt>sret</tt></dt>
      <dd>This indicates that the pointer parameter specifies the address of a
      structure that is the return value of the function in the source program.
      This pointer must be guaranteed by the caller to be valid: loads and stores
      to the structure may be assumed by the callee to not trap.  This may only
-    be applied to the first parameter.</dd>
+    be applied to the first parameter. This is not a valid attribute for
+    return values. </dd>
  
      <dt><tt>noalias</tt></dt>
-    <dd>This indicates that the parameter does not alias any global or any other
-    parameter.  The caller is responsible for ensuring that this is the case,
-    usually by placing the value in a stack allocation.</dd>
+    <dd>This indicates that the pointer does not alias any global or any other
+    parameter.  The caller is responsible for ensuring that this is the
+    case. On a function return value, <tt>noalias</tt> additionally indicates
+    that the pointer does not alias any other pointers visible to the
+    caller. Note that this applies only to pointers that can be used to actually
+    load/store a value: NULL, unique pointers from malloc(0), and freed pointers
+    are considered to not alias anything.</dd>
  
      <dt><tt>nest</tt></dt>
      <dd>This indicates that the pointer parameter can be excised using the
-    <a href="#int_trampoline">trampoline intrinsics</a>.</dd>
+    <a href="#int_trampoline">trampoline intrinsics</a>. This is not a valid
+    attribute for return values.</dd>
    </dl>
  
  </div>
@@ -948,19 +975,42 @@ unwind or exceptional control flow.  If the function does unwind, its runtime
  behavior is undefined.</dd>
  
  <dt><tt>readnone</tt></dt>
-<dd>This attribute indicates that the function computes its result (or its
-thrown exception) based strictly on its arguments.  It does not read any global
-mutable state (e.g. memory, control registers, etc) visible to caller functions.
-Furthermore, <tt>readnone</tt> functions never change any state visible to their
-caller.
-
-<dt><tt>readonly</tt></dt>
-<dd>This function attribute indicates that the function has no side-effects on
-the calling function, but that it depends on state (memory state, control
-register state, etc) that may be set in the caller.  A readonly function always
-returns the same value (or throws the same exception) whenever it is called with
-a particular set of arguments and global state.</dd>
-
+<dd>This attribute indicates that the function computes its result (or the
+exception it throws) based strictly on its arguments, without dereferencing any
+pointer arguments or otherwise accessing any mutable state (e.g. memory, control
+registers, etc) visible to caller functions.  It does not write through any
+pointer arguments (including <tt><a href="#byval">byval</a></tt> arguments) and
+never changes any state visible to callers.</dd>
+
+<dt><tt><a name="readonly">readonly</a></tt></dt>
+<dd>This attribute indicates that the function does not write through any
+pointer arguments (including <tt><a href="#byval">byval</a></tt> arguments)
+or otherwise modify any state (e.g. memory, control registers, etc) visible to
+caller functions.  It may dereference pointer arguments and read state that may
+be set in the caller.  A readonly function always returns the same value (or
+throws the same exception) when called with the same set of arguments and global
+state.</dd>
+
+<dt><tt><a name="ssp">ssp</a></tt></dt>
+<dd>This attribute indicates that the function should emit a stack smashing
+protector. It is in the form of a "canary"&mdash;a random value placed on the
+stack before the local variables that's checked upon return from the function to
+see if it has been overwritten. A heuristic is used to determine if a function
+needs stack protectors or not.
+
+<p>If a function that has an <tt>ssp</tt> attribute is inlined into a function
+that doesn't have an <tt>ssp</tt> attribute, then the resulting function will
+have an <tt>ssp</tt> attribute.</p></dd>
+
+<dt><tt>sspreq</tt></dt>
+<dd>This attribute indicates that the function should <em>always</em> emit a
+stack smashing protector. This overrides the <tt><a href="#ssp">ssp</a></tt>
+function attribute.
+
+<p>If a function that has an <tt>sspreq</tt> attribute is inlined into a
+function that doesn't have an <tt>sspreq</tt> attribute or which has
+an <tt>ssp</tt> attribute, then the resulting function will have
+an <tt>sspreq</tt> attribute.</p></dd>
  </dl>
  
  </div>
@@ -1055,7 +1105,7 @@ are given in this list:</p>
    <li><tt>a0:0:1</tt> - aggregates are 8-bit aligned</li>
  </ul>
  <p>When LLVM is determining the alignment for a given type, it uses the 
-following rules:
+following rules:</p>
  <ol>
    <li>If the type sought is an exact match for one of the specifications, that
    specification is used.</li>
@@ -1067,8 +1117,8 @@ following rules:
    i65 and i256 will use the alignment of i64 (largest specified).</li>
    <li>If no match is found, and the type sought is a vector type, then the
    largest vector type that is smaller than the sought vector type will be used
-  as a fall back.  This happens because <128 x double> can be implemented in 
-  terms of 64 <2 x double>, for example.</li>
+  as a fall back.  This happens because &lt;128 x double&gt; can be implemented
+  in terms of 64 &lt;2 x double&gt;, for example.</li>
  </ol>
  </div>
  
@@ -1134,6 +1184,7 @@ classifications:</p>
            <a href="#t_pstruct">packed structure</a>,
            <a href="#t_vector">vector</a>,
            <a href="#t_opaque">opaque</a>.
+      </td>
      </tr>
    </tbody>
  </table>
@@ -1355,8 +1406,8 @@ Variable argument functions can access their arguments with the <a
      </td>
    </tr><tr class="layout">
      <td class="left"><tt>{i32, i32} (i32)</tt></td>
-    <td class="left">A function taking an <tt>i32></tt>, returning two 
-        <tt> i32 </tt> values as an aggregate of type <tt>{ i32, i32 }</tt>
+    <td class="left">A function taking an <tt>i32</tt>, returning two 
+        <tt>i32</tt> values as an aggregate of type <tt>{ i32, i32 }</tt>
      </td>
    </tr>
  </table>
@@ -1777,7 +1828,7 @@ following is the syntax for constant expressions:</p>
    <dt><b><tt>extractelement ( VAL, IDX )</tt></b></dt>
  
    <dd>Perform the <a href="#i_extractelement">extractelement
-  operation</a> on constants.
+  operation</a> on constants.</dd>
  
    <dt><b><tt>insertelement ( VAL, ELT, IDX )</tt></b></dt>
  
@@ -1932,7 +1983,7 @@ the instruction after the call.  If the caller was an "<a
   href="#i_invoke"><tt>invoke</tt></a>" instruction, execution continues
  at the beginning of the "normal" destination block.  If the instruction
  returns a value, that value shall set the call or invoke instruction's
-return value.
+return value.</p>
  
  <h5>Example:</h5>
  
@@ -2036,7 +2087,7 @@ branches or with a lookup table.</p>
  <h5>Syntax:</h5>
  
  <pre>
-  &lt;result&gt; = invoke [<a href="#callingconv">cconv</a>] &lt;ptr to function ty&gt; &lt;function ptr val&gt;(&lt;function args&gt;) 
+  &lt;result&gt; = invoke [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>] &lt;ptr to function ty&gt; &lt;function ptr val&gt;(&lt;function args&gt;) [<a href="#fnattrs">fn attrs</a>]
                  to label &lt;normal label&gt; unwind label &lt;exception label&gt;
  </pre>
  
@@ -2049,7 +2100,7 @@ function, with the possibility of control flow transfer to either the
  "<tt><a href="#i_ret">ret</a></tt>" instruction, control flow will return to the
  "normal" label.  If the callee (or any indirect callees) returns with the "<a
  href="#i_unwind"><tt>unwind</tt></a>" instruction, control is interrupted and
-continued at the dynamically nearest "exception" label.
+continued at the dynamically nearest "exception" label.</p>
  
  <h5>Arguments:</h5>
  
@@ -2061,6 +2112,11 @@ continued at the dynamically nearest "exception" label.
      convention</a> the call should use.  If none is specified, the call defaults
      to using C calling conventions.
    </li>
+
+  <li>The optional <a href="#paramattrs">Parameter Attributes</a> list for
+   return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>', 
+   and '<tt>inreg</tt>' attributes are valid here.</li>
+
    <li>'<tt>ptr to function ty</tt>': shall be the signature of the pointer to
    function value being invoked.  In most cases, this is a direct function
    invocation, but indirect <tt>invoke</tt>s are just as possible, branching off
@@ -2081,6 +2137,9 @@ continued at the dynamically nearest "exception" label.
    <li>'<tt>exception label</tt>': the label reached when a callee returns with
    the <a href="#i_unwind"><tt>unwind</tt></a> instruction. </li>
  
+  <li>The optional <a href="#fnattrs">function attributes</a> list. Only
+  '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
+  '<tt>readnone</tt>' attributes are valid here.</li>
  </ol>
  
  <h5>Semantics:</h5>
@@ -2525,13 +2584,16 @@ type.  '<tt>op2</tt>' is treated as an unsigned value.</p>
  
  <p>The value produced is <tt>op1</tt> * 2<sup><tt>op2</tt></sup> mod 2<sup>n</sup>,
  where n is the width of the result.  If <tt>op2</tt> is (statically or dynamically) negative or
-equal to or larger than the number of bits in <tt>op1</tt>, the result is undefined.</p>
+equal to or larger than the number of bits in <tt>op1</tt>, the result is undefined.
+If the arguments are vectors, each vector element of <tt>op1</tt> is shifted by the
+corresponding shift amount in <tt>op2</tt>.</p>
  
  <h5>Example:</h5><pre>
    &lt;result&gt; = shl i32 4, %var   <i>; yields {i32}: 4 &lt;&lt; %var</i>
    &lt;result&gt; = shl i32 4, 2      <i>; yields {i32}: 16</i>
    &lt;result&gt; = shl i32 1, 10     <i>; yields {i32}: 1024</i>
    &lt;result&gt; = shl i32 1, 32     <i>; undefined</i>
+  &lt;result&gt; = shl &lt;2 x i32&gt; &lt; i32 1, i32 1&gt;, &lt; i32 1, i32 2&gt;   <i>; yields: result=&lt;2 x i32&gt; &lt; i32 2, i32 4&gt;</i>
  </pre>
  </div>
  <!-- _______________________________________________________________________ -->
@@ -2556,7 +2618,9 @@ type.  '<tt>op2</tt>' is treated as an unsigned value.</p>
  <p>This instruction always performs a logical shift right operation. The most
  significant bits of the result will be filled with zero bits after the 
  shift.  If <tt>op2</tt> is (statically or dynamically) equal to or larger than
-the number of bits in <tt>op1</tt>, the result is undefined.</p>
+the number of bits in <tt>op1</tt>, the result is undefined. If the arguments are
+vectors, each vector element of <tt>op1</tt> is shifted by the corresponding shift
+amount in <tt>op2</tt>.</p>
  
  <h5>Example:</h5>
  <pre>
@@ -2565,6 +2629,7 @@ the number of bits in <tt>op1</tt>, the result is undefined.</p>
    &lt;result&gt; = lshr i8  4, 3   <i>; yields {i8}:result = 0</i>
    &lt;result&gt; = lshr i8 -2, 1   <i>; yields {i8}:result = 0x7F </i>
    &lt;result&gt; = lshr i32 1, 32  <i>; undefined</i>
+  &lt;result&gt; = lshr &lt;2 x i32&gt; &lt; i32 -2, i32 4&gt;, &lt; i32 1, i32 2&gt;   <i>; yields: result=&lt;2 x i32&gt; &lt; i32 0x7FFFFFFF, i32 1&gt;</i>
  </pre>
  </div>
  
@@ -2590,8 +2655,9 @@ type.  '<tt>op2</tt>' is treated as an unsigned value.</p>
  <p>This instruction always performs an arithmetic shift right operation.
  The most significant bits of the result will be filled with the sign bit 
  of <tt>op1</tt>.  If <tt>op2</tt> is (statically or dynamically) equal to or
-larger than the number of bits in <tt>op1</tt>, the result is undefined.
-</p>
+larger than the number of bits in <tt>op1</tt>, the result is undefined. If the
+arguments are vectors, each vector element of <tt>op1</tt> is shifted by the
+corresponding shift amount in <tt>op2</tt>.</p>
  
  <h5>Example:</h5>
  <pre>
@@ -2600,6 +2666,7 @@ larger than the number of bits in <tt>op1</tt>, the result is undefined.
    &lt;result&gt; = ashr i8  4, 3   <i>; yields {i8}:result = 0</i>
    &lt;result&gt; = ashr i8 -2, 1   <i>; yields {i8}:result = -1</i>
    &lt;result&gt; = ashr i32 1, 32  <i>; undefined</i>
+  &lt;result&gt; = ashr &lt;2 x i32&gt; &lt; i32 -2, i32 4&gt;, &lt; i32 1, i32 3&gt;   <i>; yields: result=&lt;2 x i32&gt; &lt; i32 -1, i32 0&gt;</i>
  </pre>
  </div>
  
@@ -2899,23 +2966,25 @@ exceeds the length of <tt>val</tt>, the results are undefined.
  <h5>Syntax:</h5>
  
  <pre>
-  &lt;result&gt; = shufflevector &lt;n x &lt;ty&gt;&gt; &lt;v1&gt;, &lt;n x &lt;ty&gt;&gt; &lt;v2&gt;, &lt;n x i32&gt; &lt;mask&gt;    <i>; yields &lt;n x &lt;ty&gt;&gt;</i>
+  &lt;result&gt; = shufflevector &lt;n x &lt;ty&gt;&gt; &lt;v1&gt;, &lt;n x &lt;ty&gt;&gt; &lt;v2&gt;, &lt;m x i32&gt; &lt;mask&gt;    <i>; yields &lt;m x &lt;ty&gt;&gt;</i>
  </pre>
  
  <h5>Overview:</h5>
  
  <p>
  The '<tt>shufflevector</tt>' instruction constructs a permutation of elements
-from two input vectors, returning a vector of the same type.
+from two input vectors, returning a vector with the same element type as
+the input and a length that is the same as the shuffle mask.
  </p>
  
  <h5>Arguments:</h5>
  
  <p>
-The first two operands of a '<tt>shufflevector</tt>' instruction are vectors
-with types that match each other and types that match the result of the
-instruction.  The third argument is a shuffle mask, which has the same number
-of elements as the other vector type, but whose element type is always 'i32'.
+The first two operands of a '<tt>shufflevector</tt>' instruction are vectors 
+with types that match each other. The third argument is a shuffle mask whose
+element type is always 'i32'.  The result of the instruction is a vector whose
+length is the same as the shuffle mask and whose element type is the same as
+the element type of the first two operands.
  </p>
  
  <p>
@@ -2928,7 +2997,7 @@ constant integer or undef values.
  <p>
  The elements of the two input vectors are numbered from left to right across
  both of the vectors.  The shuffle mask operand specifies, for each element of
-the result vector, which element of the two input registers the result element
+the result vector, which element of the two input vectors the result element
  gets.  The element selector may be undef (meaning "don't care") and the second
  operand may be undef if performing a shuffle from only one vector.
  </p>
@@ -2940,6 +3009,10 @@ operand may be undef if performing a shuffle from only one vector.
                            &lt;4 x i32&gt; &lt;i32 0, i32 4, i32 1, i32 5&gt;  <i>; yields &lt;4 x i32&gt;</i>
    %result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; undef, 
                            &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt;  <i>; yields &lt;4 x i32&gt;</i> - Identity shuffle.
+  %result = shufflevector &lt;8 x i32&gt; %v1, &lt;8 x i32&gt; undef, 
+                          &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt;  <i>; yields &lt;4 x i32&gt;</i>
+  %result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2, 
+                          &lt;8 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 &gt;  <i>; yields &lt;8 x i32&gt;</i>
  </pre>
  </div>
  
@@ -3035,6 +3108,7 @@ indices in a
  '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.
  The value to insert must have the same type as the value identified
  by the indices.
+</p>
  
  <h5>Semantics:</h5>
  
@@ -3101,7 +3175,7 @@ choose to align the allocation on any convenient boundary.</p>
  <h5>Semantics:</h5>
  
  <p>Memory is allocated using the system "<tt>malloc</tt>" function, and
-a pointer is returned.  The result of a zero byte allocattion is undefined.  The
+a pointer is returned.  The result of a zero byte allocation is undefined.  The
  result is null if there is insufficient memory available.</p>
  
  <h5>Example:</h5>
@@ -3290,25 +3364,34 @@ at the location specified by the '<tt>&lt;pointer&gt;</tt>' operand.</p>
  <div class="doc_text">
  <h5>Syntax:</h5>
  <pre>
-  &lt;result&gt; = getelementptr &lt;ty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
+  &lt;result&gt; = getelementptr &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
  </pre>
  
  <h5>Overview:</h5>
  
  <p>
  The '<tt>getelementptr</tt>' instruction is used to get the address of a
-subelement of an aggregate data structure.</p>
+subelement of an aggregate data structure. It performs address calculation only
+and does not access memory.</p>
  
  <h5>Arguments:</h5>
  
-<p>This instruction takes a list of integer operands that indicate what
-elements of the aggregate object to index to.  The actual types of the arguments
-provided depend on the type of the first pointer argument.  The
-'<tt>getelementptr</tt>' instruction is used to index down through the type
-levels of a structure or to a specific index in an array.  When indexing into a
-structure, only <tt>i32</tt> integer constants are allowed.  When indexing 
-into an array or pointer, only integers of 32 or 64 bits are allowed; 32-bit 
-values will be sign extended to 64-bits if required.</p>
+<p>The first argument is always a pointer, and forms the basis of the
+calculation. The remaining arguments are indices that indicate which of the
+elements of the aggregate object are indexed. The interpretation of each index
+is dependent on the type being indexed into. The first index always indexes the
+pointer value given as the first argument, the second index indexes a value of
+the type pointed to (not necessarily the value directly pointed to, since the
+first index can be non-zero), etc. The first type indexed into must be a pointer
+value, subsequent types can be arrays, vectors and structs. Note that subsequent
+types being indexed into can never be pointers, since that would require loading
+the pointer before continuing calculation.</p>
+
+<p>The type of each index argument depends on the type it is indexing into.
+When indexing into a (packed) structure, only <tt>i32</tt> integer
+<b>constants</b> are allowed.  When indexing into an array, pointer or vector,
+only integers of 32 or 64 bits are allowed (also non-constants). 32-bit values
+will be sign extended to 64-bits if required.</p>
  
  <p>For example, let's consider a C code fragment and how it gets
  compiled to LLVM:</p>
@@ -3349,13 +3432,6 @@ entry:
  
  <h5>Semantics:</h5>
  
-<p>The index types specified for the '<tt>getelementptr</tt>' instruction depend
-on the pointer type that is being indexed into. <a href="#t_pointer">Pointer</a>
-and <a href="#t_array">array</a> types can use a 32-bit or 64-bit
-<a href="#t_integer">integer</a> type but the value will always be sign extended
-to 64-bits.  <a href="#t_struct">Structure</a> and <a href="#t_pstruct">packed
-structure</a> types require <tt>i32</tt> <b>constants</b>.</p>
-
  <p>In the example above, the first index is indexing into the '<tt>%ST*</tt>'
  type, which is a pointer, yielding a '<tt>%ST</tt>' = '<tt>{ i32, double, %RT
  }</tt>' type, a structure.  The second index indexes into the third element of
@@ -3395,7 +3471,11 @@ FAQ</a>.</p>
  
  <pre>
      <i>; yields [12 x i8]*:aptr</i>
-    %aptr = getelementptr {i32, [12 x i8]}* %sptr, i64 0, i32 1
+    %aptr = getelementptr {i32, [12 x i8]}* %saptr, i64 0, i32 1
+    <i>; yields i8*:vptr</i>
+    %vptr = getelementptr {i32, &lt;2 x i8&gt;}* %svptr, i64 0, i32 1, i32 1
+    <i>; yields i8*:eptr</i>
+    %eptr = getelementptr [12 x i8]* %aptr, i64 0, i32 1
  </pre>
  </div>
  
@@ -3697,7 +3777,7 @@ the value cannot fit in the floating point value, the results are undefined.</p>
  <h5>Example:</h5>
  <pre>
    %X = uitofp i32 257 to float         <i>; yields float:257.0</i>
-  %Y = uitofp i8  -1 to double         <i>; yields double:255.0</i>
+  %Y = uitofp i8 -1 to double          <i>; yields double:255.0</i>
  </pre>
  </div>
  
@@ -3731,7 +3811,7 @@ the value cannot fit in the floating point value, the results are undefined.</p>
  <h5>Example:</h5>
  <pre>
    %X = sitofp i32 257 to float         <i>; yields float:257.0</i>
-  %Y = sitofp i8  -1 to double         <i>; yields double:-1.0</i>
+  %Y = sitofp i8 -1 to double          <i>; yields double:-1.0</i>
  </pre>
  </div>
  
@@ -3753,7 +3833,7 @@ the integer type <tt>ty2</tt>.</p>
  <h5>Arguments:</h5>
  <p>The '<tt>ptrtoint</tt>' instruction takes a <tt>value</tt> to cast, which 
  must be a <a href="#t_pointer">pointer</a> value, and a type to cast it to
-<tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> type. 
+<tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> type.</p>
  
  <h5>Semantics:</h5>
  <p>The '<tt>ptrtoint</tt>' instruction converts <tt>value</tt> to integer type
@@ -3789,7 +3869,7 @@ a pointer type, <tt>ty2</tt>.</p>
  <h5>Arguments:</h5>
  <p>The '<tt>inttoptr</tt>' instruction takes an <a href="#t_integer">integer</a>
  value to cast, and a type to cast it to, which must be a 
-<a href="#t_pointer">pointer</a> type.
+<a href="#t_pointer">pointer</a> type.</p>
  
  <h5>Semantics:</h5>
  <p>The '<tt>inttoptr</tt>' instruction converts <tt>value</tt> to type
@@ -3847,7 +3927,7 @@ other types, use the <a href="#i_inttoptr">inttoptr</a> or
  <pre>
    %X = bitcast i8 255 to i8              <i>; yields i8 :-1</i>
    %Y = bitcast i32* %x to sint*          <i>; yields sint*:%x</i>
-  %Z = bitcast <2xint> %V to i64;        <i>; yields i64: %V</i>   
+  %Z = bitcast &lt;2 x int&gt; %V to i64;      <i>; yields i64: %V</i>   
  </pre>
  </div>
  
@@ -3863,7 +3943,7 @@ instructions, which defy better classification.</p>
  </div>
  <div class="doc_text">
  <h5>Syntax:</h5>
-<pre>  &lt;result&gt; = icmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {i1} or {&lt;N x i1&gt}:result</i>
+<pre>  &lt;result&gt; = icmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {i1} or {&lt;N x i1&gt;}:result</i>
  </pre>
  <h5>Overview:</h5>
  <p>The '<tt>icmp</tt>' instruction returns a boolean value or
@@ -3873,6 +3953,7 @@ of its two integer, integer vector, or pointer operands.</p>
  <p>The '<tt>icmp</tt>' instruction takes three operands. The first operand is
  the condition code indicating the kind of comparison to perform. It is not
  a value, just a keyword. The possible condition codes are:
+</p>
  <ol>
    <li><tt>eq</tt>: equal</li>
    <li><tt>ne</tt>: not equal </li>
@@ -3893,12 +3974,13 @@ They must also be identical types.</p>
  <p>The '<tt>icmp</tt>' compares <tt>op1</tt> and <tt>op2</tt> according to 
  the condition code given as <tt>cond</tt>. The comparison performed always
  yields either an <a href="#t_primitive"><tt>i1</tt></a> or vector of <tt>i1</tt> result, as follows: 
+</p>
  <ol>
    <li><tt>eq</tt>: yields <tt>true</tt> if the operands are equal, 
    <tt>false</tt> otherwise. No sign interpretation is necessary or performed.
    </li>
    <li><tt>ne</tt>: yields <tt>true</tt> if the operands are unequal, 
-  <tt>false</tt> otherwise. No sign interpretation is necessary or performed.
+  <tt>false</tt> otherwise. No sign interpretation is necessary or performed.</li>
    <li><tt>ugt</tt>: interprets the operands as unsigned values and yields
    <tt>true</tt> if <tt>op1</tt> is greater than <tt>op2</tt>.</li>
    <li><tt>uge</tt>: interprets the operands as unsigned values and yields
@@ -3939,12 +4021,12 @@ Otherwise, the result is an <tt>i1</tt>.
  </div>
  <div class="doc_text">
  <h5>Syntax:</h5>
-<pre>  &lt;result&gt; = fcmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;     <i>; yields {i1} or {&lt;N x i1&gt}:result</i>
+<pre>  &lt;result&gt; = fcmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;     <i>; yields {i1} or {&lt;N x i1&gt;}:result</i>
  </pre>
  <h5>Overview:</h5>
  <p>The '<tt>fcmp</tt>' instruction returns a boolean value
  or vector of boolean values based on comparison
-of its operands.
+of its operands.</p>
  <p>
  If the operands are floating point scalars, then the result
  type is a boolean (<a href="#t_primitive"><tt>i1</tt></a>).
@@ -3955,7 +4037,7 @@ operands being compared.</p>
  <h5>Arguments:</h5>
  <p>The '<tt>fcmp</tt>' instruction takes three operands. The first operand is
  the condition code indicating the kind of comparison to perform. It is not
-a value, just a keyword. The possible condition code are:
+a value, just a keyword. The possible condition codes are:</p>
  <ol>
    <li><tt>false</tt>: no comparison, always returns false</li>
    <li><tt>oeq</tt>: ordered and equal</li>
@@ -3986,7 +4068,7 @@ according to the condition code given as <tt>cond</tt>.
  If the operands are vectors, then the vectors are compared
  element by element.
  Each comparison performed 
-always yields an <a href="#t_primitive">i1</a> result, as follows: 
+always yields an <a href="#t_primitive">i1</a> result, as follows:</p>
  <ol>
    <li><tt>false</tt>: always yields <tt>false</tt>, regardless of operands.</li>
    <li><tt>oeq</tt>: yields <tt>true</tt> if both operands are not a QNAN and 
@@ -4040,7 +4122,7 @@ element-wise comparison of its two integer vector operands.</p>
  <h5>Arguments:</h5>
  <p>The '<tt>vicmp</tt>' instruction takes three operands. The first operand is
  the condition code indicating the kind of comparison to perform. It is not
-a value, just a keyword. The possible condition code are:
+a value, just a keyword. The possible condition codes are:</p>
  <ol>
    <li><tt>eq</tt>: equal</li>
    <li><tt>ne</tt>: not equal </li>
@@ -4063,7 +4145,7 @@ identical type as the values being compared.  The most significant bit in each
  element is 1 if the element-wise comparison evaluates to true, and is 0
  otherwise.  All other bits of the result are undefined.  The condition codes
  are evaluated identically to the <a href="#i_icmp">'<tt>icmp</tt>'
-instruction</a>.
+instruction</a>.</p>
  
  <h5>Example:</h5>
  <pre>
@@ -4086,7 +4168,7 @@ elements have the same width as the input elements.</p>
  <h5>Arguments:</h5>
  <p>The '<tt>vfcmp</tt>' instruction takes three operands. The first operand is
  the condition code indicating the kind of comparison to perform. It is not
-a value, just a keyword. The possible condition code are:
+a value, just a keyword. The possible condition codes are:</p>
  <ol>
    <li><tt>false</tt>: no comparison, always returns false</li>
    <li><tt>oeq</tt>: ordered and equal</li>
@@ -4117,12 +4199,15 @@ having identical with to the width of the floating point elements. The most
  significant bit in each element is 1 if the element-wise comparison evaluates to
  true, and is 0 otherwise.  All other bits of the result are undefined.  The
  condition codes are evaluated identically to the 
-<a href="#i_fcmp">'<tt>fcmp</tt>' instruction</a>.
+<a href="#i_fcmp">'<tt>fcmp</tt>' instruction</a>.</p>
  
  <h5>Example:</h5>
  <pre>
-  &lt;result&gt; = vfcmp oeq &lt;2 x float&gt; &lt; float 4, float 0 &gt;, &lt; float 5, float 0 &gt;       <i>; yields: result=&lt;2 x i32&gt; &lt; i32 0, i32 -1 &gt;</i>
-  &lt;result&gt; = vfcmp ult &lt;2 x double&gt; &lt; double 1, double 2 &gt;, &lt; double 2, double 2&gt;   <i>; yields: result=&lt;2 x i64&gt; &lt; i64 -1, i64 0 &gt;</i>
+  <i>; yields: result=&lt;2 x i32&gt; &lt; i32 0, i32 -1 &gt;</i>
+  &lt;result&gt; = vfcmp oeq &lt;2 x float&gt; &lt; float 4, float 0 &gt;, &lt; float 5, float 0 &gt;
+  
+  <i>; yields: result=&lt;2 x i64&gt; &lt; i64 -1, i64 0 &gt;</i>
+  &lt;result&gt; = vfcmp ult &lt;2 x double&gt; &lt; double 1, double 2 &gt;, &lt; double 2, double 2&gt;
  </pre>
  </div>
  
@@ -4179,7 +4264,7 @@ Loop:       ; Infinite loop that counts from 0 on up...
  <pre>
    &lt;result&gt; = select <i>selty</i> &lt;cond&gt;, &lt;ty&gt; &lt;val1&gt;, &lt;ty&gt; &lt;val2&gt;             <i>; yields ty</i>
  
-  <i>selty</i> is either i1 or {&lt;N x i1&gt}
+  <i>selty</i> is either i1 or {&lt;N x i1&gt;}
  </pre>
  
  <h5>Overview:</h5>
@@ -4230,7 +4315,7 @@ by element.
  
  <h5>Syntax:</h5>
  <pre>
-  &lt;result&gt; = [tail] call [<a href="#callingconv">cconv</a>] &lt;ty&gt; [&lt;fnty&gt;*] &lt;fnptrval&gt;(&lt;param list&gt;)
+  &lt;result&gt; = [tail] call [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>] &lt;ty&gt; [&lt;fnty&gt;*] &lt;fnptrval&gt;(&lt;function args&gt;) [<a href="#fnattrs">fn attrs</a>]
  </pre>
  
  <h5>Overview:</h5>
@@ -4247,13 +4332,20 @@ by element.
      any allocas or varargs in the caller.  If the "tail" marker is present, the
      function call is eligible for tail call optimization.  Note that calls may
      be marked "tail" even if they do not occur before a <a
-    href="#i_ret"><tt>ret</tt></a> instruction.
+    href="#i_ret"><tt>ret</tt></a> instruction.</p>
    </li>
    <li>
      <p>The optional "cconv" marker indicates which <a href="#callingconv">calling
      convention</a> the call should use.  If none is specified, the call defaults
-    to using C calling conventions.
+    to using C calling conventions.</p>
    </li>
+
+  <li>
+    <p>The optional <a href="#paramattrs">Parameter Attributes</a> list for
+    return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>', 
+    and '<tt>inreg</tt>' attributes are valid here.</p>
+  </li>
+
    <li>
      <p>'<tt>ty</tt>': the type of the call instruction itself which is also
      the type of the return value.  Functions that return no value are marked
@@ -4278,6 +4370,11 @@ by element.
      indicates the function accepts a variable number of arguments, the extra 
      arguments can be specified.</p>
    </li>
+  <li> 
+  <p>The optional <a href="#fnattrs">function attributes</a> list. Only
+  '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
+  '<tt>readnone</tt>' attributes are valid here.</p>
+  </li>
  </ol>
  
  <h5>Semantics:</h5>
@@ -4287,7 +4384,7 @@ transfer to a specified function, with its incoming arguments bound to
  the specified values. Upon a '<tt><a href="#i_ret">ret</a></tt>'
  instruction in the called function, control flow continues with the
  instruction after the function call, and the return value of the
-function is bound to the result argument.
+function is bound to the result argument.</p>
  
  <h5>Example:</h5>
  
@@ -4299,9 +4396,11 @@ function is bound to the result argument.
    call void %foo(i8 97 signext)
  
    %struct.A = type { i32, i8 }
-  %r = call %struct.A @foo()                     <i>; yields { 32, i8 }</i>
+  %r = call %struct.A @foo()                        <i>; yields { i32, i8 }</i>
    %gr = extractvalue %struct.A %r, 0                <i>; yields i32</i>
    %gr1 = extractvalue %struct.A %r, 1               <i>; yields i8</i>
+  call void @foo() noreturn                         <i>; indicates that @foo never returns normally</i>
+  %ZZ = call zeroext i32 @bar()                     <i>; return value is zero extended</i>
  </pre>
  
  </div>
@@ -4465,17 +4564,17 @@ declare void @llvm.va_end(i8*)
  <h5>Syntax:</h5>
  <pre>  declare void @llvm.va_start(i8* &lt;arglist&gt;)<br></pre>
  <h5>Overview:</h5>
-<P>The '<tt>llvm.va_start</tt>' intrinsic initializes
+<p>The '<tt>llvm.va_start</tt>' intrinsic initializes
  <tt>*&lt;arglist&gt;</tt> for subsequent use by <tt><a
  href="#i_va_arg">va_arg</a></tt>.</p>
  
  <h5>Arguments:</h5>
  
-<P>The argument is a pointer to a <tt>va_list</tt> element to initialize.</p>
+<p>The argument is a pointer to a <tt>va_list</tt> element to initialize.</p>
  
  <h5>Semantics:</h5>
  
-<P>The '<tt>llvm.va_start</tt>' intrinsic works just like the <tt>va_start</tt>
+<p>The '<tt>llvm.va_start</tt>' intrinsic works just like the <tt>va_start</tt>
  macro available in C.  In a target-dependent way, it initializes the
  <tt>va_list</tt> element to which the argument points, so that the next call to
  <tt>va_arg</tt> will produce the first variable argument passed to the function.
@@ -4991,7 +5090,13 @@ for more efficient code generation.
  <div class="doc_text">
  
  <h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use llvm.memcpy on any integer bit
+width. Not all targets support all bit widths however.</p>
  <pre>
+  declare void @llvm.memcpy.i8(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+                                i8 &lt;len&gt;, i32 &lt;align&gt;)
+  declare void @llvm.memcpy.i16(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+                                i16 &lt;len&gt;, i32 &lt;align&gt;)
    declare void @llvm.memcpy.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
                                  i32 &lt;len&gt;, i32 &lt;align&gt;)
    declare void @llvm.memcpy.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
@@ -5045,7 +5150,13 @@ be set to 0 or 1.
  <div class="doc_text">
  
  <h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use llvm.memmove on any integer bit
+width. Not all targets support all bit widths however.</p>
  <pre>
+  declare void @llvm.memmove.i8(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+                                 i8 &lt;len&gt;, i32 &lt;align&gt;)
+  declare void @llvm.memmove.i16(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+                                 i16 &lt;len&gt;, i32 &lt;align&gt;)
    declare void @llvm.memmove.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
                                   i32 &lt;len&gt;, i32 &lt;align&gt;)
    declare void @llvm.memmove.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
@@ -5100,7 +5211,13 @@ be set to 0 or 1.
  <div class="doc_text">
  
  <h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use llvm.memset on any integer bit
+width. Not all targets support all bit widths however.</p>
  <pre>
+  declare void @llvm.memset.i8(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
+                                i8 &lt;len&gt;, i32 &lt;align&gt;)
+  declare void @llvm.memset.i16(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
+                                i16 &lt;len&gt;, i32 &lt;align&gt;)
    declare void @llvm.memset.i32(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
                                  i32 &lt;len&gt;, i32 &lt;align&gt;)
    declare void @llvm.memset.i64(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
@@ -5155,7 +5272,7 @@ this can be specified as the fourth argument, otherwise it should be set to 0 or
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.sqrt</tt> on any 
  floating point or vector of floating point type. Not all targets support all
-types however.
+types however.</p>
  <pre>
    declare float     @llvm.sqrt.f32(float %Val)
    declare double    @llvm.sqrt.f64(double %Val)
@@ -5199,7 +5316,7 @@ floating point number.
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.powi</tt> on any 
  floating point or vector of floating point type. Not all targets support all
-types however.
+types however.</p>
  <pre>
    declare float     @llvm.powi.f32(float  %Val, i32 %power)
    declare double    @llvm.powi.f64(double %Val, i32 %power)
@@ -5241,7 +5358,7 @@ unspecified sequence of rounding operations.</p>
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.sin</tt> on any 
  floating point or vector of floating point type. Not all targets support all
-types however.
+types however.</p>
  <pre>
    declare float     @llvm.sin.f32(float  %Val)
    declare double    @llvm.sin.f64(double %Val)
@@ -5280,7 +5397,7 @@ conditions in the same way.</p>
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.cos</tt> on any 
  floating point or vector of floating point type. Not all targets support all
-types however.
+types however.</p>
  <pre>
    declare float     @llvm.cos.f32(float  %Val)
    declare double    @llvm.cos.f64(double %Val)
@@ -5319,7 +5436,7 @@ conditions in the same way.</p>
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.pow</tt> on any 
  floating point or vector of floating point type. Not all targets support all
-types however.
+types however.</p>
  <pre>
    declare float     @llvm.pow.f32(float  %Val, float %Power)
    declare double    @llvm.pow.f64(double %Val, double %Power)
@@ -5374,7 +5491,7 @@ These allow efficient code generation for some algorithms.
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic function. You can use bswap on any integer
-type that is an even number of bytes (i.e. BitWidth % 16 == 0).
+type that is an even number of bytes (i.e. BitWidth % 16 == 0).</p>
  <pre>
    declare i16 @llvm.bswap.i16(i16 &lt;id&gt;)
    declare i32 @llvm.bswap.i32(i32 &lt;id&gt;)
@@ -5413,7 +5530,7 @@ additional even-byte lengths (6 bytes, 8 bytes and more, respectively).
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit
-width. Not all targets support all bit widths however.
+width. Not all targets support all bit widths however.</p>
  <pre>
    declare i8 @llvm.ctpop.i8 (i8  &lt;src&gt;)
    declare i16 @llvm.ctpop.i16(i16 &lt;src&gt;)
@@ -5452,7 +5569,7 @@ The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable.
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.ctlz</tt> on any 
-integer bit width. Not all targets support all bit widths however.
+integer bit width. Not all targets support all bit widths however.</p>
  <pre>
    declare i8 @llvm.ctlz.i8 (i8  &lt;src&gt;)
    declare i16 @llvm.ctlz.i16(i16 &lt;src&gt;)
@@ -5495,7 +5612,7 @@ of src. For example, <tt>llvm.ctlz(i32 2) = 30</tt>.
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.cttz</tt> on any 
-integer bit width. Not all targets support all bit widths however.
+integer bit width. Not all targets support all bit widths however.</p>
  <pre>
    declare i8 @llvm.cttz.i8 (i8  &lt;src&gt;)
    declare i16 @llvm.cttz.i16(i16 &lt;src&gt;)
@@ -5536,7 +5653,7 @@ of src.  For example, <tt>llvm.cttz(2) = 1</tt>.
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.part.select</tt> 
-on any integer bit width.
+on any integer bit width.</p>
  <pre>
    declare i17 @llvm.part.select.i17 (i17 %val, i32 %loBit, i32 %hiBit)
    declare i29 @llvm.part.select.i29 (i29 %val, i32 %loBit, i32 %hiBit)
@@ -5566,7 +5683,7 @@ only the <tt>%hiBit - %loBit</tt> bits set, as follows:</p>
    <li>The <tt>%loBit</tt> value is subtracted from the <tt>%hiBit</tt> value
    to determine the number of bits to retain.</li>
    <li>A mask of the retained bits is created by shifting a -1 value.</li>
-  <li>The mask is ANDed with <tt>%val</tt> to produce the result.
+  <li>The mask is ANDed with <tt>%val</tt> to produce the result.</li>
  </ol>
  <p>In reverse mode, a similar computation is made except that the bits are
  returned in the reverse order. So, for example, if <tt>X</tt> has the value
@@ -5583,7 +5700,7 @@ returned in the reverse order. So, for example, if <tt>X</tt> has the value
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.part.set</tt> 
-on any integer bit width.
+on any integer bit width.</p>
  <pre>
    declare i17 @llvm.part.set.i17.i9 (i17 %val, i9 %repl, i32 %lo, i32 %hi)
    declare i29 @llvm.part.set.i29.i9 (i29 %val, i9 %repl, i32 %lo, i32 %hi)
@@ -5612,10 +5729,10 @@ up to that size.</p>
  <p>In forward mode, the bits between <tt>%lo</tt> and <tt>%hi</tt> (inclusive)
  are replaced with corresponding bits from <tt>%repl</tt>. That is the 0th bit
  in <tt>%repl</tt> replaces the <tt>%lo</tt>th bit in <tt>%val</tt>, and so on, up
-to the <tt>%hi</tt>th bit. 
+to the <tt>%hi</tt>th bit.</p>
  <p>In reverse mode, a similar computation is made except that the bits are
  reversed.  That is, the <tt>0</tt>th bit in <tt>%repl</tt> replaces the 
-<tt>%hi</tt> bit in <tt>%val</tt> and etc. down to the <tt>%lo</tt>th bit.
+<tt>%hi</tt>th bit in <tt>%val</tt>, and so on, down to the <tt>%lo</tt>th bit.</p>
  <h5>Examples:</h5>
  <pre>
    llvm.part.set(0xFFFF, 0, 4, 7) -&gt; 0xFF0F
@@ -5782,7 +5899,7 @@ i1 &lt;device&gt; )
      <li><tt>ls</tt>: load-store barrier</li>
      <li><tt>sl</tt>: store-load barrier</li>
      <li><tt>ss</tt>: store-store barrier</li>
-    <li><tt>device</tt>: barrier applies to device and uncached memory also.
+    <li><tt>device</tt>: barrier applies to device and uncached memory also.</li>
    </ul>
  <h5>Semantics:</h5>
  <p>
@@ -6306,6 +6423,7 @@ This intrinsic allows annotations to be put on arbitrary expressions
  with arbitrary strings.  This can be useful for special purpose optimizations 
  that want to look for these annotations.  These have no other defined use, they 
  are ignored by code generation and optimization.
+</p>
  </div>
  
  <!-- _______________________________________________________________________ -->
@@ -6341,13 +6459,47 @@ call of the abort() function.
  </p>
  </div>
  
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="int_stackprotector">'<tt>llvm.stackprotector</tt>' Intrinsic</a>
+</div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre>
+declare void @llvm.stackprotector( i8* &lt;guard&gt;, i8** &lt;slot&gt; )
+
+</pre>
+<h5>Overview:</h5>
+<p>
+  The <tt>llvm.stackprotector</tt> intrinsic takes the <tt>guard</tt> and stores
+  it onto the stack at <tt>slot</tt>. The stack slot is adjusted to ensure that
+  it is placed on the stack before local variables.
+</p>
+<h5>Arguments:</h5>
+<p>
+  The <tt>llvm.stackprotector</tt> intrinsic requires two pointer arguments. The
+  first argument is the value loaded from the stack guard
+  <tt>@__stack_chk_guard</tt>. The second argument is an <tt>alloca</tt> that
+  has enough space to hold the value of the guard.
+</p>
+<h5>Semantics:</h5>
+<p>
+  This intrinsic causes the prologue/epilogue inserter to force the position of
+  the <tt>AllocaInst</tt> stack slot to be before local variables on the
+  stack. This is to ensure that if a local variable on the stack is overwritten,
+  it will destroy the value of the guard. When the function exits, the guard on
+  the stack is checked against the original guard. If they're different, then
+  the program aborts by calling the <tt>__stack_chk_fail()</tt> function.
+</p>
+</div>
+
  <!-- *********************************************************************** -->
  <hr>
  <address>
    <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+  src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
    <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
  
    <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
    <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>